refactor: Move upload/delete transcript int video_config service
Merge pull request #37793 from openedx/farhan/upload-delete-transcript
This commit is contained in:
@@ -130,6 +130,7 @@ def _get_course_block(course_descriptor, user):
|
||||
""" Gets course block that takes into account user state and permissions """
|
||||
# Adding courseware imports here to insulate other apps (e.g. schedules) to
|
||||
# avoid import errors.
|
||||
# TODO remove the LMS dependency https://github.com/openedx/edx-platform/issues/37659
|
||||
from lms.djangoapps.courseware.model_data import FieldDataCache
|
||||
from lms.djangoapps.courseware.block_render import get_block_for_descriptor
|
||||
|
||||
|
||||
@@ -9,9 +9,13 @@ for the extracted video block in xblocks-contrib repository.
|
||||
import logging
|
||||
|
||||
from opaque_keys.edx.keys import CourseKey, UsageKey
|
||||
from opaque_keys.edx.locator import LibraryLocatorV2
|
||||
from xblocks_contrib.video.exceptions import TranscriptNotFoundError
|
||||
|
||||
from openedx.core.djangoapps.video_config import sharing
|
||||
from django.core.files import File
|
||||
from django.core.files.base import ContentFile
|
||||
from edxval.api import create_external_video, create_or_update_video_transcript, delete_video_transcript
|
||||
from organizations.api import get_course_organization
|
||||
from openedx.core.djangoapps.video_config.models import (
|
||||
CourseYoutubeBlockedFlag,
|
||||
@@ -19,6 +23,16 @@ from openedx.core.djangoapps.video_config.models import (
|
||||
)
|
||||
from openedx.core.djangoapps.video_config.toggles import TRANSCRIPT_FEEDBACK
|
||||
from openedx.core.djangoapps.video_pipeline.config.waffle import DEPRECATE_YOUTUBE
|
||||
from openedx.core.djangoapps.content_libraries.api import (
|
||||
add_library_block_static_asset_file,
|
||||
delete_library_block_static_asset_file,
|
||||
)
|
||||
from openedx.core.djangoapps.video_config.transcripts_utils import (
|
||||
Transcript,
|
||||
clean_video_id,
|
||||
get_html5_ids,
|
||||
remove_subs_from_store,
|
||||
)
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
@@ -30,6 +44,9 @@ class VideoConfigService:
|
||||
This service abstracts away edx-platform specific functionality
|
||||
that the Video XBlock needs, allowing the Video XBlock to be
|
||||
extracted to a separate repository.
|
||||
|
||||
TODO: This service could be improved in a few ways:
|
||||
https://github.com/openedx/edx-platform/issues/37656
|
||||
"""
|
||||
|
||||
def get_public_video_url(self, usage_id: UsageKey) -> str:
|
||||
@@ -120,3 +137,117 @@ class VideoConfigService:
|
||||
raise TranscriptNotFoundError(
|
||||
f"Failed to get transcript: {exc}"
|
||||
) from exc
|
||||
|
||||
def upload_transcript(
|
||||
self,
|
||||
*,
|
||||
video_block,
|
||||
language_code: str,
|
||||
new_language_code: str | None,
|
||||
transcript_file: File,
|
||||
edx_video_id: str | None,
|
||||
) -> None:
|
||||
"""
|
||||
Store a transcript, however the runtime prefers to.
|
||||
|
||||
Mutates:
|
||||
* video_block.transcripts
|
||||
* video_block.edx_video_id, iff a new video is created in edx-val.
|
||||
|
||||
Can raise:
|
||||
* UnicodeDecodeError
|
||||
* TranscriptsGenerationException
|
||||
"""
|
||||
is_library = isinstance(video_block.usage_key.context_key, LibraryLocatorV2)
|
||||
content: bytes = transcript_file.read()
|
||||
if is_library:
|
||||
# Save transcript as static asset in Learning Core if is a library component
|
||||
filename = f'static/transcript-{new_language_code}.srt'
|
||||
add_library_block_static_asset_file(video_block.usage_key, filename, content)
|
||||
else:
|
||||
edx_video_id = clean_video_id(edx_video_id)
|
||||
if not edx_video_id:
|
||||
# Back-populate the video ID for an external video.
|
||||
# pylint: disable=attribute-defined-outside-init
|
||||
edx_video_id = create_external_video(display_name='external video')
|
||||
video_block.edx_video_id = edx_video_id
|
||||
filename = f'{edx_video_id}-{new_language_code}.srt'
|
||||
# Convert SRT transcript into an SJSON format and upload it to S3 if a course component
|
||||
sjson_subs = Transcript.convert(
|
||||
content=content.decode('utf-8'),
|
||||
input_format=Transcript.SRT,
|
||||
output_format=Transcript.SJSON
|
||||
).encode()
|
||||
create_or_update_video_transcript(
|
||||
video_id=edx_video_id,
|
||||
language_code=language_code,
|
||||
metadata={
|
||||
'file_format': Transcript.SJSON,
|
||||
'language_code': new_language_code,
|
||||
},
|
||||
file_data=ContentFile(sjson_subs),
|
||||
)
|
||||
|
||||
# If a new transcript is added, then both new_language_code and
|
||||
# language_code fields will have the same value.
|
||||
if language_code != new_language_code:
|
||||
video_block.transcripts.pop(language_code, None)
|
||||
video_block.transcripts[new_language_code] = filename
|
||||
|
||||
if is_library:
|
||||
_save_transcript_field(video_block)
|
||||
|
||||
def delete_transcript(
|
||||
self,
|
||||
*,
|
||||
video_block,
|
||||
edx_video_id: str | None,
|
||||
language_code: str,
|
||||
) -> None:
|
||||
"""
|
||||
Delete a transcript from the runtime's storage.
|
||||
"""
|
||||
edx_video_id = clean_video_id(edx_video_id)
|
||||
if edx_video_id:
|
||||
delete_video_transcript(video_id=edx_video_id, language_code=language_code)
|
||||
if isinstance(video_block.context_key, LibraryLocatorV2):
|
||||
transcript_name = video_block.transcripts.pop(language_code, None)
|
||||
if transcript_name:
|
||||
delete_library_block_static_asset_file(video_block.usage_key, f"static/{transcript_name}")
|
||||
_save_transcript_field(video_block)
|
||||
else:
|
||||
if language_code == 'en':
|
||||
# remove any transcript file from content store for the video ids
|
||||
possible_sub_ids = [
|
||||
video_block.sub, # pylint: disable=access-member-before-definition
|
||||
video_block.youtube_id_1_0
|
||||
] + get_html5_ids(video_block.html5_sources)
|
||||
for sub_id in possible_sub_ids:
|
||||
remove_subs_from_store(sub_id, video_block, language_code)
|
||||
# update metadata as `en` can also be present in `transcripts` field
|
||||
remove_subs_from_store(
|
||||
video_block.transcripts.pop(language_code, None), video_block, language_code
|
||||
)
|
||||
# also empty `sub` field
|
||||
video_block.sub = '' # pylint: disable=attribute-defined-outside-init
|
||||
else:
|
||||
remove_subs_from_store(
|
||||
video_block.transcripts.pop(language_code, None), video_block, language_code
|
||||
)
|
||||
|
||||
|
||||
def _save_transcript_field(video_block):
|
||||
"""
|
||||
Hacky workaround to ensure that transcript field is saved for Learning Core video blocks.
|
||||
|
||||
It's not clear why this is necessary.
|
||||
"""
|
||||
field = video_block.fields['transcripts']
|
||||
if video_block.transcripts:
|
||||
transcripts_copy = video_block.transcripts.copy()
|
||||
# Need to delete to overwrite, it's weird behavior,
|
||||
# but it only works like this.
|
||||
field.delete_from(video_block)
|
||||
field.write_to(video_block, transcripts_copy)
|
||||
else:
|
||||
field.delete_from(video_block)
|
||||
|
||||
@@ -28,7 +28,6 @@ from xmodule.contentstore.content import StaticContent
|
||||
from xmodule.contentstore.django import contentstore
|
||||
from xmodule.exceptions import NotFoundError
|
||||
|
||||
from xmodule.video_block.bumper_utils import get_bumper_settings
|
||||
from xblocks_contrib.video.exceptions import TranscriptsGenerationException
|
||||
|
||||
|
||||
@@ -834,6 +833,11 @@ class VideoTranscriptsMixin:
|
||||
is_bumper(bool): If True, the request is for the bumper transcripts
|
||||
include_val_transcripts(bool): If True, include edx-val transcripts as well
|
||||
"""
|
||||
# TODO: This causes a circular import when imported at the top-level.
|
||||
# This import will be removed as part of the VideoBlock extraction.
|
||||
# https://github.com/openedx/edx-platform/issues/36282
|
||||
from xmodule.video_block.bumper_utils import get_bumper_settings
|
||||
|
||||
if is_bumper:
|
||||
transcripts = copy.deepcopy(get_bumper_settings(self).get('transcripts', {}))
|
||||
sub = transcripts.pop("en", "")
|
||||
|
||||
@@ -22,7 +22,6 @@ from xblock.field_data import DictFieldData, FieldData, SplitFieldData
|
||||
from xblock.fields import Scope, ScopeIds
|
||||
from xblock.runtime import IdReader, KvsFieldData, MemoryIdManager, Runtime
|
||||
|
||||
from openedx.core.djangoapps.video_config.services import VideoConfigService
|
||||
from xmodule.errortracker import make_error_tracker
|
||||
from xmodule.contentstore.django import contentstore
|
||||
from xmodule.modulestore.django import XBlockI18nService
|
||||
@@ -229,6 +228,8 @@ class XBlockRuntime(RuntimeShim, Runtime):
|
||||
Submit a grade for the block.
|
||||
"""
|
||||
if self.user and not self.user.is_anonymous:
|
||||
# TODO: we shouldn't be using an LMS API here.
|
||||
# https://github.com/openedx/edx-platform/issues/37660
|
||||
grades_signals.SCORE_PUBLISHED.send(
|
||||
sender=None,
|
||||
block=block,
|
||||
@@ -343,6 +344,8 @@ class XBlockRuntime(RuntimeShim, Runtime):
|
||||
elif service_name == 'error_tracker':
|
||||
return make_error_tracker()
|
||||
elif service_name == 'video_config':
|
||||
# Import here to avoid circular dependency
|
||||
from openedx.core.djangoapps.video_config.services import VideoConfigService
|
||||
return VideoConfigService()
|
||||
|
||||
# Otherwise, fall back to the base implementation which loads services
|
||||
|
||||
24
setup.cfg
24
setup.cfg
@@ -107,12 +107,6 @@ ignore_imports =
|
||||
# -> openedx.core.djangoapps.course_groups.partition_scheme
|
||||
# -> lms.djangoapps.courseware.masquerade
|
||||
openedx.core.djangoapps.course_groups.partition_scheme -> lms.djangoapps.courseware.masquerade
|
||||
# cms.djangoapps.contentstore.[various]
|
||||
# -> openedx.core.djangoapps.content.course_overviews.models
|
||||
# -> lms.djangoapps.ccx.utils
|
||||
# & lms.djangoapps.certificates.api
|
||||
# & lms.djangoapps.discussion.django_comment_client
|
||||
openedx.core.djangoapps.content.course_overviews.models -> lms.djangoapps.*.*
|
||||
# cms.djangoapps.export_course_metadata.tasks
|
||||
# -> openedx.core.djangoapps.schedules.content_highlights
|
||||
# -> lms.djangoapps.courseware.block_render & lms.djangoapps.courseware.model_data
|
||||
@@ -170,6 +164,12 @@ ignore_imports =
|
||||
# https://github.com/openedx/edx-platform/issues/33428
|
||||
openedx.core.djangoapps.content_libraries.permissions -> cms.djangoapps.course_creators.views
|
||||
openedx.core.djangoapps.content_libraries.tasks -> cms.djangoapps.contentstore.storage
|
||||
# Outstanding arch issue: course_overviews is tangled up with LMS
|
||||
# https://github.com/openedx/edx-platform/issues/37658
|
||||
openedx.core.djangoapps.content.course_overviews.models -> lms.djangoapps.**
|
||||
# Outstanding arch issue: content_highlights uses courseware block_render
|
||||
# https://github.com/openedx/edx-platform/issues/37659
|
||||
openedx.core.djangoapps.schedules.content_highlights -> lms.djangoapps.courseware.block_render
|
||||
|
||||
[importlinter:contract:2]
|
||||
name = Do not depend on non-public API of isolated apps.
|
||||
@@ -234,3 +234,15 @@ ignore_imports =
|
||||
# Content libraries imports contentstore.helpers which imports upstream_sync
|
||||
openedx.core.djangoapps.content_libraries.api.blocks -> cms.djangoapps.contentstore.helpers
|
||||
openedx.core.djangoapps.content_libraries.api.libraries -> cms.djangoapps.contentstore.helpers
|
||||
# Outstanding arch issue: course_overviews is tangled up with LMS
|
||||
# https://github.com/openedx/edx-platform/issues/37658
|
||||
openedx.core.djangoapps.content.course_overviews.models -> lms.djangoapps.**
|
||||
# Outstanding arch issue: content_highlights uses courseware block_render
|
||||
# https://github.com/openedx/edx-platform/issues/37659
|
||||
openedx.core.djangoapps.schedules.content_highlights -> lms.djangoapps.courseware.block_render
|
||||
# This import happens only because grading signals are defined in LMS rather than openedx-events
|
||||
# https://github.com/openedx/edx-platform/issues/37660
|
||||
openedx.core.djangoapps.xblock.runtime.runtime -> lms.djangoapps.grades.api
|
||||
# These imports will become OK once we worked on the following issue:
|
||||
# https://github.com/openedx/edx-platform/issues/33428
|
||||
openedx.core.djangoapps.video_config.services -> openedx.core.djangoapps.content_libraries.api
|
||||
|
||||
@@ -10,24 +10,19 @@ import json
|
||||
import logging
|
||||
import math
|
||||
|
||||
from django.core.files.base import ContentFile
|
||||
from django.utils.timezone import now
|
||||
from edxval.api import create_external_video, create_or_update_video_transcript, delete_video_transcript
|
||||
from opaque_keys.edx.locator import CourseLocator, LibraryLocatorV2
|
||||
from opaque_keys.edx.locator import CourseLocator
|
||||
from webob import Response
|
||||
from xblock.core import XBlock
|
||||
from xblock.exceptions import JsonHandlerError
|
||||
|
||||
from xmodule.exceptions import NotFoundError
|
||||
from xmodule.fields import RelativeTime
|
||||
from openedx.core.djangoapps.content_libraries import api as lib_api
|
||||
|
||||
from openedx.core.djangoapps.video_config.transcripts_utils import (
|
||||
Transcript,
|
||||
clean_video_id,
|
||||
get_html5_ids,
|
||||
get_transcript_from_contentstore,
|
||||
remove_subs_from_store,
|
||||
subs_filename,
|
||||
)
|
||||
from xblocks_contrib.video.exceptions import (
|
||||
@@ -460,141 +455,63 @@ class VideoStudioViewHandlers:
|
||||
|
||||
return response
|
||||
|
||||
def _save_transcript_field(self):
|
||||
"""
|
||||
Save `transcripts` block field.
|
||||
"""
|
||||
field = self.fields['transcripts']
|
||||
if self.transcripts:
|
||||
transcripts_copy = self.transcripts.copy()
|
||||
# Need to delete to overwrite, it's weird behavior,
|
||||
# but it only works like this.
|
||||
field.delete_from(self)
|
||||
field.write_to(self, transcripts_copy)
|
||||
else:
|
||||
field.delete_from(self)
|
||||
|
||||
def _studio_transcript_upload(self, request):
|
||||
"""
|
||||
Upload transcript. Used in "POST" method in `studio_transcript`
|
||||
"""
|
||||
_ = self.runtime.service(self, "i18n").ugettext
|
||||
video_config_service = self.runtime.service(self, 'video_config')
|
||||
if not video_config_service:
|
||||
return Response(json={'error': _('Runtime does not support transcripts.')}, status=400)
|
||||
error = self.validate_transcript_upload_data(data=request.POST)
|
||||
if error:
|
||||
response = Response(json={'error': error}, status=400)
|
||||
else:
|
||||
edx_video_id = clean_video_id(request.POST['edx_video_id'])
|
||||
language_code = request.POST['language_code']
|
||||
new_language_code = request.POST['new_language_code']
|
||||
transcript_file = request.POST['file'].file
|
||||
|
||||
is_library = isinstance(self.usage_key.context_key, LibraryLocatorV2)
|
||||
|
||||
if is_library:
|
||||
filename = f'transcript-{new_language_code}.srt'
|
||||
else:
|
||||
if not edx_video_id:
|
||||
# Back-populate the video ID for an external video.
|
||||
# pylint: disable=attribute-defined-outside-init
|
||||
self.edx_video_id = edx_video_id = create_external_video(display_name='external video')
|
||||
filename = f'{edx_video_id}-{new_language_code}.srt'
|
||||
|
||||
try:
|
||||
# Convert SRT transcript into an SJSON format
|
||||
# and upload it to S3.
|
||||
content = transcript_file.read()
|
||||
payload = {
|
||||
'edx_video_id': edx_video_id,
|
||||
'language_code': new_language_code
|
||||
}
|
||||
if is_library:
|
||||
# Save transcript as static asset in Learning Core if is a library component
|
||||
filename = f"static/{filename}"
|
||||
lib_api.add_library_block_static_asset_file(
|
||||
self.usage_key,
|
||||
filename,
|
||||
content,
|
||||
return Response(json={'error': error}, status=400)
|
||||
edx_video_id = (request.POST['edx_video_id'] or "").strip()
|
||||
language_code = request.POST['language_code']
|
||||
new_language_code = request.POST['new_language_code']
|
||||
try:
|
||||
video_config_service.upload_transcript(
|
||||
video_block=self, # NOTE: .edx_video_id and .transcripts may get mutated
|
||||
edx_video_id=edx_video_id,
|
||||
language_code=language_code,
|
||||
new_language_code=new_language_code,
|
||||
transcript_file=request.POST['file'].file,
|
||||
)
|
||||
return Response(
|
||||
json.dumps(
|
||||
{
|
||||
"edx_video_id": edx_video_id or self.edx_video_id,
|
||||
"language_code": new_language_code,
|
||||
}
|
||||
),
|
||||
status=201,
|
||||
)
|
||||
except (TranscriptsGenerationException, UnicodeDecodeError):
|
||||
return Response(
|
||||
json={
|
||||
'error': _(
|
||||
'There is a problem with this transcript file. Try to upload a different file.'
|
||||
)
|
||||
else:
|
||||
sjson_subs = Transcript.convert(
|
||||
content=content.decode('utf-8'),
|
||||
input_format=Transcript.SRT,
|
||||
output_format=Transcript.SJSON
|
||||
).encode()
|
||||
create_or_update_video_transcript(
|
||||
video_id=edx_video_id,
|
||||
language_code=language_code,
|
||||
metadata={
|
||||
'file_format': Transcript.SJSON,
|
||||
'language_code': new_language_code
|
||||
},
|
||||
file_data=ContentFile(sjson_subs),
|
||||
)
|
||||
|
||||
# If a new transcript is added, then both new_language_code and
|
||||
# language_code fields will have the same value.
|
||||
if language_code != new_language_code:
|
||||
self.transcripts.pop(language_code, None)
|
||||
self.transcripts[new_language_code] = filename
|
||||
|
||||
if is_library:
|
||||
self._save_transcript_field()
|
||||
response = Response(json.dumps(payload), status=201)
|
||||
except (TranscriptsGenerationException, UnicodeDecodeError):
|
||||
response = Response(
|
||||
json={
|
||||
'error': _(
|
||||
'There is a problem with this transcript file. Try to upload a different file.'
|
||||
)
|
||||
},
|
||||
status=400
|
||||
)
|
||||
return response
|
||||
},
|
||||
status=400
|
||||
)
|
||||
|
||||
def _studio_transcript_delete(self, request):
|
||||
"""
|
||||
Delete transcript. Used in "DELETE" method in `studio_transcript`
|
||||
"""
|
||||
_ = self.runtime.service(self, "i18n").ugettext
|
||||
video_config_service = self.runtime.service(self, 'video_config')
|
||||
if not video_config_service:
|
||||
return Response(json={'error': _('Runtime does not support transcripts.')}, status=400)
|
||||
request_data = request.json
|
||||
|
||||
if 'lang' not in request_data or 'edx_video_id' not in request_data:
|
||||
return Response(status=400)
|
||||
|
||||
language = request_data['lang']
|
||||
edx_video_id = clean_video_id(request_data['edx_video_id'])
|
||||
|
||||
if edx_video_id:
|
||||
delete_video_transcript(video_id=edx_video_id, language_code=language)
|
||||
|
||||
if isinstance(self.usage_key.context_key, LibraryLocatorV2):
|
||||
transcript_name = self.transcripts.pop(language, None)
|
||||
if transcript_name:
|
||||
# TODO: In the future, we need a proper XBlock API
|
||||
# like `self.static_assets.delete(...)` instead of coding
|
||||
# these runtime-specific/library-specific APIs.
|
||||
lib_api.delete_library_block_static_asset_file(
|
||||
self.usage_key,
|
||||
f"static/{transcript_name}",
|
||||
)
|
||||
self._save_transcript_field()
|
||||
else:
|
||||
if language == 'en':
|
||||
# remove any transcript file from content store for the video ids
|
||||
possible_sub_ids = [
|
||||
self.sub, # pylint: disable=access-member-before-definition
|
||||
self.youtube_id_1_0
|
||||
] + get_html5_ids(self.html5_sources)
|
||||
for sub_id in possible_sub_ids:
|
||||
remove_subs_from_store(sub_id, self, language)
|
||||
|
||||
# update metadata as `en` can also be present in `transcripts` field
|
||||
remove_subs_from_store(self.transcripts.pop(language, None), self, language)
|
||||
|
||||
# also empty `sub` field
|
||||
self.sub = '' # pylint: disable=attribute-defined-outside-init
|
||||
else:
|
||||
remove_subs_from_store(self.transcripts.pop(language, None), self, language)
|
||||
|
||||
video_config_service.delete_transcript(
|
||||
video_block=self,
|
||||
edx_video_id=request_data['edx_video_id'],
|
||||
language_code=request_data['lang'],
|
||||
)
|
||||
return Response(status=200)
|
||||
|
||||
def _studio_transcript_get(self, request):
|
||||
|
||||
Reference in New Issue
Block a user