feat: Video editor supports transcripts [FC-0076] (#36058)

* Add error handler on save video to avoid creating sjson
* Support transcripts without edx_video_id in definition_to_xml
* When copying a video from a library to a course: Create a new edx_video_id
* Save transcripts as static assets in a video in a library when adding a new transcript.
* Delete transcripts as static assets in a video in a library when deleting transcripts.
* Support download transcript in a video in a library.
* Support replace transcript in a video in a library.
* Support updating transcripts in video in a library.
* Refactor the code of downloading YouTube transcripts to enable this feature in libraries.
* Support copy from a library to a course and a course to a library.
This commit is contained in:
Chris Chávez
2025-02-21 13:33:13 -05:00
committed by GitHub
parent 6c6fd84e53
commit b6489e718c
12 changed files with 573 additions and 183 deletions

View File

@@ -10,6 +10,7 @@ from mimetypes import guess_type
import re
from attrs import frozen, Factory
from django.core.files.base import ContentFile
from django.conf import settings
from django.contrib.auth import get_user_model
from django.utils.translation import gettext as _
@@ -23,6 +24,11 @@ from xmodule.contentstore.django import contentstore
from xmodule.exceptions import NotFoundError
from xmodule.modulestore.django import modulestore
from xmodule.xml_block import XmlMixin
from xmodule.video_block.transcripts_utils import Transcript, build_components_import_path
from edxval.api import (
create_external_video,
create_or_update_video_transcript,
)
from cms.djangoapps.models.settings.course_grading import CourseGradingModel
from cms.lib.xblock.upstream_sync import UpstreamLink, UpstreamLinkException, fetch_customizable_fields
@@ -274,8 +280,14 @@ def _insert_static_files_into_downstream_xblock(
course_key=downstream_xblock.context_key,
staged_content_id=staged_content_id,
static_files=static_files,
usage_key=downstream_xblock.scope_ids.usage_id,
usage_key=downstream_xblock.usage_key,
)
if downstream_xblock.usage_key.block_type == 'video':
_import_transcripts(
downstream_xblock,
staged_content_id=staged_content_id,
static_files=static_files,
)
# Rewrite the OLX's static asset references to point to the new
# locations for those assets. See _import_files_into_course for more
@@ -331,6 +343,13 @@ def import_staged_content_from_user_clipboard(parent_key: UsageKey, request) ->
tags=user_clipboard.content.tags,
)
usage_key = new_xblock.usage_key
if usage_key.block_type == 'video':
# The edx_video_id must always be new so as not
# to interfere with the data of the copied block
new_xblock.edx_video_id = create_external_video(display_name='external video')
store.update_item(new_xblock, request.user.id)
notices = _insert_static_files_into_downstream_xblock(new_xblock, user_clipboard.content.id, request)
return new_xblock, notices
@@ -630,8 +649,8 @@ def _import_file_into_course(
# we're not going to attempt to change.
if clipboard_file_path.startswith('static/'):
# If it's in this form, it came from a library and assumes component-local assets
file_path = clipboard_file_path.lstrip('static/')
import_path = f"components/{usage_key.block_type}/{usage_key.block_id}/{file_path}"
file_path = clipboard_file_path.removeprefix('static/')
import_path = build_components_import_path(usage_key, file_path)
filename = pathlib.Path(file_path).name
new_key = course_key.make_asset_key("asset", import_path.replace("/", "_"))
else:
@@ -672,6 +691,50 @@ def _import_file_into_course(
return False, {}
def _import_transcripts(
block: XBlock,
staged_content_id: int,
static_files: list[content_staging_api.StagedContentFileData],
):
"""
Adds transcripts to VAL using the new edx_video_id.
"""
for file_data_obj in static_files:
clipboard_file_path = file_data_obj.filename
data = content_staging_api.get_staged_content_static_file_data(
staged_content_id,
clipboard_file_path
)
if data is None:
raise NotFoundError(file_data_obj.source_key)
if clipboard_file_path.startswith('static/'):
# If it's in this form, it came from a library and assumes component-local assets
file_path = clipboard_file_path.removeprefix('static/')
else:
# Otherwise it came from a course...
file_path = clipboard_file_path
filename = pathlib.Path(file_path).name
language_code = next((k for k, v in block.transcripts.items() if v == filename), None)
if language_code:
sjson_subs = Transcript.convert(
content=data,
input_format=Transcript.SRT,
output_format=Transcript.SJSON
).encode()
create_or_update_video_transcript(
video_id=block.edx_video_id,
language_code=language_code,
metadata={
'file_format': Transcript.SJSON,
'language_code': language_code
},
file_data=ContentFile(sjson_subs),
)
def is_item_in_course_tree(item):
"""
Check that the item is in the course tree.

View File

@@ -88,6 +88,7 @@ from openedx.core.lib.api.view_utils import (
DeveloperErrorViewMixin,
view_auth_classes,
)
from xmodule.video_block.transcripts_utils import clear_transcripts
from xmodule.modulestore.django import modulestore
from xmodule.modulestore.exceptions import ItemNotFoundError
@@ -224,6 +225,9 @@ class SyncFromUpstreamView(DeveloperErrorViewMixin, APIView):
"""
downstream = _load_accessible_block(request.user, usage_key_string, require_write_access=True)
try:
if downstream.usage_key.block_type == "video":
# Delete all transcripts so we can copy new ones from upstream
clear_transcripts(downstream)
upstream = sync_from_upstream(downstream, request.user)
static_file_notices = import_static_assets_for_library_sync(downstream, upstream, request)
except UpstreamLinkException as exc:

View File

@@ -264,7 +264,8 @@ class PostDownstreamSyncViewTest(_DownstreamSyncViewTestMixin, SharedModuleStore
@patch.object(UpstreamLink, "get_for_block", _get_upstream_link_good_and_syncable)
@patch.object(downstreams_views, "sync_from_upstream")
@patch.object(downstreams_views, "import_static_assets_for_library_sync", return_value=StaticFileNotices())
def test_200(self, mock_sync_from_upstream, mock_import_staged_content):
@patch.object(downstreams_views, "clear_transcripts")
def test_200(self, mock_sync_from_upstream, mock_import_staged_content, mock_clear_transcripts):
"""
Does the happy path work?
"""
@@ -273,6 +274,7 @@ class PostDownstreamSyncViewTest(_DownstreamSyncViewTestMixin, SharedModuleStore
assert response.status_code == 200
assert mock_sync_from_upstream.call_count == 1
assert mock_import_staged_content.call_count == 1
assert mock_clear_transcripts.call_count == 1
class DeleteDownstreamSyncViewtest(_DownstreamSyncViewTestMixin, SharedModuleStoreTestCase):

View File

@@ -15,9 +15,11 @@ from django.test.utils import override_settings
from django.urls import reverse
from edxval.api import create_video
from opaque_keys.edx.keys import UsageKey
from organizations.tests.factories import OrganizationFactory
from cms.djangoapps.contentstore.tests.utils import CourseTestCase, setup_caption_responses
from openedx.core.djangoapps.contentserver.caching import del_cached_content
from openedx.core.djangoapps.content_libraries import api as lib_api
from xmodule.contentstore.content import StaticContent # lint-amnesty, pylint: disable=wrong-import-order
from xmodule.contentstore.django import contentstore # lint-amnesty, pylint: disable=wrong-import-order
from xmodule.exceptions import NotFoundError # lint-amnesty, pylint: disable=wrong-import-order
@@ -27,8 +29,10 @@ from xmodule.video_block.transcripts_utils import ( # lint-amnesty, pylint: dis
GetTranscriptsFromYouTubeException,
Transcript,
get_video_transcript_content,
remove_subs_from_store
get_transcript,
remove_subs_from_store,
)
from openedx.core.djangoapps.xblock import api as xblock_api
TEST_DATA_CONTENTSTORE = copy.deepcopy(settings.CONTENTSTORE)
TEST_DATA_CONTENTSTORE['DOC_STORE_CONFIG']['db'] = 'test_xcontent_%s' % uuid4().hex
@@ -92,6 +96,21 @@ class BaseTranscripts(CourseTestCase):
resp = self.client.ajax_post('/xblock/', data)
self.assertEqual(resp.status_code, 200)
self.library = lib_api.create_library(
org=OrganizationFactory.create(short_name="org1"),
slug="lib",
title="Library",
)
self.library_block_metadata = lib_api.create_library_block(
self.library.key,
"video",
"video-transcript",
)
self.library_block = xblock_api.load_block(
self.library_block_metadata.usage_key,
self.user,
)
self.video_usage_key = self._get_usage_key(resp)
self.item = modulestore().get_item(self.video_usage_key)
# hI10vDNYz4M - valid Youtube ID with transcripts.
@@ -702,6 +721,25 @@ class TestReplaceTranscripts(BaseTranscripts):
expected_sjson_content = json.loads(SJSON_TRANSCRIPT_CONTENT)
self.assertDictEqual(actual_sjson_content, expected_sjson_content)
def test_replace_transcript_library_content_success(self):
# Make call to replace transcripts from youtube
response = self.replace_transcript(self.library_block_metadata.usage_key, self.youtube_id)
# Verify the response
self.assert_response(response, expected_status_code=200, expected_message='Success')
# Obtain updated block
updated_block = xblock_api.load_block(
self.library_block_metadata.usage_key,
self.user,
)
# Verify transcript content
transcript = get_transcript(updated_block, 'en', Transcript.SJSON)
actual_sjson_content = json.loads(transcript[0])
expected_sjson_content = json.loads(SJSON_TRANSCRIPT_CONTENT)
self.assertDictEqual(actual_sjson_content, expected_sjson_content)
def test_replace_transcript_fails_without_data(self):
"""
Verify that replace transcript fails if we do not provide video data in request.

View File

@@ -19,7 +19,8 @@ from django.http import Http404, HttpResponse
from django.utils.translation import gettext as _
from edxval.api import create_external_video, create_or_update_video_transcript
from opaque_keys import InvalidKeyError
from opaque_keys.edx.keys import UsageKey
from opaque_keys.edx.keys import UsageKey, UsageKeyV2
from opaque_keys.edx.locator import LibraryLocatorV2
from cms.djangoapps.contentstore.video_storage_handlers import TranscriptProvider
from common.djangoapps.student.auth import has_course_author_access
@@ -43,6 +44,9 @@ from xmodule.video_block.transcripts_utils import ( # lint-amnesty, pylint: dis
get_transcript_link_from_youtube,
get_transcript_links_from_youtube,
)
from openedx.core.djangoapps.content_libraries import api as lib_api
from openedx.core.djangoapps.xblock import api as xblock_api
from openedx.core.djangoapps.xblock.data import CheckPerm
__all__ = [
'upload_transcripts',
@@ -87,6 +91,49 @@ def link_video_to_component(video_component, user):
return edx_video_id
def save_video_transcript_in_learning_core(
usage_key,
input_format,
transcript_content,
language_code
):
"""
Saves a video transcript to the learning core.
Learning Core uses the standard `.srt` format for subtitles.
Note: SJSON is an edx-specific format that we're trying to move away from,
so for all new stuff related to Learning Core should only use `.srt`.
Arguments:
usage_key: UsageKey of the block
input_format: Input transcript format for content being passed.
transcript_content: Content of the transcript file
language_code: transcript language code
Returns:
result: A boolean indicating whether the transcript was saved or not.
video_key: Key used in video filename
"""
try:
srt_content = Transcript.convert(
content=transcript_content,
input_format=input_format,
output_format=Transcript.SRT
).encode()
filename = f"static/transcript-{language_code}.srt"
lib_api.add_library_block_static_asset_file(
usage_key,
filename,
srt_content,
)
result = True
except (TranscriptsGenerationException, UnicodeDecodeError):
result = False
return result
def save_video_transcript(edx_video_id, input_format, transcript_content, language_code):
"""
Saves a video transcript to the VAL and its content to the configured django storage(DS).
@@ -118,6 +165,7 @@ def save_video_transcript(edx_video_id, input_format, transcript_content, langua
},
file_data=ContentFile(sjson_subs),
)
result = True
except (TranscriptsGenerationException, UnicodeDecodeError):
result = False
@@ -145,6 +193,7 @@ def validate_video_block(request, locator):
item = _get_item(request, {'locator': locator})
if item.category != 'video':
error = _('Transcripts are supported only for "video" blocks.')
except (InvalidKeyError, ItemNotFoundError):
error = _('Cannot find item by locator.')
@@ -319,61 +368,38 @@ def check_transcripts(request): # lint-amnesty, pylint: disable=too-many-statem
get_transcript_from_val(edx_video_id=edx_video_id, lang='en')
command = 'found'
except NotFoundError:
filename = f'subs_{item.sub}.srt.sjson'
content_location = StaticContent.compute_location(item.location.course_key, filename)
try:
local_transcripts = contentstore().find(content_location).data.decode('utf-8')
transcripts_presence['current_item_subs'] = item.sub
except NotFoundError:
pass
# Check for youtube transcripts presence
youtube_id = videos.get('youtube', None)
if youtube_id:
transcripts_presence['is_youtube_mode'] = True
_check_youtube_transcripts(
transcripts_presence,
youtube_id,
item,
)
# youtube local
filename = f'subs_{youtube_id}.srt.sjson'
if not isinstance(item.usage_key, UsageKeyV2):
filename = f'subs_{item.sub}.srt.sjson'
content_location = StaticContent.compute_location(item.location.course_key, filename)
try:
local_transcripts = contentstore().find(content_location).data.decode('utf-8')
transcripts_presence['youtube_local'] = True
contentstore().find(content_location).data.decode('utf-8')
transcripts_presence['current_item_subs'] = item.sub
except NotFoundError:
log.debug("Can't find transcripts in storage for youtube id: %s", youtube_id)
pass
if get_transcript_link_from_youtube(youtube_id):
transcripts_presence['youtube_server'] = True
#check youtube local and server transcripts for equality
if transcripts_presence['youtube_server'] and transcripts_presence['youtube_local']:
# Check for html5 local transcripts presence
html5_subs = []
for html5_id in videos['html5']:
filename = f'subs_{html5_id}.srt.sjson'
content_location = StaticContent.compute_location(item.location.course_key, filename)
try:
transcript_links = get_transcript_links_from_youtube(
youtube_id,
settings,
item.runtime.service(item, "i18n")
html5_subs.append(contentstore().find(content_location).data)
transcripts_presence['html5_local'].append(html5_id)
except NotFoundError:
log.debug("Can't find transcripts in storage for non-youtube video_id: %s", html5_id)
if len(html5_subs) == 2: # check html5 transcripts for equality
transcripts_presence['html5_equal'] = (
json.loads(html5_subs[0].decode('utf-8')) == json.loads(html5_subs[1].decode('utf-8'))
)
for (_, link) in transcript_links.items():
youtube_server_subs = get_transcript_from_youtube(
link, youtube_id, item.runtime.service(item, "i18n")
)
if json.loads(local_transcripts) == youtube_server_subs: # check transcripts for equality
transcripts_presence['youtube_diff'] = False
except GetTranscriptsFromYouTubeException:
pass
# Check for html5 local transcripts presence
html5_subs = []
for html5_id in videos['html5']:
filename = f'subs_{html5_id}.srt.sjson'
content_location = StaticContent.compute_location(item.location.course_key, filename)
try:
html5_subs.append(contentstore().find(content_location).data)
transcripts_presence['html5_local'].append(html5_id)
except NotFoundError:
log.debug("Can't find transcripts in storage for non-youtube video_id: %s", html5_id)
if len(html5_subs) == 2: # check html5 transcripts for equality
transcripts_presence['html5_equal'] = (
json.loads(html5_subs[0].decode('utf-8')) == json.loads(html5_subs[1].decode('utf-8'))
)
command, __ = _transcripts_logic(transcripts_presence, videos)
@@ -381,6 +407,43 @@ def check_transcripts(request): # lint-amnesty, pylint: disable=too-many-statem
return JsonResponse(transcripts_presence)
def _check_youtube_transcripts(transcripts_presence, youtube_id, item):
"""
Check for youtube transcripts presence
"""
transcripts_presence['is_youtube_mode'] = True
if get_transcript_link_from_youtube(youtube_id):
transcripts_presence['youtube_server'] = True
if not isinstance(item.usage_key, UsageKeyV2):
# youtube local
filename = f'subs_{youtube_id}.srt.sjson'
content_location = StaticContent.compute_location(item.location.course_key, filename)
try:
local_transcripts = contentstore().find(content_location).data.decode('utf-8')
transcripts_presence['youtube_local'] = True
except NotFoundError:
log.debug("Can't find transcripts in storage for youtube id: %s", youtube_id)
# check youtube local and server transcripts for equality
if transcripts_presence['youtube_server'] and transcripts_presence['youtube_local']:
try:
transcript_links = get_transcript_links_from_youtube(
youtube_id,
settings,
item.runtime.service(item, "i18n")
)
for (_, link) in transcript_links.items():
youtube_server_subs = get_transcript_from_youtube(
link, youtube_id, item.runtime.service(item, "i18n")
)
if json.loads(local_transcripts) == youtube_server_subs: # check transcripts for equality
transcripts_presence['youtube_diff'] = False
except GetTranscriptsFromYouTubeException:
pass
def _transcripts_logic(transcripts_presence, videos):
"""
By `transcripts_presence` content, figure what show to user:
@@ -447,7 +510,7 @@ def _validate_transcripts_data(request):
data: dict, loaded json from request,
videos: parsed `data` to useful format,
item: video item from storage
item: video item from storage or library
Raises `TranscriptsRequestValidationException` if validation is unsuccessful
or `PermissionDenied` if user has no access.
@@ -529,6 +592,7 @@ def choose_transcripts(request):
Or error in case of validation failures.
"""
error, validated_data = validate_transcripts_request(request, include_html5=True)
edx_video_id = None
if error:
response = error_response({}, error)
else:
@@ -546,10 +610,24 @@ def choose_transcripts(request):
return error_response({}, _('No such transcript.'))
# 2. Link a video to video component if its not already linked to one.
edx_video_id = link_video_to_component(video, request.user)
if not isinstance(video.usage_key.context_key, LibraryLocatorV2):
edx_video_id = link_video_to_component(video, request.user)
# 3. Upload the retrieved transcript to DS for the linked video ID.
success = save_video_transcript(edx_video_id, input_format, transcript_content, language_code='en')
if isinstance(video.usage_key.context_key, LibraryLocatorV2):
success = save_video_transcript_in_learning_core(
video.usage_key,
input_format,
transcript_content,
language_code='en',
)
else:
success = save_video_transcript(
edx_video_id,
input_format,
transcript_content,
language_code='en',
)
if success:
response = JsonResponse({'edx_video_id': edx_video_id, 'status': 'Success'}, status=200)
else:
@@ -569,6 +647,7 @@ def rename_transcripts(request):
Or error in case of validation failures.
"""
error, validated_data = validate_transcripts_request(request)
edx_video_id = None
if error:
response = error_response({}, error)
else:
@@ -585,10 +664,24 @@ def rename_transcripts(request):
return error_response({}, _('No such transcript.'))
# 2. Link a video to video component if its not already linked to one.
edx_video_id = link_video_to_component(video, request.user)
if not isinstance(video.usage_key.context_key, LibraryLocatorV2):
edx_video_id = link_video_to_component(video, request.user)
# 3. Upload the retrieved transcript to DS for the linked video ID.
success = save_video_transcript(edx_video_id, input_format, transcript_content, language_code='en')
if isinstance(video.usage_key.context_key, LibraryLocatorV2):
success = save_video_transcript_in_learning_core(
video.usage_key,
input_format,
transcript_content,
language_code='en',
)
else:
success = save_video_transcript(
edx_video_id,
input_format,
transcript_content,
language_code='en',
)
if success:
response = JsonResponse({'edx_video_id': edx_video_id, 'status': 'Success'}, status=200)
else:
@@ -610,6 +703,7 @@ def replace_transcripts(request):
"""
error, validated_data = validate_transcripts_request(request, include_yt=True)
youtube_id = validated_data['youtube']
edx_video_id = None
if error:
response = error_response({}, error)
elif not youtube_id:
@@ -623,16 +717,34 @@ def replace_transcripts(request):
return error_response({}, str(e))
# 2. Link a video to video component if its not already linked to one.
edx_video_id = link_video_to_component(video, request.user)
# for transcript in transcript_links:
if not isinstance(video.usage_key.context_key, LibraryLocatorV2):
edx_video_id = link_video_to_component(video, request.user)
# 3. Upload YT transcript to DS for the linked video ID.
success = True
for transcript in transcript_content:
[language_code, json_content] = transcript
success = save_video_transcript(edx_video_id, Transcript.SJSON, json_content, language_code)
if isinstance(video.usage_key.context_key, LibraryLocatorV2):
success = save_video_transcript_in_learning_core(
video.usage_key,
Transcript.SJSON,
json_content,
language_code,
)
filename = f"transcript-{language_code}.srt"
else:
success = save_video_transcript(
edx_video_id,
Transcript.SJSON,
json_content,
language_code,
)
filename = f"{edx_video_id}-{language_code}.srt"
if not success:
break
video.transcripts[language_code] = filename
if success:
video.save()
response = JsonResponse({'edx_video_id': edx_video_id, 'status': 'Success'}, status=200)
else:
response = error_response({}, _('There is a problem with the YouTube transcript file.'))
@@ -643,17 +755,25 @@ def replace_transcripts(request):
def _get_item(request, data):
"""
Obtains from 'data' the locator for an item.
Next, gets that item from the modulestore (allowing any errors to raise up).
Next, gets that item from the modulestore (allowing any errors to raise up)
or from library API if is a library content.
Finally, verifies that the user has access to the item.
Returns the item.
Returns the item and a boolean if is a library content.
"""
usage_key = UsageKey.from_string(data.get('locator'))
if not usage_key.context_key.is_course:
# TODO: implement transcript support for learning core / content libraries.
raise TranscriptsRequestValidationException(_('Transcripts are not yet supported in content libraries.'))
context_key = usage_key.context_key
if not context_key.is_course:
if isinstance(context_key, LibraryLocatorV2):
return xblock_api.load_block(
usage_key,
request.user,
check_permission=CheckPerm.CAN_EDIT,
)
raise TranscriptsRequestValidationException(_('Transcripts are not yet supported for this type of block'))
# This is placed before has_course_author_access() to validate the location,
# because has_course_author_access() raises r if location is invalid.
# because has_course_author_access() raises error if location is invalid.
item = modulestore().get_item(usage_key)
# use the item's course_key, because the usage_key might not have the run

View File

@@ -71,6 +71,21 @@ class UpstreamTestCase(ModuleStoreTestCase):
'/>\n'
))
self.upstream_video_key = libs.create_library_block(self.library.key, "video", "video-upstream").usage_key
libs.set_library_block_olx(self.upstream_video_key, (
'<video'
' display_name="Video Test"'
' edx_video_id=""'
' end_time="00:00:00"'
' html5_sources="[&quot;https://www.sample-videos.com/video321/mp4/720/big_buck_bunny_720p_2mb.mp4&quot;]"'
' start_time="00:00:00"'
' track=""'
' youtube_id_1_0=""'
'>'
' <source src="https://www.sample-videos.com/video321/mp4/720/big_buck_bunny_720p_2mb.mp4"/>'
'</video>'
))
libs.publish_changes(self.library.key, self.user.id)
self.taxonomy_all_org = tagging_api.create_taxonomy(
@@ -539,3 +554,16 @@ class UpstreamTestCase(ModuleStoreTestCase):
assert len(object_tags) == len(new_upstream_tags)
for object_tag in object_tags:
assert object_tag.value in new_upstream_tags
def test_sync_video_block(self):
downstream = BlockFactory.create(category='video', parent=self.unit, upstream=str(self.upstream_video_key))
downstream.edx_video_id = "test_video_id"
# Sync
sync_from_upstream(downstream, self.user)
assert downstream.upstream_version == 2
assert downstream.upstream_display_name == "Video Test"
assert downstream.display_name == "Video Test"
# `edx_video_id` doesn't change
assert downstream.edx_video_id == "test_video_id"

View File

@@ -297,7 +297,11 @@ def _update_non_customizable_fields(*, upstream: XBlock, downstream: XBlock) ->
"""
syncable_fields = _get_synchronizable_fields(upstream, downstream)
customizable_fields = set(downstream.get_customizable_fields().keys())
isVideoBlock = downstream.usage_key.block_type == "video"
for field_name in syncable_fields - customizable_fields:
if isVideoBlock and field_name == 'edx_video_id':
# Avoid overwriting edx_video_id between blocks
continue
new_upstream_value = getattr(upstream, field_name)
setattr(downstream, field_name, new_upstream_value)

View File

@@ -178,7 +178,7 @@ def xblock_handler(
"""
# To support sandboxed XBlocks, custom frontends, and other use cases, we
# authenticate requests using a secure token in the URL. see
# openedx.core.djangoapps.xblock.utils.get_secure_hash_for_xblock_handler
# openedx.core.djangoapps.xblock.utils.get_secure_token_for_xblock_handler
# for details and rationale.
if not validate_secure_token_for_xblock_handler(user_id, str(usage_key), secure_token):
raise PermissionDenied("Invalid/expired auth token.")

View File

@@ -741,6 +741,48 @@ class VideoExportTestCase(VideoBlockTestBase):
course_id=self.block.scope_ids.usage_id.context_key,
)
def test_export_to_xml_without_video_id(self):
"""
Test that we write the correct XML on export of a video without edx_video_id.
"""
self.block.youtube_id_0_75 = 'izygArpw-Qo'
self.block.youtube_id_1_0 = 'p2Q6BrNhdh8'
self.block.youtube_id_1_25 = '1EeWXzPdhSA'
self.block.youtube_id_1_5 = 'rABDYkeK0x8'
self.block.show_captions = False
self.block.start_time = datetime.timedelta(seconds=1.0)
self.block.end_time = datetime.timedelta(seconds=60)
self.block.track = 'http://www.example.com/track'
self.block.handout = 'http://www.example.com/handout'
self.block.download_track = True
self.block.html5_sources = ['http://www.example.com/source.mp4', 'http://www.example.com/source1.ogg']
self.block.download_video = True
self.block.transcripts = {'ua': 'ukrainian_translation.srt', 'ge': 'german_translation.srt'}
xml = self.block.definition_to_xml(self.file_system)
parser = etree.XMLParser(remove_blank_text=True)
xml_string = '''\
<video
url_name="SampleProblem"
start_time="0:00:01"
show_captions="false"
end_time="0:01:00"
download_video="true"
download_track="true"
youtube="0.75:izygArpw-Qo,1.00:p2Q6BrNhdh8,1.25:1EeWXzPdhSA,1.50:rABDYkeK0x8"
transcripts='{"ge": "german_translation.srt", "ua": "ukrainian_translation.srt"}'
>
<source src="http://www.example.com/source.mp4"/>
<source src="http://www.example.com/source1.ogg"/>
<track src="http://www.example.com/track"/>
<handout src="http://www.example.com/handout"/>
<transcript language="ge" src="german_translation.srt" />
<transcript language="ua" src="ukrainian_translation.srt" />
</video>
'''
expected = etree.XML(xml_string, parser=parser)
self.assertXmlEqual(expected, xml)
@patch('xmodule.video_block.video_block.edxval_api')
def test_export_to_xml_val_error(self, mock_val_api):
# Export should succeed without VAL data if video does not exist

View File

@@ -20,6 +20,7 @@ from lxml import etree
from opaque_keys.edx.keys import UsageKeyV2
from pysrt import SubRipFile, SubRipItem, SubRipTime
from pysrt.srtexc import Error
from opaque_keys.edx.locator import LibraryLocatorV2
from openedx.core.djangoapps.xblock.api import get_component_from_usage_key
from xmodule.contentstore.content import StaticContent
@@ -498,16 +499,17 @@ def manage_video_subtitles_save(item, user, old_metadata=None, generate_translat
remove_subs_from_store(video_id, item, lang)
reraised_message = ''
for lang in new_langs: # 3b
try:
generate_sjson_for_all_speeds(
item,
item.transcripts[lang],
{speed: subs_id for subs_id, speed in youtube_speed_dict(item).items()},
lang,
)
except TranscriptException:
pass
if not isinstance(item.usage_key.context_key, LibraryLocatorV2):
for lang in new_langs: # 3b
try:
generate_sjson_for_all_speeds(
item,
item.transcripts[lang],
{speed: subs_id for subs_id, speed in youtube_speed_dict(item).items()},
lang,
)
except TranscriptException:
pass
if reraised_message:
item.save_with_metadata(user)
raise TranscriptException(reraised_message)
@@ -684,6 +686,18 @@ def convert_video_transcript(file_name, content, output_format):
return dict(filename=filename, content=converted_transcript)
def clear_transcripts(block):
"""
Deletes all transcripts of a video block from VAL
"""
for language_code in block.transcripts.keys():
edxval_api.delete_video_transcript(
video_id=block.edx_video_id,
language_code=language_code,
)
block.transcripts = {}
class Transcript:
"""
Container for transcript methods.
@@ -1040,6 +1054,13 @@ def get_transcript_from_contentstore(video, language, output_format, transcripts
return transcript_content, transcript_name, Transcript.mime_types[output_format]
def build_components_import_path(usage_key, file_path):
"""
Build components import path
"""
return f"components/{usage_key.block_type}/{usage_key.block_id}/{file_path}"
def get_transcript_from_learning_core(video_block, language, output_format, transcripts_info):
"""
Get video transcript from Learning Core (used for Content Libraries)

View File

@@ -855,11 +855,15 @@ class _BuiltInVideoBlock(
if new_transcripts.get('en'):
xml.set('sub', '')
# Update `transcripts` attribute in the xml
xml.set('transcripts', json.dumps(transcripts, sort_keys=True))
except edxval_api.ValVideoNotFoundError:
pass
else:
if transcripts.get('en'):
xml.set('sub', '')
if transcripts:
# Update `transcripts` attribute in the xml
xml.set('transcripts', json.dumps(transcripts, sort_keys=True))
# Sorting transcripts for easy testing of resulting xml
for transcript_language in sorted(transcripts.keys()):

View File

@@ -13,13 +13,14 @@ import math
from django.core.files.base import ContentFile
from django.utils.timezone import now
from edxval.api import create_external_video, create_or_update_video_transcript, delete_video_transcript
from opaque_keys.edx.locator import CourseLocator
from opaque_keys.edx.locator import CourseLocator, LibraryLocatorV2
from webob import Response
from xblock.core import XBlock
from xblock.exceptions import JsonHandlerError
from xmodule.exceptions import NotFoundError
from xmodule.fields import RelativeTime
from openedx.core.djangoapps.content_libraries import api as lib_api
from .transcripts_utils import (
Transcript,
@@ -467,7 +468,6 @@ class VideoStudioViewHandlers:
return error
# pylint: disable=too-many-statements
@XBlock.handler
def studio_transcript(self, request, dispatch):
"""
@@ -495,112 +495,14 @@ class VideoStudioViewHandlers:
no SRT extension or not parse-able by PySRT
UnicodeDecodeError: non-UTF8 uploaded file content encoding.
"""
_ = self.runtime.service(self, "i18n").ugettext
if dispatch.startswith('translation'):
if request.method == 'POST':
error = self.validate_transcript_upload_data(data=request.POST)
if error:
response = Response(json={'error': error}, status=400)
else:
edx_video_id = clean_video_id(request.POST['edx_video_id'])
language_code = request.POST['language_code']
new_language_code = request.POST['new_language_code']
transcript_file = request.POST['file'].file
if not edx_video_id:
# Back-populate the video ID for an external video.
# pylint: disable=attribute-defined-outside-init
self.edx_video_id = edx_video_id = create_external_video(display_name='external video')
try:
# Convert SRT transcript into an SJSON format
# and upload it to S3.
sjson_subs = Transcript.convert(
content=transcript_file.read().decode('utf-8'),
input_format=Transcript.SRT,
output_format=Transcript.SJSON
).encode()
create_or_update_video_transcript(
video_id=edx_video_id,
language_code=language_code,
metadata={
'file_format': Transcript.SJSON,
'language_code': new_language_code
},
file_data=ContentFile(sjson_subs),
)
payload = {
'edx_video_id': edx_video_id,
'language_code': new_language_code
}
# If a new transcript is added, then both new_language_code and
# language_code fields will have the same value.
if language_code != new_language_code:
self.transcripts.pop(language_code, None)
self.transcripts[new_language_code] = f'{edx_video_id}-{new_language_code}.srt'
response = Response(json.dumps(payload), status=201)
except (TranscriptsGenerationException, UnicodeDecodeError):
response = Response(
json={
'error': _(
'There is a problem with this transcript file. Try to upload a different file.'
)
},
status=400
)
response = self._studio_transcript_upload(request)
elif request.method == 'DELETE':
request_data = request.json
if 'lang' not in request_data or 'edx_video_id' not in request_data:
return Response(status=400)
language = request_data['lang']
edx_video_id = clean_video_id(request_data['edx_video_id'])
if edx_video_id:
delete_video_transcript(video_id=edx_video_id, language_code=language)
if language == 'en':
# remove any transcript file from content store for the video ids
possible_sub_ids = [
self.sub, # pylint: disable=access-member-before-definition
self.youtube_id_1_0
] + get_html5_ids(self.html5_sources)
for sub_id in possible_sub_ids:
remove_subs_from_store(sub_id, self, language)
# update metadata as `en` can also be present in `transcripts` field
remove_subs_from_store(self.transcripts.pop(language, None), self, language)
# also empty `sub` field
self.sub = '' # pylint: disable=attribute-defined-outside-init
else:
remove_subs_from_store(self.transcripts.pop(language, None), self, language)
return Response(status=200)
response = self._studio_transcript_delete(request)
elif request.method == 'GET':
language = request.GET.get('language_code')
if not language:
return Response(json={'error': _('Language is required.')}, status=400)
try:
transcript_content, transcript_name, mime_type = get_transcript(
video=self, lang=language, output_format=Transcript.SRT
)
response = Response(transcript_content, headerlist=[
(
'Content-Disposition',
f'attachment; filename="{transcript_name}"'
),
('Content-Language', language),
('Content-Type', mime_type)
])
except (UnicodeDecodeError, TranscriptsGenerationException, NotFoundError):
response = Response(status=404)
response = self._studio_transcript_get(request)
else:
# Any other HTTP method is not allowed.
response = Response(status=404)
@@ -610,3 +512,165 @@ class VideoStudioViewHandlers:
response = Response(status=404)
return response
def _save_transcript_field(self):
"""
Save `transcripts` block field.
"""
field = self.fields['transcripts']
if self.transcripts:
transcripts_copy = self.transcripts.copy()
# Need to delete to overwrite, it's weird behavior,
# but it only works like this.
field.delete_from(self)
field.write_to(self, transcripts_copy)
else:
field.delete_from(self)
def _studio_transcript_upload(self, request):
"""
Upload transcript. Used in "POST" method in `studio_transcript`
"""
_ = self.runtime.service(self, "i18n").ugettext
error = self.validate_transcript_upload_data(data=request.POST)
if error:
response = Response(json={'error': error}, status=400)
else:
edx_video_id = clean_video_id(request.POST['edx_video_id'])
language_code = request.POST['language_code']
new_language_code = request.POST['new_language_code']
transcript_file = request.POST['file'].file
is_library = isinstance(self.usage_key.context_key, LibraryLocatorV2)
if is_library:
filename = f'transcript-{new_language_code}.srt'
else:
if not edx_video_id:
# Back-populate the video ID for an external video.
# pylint: disable=attribute-defined-outside-init
self.edx_video_id = edx_video_id = create_external_video(display_name='external video')
filename = f'{edx_video_id}-{new_language_code}.srt'
try:
# Convert SRT transcript into an SJSON format
# and upload it to S3.
content = transcript_file.read()
payload = {
'edx_video_id': edx_video_id,
'language_code': new_language_code
}
if is_library:
# Save transcript as static asset in Learning Core if is a library component
filename = f"static/{filename}"
lib_api.add_library_block_static_asset_file(
self.usage_key,
filename,
content,
)
else:
sjson_subs = Transcript.convert(
content=content.decode('utf-8'),
input_format=Transcript.SRT,
output_format=Transcript.SJSON
).encode()
create_or_update_video_transcript(
video_id=edx_video_id,
language_code=language_code,
metadata={
'file_format': Transcript.SJSON,
'language_code': new_language_code
},
file_data=ContentFile(sjson_subs),
)
# If a new transcript is added, then both new_language_code and
# language_code fields will have the same value.
if language_code != new_language_code:
self.transcripts.pop(language_code, None)
self.transcripts[new_language_code] = filename
if is_library:
self._save_transcript_field()
response = Response(json.dumps(payload), status=201)
except (TranscriptsGenerationException, UnicodeDecodeError):
response = Response(
json={
'error': _(
'There is a problem with this transcript file. Try to upload a different file.'
)
},
status=400
)
return response
def _studio_transcript_delete(self, request):
"""
Delete transcript. Used in "DELETE" method in `studio_transcript`
"""
request_data = request.json
if 'lang' not in request_data or 'edx_video_id' not in request_data:
return Response(status=400)
language = request_data['lang']
edx_video_id = clean_video_id(request_data['edx_video_id'])
if edx_video_id:
delete_video_transcript(video_id=edx_video_id, language_code=language)
if isinstance(self.usage_key.context_key, LibraryLocatorV2):
transcript_name = self.transcripts.pop(language, None)
if transcript_name:
# TODO: In the future, we need a proper XBlock API
# like `self.static_assets.delete(...)` instead of coding
# these runtime-specific/library-specific APIs.
lib_api.delete_library_block_static_asset_file(
self.usage_key,
f"static/{transcript_name}",
)
self._save_transcript_field()
else:
if language == 'en':
# remove any transcript file from content store for the video ids
possible_sub_ids = [
self.sub, # pylint: disable=access-member-before-definition
self.youtube_id_1_0
] + get_html5_ids(self.html5_sources)
for sub_id in possible_sub_ids:
remove_subs_from_store(sub_id, self, language)
# update metadata as `en` can also be present in `transcripts` field
remove_subs_from_store(self.transcripts.pop(language, None), self, language)
# also empty `sub` field
self.sub = '' # pylint: disable=attribute-defined-outside-init
else:
remove_subs_from_store(self.transcripts.pop(language, None), self, language)
return Response(status=200)
def _studio_transcript_get(self, request):
"""
Get transcript. Used in "GET" method in `studio_transcript`
"""
_ = self.runtime.service(self, "i18n").ugettext
language = request.GET.get('language_code')
if not language:
return Response(json={'error': _('Language is required.')}, status=400)
try:
transcript_content, transcript_name, mime_type = get_transcript(
video=self, lang=language, output_format=Transcript.SRT
)
response = Response(transcript_content, headerlist=[
(
'Content-Disposition',
f'attachment; filename="{transcript_name}"'
),
('Content-Language', language),
('Content-Type', mime_type)
])
except (UnicodeDecodeError, TranscriptsGenerationException, NotFoundError):
response = Response(status=404)
return response