diff --git a/common/lib/xmodule/xmodule/video_module/transcripts_utils.py b/common/lib/xmodule/xmodule/video_module/transcripts_utils.py
index 5009feb497..710b9296b8 100644
--- a/common/lib/xmodule/xmodule/video_module/transcripts_utils.py
+++ b/common/lib/xmodule/xmodule/video_module/transcripts_utils.py
@@ -20,10 +20,12 @@ from six import text_type
from six.moves import range, zip
from six.moves.html_parser import HTMLParser # pylint: disable=import-error
-from opaque_keys.edx.locator import CourseLocator, LibraryLocator
+from opaque_keys.edx.locator import BundleDefinitionLocator
from xmodule.contentstore.content import StaticContent
from xmodule.contentstore.django import contentstore
from xmodule.exceptions import NotFoundError
+from openedx.core.djangolib import blockstore_cache
+from openedx.core.lib import blockstore_api
from .bumper_utils import get_bumper_settings
@@ -994,6 +996,77 @@ def get_transcript_from_contentstore(video, language, output_format, transcripts
return transcript_content, transcript_name, Transcript.mime_types[output_format]
+def get_transcript_from_blockstore(video_block, language, output_format, transcripts_info):
+ """
+ Get video transcript from Blockstore.
+
+ Blockstore expects video transcripts to be placed into the 'static/'
+ subfolder of the XBlock's folder in a Blockstore bundle. For example, if the
+ video XBlock's definition is in the standard location of
+ video/video1/definition.xml
+ Then the .srt files should be placed at e.g.
+ video/video1/static/video1-en.srt
+ This is the same place where other public static files are placed for other
+ XBlocks, such as image files used by HTML blocks.
+
+ Video XBlocks in Blockstore must set the 'transcripts' XBlock field to a
+ JSON dictionary listing the filename of the transcript for each language,
+ e.g. (illustrative OLX; only .srt filenames are accepted by this function):
+
+     <video transcripts='{"en": "video1-en.srt"}' />
+
+ The .srt file is always the stored form; it is converted to
+ `output_format` on the way out.
+
+ This method is tested in openedx/core/djangoapps/content_libraries/tests/test_static_assets.py
+
+ Arguments:
+ video_block (Video XBlock): The video XBlock
+ language (str): transcript language
+ output_format (str): transcript output format; must be one of
+ Transcript.SRT, Transcript.SJSON or Transcript.TXT
+ transcripts_info (dict): transcript info for a video, from video_block.get_transcripts_info();
+ its 'transcripts' entry is read as a {language: filename} mapping
+
+ Returns:
+ tuple containing content, filename, mimetype, where content is the
+ transcript converted to `output_format`, filename is the stored
+ filename with its extension replaced by `output_format`, and mimetype
+ is looked up in Transcript.mime_types
+
+ Raises:
+ NotFoundError: if `output_format` is not supported, if no transcript
+ file is listed for `language`, if the listed file is not an .srt
+ file, if the file is missing from the bundle, or if the converted
+ transcript is empty/whitespace-only
+ """
+ if output_format not in (Transcript.SRT, Transcript.SJSON, Transcript.TXT):
+ raise NotFoundError('Invalid transcript format `{output_format}`'.format(output_format=output_format))
+ transcripts = transcripts_info['transcripts']
+ if language not in transcripts:
+ raise NotFoundError("Video {} does not have a transcript file defined for the '{}' language in its OLX.".format(
+ video_block.scope_ids.usage_id,
+ language,
+ ))
+ filename = transcripts[language]
+ if not filename.endswith('.srt'):
+ # We want to standardize on .srt
+ raise NotFoundError("Video XBlocks in Blockstore only support .srt transcript files.")
+ # Try to load the transcript file out of Blockstore
+ # In lieu of an XBlock API for this (like block.runtime.resources_fs), we use the blockstore API directly.
+ bundle_uuid = video_block.scope_ids.def_id.bundle_uuid
+ # Drop the last component of the definition's OLX path (the definition file
+ # itself) and look for the transcript in the sibling 'static/' folder.
+ path = video_block.scope_ids.def_id.olx_path.rpartition('/')[0] + '/static/' + filename
+ bundle_version = video_block.scope_ids.def_id.bundle_version # Either bundle_version or draft_name will be set.
+ draft_name = video_block.scope_ids.def_id.draft_name
+ try:
+ content_binary = blockstore_cache.get_bundle_file_data_with_cache(bundle_uuid, path, bundle_version, draft_name)
+ except blockstore_api.BundleFileNotFound:
+ # Translate the Blockstore-specific error into the NotFoundError that
+ # the other checks in this function raise.
+ raise NotFoundError("Transcript file '{}' missing for video XBlock {}".format(
+ path,
+ video_block.scope_ids.usage_id,
+ ))
+ # Now convert the transcript data to the requested format:
+ filename_no_extension = os.path.splitext(filename)[0]
+ output_filename = '{}.{}'.format(filename_no_extension, output_format)
+ # The stored bytes are decoded as UTF-8 and always treated as SRT input,
+ # which is why non-.srt filenames were rejected above.
+ output_transcript = Transcript.convert(
+ content_binary.decode('utf-8'),
+ input_format=Transcript.SRT,
+ output_format=output_format,
+ )
+ # An empty (or whitespace-only) result is reported the same way as a missing transcript.
+ if not output_transcript.strip():
+ raise NotFoundError('No transcript content')
+ return output_transcript, output_filename, Transcript.mime_types[output_format]
+
+
def get_transcript(video, lang=None, output_format=Transcript.SRT, youtube_id=None):
"""
Get video transcript from edx-val or content store.
@@ -1011,18 +1084,18 @@ def get_transcript(video, lang=None, output_format=Transcript.SRT, youtube_id=No
if not lang:
lang = video.get_default_transcript_language(transcripts_info)
+ if isinstance(video.scope_ids.def_id, BundleDefinitionLocator):
+ # This block is in Blockstore.
+ # For Blockstore, VAL is considered deprecated and we can load the transcript file
+ # directly using the Blockstore API:
+ return get_transcript_from_blockstore(video, lang, output_format, transcripts_info)
+
try:
edx_video_id = clean_video_id(video.edx_video_id)
if not edx_video_id:
raise NotFoundError
return get_transcript_from_val(edx_video_id, lang, output_format)
except NotFoundError:
- # If this is not in a modulestore course or library, don't try loading from contentstore:
- if not isinstance(video.scope_ids.usage_id.course_key, (CourseLocator, LibraryLocator)):
- raise NotFoundError(
- u'Video transcripts cannot yet be loaded from Blockstore (block: {})'.format(video.scope_ids.usage_id),
- )
-
return get_transcript_from_contentstore(
video,
lang,
diff --git a/openedx/core/djangoapps/content_libraries/tests/test_static_assets.py b/openedx/core/djangoapps/content_libraries/tests/test_static_assets.py
index 66b435d6f3..71f66c5a26 100644
--- a/openedx/core/djangoapps/content_libraries/tests/test_static_assets.py
+++ b/openedx/core/djangoapps/content_libraries/tests/test_static_assets.py
@@ -13,6 +13,16 @@ SVG_DATA = """""".encode('utf-8')
+# The first two cues of an .srt (SubRip) transcript file, encoded as UTF-8
+# bytes. test_video_transcripts uploads this as the video's transcript asset
+# and compares the transcript download response against it byte-for-byte, so
+# the literal below must not be reformatted.
+TRANSCRIPT_DATA = """1
+00:00:00,260 --> 00:00:01,510
+Welcome to edX.
+
+2
+00:00:01,510 --> 00:00:04,480
+I'm Anant Agarwal, I'm the president of edX,
+""".encode('utf-8')
+
class ContentLibrariesStaticAssetsTest(ContentLibrariesRestApiTest):
"""
@@ -111,3 +121,49 @@ class ContentLibrariesStaticAssetsTest(ContentLibrariesRestApiTest):
# 'a////////b' is not allowed
file_name = "a////////b"
self._set_library_block_asset(block_id, file_name, SVG_DATA, expect_response=400)
+
+ def test_video_transcripts(self):
+     """
+     Test that video blocks can read transcript files out of blockstore.
+
+     A video block is created in a new library, its OLX is set so that the
+     'transcripts' field maps "en" to an uploaded .srt asset, and the
+     block's "transcript" handler is then exercised twice: once against the
+     draft and once after the library changes are committed.
+     """
+     library = self._create_library(slug="transcript-test-lib", title="Transcripts Test Library")
+     block = self._add_block_to_library(library["id"], "video", "video1")
+     block_id = block["id"]
+     # The OLX must declare which file holds the transcript for each
+     # language; without a "transcripts" entry for "en" the handler has
+     # nothing to look up. The filename has to match the asset uploaded below.
+     self._set_library_block_olx(block_id, """
+         <video
+             youtube_id_1_0="3_yD_cEKoCk"
+             transcripts='{"en": "3_yD_cEKoCk-en.srt"}'
+         />
+     """)
+     # Upload the transcript file
+     self._set_library_block_asset(block_id, "3_yD_cEKoCk-en.srt", TRANSCRIPT_DATA)
+
+     transcript_handler_url = self._get_block_handler_url(block_id, "transcript")
+
+     def check_sjson():
+         """
+         Call the handler endpoint which the video player uses to load the transcript as SJSON
+         """
+         url = transcript_handler_url + 'translation/en'
+         response = self.client.get(url)
+         self.assertEqual(response.status_code, 200)
+         self.assertIn("Welcome to edX", response.content.decode('utf-8'))
+
+     def check_download():
+         """
+         Call the handler endpoint which the video player uses to download the transcript SRT file
+         """
+         url = transcript_handler_url + 'download'
+         response = self.client.get(url)
+         self.assertEqual(response.status_code, 200)
+         # The stored file is already .srt, so the download should be the uploaded bytes unchanged.
+         self.assertEqual(response.content, TRANSCRIPT_DATA)
+
+     def check_all():
+         """
+         Run both handler checks against whatever state the library is currently in
+         """
+         check_sjson()
+         check_download()
+
+     # First against the unpublished draft...
+     check_all()
+     # Publish the OLX and the transcript file, since published data gets
+     # served differently by Blockstore and we should test that too.
+     self._commit_library_changes(library["id"])
+     # ...then against the published version.
+     check_all()