diff --git a/common/lib/xmodule/xmodule/video_module/transcripts_utils.py b/common/lib/xmodule/xmodule/video_module/transcripts_utils.py index fca01d7352..150379a2f8 100644 --- a/common/lib/xmodule/xmodule/video_module/transcripts_utils.py +++ b/common/lib/xmodule/xmodule/video_module/transcripts_utils.py @@ -784,49 +784,6 @@ class VideoTranscriptsMixin(object): # to clean redundant language codes. return list(set(translations)) - def get_transcript(self, transcripts, transcript_format='srt', lang=None): - """ - Returns transcript, filename and MIME type. - - transcripts (dict): A dict with all transcripts and a sub. - - Raises: - - NotFoundError if cannot find transcript file in storage. - - ValueError if transcript file is empty or incorrect JSON. - - KeyError if transcript file has incorrect format. - - If language is 'en', self.sub should be correct subtitles name. - If language is 'en', but if self.sub is not defined, this means that we - should search for video name in order to get proper transcript (old style courses). - If language is not 'en', give back transcript in proper language and format. - """ - if not lang: - lang = self.get_default_transcript_language(transcripts) - - sub, other_lang = transcripts["sub"], transcripts["transcripts"] - if lang == 'en': - if sub: # HTML5 case and (Youtube case for new style videos) - transcript_name = sub - elif self.youtube_id_1_0: # old courses - transcript_name = self.youtube_id_1_0 - else: - log.debug("No subtitles for 'en' language") - raise ValueError - - data = Transcript.asset(self.location, transcript_name, lang).data.decode('utf-8') - filename = u'{}.{}'.format(transcript_name, transcript_format) - content = Transcript.convert(data, 'sjson', transcript_format) - else: - data = Transcript.asset(self.location, None, None, other_lang[lang]).data.decode('utf-8') - filename = u'{}.{}'.format(os.path.splitext(other_lang[lang])[0], transcript_format) - content = Transcript.convert(data, 'srt', transcript_format) - - if not content: - log.debug('no subtitles produced in get_transcript') - raise ValueError - - return content, filename, Transcript.mime_types[transcript_format] - def get_default_transcript_language(self, transcripts): """ Returns the default transcript language for this video module. @@ -910,7 +867,10 @@ def get_transcript_from_val(edx_video_id, lang=None, output_format=Transcript.SR def get_transcript_for_video(video_location, subs_id, file_name, language): """ - Get video transcript from content store. + Get video transcript from content store. This is a lower level function and is used by + `get_transcript_from_contentstore`. Prefer that function instead where possible. If you + need to support getting transcripts from VAL or Blockstore as well, use the `get_transcript` + function instead. NOTE: Transcripts can be searched from content store by two ways: 1. by an id(a.k.a subs_id) which will be used to construct transcript filename diff --git a/common/lib/xmodule/xmodule/video_module/video_module.py b/common/lib/xmodule/xmodule/video_module/video_module.py index 27b61934f7..48c3dbbdd9 100644 --- a/common/lib/xmodule/xmodule/video_module/video_module.py +++ b/common/lib/xmodule/xmodule/video_module/video_module.py @@ -56,7 +56,7 @@ from .transcripts_utils import ( VideoTranscriptsMixin, clean_video_id, get_html5_ids, - get_transcript_for_video, + get_transcript, subs_filename ) from .video_handlers import VideoStudentViewHandlers, VideoStudioViewHandlers @@ -593,12 +593,7 @@ class VideoBlock( possible_sub_ids = [self.sub, self.youtube_id_1_0] + get_html5_ids(self.html5_sources) for sub_id in possible_sub_ids: try: - get_transcript_for_video( - self.location, - subs_id=sub_id, - file_name=sub_id, - language=u'en' - ) + _, sub_id, _ = get_transcript(self, lang=u'en', output_format=Transcript.TXT) transcripts_info['transcripts'] = dict(transcripts_info['transcripts'], en=sub_id) break except NotFoundError: @@ -1029,10 +1024,7 @@ class VideoBlock( def _update_transcript_for_index(language=None): """ Find video transcript - if not found, don't update index """ try: - transcripts = self.get_transcripts_info() - transcript = self.get_transcript( - transcripts, transcript_format='txt', lang=language - )[0].replace("\n", " ") + transcript = get_transcript(self, lang=language, output_format=Transcript.TXT)[0].replace("\n", " ") transcript_index_name = "transcript_{}".format(language if language else self.transcript_language) video_body.update({transcript_index_name: transcript}) except NotFoundError: diff --git a/lms/djangoapps/courseware/tests/test_video_handlers.py b/lms/djangoapps/courseware/tests/test_video_handlers.py index 736006cc34..1df5546745 100644 --- a/lms/djangoapps/courseware/tests/test_video_handlers.py +++ b/lms/djangoapps/courseware/tests/test_video_handlers.py @@ -25,7 +25,12 @@ from xmodule.exceptions import NotFoundError from xmodule.modulestore import ModuleStoreEnum from xmodule.modulestore.django import modulestore from xmodule.video_module import VideoBlock -from xmodule.video_module.transcripts_utils import Transcript, edxval_api, subs_filename +from xmodule.video_module.transcripts_utils import ( + Transcript, + edxval_api, + get_transcript, + subs_filename, +) from xmodule.x_module import STUDENT_VIEW from .helpers import BaseTestXmodule @@ -521,9 +526,8 @@ class TestTranscriptDownloadDispatch(TestVideo): request = Request.blank('/download') response = self.item.transcript(request=request, dispatch='download') self.assertEqual(response.status, '404 Not Found') - transcripts = self.item.get_transcripts_info() with self.assertRaises(NotFoundError): - self.item.get_transcript(transcripts) + get_transcript(self.item) @patch( 'xmodule.video_module.transcripts_utils.get_transcript_for_video', @@ -537,7 +541,7 @@ class TestTranscriptDownloadDispatch(TestVideo): self.assertEqual(response.headers['Content-Disposition'], 'attachment; filename="en_塞.srt"') @patch('xmodule.video_module.transcripts_utils.edxval_api.get_video_transcript_data') - @patch('xmodule.video_module.VideoBlock.get_transcript', Mock(side_effect=NotFoundError)) + @patch('xmodule.video_module.get_transcript', Mock(side_effect=NotFoundError)) def test_download_fallback_transcript(self, mock_get_video_transcript_data): """ Verify val transcript is returned as a fallback if it is not found in the content store. @@ -1196,8 +1200,7 @@ class TestGetTranscript(TestVideo): _upload_sjson_file(good_sjson, self.item.location) self.item.sub = _get_subs_id(good_sjson.name) - transcripts = self.item.get_transcripts_info() - text, filename, mime_type = self.item.get_transcript(transcripts) + text, filename, mime_type = get_transcript(self.item) expected_text = textwrap.dedent("""\ 0 @@ -1211,7 +1214,7 @@ class TestGetTranscript(TestVideo): """) self.assertEqual(text, expected_text) - self.assertEqual(filename[:-4], self.item.sub) + self.assertEqual(filename[:-4], 'en_' + self.item.sub) self.assertEqual(mime_type, 'application/x-subrip; charset=utf-8') def test_good_txt_transcript(self): @@ -1234,27 +1237,27 @@ class TestGetTranscript(TestVideo): _upload_sjson_file(good_sjson, self.item.location) self.item.sub = _get_subs_id(good_sjson.name) - transcripts = self.item.get_transcripts_info() - text, filename, mime_type = self.item.get_transcript(transcripts, transcript_format="txt") + text, filename, mime_type = get_transcript(self.item, output_format=Transcript.TXT) expected_text = textwrap.dedent("""\ Hi, welcome to Edx. Let's start with what is on your screen right now.""") self.assertEqual(text, expected_text) - self.assertEqual(filename, self.item.sub + '.txt') + self.assertEqual(filename, 'en_' + self.item.sub + '.txt') self.assertEqual(mime_type, 'text/plain; charset=utf-8') def test_en_with_empty_sub(self): - transcripts = {"transcripts": {}, "sub": ""} + self.item.sub = "" + self.item.transcripts = None # no self.sub, self.youttube_1_0 exist, but no file in assets with self.assertRaises(NotFoundError): - self.item.get_transcript(transcripts) + get_transcript(self.item) # no self.sub and no self.youtube_1_0, no non-en transcritps self.item.youtube_id_1_0 = None - with self.assertRaises(ValueError): - self.item.get_transcript(transcripts) + with self.assertRaises(NotFoundError): + get_transcript(self.item) # no self.sub but youtube_1_0 exists with file in assets good_sjson = _create_file(content=textwrap.dedent("""\ @@ -1276,7 +1279,7 @@ class TestGetTranscript(TestVideo): _upload_sjson_file(good_sjson, self.item.location) self.item.youtube_id_1_0 = _get_subs_id(good_sjson.name) - text, filename, mime_type = self.item.get_transcript(transcripts) + text, filename, mime_type = get_transcript(self.item) expected_text = textwrap.dedent("""\ 0 00:00:00,270 --> 00:00:02,720 @@ -1289,7 +1292,7 @@ class TestGetTranscript(TestVideo): """) self.assertEqual(text, expected_text) - self.assertEqual(filename, self.item.youtube_id_1_0 + '.srt') + self.assertEqual(filename, 'en_' + self.item.youtube_id_1_0 + '.srt') self.assertEqual(mime_type, 'application/x-subrip; charset=utf-8') def test_non_en_with_non_ascii_filename(self): @@ -1298,14 +1301,14 @@ class TestGetTranscript(TestVideo): _upload_file(self.srt_file, self.item_descriptor.location, u"塞.srt") transcripts = self.item.get_transcripts_info() - text, filename, mime_type = self.item.get_transcript(transcripts) + text, filename, mime_type = get_transcript(self.item) expected_text = textwrap.dedent(u""" 0 00:00:00,12 --> 00:00:00,100 Привіт, edX вітає вас. """) self.assertEqual(text, expected_text) - self.assertEqual(filename, u"塞.srt") + self.assertEqual(filename, u"zh_塞.srt") self.assertEqual(mime_type, 'application/x-subrip; charset=utf-8') def test_value_error(self): @@ -1316,7 +1319,7 @@ class TestGetTranscript(TestVideo): transcripts = self.item.get_transcripts_info() with self.assertRaises(ValueError): - self.item.get_transcript(transcripts) + get_transcript(self.item) def test_key_error(self): good_sjson = _create_file(content=""" @@ -1337,4 +1340,4 @@ class TestGetTranscript(TestVideo): transcripts = self.item.get_transcripts_info() with self.assertRaises(KeyError): - self.item.get_transcript(transcripts) + get_transcript(self.item)