diff --git a/cms/djangoapps/contentstore/tests/test_transcripts_utils.py b/cms/djangoapps/contentstore/tests/test_transcripts_utils.py index d4d8277632..216b31651b 100644 --- a/cms/djangoapps/contentstore/tests/test_transcripts_utils.py +++ b/cms/djangoapps/contentstore/tests/test_transcripts_utils.py @@ -640,6 +640,12 @@ class TestTranscript(unittest.TestCase): with self.assertRaises(transcripts_utils.TranscriptsGenerationException): transcripts_utils.Transcript.convert(invalid_srt_transcript, 'srt', 'sjson') + def test_convert_invalid_invalid_sjson_to_srt(self): + invalid_content = "Text with special character /\"\'\b\f\t\r\n." + error_transcript = {"start": [1], "end": [2], "text": ["An error occured obtaining the transcript."]} + assert transcripts_utils.Transcript.convert(invalid_content, 'sjson', 'txt') == error_transcript['text'][0] + assert error_transcript["text"][0] in transcripts_utils.Transcript.convert(invalid_content, 'sjson', 'srt') + def test_dummy_non_existent_transcript(self): """ Test `Transcript.asset` raises `NotFoundError` for dummy non-existent transcript. 
@@ -995,6 +1001,16 @@ class TestGetTranscript(SharedModuleStoreTestCase): exception_message = str(no_en_transcript_exception.exception) self.assertEqual(exception_message, 'No transcript for `en` language') + @patch('xmodule.video_module.transcripts_utils.edxval_api.get_video_transcript_data') + def test_get_transcript_incorrect_json_(self, mock_get_video_transcript_data): + """ + Verify that `get transcript` function returns a working json file if the original throws an error + """ + error_transcript = {"start": [1], "end": [2], "text": ["An error occured obtaining the transcript."]} + mock_get_video_transcript_data.side_effect = ValueError + content, _, _ = transcripts_utils.get_transcript(self.video, 'zh') + assert error_transcript["text"][0] in content + @ddt.data( transcripts_utils.TranscriptsGenerationException, UnicodeDecodeError('aliencodec', b'\x02\x01', 1, 2, 'alien codec found!') diff --git a/common/lib/xmodule/xmodule/video_module/transcripts_utils.py b/common/lib/xmodule/xmodule/video_module/transcripts_utils.py index 7abe134de1..c33b70a2ad 100644 --- a/common/lib/xmodule/xmodule/video_module/transcripts_utils.py +++ b/common/lib/xmodule/xmodule/video_module/transcripts_utils.py @@ -577,8 +577,18 @@ def get_video_transcript_content(edx_video_id, language_code): transcript = None edx_video_id = clean_video_id(edx_video_id) if edxval_api and edx_video_id: - transcript = edxval_api.get_video_transcript_data(edx_video_id, language_code) - + try: + transcript = edxval_api.get_video_transcript_data(edx_video_id, language_code) + except ValueError: + log.exception( + f"Error getting transcript from edx-val id: {edx_video_id}: language code {language_code}" + ) + # Fall back to a placeholder SRT transcript instead of propagating the error. + content = '{"start": [1],"end": [2],"text": ["An error occured obtaining the transcript."]}' + transcript = dict( + file_name=f'error-{edx_video_id}-{language_code}.srt', + content=Transcript.convert(content, 'sjson', 'srt') + ) return transcript @@ -680,14 +690,20 @@ class Transcript: content_str = 
content.decode('latin-1') else: content_str = content - + try: + content_dict = json.loads(content_str) + except ValueError: + truncated = content_str[:100].strip() + log.exception( + f"Failed to convert {input_format} to {output_format} for {repr(truncated)}..." + ) + content_dict = {"start": [1], "end": [2], "text": ["An error occured obtaining the transcript."]} if output_format == 'txt': - text = json.loads(content_str)['text'] + text = content_dict['text'] text_without_none = [line if line else '' for line in text] return html.unescape("\n".join(text_without_none)) - elif output_format == 'srt': - return generate_srt_from_sjson(json.loads(content_str), speed=1.0) + return generate_srt_from_sjson(content_dict, speed=1.0) @staticmethod def asset(location, subs_id, lang='en', filename=None): diff --git a/lms/djangoapps/courseware/tests/test_video_handlers.py b/lms/djangoapps/courseware/tests/test_video_handlers.py index 0d266f5e8f..06db753d12 100644 --- a/lms/djangoapps/courseware/tests/test_video_handlers.py +++ b/lms/djangoapps/courseware/tests/test_video_handlers.py @@ -1296,15 +1296,16 @@ class TestGetTranscript(TestVideo): # lint-amnesty, pylint: disable=test-inheri assert filename == 'zh_塞.srt' assert mime_type == 'application/x-subrip; charset=utf-8' - def test_value_error(self): + def test_value_error_handled(self): good_sjson = _create_file(content='bad content') _upload_sjson_file(good_sjson, self.item.location) self.item.sub = _get_subs_id(good_sjson.name) transcripts = self.item.get_transcripts_info() # lint-amnesty, pylint: disable=unused-variable - with pytest.raises(ValueError): - get_transcript(self.item) + error_transcript = {"start": [], "end": [], "text": ["An error occured obtaining the transcript."]} + content, _, _ = get_transcript(self.item) + assert error_transcript["text"][0] in content def test_key_error(self): good_sjson = _create_file(content="""