fix: Prevent Transcripts from Failing Export, Edit (#28242)

Instead of having json errors in transcript acquisition and conversion cause errors, have transcription conversion and acquisition simply return an error message in the transcription which can prompt a change from the user.

Although not uploading a transcript is handled, transcripts can often cause errors in edit, export, and other activities due to json errors. These errors block the entire use of these features, so to allow for reupload, etc, we add an error message instead of transcript and log the event.

In response to [TNL-8539](https://openedx.atlassian.net/secure/RapidBoard.jspa?rapidView=580&projectKey=TNL&modal=detail&selectedIssue=TNL-8539)

Testing: Unit tests coverage is included in the PR. Upload, import, and export of courses with transcriptions is also easily hand-testable. Just create a video in studio, add an irrelevant transcript. Then try to import, export, and edit the problem. Expected behavior is success.
This commit is contained in:
connorhaugh
2021-07-21 15:52:54 -04:00
committed by GitHub
parent cf1c3c292a
commit 0d26738fce
3 changed files with 41 additions and 9 deletions

View File

@@ -640,6 +640,12 @@ class TestTranscript(unittest.TestCase):
with self.assertRaises(transcripts_utils.TranscriptsGenerationException):
transcripts_utils.Transcript.convert(invalid_srt_transcript, 'srt', 'sjson')
def test_convert_invalid_invalid_sjson_to_srt(self):
invalid_content = "Text with special character /\"\'\b\f\t\r\n."
error_transcript = {"start": [1], "end": [2], "text": ["An error occured obtaining the transcript."]}
assert transcripts_utils.Transcript.convert(invalid_content, 'sjson', 'txt') == error_transcript['text'][0]
assert error_transcript["text"][0] in transcripts_utils.Transcript.convert(invalid_content, 'sjson', 'srt')
def test_dummy_non_existent_transcript(self):
"""
Test `Transcript.asset` raises `NotFoundError` for dummy non-existent transcript.
@@ -995,6 +1001,16 @@ class TestGetTranscript(SharedModuleStoreTestCase):
exception_message = str(no_en_transcript_exception.exception)
self.assertEqual(exception_message, 'No transcript for `en` language')
@patch('xmodule.video_module.transcripts_utils.edxval_api.get_video_transcript_data')
def test_get_transcript_incorrect_json_(self, mock_get_video_transcript_data):
"""
Verify that `get transcript` function returns a working json file if the original throws an error
"""
error_transcript = {"start": [1], "end": [2], "text": ["An error occured obtaining the transcript."]}
mock_get_video_transcript_data.side_effect = ValueError
content, _, _ = transcripts_utils.get_transcript(self.video, 'zh')
assert error_transcript["text"][0] in content
@ddt.data(
transcripts_utils.TranscriptsGenerationException,
UnicodeDecodeError('aliencodec', b'\x02\x01', 1, 2, 'alien codec found!')

View File

@@ -577,8 +577,17 @@ def get_video_transcript_content(edx_video_id, language_code):
transcript = None
edx_video_id = clean_video_id(edx_video_id)
if edxval_api and edx_video_id:
transcript = edxval_api.get_video_transcript_data(edx_video_id, language_code)
try:
transcript = edxval_api.get_video_transcript_data(edx_video_id, language_code)
except ValueError:
log.exception(
f"Error getting transcript from edx-val id: {edx_video_id}: language code {language_code}"
)
content = '{"start": [1],"end": [2],"text": ["An error occured obtaining the transcript."]}'
transcript = dict(
file_name='error-{edx_video_id}-{language_code}.srt',
content=Transcript.convert(content, 'sjson', 'srt')
)
return transcript
@@ -680,14 +689,20 @@ class Transcript:
content_str = content.decode('latin-1')
else:
content_str = content
try:
content_dict = json.loads(content_str)
except ValueError:
truncated = content_str[:100].strip()
log.exception(
f"Failed to convert {input_format} to {output_format} for {repr(truncated)}..."
)
content_dict = {"start": [1], "end": [2], "text": ["An error occured obtaining the transcript."]}
if output_format == 'txt':
text = json.loads(content_str)['text']
text = content_dict['text']
text_without_none = [line if line else '' for line in text]
return html.unescape("\n".join(text_without_none))
elif output_format == 'srt':
return generate_srt_from_sjson(json.loads(content_str), speed=1.0)
return generate_srt_from_sjson(content_dict, speed=1.0)
@staticmethod
def asset(location, subs_id, lang='en', filename=None):

View File

@@ -1296,15 +1296,16 @@ class TestGetTranscript(TestVideo): # lint-amnesty, pylint: disable=test-inheri
assert filename == 'zh_塞.srt'
assert mime_type == 'application/x-subrip; charset=utf-8'
def test_value_error(self):
def test_value_error_handled(self):
good_sjson = _create_file(content='bad content')
_upload_sjson_file(good_sjson, self.item.location)
self.item.sub = _get_subs_id(good_sjson.name)
transcripts = self.item.get_transcripts_info() # lint-amnesty, pylint: disable=unused-variable
with pytest.raises(ValueError):
get_transcript(self.item)
error_transcript = {"start": [], "end": [], "text": ["An error occured obtaining the transcript."]}
content, _, _ = get_transcript(self.item)
assert error_transcript["text"][0] in content
def test_key_error(self):
good_sjson = _create_file(content="""