Merge pull request #17976 from edx/mushtaq/html5_sources_transcript
Update the get_transcript util to also look up transcripts by the IDs derived from html5_sources
This commit is contained in:
@@ -261,10 +261,7 @@ class TestMigrateTranscripts(ModuleStoreTestCase):
|
||||
u'[Transcript migration] process for ge transcript started'),
|
||||
(LOGGER_NAME,
|
||||
'ERROR',
|
||||
'[Transcript migration] Exception: u"SON(['
|
||||
'(\'category\', \'asset\'), (\'name\', u\'not_found.srt\'),'
|
||||
' (\'course\', u\'{}\'), (\'tag\', \'c4x\'), (\'org\', u\'{}\'),'
|
||||
' (\'revision\', None)])"'.format(self.course_2.id.course, self.course_2.id.org)),
|
||||
"[Transcript migration] Exception: u'No transcript for `ge` language'"),
|
||||
(LOGGER_NAME,
|
||||
'INFO',
|
||||
u'[Transcript migration] process for course {} ended. Processed 1 transcripts'.format(
|
||||
@@ -272,11 +269,8 @@ class TestMigrateTranscripts(ModuleStoreTestCase):
|
||||
)),
|
||||
(LOGGER_NAME,
|
||||
'INFO',
|
||||
"[Transcript migration] Result: Failed: language ge of video test_edx_video_id_2 with exception SON(["
|
||||
"('category', 'asset'), ('name', u'not_found.srt'), ('course', u'{}'),"
|
||||
" ('tag', 'c4x'), ('org', u'{}'), ('revision', None)])".format(
|
||||
self.course_2.id.course, self.course_2.id.org)
|
||||
)
|
||||
"[Transcript migration] Result: Failed: language ge of video test_edx_video_id_2 with exception "
|
||||
"No transcript for `ge` language")
|
||||
)
|
||||
|
||||
with LogCapture(LOGGER_NAME, level=logging.INFO) as logger:
|
||||
|
||||
@@ -744,7 +744,7 @@ class TestGetTranscript(SharedModuleStoreTestCase):
|
||||
edx_video_id=u'1234-5678-90'
|
||||
)
|
||||
|
||||
def create_transcript(self, subs_id, language=u'en', filename='video.srt'):
|
||||
def create_transcript(self, subs_id, language=u'en', filename='video.srt', youtube_id_1_0='', html5_sources=None):
|
||||
"""
|
||||
create transcript.
|
||||
"""
|
||||
@@ -752,21 +752,26 @@ class TestGetTranscript(SharedModuleStoreTestCase):
|
||||
if language != u'en':
|
||||
transcripts = {language: filename}
|
||||
|
||||
html5_sources = html5_sources or []
|
||||
self.video = ItemFactory.create(
|
||||
category='video',
|
||||
parent_location=self.vertical.location,
|
||||
sub=subs_id,
|
||||
youtube_id_1_0=youtube_id_1_0,
|
||||
transcripts=transcripts,
|
||||
edx_video_id=u'1234-5678-90'
|
||||
edx_video_id=u'1234-5678-90',
|
||||
html5_sources=html5_sources
|
||||
)
|
||||
|
||||
if subs_id:
|
||||
transcripts_utils.save_subs_to_store(
|
||||
self.subs_sjson,
|
||||
subs_id,
|
||||
self.video,
|
||||
language=language,
|
||||
)
|
||||
possible_subs = [subs_id, youtube_id_1_0] + transcripts_utils.get_html5_ids(html5_sources)
|
||||
for possible_sub in possible_subs:
|
||||
if possible_sub:
|
||||
transcripts_utils.save_subs_to_store(
|
||||
self.subs_sjson,
|
||||
possible_sub,
|
||||
self.video,
|
||||
language=language,
|
||||
)
|
||||
|
||||
def create_srt_file(self, content):
|
||||
"""
|
||||
@@ -812,31 +817,69 @@ class TestGetTranscript(SharedModuleStoreTestCase):
|
||||
)
|
||||
|
||||
@ddt.data(
|
||||
# video.sub transcript
|
||||
{
|
||||
'language': u'en',
|
||||
'subs_id': 'video_101',
|
||||
'filename': 'en_video_101.srt',
|
||||
'youtube_id_1_0': '',
|
||||
'html5_sources': [],
|
||||
'expected_filename': 'en_video_101.srt',
|
||||
},
|
||||
# if video.sub is present, rest will be skipped.
|
||||
{
|
||||
'language': u'en',
|
||||
'subs_id': 'video_101',
|
||||
'youtube_id_1_0': 'test_yt_id',
|
||||
'html5_sources': ['www.abc.com/foo.mp4'],
|
||||
'expected_filename': 'en_video_101.srt',
|
||||
},
|
||||
# video.youtube_id_1_0 transcript
|
||||
{
|
||||
'language': u'en',
|
||||
'subs_id': '',
|
||||
'youtube_id_1_0': 'test_yt_id',
|
||||
'html5_sources': [],
|
||||
'expected_filename': 'en_test_yt_id.srt',
|
||||
},
|
||||
# video.html5_sources transcript
|
||||
{
|
||||
'language': u'en',
|
||||
'subs_id': '',
|
||||
'youtube_id_1_0': '',
|
||||
'html5_sources': ['www.abc.com/foo.mp4'],
|
||||
'expected_filename': 'en_foo.srt',
|
||||
},
|
||||
# non-english transcript
|
||||
{
|
||||
'language': u'ur',
|
||||
'subs_id': '',
|
||||
'filename': 'ur_video_101.srt',
|
||||
'youtube_id_1_0': '',
|
||||
'html5_sources': [],
|
||||
'expected_filename': 'ur_video_101.srt',
|
||||
},
|
||||
)
|
||||
@ddt.unpack
|
||||
def test_get_transcript_from_content_store(self, language, subs_id, filename):
|
||||
def test_get_transcript_from_contentstore(
|
||||
self,
|
||||
language,
|
||||
subs_id,
|
||||
youtube_id_1_0,
|
||||
html5_sources,
|
||||
expected_filename
|
||||
):
|
||||
"""
|
||||
Verify that `get_transcript` function returns correct data when transcript is in content store.
|
||||
"""
|
||||
self.upload_file(self.create_srt_file(self.subs_srt), self.video.location, filename)
|
||||
self.create_transcript(subs_id, language, filename)
|
||||
content, filename, mimetype = transcripts_utils.get_transcript(
|
||||
base_filename = 'video_101.srt'
|
||||
self.upload_file(self.create_srt_file(self.subs_srt), self.video.location, base_filename)
|
||||
self.create_transcript(subs_id, language, base_filename, youtube_id_1_0, html5_sources)
|
||||
content, file_name, mimetype = transcripts_utils.get_transcript(
|
||||
self.video,
|
||||
language
|
||||
)
|
||||
|
||||
self.assertEqual(content, self.subs[language])
|
||||
self.assertEqual(filename, filename)
|
||||
self.assertEqual(file_name, expected_filename)
|
||||
self.assertEqual(mimetype, self.srt_mime_type)
|
||||
|
||||
def test_get_transcript_from_content_store_for_ur(self):
|
||||
|
||||
@@ -859,7 +859,7 @@ def get_transcript_from_val(edx_video_id, lang=None, output_format=Transcript.SR
|
||||
"""
|
||||
Get video transcript from edx-val.
|
||||
Arguments:
|
||||
edx_video_id (unicode): course identifier
|
||||
edx_video_id (unicode): video identifier
|
||||
lang (unicode): transcript language
|
||||
output_format (unicode): transcript output format
|
||||
Returns:
|
||||
@@ -923,6 +923,7 @@ def get_transcript_from_contentstore(video, language, output_format, transcripts
|
||||
Returns:
|
||||
tuple containing content, filename, mimetype
|
||||
"""
|
||||
input_format, base_name, transcript_content = None, None, None
|
||||
if output_format not in (Transcript.SRT, Transcript.SJSON, Transcript.TXT):
|
||||
raise NotFoundError('Invalid transcript format `{output_format}`'.format(output_format=output_format))
|
||||
|
||||
@@ -930,24 +931,24 @@ def get_transcript_from_contentstore(video, language, output_format, transcripts
|
||||
transcripts = dict(other_languages)
|
||||
|
||||
# this is sent in case of a translation dispatch and we need to use it as our subs_id.
|
||||
if youtube_id:
|
||||
transcripts['en'] = youtube_id
|
||||
elif sub:
|
||||
transcripts['en'] = sub
|
||||
elif video.youtube_id_1_0:
|
||||
transcripts['en'] = video.youtube_id_1_0
|
||||
elif language == u'en':
|
||||
raise NotFoundError('No transcript for `en` language')
|
||||
possible_sub_ids = [youtube_id, sub, video.youtube_id_1_0] + get_html5_ids(video.html5_sources)
|
||||
for sub_id in possible_sub_ids:
|
||||
try:
|
||||
transcripts[u'en'] = sub_id
|
||||
input_format, base_name, transcript_content = get_transcript_for_video(
|
||||
video.location,
|
||||
subs_id=sub_id,
|
||||
file_name=transcripts[language],
|
||||
language=language
|
||||
)
|
||||
break
|
||||
except (KeyError, NotFoundError):
|
||||
continue
|
||||
|
||||
try:
|
||||
input_format, base_name, transcript_content = get_transcript_for_video(
|
||||
video.location,
|
||||
subs_id=transcripts.get('en'),
|
||||
file_name=transcripts[language],
|
||||
language=language
|
||||
)
|
||||
except KeyError:
|
||||
raise NotFoundError
|
||||
if transcript_content is None:
|
||||
raise NotFoundError('No transcript for `{lang}` language'.format(
|
||||
lang=language
|
||||
))
|
||||
|
||||
# add language prefix to the transcript file name only if a language is given (no prefix for None or an empty value)
|
||||
language_prefix = '{}_'.format(language) if language else ''
|
||||
|
||||
Reference in New Issue
Block a user