transcript name param url
This commit is contained in:
committed by
Waheed Ahmed
parent
de5e12d995
commit
ecc3473d36
@@ -270,6 +270,107 @@ class TestDownloadYoutubeSubs(ModuleStoreTestCase):
|
||||
|
||||
self.clear_sub_content(good_youtube_sub)
|
||||
|
||||
@patch('xmodule.video_module.transcripts_utils.requests.get')
def test_get_transcript_name_youtube_server_success(self, mock_get):
    """
    The transcript name is resolved from the track listing returned by the
    (mocked) youtube server api.

    The lookup depends on the language code; the default language in the
    YOUTUBE Text Api settings is "en", so the name of the "en" track is expected.
    """
    text_api_config = copy.deepcopy(settings.YOUTUBE['TEXT_API'])
    text_api_config['params']['v'] = 'dummy_video_id'

    # Canned listing with two tracks; only the first one has lang_code "en".
    track_listing = """
    <transcript_list>
        <track id="1" name="Custom" lang_code="en" />
        <track id="0" name="Custom1" lang_code="en-GB"/>
    </transcript_list>
    """
    mock_get.return_value = Mock(status_code=200, text=track_listing, content=track_listing)

    self.assertEqual(
        transcripts_utils.youtube_video_transcript_name(text_api_config),
        'Custom'
    )
|
||||
|
||||
@patch('xmodule.video_module.transcripts_utils.requests.get')
def test_get_transcript_name_youtube_server_no_transcripts(self, mock_get):
    """
    An empty transcript listing yields a transcript name of None.
    """
    text_api_config = copy.deepcopy(settings.YOUTUBE['TEXT_API'])
    text_api_config['params']['v'] = 'dummy_video_id'

    # A successful response whose listing contains no track elements at all.
    empty_listing = "<transcript_list></transcript_list>"
    mock_get.return_value = Mock(status_code=200, text=empty_listing, content=empty_listing)

    self.assertIsNone(transcripts_utils.youtube_video_transcript_name(text_api_config))
|
||||
|
||||
@patch('xmodule.video_module.transcripts_utils.requests.get')
def test_get_transcript_name_youtube_server_language_not_exist(self, mock_get):
    """
    A language that is absent from the transcript listing yields a
    transcript name of None.
    """
    text_api_config = copy.deepcopy(settings.YOUTUBE['TEXT_API'])
    text_api_config['params']['v'] = 'dummy_video_id'
    # Ask for a language code that none of the canned tracks carries.
    text_api_config['params']['lang'] = 'abc'

    track_listing = """
    <transcript_list>
        <track id="1" name="Custom" lang_code="en" />
        <track id="0" name="Custom1" lang_code="en-GB"/>
    </transcript_list>
    """
    mock_get.return_value = Mock(status_code=200, text=track_listing, content=track_listing)

    self.assertIsNone(transcripts_utils.youtube_video_transcript_name(text_api_config))
|
||||
|
||||
def mocked_requests_get(*args, **kwargs):
    """
    Stand-in for requests.get, installed by the mock as a side effect.

    Positional arguments are ignored. The keyword arguments must match one of
    the canned requests exactly to get a 200 response; anything else gets a
    404 response with empty text.
    """
    # pylint: disable=no-method-argument
    listing_xml = """
    <transcript_list>
        <track id="1" name="Custom" lang_code="en" />
        <track id="0" name="Custom1" lang_code="en-GB"/>
    </transcript_list>
    """
    transcript_xml = textwrap.dedent("""
        <transcript>
            <text start="0" dur="0.27"></text>
            <text start="0.27" dur="2.45">Test text 1.</text>
            <text start="2.72">Test text 2.</text>
            <text start="5.43" dur="1.73">Test text 3.</text>
        </transcript>
    """)

    # (expected keyword arguments, attributes of the 200 response) pairs,
    # checked in order; the whole kwargs dict has to compare equal.
    canned_responses = (
        (
            {'params': {'lang': 'en', 'v': 'good_id_2'}},
            {'text': ''},
        ),
        (
            {'params': {'type': 'list', 'v': 'good_id_2'}},
            {'text': listing_xml, 'content': listing_xml},
        ),
        (
            {'params': {'lang': 'en', 'v': 'good_id_2', 'name': 'Custom'}},
            {'text': transcript_xml, 'content': transcript_xml},
        ),
    )
    for expected_kwargs, response_attrs in canned_responses:
        if kwargs == expected_kwargs:
            return Mock(status_code=200, **response_attrs)

    return Mock(status_code=404, text='')
|
||||
|
||||
@patch('xmodule.video_module.transcripts_utils.requests.get', side_effect=mocked_requests_get)
def test_downloading_subs_using_transcript_name(self, mock_get):
    """
    Subs are downloaded with the transcript name passed in the url params.
    """
    video_id = 'good_id_2'
    self.clear_sub_content(video_id)

    transcripts_utils.download_youtube_subs(video_id, self.course, settings)

    # The download must have requested the transcript by its name.
    expected_params = {'lang': 'en', 'v': 'good_id_2', 'name': 'Custom'}
    mock_get.assert_any_call('http://video.google.com/timedtext', params=expected_params)

    # Check asset status after import of transcript.
    sjson_name = 'subs_{0}.srt.sjson'.format(video_id)
    asset_location = StaticContent.compute_location(self.course.id, sjson_name)
    self.assertTrue(contentstore().find(asset_location))

    self.clear_sub_content(video_id)
|
||||
|
||||
|
||||
class TestGenerateSubsFromSource(TestDownloadYoutubeSubs):
|
||||
"""Tests for `generate_subs_from_source` function."""
|
||||
|
||||
@@ -94,7 +94,32 @@ def save_subs_to_store(subs, subs_id, item, language='en'):
|
||||
return save_to_store(filedata, filename, 'application/json', item.location)
|
||||
|
||||
|
||||
def get_transcripts_from_youtube(youtube_id, settings, i18n):
|
||||
def youtube_video_transcript_name(youtube_text_api):
    """
    Look up the transcript name of a video for the configured language.

    Requests the transcript listing for the video id stored in
    `youtube_text_api['params']['v']` and returns the `name` attribute of the
    first `track` element whose `lang_code` equals
    `youtube_text_api['params']['lang']`.

    Returns None when the response status is not 200, the response text is
    empty, or no track matches the language.
    """
    # pylint: disable=no-member
    xml_parser = etree.XMLParser(encoding='utf-8')

    list_query = {'type': 'list', 'v': youtube_text_api['params']['v']}
    wanted_lang = youtube_text_api['params']['lang']

    # Fetch the list of transcripts of the specific video; url-form:
    # http://video.google.com/timedtext?type=list&v={VideoId}
    listing = requests.get('http://' + youtube_text_api['url'], params=list_query)
    if listing.status_code != 200 or not listing.text:
        return None

    # pylint: disable=no-member
    listing_root = etree.fromstring(listing.content, parser=xml_parser)

    # Scan the transcripts info for the language code (such as 'en'); the
    # first matching track wins, and None is returned when nothing matches.
    return next(
        (
            node.get('name')
            for node in listing_root
            if node.tag == 'track' and node.get('lang_code', '') == wanted_lang
        ),
        None
    )
|
||||
|
||||
|
||||
def get_transcripts_from_youtube(youtube_id, settings, i18n, youtube_transcript_name=''):
|
||||
"""
|
||||
Gets transcripts from youtube for youtube_id.
|
||||
|
||||
@@ -109,6 +134,12 @@ def get_transcripts_from_youtube(youtube_id, settings, i18n):
|
||||
|
||||
youtube_text_api = copy.deepcopy(settings.YOUTUBE['TEXT_API'])
|
||||
youtube_text_api['params']['v'] = youtube_id
|
||||
# if the transcript name is not empty on youtube server we have to pass
|
||||
# name param in url in order to get transcript
|
||||
# example http://video.google.com/timedtext?lang=en&v={VideoId}&name={transcript_name}
|
||||
youtube_transcript_name = youtube_video_transcript_name(youtube_text_api)
|
||||
if youtube_transcript_name:
|
||||
youtube_text_api['params']['name'] = youtube_transcript_name
|
||||
data = requests.get('http://' + youtube_text_api['url'], params=youtube_text_api['params'])
|
||||
|
||||
if data.status_code != 200 or not data.text:
|
||||
|
||||
Reference in New Issue
Block a user