diff --git a/cms/djangoapps/contentstore/tests/test_transcripts_utils.py b/cms/djangoapps/contentstore/tests/test_transcripts_utils.py
index ef762471d1..82d7a2e91c 100644
--- a/cms/djangoapps/contentstore/tests/test_transcripts_utils.py
+++ b/cms/djangoapps/contentstore/tests/test_transcripts_utils.py
@@ -270,6 +270,107 @@ class TestDownloadYoutubeSubs(ModuleStoreTestCase):
 
         self.clear_sub_content(good_youtube_sub)
 
+    @patch('xmodule.video_module.transcripts_utils.requests.get')
+    def test_get_transcript_name_youtube_server_success(self, mock_get):
+        """
+        Get transcript name from transcript_list fetch from youtube server api
+        depends on language code, default language in YOUTUBE Text Api is "en"
+        """
+        youtube_text_api = copy.deepcopy(settings.YOUTUBE['TEXT_API'])
+        youtube_text_api['params']['v'] = 'dummy_video_id'
+        response_success = """
+
+
+
+
+        """
+        mock_get.return_value = Mock(status_code=200, text=response_success, content=response_success)
+
+        transcript_name = transcripts_utils.youtube_video_transcript_name(youtube_text_api)
+        self.assertEqual(transcript_name, 'Custom')
+
+    @patch('xmodule.video_module.transcripts_utils.requests.get')
+    def test_get_transcript_name_youtube_server_no_transcripts(self, mock_get):
+        """
+        When there are no transcripts of video transcript name will be None
+        """
+        youtube_text_api = copy.deepcopy(settings.YOUTUBE['TEXT_API'])
+        youtube_text_api['params']['v'] = 'dummy_video_id'
+        response_success = ""
+        mock_get.return_value = Mock(status_code=200, text=response_success, content=response_success)
+
+        transcript_name = transcripts_utils.youtube_video_transcript_name(youtube_text_api)
+        self.assertIsNone(transcript_name)
+
+    @patch('xmodule.video_module.transcripts_utils.requests.get')
+    def test_get_transcript_name_youtube_server_language_not_exist(self, mock_get):
+        """
+        When the language does not exist in transcript_list transcript name will be None
+        """
+        youtube_text_api = copy.deepcopy(settings.YOUTUBE['TEXT_API'])
+        youtube_text_api['params']['v'] = 'dummy_video_id'
+        youtube_text_api['params']['lang'] = 'abc'
+        response_success = """
+
+
+
+
+        """
+        mock_get.return_value = Mock(status_code=200, text=response_success, content=response_success)
+
+        transcript_name = transcripts_utils.youtube_video_transcript_name(youtube_text_api)
+        self.assertIsNone(transcript_name)
+
+    def mocked_requests_get(*args, **kwargs):
+        """
+        This method will be used by the mock to replace requests.get
+        """
+        # pylint: disable=no-method-argument
+        response_transcript_list = """
+
+
+
+
+        """
+        response_transcript = textwrap.dedent("""
+
+
+        Test text 1.
+        Test text 2.
+        Test text 3.
+
+        """)
+
+        if kwargs == {'params': {'lang': 'en', 'v': 'good_id_2'}}:
+            return Mock(status_code=200, text='')
+        elif kwargs == {'params': {'type': 'list', 'v': 'good_id_2'}}:
+            return Mock(status_code=200, text=response_transcript_list, content=response_transcript_list)
+        elif kwargs == {'params': {'lang': 'en', 'v': 'good_id_2', 'name': 'Custom'}}:
+            return Mock(status_code=200, text=response_transcript, content=response_transcript)
+
+        return Mock(status_code=404, text='')
+
+    @patch('xmodule.video_module.transcripts_utils.requests.get', side_effect=mocked_requests_get)
+    def test_downloading_subs_using_transcript_name(self, mock_get):
+        """
+        Download transcript using transcript name in url
+        """
+        good_youtube_sub = 'good_id_2'
+        self.clear_sub_content(good_youtube_sub)
+
+        transcripts_utils.download_youtube_subs(good_youtube_sub, self.course, settings)
+        mock_get.assert_any_call(
+            'http://video.google.com/timedtext',
+            params={'lang': 'en', 'v': 'good_id_2', 'name': 'Custom'}
+        )
+
+        # Check asset status after import of transcript.
+        filename = 'subs_{0}.srt.sjson'.format(good_youtube_sub)
+        content_location = StaticContent.compute_location(self.course.id, filename)
+        self.assertTrue(contentstore().find(content_location))
+
+        self.clear_sub_content(good_youtube_sub)
+
 
 class TestGenerateSubsFromSource(TestDownloadYoutubeSubs):
     """Tests for `generate_subs_from_source` function."""
diff --git a/common/lib/xmodule/xmodule/video_module/transcripts_utils.py b/common/lib/xmodule/xmodule/video_module/transcripts_utils.py
index fd82ed3452..bf7d39b7c9 100644
--- a/common/lib/xmodule/xmodule/video_module/transcripts_utils.py
+++ b/common/lib/xmodule/xmodule/video_module/transcripts_utils.py
@@ -94,7 +94,47 @@ def save_subs_to_store(subs, subs_id, item, language='en'):
     return save_to_store(filedata, filename, 'application/json', item.location)
 
 
-def get_transcripts_from_youtube(youtube_id, settings, i18n):
+def youtube_video_transcript_name(youtube_text_api):
+    """
+    Get the name of the YouTube transcript track for the requested language.
+
+    Requests the video's transcript list from the YouTube timedtext API
+    (http://video.google.com/timedtext?type=list&v={VideoId}) and looks for
+    a `track` element whose `lang_code` equals the requested language.
+
+    Arguments:
+        youtube_text_api (dict): copy of settings.YOUTUBE['TEXT_API']; reads
+            `url`, `params['v']` (video id) and `params['lang']`.
+
+    Returns:
+        The `name` attribute of the first matching track, or None when the
+        request fails, the response is empty or is not well-formed XML, or
+        no track matches the language.
+    """
+    # pylint: disable=no-member
+    utf8_parser = etree.XMLParser(encoding='utf-8')
+
+    transcripts_param = {'type': 'list', 'v': youtube_text_api['params']['v']}
+    lang = youtube_text_api['params']['lang']
+    youtube_response = requests.get('http://' + youtube_text_api['url'], params=transcripts_param)
+    if youtube_response.status_code != 200 or not youtube_response.text:
+        return None
+
+    try:
+        youtube_data = etree.fromstring(youtube_response.content, parser=utf8_parser)
+    except etree.XMLSyntaxError:
+        # The name lookup is best-effort: an unparsable list must not abort
+        # the transcript download, so fall back to "no name".
+        return None
+
+    # Each direct child describes one available transcript track.
+    for element in youtube_data:
+        if element.tag == 'track' and element.get('lang_code', '') == lang:
+            return element.get('name')
+    return None
+
+
+def get_transcripts_from_youtube(youtube_id, settings, i18n, youtube_transcript_name=''):
     """
     Gets transcripts from youtube for youtube_id.
 
@@ -109,6 +149,12 @@ def get_transcripts_from_youtube(youtube_id, settings, i18n):
 
     youtube_text_api = copy.deepcopy(settings.YOUTUBE['TEXT_API'])
     youtube_text_api['params']['v'] = youtube_id
+    # if the transcript name is not empty on youtube server we have to pass
+    # name param in url in order to get transcript
+    # example http://video.google.com/timedtext?lang=en&v={VideoId}&name={transcript_name}
+    youtube_transcript_name = youtube_video_transcript_name(youtube_text_api)
+    if youtube_transcript_name:
+        youtube_text_api['params']['name'] = youtube_transcript_name
     data = requests.get('http://' + youtube_text_api['url'], params=youtube_text_api['params'])
 
     if data.status_code != 200 or not data.text: