diff --git a/cms/envs/common.py b/cms/envs/common.py index 459c24de32..e69609022f 100644 --- a/cms/envs/common.py +++ b/cms/envs/common.py @@ -1536,6 +1536,7 @@ YOUTUBE = { 'TRANSCRIPTS': { 'CAPTION_TRACKS_REGEX': r"captionTracks\"\:\[(?P<caption_tracks>[^\]]+)", 'YOUTUBE_URL_BASE': 'https://www.youtube.com/watch?v=', + 'ALLOWED_LANGUAGE_CODES': ["en", "en-US", "en-GB"], }, 'IMAGE_API': 'http://img.youtube.com/vi/{youtube_id}/0.jpg', # /maxresdefault.jpg for 1920*1080 diff --git a/lms/envs/common.py b/lms/envs/common.py index 16382b6175..b17815301f 100644 --- a/lms/envs/common.py +++ b/lms/envs/common.py @@ -2949,6 +2949,7 @@ YOUTUBE = { 'TRANSCRIPTS': { 'CAPTION_TRACKS_REGEX': r"captionTracks\"\:\[(?P<caption_tracks>[^\]]+)", 'YOUTUBE_URL_BASE': 'https://www.youtube.com/watch?v=', + 'ALLOWED_LANGUAGE_CODES': ["en", "en-US", "en-GB"], }, 'IMAGE_API': 'http://img.youtube.com/vi/{youtube_id}/0.jpg', # /maxresdefault.jpg for 1920*1080 diff --git a/xmodule/tests/test_transcripts_utils.py b/xmodule/tests/test_transcripts_utils.py index 0675dff2cf..4f33169b51 100644 --- a/xmodule/tests/test_transcripts_utils.py +++ b/xmodule/tests/test_transcripts_utils.py @@ -7,10 +7,11 @@ These tests follow the following nomenclature: - among the fields found in a track descriptor is a caption URL (aka caption link) - use this link to obtain the track's caption data ''' -from ..video_block.transcripts_utils import get_transcript_link_from_youtube - from unittest import mock, TestCase +import ddt + +from ..video_block.transcripts_utils import get_transcript_link_from_youtube YOUTUBE_VIDEO_ID = "z-LoKnweV6w" @@ -102,43 +103,34 @@ class YoutubeVideoHTMLResponse: self.content = bytearray(youtube_html, 'UTF-8') +@ddt.ddt class TranscriptsUtilsTest(TestCase): """ Tests utility fucntions for transcripts (in video_block) """ @mock.patch('requests.get') - def test_get_transcript_link_from_youtube(self, mock_get): + @ddt.data("en", "en-US", "en-GB") + def test_get_transcript_link_from_youtube(self, language_code, mock_get): 
""" Happy path test: english caption link returned when video page HTML has one english caption """ - language_code = 'en' mock_get.return_value = YoutubeVideoHTMLResponse.with_caption_track(language_code) language_specific_caption_link = get_transcript_link_from_youtube(YOUTUBE_VIDEO_ID) self.assertEqual(language_specific_caption_link, CAPTION_URL_UTF8_DECODED_TEMPLATE.format(language_code)) @ mock.patch('requests.get') - def test_get_caption_no_english_caption(self, mock_get): + @ddt.data("fr", None) + def test_get_caption_no_english_caption(self, language_code, mock_get): """ No caption link returned when video page HTML contains no caption in English """ - language_code = 'fr' mock_get.return_value = YoutubeVideoHTMLResponse.with_caption_track(language_code) english_language_caption_link = get_transcript_link_from_youtube(YOUTUBE_VIDEO_ID) self.assertIsNone(english_language_caption_link) - @ mock.patch('requests.get') - def test_get_caption_no_captions_in_HTML(self, mock_get): - """ - No caption link returned when video page HTML contains no captions at all - """ - mock_get.return_value = YoutubeVideoHTMLResponse.with_no_caption_tracks() - - english_language_caption_link = get_transcript_link_from_youtube(YOUTUBE_VIDEO_ID) - self.assertEqual(english_language_caption_link, None) - @ mock.patch('requests.get') def test_get_caption_malformed_caption_locator(self, mock_get): """ diff --git a/xmodule/video_block/transcripts_utils.py b/xmodule/video_block/transcripts_utils.py index 39f18ca741..b3fcb52202 100644 --- a/xmodule/video_block/transcripts_utils.py +++ b/xmodule/video_block/transcripts_utils.py @@ -182,12 +182,13 @@ def get_transcript_link_from_youtube(youtube_id): try: youtube_html = requests.get(f"{youtube_url_base}{youtube_id}") caption_re = settings.YOUTUBE['TRANSCRIPTS']['CAPTION_TRACKS_REGEX'] + allowed_language_codes = settings.YOUTUBE['TRANSCRIPTS']['ALLOWED_LANGUAGE_CODES'] caption_matched = re.search(caption_re, 
youtube_html.content.decode("utf-8")) if caption_matched: caption_tracks = json.loads(f'[{caption_matched.group("caption_tracks")}]') for caption in caption_tracks: - if "languageCode" in caption.keys() and caption["languageCode"] == "en": - return caption["baseUrl"] + if "languageCode" in caption.keys() and caption["languageCode"] in allowed_language_codes: + return caption.get("baseUrl") return None except ConnectionError: return None