diff --git a/cms/djangoapps/contentstore/tests/test_transcripts_utils.py b/cms/djangoapps/contentstore/tests/test_transcripts_utils.py index 5914414c34..4ca039a08f 100644 --- a/cms/djangoapps/contentstore/tests/test_transcripts_utils.py +++ b/cms/djangoapps/contentstore/tests/test_transcripts_utils.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- """ Tests for transcripts_utils. """ import copy +import ddt import textwrap import unittest from uuid import uuid4 -import ddt from django.conf import settings from django.test.utils import override_settings diff --git a/cms/djangoapps/contentstore/views/tests/test_transcripts.py b/cms/djangoapps/contentstore/views/tests/test_transcripts.py index 319f1f6b43..23109cf925 100644 --- a/cms/djangoapps/contentstore/views/tests/test_transcripts.py +++ b/cms/djangoapps/contentstore/views/tests/test_transcripts.py @@ -1,6 +1,7 @@ """Tests for items views.""" import copy +import ddt import json import os import tempfile @@ -10,7 +11,7 @@ from uuid import uuid4 from django.conf import settings from django.core.urlresolvers import reverse from django.test.utils import override_settings -from mock import patch +from mock import patch, Mock from opaque_keys.edx.keys import UsageKey from contentstore.tests.utils import CourseTestCase, mock_requests_get @@ -525,7 +526,70 @@ class TestDownloadTranscripts(BaseTranscripts): self.assertEqual(resp.status_code, 404) + @patch('xmodule.video_module.transcripts_utils.VideoTranscriptEnabledFlag.feature_enabled', Mock(return_value=True)) + @patch('xmodule.video_module.transcripts_utils.edxval_api.get_video_transcript_data') + def test_download_fallback_transcript(self, mock_get_video_transcript_data): + """ + Verify that the val transcript is returned if its not found in content-store. + """ + mock_get_video_transcript_data.return_value = { + 'content': json.dumps({ + "start": [10], + "end": [100], + "text": ["Hi, welcome to Edx."], + }), + 'file_name': 'edx.sjson' + } + self.item.data = textwrap.dedent(""" + + """) + modulestore().update_item(self.item, self.user.id) + + download_transcripts_url = reverse('download_transcripts') + response = self.client.get(download_transcripts_url, {'locator': self.video_usage_key}) + + # Expected response + expected_content = u'0\n00:00:00,010 --> 00:00:00,100\nHi, welcome to Edx.\n\n' + expected_headers = { + 'content-disposition': 'attachment; filename="edx.srt"', + 'content-type': 'application/x-subrip; charset=utf-8' + } + + # Assert the actual response + self.assertEqual(response.status_code, 200) + self.assertEqual(response.content, expected_content) + for attribute, value in expected_headers.iteritems(): + self.assertEqual(response.get(attribute), value) + + @patch( + 'xmodule.video_module.transcripts_utils.VideoTranscriptEnabledFlag.feature_enabled', + Mock(return_value=False), + ) + def test_download_fallback_transcript_feature_disabled(self): + """ + Verify the transcript download when feature is disabled. + """ + self.item.data = textwrap.dedent(""" + + """) + modulestore().update_item(self.item, self.user.id) + + download_transcripts_url = reverse('download_transcripts') + response = self.client.get(download_transcripts_url, {'locator': self.video_usage_key}) + # Assert the actual response + self.assertEqual(response.status_code, 404) + + +@ddt.ddt class TestCheckTranscripts(BaseTranscripts): """ Tests for '/transcripts/check' url. @@ -760,6 +824,58 @@ class TestCheckTranscripts(BaseTranscripts): self.assertEqual(resp.status_code, 400) self.assertEqual(json.loads(resp.content).get('status'), 'Transcripts are supported only for "video" modules.') + @ddt.data( + (True, 'found'), + (False, 'not_found') + ) + @ddt.unpack + @patch('xmodule.video_module.transcripts_utils.VideoTranscriptEnabledFlag.feature_enabled') + @patch('xmodule.video_module.transcripts_utils.edxval_api.get_video_transcript_data', Mock(return_value=True)) + def test_command_for_fallback_transcript(self, feature_enabled, expected_command, video_transcript_feature): + """ + Verify the command if a transcript is not found in content-store but + its there in edx-val. + """ + video_transcript_feature.return_value = feature_enabled + self.item.data = textwrap.dedent(""" + + """) + modulestore().update_item(self.item, self.user.id) + + # Make request to check transcript view + data = { + 'locator': unicode(self.video_usage_key), + 'videos': [{ + 'type': 'html5', + 'video': "", + 'mode': 'mp4', + }] + } + check_transcripts_url = reverse('check_transcripts') + response = self.client.get(check_transcripts_url, {'data': json.dumps(data)}) + + # Assert the response + self.assertEqual(response.status_code, 200) + self.assertDictEqual( + json.loads(response.content), + { + u'status': u'Success', + u'subs': u'', + u'youtube_local': False, + u'is_youtube_mode': False, + u'youtube_server': False, + u'command': expected_command, + u'current_item_subs': None, + u'youtube_diff': True, + u'html5_local': [], + u'html5_equal': False, + } + ) + class TestSaveTranscripts(BaseTranscripts): """ diff --git a/cms/djangoapps/contentstore/views/transcripts_ajax.py b/cms/djangoapps/contentstore/views/transcripts_ajax.py index e32f27dbca..a9b7118a38 100644 --- a/cms/djangoapps/contentstore/views/transcripts_ajax.py +++ b/cms/djangoapps/contentstore/views/transcripts_ajax.py @@ -27,16 +27,18 @@ from xmodule.exceptions import NotFoundError from xmodule.modulestore.django import modulestore from xmodule.modulestore.exceptions import ItemNotFoundError from xmodule.video_module.transcripts_utils import ( - GetTranscriptsFromYouTubeException, - TranscriptsRequestValidationException, copy_or_rename_transcript, download_youtube_subs, + GetTranscriptsFromYouTubeException, + get_video_transcript_content, generate_srt_from_sjson, generate_subs_from_source, get_transcripts_from_youtube, manage_video_subtitles_save, remove_subs_from_store, - youtube_video_transcript_name + Transcript, + TranscriptsRequestValidationException, + youtube_video_transcript_name, ) __all__ = [ @@ -144,6 +146,7 @@ def download_transcripts(request): Raises Http404 if unsuccessful. """ locator = request.GET.get('locator') + subs_id = request.GET.get('subs_id') if not locator: log.debug('GET data without "locator" property.') raise Http404 @@ -154,31 +157,45 @@ def download_transcripts(request): log.debug("Can't find item by locator.") raise Http404 - subs_id = request.GET.get('subs_id') - if not subs_id: - log.debug('GET data without "subs_id" property.') - raise Http404 - if item.category != 'video': log.debug('transcripts are supported only for video" modules.') raise Http404 - filename = 'subs_{0}.srt.sjson'.format(subs_id) - content_location = StaticContent.compute_location(item.location.course_key, filename) try: - sjson_transcripts = contentstore().find(content_location) - log.debug("Downloading subs for %s id", subs_id) - str_subs = generate_srt_from_sjson(json.loads(sjson_transcripts.data), speed=1.0) - if not str_subs: - log.debug('generate_srt_from_sjson produces no subtitles') - raise Http404 - response = HttpResponse(str_subs, content_type='application/x-subrip') - response['Content-Disposition'] = 'attachment; filename="{0}.srt"'.format(subs_id) - return response + if not subs_id: + raise NotFoundError + + filename = subs_id + content_location = StaticContent.compute_location( + item.location.course_key, + 'subs_{filename}.srt.sjson'.format(filename=filename), + ) + sjson_transcript = contentstore().find(content_location).data except NotFoundError: - log.debug("Can't find content in storage for %s subs", subs_id) + # Try searching in VAL for the transcript as a last resort + transcript = get_video_transcript_content( + course_id=item.location.course_key, + language_code=u'en', + edx_video_id=item.edx_video_id, + youtube_id_1_0=item.youtube_id_1_0, + html5_sources=item.html5_sources, + ) + if not transcript: + raise Http404 + + filename = os.path.splitext(os.path.basename(transcript['file_name']))[0].encode('utf8') + sjson_transcript = transcript['content'] + + # convert sjson content into srt format. + transcript_content = Transcript.convert(sjson_transcript, input_format='sjson', output_format='srt') + if not transcript_content: raise Http404 + # Construct an HTTP response + response = HttpResponse(transcript_content, content_type='application/x-subrip; charset=utf-8') + response['Content-Disposition'] = 'attachment; filename="{filename}.srt"'.format(filename=filename) + return response + @login_required def check_transcripts(request): @@ -284,6 +301,17 @@ def check_transcripts(request): transcripts_presence['html5_equal'] = json.loads(html5_subs[0]) == json.loads(html5_subs[1]) command, subs_to_use = _transcripts_logic(transcripts_presence, videos) + if command == 'not_found': + # Try searching in VAL for the transcript as a last resort + video_transcript = get_video_transcript_content( + course_id=item.location.course_key, + language_code=u'en', + edx_video_id=item.edx_video_id, + youtube_id_1_0=item.youtube_id_1_0, + html5_sources=item.html5_sources, + ) + command = 'found' if video_transcript else command + transcripts_presence.update({ 'command': command, 'subs': subs_to_use, diff --git a/common/lib/xmodule/xmodule/video_module/transcripts_utils.py b/common/lib/xmodule/xmodule/video_module/transcripts_utils.py index d46944bbbf..e78eb4c894 100644 --- a/common/lib/xmodule/xmodule/video_module/transcripts_utils.py +++ b/common/lib/xmodule/xmodule/video_module/transcripts_utils.py @@ -12,12 +12,18 @@ from pysrt import SubRipTime, SubRipItem, SubRipFile from lxml import etree from HTMLParser import HTMLParser +from openedx.core.djangoapps.video_config.models import VideoTranscriptEnabledFlag from xmodule.exceptions import NotFoundError from xmodule.contentstore.content import StaticContent from xmodule.contentstore.django import contentstore from .bumper_utils import get_bumper_settings +try: + from edxval import api as edxval_api +except ImportError: + edxval_api = None + log = logging.getLogger(__name__) @@ -474,8 +480,8 @@ def get_video_ids_info(edx_video_id, youtube_id_1_0, html5_sources): Returns list internal or external video ids. Arguments: - edx_video_id (str): edx_video_id - youtube_id_1_0 (str): youtube id + edx_video_id (unicode): edx_video_id + youtube_id_1_0 (unicode): youtube id html5_sources (list): html5 video ids Returns: @@ -492,6 +498,28 @@ def get_video_ids_info(edx_video_id, youtube_id_1_0, html5_sources): return external, video_ids +def get_video_transcript_content(course_id, language_code, edx_video_id, youtube_id_1_0, html5_sources): + """ + Gets video transcript content, only if the corresponding feature flag is enabled for the given `course_id`. + + Arguments: + course_id(CourseKey): Course key identifying a course + language_code(unicode): Language code of the requested transcript + edx_video_id(unicode): edx-val's video identifier + youtube_id_1_0(unicode): A youtube source identifier + html5_sources(list): A list containing html5 sources + + Returns: + A dict containing transcript's file name and its sjson content. + """ + transcript = None + if VideoTranscriptEnabledFlag.feature_enabled(course_id=course_id) and edxval_api: + __, video_candidate_ids = get_video_ids_info(edx_video_id, youtube_id_1_0, html5_sources) + transcript = edxval_api.get_video_transcript_data(video_candidate_ids, language_code) + + return transcript + + class Transcript(object): """ Container for transcript methods. diff --git a/common/lib/xmodule/xmodule/video_module/video_handlers.py b/common/lib/xmodule/xmodule/video_module/video_handlers.py index 76e83dc6ff..fbd326a2ba 100644 --- a/common/lib/xmodule/xmodule/video_module/video_handlers.py +++ b/common/lib/xmodule/xmodule/video_module/video_handlers.py @@ -7,6 +7,8 @@ StudioViewHandlers are handlers for video descriptor instance. import json import logging +import os + from datetime import datetime from webob import Response @@ -21,6 +23,7 @@ from .transcripts_utils import ( TranscriptException, TranscriptsGenerationException, generate_sjson_for_all_speeds, + get_video_transcript_content, youtube_speed_dict, Transcript, save_to_store, @@ -242,7 +245,24 @@ class VideoStudentViewHandlers(object): log.debug(ex.message) # Try to return static URL redirection as last resort # if no translation is required - return self.get_static_transcript(request, transcripts) + response = self.get_static_transcript(request, transcripts) + if response.status_code == 404: + transcript = get_video_transcript_content( + course_id=self.course_id, + language_code=language, + edx_video_id=self.edx_video_id, + youtube_id_1_0=self.youtube_id_1_0, + html5_sources=self.html5_sources, + ) + if transcript: + response = Response( + transcript['content'], + headerlist=[('Content-Language', language)], + charset='utf8', + ) + response.content_type = Transcript.mime_types['sjson'] + + return response except ( TranscriptException, UnicodeDecodeError, @@ -260,8 +280,44 @@ class VideoStudentViewHandlers(object): transcript_content, transcript_filename, transcript_mime_type = self.get_transcript( transcripts, transcript_format=self.transcript_download_format, lang=lang ) - except (NotFoundError, ValueError, KeyError, UnicodeDecodeError): - log.debug("Video@download exception") + except NotFoundError: + response = Response(status=404) + # Make sure the language is set. + if lang is None: + lang = self.get_default_transcript_language(transcripts) + + transcript = get_video_transcript_content( + course_id=self.course_id, + language_code=lang, + edx_video_id=self.edx_video_id, + youtube_id_1_0=self.youtube_id_1_0, + html5_sources=self.html5_sources, + ) + if transcript: + transcript_content = Transcript.convert( + transcript['content'], + input_format='sjson', + output_format=self.transcript_download_format + ) + + # Construct the response + base_name, __ = os.path.splitext(os.path.basename(transcript['file_name'])) + filename = '{base_name}.{ext}'.format( + base_name=base_name.encode('utf8'), + ext=self.transcript_download_format + ) + response = Response( + transcript_content, + headerlist=[ + ('Content-Disposition', 'attachment; filename="{filename}"'.format(filename=filename)), + ('Content-Language', lang), + ], + charset='utf8', + ) + response.content_type = Transcript.mime_types[self.transcript_download_format] + + return response + except (ValueError, KeyError, UnicodeDecodeError): return Response(status=404) else: response = Response( diff --git a/common/lib/xmodule/xmodule/video_module/video_module.py b/common/lib/xmodule/xmodule/video_module/video_module.py index 59a728c526..42e816ca7d 100644 --- a/common/lib/xmodule/xmodule/video_module/video_module.py +++ b/common/lib/xmodule/xmodule/video_module/video_module.py @@ -24,7 +24,7 @@ from pkg_resources import resource_string from django.conf import settings from lxml import etree from opaque_keys.edx.locator import AssetLocator -from openedx.core.djangoapps.video_config.models import HLSPlaybackEnabledFlag +from openedx.core.djangoapps.video_config.models import HLSPlaybackEnabledFlag, VideoTranscriptEnabledFlag from openedx.core.lib.cache_utils import memoize_in_request_cache from openedx.core.lib.license import LicenseMixin from xblock.core import XBlock @@ -42,7 +42,11 @@ from xmodule.xml_module import deserialize_field, is_pointer_tag, name_to_pathna from .bumper_utils import bumperize from .transcripts_utils import ( - Transcript, VideoTranscriptsMixin, get_html5_ids, get_video_ids_info + get_html5_ids, + get_video_ids_info, + get_video_transcript_content, + Transcript, + VideoTranscriptsMixin, ) from .video_handlers import VideoStudentViewHandlers, VideoStudioViewHandlers from .video_utils import create_youtube_string, format_xml_exception_message, get_poster, rewrite_video_url @@ -182,6 +186,18 @@ class VideoModule(VideoFields, VideoTranscriptsMixin, VideoStudentViewHandlers, elif sub or other_lang: track_url = self.runtime.handler_url(self, 'transcript', 'download').rstrip('/?') + if not track_url: + # Check transcript's availability in edx-val + transcript = get_video_transcript_content( + course_id=self.course_id, + language_code=self.transcript_language, + edx_video_id=self.edx_video_id, + youtube_id_1_0=self.youtube_id_1_0, + html5_sources=self.html5_sources, + ) + if transcript: + track_url = self.runtime.handler_url(self, 'transcript', 'download').rstrip('/?') + transcript_language = self.get_default_transcript_language(transcripts) native_languages = {lang: label for lang, label in settings.LANGUAGES if len(lang) == 2} diff --git a/lms/djangoapps/courseware/tests/test_video_handlers.py b/lms/djangoapps/courseware/tests/test_video_handlers.py index 83d555dda5..0c47e73942 100644 --- a/lms/djangoapps/courseware/tests/test_video_handlers.py +++ b/lms/djangoapps/courseware/tests/test_video_handlers.py @@ -11,7 +11,7 @@ import ddt import freezegun from mock import MagicMock, Mock, patch from nose.plugins.attrib import attr -from webob import Request +from webob import Request, Response from common.test.utils import normalize_repr from openedx.core.djangoapps.contentserver.caching import del_cached_content @@ -370,6 +370,55 @@ class TestTranscriptDownloadDispatch(TestVideo): self.assertEqual(response.headers['Content-Type'], 'application/x-subrip; charset=utf-8') self.assertEqual(response.headers['Content-Disposition'], 'attachment; filename="塞.srt"') + @patch('xmodule.video_module.transcripts_utils.edxval_api.get_video_transcript_data') + @patch('xmodule.video_module.transcripts_utils.VideoTranscriptEnabledFlag.feature_enabled', Mock(return_value=True)) + @patch('xmodule.video_module.VideoModule.get_transcript', Mock(side_effect=NotFoundError)) + def test_download_fallback_transcript(self, mock_get_video_transcript_data): + """ + Verify val transcript is returned as a fallback if it is not found in the content store. + """ + mock_get_video_transcript_data.return_value = { + 'content': json.dumps({ + "start": [10], + "end": [100], + "text": ["Hi, welcome to Edx."], + }), + 'file_name': 'edx.sjson' + } + + # Make request to XModule transcript handler + request = Request.blank('/download') + response = self.item.transcript(request=request, dispatch='download') + + # Expected response + expected_content = u'0\n00:00:00,010 --> 00:00:00,100\nHi, welcome to Edx.\n\n' + expected_headers = { + 'Content-Disposition': 'attachment; filename="edx.srt"', + 'Content-Language': u'en', + 'Content-Type': 'application/x-subrip; charset=utf-8' + } + + # Assert the actual response + self.assertEqual(response.status_code, 200) + self.assertEqual(response.text, expected_content) + for attribute, value in expected_headers.iteritems(): + self.assertEqual(response.headers[attribute], value) + + @patch( + 'xmodule.video_module.transcripts_utils.VideoTranscriptEnabledFlag.feature_enabled', + Mock(return_value=False), + ) + @patch('xmodule.video_module.VideoModule.get_transcript', Mock(side_effect=NotFoundError)) + def test_download_fallback_transcript_feature_disabled(self): + """ + Verify val transcript if its feature is disabled. + """ + # Make request to XModule transcript handler + request = Request.blank('/download') + response = self.item.transcript(request=request, dispatch='download') + # Assert the actual response + self.assertEqual(response.status_code, 404) + @attr(shard=1) @ddt.ddt @@ -602,6 +651,55 @@ class TestTranscriptTranslationGetDispatch(TestVideo): with store.branch_setting(ModuleStoreEnum.Branch.draft_preferred, self.course.id): store.update_item(self.course, self.user.id) + @patch('xmodule.video_module.transcripts_utils.edxval_api.get_video_transcript_data') + @patch('xmodule.video_module.transcripts_utils.VideoTranscriptEnabledFlag.feature_enabled', Mock(return_value=True)) + @patch('xmodule.video_module.VideoModule.translation', Mock(side_effect=NotFoundError)) + @patch('xmodule.video_module.VideoModule.get_static_transcript', Mock(return_value=Response(status=404))) + def test_translation_fallback_transcript(self, mock_get_video_transcript_data): + """ + Verify that the val transcript is returned as a fallback, + if it is not found in the content store. + """ + transcript = { + 'content': json.dumps({ + "start": [10], + "end": [100], + "text": ["Hi, welcome to Edx."], + }), + 'file_name': 'edx.sjson' + } + mock_get_video_transcript_data.return_value = transcript + + # Make request to XModule transcript handler + response = self.item.transcript(request=Request.blank('/translation/en'), dispatch='translation/en') + + # Expected headers + expected_headers = { + 'Content-Language': 'en', + 'Content-Type': 'application/json' + } + + # Assert the actual response + self.assertEqual(response.status_code, 200) + self.assertEqual(response.text, transcript['content']) + for attribute, value in expected_headers.iteritems(): + self.assertEqual(response.headers[attribute], value) + + @patch( + 'xmodule.video_module.transcripts_utils.VideoTranscriptEnabledFlag.feature_enabled', + Mock(return_value=False), + ) + @patch('xmodule.video_module.VideoModule.translation', Mock(side_effect=NotFoundError)) + @patch('xmodule.video_module.VideoModule.get_static_transcript', Mock(return_value=Response(status=404))) + def test_translation_fallback_transcript_feature_disabled(self): + """ + Verify that val transcript is not returned when its feature is disabled. + """ + # Make request to XModule transcript handler + response = self.item.transcript(request=Request.blank('/translation/en'), dispatch='translation/en') + # Assert the actual response + self.assertEqual(response.status_code, 404) + @attr(shard=1) class TestStudioTranscriptTranslationGetDispatch(TestVideo):