diff --git a/cms/djangoapps/contentstore/tests/test_transcripts_utils.py b/cms/djangoapps/contentstore/tests/test_transcripts_utils.py
index 5914414c34..4ca039a08f 100644
--- a/cms/djangoapps/contentstore/tests/test_transcripts_utils.py
+++ b/cms/djangoapps/contentstore/tests/test_transcripts_utils.py
@@ -1,10 +1,10 @@
# -*- coding: utf-8 -*-
""" Tests for transcripts_utils. """
import copy
+import ddt
import textwrap
import unittest
from uuid import uuid4
-import ddt
from django.conf import settings
from django.test.utils import override_settings
diff --git a/cms/djangoapps/contentstore/views/tests/test_transcripts.py b/cms/djangoapps/contentstore/views/tests/test_transcripts.py
index 319f1f6b43..23109cf925 100644
--- a/cms/djangoapps/contentstore/views/tests/test_transcripts.py
+++ b/cms/djangoapps/contentstore/views/tests/test_transcripts.py
@@ -1,6 +1,7 @@
"""Tests for items views."""
import copy
+import ddt
import json
import os
import tempfile
@@ -10,7 +11,7 @@ from uuid import uuid4
from django.conf import settings
from django.core.urlresolvers import reverse
from django.test.utils import override_settings
-from mock import patch
+from mock import patch, Mock
from opaque_keys.edx.keys import UsageKey
from contentstore.tests.utils import CourseTestCase, mock_requests_get
@@ -525,7 +526,70 @@ class TestDownloadTranscripts(BaseTranscripts):
self.assertEqual(resp.status_code, 404)
+ @patch('xmodule.video_module.transcripts_utils.VideoTranscriptEnabledFlag.feature_enabled', Mock(return_value=True))
+ @patch('xmodule.video_module.transcripts_utils.edxval_api.get_video_transcript_data')
+ def test_download_fallback_transcript(self, mock_get_video_transcript_data):
+ """
+ Verify that the val transcript is returned if its not found in content-store.
+ """
+ mock_get_video_transcript_data.return_value = {
+ 'content': json.dumps({
+ "start": [10],
+ "end": [100],
+ "text": ["Hi, welcome to Edx."],
+ }),
+ 'file_name': 'edx.sjson'
+ }
+ self.item.data = textwrap.dedent("""
+
+ """)
+ modulestore().update_item(self.item, self.user.id)
+
+ download_transcripts_url = reverse('download_transcripts')
+ response = self.client.get(download_transcripts_url, {'locator': self.video_usage_key})
+
+ # Expected response
+ expected_content = u'0\n00:00:00,010 --> 00:00:00,100\nHi, welcome to Edx.\n\n'
+ expected_headers = {
+ 'content-disposition': 'attachment; filename="edx.srt"',
+ 'content-type': 'application/x-subrip; charset=utf-8'
+ }
+
+ # Assert the actual response
+ self.assertEqual(response.status_code, 200)
+ self.assertEqual(response.content, expected_content)
+ for attribute, value in expected_headers.iteritems():
+ self.assertEqual(response.get(attribute), value)
+
+ @patch(
+ 'xmodule.video_module.transcripts_utils.VideoTranscriptEnabledFlag.feature_enabled',
+ Mock(return_value=False),
+ )
+ def test_download_fallback_transcript_feature_disabled(self):
+ """
+ Verify the transcript download when feature is disabled.
+ """
+ self.item.data = textwrap.dedent("""
+
+ """)
+ modulestore().update_item(self.item, self.user.id)
+
+ download_transcripts_url = reverse('download_transcripts')
+ response = self.client.get(download_transcripts_url, {'locator': self.video_usage_key})
+ # Assert the actual response
+ self.assertEqual(response.status_code, 404)
+
+
+@ddt.ddt
class TestCheckTranscripts(BaseTranscripts):
"""
Tests for '/transcripts/check' url.
@@ -760,6 +824,58 @@ class TestCheckTranscripts(BaseTranscripts):
self.assertEqual(resp.status_code, 400)
self.assertEqual(json.loads(resp.content).get('status'), 'Transcripts are supported only for "video" modules.')
+ @ddt.data(
+ (True, 'found'),
+ (False, 'not_found')
+ )
+ @ddt.unpack
+ @patch('xmodule.video_module.transcripts_utils.VideoTranscriptEnabledFlag.feature_enabled')
+ @patch('xmodule.video_module.transcripts_utils.edxval_api.get_video_transcript_data', Mock(return_value=True))
+ def test_command_for_fallback_transcript(self, feature_enabled, expected_command, video_transcript_feature):
+ """
+ Verify the command if a transcript is not found in content-store but
+ its there in edx-val.
+ """
+ video_transcript_feature.return_value = feature_enabled
+ self.item.data = textwrap.dedent("""
+
+ """)
+ modulestore().update_item(self.item, self.user.id)
+
+ # Make request to check transcript view
+ data = {
+ 'locator': unicode(self.video_usage_key),
+ 'videos': [{
+ 'type': 'html5',
+ 'video': "",
+ 'mode': 'mp4',
+ }]
+ }
+ check_transcripts_url = reverse('check_transcripts')
+ response = self.client.get(check_transcripts_url, {'data': json.dumps(data)})
+
+ # Assert the response
+ self.assertEqual(response.status_code, 200)
+ self.assertDictEqual(
+ json.loads(response.content),
+ {
+ u'status': u'Success',
+ u'subs': u'',
+ u'youtube_local': False,
+ u'is_youtube_mode': False,
+ u'youtube_server': False,
+ u'command': expected_command,
+ u'current_item_subs': None,
+ u'youtube_diff': True,
+ u'html5_local': [],
+ u'html5_equal': False,
+ }
+ )
+
class TestSaveTranscripts(BaseTranscripts):
"""
diff --git a/cms/djangoapps/contentstore/views/transcripts_ajax.py b/cms/djangoapps/contentstore/views/transcripts_ajax.py
index e32f27dbca..a9b7118a38 100644
--- a/cms/djangoapps/contentstore/views/transcripts_ajax.py
+++ b/cms/djangoapps/contentstore/views/transcripts_ajax.py
@@ -27,16 +27,18 @@ from xmodule.exceptions import NotFoundError
from xmodule.modulestore.django import modulestore
from xmodule.modulestore.exceptions import ItemNotFoundError
from xmodule.video_module.transcripts_utils import (
- GetTranscriptsFromYouTubeException,
- TranscriptsRequestValidationException,
copy_or_rename_transcript,
download_youtube_subs,
+ GetTranscriptsFromYouTubeException,
+ get_video_transcript_content,
generate_srt_from_sjson,
generate_subs_from_source,
get_transcripts_from_youtube,
manage_video_subtitles_save,
remove_subs_from_store,
- youtube_video_transcript_name
+ Transcript,
+ TranscriptsRequestValidationException,
+ youtube_video_transcript_name,
)
__all__ = [
@@ -144,6 +146,7 @@ def download_transcripts(request):
Raises Http404 if unsuccessful.
"""
locator = request.GET.get('locator')
+ subs_id = request.GET.get('subs_id')
if not locator:
log.debug('GET data without "locator" property.')
raise Http404
@@ -154,31 +157,45 @@ def download_transcripts(request):
log.debug("Can't find item by locator.")
raise Http404
- subs_id = request.GET.get('subs_id')
- if not subs_id:
- log.debug('GET data without "subs_id" property.')
- raise Http404
-
if item.category != 'video':
log.debug('transcripts are supported only for video" modules.')
raise Http404
- filename = 'subs_{0}.srt.sjson'.format(subs_id)
- content_location = StaticContent.compute_location(item.location.course_key, filename)
try:
- sjson_transcripts = contentstore().find(content_location)
- log.debug("Downloading subs for %s id", subs_id)
- str_subs = generate_srt_from_sjson(json.loads(sjson_transcripts.data), speed=1.0)
- if not str_subs:
- log.debug('generate_srt_from_sjson produces no subtitles')
- raise Http404
- response = HttpResponse(str_subs, content_type='application/x-subrip')
- response['Content-Disposition'] = 'attachment; filename="{0}.srt"'.format(subs_id)
- return response
+ if not subs_id:
+ raise NotFoundError
+
+ filename = subs_id
+ content_location = StaticContent.compute_location(
+ item.location.course_key,
+ 'subs_{filename}.srt.sjson'.format(filename=filename),
+ )
+ sjson_transcript = contentstore().find(content_location).data
except NotFoundError:
- log.debug("Can't find content in storage for %s subs", subs_id)
+ # Try searching in VAL for the transcript as a last resort
+ transcript = get_video_transcript_content(
+ course_id=item.location.course_key,
+ language_code=u'en',
+ edx_video_id=item.edx_video_id,
+ youtube_id_1_0=item.youtube_id_1_0,
+ html5_sources=item.html5_sources,
+ )
+ if not transcript:
+ raise Http404
+
+ filename = os.path.splitext(os.path.basename(transcript['file_name']))[0].encode('utf8')
+ sjson_transcript = transcript['content']
+
+ # convert sjson content into srt format.
+ transcript_content = Transcript.convert(sjson_transcript, input_format='sjson', output_format='srt')
+ if not transcript_content:
raise Http404
+ # Construct an HTTP response
+ response = HttpResponse(transcript_content, content_type='application/x-subrip; charset=utf-8')
+ response['Content-Disposition'] = 'attachment; filename="{filename}.srt"'.format(filename=filename)
+ return response
+
@login_required
def check_transcripts(request):
@@ -284,6 +301,17 @@ def check_transcripts(request):
transcripts_presence['html5_equal'] = json.loads(html5_subs[0]) == json.loads(html5_subs[1])
command, subs_to_use = _transcripts_logic(transcripts_presence, videos)
+ if command == 'not_found':
+ # Try searching in VAL for the transcript as a last resort
+ video_transcript = get_video_transcript_content(
+ course_id=item.location.course_key,
+ language_code=u'en',
+ edx_video_id=item.edx_video_id,
+ youtube_id_1_0=item.youtube_id_1_0,
+ html5_sources=item.html5_sources,
+ )
+ command = 'found' if video_transcript else command
+
transcripts_presence.update({
'command': command,
'subs': subs_to_use,
diff --git a/common/lib/xmodule/xmodule/video_module/transcripts_utils.py b/common/lib/xmodule/xmodule/video_module/transcripts_utils.py
index d46944bbbf..e78eb4c894 100644
--- a/common/lib/xmodule/xmodule/video_module/transcripts_utils.py
+++ b/common/lib/xmodule/xmodule/video_module/transcripts_utils.py
@@ -12,12 +12,18 @@ from pysrt import SubRipTime, SubRipItem, SubRipFile
from lxml import etree
from HTMLParser import HTMLParser
+from openedx.core.djangoapps.video_config.models import VideoTranscriptEnabledFlag
from xmodule.exceptions import NotFoundError
from xmodule.contentstore.content import StaticContent
from xmodule.contentstore.django import contentstore
from .bumper_utils import get_bumper_settings
+try:
+ from edxval import api as edxval_api
+except ImportError:
+ edxval_api = None
+
log = logging.getLogger(__name__)
@@ -474,8 +480,8 @@ def get_video_ids_info(edx_video_id, youtube_id_1_0, html5_sources):
Returns list internal or external video ids.
Arguments:
- edx_video_id (str): edx_video_id
- youtube_id_1_0 (str): youtube id
+ edx_video_id (unicode): edx_video_id
+ youtube_id_1_0 (unicode): youtube id
html5_sources (list): html5 video ids
Returns:
@@ -492,6 +498,28 @@ def get_video_ids_info(edx_video_id, youtube_id_1_0, html5_sources):
return external, video_ids
+def get_video_transcript_content(course_id, language_code, edx_video_id, youtube_id_1_0, html5_sources):
+ """
+ Gets video transcript content, only if the corresponding feature flag is enabled for the given `course_id`.
+
+ Arguments:
+ course_id(CourseKey): Course key identifying a course
+ language_code(unicode): Language code of the requested transcript
+ edx_video_id(unicode): edx-val's video identifier
+ youtube_id_1_0(unicode): A youtube source identifier
+ html5_sources(list): A list containing html5 sources
+
+ Returns:
+ A dict containing transcript's file name and its sjson content.
+ """
+ transcript = None
+ if VideoTranscriptEnabledFlag.feature_enabled(course_id=course_id) and edxval_api:
+ __, video_candidate_ids = get_video_ids_info(edx_video_id, youtube_id_1_0, html5_sources)
+ transcript = edxval_api.get_video_transcript_data(video_candidate_ids, language_code)
+
+ return transcript
+
+
class Transcript(object):
"""
Container for transcript methods.
diff --git a/common/lib/xmodule/xmodule/video_module/video_handlers.py b/common/lib/xmodule/xmodule/video_module/video_handlers.py
index 76e83dc6ff..fbd326a2ba 100644
--- a/common/lib/xmodule/xmodule/video_module/video_handlers.py
+++ b/common/lib/xmodule/xmodule/video_module/video_handlers.py
@@ -7,6 +7,8 @@ StudioViewHandlers are handlers for video descriptor instance.
import json
import logging
+import os
+
from datetime import datetime
from webob import Response
@@ -21,6 +23,7 @@ from .transcripts_utils import (
TranscriptException,
TranscriptsGenerationException,
generate_sjson_for_all_speeds,
+ get_video_transcript_content,
youtube_speed_dict,
Transcript,
save_to_store,
@@ -242,7 +245,24 @@ class VideoStudentViewHandlers(object):
log.debug(ex.message)
# Try to return static URL redirection as last resort
# if no translation is required
- return self.get_static_transcript(request, transcripts)
+ response = self.get_static_transcript(request, transcripts)
+ if response.status_code == 404:
+ transcript = get_video_transcript_content(
+ course_id=self.course_id,
+ language_code=language,
+ edx_video_id=self.edx_video_id,
+ youtube_id_1_0=self.youtube_id_1_0,
+ html5_sources=self.html5_sources,
+ )
+ if transcript:
+ response = Response(
+ transcript['content'],
+ headerlist=[('Content-Language', language)],
+ charset='utf8',
+ )
+ response.content_type = Transcript.mime_types['sjson']
+
+ return response
except (
TranscriptException,
UnicodeDecodeError,
@@ -260,8 +280,44 @@ class VideoStudentViewHandlers(object):
transcript_content, transcript_filename, transcript_mime_type = self.get_transcript(
transcripts, transcript_format=self.transcript_download_format, lang=lang
)
- except (NotFoundError, ValueError, KeyError, UnicodeDecodeError):
- log.debug("Video@download exception")
+ except NotFoundError:
+ response = Response(status=404)
+ # Make sure the language is set.
+ if lang is None:
+ lang = self.get_default_transcript_language(transcripts)
+
+ transcript = get_video_transcript_content(
+ course_id=self.course_id,
+ language_code=lang,
+ edx_video_id=self.edx_video_id,
+ youtube_id_1_0=self.youtube_id_1_0,
+ html5_sources=self.html5_sources,
+ )
+ if transcript:
+ transcript_content = Transcript.convert(
+ transcript['content'],
+ input_format='sjson',
+ output_format=self.transcript_download_format
+ )
+
+ # Construct the response
+ base_name, __ = os.path.splitext(os.path.basename(transcript['file_name']))
+ filename = '{base_name}.{ext}'.format(
+ base_name=base_name.encode('utf8'),
+ ext=self.transcript_download_format
+ )
+ response = Response(
+ transcript_content,
+ headerlist=[
+ ('Content-Disposition', 'attachment; filename="{filename}"'.format(filename=filename)),
+ ('Content-Language', lang),
+ ],
+ charset='utf8',
+ )
+ response.content_type = Transcript.mime_types[self.transcript_download_format]
+
+ return response
+ except (ValueError, KeyError, UnicodeDecodeError):
return Response(status=404)
else:
response = Response(
diff --git a/common/lib/xmodule/xmodule/video_module/video_module.py b/common/lib/xmodule/xmodule/video_module/video_module.py
index 59a728c526..42e816ca7d 100644
--- a/common/lib/xmodule/xmodule/video_module/video_module.py
+++ b/common/lib/xmodule/xmodule/video_module/video_module.py
@@ -24,7 +24,7 @@ from pkg_resources import resource_string
from django.conf import settings
from lxml import etree
from opaque_keys.edx.locator import AssetLocator
-from openedx.core.djangoapps.video_config.models import HLSPlaybackEnabledFlag
+from openedx.core.djangoapps.video_config.models import HLSPlaybackEnabledFlag, VideoTranscriptEnabledFlag
from openedx.core.lib.cache_utils import memoize_in_request_cache
from openedx.core.lib.license import LicenseMixin
from xblock.core import XBlock
@@ -42,7 +42,11 @@ from xmodule.xml_module import deserialize_field, is_pointer_tag, name_to_pathna
from .bumper_utils import bumperize
from .transcripts_utils import (
- Transcript, VideoTranscriptsMixin, get_html5_ids, get_video_ids_info
+ get_html5_ids,
+ get_video_ids_info,
+ get_video_transcript_content,
+ Transcript,
+ VideoTranscriptsMixin,
)
from .video_handlers import VideoStudentViewHandlers, VideoStudioViewHandlers
from .video_utils import create_youtube_string, format_xml_exception_message, get_poster, rewrite_video_url
@@ -182,6 +186,18 @@ class VideoModule(VideoFields, VideoTranscriptsMixin, VideoStudentViewHandlers,
elif sub or other_lang:
track_url = self.runtime.handler_url(self, 'transcript', 'download').rstrip('/?')
+ if not track_url:
+ # Check transcript's availability in edx-val
+ transcript = get_video_transcript_content(
+ course_id=self.course_id,
+ language_code=self.transcript_language,
+ edx_video_id=self.edx_video_id,
+ youtube_id_1_0=self.youtube_id_1_0,
+ html5_sources=self.html5_sources,
+ )
+ if transcript:
+ track_url = self.runtime.handler_url(self, 'transcript', 'download').rstrip('/?')
+
transcript_language = self.get_default_transcript_language(transcripts)
native_languages = {lang: label for lang, label in settings.LANGUAGES if len(lang) == 2}
diff --git a/lms/djangoapps/courseware/tests/test_video_handlers.py b/lms/djangoapps/courseware/tests/test_video_handlers.py
index 83d555dda5..0c47e73942 100644
--- a/lms/djangoapps/courseware/tests/test_video_handlers.py
+++ b/lms/djangoapps/courseware/tests/test_video_handlers.py
@@ -11,7 +11,7 @@ import ddt
import freezegun
from mock import MagicMock, Mock, patch
from nose.plugins.attrib import attr
-from webob import Request
+from webob import Request, Response
from common.test.utils import normalize_repr
from openedx.core.djangoapps.contentserver.caching import del_cached_content
@@ -370,6 +370,55 @@ class TestTranscriptDownloadDispatch(TestVideo):
self.assertEqual(response.headers['Content-Type'], 'application/x-subrip; charset=utf-8')
self.assertEqual(response.headers['Content-Disposition'], 'attachment; filename="塞.srt"')
+ @patch('xmodule.video_module.transcripts_utils.edxval_api.get_video_transcript_data')
+ @patch('xmodule.video_module.transcripts_utils.VideoTranscriptEnabledFlag.feature_enabled', Mock(return_value=True))
+ @patch('xmodule.video_module.VideoModule.get_transcript', Mock(side_effect=NotFoundError))
+ def test_download_fallback_transcript(self, mock_get_video_transcript_data):
+ """
+ Verify val transcript is returned as a fallback if it is not found in the content store.
+ """
+ mock_get_video_transcript_data.return_value = {
+ 'content': json.dumps({
+ "start": [10],
+ "end": [100],
+ "text": ["Hi, welcome to Edx."],
+ }),
+ 'file_name': 'edx.sjson'
+ }
+
+ # Make request to XModule transcript handler
+ request = Request.blank('/download')
+ response = self.item.transcript(request=request, dispatch='download')
+
+ # Expected response
+ expected_content = u'0\n00:00:00,010 --> 00:00:00,100\nHi, welcome to Edx.\n\n'
+ expected_headers = {
+ 'Content-Disposition': 'attachment; filename="edx.srt"',
+ 'Content-Language': u'en',
+ 'Content-Type': 'application/x-subrip; charset=utf-8'
+ }
+
+ # Assert the actual response
+ self.assertEqual(response.status_code, 200)
+ self.assertEqual(response.text, expected_content)
+ for attribute, value in expected_headers.iteritems():
+ self.assertEqual(response.headers[attribute], value)
+
+ @patch(
+ 'xmodule.video_module.transcripts_utils.VideoTranscriptEnabledFlag.feature_enabled',
+ Mock(return_value=False),
+ )
+ @patch('xmodule.video_module.VideoModule.get_transcript', Mock(side_effect=NotFoundError))
+ def test_download_fallback_transcript_feature_disabled(self):
+ """
+ Verify val transcript if its feature is disabled.
+ """
+ # Make request to XModule transcript handler
+ request = Request.blank('/download')
+ response = self.item.transcript(request=request, dispatch='download')
+ # Assert the actual response
+ self.assertEqual(response.status_code, 404)
+
@attr(shard=1)
@ddt.ddt
@@ -602,6 +651,55 @@ class TestTranscriptTranslationGetDispatch(TestVideo):
with store.branch_setting(ModuleStoreEnum.Branch.draft_preferred, self.course.id):
store.update_item(self.course, self.user.id)
+ @patch('xmodule.video_module.transcripts_utils.edxval_api.get_video_transcript_data')
+ @patch('xmodule.video_module.transcripts_utils.VideoTranscriptEnabledFlag.feature_enabled', Mock(return_value=True))
+ @patch('xmodule.video_module.VideoModule.translation', Mock(side_effect=NotFoundError))
+ @patch('xmodule.video_module.VideoModule.get_static_transcript', Mock(return_value=Response(status=404)))
+ def test_translation_fallback_transcript(self, mock_get_video_transcript_data):
+ """
+ Verify that the val transcript is returned as a fallback,
+ if it is not found in the content store.
+ """
+ transcript = {
+ 'content': json.dumps({
+ "start": [10],
+ "end": [100],
+ "text": ["Hi, welcome to Edx."],
+ }),
+ 'file_name': 'edx.sjson'
+ }
+ mock_get_video_transcript_data.return_value = transcript
+
+ # Make request to XModule transcript handler
+ response = self.item.transcript(request=Request.blank('/translation/en'), dispatch='translation/en')
+
+ # Expected headers
+ expected_headers = {
+ 'Content-Language': 'en',
+ 'Content-Type': 'application/json'
+ }
+
+ # Assert the actual response
+ self.assertEqual(response.status_code, 200)
+ self.assertEqual(response.text, transcript['content'])
+ for attribute, value in expected_headers.iteritems():
+ self.assertEqual(response.headers[attribute], value)
+
+ @patch(
+ 'xmodule.video_module.transcripts_utils.VideoTranscriptEnabledFlag.feature_enabled',
+ Mock(return_value=False),
+ )
+ @patch('xmodule.video_module.VideoModule.translation', Mock(side_effect=NotFoundError))
+ @patch('xmodule.video_module.VideoModule.get_static_transcript', Mock(return_value=Response(status=404)))
+ def test_translation_fallback_transcript_feature_disabled(self):
+ """
+ Verify that val transcript is not returned when its feature is disabled.
+ """
+ # Make request to XModule transcript handler
+ response = self.item.transcript(request=Request.blank('/translation/en'), dispatch='translation/en')
+ # Assert the actual response
+ self.assertEqual(response.status_code, 404)
+
@attr(shard=1)
class TestStudioTranscriptTranslationGetDispatch(TestVideo):