diff --git a/cms/djangoapps/contentstore/tests/test_transcripts_utils.py b/cms/djangoapps/contentstore/tests/test_transcripts_utils.py index 52f9993e00..603cc23246 100644 --- a/cms/djangoapps/contentstore/tests/test_transcripts_utils.py +++ b/cms/djangoapps/contentstore/tests/test_transcripts_utils.py @@ -2,6 +2,7 @@ """ Tests for transcripts_utils. """ import copy import ddt +import json import textwrap import unittest from uuid import uuid4 @@ -610,28 +611,53 @@ class TestTranscript(unittest.TestCase): self.txt_transcript = u"Elephant's Dream\nAt the left we can see..." def test_convert_srt_to_txt(self): + """ + Tests that the srt transcript is successfully converted into txt format. + """ expected = self.txt_transcript actual = transcripts_utils.Transcript.convert(self.srt_transcript, 'srt', 'txt') self.assertEqual(actual, expected) def test_convert_srt_to_srt(self): + """ + Tests that srt to srt conversion works as expected. + """ expected = self.srt_transcript actual = transcripts_utils.Transcript.convert(self.srt_transcript, 'srt', 'srt') self.assertEqual(actual, expected) def test_convert_sjson_to_txt(self): + """ + Tests that the sjson transcript is successfully converted into txt format. + """ expected = self.txt_transcript actual = transcripts_utils.Transcript.convert(self.sjson_transcript, 'sjson', 'txt') self.assertEqual(actual, expected) def test_convert_sjson_to_srt(self): + """ + Tests that the sjson transcript is successfully converted into srt format. + """ expected = self.srt_transcript actual = transcripts_utils.Transcript.convert(self.sjson_transcript, 'sjson', 'srt') self.assertEqual(actual, expected) def test_convert_srt_to_sjson(self): - with self.assertRaises(NotImplementedError): - transcripts_utils.Transcript.convert(self.srt_transcript, 'srt', 'sjson') + """ + Tests that the srt transcript is successfully converted into sjson format. 
+ """ + expected = json.loads(self.sjson_transcript) + actual = transcripts_utils.Transcript.convert(self.srt_transcript, 'srt', 'sjson') + self.assertDictEqual(actual, expected) + + def test_convert_invalid_srt_to_sjson(self): + """ + Tests that TranscriptsGenerationException was raises on trying + to convert invalid srt transcript to sjson. + """ + invalid_srt_transcript = 'invalid SubRip file content' + with self.assertRaises(transcripts_utils.TranscriptsGenerationException): + transcripts_utils.Transcript.convert(invalid_srt_transcript, 'srt', 'sjson') def test_dummy_non_existent_transcript(self): """ diff --git a/cms/djangoapps/contentstore/views/tests/test_transcript_settings.py b/cms/djangoapps/contentstore/views/tests/test_transcript_settings.py index e6c4a9664d..6fa24c3237 100644 --- a/cms/djangoapps/contentstore/views/tests/test_transcript_settings.py +++ b/cms/djangoapps/contentstore/views/tests/test_transcript_settings.py @@ -1,12 +1,15 @@ +# -*- coding: utf-8 -*- import ddt import json -from mock import Mock, patch +from io import BytesIO +from mock import Mock, patch, ANY from django.test.testcases import TestCase from contentstore.tests.utils import CourseTestCase from contentstore.utils import reverse_course_url from contentstore.views.transcript_settings import TranscriptionProviderErrorType, validate_transcript_credentials +from openedx.core.djangoapps.profile_images.tests.helpers import make_image_file @ddt.ddt @@ -277,3 +280,153 @@ class TranscriptDownloadTest(CourseTestCase): # Assert the response self.assertEqual(response.status_code, 400) self.assertEqual(json.loads(response.content)['error'], expected_error_message) + + +@ddt.ddt +@patch( + 'openedx.core.djangoapps.video_config.models.VideoTranscriptEnabledFlag.feature_enabled', + Mock(return_value=True) +) +class TranscriptUploadTest(CourseTestCase): + """ + Tests for transcript upload handler. 
@ddt.ddt
@patch(
    'openedx.core.djangoapps.video_config.models.VideoTranscriptEnabledFlag.feature_enabled',
    Mock(return_value=True)
)
class TranscriptUploadTest(CourseTestCase):
    """
    Tests for the transcript upload view: anonymous access, disallowed HTTP
    methods, the feature flag, a successful upload and the rejected uploads.

    The class-level patch makes the video transcript feature flag report
    "enabled" for every test unless a test overrides it.
    """
    VIEW_NAME = 'transcript_upload_handler'

    def get_url_for_course_key(self, course_id):
        """
        Return the URL of the upload view for the given course key.
        """
        return reverse_course_url(self.VIEW_NAME, course_id)

    def _upload(self, payload):
        """
        POST `payload` to the upload view of the test course and return the response.
        """
        return self.client.post(
            self.get_url_for_course_key(self.course.id),
            payload,
            format='multipart'
        )

    def _assert_bad_request(self, response, message):
        """
        Assert that `response` is a 400 whose JSON body carries `message` as its error.
        """
        self.assertEqual(response.status_code, 400)
        self.assertEqual(json.loads(response.content)['error'], message)

    def test_302_with_anonymous_user(self):
        """
        A logged-out client gets redirected instead of reaching the view.
        """
        self.client.logout()
        upload_url = self.get_url_for_course_key(self.course.id)
        self.assertEqual(
            self.client.post(upload_url, content_type='application/json').status_code,
            302
        )

    def test_405_with_not_allowed_request_method(self):
        """
        A GET request is answered with 405; the test treats POST as the allowed method.
        """
        upload_url = self.get_url_for_course_key(self.course.id)
        self.assertEqual(
            self.client.get(upload_url, content_type='application/json').status_code,
            405
        )

    def test_404_with_feature_disabled(self):
        """
        With the feature flag reporting "disabled", the view answers 404.
        """
        upload_url = self.get_url_for_course_key(self.course.id)
        with patch(
            'openedx.core.djangoapps.video_config.models.VideoTranscriptEnabledFlag.feature_enabled',
            return_value=False
        ):
            response = self.client.post(upload_url, content_type='application/json')
            self.assertEqual(response.status_code, 404)

    @patch('contentstore.views.transcript_settings.create_or_update_video_transcript')
    def test_transcript_upload_handler(self, mock_create_or_update_video_transcript):
        """
        A valid srt upload answers 201 and stores the transcript as sjson.
        """
        srt_stream = BytesIO('0\n00:00:00,010 --> 00:00:00,100\nПривіт, edX вітає вас.\n\n')
        response = self._upload({
            'edx_video_id': '123',
            'language_code': 'en',
            'file': srt_stream,
        })

        self.assertEqual(response.status_code, 201)
        # The stored file content is not inspected here, only the metadata around it.
        mock_create_or_update_video_transcript.assert_called_with(
            video_id='123',
            language_code='en',
            file_name='subs.sjson',
            file_format='sjson',
            provider='Custom',
            file_data=ANY,
        )

    @ddt.data(
        (
            {
                'edx_video_id': '123',
                'language_code': 'en',
            },
            u'A transcript file is required.'
        ),
        (
            {
                'language_code': u'en',
                'file': u'0\n00:00:00,010 --> 00:00:00,100\nHi, welcome to Edx.\n\n'
            },
            u'Following parameters are required: edx_video_id.'
        ),
        (
            {
                'file': u'0\n00:00:00,010 --> 00:00:00,100\nHi, welcome to Edx.\n\n'
            },
            u'Following parameters are required: edx_video_id, language_code.'
        )
    )
    @ddt.unpack
    def test_transcript_upload_handler_missing_attrs(self, request_payload, expected_error_message):
        """
        Each payload lacking a required attribute is rejected with its matching message.
        """
        self._assert_bad_request(self._upload(request_payload), expected_error_message)

    def test_transcript_upload_handler_with_image(self):
        """
        Uploading an image file instead of a transcript is rejected.
        """
        with make_image_file() as image_file:
            response = self._upload({
                'edx_video_id': '123',
                'language_code': 'en',
                'file': image_file,
            })

            self._assert_bad_request(
                response,
                u'There is a problem with this transcript file. Try to upload a different file.'
            )

    def test_transcript_upload_handler_with_invalid_transcript(self):
        """
        Uploading text that is not valid SubRip content is rejected.
        """
        bogus_stream = BytesIO('An invalid transcript SubRip file content')
        response = self._upload({
            'edx_video_id': '123',
            'language_code': 'en',
            'file': bogus_stream,
        })

        self._assert_bad_request(
            response,
            u'There is a problem with this transcript file. Try to upload a different file.'
        )
+ ) diff --git a/cms/djangoapps/contentstore/views/transcript_settings.py b/cms/djangoapps/contentstore/views/transcript_settings.py index 18c79b39cd..56c9ced459 100644 --- a/cms/djangoapps/contentstore/views/transcript_settings.py +++ b/cms/djangoapps/contentstore/views/transcript_settings.py @@ -2,12 +2,16 @@ Views related to the transcript preferences feature """ import os +import json +import logging from django.contrib.auth.decorators import login_required +from django.core.files.base import ContentFile from django.http import HttpResponseNotFound, HttpResponse from django.utils.translation import ugettext as _ from django.views.decorators.http import require_POST, require_GET from edxval.api import ( + create_or_update_video_transcript, get_3rd_party_transcription_plans, get_video_transcript_data, update_transcript_credentials_state_for_org, @@ -19,9 +23,11 @@ from openedx.core.djangoapps.video_pipeline.api import update_3rd_party_transcri from util.json_request import JsonResponse, expect_json from contentstore.views.videos import TranscriptProvider -from xmodule.video_module.transcripts_utils import Transcript +from xmodule.video_module.transcripts_utils import Transcript, TranscriptsGenerationException -__all__ = ['transcript_credentials_handler', 'transcript_download_handler'] +__all__ = ['transcript_credentials_handler', 'transcript_download_handler', 'transcript_upload_handler'] + +LOGGER = logging.getLogger(__name__) class TranscriptionProviderErrorType: @@ -146,12 +152,94 @@ def transcript_download_handler(request, course_key_string): if transcript: name_and_extension = os.path.splitext(transcript['file_name']) basename, file_format = name_and_extension[0], name_and_extension[1][1:] - transcript_filename = '{base_name}.srt'.format(base_name=basename.encode('utf8')) - transcript_content = Transcript.convert(transcript['content'], input_format=file_format, output_format='srt') + transcript_filename = 
def validate_transcript_upload_data(data, files):
    """
    Check that a transcript upload request carries everything it needs.

    Arguments:
        data: A request's data part.
        files: A request's files part.

    Returns:
        None or String
        None when `data` has both 'edx_video_id' and 'language_code' and
        `files` has 'file'; otherwise a translated error message. Missing
        parameters are reported before a missing file.
    """
    # Missing parameters point at a broken client rather than a user mistake,
    # so common users are unlikely to ever see this message.
    absent = [name for name in ('edx_video_id', 'language_code') if name not in data]
    if absent:
        return _(u'Following parameters are required: {missing}.').format(missing=', '.join(absent))

    if 'file' not in files:
        return _(u'A transcript file is required.')

    return None
@login_required
@require_POST
def transcript_upload_handler(request, course_key_string):
    """
    View to upload a transcript file.

    Arguments:
        request: A WSGI request object
        course_key_string: Course key identifying a course

    Transcript file, edx video id and transcript language are required.
    Transcript file should be in SRT(SubRip) format.

    Returns
        - A 400 if any of the validation fails, or if the uploaded file
          cannot be decoded/parsed as an SRT transcript
        - A 404 if the corresponding feature flag is disabled
        - A 201 if transcript has been uploaded successfully
    """
    # Check whether the feature is available for this course.
    course_key = CourseKey.from_string(course_key_string)
    if not VideoTranscriptEnabledFlag.feature_enabled(course_key):
        return HttpResponseNotFound()

    error = validate_transcript_upload_data(data=request.POST, files=request.FILES)
    if error:
        return JsonResponse({'error': error}, status=400)

    edx_video_id = request.POST['edx_video_id']
    language_code = request.POST['language_code']
    transcript_file = request.FILES['file']
    try:
        # Convert SRT transcript into an SJSON format
        # and upload it to S3.
        sjson_subs = Transcript.convert(
            content=transcript_file.read(),
            input_format=Transcript.SRT,
            output_format=Transcript.SJSON
        )
        create_or_update_video_transcript(
            video_id=edx_video_id,
            language_code=language_code,
            file_name='subs.sjson',
            file_format=Transcript.SJSON,
            provider='Custom',
            file_data=ContentFile(json.dumps(sjson_subs)),
        )
    except (TranscriptsGenerationException, UnicodeDecodeError) as ex:
        # The user only gets a generic message, so keep a trace of why the
        # file was rejected instead of dropping the exception silently.
        LOGGER.warning(
            u'Transcript upload rejected for video [%s], language [%s]: %s',
            edx_video_id,
            language_code,
            type(ex).__name__
        )
        return JsonResponse(
            {'error': _(u'There is a problem with this transcript file. Try to upload a different file.')},
            status=400
        )

    return JsonResponse(status=201)
Try to upload a different file.')}, + status=400 + ) + + return response diff --git a/cms/djangoapps/contentstore/views/videos.py b/cms/djangoapps/contentstore/views/videos.py index 47e36822d7..fdfb766592 100644 --- a/cms/djangoapps/contentstore/views/videos.py +++ b/cms/djangoapps/contentstore/views/videos.py @@ -34,6 +34,7 @@ from edxval.api import ( get_available_transcript_languages ) from opaque_keys.edx.keys import CourseKey +from xmodule.video_module.transcripts_utils import Transcript from contentstore.models import VideoUploadConfig from contentstore.utils import reverse_course_url @@ -73,9 +74,6 @@ VIDEO_UPLOAD_MAX_FILE_SIZE_GB = 5 # maximum time for video to remain in upload state MAX_UPLOAD_HOURS = 24 -# Transcript download format -TRANSCRIPT_DOWNLOAD_FILE_FORMAT = 'srt' - class TranscriptProvider(object): """ @@ -647,8 +645,12 @@ def videos_index_html(course): 'transcript_download_handler', unicode(course.id) ), + 'transcript_upload_handler_url': reverse_course_url( + 'transcript_upload_handler', + unicode(course.id) + ), 'transcription_plans': get_3rd_party_transcription_plans(), - 'trancript_download_file_format': TRANSCRIPT_DOWNLOAD_FILE_FORMAT + 'trancript_download_file_format': Transcript.SRT } context['active_transcript_preferences'] = get_transcript_preferences(unicode(course.id)) # Cached state for transcript providers' credentials (org-specific) diff --git a/cms/static/js/views/previous_video_upload_list.js b/cms/static/js/views/previous_video_upload_list.js index 244df65974..c845a6e99b 100644 --- a/cms/static/js/views/previous_video_upload_list.js +++ b/cms/static/js/views/previous_video_upload_list.js @@ -22,7 +22,6 @@ define( model: model, transcriptAvailableLanguages: options.transcriptAvailableLanguages, videoSupportedFileFormats: options.videoSupportedFileFormats, - videoTranscriptSettings: options.videoTranscriptSettings, isVideoTranscriptEnabled: options.isVideoTranscriptEnabled }); }); diff --git a/cms/urls.py b/cms/urls.py index 
def generate_sjson_from_srt(srt_subs):
    """
    Generate an "SJSON" transcript from SubRip (*.srt) subs.

    Arguments:
        srt_subs(SubRip): "SRT" subs object

    Returns:
        Subs converted to "SJSON" format: a dict with three parallel lists,
        'start' and 'end' holding each sub's `start.ordinal` / `end.ordinal`
        and 'text' holding each sub's text with newlines replaced by spaces.
    """
    sjson_subs = {'start': [], 'end': [], 'text': []}
    for cue in srt_subs:
        sjson_subs['start'].append(cue.start.ordinal)
        sjson_subs['end'].append(cue.end.ordinal)
        # Each sub becomes a single-line text entry.
        sjson_subs['text'].append(cue.text.replace('\n', ' '))

    return sjson_subs
@@ -544,10 +571,13 @@ class Transcript(object): """ Container for transcript methods. """ + SRT = 'srt' + TXT = 'txt' + SJSON = 'sjson' mime_types = { - 'srt': 'application/x-subrip; charset=utf-8', - 'txt': 'text/plain; charset=utf-8', - 'sjson': 'application/json', + SRT: 'application/x-subrip; charset=utf-8', + TXT: 'text/plain; charset=utf-8', + SJSON: 'application/json', } @staticmethod @@ -556,7 +586,10 @@ class Transcript(object): Convert transcript `content` from `input_format` to `output_format`. Accepted input formats: sjson, srt. - Accepted output format: srt, txt. + Accepted output format: srt, txt, sjson. + + Raises: + TranscriptsGenerationException: On parsing the invalid srt content during conversion from srt to sjson. """ assert input_format in ('srt', 'sjson') assert output_format in ('txt', 'srt', 'sjson') @@ -571,7 +604,17 @@ class Transcript(object): return HTMLParser().unescape(text) elif output_format == 'sjson': - raise NotImplementedError + try: + # With error handling (set to 'ERROR_RAISE'), we will be getting + # the exception if something went wrong in parsing the transcript. + srt_subs = SubRipFile.from_string( + content.decode('utf8'), + error_handling=SubRipFile.ERROR_RAISE + ) + except Error as ex: # Base exception from pysrt + raise TranscriptsGenerationException(ex.message) + + return generate_sjson_from_srt(srt_subs) if input_format == 'sjson':