Transcript upload backend for Video Upload Page – EDUCATOR-1854
This commit is contained in:
@@ -2,6 +2,7 @@
|
||||
""" Tests for transcripts_utils. """
|
||||
import copy
|
||||
import ddt
|
||||
import json
|
||||
import textwrap
|
||||
import unittest
|
||||
from uuid import uuid4
|
||||
@@ -610,28 +611,53 @@ class TestTranscript(unittest.TestCase):
|
||||
self.txt_transcript = u"Elephant's Dream\nAt the left we can see..."
|
||||
|
||||
def test_convert_srt_to_txt(self):
    """
    Converting the srt fixture to txt yields the plain-text fixture.
    """
    converted = transcripts_utils.Transcript.convert(self.srt_transcript, 'srt', 'txt')
    self.assertEqual(converted, self.txt_transcript)
|
||||
|
||||
def test_convert_srt_to_srt(self):
    """
    Converting srt to srt hands back content equal to the srt fixture.
    """
    converted = transcripts_utils.Transcript.convert(self.srt_transcript, 'srt', 'srt')
    self.assertEqual(converted, self.srt_transcript)
|
||||
|
||||
def test_convert_sjson_to_txt(self):
    """
    Converting the sjson fixture to txt yields the plain-text fixture.
    """
    converted = transcripts_utils.Transcript.convert(self.sjson_transcript, 'sjson', 'txt')
    self.assertEqual(converted, self.txt_transcript)
|
||||
|
||||
def test_convert_sjson_to_srt(self):
    """
    Converting the sjson fixture to srt yields the srt fixture.
    """
    converted = transcripts_utils.Transcript.convert(self.sjson_transcript, 'sjson', 'srt')
    self.assertEqual(converted, self.srt_transcript)
|
||||
|
||||
def test_convert_srt_to_sjson(self):
    """
    Tests that the srt transcript is successfully converted into sjson format.
    """
    # srt -> sjson conversion is implemented, so the former expectation of
    # NotImplementedError no longer applies; the result is compared with the
    # parsed sjson fixture instead.
    expected = json.loads(self.sjson_transcript)
    actual = transcripts_utils.Transcript.convert(self.srt_transcript, 'srt', 'sjson')
    self.assertDictEqual(actual, expected)
|
||||
|
||||
def test_convert_invalid_srt_to_sjson(self):
    """
    Tests that TranscriptsGenerationException is raised when content that
    is not a valid srt transcript is converted to sjson.
    """
    malformed_srt = 'invalid SubRip file content'
    with self.assertRaises(transcripts_utils.TranscriptsGenerationException):
        transcripts_utils.Transcript.convert(malformed_srt, 'srt', 'sjson')
|
||||
|
||||
def test_dummy_non_existent_transcript(self):
|
||||
"""
|
||||
|
||||
@@ -1,12 +1,15 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import ddt
|
||||
import json
|
||||
from mock import Mock, patch
|
||||
from io import BytesIO
|
||||
from mock import Mock, patch, ANY
|
||||
|
||||
from django.test.testcases import TestCase
|
||||
|
||||
from contentstore.tests.utils import CourseTestCase
|
||||
from contentstore.utils import reverse_course_url
|
||||
from contentstore.views.transcript_settings import TranscriptionProviderErrorType, validate_transcript_credentials
|
||||
from openedx.core.djangoapps.profile_images.tests.helpers import make_image_file
|
||||
|
||||
|
||||
@ddt.ddt
|
||||
@@ -277,3 +280,153 @@ class TranscriptDownloadTest(CourseTestCase):
|
||||
# Assert the response
|
||||
self.assertEqual(response.status_code, 400)
|
||||
self.assertEqual(json.loads(response.content)['error'], expected_error_message)
|
||||
|
||||
|
||||
@ddt.ddt
@patch(
    'openedx.core.djangoapps.video_config.models.VideoTranscriptEnabledFlag.feature_enabled',
    Mock(return_value=True)
)
class TranscriptUploadTest(CourseTestCase):
    """
    Tests for the transcript upload handler.

    The class-level patch makes the video transcript feature flag report
    "enabled" for the tests; `test_404_with_feature_disabled` overrides it.
    """
    VIEW_NAME = 'transcript_upload_handler'

    def get_url_for_course_key(self, course_id):
        """
        Build the transcript upload URL for `course_id`.
        """
        return reverse_course_url(self.VIEW_NAME, course_id)

    def test_302_with_anonymous_user(self):
        """
        A logged-out client gets redirected rather than served.
        """
        self.client.logout()
        url = self.get_url_for_course_key(self.course.id)
        response = self.client.post(url, content_type='application/json')
        self.assertEqual(response.status_code, 302)

    def test_405_with_not_allowed_request_method(self):
        """
        Only POST is accepted; a GET request is answered with a 405.
        """
        url = self.get_url_for_course_key(self.course.id)
        response = self.client.get(url, content_type='application/json')
        self.assertEqual(response.status_code, 405)

    def test_404_with_feature_disabled(self):
        """
        With the feature flag reporting "disabled" the handler answers 404.
        """
        url = self.get_url_for_course_key(self.course.id)
        flag_path = 'openedx.core.djangoapps.video_config.models.VideoTranscriptEnabledFlag.feature_enabled'
        with patch(flag_path) as feature:
            feature.return_value = False
            response = self.client.post(url, content_type='application/json')
            self.assertEqual(response.status_code, 404)

    @patch('contentstore.views.transcript_settings.create_or_update_video_transcript')
    def test_transcript_upload_handler(self, mock_create_or_update_video_transcript):
        """
        A valid srt upload is answered with 201 and saved as a 'Custom' sjson transcript.
        """
        url = self.get_url_for_course_key(self.course.id)
        # Non-ASCII cue text exercises the utf-8 handling of the upload path.
        srt_stream = BytesIO('0\n00:00:00,010 --> 00:00:00,100\nПривіт, edX вітає вас.\n\n')
        payload = {
            'edx_video_id': '123',
            'language_code': 'en',
            'file': srt_stream,
        }
        # Make request to transcript upload handler
        response = self.client.post(url, payload, format='multipart')

        self.assertEqual(response.status_code, 201)
        # The serialized file content is not inspected here, hence ANY.
        mock_create_or_update_video_transcript.assert_called_with(
            video_id='123',
            language_code='en',
            file_name='subs.sjson',
            file_format='sjson',
            provider='Custom',
            file_data=ANY,
        )

    @ddt.data(
        (
            {
                'edx_video_id': '123',
                'language_code': 'en',
            },
            u'A transcript file is required.'
        ),
        (
            {
                'language_code': u'en',
                'file': u'0\n00:00:00,010 --> 00:00:00,100\nHi, welcome to Edx.\n\n'
            },
            u'Following parameters are required: edx_video_id.'
        ),
        (
            {
                'file': u'0\n00:00:00,010 --> 00:00:00,100\nHi, welcome to Edx.\n\n'
            },
            u'Following parameters are required: edx_video_id, language_code.'
        )
    )
    @ddt.unpack
    def test_transcript_upload_handler_missing_attrs(self, request_payload, expected_error_message):
        """
        Each incomplete payload is rejected with a 400 and the matching error message.
        """
        url = self.get_url_for_course_key(self.course.id)
        # Make request to transcript upload handler
        response = self.client.post(url, request_payload, format='multipart')
        self.assertEqual(response.status_code, 400)
        error_message = json.loads(response.content)['error']
        self.assertEqual(error_message, expected_error_message)

    def test_transcript_upload_handler_with_image(self):
        """
        Uploading an image file instead of a transcript is rejected with a 400.
        """
        url = self.get_url_for_course_key(self.course.id)
        with make_image_file() as image_file:
            payload = {
                'edx_video_id': '123',
                'language_code': 'en',
                'file': image_file,
            }
            # Make request to transcript upload handler
            response = self.client.post(url, payload, format='multipart')

        self.assertEqual(response.status_code, 400)
        error_message = json.loads(response.content)['error']
        self.assertEqual(
            error_message,
            u'There is a problem with this transcript file. Try to upload a different file.'
        )

    def test_transcript_upload_handler_with_invalid_transcript(self):
        """
        Uploading text that is not valid SubRip content is rejected with a 400.
        """
        url = self.get_url_for_course_key(self.course.id)
        invalid_stream = BytesIO('An invalid transcript SubRip file content')
        payload = {
            'edx_video_id': '123',
            'language_code': 'en',
            'file': invalid_stream,
        }
        # Make request to transcript upload handler
        response = self.client.post(url, payload, format='multipart')

        self.assertEqual(response.status_code, 400)
        error_message = json.loads(response.content)['error']
        self.assertEqual(
            error_message,
            u'There is a problem with this transcript file. Try to upload a different file.'
        )
|
||||
|
||||
@@ -2,12 +2,16 @@
|
||||
Views related to the transcript preferences feature
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
import logging
|
||||
|
||||
from django.contrib.auth.decorators import login_required
|
||||
from django.core.files.base import ContentFile
|
||||
from django.http import HttpResponseNotFound, HttpResponse
|
||||
from django.utils.translation import ugettext as _
|
||||
from django.views.decorators.http import require_POST, require_GET
|
||||
from edxval.api import (
|
||||
create_or_update_video_transcript,
|
||||
get_3rd_party_transcription_plans,
|
||||
get_video_transcript_data,
|
||||
update_transcript_credentials_state_for_org,
|
||||
@@ -19,9 +23,11 @@ from openedx.core.djangoapps.video_pipeline.api import update_3rd_party_transcri
|
||||
from util.json_request import JsonResponse, expect_json
|
||||
|
||||
from contentstore.views.videos import TranscriptProvider
|
||||
from xmodule.video_module.transcripts_utils import Transcript
|
||||
from xmodule.video_module.transcripts_utils import Transcript, TranscriptsGenerationException
|
||||
|
||||
__all__ = ['transcript_credentials_handler', 'transcript_download_handler']
|
||||
__all__ = ['transcript_credentials_handler', 'transcript_download_handler', 'transcript_upload_handler']
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class TranscriptionProviderErrorType:
|
||||
@@ -146,12 +152,94 @@ def transcript_download_handler(request, course_key_string):
|
||||
if transcript:
|
||||
name_and_extension = os.path.splitext(transcript['file_name'])
|
||||
basename, file_format = name_and_extension[0], name_and_extension[1][1:]
|
||||
transcript_filename = '{base_name}.srt'.format(base_name=basename.encode('utf8'))
|
||||
transcript_content = Transcript.convert(transcript['content'], input_format=file_format, output_format='srt')
|
||||
transcript_filename = '{base_name}.{ext}'.format(base_name=basename.encode('utf8'), ext=Transcript.SRT)
|
||||
transcript_content = Transcript.convert(
|
||||
content=transcript['content'],
|
||||
input_format=file_format,
|
||||
output_format=Transcript.SRT
|
||||
)
|
||||
# Construct an HTTP response
|
||||
response = HttpResponse(transcript_content, content_type=Transcript.mime_types['srt'])
|
||||
response = HttpResponse(transcript_content, content_type=Transcript.mime_types[Transcript.SRT])
|
||||
response['Content-Disposition'] = 'attachment; filename="{filename}"'.format(filename=transcript_filename)
|
||||
else:
|
||||
response = HttpResponseNotFound()
|
||||
|
||||
return response
|
||||
|
||||
|
||||
def validate_transcript_upload_data(data, files):
    """
    Check that a transcript upload request carries everything it needs.

    Arguments:
        data: A request's data part.
        files: A request's files part.

    Returns:
        None or String
        None when `edx_video_id` and `language_code` are present in `data`
        and `file` is present in `files`; otherwise a translated error
        message. Missing parameters are reported before a missing file.
    """
    # Missing parameters are unlikely to be hit by common users, so a
    # terse, developer-oriented message is enough.
    required_params = ['edx_video_id', 'language_code']
    absent = [param for param in required_params if param not in data]
    if absent:
        return _(u'Following parameters are required: {missing}.').format(missing=', '.join(absent))

    if 'file' not in files:
        return _(u'A transcript file is required.')

    return None
|
||||
|
||||
|
||||
@login_required
@require_POST
def transcript_upload_handler(request, course_key_string):
    """
    View that accepts an uploaded SRT (SubRip) transcript for a video and
    saves it in SJSON format.

    Arguments:
        request: A WSGI request object. `edx_video_id` and `language_code`
            are read from its POST data, the transcript from `FILES['file']`.
        course_key_string: Course key identifying a course

    Returns
        - A 404 if the corresponding feature flag is disabled
        - A 400 with a JSON `error` message if a required parameter or the
          file is missing, or if the file cannot be converted from SRT
        - A 201 if the transcript has been converted and saved
    """
    # Check whether the feature is available for this course.
    if not VideoTranscriptEnabledFlag.feature_enabled(CourseKey.from_string(course_key_string)):
        return HttpResponseNotFound()

    validation_error = validate_transcript_upload_data(data=request.POST, files=request.FILES)
    if validation_error:
        return JsonResponse({'error': validation_error}, status=400)

    video_id = request.POST['edx_video_id']
    language = request.POST['language_code']
    uploaded_file = request.FILES['file']
    try:
        # Convert the SRT transcript into SJSON and hand it over to edxval
        # for saving.
        sjson_transcript = Transcript.convert(
            content=uploaded_file.read(),
            input_format=Transcript.SRT,
            output_format=Transcript.SJSON
        )
        create_or_update_video_transcript(
            video_id=video_id,
            language_code=language,
            file_name='subs.sjson',
            file_format=Transcript.SJSON,
            provider='Custom',
            file_data=ContentFile(json.dumps(sjson_transcript)),
        )
        return JsonResponse(status=201)
    except (TranscriptsGenerationException, UnicodeDecodeError):
        # Both unparsable SRT content and content that cannot be decoded
        # are reported to the user in the same way.
        return JsonResponse(
            {'error': _(u'There is a problem with this transcript file. Try to upload a different file.')},
            status=400
        )
|
||||
|
||||
@@ -34,6 +34,7 @@ from edxval.api import (
|
||||
get_available_transcript_languages
|
||||
)
|
||||
from opaque_keys.edx.keys import CourseKey
|
||||
from xmodule.video_module.transcripts_utils import Transcript
|
||||
|
||||
from contentstore.models import VideoUploadConfig
|
||||
from contentstore.utils import reverse_course_url
|
||||
@@ -73,9 +74,6 @@ VIDEO_UPLOAD_MAX_FILE_SIZE_GB = 5
|
||||
# maximum time for video to remain in upload state
|
||||
MAX_UPLOAD_HOURS = 24
|
||||
|
||||
# Transcript download format
|
||||
TRANSCRIPT_DOWNLOAD_FILE_FORMAT = 'srt'
|
||||
|
||||
|
||||
class TranscriptProvider(object):
|
||||
"""
|
||||
@@ -647,8 +645,12 @@ def videos_index_html(course):
|
||||
'transcript_download_handler',
|
||||
unicode(course.id)
|
||||
),
|
||||
'transcript_upload_handler_url': reverse_course_url(
|
||||
'transcript_upload_handler',
|
||||
unicode(course.id)
|
||||
),
|
||||
'transcription_plans': get_3rd_party_transcription_plans(),
|
||||
'trancript_download_file_format': TRANSCRIPT_DOWNLOAD_FILE_FORMAT
|
||||
'trancript_download_file_format': Transcript.SRT
|
||||
}
|
||||
context['active_transcript_preferences'] = get_transcript_preferences(unicode(course.id))
|
||||
# Cached state for transcript providers' credentials (org-specific)
|
||||
|
||||
@@ -22,7 +22,6 @@ define(
|
||||
model: model,
|
||||
transcriptAvailableLanguages: options.transcriptAvailableLanguages,
|
||||
videoSupportedFileFormats: options.videoSupportedFileFormats,
|
||||
videoTranscriptSettings: options.videoTranscriptSettings,
|
||||
isVideoTranscriptEnabled: options.isVideoTranscriptEnabled
|
||||
});
|
||||
});
|
||||
|
||||
@@ -144,6 +144,8 @@ urlpatterns = [
|
||||
contentstore.views.transcript_credentials_handler, name='transcript_credentials_handler'),
|
||||
url(r'^transcript_download/{}$'.format(settings.COURSE_KEY_PATTERN),
|
||||
contentstore.views.transcript_download_handler, name='transcript_download_handler'),
|
||||
url(r'^transcript_upload/{}$'.format(settings.COURSE_KEY_PATTERN),
|
||||
contentstore.views.transcript_upload_handler, name='transcript_upload_handler'),
|
||||
url(r'^video_encodings_download/{}$'.format(settings.COURSE_KEY_PATTERN),
|
||||
contentstore.views.video_encodings_download, name='video_encodings_download'),
|
||||
url(r'^group_configurations/{}$'.format(settings.COURSE_KEY_PATTERN),
|
||||
|
||||
@@ -9,6 +9,7 @@ import json
|
||||
import requests
|
||||
import logging
|
||||
from pysrt import SubRipTime, SubRipItem, SubRipFile
|
||||
from pysrt.srtexc import Error
|
||||
from lxml import etree
|
||||
from HTMLParser import HTMLParser
|
||||
|
||||
@@ -284,6 +285,32 @@ def generate_srt_from_sjson(sjson_subs, speed):
|
||||
return output
|
||||
|
||||
|
||||
def generate_sjson_from_srt(srt_subs):
    """
    Generate transcripts from SubRip (*.srt) to sjson.

    Arguments:
        srt_subs(SubRip): "SRT" subs object; iterated once, each item must
            expose `start.ordinal`, `end.ordinal` and `text`.

    Returns:
        Subs converted to "SJSON" format: a dict with parallel `start`,
        `end` and `text` lists, one entry per subtitle. Newlines inside a
        subtitle's text are replaced with spaces.
    """
    # Collect everything in a single pass so that one-shot iterables work.
    cues = [
        (cue.start.ordinal, cue.end.ordinal, cue.text.replace('\n', ' '))
        for cue in srt_subs
    ]
    return {
        'start': [start for start, _end, _text in cues],
        'end': [end for _start, end, _text in cues],
        'text': [text for _start, _end, text in cues],
    }
|
||||
|
||||
|
||||
def copy_or_rename_transcript(new_name, old_name, item, delete_old=False, user=None):
|
||||
"""
|
||||
Renames `old_name` transcript file in storage to `new_name`.
|
||||
@@ -544,10 +571,13 @@ class Transcript(object):
|
||||
"""
|
||||
Container for transcript methods.
|
||||
"""
|
||||
SRT = 'srt'
|
||||
TXT = 'txt'
|
||||
SJSON = 'sjson'
|
||||
mime_types = {
|
||||
'srt': 'application/x-subrip; charset=utf-8',
|
||||
'txt': 'text/plain; charset=utf-8',
|
||||
'sjson': 'application/json',
|
||||
SRT: 'application/x-subrip; charset=utf-8',
|
||||
TXT: 'text/plain; charset=utf-8',
|
||||
SJSON: 'application/json',
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
@@ -556,7 +586,10 @@ class Transcript(object):
|
||||
Convert transcript `content` from `input_format` to `output_format`.
|
||||
|
||||
Accepted input formats: sjson, srt.
|
||||
Accepted output format: srt, txt.
|
||||
Accepted output format: srt, txt, sjson.
|
||||
|
||||
Raises:
|
||||
TranscriptsGenerationException: On parsing the invalid srt content during conversion from srt to sjson.
|
||||
"""
|
||||
assert input_format in ('srt', 'sjson')
|
||||
assert output_format in ('txt', 'srt', 'sjson')
|
||||
@@ -571,7 +604,17 @@ class Transcript(object):
|
||||
return HTMLParser().unescape(text)
|
||||
|
||||
elif output_format == 'sjson':
|
||||
raise NotImplementedError
|
||||
try:
|
||||
# With error handling (set to 'ERROR_RAISE'), we will be getting
|
||||
# the exception if something went wrong in parsing the transcript.
|
||||
srt_subs = SubRipFile.from_string(
|
||||
content.decode('utf8'),
|
||||
error_handling=SubRipFile.ERROR_RAISE
|
||||
)
|
||||
except Error as ex: # Base exception from pysrt
|
||||
raise TranscriptsGenerationException(ex.message)
|
||||
|
||||
return generate_sjson_from_srt(srt_subs)
|
||||
|
||||
if input_format == 'sjson':
|
||||
|
||||
|
||||
Reference in New Issue
Block a user