Treat transcript content as unicode strings and convert them at any edge where we encounter them. One decision made here was to not update edx-val it treats transcripts as byte but is much closer to the actual files so it makes more sense over there. But within the platform they are generally passed around as serialized json and so it's much better for them to be unicode.
281 lines
10 KiB
Python
281 lines
10 KiB
Python
"""
|
|
Views related to the transcript preferences feature
|
|
"""
|
|
from __future__ import absolute_import
|
|
|
|
import logging
|
|
import os
|
|
|
|
from django.contrib.auth.decorators import login_required
|
|
from django.core.files.base import ContentFile
|
|
from django.http import HttpResponse, HttpResponseNotFound
|
|
from django.utils.translation import ugettext as _
|
|
from django.views.decorators.http import require_GET, require_http_methods, require_POST
|
|
from edxval.api import (
|
|
create_or_update_video_transcript,
|
|
delete_video_transcript,
|
|
get_3rd_party_transcription_plans,
|
|
get_available_transcript_languages,
|
|
get_video_transcript_data,
|
|
update_transcript_credentials_state_for_org
|
|
)
|
|
from opaque_keys.edx.keys import CourseKey
|
|
|
|
from contentstore.views.videos import TranscriptProvider
|
|
from openedx.core.djangoapps.video_config.models import VideoTranscriptEnabledFlag
|
|
from openedx.core.djangoapps.video_pipeline.api import update_3rd_party_transcription_service_credentials
|
|
from student.auth import has_studio_write_access
|
|
from util.json_request import JsonResponse, expect_json
|
|
from xmodule.video_module.transcripts_utils import Transcript, TranscriptsGenerationException
|
|
|
|
__all__ = [
|
|
'transcript_credentials_handler',
|
|
'transcript_download_handler',
|
|
'transcript_upload_handler',
|
|
'transcript_delete_handler'
|
|
]
|
|
|
|
LOGGER = logging.getLogger(__name__)
|
|
|
|
|
|
class TranscriptionProviderErrorType:
|
|
"""
|
|
Transcription provider's error types enumeration.
|
|
"""
|
|
INVALID_CREDENTIALS = 1
|
|
|
|
|
|
def validate_transcript_credentials(provider, **credentials):
|
|
"""
|
|
Validates transcript credentials.
|
|
|
|
Validations:
|
|
Providers must be either 3PlayMedia or Cielo24.
|
|
In case of:
|
|
3PlayMedia - 'api_key' and 'api_secret_key' are required.
|
|
Cielo24 - 'api_key' and 'username' are required.
|
|
|
|
It ignores any extra/unrelated parameters passed in credentials and
|
|
only returns the validated ones.
|
|
"""
|
|
error_message, validated_credentials = '', {}
|
|
valid_providers = list(get_3rd_party_transcription_plans().keys())
|
|
if provider in valid_providers:
|
|
must_have_props = []
|
|
if provider == TranscriptProvider.THREE_PLAY_MEDIA:
|
|
must_have_props = ['api_key', 'api_secret_key']
|
|
elif provider == TranscriptProvider.CIELO24:
|
|
must_have_props = ['api_key', 'username']
|
|
|
|
missing = [
|
|
must_have_prop for must_have_prop in must_have_props if must_have_prop not in list(credentials.keys())
|
|
]
|
|
if missing:
|
|
error_message = u'{missing} must be specified.'.format(missing=' and '.join(missing))
|
|
return error_message, validated_credentials
|
|
|
|
validated_credentials.update({
|
|
prop: credentials[prop] for prop in must_have_props
|
|
})
|
|
else:
|
|
error_message = u'Invalid Provider {provider}.'.format(provider=provider)
|
|
|
|
return error_message, validated_credentials
|
|
|
|
|
|
@expect_json
|
|
@login_required
|
|
@require_POST
|
|
def transcript_credentials_handler(request, course_key_string):
|
|
"""
|
|
JSON view handler to update the transcript organization credentials.
|
|
|
|
Arguments:
|
|
request: WSGI request object
|
|
course_key_string: A course identifier to extract the org.
|
|
|
|
Returns:
|
|
- A 200 response if credentials are valid and successfully updated in edx-video-pipeline.
|
|
- A 404 response if transcript feature is not enabled for this course.
|
|
- A 400 if credentials do not pass validations, hence not updated in edx-video-pipeline.
|
|
"""
|
|
course_key = CourseKey.from_string(course_key_string)
|
|
if not VideoTranscriptEnabledFlag.feature_enabled(course_key):
|
|
return HttpResponseNotFound()
|
|
|
|
provider = request.json.pop('provider')
|
|
error_message, validated_credentials = validate_transcript_credentials(provider=provider, **request.json)
|
|
if error_message:
|
|
response = JsonResponse({'error': error_message}, status=400)
|
|
else:
|
|
# Send the validated credentials to edx-video-pipeline.
|
|
credentials_payload = dict(validated_credentials, org=course_key.org, provider=provider)
|
|
error_response, is_updated = update_3rd_party_transcription_service_credentials(**credentials_payload)
|
|
# Send appropriate response based on whether credentials were updated or not.
|
|
if is_updated:
|
|
# Cache credentials state in edx-val.
|
|
update_transcript_credentials_state_for_org(org=course_key.org, provider=provider, exists=is_updated)
|
|
response = JsonResponse(status=200)
|
|
else:
|
|
# Error response would contain error types and the following
|
|
# error type is received from edx-video-pipeline whenever we've
|
|
# got invalid credentials for a provider. Its kept this way because
|
|
# edx-video-pipeline doesn't support i18n translations yet.
|
|
error_type = error_response.get('error_type')
|
|
if error_type == TranscriptionProviderErrorType.INVALID_CREDENTIALS:
|
|
error_message = _('The information you entered is incorrect.')
|
|
|
|
response = JsonResponse({'error': error_message}, status=400)
|
|
|
|
return response
|
|
|
|
|
|
@login_required
|
|
@require_GET
|
|
def transcript_download_handler(request):
|
|
"""
|
|
JSON view handler to download a transcript.
|
|
|
|
Arguments:
|
|
request: WSGI request object
|
|
|
|
Returns:
|
|
- A 200 response with SRT transcript file attached.
|
|
- A 400 if there is a validation error.
|
|
- A 404 if there is no such transcript.
|
|
"""
|
|
missing = [attr for attr in ['edx_video_id', 'language_code'] if attr not in request.GET]
|
|
if missing:
|
|
return JsonResponse(
|
|
{'error': _(u'The following parameters are required: {missing}.').format(missing=', '.join(missing))},
|
|
status=400
|
|
)
|
|
|
|
edx_video_id = request.GET['edx_video_id']
|
|
language_code = request.GET['language_code']
|
|
transcript = get_video_transcript_data(video_id=edx_video_id, language_code=language_code)
|
|
if transcript:
|
|
name_and_extension = os.path.splitext(transcript['file_name'])
|
|
basename, file_format = name_and_extension[0], name_and_extension[1][1:]
|
|
transcript_filename = '{base_name}.{ext}'.format(base_name=basename, ext=Transcript.SRT)
|
|
transcript_content = Transcript.convert(
|
|
content=transcript['content'],
|
|
input_format=file_format,
|
|
output_format=Transcript.SRT
|
|
)
|
|
# Construct an HTTP response
|
|
response = HttpResponse(transcript_content, content_type=Transcript.mime_types[Transcript.SRT])
|
|
response['Content-Disposition'] = u'attachment; filename="{filename}"'.format(filename=transcript_filename)
|
|
else:
|
|
response = HttpResponseNotFound()
|
|
|
|
return response
|
|
|
|
|
|
def validate_transcript_upload_data(data, files):
|
|
"""
|
|
Validates video transcript file.
|
|
Arguments:
|
|
data: A request's data part.
|
|
files: A request's files part.
|
|
Returns:
|
|
None or String
|
|
If there is error returns error message otherwise None.
|
|
"""
|
|
error = None
|
|
# Validate the must have attributes - this error is unlikely to be faced by common users.
|
|
must_have_attrs = ['edx_video_id', 'language_code', 'new_language_code']
|
|
missing = [attr for attr in must_have_attrs if attr not in data]
|
|
if missing:
|
|
error = _(u'The following parameters are required: {missing}.').format(missing=', '.join(missing))
|
|
elif (
|
|
data['language_code'] != data['new_language_code'] and
|
|
data['new_language_code'] in get_available_transcript_languages(video_id=data['edx_video_id'])
|
|
):
|
|
error = _(u'A transcript with the "{language_code}" language code already exists.'.format(
|
|
language_code=data['new_language_code']
|
|
))
|
|
elif 'file' not in files:
|
|
error = _(u'A transcript file is required.')
|
|
|
|
return error
|
|
|
|
|
|
@login_required
|
|
@require_POST
|
|
def transcript_upload_handler(request):
|
|
"""
|
|
View to upload a transcript file.
|
|
|
|
Arguments:
|
|
request: A WSGI request object
|
|
|
|
Transcript file, edx video id and transcript language are required.
|
|
Transcript file should be in SRT(SubRip) format.
|
|
|
|
Returns
|
|
- A 400 if any of the validation fails
|
|
- A 200 if transcript has been uploaded successfully
|
|
"""
|
|
error = validate_transcript_upload_data(data=request.POST, files=request.FILES)
|
|
if error:
|
|
response = JsonResponse({'error': error}, status=400)
|
|
else:
|
|
edx_video_id = request.POST['edx_video_id']
|
|
language_code = request.POST['language_code']
|
|
new_language_code = request.POST['new_language_code']
|
|
transcript_file = request.FILES['file']
|
|
try:
|
|
# Convert SRT transcript into an SJSON format
|
|
# and upload it to S3.
|
|
sjson_subs = Transcript.convert(
|
|
content=transcript_file.read().decode('utf-8'),
|
|
input_format=Transcript.SRT,
|
|
output_format=Transcript.SJSON
|
|
)
|
|
create_or_update_video_transcript(
|
|
video_id=edx_video_id,
|
|
language_code=language_code,
|
|
metadata={
|
|
'provider': TranscriptProvider.CUSTOM,
|
|
'file_format': Transcript.SJSON,
|
|
'language_code': new_language_code
|
|
},
|
|
file_data=ContentFile(sjson_subs),
|
|
)
|
|
response = JsonResponse(status=201)
|
|
except (TranscriptsGenerationException, UnicodeDecodeError):
|
|
response = JsonResponse(
|
|
{'error': _(u'There is a problem with this transcript file. Try to upload a different file.')},
|
|
status=400
|
|
)
|
|
|
|
return response
|
|
|
|
|
|
@login_required
|
|
@require_http_methods(["DELETE"])
|
|
def transcript_delete_handler(request, course_key_string, edx_video_id, language_code):
|
|
"""
|
|
View to delete a transcript file.
|
|
|
|
Arguments:
|
|
request: A WSGI request object
|
|
course_key_string: Course key identifying a course.
|
|
edx_video_id: edX video identifier whose transcript need to be deleted.
|
|
language_code: transcript's language code.
|
|
|
|
Returns
|
|
- A 404 if the user does not have required permisions
|
|
- A 200 if transcript is deleted without any error(s)
|
|
"""
|
|
# Check whether the feature is available for this course.
|
|
course_key = CourseKey.from_string(course_key_string)
|
|
# User needs to have studio write access for this course.
|
|
if not has_studio_write_access(request.user, course_key):
|
|
return HttpResponseNotFound()
|
|
|
|
delete_video_transcript(video_id=edx_video_id, language_code=language_code)
|
|
|
|
return JsonResponse(status=200)
|