Files
edx-platform/cms/djangoapps/contentstore/transcript_storage_handlers.py

267 lines
10 KiB
Python

"""
Business logic for video transcripts.
"""
import logging
import os
from django.core.files.base import ContentFile
from django.http import HttpResponse, HttpResponseNotFound
from django.utils.translation import gettext as _
from edxval.api import (
create_or_update_video_transcript,
delete_video_transcript as delete_video_transcript_source_function,
get_3rd_party_transcription_plans,
get_available_transcript_languages,
get_video_transcript_data,
update_transcript_credentials_state_for_org,
get_video_transcript
)
from opaque_keys.edx.keys import CourseKey
from common.djangoapps.util.json_request import JsonResponse
from openedx.core.djangoapps.video_config.models import VideoTranscriptEnabledFlag
from openedx.core.djangoapps.video_pipeline.api import update_3rd_party_transcription_service_credentials
from openedx.core.djangoapps.video_config.transcripts_utils import Transcript # lint-amnesty, pylint: disable=wrong-import-order
from xblocks_contrib.video.exceptions import TranscriptsGenerationException
from .toggles import use_mock_video_uploads
from .video_storage_handlers import TranscriptProvider
LOGGER = logging.getLogger(__name__)
class TranscriptionProviderErrorType:
"""
Transcription provider's error types enumeration.
"""
INVALID_CREDENTIALS = 1
def validate_transcript_credentials(provider, **credentials):
"""
Validates transcript credentials.
Validations:
Providers must be either 3PlayMedia or Cielo24.
In case of:
3PlayMedia - 'api_key' and 'api_secret_key' are required.
Cielo24 - 'api_key' and 'username' are required.
It ignores any extra/unrelated parameters passed in credentials and
only returns the validated ones.
"""
error_message, validated_credentials = '', {}
valid_providers = list(get_3rd_party_transcription_plans().keys())
if provider in valid_providers:
must_have_props = []
if provider == TranscriptProvider.THREE_PLAY_MEDIA:
must_have_props = ['api_key', 'api_secret_key']
elif provider == TranscriptProvider.CIELO24:
must_have_props = ['api_key', 'username']
missing = [
must_have_prop for must_have_prop in must_have_props if must_have_prop not in list(credentials.keys()) # lint-amnesty, pylint: disable=consider-iterating-dictionary
]
if missing:
error_message = '{missing} must be specified.'.format(missing=' and '.join(missing))
return error_message, validated_credentials
validated_credentials.update({
prop: credentials[prop] for prop in must_have_props
})
else:
error_message = f'Invalid Provider {provider}.'
return error_message, validated_credentials
def handle_transcript_credentials(request, course_key_string):
"""
JSON view handler to update the transcript organization credentials.
Arguments:
request: WSGI request object
course_key_string: A course identifier to extract the org.
Returns:
- A 200 response if credentials are valid and successfully updated in edx-video-pipeline.
- A 404 response if transcript feature is not enabled for this course.
- A 400 if credentials do not pass validations, hence not updated in edx-video-pipeline.
"""
course_key = CourseKey.from_string(course_key_string)
if not VideoTranscriptEnabledFlag.feature_enabled(course_key):
return HttpResponseNotFound()
provider = request.json.pop('provider')
error_message, validated_credentials = validate_transcript_credentials(provider=provider, **request.json)
if error_message:
response = JsonResponse({'error': error_message}, status=400)
else:
# Send the validated credentials to edx-video-pipeline and video-encode-manager
credentials_payload = dict(validated_credentials, org=course_key.org, provider=provider)
error_response, is_updated = update_3rd_party_transcription_service_credentials(**credentials_payload)
# Send appropriate response based on whether credentials were updated or not.
if is_updated:
# Cache credentials state in edx-val.
update_transcript_credentials_state_for_org(org=course_key.org, provider=provider, exists=is_updated)
response = JsonResponse(status=200)
else:
# Error response would contain error types and the following
# error type is received from edx-video-pipeline whenever we've
# got invalid credentials for a provider. Its kept this way because
# edx-video-pipeline doesn't support i18n translations yet.
error_type = error_response.get('error_type')
if error_type == TranscriptionProviderErrorType.INVALID_CREDENTIALS:
error_message = _('The information you entered is incorrect.')
response = JsonResponse({'error': error_message}, status=400)
return response
def handle_transcript_download(request):
"""
JSON view handler to download a transcript.
Arguments:
request: WSGI request object
Returns:
- A 200 response with SRT transcript file attached.
- A 400 if there is a validation error.
- A 404 if there is no such transcript.
"""
missing = [attr for attr in ['edx_video_id', 'language_code'] if attr not in request.GET]
if missing:
return JsonResponse(
{'error': _('The following parameters are required: {missing}.').format(missing=', '.join(missing))},
status=400
)
edx_video_id = request.GET['edx_video_id']
language_code = request.GET['language_code']
transcript = get_video_transcript_data(video_id=edx_video_id, language_code=language_code)
if transcript:
name_and_extension = os.path.splitext(transcript['file_name'])
basename, file_format = name_and_extension[0], name_and_extension[1][1:]
transcript_filename = f'{basename}.{Transcript.SRT}'
transcript_content = Transcript.convert(
content=transcript['content'],
input_format=file_format,
output_format=Transcript.SRT
)
# Construct an HTTP response
response = HttpResponse(transcript_content, content_type=Transcript.mime_types[Transcript.SRT])
response['Content-Disposition'] = f'attachment; filename="{transcript_filename}"'
else:
response = HttpResponseNotFound()
return response
def _create_or_update_video_transcript(**kwargs):
if use_mock_video_uploads():
return True
return create_or_update_video_transcript(**kwargs)
def upload_transcript(request):
"""
Upload a transcript file
Arguments:
request: A WSGI request object
Transcript file in SRT format
"""
edx_video_id = request.POST['edx_video_id']
language_code = request.POST['language_code']
new_language_code = request.POST['new_language_code']
transcript_file = request.FILES['file']
try:
# Convert SRT transcript into an SJSON format
# and upload it to S3.
sjson_subs = Transcript.convert(
content=transcript_file.read().decode('utf-8'),
input_format=Transcript.SRT,
output_format=Transcript.SJSON
).encode()
_create_or_update_video_transcript(
video_id=edx_video_id,
language_code=language_code,
metadata={
'provider': TranscriptProvider.CUSTOM,
'file_format': Transcript.SJSON,
'language_code': new_language_code
},
file_data=ContentFile(sjson_subs),
)
response = JsonResponse(status=201)
except (TranscriptsGenerationException, UnicodeDecodeError):
LOGGER.error("Unable to update transcript on edX video %s for language %s", edx_video_id, new_language_code)
response = JsonResponse(
{'error': _('There is a problem with this transcript file. Try to upload a different file.')},
status=400
)
finally:
LOGGER.info("Updated transcript on edX video %s for language %s", edx_video_id, new_language_code)
return response
def validate_transcript_upload_data(data, files):
"""
Validates video transcript file.
Arguments:
data: A request's data part.
files: A request's files part.
Returns:
None or String
If there is error returns error message otherwise None.
"""
error = None
# Validate the must have attributes - this error is unlikely to be faced by common users.
must_have_attrs = ['edx_video_id', 'language_code', 'new_language_code']
missing = [attr for attr in must_have_attrs if attr not in data]
if missing:
error = _('The following parameters are required: {missing}.').format(missing=', '.join(missing))
elif (
data['language_code'] != data['new_language_code'] and
data['new_language_code'] in get_available_transcript_languages(video_id=data['edx_video_id'])
):
error = _('A transcript with the "{language_code}" language code already exists.'.format( # lint-amnesty, pylint: disable=translation-of-non-string
language_code=data['new_language_code']
))
elif 'file' not in files:
error = _('A transcript file is required.')
return error
def delete_video_transcript(video_id=None, language_code=None):
return delete_video_transcript_source_function(video_id=video_id, language_code=language_code)
def delete_video_transcript_or_404(request):
"""
Delete a video transcript or return 404 if it doesn't exist.
"""
missing = [attr for attr in ['edx_video_id', 'language_code'] if attr not in request.GET]
if missing:
return JsonResponse(
{'error': _('The following parameters are required: {missing}.').format(missing=', '.join(missing))},
status=400
)
video_id = request.GET.get('edx_video_id')
language_code = request.GET.get('language_code')
if not get_video_transcript(video_id=video_id, language_code=language_code):
return HttpResponseNotFound()
delete_video_transcript(video_id=video_id, language_code=language_code)
return JsonResponse(status=200)