[BD-27] FEAT: Add new API endpoint for uploading transcripts (#27844)

* FEAT: Add new API endpoint for uploading transcripts

* Add log messages to transcript upload function
This commit is contained in:
Josh
2021-06-08 05:34:51 -07:00
committed by GitHub
parent 13598f5faa
commit bc082eab93
3 changed files with 258 additions and 30 deletions

View File

@@ -6,6 +6,7 @@ from io import StringIO
from unittest.mock import ANY, Mock, patch
import ddt
from django.test.client import Client
from django.test.testcases import TestCase
from django.urls import reverse
from edxval import api
@@ -14,6 +15,7 @@ from cms.djangoapps.contentstore.tests.utils import CourseTestCase
from cms.djangoapps.contentstore.utils import reverse_course_url
from common.djangoapps.student.roles import CourseStaffRole
from openedx.core.djangoapps.profile_images.tests.helpers import make_image_file
from openedx.core.djangoapps.oauth_dispatch.jwt import create_jwt_for_user
from ..transcript_settings import TranscriptionProviderErrorType, validate_transcript_credentials
@@ -550,3 +552,193 @@ class TranscriptDeleteTest(CourseTestCase):
))
self.assertEqual(response.status_code, 200)
self.assertFalse(api.get_video_transcript_data(video_id=video_id, language_code=language_code))
@ddt.ddt
class TranscriptUploadApiTest(CourseTestCase):
"""
Tests for transcript upload handler.
"""
def setUp(self):
super().setUp()
jwt_headers = {
'HTTP_AUTHORIZATION': 'JWT ' + create_jwt_for_user(self.user)
}
self.client = Client(**jwt_headers)
@property
def view_url(self):
"""
Returns url for this view
"""
return reverse('transcript_upload_api')
def test_401_without_authentication(self):
"""
Verify that redirection happens in case of an unauthenticated request.
"""
response = self.client.post(self.view_url, content_type='application/json', HTTP_AUTHORIZATION='')
self.assertEqual(response.status_code, 401)
def test_405_with_not_allowed_request_method(self):
"""
Verify that 405 is returned in case of not-allowed request methods.
Allowed request methods include POST.
"""
response = self.client.get(self.view_url, content_type='application/json')
self.assertEqual(response.status_code, 405)
@patch('cms.djangoapps.contentstore.views.transcript_settings.create_or_update_video_transcript')
@patch(
'cms.djangoapps.contentstore.views.transcript_settings.get_available_transcript_languages',
Mock(return_value=['en']),
)
def test_transcript_upload_handler(self, mock_create_or_update_video_transcript):
"""
Tests that transcript upload handler works as expected.
"""
transcript_file_stream = StringIO('0\n00:00:00,010 --> 00:00:00,100\nПривіт, edX вітає вас.\n\n')
# Make request to transcript upload handler
response = self.client.post(
self.view_url,
{
'edx_video_id': '123',
'language_code': 'en',
'new_language_code': 'es',
'file': transcript_file_stream,
},
format='multipart'
)
self.assertEqual(response.status_code, 201)
mock_create_or_update_video_transcript.assert_called_with(
video_id='123',
language_code='en',
metadata={
'language_code': 'es',
'file_format': 'sjson',
'provider': 'Custom'
},
file_data=ANY,
)
@ddt.data(
(
{
'edx_video_id': '123',
'language_code': 'en',
'new_language_code': 'en',
},
'A transcript file is required.'
),
(
{
'language_code': 'en',
'file': '0\n00:00:00,010 --> 00:00:00,100\nHi, welcome to Edx.\n\n'
},
'The following parameters are required: edx_video_id, new_language_code.'
),
(
{
'language_code': 'en',
'new_language_code': 'en',
'file': '0\n00:00:00,010 --> 00:00:00,100\nHi, welcome to Edx.\n\n'
},
'The following parameters are required: edx_video_id.'
),
(
{
'file': '0\n00:00:00,010 --> 00:00:00,100\nHi, welcome to Edx.\n\n'
},
'The following parameters are required: edx_video_id, language_code, new_language_code.'
)
)
@ddt.unpack
@patch(
'cms.djangoapps.contentstore.views.transcript_settings.get_available_transcript_languages',
Mock(return_value=['en']),
)
def test_transcript_upload_handler_missing_attrs(self, request_payload, expected_error_message):
"""
Tests the transcript upload handler when the required attributes are missing.
"""
# Make request to transcript upload handler
response = self.client.post(self.view_url, request_payload, format='multipart')
self.assertEqual(response.status_code, 400)
self.assertEqual(json.loads(response.content.decode('utf-8'))['error'], expected_error_message)
@patch(
'cms.djangoapps.contentstore.views.transcript_settings.get_available_transcript_languages',
Mock(return_value=['en', 'es'])
)
def test_transcript_upload_handler_existing_transcript(self):
"""
Tests that upload handler do not update transcript's language if a transcript
with the same language already present for an edx_video_id.
"""
# Make request to transcript upload handler
request_payload = {
'edx_video_id': '1234',
'language_code': 'en',
'new_language_code': 'es'
}
response = self.client.post(self.view_url, request_payload, format='multipart')
self.assertEqual(response.status_code, 400)
self.assertEqual(
json.loads(response.content.decode('utf-8'))['error'],
'A transcript with the "es" language code already exists.'
)
@patch(
'cms.djangoapps.contentstore.views.transcript_settings.get_available_transcript_languages',
Mock(return_value=['en']),
)
def test_transcript_upload_handler_with_image(self):
"""
Tests the transcript upload handler with an image file.
"""
with make_image_file() as image_file:
# Make request to transcript upload handler
response = self.client.post(
self.view_url,
{
'edx_video_id': '123',
'language_code': 'en',
'new_language_code': 'es',
'file': image_file,
},
format='multipart'
)
self.assertEqual(response.status_code, 400)
self.assertEqual(
json.loads(response.content.decode('utf-8'))['error'],
'There is a problem with this transcript file. Try to upload a different file.'
)
@patch(
'cms.djangoapps.contentstore.views.transcript_settings.get_available_transcript_languages',
Mock(return_value=['en']),
)
def test_transcript_upload_handler_with_invalid_transcript(self):
"""
Tests the transcript upload handler with an invalid transcript file.
"""
transcript_file_stream = StringIO('An invalid transcript SubRip file content')
# Make request to transcript upload handler
response = self.client.post(
self.view_url,
{
'edx_video_id': '123',
'language_code': 'en',
'new_language_code': 'es',
'file': transcript_file_stream,
},
format='multipart'
)
self.assertEqual(response.status_code, 400)
self.assertEqual(
json.loads(response.content.decode('utf-8'))['error'],
'There is a problem with this transcript file. Try to upload a different file.'
)

View File

@@ -20,11 +20,14 @@ from edxval.api import (
update_transcript_credentials_state_for_org
)
from opaque_keys.edx.keys import CourseKey
from rest_framework.decorators import api_view
from rest_framework.response import Response
from common.djangoapps.student.auth import has_studio_write_access
from common.djangoapps.util.json_request import JsonResponse, expect_json
from openedx.core.djangoapps.video_config.models import VideoTranscriptEnabledFlag
from openedx.core.djangoapps.video_pipeline.api import update_3rd_party_transcription_service_credentials
from openedx.core.lib.api.view_utils import view_auth_classes
from xmodule.video_module.transcripts_utils import Transcript, TranscriptsGenerationException
from .videos import TranscriptProvider
@@ -33,7 +36,8 @@ __all__ = [
'transcript_credentials_handler',
'transcript_download_handler',
'transcript_upload_handler',
'transcript_delete_handler'
'transcript_delete_handler',
'transcript_upload_api',
]
LOGGER = logging.getLogger(__name__)
@@ -173,6 +177,41 @@ def transcript_download_handler(request):
return response
def upload_transcript(request):
edx_video_id = request.POST['edx_video_id']
language_code = request.POST['language_code']
new_language_code = request.POST['new_language_code']
transcript_file = request.FILES['file']
try:
# Convert SRT transcript into an SJSON format
# and upload it to S3.
sjson_subs = Transcript.convert(
content=transcript_file.read().decode('utf-8'),
input_format=Transcript.SRT,
output_format=Transcript.SJSON
).encode()
create_or_update_video_transcript(
video_id=edx_video_id,
language_code=language_code,
metadata={
'provider': TranscriptProvider.CUSTOM,
'file_format': Transcript.SJSON,
'language_code': new_language_code
},
file_data=ContentFile(sjson_subs),
)
response = JsonResponse(status=201)
except (TranscriptsGenerationException, UnicodeDecodeError):
LOGGER.error("Unable to update transcript on edX video %s for language %s", edx_video_id, new_language_code)
response = JsonResponse(
{'error': _('There is a problem with this transcript file. Try to upload a different file.')},
status=400
)
finally:
LOGGER.info("Updated transcript on edX video %s for language %s", edx_video_id, new_language_code)
return response
def validate_transcript_upload_data(data, files):
"""
Validates video transcript file.
@@ -202,6 +241,30 @@ def validate_transcript_upload_data(data, files):
return error
@api_view(['POST'])
@view_auth_classes()
@expect_json
def transcript_upload_api(request):
"""
API View for uploading transcript files.
Arguments:
request: A WSGI request object
Transcript file in SRT format
Returns:
- A 400 if any validation fails
- A 200 if the transcript has been uploaded successfully
"""
error = validate_transcript_upload_data(data=request.POST, files=request.FILES)
if error:
response = JsonResponse({'error': error}, status=400)
else:
response = upload_transcript(request)
return response
@login_required
@require_POST
def transcript_upload_handler(request):
@@ -222,35 +285,7 @@ def transcript_upload_handler(request):
if error:
response = JsonResponse({'error': error}, status=400)
else:
edx_video_id = request.POST['edx_video_id']
language_code = request.POST['language_code']
new_language_code = request.POST['new_language_code']
transcript_file = request.FILES['file']
try:
# Convert SRT transcript into an SJSON format
# and upload it to S3.
sjson_subs = Transcript.convert(
content=transcript_file.read().decode('utf-8'),
input_format=Transcript.SRT,
output_format=Transcript.SJSON
).encode()
create_or_update_video_transcript(
video_id=edx_video_id,
language_code=language_code,
metadata={
'provider': TranscriptProvider.CUSTOM,
'file_format': Transcript.SJSON,
'language_code': new_language_code
},
file_data=ContentFile(sjson_subs),
)
response = JsonResponse(status=201)
except (TranscriptsGenerationException, UnicodeDecodeError):
response = JsonResponse(
{'error': _('There is a problem with this transcript file. Try to upload a different file.')},
status=400
)
response = upload_transcript(request)
return response

View File

@@ -169,6 +169,7 @@ urlpatterns = [
url(r'^transcript_delete/{}(?:/(?P<edx_video_id>[-\w]+))?(?:/(?P<language_code>[^/]*))?$'.format(
settings.COURSE_KEY_PATTERN
), contentstore_views.transcript_delete_handler, name='transcript_delete_handler'),
url(r'^transcript_upload_api/$', contentstore_views.transcript_upload_api, name='transcript_upload_api'),
url(fr'^video_encodings_download/{settings.COURSE_KEY_PATTERN}$',
contentstore_views.video_encodings_download, name='video_encodings_download'),
url(fr'^group_configurations/{settings.COURSE_KEY_PATTERN}$',