[BD-27] FEAT: Add new API endpoint for uploading transcripts (#27844)
* FEAT: Add new API endpoint for uploading transcripts * Add log messages to transcript upload function
This commit is contained in:
@@ -6,6 +6,7 @@ from io import StringIO
|
||||
from unittest.mock import ANY, Mock, patch
|
||||
|
||||
import ddt
|
||||
from django.test.client import Client
|
||||
from django.test.testcases import TestCase
|
||||
from django.urls import reverse
|
||||
from edxval import api
|
||||
@@ -14,6 +15,7 @@ from cms.djangoapps.contentstore.tests.utils import CourseTestCase
|
||||
from cms.djangoapps.contentstore.utils import reverse_course_url
|
||||
from common.djangoapps.student.roles import CourseStaffRole
|
||||
from openedx.core.djangoapps.profile_images.tests.helpers import make_image_file
|
||||
from openedx.core.djangoapps.oauth_dispatch.jwt import create_jwt_for_user
|
||||
|
||||
from ..transcript_settings import TranscriptionProviderErrorType, validate_transcript_credentials
|
||||
|
||||
@@ -550,3 +552,193 @@ class TranscriptDeleteTest(CourseTestCase):
|
||||
))
|
||||
self.assertEqual(response.status_code, 200)
|
||||
self.assertFalse(api.get_video_transcript_data(video_id=video_id, language_code=language_code))
|
||||
|
||||
|
||||
@ddt.ddt
class TranscriptUploadApiTest(CourseTestCase):
    """
    Tests for the transcript upload API view (`transcript_upload_api`).

    Every request is sent through a test client that carries a JWT
    Authorization header for the test user, unless a test overrides it.
    """
    # Dotted path of the language lookup that the tests replace with a fixed list.
    AVAILABLE_LANGUAGES_PATH = (
        'cms.djangoapps.contentstore.views.transcript_settings.get_available_transcript_languages'
    )
    # Error message the tests expect when the uploaded file cannot be used as a transcript.
    INVALID_FILE_ERROR = 'There is a problem with this transcript file. Try to upload a different file.'

    def setUp(self):
        super().setUp()
        jwt_headers = {
            'HTTP_AUTHORIZATION': 'JWT ' + create_jwt_for_user(self.user)
        }
        # Replace the default client so every request carries the JWT header.
        self.client = Client(**jwt_headers)

    @property
    def view_url(self):
        """
        Returns url for this view
        """
        return reverse('transcript_upload_api')

    @staticmethod
    def _error_message(response):
        """
        Returns the `error` field of a JSON error response.
        """
        return json.loads(response.content.decode('utf-8'))['error']

    def test_401_without_authentication(self):
        """
        Verify that a 401 is returned in case of an unauthenticated request.
        """
        # An empty Authorization header overrides the JWT header set on the client.
        response = self.client.post(self.view_url, content_type='application/json', HTTP_AUTHORIZATION='')
        self.assertEqual(response.status_code, 401)

    def test_405_with_not_allowed_request_method(self):
        """
        Verify that 405 is returned in case of not-allowed request methods.
        Allowed request methods include POST.
        """
        response = self.client.get(self.view_url, content_type='application/json')
        self.assertEqual(response.status_code, 405)

    @patch('cms.djangoapps.contentstore.views.transcript_settings.create_or_update_video_transcript')
    @patch(AVAILABLE_LANGUAGES_PATH, Mock(return_value=['en']))
    def test_transcript_upload_handler(self, mock_create_or_update_video_transcript):
        """
        Tests that transcript upload handler works as expected.
        """
        transcript_file_stream = StringIO('0\n00:00:00,010 --> 00:00:00,100\nПривіт, edX вітає вас.\n\n')
        # Make request to transcript upload handler
        response = self.client.post(
            self.view_url,
            {
                'edx_video_id': '123',
                'language_code': 'en',
                'new_language_code': 'es',
                'file': transcript_file_stream,
            },
            format='multipart'
        )

        self.assertEqual(response.status_code, 201)
        # Exactly one save is expected; assert_called_with would only inspect the last call.
        mock_create_or_update_video_transcript.assert_called_once_with(
            video_id='123',
            language_code='en',
            metadata={
                'language_code': 'es',
                'file_format': 'sjson',
                'provider': 'Custom'
            },
            file_data=ANY,
        )

    @ddt.data(
        (
            {
                'edx_video_id': '123',
                'language_code': 'en',
                'new_language_code': 'en',
            },
            'A transcript file is required.'
        ),
        (
            {
                'language_code': 'en',
                'file': '0\n00:00:00,010 --> 00:00:00,100\nHi, welcome to Edx.\n\n'
            },
            'The following parameters are required: edx_video_id, new_language_code.'
        ),
        (
            {
                'language_code': 'en',
                'new_language_code': 'en',
                'file': '0\n00:00:00,010 --> 00:00:00,100\nHi, welcome to Edx.\n\n'
            },
            'The following parameters are required: edx_video_id.'
        ),
        (
            {
                'file': '0\n00:00:00,010 --> 00:00:00,100\nHi, welcome to Edx.\n\n'
            },
            'The following parameters are required: edx_video_id, language_code, new_language_code.'
        )
    )
    @ddt.unpack
    @patch(AVAILABLE_LANGUAGES_PATH, Mock(return_value=['en']))
    def test_transcript_upload_handler_missing_attrs(self, request_payload, expected_error_message):
        """
        Tests the transcript upload handler when the required attributes are missing.
        """
        # Make request to transcript upload handler
        response = self.client.post(self.view_url, request_payload, format='multipart')
        self.assertEqual(response.status_code, 400)
        self.assertEqual(self._error_message(response), expected_error_message)

    @patch(AVAILABLE_LANGUAGES_PATH, Mock(return_value=['en', 'es']))
    def test_transcript_upload_handler_existing_transcript(self):
        """
        Tests that the upload handler does not update a transcript's language if a
        transcript with the same language is already present for an edx_video_id.
        """
        # Make request to transcript upload handler
        request_payload = {
            'edx_video_id': '1234',
            'language_code': 'en',
            'new_language_code': 'es'
        }
        response = self.client.post(self.view_url, request_payload, format='multipart')
        self.assertEqual(response.status_code, 400)
        self.assertEqual(
            self._error_message(response),
            'A transcript with the "es" language code already exists.'
        )

    @patch(AVAILABLE_LANGUAGES_PATH, Mock(return_value=['en']))
    def test_transcript_upload_handler_with_image(self):
        """
        Tests the transcript upload handler with an image file.
        """
        with make_image_file() as image_file:
            # Make request to transcript upload handler
            response = self.client.post(
                self.view_url,
                {
                    'edx_video_id': '123',
                    'language_code': 'en',
                    'new_language_code': 'es',
                    'file': image_file,
                },
                format='multipart'
            )

            self.assertEqual(response.status_code, 400)
            self.assertEqual(self._error_message(response), self.INVALID_FILE_ERROR)

    @patch(AVAILABLE_LANGUAGES_PATH, Mock(return_value=['en']))
    def test_transcript_upload_handler_with_invalid_transcript(self):
        """
        Tests the transcript upload handler with an invalid transcript file.
        """
        transcript_file_stream = StringIO('An invalid transcript SubRip file content')
        # Make request to transcript upload handler
        response = self.client.post(
            self.view_url,
            {
                'edx_video_id': '123',
                'language_code': 'en',
                'new_language_code': 'es',
                'file': transcript_file_stream,
            },
            format='multipart'
        )

        self.assertEqual(response.status_code, 400)
        self.assertEqual(self._error_message(response), self.INVALID_FILE_ERROR)
|
||||
|
||||
@@ -20,11 +20,14 @@ from edxval.api import (
|
||||
update_transcript_credentials_state_for_org
|
||||
)
|
||||
from opaque_keys.edx.keys import CourseKey
|
||||
from rest_framework.decorators import api_view
|
||||
from rest_framework.response import Response
|
||||
|
||||
from common.djangoapps.student.auth import has_studio_write_access
|
||||
from common.djangoapps.util.json_request import JsonResponse, expect_json
|
||||
from openedx.core.djangoapps.video_config.models import VideoTranscriptEnabledFlag
|
||||
from openedx.core.djangoapps.video_pipeline.api import update_3rd_party_transcription_service_credentials
|
||||
from openedx.core.lib.api.view_utils import view_auth_classes
|
||||
from xmodule.video_module.transcripts_utils import Transcript, TranscriptsGenerationException
|
||||
|
||||
from .videos import TranscriptProvider
|
||||
@@ -33,7 +36,8 @@ __all__ = [
|
||||
'transcript_credentials_handler',
|
||||
'transcript_download_handler',
|
||||
'transcript_upload_handler',
|
||||
'transcript_delete_handler'
|
||||
'transcript_delete_handler',
|
||||
'transcript_upload_api',
|
||||
]
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
@@ -173,6 +177,41 @@ def transcript_download_handler(request):
|
||||
return response
|
||||
|
||||
|
||||
def upload_transcript(request):
    """
    Convert an uploaded SRT transcript to SJSON and save it for a video.

    The request fields are read with plain indexing, so callers are expected
    to have validated the request before calling this function.

    Arguments:
        request: A WSGI request object. `request.POST` supplies `edx_video_id`,
            `language_code` and `new_language_code`; `request.FILES` supplies
            the transcript under `file`.

    Returns:
        - A 201 JsonResponse if the transcript has been converted and saved
        - A 400 JsonResponse if the file cannot be decoded as UTF-8 or
          cannot be converted from SRT
    """
    edx_video_id = request.POST['edx_video_id']
    language_code = request.POST['language_code']
    new_language_code = request.POST['new_language_code']
    transcript_file = request.FILES['file']
    try:
        # Convert SRT transcript into an SJSON format
        # and save it through create_or_update_video_transcript.
        sjson_subs = Transcript.convert(
            content=transcript_file.read().decode('utf-8'),
            input_format=Transcript.SRT,
            output_format=Transcript.SJSON
        ).encode()
        create_or_update_video_transcript(
            video_id=edx_video_id,
            language_code=language_code,
            metadata={
                'provider': TranscriptProvider.CUSTOM,
                'file_format': Transcript.SJSON,
                'language_code': new_language_code
            },
            file_data=ContentFile(sjson_subs),
        )
    except (TranscriptsGenerationException, UnicodeDecodeError):
        LOGGER.error("Unable to update transcript on edX video %s for language %s", edx_video_id, new_language_code)
        response = JsonResponse(
            {'error': _('There is a problem with this transcript file. Try to upload a different file.')},
            status=400
        )
    else:
        # Log success only when the update really happened. A `finally` clause here
        # would also run after the failure above and after any unexpected exception.
        LOGGER.info("Updated transcript on edX video %s for language %s", edx_video_id, new_language_code)
        response = JsonResponse(status=201)
    return response
|
||||
|
||||
|
||||
def validate_transcript_upload_data(data, files):
|
||||
"""
|
||||
Validates video transcript file.
|
||||
@@ -202,6 +241,30 @@ def validate_transcript_upload_data(data, files):
|
||||
return error
|
||||
|
||||
|
||||
@api_view(['POST'])
@view_auth_classes()
@expect_json
def transcript_upload_api(request):
    """
    API View for uploading transcript files.

    Arguments:
        request: A WSGI request object carrying the transcript file
            (in SRT format) and the accompanying form fields.

    Returns:
        - A 400 if validate_transcript_upload_data reports an error
        - Otherwise the response of upload_transcript: a 201 if the transcript
          has been uploaded successfully, or a 400 if the file is unusable
    """
    validation_error = validate_transcript_upload_data(data=request.POST, files=request.FILES)
    if not validation_error:
        return upload_transcript(request)

    return JsonResponse({'error': validation_error}, status=400)
|
||||
|
||||
|
||||
@login_required
|
||||
@require_POST
|
||||
def transcript_upload_handler(request):
|
||||
@@ -222,35 +285,7 @@ def transcript_upload_handler(request):
|
||||
if error:
|
||||
response = JsonResponse({'error': error}, status=400)
|
||||
else:
|
||||
edx_video_id = request.POST['edx_video_id']
|
||||
language_code = request.POST['language_code']
|
||||
new_language_code = request.POST['new_language_code']
|
||||
transcript_file = request.FILES['file']
|
||||
try:
|
||||
# Convert SRT transcript into an SJSON format
|
||||
# and upload it to S3.
|
||||
sjson_subs = Transcript.convert(
|
||||
content=transcript_file.read().decode('utf-8'),
|
||||
input_format=Transcript.SRT,
|
||||
output_format=Transcript.SJSON
|
||||
).encode()
|
||||
create_or_update_video_transcript(
|
||||
video_id=edx_video_id,
|
||||
language_code=language_code,
|
||||
metadata={
|
||||
'provider': TranscriptProvider.CUSTOM,
|
||||
'file_format': Transcript.SJSON,
|
||||
'language_code': new_language_code
|
||||
},
|
||||
file_data=ContentFile(sjson_subs),
|
||||
)
|
||||
response = JsonResponse(status=201)
|
||||
except (TranscriptsGenerationException, UnicodeDecodeError):
|
||||
response = JsonResponse(
|
||||
{'error': _('There is a problem with this transcript file. Try to upload a different file.')},
|
||||
status=400
|
||||
)
|
||||
|
||||
response = upload_transcript(request)
|
||||
return response
|
||||
|
||||
|
||||
|
||||
@@ -169,6 +169,7 @@ urlpatterns = [
|
||||
url(r'^transcript_delete/{}(?:/(?P<edx_video_id>[-\w]+))?(?:/(?P<language_code>[^/]*))?$'.format(
|
||||
settings.COURSE_KEY_PATTERN
|
||||
), contentstore_views.transcript_delete_handler, name='transcript_delete_handler'),
|
||||
url(r'^transcript_upload_api/$', contentstore_views.transcript_upload_api, name='transcript_upload_api'),
|
||||
url(fr'^video_encodings_download/{settings.COURSE_KEY_PATTERN}$',
|
||||
contentstore_views.video_encodings_download, name='video_encodings_download'),
|
||||
url(fr'^group_configurations/{settings.COURSE_KEY_PATTERN}$',
|
||||
|
||||
Reference in New Issue
Block a user