Files
edx-platform/xmodule/video_block/video_handlers.py
Chris Chávez b6489e718c feat: Video editor supports transcripts [FC-0076] (#36058)
* Add error handler on save video to avoid creating sjson
* Support transcripts without edx_video_id in definition_to_xml
* When copying a video from a library to a course: Create a new edx_video_id
* Save transcripts as static assets in a video in a library when adding a new transcript.
* Delete transcripts as static assets in a video in a library when deleting transcripts.
* Support download transcript in a video in a library.
* Support replace transcript in a video in a library.
* Support updating transcripts in video in a library.
* Refactor the code of downloading YouTube transcripts to enable this feature in libraries.
* Support copy from a library to a course and a course to a library.
2025-02-21 13:33:13 -05:00

677 lines
26 KiB
Python

"""
Handlers for video block.
StudentViewHandlers are handlers for video block instance.
StudioViewHandlers are handlers for video descriptor instance.
"""
import json
import logging
import math
from django.core.files.base import ContentFile
from django.utils.timezone import now
from edxval.api import create_external_video, create_or_update_video_transcript, delete_video_transcript
from opaque_keys.edx.locator import CourseLocator, LibraryLocatorV2
from webob import Response
from xblock.core import XBlock
from xblock.exceptions import JsonHandlerError
from xmodule.exceptions import NotFoundError
from xmodule.fields import RelativeTime
from openedx.core.djangoapps.content_libraries import api as lib_api
from .transcripts_utils import (
Transcript,
TranscriptException,
TranscriptsGenerationException,
clean_video_id,
generate_sjson_for_all_speeds,
get_html5_ids,
get_or_create_sjson,
get_transcript,
get_transcript_from_contentstore,
remove_subs_from_store,
subs_filename,
youtube_speed_dict
)
log = logging.getLogger(__name__)
# Disable no-member warning:
# pylint: disable=no-member
def to_boolean(value):
    """
    Interpret a GET/POST request parameter as a bool.

    Bytes are decoded as ASCII first (undecodable bytes are replaced rather
    than raising). Strings are truthy only when they spell ``true`` in any
    letter case; every other type falls back to Python truthiness.
    """
    text = value.decode('ascii', errors='replace') if isinstance(value, bytes) else value
    if not isinstance(text, str):
        return bool(text)
    return text.lower() == 'true'
class VideoStudentViewHandlers:
    """
    Handlers for video block instance.

    This class is written as a mixin: it reads attributes and helpers
    (``location``, ``runtime``, ``speed``, ``get_transcripts_info`` ...) that are
    not defined here and must be provided by the class it is combined with.
    """
    global_speed = None
    transcript_language = None

    def handle_ajax(self, dispatch, data):
        """
        Update values of xfields, that were changed by student.

        Arguments:
            dispatch (str): requested action; only `save_user_state` is handled.
            data (dict): maps field names to raw request values. Keys that are
                not listed in `accepted_keys` are ignored.

        Returns:
            str: JSON `{"success": true}` once all keys were processed, or
                `{"success": false, "error": ...}` if the submitted speed is not
                a usable number.

        Raises:
            NotFoundError: for any dispatch other than `save_user_state`.
        """
        accepted_keys = [
            'speed', 'auto_advance', 'saved_video_position', 'transcript_language',
            'transcript_download_format', 'youtube_is_available',
            'bumper_last_view_date', 'bumper_do_not_show_again'
        ]

        # Converters applied to the raw request value before it is stored.
        conversions = {
            'speed': json.loads,
            'auto_advance': json.loads,
            'saved_video_position': RelativeTime.isotime_to_timedelta,
            'youtube_is_available': json.loads,
            'bumper_last_view_date': to_boolean,
            'bumper_do_not_show_again': to_boolean,
        }

        if dispatch == 'save_user_state':
            for key in data:
                if key in accepted_keys:
                    if key in conversions:
                        value = conversions[key](data[key])
                    else:
                        value = data[key]

                    # The submitted value is discarded: the field always records "now".
                    if key == 'bumper_last_view_date':
                        value = now()

                    # `json.loads` may yield a non-number (string, null, list ...).
                    # `math.isnan` would raise TypeError on those, so reject them
                    # with the same error as NaN. Keys handled before this point
                    # stay applied.
                    if key == 'speed' and (not isinstance(value, (int, float)) or math.isnan(value)):
                        message = f"Invalid speed value {value}, must be a float."
                        log.warning(message)
                        return json.dumps({'success': False, 'error': message})

                    setattr(self, key, value)

                    if key == 'speed':
                        self.global_speed = self.speed

            return json.dumps({'success': True})

        log.debug("GET %s", data)
        log.debug("DISPATCH %s", dispatch)

        raise NotFoundError('Unexpected dispatch type')

    def translation(self, youtube_id, transcripts):
        """
        This is called to get transcript file for specific language.

        youtube_id: str: must be one of youtube_ids or None if HTML video
        transcripts (dict): A dict with all transcripts and a sub.

        Logic flow:

        If youtube_id doesn't exist, we have a video in HTML5 mode. Otherwise,
        video in Youtube or Flash modes.

        if youtube:
            If english -> give back youtube_id subtitles:
                Return what we have in contentstore for given youtube_id.
            If non-english:
                a) extract youtube_id from srt file name.
                b) try to find sjson by youtube_id and return if successful.
                c) generate sjson from srt for all youtube speeds.
        if non-youtube:
            If english -> give back `sub` subtitles:
                Return what we have in contentstore for given subs_id that is stored in self.sub.
            If non-english:
                a) try to find previously generated sjson.
                b) otherwise generate sjson from srt and return it.

        Filenames naming:
            en: subs_videoid.srt.sjson
            non_en: uk_subs_videoid.srt.sjson

        Raises:
            NotFoundError if for 'en' subtitles no asset is uploaded.
            NotFoundError if youtube_id does not exist / invalid youtube_id
        """
        sub, other_lang = transcripts["sub"], transcripts["transcripts"]
        if youtube_id:
            # Youtube case:
            if self.transcript_language == 'en':
                return Transcript.asset(self.location, youtube_id).data

            youtube_ids = youtube_speed_dict(self)
            if youtube_id not in youtube_ids:
                log.info("Youtube_id %s does not exist", youtube_id)
                raise NotFoundError

            try:
                sjson_transcript = Transcript.asset(self.location, youtube_id, self.transcript_language).data
            except NotFoundError:
                log.info("Can't find content in storage for %s transcript: generating.", youtube_id)
                generate_sjson_for_all_speeds(
                    self,
                    other_lang[self.transcript_language],
                    # Invert the {youtube_id: speed} mapping into {speed: youtube_id}.
                    {speed: yt_id for yt_id, speed in youtube_ids.items()},
                    self.transcript_language
                )
                sjson_transcript = Transcript.asset(self.location, youtube_id, self.transcript_language).data

            return sjson_transcript
        else:
            # HTML5 case
            if self.transcript_language == 'en':
                if '.srt' not in sub:  # not bumper case
                    return Transcript.asset(self.location, sub).data
                try:
                    return get_or_create_sjson(self, {'en': sub})
                except TranscriptException:
                    pass  # to raise NotFoundError and try to get data in get_static_transcript
            elif other_lang:
                return get_or_create_sjson(self, other_lang)

        raise NotFoundError

    def get_static_transcript(self, request, transcripts):
        """
        Courses that are imported with the --nostatic flag do not show
        transcripts/captions properly even if those captions are stored inside
        their static folder. This adds a last resort method of redirecting to
        the static asset path of the course if the transcript can't be found
        inside the contentstore and the course has the static_asset_path field
        set.

        Arguments:
            request: handler request; its GET dict may carry `videoId`.
            transcripts (dict): A dict with all transcripts and a sub.

        Returns:
            Response: a 307 redirect to `/static/<asset_path>/<subs filename>`
                when an asset path and a transcript name are available,
                otherwise a 404.
        """
        response = Response(status=404)
        # Only do redirect for English
        if self.transcript_language != 'en':
            return response

        # If this video lives in library, the code below is not relevant and will error.
        if not isinstance(self.course_id, CourseLocator):
            return response

        # An explicit `videoId` in the request takes precedence over the stored `sub`.
        video_id = request.GET.get('videoId', None)
        if video_id:
            transcript_name = video_id
        else:
            transcript_name = transcripts["sub"]

        if transcript_name:
            # Get the asset path for course
            asset_path = None
            course = self.runtime.modulestore.get_course(self.course_id)
            if course.static_asset_path:
                asset_path = course.static_asset_path
            else:
                # It seems static_asset_path is not set in any XMLModuleStore courses.
                asset_path = getattr(course, 'data_dir', '')

            if asset_path:
                response = Response(
                    status=307,
                    location='/static/{}/{}'.format(
                        asset_path,
                        subs_filename(transcript_name, self.transcript_language)
                    )
                )
        return response

    @XBlock.json_handler
    def publish_completion(self, data, dispatch):  # pylint: disable=unused-argument
        """
        Entry point for completion for student_view.

        Parameters:
            data: JSON dict:
                key: "completion"
                value: float in range [0.0, 1.0]

            dispatch: Ignored.

        Return value: JSON response (200 on success, 400 for malformed data)

        Raises:
            JsonHandlerError: 500 if there is no completion service, 404 if
                completion tracking is disabled, 400 if `completion` is missing,
                not a number, or outside [0.0, 1.0].
        """
        completion_service = self.runtime.service(self, 'completion')
        if completion_service is None:
            raise JsonHandlerError(500, "No completion service found")

        if not completion_service.completion_tracking_enabled():
            raise JsonHandlerError(404, "Completion tracking is not enabled and API calls are unexpected")

        # A payload that is not a dict, or that lacks the key, is malformed
        # data: fall through to the 400 below instead of raising
        # KeyError/TypeError on the lookup.
        completion = data.get('completion') if isinstance(data, dict) else None

        if not isinstance(completion, (int, float)):
            message = "Invalid completion value {}. Must be a float in range [0.0, 1.0]"
            raise JsonHandlerError(400, message.format(completion))

        if not 0.0 <= completion <= 1.0:
            message = "Invalid completion value {}. Must be in range [0.0, 1.0]"
            raise JsonHandlerError(400, message.format(completion))

        self.runtime.publish(self, "completion", data)
        return {"result": "ok"}

    @staticmethod
    def make_transcript_http_response(content, filename, language, content_type, add_attachment_header=True):
        """
        Construct `Response` object.

        Arguments:
            content (unicode): transcript content
            filename (unicode): transcript filename
            language (unicode): transcript language
            content_type (unicode): transcript content type
            add_attachment_header (bool): whether to add attachment header or not

        Returns:
            Response: carries `content` with a `Content-Language` header and,
                if requested, a `Content-Disposition: attachment` header.
        """
        headerlist = [
            ('Content-Language', language),
        ]

        if add_attachment_header:
            headerlist.append(
                (
                    'Content-Disposition',
                    f'attachment; filename="{filename}"'
                )
            )

        response = Response(
            content,
            headerlist=headerlist,
            charset='utf8'
        )
        response.content_type = content_type

        return response

    @XBlock.handler
    def transcript(self, request, dispatch):
        """
        Entry point for transcript handlers for student_view.

        Request GET contains:
            (optional) `videoId` for `translation` dispatch.
            `is_bumper=1` flag for bumper case.
            (optional) `lang` for `download` dispatch.

        Dispatches, (HTTP GET):
            /translation/[language_id]
            /download
            /available_translations/

        Explanations:
            `download`: returns SRT or TXT file.
            `translation`:
                Provide translation for requested language, SJSON format is sent back on success,
                Proper language_id should be in url.
            `available_translations`:
                Returns list of languages, for which transcript files exist.
                For 'en' check if SJSON exists. For non-`en` check if SRT file exists.

        Returns:
            Response: the transcript / language list on success; 400 when the
                language is missing from a `translation` url, 403 when the
                request has no user, 404 when nothing is found or the dispatch
                is unknown.
        """
        is_bumper = request.GET.get('is_bumper', False)
        transcripts = self.get_transcripts_info(is_bumper)

        if dispatch.startswith('translation'):
            language = dispatch.replace('translation', '').strip('/')

            # Because scrapers hit video blocks, verify that a user exists.
            # use the _request attr to get the django request object.
            if not request._request.user:  # pylint: disable=protected-access
                log.info("Transcript: user must be logged or public view enabled to get transcript")
                return Response(status=403)

            if not language:
                log.info("Invalid /translation request: no language.")
                return Response(status=400)

            if language not in ['en'] + list(transcripts["transcripts"].keys()):
                log.info("Video: transcript facilities are not available for given language.")
                return Response(status=404)

            if language != self.transcript_language:
                self.transcript_language = language

            try:
                if is_bumper:
                    content, filename, mimetype = get_transcript_from_contentstore(
                        self,
                        self.transcript_language,
                        Transcript.SJSON,
                        transcripts
                    )
                else:
                    content, filename, mimetype = get_transcript(
                        self,
                        lang=self.transcript_language,
                        output_format=Transcript.SJSON,
                        youtube_id=request.GET.get('videoId'),
                    )

                response = self.make_transcript_http_response(
                    content,
                    filename,
                    self.transcript_language,
                    mimetype,
                    add_attachment_header=False
                )
            except NotFoundError as exc:
                edx_video_id = clean_video_id(self.edx_video_id)
                log.warning(
                    '[Translation Dispatch] %s: %s',
                    self.location,
                    exc if is_bumper else f'Transcript not found for {edx_video_id}, lang: {self.transcript_language}',
                )
                # Last resort: redirect to the course's static asset path.
                response = self.get_static_transcript(request, transcripts)

        elif dispatch == 'download':
            lang = request.GET.get('lang', None)

            try:
                content, filename, mimetype = get_transcript(self, lang, output_format=self.transcript_download_format)
            except NotFoundError:
                return Response(status=404)

            response = self.make_transcript_http_response(
                content,
                filename,
                self.transcript_language,
                mimetype
            )
        elif dispatch.startswith('available_translations'):
            available_translations = self.available_translations(
                transcripts,
                verify_assets=True,
                is_bumper=is_bumper
            )
            if available_translations:
                response = Response(json.dumps(available_translations))
                response.content_type = 'application/json'
            else:
                response = Response(status=404)
        else:  # unknown dispatch
            log.debug("Dispatch is not allowed")
            response = Response(status=404)

        return response

    @XBlock.handler
    def student_view_user_state(self, request, suffix=''):  # lint-amnesty, pylint: disable=unused-argument
        """
        Endpoint to get user-specific state, like current position and playback speed,
        without rendering the full student_view HTML. This is similar to student_view_state,
        but that one cannot contain user-specific info.

        Returns:
            Response: JSON of `student_view_data()` extended with
                `saved_video_position` (in seconds) and `speed`.
        """
        view_state = self.student_view_data()
        view_state.update({
            "saved_video_position": self.saved_video_position.total_seconds(),
            "speed": self.speed,
        })
        return Response(
            json.dumps(view_state),
            content_type='application/json',
            charset='UTF-8'
        )

    @XBlock.handler
    def yt_video_metadata(self, request, suffix=''):  # lint-amnesty, pylint: disable=unused-argument
        """
        Endpoint to get YouTube metadata.
        This handler is only used in the Learning-Core-based runtime. The old
        runtime uses a similar REST API that's not an XBlock handler.

        Returns:
            Response: JSON metadata with the status code reported by
                `load_metadata_from_youtube`, or a 400 with an empty JSON object
                when the block has no `youtube_id_1_0`.
        """
        # Imported inside the handler rather than at module level; presumably to
        # avoid importing LMS code when this module is loaded elsewhere — confirm.
        from lms.djangoapps.courseware.views.views import load_metadata_from_youtube
        if not self.youtube_id_1_0:
            # TODO: more informational response to explain that yt_video_metadata not supported for non-youtube videos.
            return Response('{}', status=400)

        metadata, status_code = load_metadata_from_youtube(video_id=self.youtube_id_1_0, request=request)
        response = Response(json.dumps(metadata), status=status_code)
        response.content_type = 'application/json'
        return response
class VideoStudioViewHandlers:
    """
    Handlers for Studio view.

    This class is written as a mixin: it reads attributes and helpers
    (``runtime``, ``usage_key``, ``transcripts``, ``get_transcripts_info`` ...)
    that are not defined here and must be provided by the class it is combined
    with.
    """

    def validate_transcript_upload_data(self, data):
        """
        Validates video transcript file.

        Arguments:
            data: Transcript data to be validated.

        Returns:
            None or String
            If there is error returns error message otherwise None.
        """
        _ = self.runtime.service(self, "i18n").ugettext

        # Validate the must have attributes - this error is unlikely to be faced by common users.
        must_have_attrs = ['edx_video_id', 'language_code', 'new_language_code']
        missing = [attr for attr in must_have_attrs if attr not in data]
        if missing:
            return _('The following parameters are required: {missing}.').format(missing=', '.join(missing))

        # The existing transcripts only matter when the language code is being
        # changed, so the (asset-verifying) lookup is skipped otherwise.
        if data['language_code'] != data['new_language_code']:
            transcripts = self.get_transcripts_info()
            available_translations = self.available_translations(transcripts, verify_assets=True)
            if data['new_language_code'] in available_translations:
                return _('A transcript with the "{language_code}" language code already exists.').format(
                    language_code=data['new_language_code'],
                )

        if 'file' not in data:
            return _('A transcript file is required.')

        return None

    @XBlock.handler
    def studio_transcript(self, request, dispatch):
        """
        Entry point for Studio transcript handlers.

        Dispatches:
            /translation/[language_id] - language_id should be in url.

        `translation` dispatch support following HTTP methods:
            `POST`:
                Upload srt file. Check possibility of generation of proper sjson files.
                For now, it works only for self.transcripts, not for `en`.
            `DELETE`:
                Delete a transcript. `lang` and `edx_video_id` should be in the JSON request body.
            `GET`:
                Return transcript from storage. SRT format is sent back on success.
                `language_code` should be in GET dict.

        Any other HTTP method, or any other dispatch, gets a 404 response.

        We raise all exceptions right in Studio:
            NotFoundError:
                Video or asset was deleted from module/contentstore, but request came later.
                Seems impossible to be raised. block_render.py catches NotFoundErrors from here.

            /translation POST:
                TypeError:
                    Unjsonable filename or content.
                TranscriptsGenerationException, TranscriptException:
                    no SRT extension or not parse-able by PySRT
                UnicodeDecodeError: non-UTF8 uploaded file content encoding.
        """
        if dispatch.startswith('translation'):
            if request.method == 'POST':
                response = self._studio_transcript_upload(request)
            elif request.method == 'DELETE':
                response = self._studio_transcript_delete(request)
            elif request.method == 'GET':
                response = self._studio_transcript_get(request)
            else:
                # Any other HTTP method is not allowed.
                response = Response(status=404)

        else:  # unknown dispatch
            log.debug("Dispatch is not allowed")
            response = Response(status=404)

        return response

    def _save_transcript_field(self):
        """
        Save `transcripts` block field.

        Writes a copy of the current `transcripts` value back to the field, or
        deletes the field when the value is empty.
        """
        field = self.fields['transcripts']
        if self.transcripts:
            transcripts_copy = self.transcripts.copy()
            # Need to delete to overwrite, it's weird behavior,
            # but it only works like this.
            field.delete_from(self)
            field.write_to(self, transcripts_copy)
        else:
            field.delete_from(self)

    def _studio_transcript_upload(self, request):
        """
        Upload transcript. Used in "POST" method in `studio_transcript`

        For a block in a v2 library the uploaded file is stored as a static
        asset of the block; otherwise it is converted from SRT to SJSON and
        stored through edxval.

        Returns:
            Response: 201 with a JSON payload (`edx_video_id`, `language_code`)
                on success; 400 with a JSON `error` when validation fails or the
                file cannot be decoded/converted.
        """
        _ = self.runtime.service(self, "i18n").ugettext
        error = self.validate_transcript_upload_data(data=request.POST)
        if error:
            response = Response(json={'error': error}, status=400)
        else:
            edx_video_id = clean_video_id(request.POST['edx_video_id'])
            language_code = request.POST['language_code']
            new_language_code = request.POST['new_language_code']
            transcript_file = request.POST['file'].file

            is_library = isinstance(self.usage_key.context_key, LibraryLocatorV2)

            if is_library:
                filename = f'transcript-{new_language_code}.srt'
            else:
                if not edx_video_id:
                    # Back-populate the video ID for an external video.
                    # pylint: disable=attribute-defined-outside-init
                    self.edx_video_id = edx_video_id = create_external_video(display_name='external video')
                filename = f'{edx_video_id}-{new_language_code}.srt'

            try:
                # Convert SRT transcript into an SJSON format
                # and upload it to S3.
                content = transcript_file.read()
                payload = {
                    'edx_video_id': edx_video_id,
                    'language_code': new_language_code
                }
                if is_library:
                    # Save transcript as static asset in Learning Core if is a library component
                    # Note that the `static/`-prefixed name is what gets recorded in
                    # `self.transcripts` below.
                    filename = f"static/{filename}"
                    lib_api.add_library_block_static_asset_file(
                        self.usage_key,
                        filename,
                        content,
                    )
                else:
                    sjson_subs = Transcript.convert(
                        content=content.decode('utf-8'),
                        input_format=Transcript.SRT,
                        output_format=Transcript.SJSON
                    ).encode()
                    create_or_update_video_transcript(
                        video_id=edx_video_id,
                        language_code=language_code,
                        metadata={
                            'file_format': Transcript.SJSON,
                            'language_code': new_language_code
                        },
                        file_data=ContentFile(sjson_subs),
                    )

                # If a new transcript is added, then both new_language_code and
                # language_code fields will have the same value.
                if language_code != new_language_code:
                    self.transcripts.pop(language_code, None)
                self.transcripts[new_language_code] = filename

                if is_library:
                    self._save_transcript_field()
                response = Response(json.dumps(payload), status=201)
            except (TranscriptsGenerationException, UnicodeDecodeError):
                response = Response(
                    json={
                        'error': _(
                            'There is a problem with this transcript file. Try to upload a different file.'
                        )
                    },
                    status=400
                )
        return response

    def _studio_transcript_delete(self, request):
        """
        Delete transcript. Used in "DELETE" method in `studio_transcript`

        The JSON request body must be an object with `lang` and `edx_video_id`.

        Returns:
            Response: 200 once the transcript has been removed; 400 when the
                body is not valid JSON, is not an object, or lacks a required key.
        """
        try:
            request_data = request.json
        except ValueError:
            # Body could not be decoded or parsed as JSON: treat it like any
            # other malformed request instead of letting the error propagate.
            return Response(status=400)

        if (
            not isinstance(request_data, dict)
            or 'lang' not in request_data
            or 'edx_video_id' not in request_data
        ):
            return Response(status=400)

        language = request_data['lang']
        edx_video_id = clean_video_id(request_data['edx_video_id'])

        if edx_video_id:
            delete_video_transcript(video_id=edx_video_id, language_code=language)

        if isinstance(self.usage_key.context_key, LibraryLocatorV2):
            transcript_name = self.transcripts.pop(language, None)
            if transcript_name:
                # TODO: In the future, we need a proper XBlock API
                # like `self.static_assets.delete(...)` instead of coding
                # these runtime-specific/library-specific APIs.
                # NOTE(review): `_studio_transcript_upload` records names that
                # already start with `static/`, so for those entries the path
                # built here is `static/static/...` — confirm this matches what
                # the library API expects.
                lib_api.delete_library_block_static_asset_file(
                    self.usage_key,
                    f"static/{transcript_name}",
                )
                self._save_transcript_field()
        else:
            if language == 'en':
                # remove any transcript file from content store for the video ids
                possible_sub_ids = [
                    self.sub,  # pylint: disable=access-member-before-definition
                    self.youtube_id_1_0
                ] + get_html5_ids(self.html5_sources)
                for sub_id in possible_sub_ids:
                    remove_subs_from_store(sub_id, self, language)

                # update metadata as `en` can also be present in `transcripts` field
                remove_subs_from_store(self.transcripts.pop(language, None), self, language)

                # also empty `sub` field
                self.sub = ''  # pylint: disable=attribute-defined-outside-init
            else:
                remove_subs_from_store(self.transcripts.pop(language, None), self, language)

        return Response(status=200)

    def _studio_transcript_get(self, request):
        """
        Get transcript. Used in "GET" method in `studio_transcript`

        Returns:
            Response: the transcript in SRT format as an attachment; 400 with a
                JSON `error` when `language_code` is missing from the GET dict;
                404 when the transcript cannot be found or converted.
        """
        _ = self.runtime.service(self, "i18n").ugettext
        language = request.GET.get('language_code')
        if not language:
            return Response(json={'error': _('Language is required.')}, status=400)

        try:
            transcript_content, transcript_name, mime_type = get_transcript(
                video=self, lang=language, output_format=Transcript.SRT
            )
            response = Response(transcript_content, headerlist=[
                (
                    'Content-Disposition',
                    f'attachment; filename="{transcript_name}"'
                ),
                ('Content-Language', language),
                ('Content-Type', mime_type)
            ])
        except (UnicodeDecodeError, TranscriptsGenerationException, NotFoundError):
            response = Response(status=404)
        return response