refactor: Move upload/delete transcript int video_config service

Merge pull request #37793 from openedx/farhan/upload-delete-transcript
This commit is contained in:
Muhammad Farhan Khan
2025-12-24 12:21:58 +05:00
committed by GitHub
6 changed files with 201 additions and 133 deletions

View File

@@ -130,6 +130,7 @@ def _get_course_block(course_descriptor, user):
""" Gets course block that takes into account user state and permissions """
# Adding courseware imports here to insulate other apps (e.g. schedules) to
# avoid import errors.
# TODO remove the LMS dependency https://github.com/openedx/edx-platform/issues/37659
from lms.djangoapps.courseware.model_data import FieldDataCache
from lms.djangoapps.courseware.block_render import get_block_for_descriptor

View File

@@ -9,9 +9,13 @@ for the extracted video block in xblocks-contrib repository.
import logging
from opaque_keys.edx.keys import CourseKey, UsageKey
from opaque_keys.edx.locator import LibraryLocatorV2
from xblocks_contrib.video.exceptions import TranscriptNotFoundError
from openedx.core.djangoapps.video_config import sharing
from django.core.files import File
from django.core.files.base import ContentFile
from edxval.api import create_external_video, create_or_update_video_transcript, delete_video_transcript
from organizations.api import get_course_organization
from openedx.core.djangoapps.video_config.models import (
CourseYoutubeBlockedFlag,
@@ -19,6 +23,16 @@ from openedx.core.djangoapps.video_config.models import (
)
from openedx.core.djangoapps.video_config.toggles import TRANSCRIPT_FEEDBACK
from openedx.core.djangoapps.video_pipeline.config.waffle import DEPRECATE_YOUTUBE
from openedx.core.djangoapps.content_libraries.api import (
add_library_block_static_asset_file,
delete_library_block_static_asset_file,
)
from openedx.core.djangoapps.video_config.transcripts_utils import (
Transcript,
clean_video_id,
get_html5_ids,
remove_subs_from_store,
)
log = logging.getLogger(__name__)
@@ -30,6 +44,9 @@ class VideoConfigService:
This service abstracts away edx-platform specific functionality
that the Video XBlock needs, allowing the Video XBlock to be
extracted to a separate repository.
TODO: This service could be improved in a few ways:
https://github.com/openedx/edx-platform/issues/37656
"""
def get_public_video_url(self, usage_id: UsageKey) -> str:
@@ -120,3 +137,117 @@ class VideoConfigService:
raise TranscriptNotFoundError(
f"Failed to get transcript: {exc}"
) from exc
def upload_transcript(
self,
*,
video_block,
language_code: str,
new_language_code: str | None,
transcript_file: File,
edx_video_id: str | None,
) -> None:
"""
Store a transcript, however the runtime prefers to.
Mutates:
* video_block.transcripts
* video_block.edx_video_id, iff a new video is created in edx-val.
Can raise:
* UnicodeDecodeError
* TranscriptsGenerationException
"""
is_library = isinstance(video_block.usage_key.context_key, LibraryLocatorV2)
content: bytes = transcript_file.read()
if is_library:
# Save transcript as static asset in Learning Core if is a library component
filename = f'static/transcript-{new_language_code}.srt'
add_library_block_static_asset_file(video_block.usage_key, filename, content)
else:
edx_video_id = clean_video_id(edx_video_id)
if not edx_video_id:
# Back-populate the video ID for an external video.
# pylint: disable=attribute-defined-outside-init
edx_video_id = create_external_video(display_name='external video')
video_block.edx_video_id = edx_video_id
filename = f'{edx_video_id}-{new_language_code}.srt'
# Convert SRT transcript into an SJSON format and upload it to S3 if a course component
sjson_subs = Transcript.convert(
content=content.decode('utf-8'),
input_format=Transcript.SRT,
output_format=Transcript.SJSON
).encode()
create_or_update_video_transcript(
video_id=edx_video_id,
language_code=language_code,
metadata={
'file_format': Transcript.SJSON,
'language_code': new_language_code,
},
file_data=ContentFile(sjson_subs),
)
# If a new transcript is added, then both new_language_code and
# language_code fields will have the same value.
if language_code != new_language_code:
video_block.transcripts.pop(language_code, None)
video_block.transcripts[new_language_code] = filename
if is_library:
_save_transcript_field(video_block)
def delete_transcript(
self,
*,
video_block,
edx_video_id: str | None,
language_code: str,
) -> None:
"""
Delete a transcript from the runtime's storage.
"""
edx_video_id = clean_video_id(edx_video_id)
if edx_video_id:
delete_video_transcript(video_id=edx_video_id, language_code=language_code)
if isinstance(video_block.context_key, LibraryLocatorV2):
transcript_name = video_block.transcripts.pop(language_code, None)
if transcript_name:
delete_library_block_static_asset_file(video_block.usage_key, f"static/{transcript_name}")
_save_transcript_field(video_block)
else:
if language_code == 'en':
# remove any transcript file from content store for the video ids
possible_sub_ids = [
video_block.sub, # pylint: disable=access-member-before-definition
video_block.youtube_id_1_0
] + get_html5_ids(video_block.html5_sources)
for sub_id in possible_sub_ids:
remove_subs_from_store(sub_id, video_block, language_code)
# update metadata as `en` can also be present in `transcripts` field
remove_subs_from_store(
video_block.transcripts.pop(language_code, None), video_block, language_code
)
# also empty `sub` field
video_block.sub = '' # pylint: disable=attribute-defined-outside-init
else:
remove_subs_from_store(
video_block.transcripts.pop(language_code, None), video_block, language_code
)
def _save_transcript_field(video_block):
"""
Hacky workaround to ensure that transcript field is saved for Learning Core video blocks.
It's not clear why this is necessary.
"""
field = video_block.fields['transcripts']
if video_block.transcripts:
transcripts_copy = video_block.transcripts.copy()
# Need to delete to overwrite, it's weird behavior,
# but it only works like this.
field.delete_from(video_block)
field.write_to(video_block, transcripts_copy)
else:
field.delete_from(video_block)

View File

@@ -28,7 +28,6 @@ from xmodule.contentstore.content import StaticContent
from xmodule.contentstore.django import contentstore
from xmodule.exceptions import NotFoundError
from xmodule.video_block.bumper_utils import get_bumper_settings
from xblocks_contrib.video.exceptions import TranscriptsGenerationException
@@ -834,6 +833,11 @@ class VideoTranscriptsMixin:
is_bumper(bool): If True, the request is for the bumper transcripts
include_val_transcripts(bool): If True, include edx-val transcripts as well
"""
# TODO: This causes a circular import when imported at the top-level.
# This import will be removed as part of the VideoBlock extraction.
# https://github.com/openedx/edx-platform/issues/36282
from xmodule.video_block.bumper_utils import get_bumper_settings
if is_bumper:
transcripts = copy.deepcopy(get_bumper_settings(self).get('transcripts', {}))
sub = transcripts.pop("en", "")

View File

@@ -22,7 +22,6 @@ from xblock.field_data import DictFieldData, FieldData, SplitFieldData
from xblock.fields import Scope, ScopeIds
from xblock.runtime import IdReader, KvsFieldData, MemoryIdManager, Runtime
from openedx.core.djangoapps.video_config.services import VideoConfigService
from xmodule.errortracker import make_error_tracker
from xmodule.contentstore.django import contentstore
from xmodule.modulestore.django import XBlockI18nService
@@ -229,6 +228,8 @@ class XBlockRuntime(RuntimeShim, Runtime):
Submit a grade for the block.
"""
if self.user and not self.user.is_anonymous:
# TODO: we shouldn't be using an LMS API here.
# https://github.com/openedx/edx-platform/issues/37660
grades_signals.SCORE_PUBLISHED.send(
sender=None,
block=block,
@@ -343,6 +344,8 @@ class XBlockRuntime(RuntimeShim, Runtime):
elif service_name == 'error_tracker':
return make_error_tracker()
elif service_name == 'video_config':
# Import here to avoid circular dependency
from openedx.core.djangoapps.video_config.services import VideoConfigService
return VideoConfigService()
# Otherwise, fall back to the base implementation which loads services

View File

@@ -107,12 +107,6 @@ ignore_imports =
# -> openedx.core.djangoapps.course_groups.partition_scheme
# -> lms.djangoapps.courseware.masquerade
openedx.core.djangoapps.course_groups.partition_scheme -> lms.djangoapps.courseware.masquerade
# cms.djangoapps.contentstore.[various]
# -> openedx.core.djangoapps.content.course_overviews.models
# -> lms.djangoapps.ccx.utils
# & lms.djangoapps.certificates.api
# & lms.djangoapps.discussion.django_comment_client
openedx.core.djangoapps.content.course_overviews.models -> lms.djangoapps.*.*
# cms.djangoapps.export_course_metadata.tasks
# -> openedx.core.djangoapps.schedules.content_highlights
# -> lms.djangoapps.courseware.block_render & lms.djangoapps.courseware.model_data
@@ -170,6 +164,12 @@ ignore_imports =
# https://github.com/openedx/edx-platform/issues/33428
openedx.core.djangoapps.content_libraries.permissions -> cms.djangoapps.course_creators.views
openedx.core.djangoapps.content_libraries.tasks -> cms.djangoapps.contentstore.storage
# Outstanding arch issue: course_overviews is tangled up with LMS
# https://github.com/openedx/edx-platform/issues/37658
openedx.core.djangoapps.content.course_overviews.models -> lms.djangoapps.**
# Outstanding arch issue: content_highlights uses courseware block_render
# https://github.com/openedx/edx-platform/issues/37659
openedx.core.djangoapps.schedules.content_highlights -> lms.djangoapps.courseware.block_render
[importlinter:contract:2]
name = Do not depend on non-public API of isolated apps.
@@ -234,3 +234,15 @@ ignore_imports =
# Content libraries imports contentstore.helpers which imports upstream_sync
openedx.core.djangoapps.content_libraries.api.blocks -> cms.djangoapps.contentstore.helpers
openedx.core.djangoapps.content_libraries.api.libraries -> cms.djangoapps.contentstore.helpers
# Outstanding arch issue: course_overviews is tangled up with LMS
# https://github.com/openedx/edx-platform/issues/37658
openedx.core.djangoapps.content.course_overviews.models -> lms.djangoapps.**
# Outstanding arch issue: content_highlights uses courseware block_render
# https://github.com/openedx/edx-platform/issues/37659
openedx.core.djangoapps.schedules.content_highlights -> lms.djangoapps.courseware.block_render
# This import happens only because grading signals are defined in LMS rather than openedx-events
# https://github.com/openedx/edx-platform/issues/37660
openedx.core.djangoapps.xblock.runtime.runtime -> lms.djangoapps.grades.api
# These imports will become OK once we worked on the following issue:
# https://github.com/openedx/edx-platform/issues/33428
openedx.core.djangoapps.video_config.services -> openedx.core.djangoapps.content_libraries.api

View File

@@ -10,24 +10,19 @@ import json
import logging
import math
from django.core.files.base import ContentFile
from django.utils.timezone import now
from edxval.api import create_external_video, create_or_update_video_transcript, delete_video_transcript
from opaque_keys.edx.locator import CourseLocator, LibraryLocatorV2
from opaque_keys.edx.locator import CourseLocator
from webob import Response
from xblock.core import XBlock
from xblock.exceptions import JsonHandlerError
from xmodule.exceptions import NotFoundError
from xmodule.fields import RelativeTime
from openedx.core.djangoapps.content_libraries import api as lib_api
from openedx.core.djangoapps.video_config.transcripts_utils import (
Transcript,
clean_video_id,
get_html5_ids,
get_transcript_from_contentstore,
remove_subs_from_store,
subs_filename,
)
from xblocks_contrib.video.exceptions import (
@@ -460,141 +455,63 @@ class VideoStudioViewHandlers:
return response
def _save_transcript_field(self):
"""
Save `transcripts` block field.
"""
field = self.fields['transcripts']
if self.transcripts:
transcripts_copy = self.transcripts.copy()
# Need to delete to overwrite, it's weird behavior,
# but it only works like this.
field.delete_from(self)
field.write_to(self, transcripts_copy)
else:
field.delete_from(self)
def _studio_transcript_upload(self, request):
"""
Upload transcript. Used in "POST" method in `studio_transcript`
"""
_ = self.runtime.service(self, "i18n").ugettext
video_config_service = self.runtime.service(self, 'video_config')
if not video_config_service:
return Response(json={'error': _('Runtime does not support transcripts.')}, status=400)
error = self.validate_transcript_upload_data(data=request.POST)
if error:
response = Response(json={'error': error}, status=400)
else:
edx_video_id = clean_video_id(request.POST['edx_video_id'])
language_code = request.POST['language_code']
new_language_code = request.POST['new_language_code']
transcript_file = request.POST['file'].file
is_library = isinstance(self.usage_key.context_key, LibraryLocatorV2)
if is_library:
filename = f'transcript-{new_language_code}.srt'
else:
if not edx_video_id:
# Back-populate the video ID for an external video.
# pylint: disable=attribute-defined-outside-init
self.edx_video_id = edx_video_id = create_external_video(display_name='external video')
filename = f'{edx_video_id}-{new_language_code}.srt'
try:
# Convert SRT transcript into an SJSON format
# and upload it to S3.
content = transcript_file.read()
payload = {
'edx_video_id': edx_video_id,
'language_code': new_language_code
}
if is_library:
# Save transcript as static asset in Learning Core if is a library component
filename = f"static/{filename}"
lib_api.add_library_block_static_asset_file(
self.usage_key,
filename,
content,
return Response(json={'error': error}, status=400)
edx_video_id = (request.POST['edx_video_id'] or "").strip()
language_code = request.POST['language_code']
new_language_code = request.POST['new_language_code']
try:
video_config_service.upload_transcript(
video_block=self, # NOTE: .edx_video_id and .transcripts may get mutated
edx_video_id=edx_video_id,
language_code=language_code,
new_language_code=new_language_code,
transcript_file=request.POST['file'].file,
)
return Response(
json.dumps(
{
"edx_video_id": edx_video_id or self.edx_video_id,
"language_code": new_language_code,
}
),
status=201,
)
except (TranscriptsGenerationException, UnicodeDecodeError):
return Response(
json={
'error': _(
'There is a problem with this transcript file. Try to upload a different file.'
)
else:
sjson_subs = Transcript.convert(
content=content.decode('utf-8'),
input_format=Transcript.SRT,
output_format=Transcript.SJSON
).encode()
create_or_update_video_transcript(
video_id=edx_video_id,
language_code=language_code,
metadata={
'file_format': Transcript.SJSON,
'language_code': new_language_code
},
file_data=ContentFile(sjson_subs),
)
# If a new transcript is added, then both new_language_code and
# language_code fields will have the same value.
if language_code != new_language_code:
self.transcripts.pop(language_code, None)
self.transcripts[new_language_code] = filename
if is_library:
self._save_transcript_field()
response = Response(json.dumps(payload), status=201)
except (TranscriptsGenerationException, UnicodeDecodeError):
response = Response(
json={
'error': _(
'There is a problem with this transcript file. Try to upload a different file.'
)
},
status=400
)
return response
},
status=400
)
def _studio_transcript_delete(self, request):
"""
Delete transcript. Used in "DELETE" method in `studio_transcript`
"""
_ = self.runtime.service(self, "i18n").ugettext
video_config_service = self.runtime.service(self, 'video_config')
if not video_config_service:
return Response(json={'error': _('Runtime does not support transcripts.')}, status=400)
request_data = request.json
if 'lang' not in request_data or 'edx_video_id' not in request_data:
return Response(status=400)
language = request_data['lang']
edx_video_id = clean_video_id(request_data['edx_video_id'])
if edx_video_id:
delete_video_transcript(video_id=edx_video_id, language_code=language)
if isinstance(self.usage_key.context_key, LibraryLocatorV2):
transcript_name = self.transcripts.pop(language, None)
if transcript_name:
# TODO: In the future, we need a proper XBlock API
# like `self.static_assets.delete(...)` instead of coding
# these runtime-specific/library-specific APIs.
lib_api.delete_library_block_static_asset_file(
self.usage_key,
f"static/{transcript_name}",
)
self._save_transcript_field()
else:
if language == 'en':
# remove any transcript file from content store for the video ids
possible_sub_ids = [
self.sub, # pylint: disable=access-member-before-definition
self.youtube_id_1_0
] + get_html5_ids(self.html5_sources)
for sub_id in possible_sub_ids:
remove_subs_from_store(sub_id, self, language)
# update metadata as `en` can also be present in `transcripts` field
remove_subs_from_store(self.transcripts.pop(language, None), self, language)
# also empty `sub` field
self.sub = '' # pylint: disable=attribute-defined-outside-init
else:
remove_subs_from_store(self.transcripts.pop(language, None), self, language)
video_config_service.delete_transcript(
video_block=self,
edx_video_id=request_data['edx_video_id'],
language_code=request_data['lang'],
)
return Response(status=200)
def _studio_transcript_get(self, request):