From b57e2ac1eaceb3f36d1623164e441d54b6385648 Mon Sep 17 00:00:00 2001 From: Muhammad Farhan Khan Date: Fri, 5 Dec 2025 17:53:10 +0500 Subject: [PATCH] chore: move get_transcript method to video service (#37635) https://github.com/openedx/public-engineering/issues/445 --- .../tests/test_transcripts_utils.py | 13 +++--- .../transcript_storage_handlers.py | 3 +- .../contentstore/views/transcripts_ajax.py | 2 +- .../core/djangoapps/video_config/services.py | 29 ++++++++++++ .../video_config/transcripts_utils.py | 6 +-- .../core/djangoapps/xblock/runtime/runtime.py | 3 ++ xmodule/video_block/video_block.py | 35 +++++++++------ xmodule/video_block/video_handlers.py | 44 ++++++++++++++++--- 8 files changed, 102 insertions(+), 33 deletions(-) diff --git a/cms/djangoapps/contentstore/tests/test_transcripts_utils.py b/cms/djangoapps/contentstore/tests/test_transcripts_utils.py index 5a401e62d9..f856ef6a0c 100644 --- a/cms/djangoapps/contentstore/tests/test_transcripts_utils.py +++ b/cms/djangoapps/contentstore/tests/test_transcripts_utils.py @@ -24,6 +24,7 @@ from xmodule.exceptions import NotFoundError # lint-amnesty, pylint: disable=wr from xmodule.modulestore.tests.django_utils import SharedModuleStoreTestCase # lint-amnesty, pylint: disable=wrong-import-order from xmodule.modulestore.tests.factories import CourseFactory, BlockFactory # lint-amnesty, pylint: disable=wrong-import-order from openedx.core.djangoapps.video_config import transcripts_utils # lint-amnesty, pylint: disable=wrong-import-order +from xblocks_contrib.video.exceptions import TranscriptsGenerationException TEST_DATA_CONTENTSTORE = copy.deepcopy(settings.CONTENTSTORE) TEST_DATA_CONTENTSTORE['DOC_STORE_CONFIG']['db'] = 'test_xcontent_%s' % uuid4().hex @@ -314,7 +315,7 @@ class TestGenerateSubsFromSource(TestDownloadYoutubeSubs): # lint-amnesty, pyli """) self.clear_subs_content(youtube_subs) - # Check transcripts_utils.TranscriptsGenerationException not thrown. + # Check TranscriptsGenerationException not thrown. # Also checks that uppercase file extensions are supported. transcripts_utils.generate_subs_from_source(youtube_subs, 'SRT', srt_filedata, self.course) @@ -345,7 +346,7 @@ class TestGenerateSubsFromSource(TestDownloadYoutubeSubs): # lint-amnesty, pyli At the left we can see... """) - with self.assertRaises(transcripts_utils.TranscriptsGenerationException) as cm: + with self.assertRaises(TranscriptsGenerationException) as cm: transcripts_utils.generate_subs_from_source(youtube_subs, 'BAD_FORMAT', srt_filedata, self.course) exception_message = str(cm.exception) self.assertEqual(exception_message, "We support only SubRip (*.srt) transcripts format.") @@ -359,7 +360,7 @@ class TestGenerateSubsFromSource(TestDownloadYoutubeSubs): # lint-amnesty, pyli srt_filedata = """BAD_DATA""" - with self.assertRaises(transcripts_utils.TranscriptsGenerationException) as cm: + with self.assertRaises(TranscriptsGenerationException) as cm: transcripts_utils.generate_subs_from_source(youtube_subs, 'srt', srt_filedata, self.course) exception_message = str(cm.exception) self.assertEqual(exception_message, "Something wrong with SubRip transcripts file during parsing.") @@ -588,7 +589,7 @@ class TestTranscript(unittest.TestCase): to convert invalid srt transcript to sjson. """ invalid_srt_transcript = 'invalid SubRip file content' - with self.assertRaises(transcripts_utils.TranscriptsGenerationException): + with self.assertRaises(TranscriptsGenerationException): transcripts_utils.Transcript.convert(invalid_srt_transcript, 'srt', 'sjson') def test_convert_invalid_invalid_sjson_to_srt(self): @@ -963,7 +964,7 @@ class TestGetTranscript(SharedModuleStoreTestCase): assert error_transcript["text"][0] in content @ddt.data( - transcripts_utils.TranscriptsGenerationException, + TranscriptsGenerationException, UnicodeDecodeError('aliencodec', b'\x02\x01', 1, 2, 'alien codec found!') ) @patch('openedx.core.djangoapps.video_config.transcripts_utils.Transcript') @@ -983,7 +984,7 @@ class TestGetTranscript(SharedModuleStoreTestCase): ) @ddt.data( - transcripts_utils.TranscriptsGenerationException, + TranscriptsGenerationException, UnicodeDecodeError('aliencodec', b'\x02\x01', 1, 2, 'alien codec found!') ) @patch('openedx.core.djangoapps.video_config.transcripts_utils.Transcript') diff --git a/cms/djangoapps/contentstore/transcript_storage_handlers.py b/cms/djangoapps/contentstore/transcript_storage_handlers.py index ba26d4d678..2c254eb359 100644 --- a/cms/djangoapps/contentstore/transcript_storage_handlers.py +++ b/cms/djangoapps/contentstore/transcript_storage_handlers.py @@ -23,7 +23,8 @@ from opaque_keys.edx.keys import CourseKey from common.djangoapps.util.json_request import JsonResponse from openedx.core.djangoapps.video_config.models import VideoTranscriptEnabledFlag from openedx.core.djangoapps.video_pipeline.api import update_3rd_party_transcription_service_credentials -from openedx.core.djangoapps.video_config.transcripts_utils import Transcript, TranscriptsGenerationException # lint-amnesty, pylint: disable=wrong-import-order +from openedx.core.djangoapps.video_config.transcripts_utils import Transcript # lint-amnesty, pylint: disable=wrong-import-order +from xblocks_contrib.video.exceptions import TranscriptsGenerationException from .toggles import use_mock_video_uploads from .video_storage_handlers import TranscriptProvider diff --git a/cms/djangoapps/contentstore/views/transcripts_ajax.py b/cms/djangoapps/contentstore/views/transcripts_ajax.py index 26bde09035..903bf2dd79 100644 --- a/cms/djangoapps/contentstore/views/transcripts_ajax.py +++ b/cms/djangoapps/contentstore/views/transcripts_ajax.py @@ -33,7 +33,6 @@ from xmodule.modulestore.exceptions import ItemNotFoundError # lint-amnesty, py from openedx.core.djangoapps.video_config.transcripts_utils import ( # lint-amnesty, pylint: disable=wrong-import-order GetTranscriptsFromYouTubeException, Transcript, - TranscriptsGenerationException, TranscriptsRequestValidationException, clean_video_id, download_youtube_subs, @@ -44,6 +43,7 @@ from openedx.core.djangoapps.video_config.transcripts_utils import ( # lint-amn get_transcript_link_from_youtube, get_transcript_links_from_youtube, ) +from xblocks_contrib.video.exceptions import TranscriptsGenerationException from openedx.core.djangoapps.content_libraries import api as lib_api from openedx.core.djangoapps.xblock import api as xblock_api from openedx.core.djangoapps.xblock.data import CheckPerm diff --git a/openedx/core/djangoapps/video_config/services.py b/openedx/core/djangoapps/video_config/services.py index 110c4ed05e..66799d7393 100644 --- a/openedx/core/djangoapps/video_config/services.py +++ b/openedx/core/djangoapps/video_config/services.py @@ -9,6 +9,7 @@ for the extracted video block in xblocks-contrib repository. import logging from opaque_keys.edx.keys import CourseKey, UsageKey +from xblocks_contrib.video.exceptions import TranscriptNotFoundError from openedx.core.djangoapps.video_config import sharing from organizations.api import get_course_organization @@ -91,3 +92,31 @@ class VideoConfigService: Check if HLS playback is enabled for the course. """ return HLSPlaybackEnabledFlag.feature_enabled(course_id) + + def get_transcript( + self, + video_block, + lang: str | None = None, + output_format: str = 'srt', + youtube_id: str | None = None, + ) -> tuple[bytes, str, str]: + """ + Retrieve a transcript from the runtime's storage. + + Returns: + tuple(bytes, str, str): transcript content, filename, and mimetype. + + Raises: + TranscriptsGenerationException: If the transcript cannot be found or retrieved + TranscriptNotFoundError: If the transcript cannot be found or retrieved + """ + # Import here to avoid circular dependency + from openedx.core.djangoapps.video_config.transcripts_utils import get_transcript + from xmodule.exceptions import NotFoundError + + try: + return get_transcript(video_block, lang, output_format, youtube_id) + except NotFoundError as exc: + raise TranscriptNotFoundError( + f"Failed to get transcript: {exc}" + ) from exc diff --git a/openedx/core/djangoapps/video_config/transcripts_utils.py b/openedx/core/djangoapps/video_config/transcripts_utils.py index cbd0a0b31b..be86324cd6 100644 --- a/openedx/core/djangoapps/video_config/transcripts_utils.py +++ b/openedx/core/djangoapps/video_config/transcripts_utils.py @@ -29,6 +29,8 @@ from xmodule.contentstore.django import contentstore from xmodule.exceptions import NotFoundError from xmodule.video_block.bumper_utils import get_bumper_settings +from xblocks_contrib.video.exceptions import TranscriptsGenerationException + try: from edxval import api as edxval_api @@ -45,10 +47,6 @@ class TranscriptException(Exception): pass -class TranscriptsGenerationException(Exception): - pass - - class GetTranscriptsFromYouTubeException(Exception): pass diff --git a/openedx/core/djangoapps/xblock/runtime/runtime.py b/openedx/core/djangoapps/xblock/runtime/runtime.py index 8829cedac7..7f58beff82 100644 --- a/openedx/core/djangoapps/xblock/runtime/runtime.py +++ b/openedx/core/djangoapps/xblock/runtime/runtime.py @@ -22,6 +22,7 @@ from xblock.field_data import DictFieldData, FieldData, SplitFieldData from xblock.fields import Scope, ScopeIds from xblock.runtime import IdReader, KvsFieldData, MemoryIdManager, Runtime +from openedx.core.djangoapps.video_config.services import VideoConfigService from xmodule.errortracker import make_error_tracker from xmodule.contentstore.django import contentstore from xmodule.modulestore.django import XBlockI18nService @@ -341,6 +342,8 @@ class XBlockRuntime(RuntimeShim, Runtime): return EnrollmentsService() elif service_name == 'error_tracker': return make_error_tracker() + elif service_name == 'video_config': + return VideoConfigService() # Otherwise, fall back to the base implementation which loads services # defined in the constructor: diff --git a/xmodule/video_block/video_block.py b/xmodule/video_block/video_block.py index 0ea6864b1c..e8a7b7e87e 100644 --- a/xmodule/video_block/video_block.py +++ b/xmodule/video_block/video_block.py @@ -56,13 +56,14 @@ from openedx.core.djangoapps.video_config.transcripts_utils import ( clean_video_id, get_endonym_or_label, get_html5_ids, - get_transcript, subs_filename ) from .video_handlers import VideoStudentViewHandlers, VideoStudioViewHandlers from .video_utils import create_youtube_string, format_xml_exception_message, get_poster, rewrite_video_url from .video_xfields import VideoFields +from xblocks_contrib.video.exceptions import TranscriptNotFoundError + # The following import/except block for edxval is temporary measure until # edxval is a proper XBlock Runtime Service. # @@ -652,13 +653,15 @@ class _BuiltInVideoBlock( # construct transcripts info and also find if `en` subs exist transcripts_info = self.get_transcripts_info() possible_sub_ids = [self.sub, self.youtube_id_1_0] + get_html5_ids(self.html5_sources) - for sub_id in possible_sub_ids: - try: - _, sub_id, _ = get_transcript(self, lang='en', output_format=Transcript.TXT) - transcripts_info['transcripts'] = dict(transcripts_info['transcripts'], en=sub_id) - break - except NotFoundError: - continue + video_config_service = self.runtime.service(self, 'video_config') + if video_config_service: + for sub_id in possible_sub_ids: + try: + _, sub_id, _ = video_config_service.get_transcript(self, lang='en', output_format=Transcript.TXT) + transcripts_info['transcripts'] = dict(transcripts_info['transcripts'], en=sub_id) + break + except TranscriptNotFoundError: + continue editable_fields['transcripts']['value'] = transcripts_info['transcripts'] editable_fields['transcripts']['urlRoot'] = self.runtime.handler_url( @@ -1091,12 +1094,16 @@ class _BuiltInVideoBlock( def _update_transcript_for_index(language=None): """ Find video transcript - if not found, don't update index """ - try: - transcript = get_transcript(self, lang=language, output_format=Transcript.TXT)[0].replace("\n", " ") - transcript_index_name = f"transcript_{language if language else self.transcript_language}" - video_body.update({transcript_index_name: transcript}) - except NotFoundError: - pass + video_config_service = self.runtime.service(self, 'video_config') + if video_config_service: + try: + transcript = video_config_service.get_transcript( + self, lang=language, output_format=Transcript.TXT + )[0].replace("\n", " ") + transcript_index_name = f"transcript_{language if language else self.transcript_language}" + video_body.update({transcript_index_name: transcript}) + except TranscriptNotFoundError: + pass if self.sub: _update_transcript_for_index() diff --git a/xmodule/video_block/video_handlers.py b/xmodule/video_block/video_handlers.py index ea35736414..ef65f90548 100644 --- a/xmodule/video_block/video_handlers.py +++ b/xmodule/video_block/video_handlers.py @@ -25,21 +25,44 @@ from openedx.core.djangoapps.content_libraries import api as lib_api from openedx.core.djangoapps.video_config.transcripts_utils import ( Transcript, TranscriptException, - TranscriptsGenerationException, clean_video_id, generate_sjson_for_all_speeds, get_html5_ids, get_or_create_sjson, - get_transcript, get_transcript_from_contentstore, remove_subs_from_store, subs_filename, youtube_speed_dict ) +from xblocks_contrib.video.exceptions import ( + TranscriptsGenerationException, + TranscriptNotFoundError, +) log = logging.getLogger(__name__) +def get_transcript( + video_block, + lang: str | None = None, + output_format: str = 'srt', + youtube_id: str | None = None, +) -> tuple[bytes, str, str]: + """ + Retrieve a transcript using a video block's configuration service. + + Returns: + tuple(bytes, str, str): transcript content, filename, and mimetype. + + Raises: + Exception: If the video config service is not available or the transcript cannot be retrieved. + """ + video_config_service = video_block.runtime.service(video_block, 'video_config') + if not video_config_service: + raise Exception("Video config service not found") + return video_config_service.get_transcript(video_block, lang, output_format, youtube_id) + + # Disable no-member warning: # pylint: disable=no-member @@ -356,7 +379,7 @@ class VideoStudentViewHandlers: mimetype, add_attachment_header=False ) - except NotFoundError as exc: + except (NotFoundError, TranscriptNotFoundError) as exc: edx_video_id = clean_video_id(self.edx_video_id) log.warning( '[Translation Dispatch] %s: %s', @@ -370,7 +393,7 @@ class VideoStudentViewHandlers: try: content, filename, mimetype = get_transcript(self, lang, output_format=self.transcript_download_format) - except NotFoundError: + except TranscriptNotFoundError: return Response(status=404) response = self.make_transcript_http_response( @@ -660,8 +683,11 @@ class VideoStudioViewHandlers: return Response(json={'error': _('Language is required.')}, status=400) try: - transcript_content, transcript_name, mime_type = get_transcript( - video=self, lang=language, output_format=Transcript.SRT + video_config_service = self.runtime.service(self, 'video_config') + if not video_config_service: + return Response(status=404) + transcript_content, transcript_name, mime_type = video_config_service.get_transcript( + self, lang=language, output_format=Transcript.SRT ) response = Response(transcript_content, headerlist=[ ( @@ -671,6 +697,10 @@ class VideoStudioViewHandlers: ('Content-Language', language), ('Content-Type', mime_type) ]) - except (UnicodeDecodeError, TranscriptsGenerationException, NotFoundError): + except ( + UnicodeDecodeError, + TranscriptsGenerationException, + TranscriptNotFoundError + ): response = Response(status=404) return response