chore: move get_transcript method to video service (#37635)
https://github.com/openedx/public-engineering/issues/445
This commit is contained in:
committed by
GitHub
parent
d592784b35
commit
b57e2ac1ea
@@ -24,6 +24,7 @@ from xmodule.exceptions import NotFoundError # lint-amnesty, pylint: disable=wr
|
||||
from xmodule.modulestore.tests.django_utils import SharedModuleStoreTestCase # lint-amnesty, pylint: disable=wrong-import-order
|
||||
from xmodule.modulestore.tests.factories import CourseFactory, BlockFactory # lint-amnesty, pylint: disable=wrong-import-order
|
||||
from openedx.core.djangoapps.video_config import transcripts_utils # lint-amnesty, pylint: disable=wrong-import-order
|
||||
from xblocks_contrib.video.exceptions import TranscriptsGenerationException
|
||||
|
||||
TEST_DATA_CONTENTSTORE = copy.deepcopy(settings.CONTENTSTORE)
|
||||
TEST_DATA_CONTENTSTORE['DOC_STORE_CONFIG']['db'] = 'test_xcontent_%s' % uuid4().hex
|
||||
@@ -314,7 +315,7 @@ class TestGenerateSubsFromSource(TestDownloadYoutubeSubs): # lint-amnesty, pyli
|
||||
""")
|
||||
self.clear_subs_content(youtube_subs)
|
||||
|
||||
# Check transcripts_utils.TranscriptsGenerationException not thrown.
|
||||
# Check TranscriptsGenerationException not thrown.
|
||||
# Also checks that uppercase file extensions are supported.
|
||||
transcripts_utils.generate_subs_from_source(youtube_subs, 'SRT', srt_filedata, self.course)
|
||||
|
||||
@@ -345,7 +346,7 @@ class TestGenerateSubsFromSource(TestDownloadYoutubeSubs): # lint-amnesty, pyli
|
||||
At the left we can see...
|
||||
""")
|
||||
|
||||
with self.assertRaises(transcripts_utils.TranscriptsGenerationException) as cm:
|
||||
with self.assertRaises(TranscriptsGenerationException) as cm:
|
||||
transcripts_utils.generate_subs_from_source(youtube_subs, 'BAD_FORMAT', srt_filedata, self.course)
|
||||
exception_message = str(cm.exception)
|
||||
self.assertEqual(exception_message, "We support only SubRip (*.srt) transcripts format.")
|
||||
@@ -359,7 +360,7 @@ class TestGenerateSubsFromSource(TestDownloadYoutubeSubs): # lint-amnesty, pyli
|
||||
|
||||
srt_filedata = """BAD_DATA"""
|
||||
|
||||
with self.assertRaises(transcripts_utils.TranscriptsGenerationException) as cm:
|
||||
with self.assertRaises(TranscriptsGenerationException) as cm:
|
||||
transcripts_utils.generate_subs_from_source(youtube_subs, 'srt', srt_filedata, self.course)
|
||||
exception_message = str(cm.exception)
|
||||
self.assertEqual(exception_message, "Something wrong with SubRip transcripts file during parsing.")
|
||||
@@ -588,7 +589,7 @@ class TestTranscript(unittest.TestCase):
|
||||
to convert invalid srt transcript to sjson.
|
||||
"""
|
||||
invalid_srt_transcript = 'invalid SubRip file content'
|
||||
with self.assertRaises(transcripts_utils.TranscriptsGenerationException):
|
||||
with self.assertRaises(TranscriptsGenerationException):
|
||||
transcripts_utils.Transcript.convert(invalid_srt_transcript, 'srt', 'sjson')
|
||||
|
||||
def test_convert_invalid_invalid_sjson_to_srt(self):
|
||||
@@ -963,7 +964,7 @@ class TestGetTranscript(SharedModuleStoreTestCase):
|
||||
assert error_transcript["text"][0] in content
|
||||
|
||||
@ddt.data(
|
||||
transcripts_utils.TranscriptsGenerationException,
|
||||
TranscriptsGenerationException,
|
||||
UnicodeDecodeError('aliencodec', b'\x02\x01', 1, 2, 'alien codec found!')
|
||||
)
|
||||
@patch('openedx.core.djangoapps.video_config.transcripts_utils.Transcript')
|
||||
@@ -983,7 +984,7 @@ class TestGetTranscript(SharedModuleStoreTestCase):
|
||||
)
|
||||
|
||||
@ddt.data(
|
||||
transcripts_utils.TranscriptsGenerationException,
|
||||
TranscriptsGenerationException,
|
||||
UnicodeDecodeError('aliencodec', b'\x02\x01', 1, 2, 'alien codec found!')
|
||||
)
|
||||
@patch('openedx.core.djangoapps.video_config.transcripts_utils.Transcript')
|
||||
|
||||
@@ -23,7 +23,8 @@ from opaque_keys.edx.keys import CourseKey
|
||||
from common.djangoapps.util.json_request import JsonResponse
|
||||
from openedx.core.djangoapps.video_config.models import VideoTranscriptEnabledFlag
|
||||
from openedx.core.djangoapps.video_pipeline.api import update_3rd_party_transcription_service_credentials
|
||||
from openedx.core.djangoapps.video_config.transcripts_utils import Transcript, TranscriptsGenerationException # lint-amnesty, pylint: disable=wrong-import-order
|
||||
from openedx.core.djangoapps.video_config.transcripts_utils import Transcript # lint-amnesty, pylint: disable=wrong-import-order
|
||||
from xblocks_contrib.video.exceptions import TranscriptsGenerationException
|
||||
|
||||
from .toggles import use_mock_video_uploads
|
||||
from .video_storage_handlers import TranscriptProvider
|
||||
|
||||
@@ -33,7 +33,6 @@ from xmodule.modulestore.exceptions import ItemNotFoundError # lint-amnesty, py
|
||||
from openedx.core.djangoapps.video_config.transcripts_utils import ( # lint-amnesty, pylint: disable=wrong-import-order
|
||||
GetTranscriptsFromYouTubeException,
|
||||
Transcript,
|
||||
TranscriptsGenerationException,
|
||||
TranscriptsRequestValidationException,
|
||||
clean_video_id,
|
||||
download_youtube_subs,
|
||||
@@ -44,6 +43,7 @@ from openedx.core.djangoapps.video_config.transcripts_utils import ( # lint-amn
|
||||
get_transcript_link_from_youtube,
|
||||
get_transcript_links_from_youtube,
|
||||
)
|
||||
from xblocks_contrib.video.exceptions import TranscriptsGenerationException
|
||||
from openedx.core.djangoapps.content_libraries import api as lib_api
|
||||
from openedx.core.djangoapps.xblock import api as xblock_api
|
||||
from openedx.core.djangoapps.xblock.data import CheckPerm
|
||||
|
||||
@@ -9,6 +9,7 @@ for the extracted video block in xblocks-contrib repository.
|
||||
import logging
|
||||
|
||||
from opaque_keys.edx.keys import CourseKey, UsageKey
|
||||
from xblocks_contrib.video.exceptions import TranscriptNotFoundError
|
||||
|
||||
from openedx.core.djangoapps.video_config import sharing
|
||||
from organizations.api import get_course_organization
|
||||
@@ -91,3 +92,31 @@ class VideoConfigService:
|
||||
Check if HLS playback is enabled for the course.
|
||||
"""
|
||||
return HLSPlaybackEnabledFlag.feature_enabled(course_id)
|
||||
|
||||
def get_transcript(
|
||||
self,
|
||||
video_block,
|
||||
lang: str | None = None,
|
||||
output_format: str = 'srt',
|
||||
youtube_id: str | None = None,
|
||||
) -> tuple[bytes, str, str]:
|
||||
"""
|
||||
Retrieve a transcript from the runtime's storage.
|
||||
|
||||
Returns:
|
||||
tuple(bytes, str, str): transcript content, filename, and mimetype.
|
||||
|
||||
Raises:
|
||||
TranscriptsGenerationException: If the transcript cannot be found or retrieved
|
||||
TranscriptNotFoundError: If the transcript cannot be found or retrieved
|
||||
"""
|
||||
# Import here to avoid circular dependency
|
||||
from openedx.core.djangoapps.video_config.transcripts_utils import get_transcript
|
||||
from xmodule.exceptions import NotFoundError
|
||||
|
||||
try:
|
||||
return get_transcript(video_block, lang, output_format, youtube_id)
|
||||
except NotFoundError as exc:
|
||||
raise TranscriptNotFoundError(
|
||||
f"Failed to get transcript: {exc}"
|
||||
) from exc
|
||||
|
||||
@@ -29,6 +29,8 @@ from xmodule.contentstore.django import contentstore
|
||||
from xmodule.exceptions import NotFoundError
|
||||
|
||||
from xmodule.video_block.bumper_utils import get_bumper_settings
|
||||
from xblocks_contrib.video.exceptions import TranscriptsGenerationException
|
||||
|
||||
|
||||
try:
|
||||
from edxval import api as edxval_api
|
||||
@@ -45,10 +47,6 @@ class TranscriptException(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class TranscriptsGenerationException(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class GetTranscriptsFromYouTubeException(Exception):
|
||||
pass
|
||||
|
||||
|
||||
@@ -22,6 +22,7 @@ from xblock.field_data import DictFieldData, FieldData, SplitFieldData
|
||||
from xblock.fields import Scope, ScopeIds
|
||||
from xblock.runtime import IdReader, KvsFieldData, MemoryIdManager, Runtime
|
||||
|
||||
from openedx.core.djangoapps.video_config.services import VideoConfigService
|
||||
from xmodule.errortracker import make_error_tracker
|
||||
from xmodule.contentstore.django import contentstore
|
||||
from xmodule.modulestore.django import XBlockI18nService
|
||||
@@ -341,6 +342,8 @@ class XBlockRuntime(RuntimeShim, Runtime):
|
||||
return EnrollmentsService()
|
||||
elif service_name == 'error_tracker':
|
||||
return make_error_tracker()
|
||||
elif service_name == 'video_config':
|
||||
return VideoConfigService()
|
||||
|
||||
# Otherwise, fall back to the base implementation which loads services
|
||||
# defined in the constructor:
|
||||
|
||||
@@ -56,13 +56,14 @@ from openedx.core.djangoapps.video_config.transcripts_utils import (
|
||||
clean_video_id,
|
||||
get_endonym_or_label,
|
||||
get_html5_ids,
|
||||
get_transcript,
|
||||
subs_filename
|
||||
)
|
||||
from .video_handlers import VideoStudentViewHandlers, VideoStudioViewHandlers
|
||||
from .video_utils import create_youtube_string, format_xml_exception_message, get_poster, rewrite_video_url
|
||||
from .video_xfields import VideoFields
|
||||
|
||||
from xblocks_contrib.video.exceptions import TranscriptNotFoundError
|
||||
|
||||
# The following import/except block for edxval is temporary measure until
|
||||
# edxval is a proper XBlock Runtime Service.
|
||||
#
|
||||
@@ -652,13 +653,15 @@ class _BuiltInVideoBlock(
|
||||
# construct transcripts info and also find if `en` subs exist
|
||||
transcripts_info = self.get_transcripts_info()
|
||||
possible_sub_ids = [self.sub, self.youtube_id_1_0] + get_html5_ids(self.html5_sources)
|
||||
for sub_id in possible_sub_ids:
|
||||
try:
|
||||
_, sub_id, _ = get_transcript(self, lang='en', output_format=Transcript.TXT)
|
||||
transcripts_info['transcripts'] = dict(transcripts_info['transcripts'], en=sub_id)
|
||||
break
|
||||
except NotFoundError:
|
||||
continue
|
||||
video_config_service = self.runtime.service(self, 'video_config')
|
||||
if video_config_service:
|
||||
for sub_id in possible_sub_ids:
|
||||
try:
|
||||
_, sub_id, _ = video_config_service.get_transcript(self, lang='en', output_format=Transcript.TXT)
|
||||
transcripts_info['transcripts'] = dict(transcripts_info['transcripts'], en=sub_id)
|
||||
break
|
||||
except TranscriptNotFoundError:
|
||||
continue
|
||||
|
||||
editable_fields['transcripts']['value'] = transcripts_info['transcripts']
|
||||
editable_fields['transcripts']['urlRoot'] = self.runtime.handler_url(
|
||||
@@ -1091,12 +1094,16 @@ class _BuiltInVideoBlock(
|
||||
|
||||
def _update_transcript_for_index(language=None):
|
||||
""" Find video transcript - if not found, don't update index """
|
||||
try:
|
||||
transcript = get_transcript(self, lang=language, output_format=Transcript.TXT)[0].replace("\n", " ")
|
||||
transcript_index_name = f"transcript_{language if language else self.transcript_language}"
|
||||
video_body.update({transcript_index_name: transcript})
|
||||
except NotFoundError:
|
||||
pass
|
||||
video_config_service = self.runtime.service(self, 'video_config')
|
||||
if video_config_service:
|
||||
try:
|
||||
transcript = video_config_service.get_transcript(
|
||||
self, lang=language, output_format=Transcript.TXT
|
||||
)[0].replace("\n", " ")
|
||||
transcript_index_name = f"transcript_{language if language else self.transcript_language}"
|
||||
video_body.update({transcript_index_name: transcript})
|
||||
except TranscriptNotFoundError:
|
||||
pass
|
||||
|
||||
if self.sub:
|
||||
_update_transcript_for_index()
|
||||
|
||||
@@ -25,21 +25,44 @@ from openedx.core.djangoapps.content_libraries import api as lib_api
|
||||
from openedx.core.djangoapps.video_config.transcripts_utils import (
|
||||
Transcript,
|
||||
TranscriptException,
|
||||
TranscriptsGenerationException,
|
||||
clean_video_id,
|
||||
generate_sjson_for_all_speeds,
|
||||
get_html5_ids,
|
||||
get_or_create_sjson,
|
||||
get_transcript,
|
||||
get_transcript_from_contentstore,
|
||||
remove_subs_from_store,
|
||||
subs_filename,
|
||||
youtube_speed_dict
|
||||
)
|
||||
from xblocks_contrib.video.exceptions import (
|
||||
TranscriptsGenerationException,
|
||||
TranscriptNotFoundError,
|
||||
)
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def get_transcript(
|
||||
video_block,
|
||||
lang: str | None = None,
|
||||
output_format: str = 'srt',
|
||||
youtube_id: str | None = None,
|
||||
) -> tuple[bytes, str, str]:
|
||||
"""
|
||||
Retrieve a transcript using a video block's configuration service.
|
||||
|
||||
Returns:
|
||||
tuple(bytes, str, str): transcript content, filename, and mimetype.
|
||||
|
||||
Raises:
|
||||
Exception: If the video config service is not available or the transcript cannot be retrieved.
|
||||
"""
|
||||
video_config_service = video_block.runtime.service(video_block, 'video_config')
|
||||
if not video_config_service:
|
||||
raise Exception("Video config service not found")
|
||||
return video_config_service.get_transcript(video_block, lang, output_format, youtube_id)
|
||||
|
||||
|
||||
# Disable no-member warning:
|
||||
# pylint: disable=no-member
|
||||
|
||||
@@ -356,7 +379,7 @@ class VideoStudentViewHandlers:
|
||||
mimetype,
|
||||
add_attachment_header=False
|
||||
)
|
||||
except NotFoundError as exc:
|
||||
except (NotFoundError, TranscriptNotFoundError) as exc:
|
||||
edx_video_id = clean_video_id(self.edx_video_id)
|
||||
log.warning(
|
||||
'[Translation Dispatch] %s: %s',
|
||||
@@ -370,7 +393,7 @@ class VideoStudentViewHandlers:
|
||||
|
||||
try:
|
||||
content, filename, mimetype = get_transcript(self, lang, output_format=self.transcript_download_format)
|
||||
except NotFoundError:
|
||||
except TranscriptNotFoundError:
|
||||
return Response(status=404)
|
||||
|
||||
response = self.make_transcript_http_response(
|
||||
@@ -660,8 +683,11 @@ class VideoStudioViewHandlers:
|
||||
return Response(json={'error': _('Language is required.')}, status=400)
|
||||
|
||||
try:
|
||||
transcript_content, transcript_name, mime_type = get_transcript(
|
||||
video=self, lang=language, output_format=Transcript.SRT
|
||||
video_config_service = self.runtime.service(self, 'video_config')
|
||||
if not video_config_service:
|
||||
return Response(status=404)
|
||||
transcript_content, transcript_name, mime_type = video_config_service.get_transcript(
|
||||
self, lang=language, output_format=Transcript.SRT
|
||||
)
|
||||
response = Response(transcript_content, headerlist=[
|
||||
(
|
||||
@@ -671,6 +697,10 @@ class VideoStudioViewHandlers:
|
||||
('Content-Language', language),
|
||||
('Content-Type', mime_type)
|
||||
])
|
||||
except (UnicodeDecodeError, TranscriptsGenerationException, NotFoundError):
|
||||
except (
|
||||
UnicodeDecodeError,
|
||||
TranscriptsGenerationException,
|
||||
TranscriptNotFoundError
|
||||
):
|
||||
response = Response(status=404)
|
||||
return response
|
||||
|
||||
Reference in New Issue
Block a user