Merge pull request #23910 from open-craft/samuel/fix-get-video-subs

SE-2589 fix AttributeError: 'LibraryLocatorV2' object has no attribute 'make_asset_key'
This commit is contained in:
David Ormsbee
2020-07-10 14:59:30 -04:00
committed by GitHub
3 changed files with 30 additions and 75 deletions

View File

@@ -784,49 +784,6 @@ class VideoTranscriptsMixin(object):
# to clean redundant language codes.
return list(set(translations))
def get_transcript(self, transcripts, transcript_format='srt', lang=None):
"""
Returns transcript, filename and MIME type.
transcripts (dict): A dict with all transcripts and a sub.
Raises:
- NotFoundError if cannot find transcript file in storage.
- ValueError if transcript file is empty or incorrect JSON.
- KeyError if transcript file has incorrect format.
If language is 'en', self.sub should be correct subtitles name.
If language is 'en', but if self.sub is not defined, this means that we
should search for video name in order to get proper transcript (old style courses).
If language is not 'en', give back transcript in proper language and format.
"""
if not lang:
lang = self.get_default_transcript_language(transcripts)
sub, other_lang = transcripts["sub"], transcripts["transcripts"]
if lang == 'en':
if sub: # HTML5 case and (Youtube case for new style videos)
transcript_name = sub
elif self.youtube_id_1_0: # old courses
transcript_name = self.youtube_id_1_0
else:
log.debug("No subtitles for 'en' language")
raise ValueError
data = Transcript.asset(self.location, transcript_name, lang).data.decode('utf-8')
filename = u'{}.{}'.format(transcript_name, transcript_format)
content = Transcript.convert(data, 'sjson', transcript_format)
else:
data = Transcript.asset(self.location, None, None, other_lang[lang]).data.decode('utf-8')
filename = u'{}.{}'.format(os.path.splitext(other_lang[lang])[0], transcript_format)
content = Transcript.convert(data, 'srt', transcript_format)
if not content:
log.debug('no subtitles produced in get_transcript')
raise ValueError
return content, filename, Transcript.mime_types[transcript_format]
def get_default_transcript_language(self, transcripts):
"""
Returns the default transcript language for this video module.
@@ -910,7 +867,10 @@ def get_transcript_from_val(edx_video_id, lang=None, output_format=Transcript.SR
def get_transcript_for_video(video_location, subs_id, file_name, language):
"""
Get video transcript from content store.
Get video transcript from content store. This is a lower level function and is used by
`get_transcript_from_contentstore`. Prefer that function instead where possible. If you
need to support getting transcripts from VAL or Blockstore as well, use the `get_transcript`
function instead.
NOTE: Transcripts can be searched from content store by two ways:
1. by an id(a.k.a subs_id) which will be used to construct transcript filename

View File

@@ -56,7 +56,7 @@ from .transcripts_utils import (
VideoTranscriptsMixin,
clean_video_id,
get_html5_ids,
get_transcript_for_video,
get_transcript,
subs_filename
)
from .video_handlers import VideoStudentViewHandlers, VideoStudioViewHandlers
@@ -593,12 +593,7 @@ class VideoBlock(
possible_sub_ids = [self.sub, self.youtube_id_1_0] + get_html5_ids(self.html5_sources)
for sub_id in possible_sub_ids:
try:
get_transcript_for_video(
self.location,
subs_id=sub_id,
file_name=sub_id,
language=u'en'
)
_, sub_id, _ = get_transcript(self, lang=u'en', output_format=Transcript.TXT)
transcripts_info['transcripts'] = dict(transcripts_info['transcripts'], en=sub_id)
break
except NotFoundError:
@@ -1029,10 +1024,7 @@ class VideoBlock(
def _update_transcript_for_index(language=None):
""" Find video transcript - if not found, don't update index """
try:
transcripts = self.get_transcripts_info()
transcript = self.get_transcript(
transcripts, transcript_format='txt', lang=language
)[0].replace("\n", " ")
transcript = get_transcript(self, lang=language, output_format=Transcript.TXT)[0].replace("\n", " ")
transcript_index_name = "transcript_{}".format(language if language else self.transcript_language)
video_body.update({transcript_index_name: transcript})
except NotFoundError:

View File

@@ -25,7 +25,12 @@ from xmodule.exceptions import NotFoundError
from xmodule.modulestore import ModuleStoreEnum
from xmodule.modulestore.django import modulestore
from xmodule.video_module import VideoBlock
from xmodule.video_module.transcripts_utils import Transcript, edxval_api, subs_filename
from xmodule.video_module.transcripts_utils import (
Transcript,
edxval_api,
get_transcript,
subs_filename,
)
from xmodule.x_module import STUDENT_VIEW
from .helpers import BaseTestXmodule
@@ -521,9 +526,8 @@ class TestTranscriptDownloadDispatch(TestVideo):
request = Request.blank('/download')
response = self.item.transcript(request=request, dispatch='download')
self.assertEqual(response.status, '404 Not Found')
transcripts = self.item.get_transcripts_info()
with self.assertRaises(NotFoundError):
self.item.get_transcript(transcripts)
get_transcript(self.item)
@patch(
'xmodule.video_module.transcripts_utils.get_transcript_for_video',
@@ -537,7 +541,7 @@ class TestTranscriptDownloadDispatch(TestVideo):
self.assertEqual(response.headers['Content-Disposition'], 'attachment; filename="en_塞.srt"')
@patch('xmodule.video_module.transcripts_utils.edxval_api.get_video_transcript_data')
@patch('xmodule.video_module.VideoBlock.get_transcript', Mock(side_effect=NotFoundError))
@patch('xmodule.video_module.get_transcript', Mock(side_effect=NotFoundError))
def test_download_fallback_transcript(self, mock_get_video_transcript_data):
"""
Verify val transcript is returned as a fallback if it is not found in the content store.
@@ -1196,8 +1200,7 @@ class TestGetTranscript(TestVideo):
_upload_sjson_file(good_sjson, self.item.location)
self.item.sub = _get_subs_id(good_sjson.name)
transcripts = self.item.get_transcripts_info()
text, filename, mime_type = self.item.get_transcript(transcripts)
text, filename, mime_type = get_transcript(self.item)
expected_text = textwrap.dedent("""\
0
@@ -1211,7 +1214,7 @@ class TestGetTranscript(TestVideo):
""")
self.assertEqual(text, expected_text)
self.assertEqual(filename[:-4], self.item.sub)
self.assertEqual(filename[:-4], 'en_' + self.item.sub)
self.assertEqual(mime_type, 'application/x-subrip; charset=utf-8')
def test_good_txt_transcript(self):
@@ -1234,27 +1237,27 @@ class TestGetTranscript(TestVideo):
_upload_sjson_file(good_sjson, self.item.location)
self.item.sub = _get_subs_id(good_sjson.name)
transcripts = self.item.get_transcripts_info()
text, filename, mime_type = self.item.get_transcript(transcripts, transcript_format="txt")
text, filename, mime_type = get_transcript(self.item, output_format=Transcript.TXT)
expected_text = textwrap.dedent("""\
Hi, welcome to Edx.
Let's start with what is on your screen right now.""")
self.assertEqual(text, expected_text)
self.assertEqual(filename, self.item.sub + '.txt')
self.assertEqual(filename, 'en_' + self.item.sub + '.txt')
self.assertEqual(mime_type, 'text/plain; charset=utf-8')
def test_en_with_empty_sub(self):
transcripts = {"transcripts": {}, "sub": ""}
self.item.sub = ""
self.item.transcripts = None
# no self.sub, self.youttube_1_0 exist, but no file in assets
with self.assertRaises(NotFoundError):
self.item.get_transcript(transcripts)
get_transcript(self.item)
# no self.sub and no self.youtube_1_0, no non-en transcritps
self.item.youtube_id_1_0 = None
with self.assertRaises(ValueError):
self.item.get_transcript(transcripts)
with self.assertRaises(NotFoundError):
get_transcript(self.item)
# no self.sub but youtube_1_0 exists with file in assets
good_sjson = _create_file(content=textwrap.dedent("""\
@@ -1276,7 +1279,7 @@ class TestGetTranscript(TestVideo):
_upload_sjson_file(good_sjson, self.item.location)
self.item.youtube_id_1_0 = _get_subs_id(good_sjson.name)
text, filename, mime_type = self.item.get_transcript(transcripts)
text, filename, mime_type = get_transcript(self.item)
expected_text = textwrap.dedent("""\
0
00:00:00,270 --> 00:00:02,720
@@ -1289,7 +1292,7 @@ class TestGetTranscript(TestVideo):
""")
self.assertEqual(text, expected_text)
self.assertEqual(filename, self.item.youtube_id_1_0 + '.srt')
self.assertEqual(filename, 'en_' + self.item.youtube_id_1_0 + '.srt')
self.assertEqual(mime_type, 'application/x-subrip; charset=utf-8')
def test_non_en_with_non_ascii_filename(self):
@@ -1298,14 +1301,14 @@ class TestGetTranscript(TestVideo):
_upload_file(self.srt_file, self.item_descriptor.location, u"塞.srt")
transcripts = self.item.get_transcripts_info()
text, filename, mime_type = self.item.get_transcript(transcripts)
text, filename, mime_type = get_transcript(self.item)
expected_text = textwrap.dedent(u"""
0
00:00:00,12 --> 00:00:00,100
Привіт, edX вітає вас.
""")
self.assertEqual(text, expected_text)
self.assertEqual(filename, u"塞.srt")
self.assertEqual(filename, u"zh_塞.srt")
self.assertEqual(mime_type, 'application/x-subrip; charset=utf-8')
def test_value_error(self):
@@ -1316,7 +1319,7 @@ class TestGetTranscript(TestVideo):
transcripts = self.item.get_transcripts_info()
with self.assertRaises(ValueError):
self.item.get_transcript(transcripts)
get_transcript(self.item)
def test_key_error(self):
good_sjson = _create_file(content="""
@@ -1337,4 +1340,4 @@ class TestGetTranscript(TestVideo):
transcripts = self.item.get_transcripts_info()
with self.assertRaises(KeyError):
self.item.get_transcript(transcripts)
get_transcript(self.item)