Merge pull request #18240 from edx/mrehan/backward-transcript-export

Backward compatible video transcripts export (<= open-release/ginkgo.master)
This commit is contained in:
M. Rehan
2018-05-25 23:07:37 +05:00
committed by GitHub
7 changed files with 139 additions and 26 deletions

View File

@@ -716,7 +716,12 @@ class VideoExportTestCase(VideoDescriptorTestBase):
Test that we write the correct XML on export.
"""
edx_video_id = u'test_edx_video_id'
mock_val_api.export_to_xml = Mock(return_value=etree.Element('video_asset'))
mock_val_api.export_to_xml = Mock(
return_value=dict(
xml=etree.Element('video_asset'),
transcripts={}
)
)
self.descriptor.youtube_id_0_75 = 'izygArpw-Qo'
self.descriptor.youtube_id_1_0 = 'p2Q6BrNhdh8'
self.descriptor.youtube_id_1_25 = '1EeWXzPdhSA'
@@ -736,14 +741,23 @@ class VideoExportTestCase(VideoDescriptorTestBase):
xml = self.descriptor.definition_to_xml(self.file_system)
parser = etree.XMLParser(remove_blank_text=True)
xml_string = '''\
<video url_name="SampleProblem" start_time="0:00:01" youtube="0.75:izygArpw-Qo,1.00:p2Q6BrNhdh8,1.25:1EeWXzPdhSA,1.50:rABDYkeK0x8" show_captions="false" end_time="0:01:00" download_video="true" download_track="true">
<video
url_name="SampleProblem"
start_time="0:00:01"
show_captions="false"
end_time="0:01:00"
download_video="true"
download_track="true"
youtube="0.75:izygArpw-Qo,1.00:p2Q6BrNhdh8,1.25:1EeWXzPdhSA,1.50:rABDYkeK0x8"
transcripts='{"ge": "german_translation.srt", "ua": "ukrainian_translation.srt"}'
>
<source src="http://www.example.com/source.mp4"/>
<source src="http://www.example.com/source1.ogg"/>
<track src="http://www.example.com/track"/>
<handout src="http://www.example.com/handout"/>
<video_asset />
<transcript language="ge" src="german_translation.srt" />
<transcript language="ua" src="ukrainian_translation.srt" />
<video_asset />
</video>
'''
expected = etree.XML(xml_string, parser=parser)

View File

@@ -739,13 +739,9 @@ class VideoDescriptor(VideoFields, VideoTranscriptsMixin, VideoStudioViewHandler
ele.set('src', self.handout)
xml.append(ele)
transcripts = {}
if self.transcripts is not None:
# sorting for easy testing of resulting xml
for transcript_language in sorted(self.transcripts.keys()):
ele = etree.Element('transcript')
ele.set('language', transcript_language)
ele.set('src', self.transcripts[transcript_language])
xml.append(ele)
transcripts.update(self.transcripts)
edx_video_id = clean_video_id(self.edx_video_id)
if edxval_api and edx_video_id:
@@ -753,17 +749,42 @@ class VideoDescriptor(VideoFields, VideoTranscriptsMixin, VideoStudioViewHandler
# Create static dir if not created earlier.
resource_fs.makedirs(EXPORT_IMPORT_STATIC_DIR, recreate=True)
xml.append(
edxval_api.export_to_xml(
video_id=edx_video_id,
resource_fs=resource_fs,
static_dir=EXPORT_IMPORT_STATIC_DIR,
course_id=unicode(self.runtime.course_id.for_branch(None))
)
# Backward compatible exports
# edxval exports new transcripts into the course OLX and returns a map of transcript
# files, so that they can also be written into the old transcript metadata fields
# (i.e. `self.transcripts`) on import. This way older open-releases (<= ginkgo),
# which have not deprecated the contentstore yet, can also import and use new-style
# transcripts in their Open edX instances.
exported_metadata = edxval_api.export_to_xml(
video_id=edx_video_id,
resource_fs=resource_fs,
static_dir=EXPORT_IMPORT_STATIC_DIR,
course_id=unicode(self.runtime.course_id.for_branch(None))
)
# Update xml with edxval metadata
xml.append(exported_metadata['xml'])
# We don't need `sub` if an English transcript
# is also present in the new transcripts.
new_transcripts = exported_metadata['transcripts']
transcripts.update(new_transcripts)
if new_transcripts.get('en'):
xml.set('sub', '')
# Update `transcripts` attribute in the xml
xml.set('transcripts', json.dumps(transcripts))
except edxval_api.ValVideoNotFoundError:
pass
# Sorting transcripts for easy testing of resulting xml
for transcript_language in sorted(transcripts.keys()):
ele = etree.Element('transcript')
ele.set('language', transcript_language)
ele.set('src', transcripts[transcript_language])
xml.append(ele)
# handle license specifically
self.add_license_to_xml(xml)

View File

@@ -139,6 +139,14 @@ class XmlParserMixin(object):
# Used for storing xml attributes between import and export, for roundtrips
'xml_attributes')
# This is a category-to-fields map containing the block-category-specific fields which should not be
# cleaned and/or overridden while adding xml to a node.
metadata_to_not_to_clean = {
# The `video` category has `sub` and `transcripts` fields
# which should not be cleaned/overridden in an xml object.
'video': ('sub', 'transcripts')
}
metadata_to_export_to_policy = ('discussion_topics',)
@staticmethod
@@ -165,13 +173,15 @@ class XmlParserMixin(object):
raise NotImplementedError("%s does not implement definition_from_xml" % cls.__name__)
@classmethod
def clean_metadata_from_xml(cls, xml_object):
def clean_metadata_from_xml(cls, xml_object, excluded_fields=()):
"""
Remove any attribute named for a field with scope Scope.settings from the supplied
xml_object
"""
for field_name, field in cls.fields.items():
if field.scope == Scope.settings and xml_object.get(field_name) is not None:
if (field.scope == Scope.settings
and field_name not in excluded_fields
and xml_object.get(field_name) is not None):
del xml_object.attrib[field_name]
@classmethod
@@ -448,7 +458,8 @@ class XmlParserMixin(object):
aside.add_xml_to_node(aside_node)
xml_object.append(aside_node)
self.clean_metadata_from_xml(xml_object)
not_to_clean_fields = self.metadata_to_not_to_clean.get(self.category, ())
self.clean_metadata_from_xml(xml_object, excluded_fields=not_to_clean_fields)
# Set the tag on both nodes so we get the file path right.
xml_object.tag = self.category
@@ -457,7 +468,9 @@ class XmlParserMixin(object):
# Add the non-inherited metadata
for attr in sorted(own_metadata(self)):
# don't want e.g. data_dir
if attr not in self.metadata_to_strip and attr not in self.metadata_to_export_to_policy:
if (attr not in self.metadata_to_strip
and attr not in self.metadata_to_export_to_policy
and attr not in not_to_clean_fields):
val = serialize_field(self._field_data.get(self, attr))
try:
xml_object.set(attr, val)

View File

@@ -20,6 +20,7 @@ from fs.path import combine
from edxval.api import (
ValCannotCreateError,
ValVideoNotFoundError,
create_video_transcript,
create_or_update_video_transcript,
create_profile,
create_video,
@@ -54,7 +55,7 @@ MODULESTORES = {
ModuleStoreEnum.Type.split: TEST_DATA_SPLIT_MODULESTORE,
}
TRANSCRIPT_FILE_SRT_DATA = """
TRANSCRIPT_FILE_SRT_DATA = u"""
1
00:00:14,370 --> 00:00:16,530
I am overwatch.
@@ -1589,16 +1590,19 @@ class VideoDescriptorTest(TestCase, VideoDescriptorTestBase):
actual = self.descriptor.definition_to_xml(resource_fs=self.file_system)
expected_str = """
<video download_video="false" url_name="SampleProblem">
<video download_video="false" url_name="SampleProblem" transcripts='{transcripts}'>
<video_asset client_video_id="test_client_video_id" duration="111.0" image="">
<encoded_video profile="mobile" url="http://example.com/video" file_size="222" bitrate="333"/>
<transcripts>
<transcript file_format="srt" language_code="{language_code}" provider="Cielo24"/>
</transcripts>
</video_asset>
<transcript language="{language_code}" src="{transcript_file}"/>
</video>
""".format(
language_code=language_code
language_code=language_code,
transcript_file=transcript_file_name,
transcripts=json.dumps({language_code: transcript_file_name})
)
parser = etree.XMLParser(remove_blank_text=True)
expected = etree.XML(expected_str, parser=parser)
@@ -1612,6 +1616,66 @@ class VideoDescriptorTest(TestCase, VideoDescriptorTestBase):
transcript = get_video_transcript_data(video_id=self.descriptor.edx_video_id, language_code=language_code)
self.assertEqual(transcript['content'], expected_transcript_content)
@ddt.data(
    # An English transcript exists in VAL, so the exported `sub` is expected to be blank.
    (['en', 'da'], 'test_sub', ''),
    # No English transcript in VAL, so the exported `sub` is expected to keep its value.
    (['da'], 'test_sub', 'test_sub')
)
@ddt.unpack
def test_export_val_transcripts_backward_compatibility(self, languages, sub, expected_sub):
    """
    Tests new transcripts export for backward compatibility.

    Arguments:
        languages (list): language codes for which VAL transcripts are created.
        sub (str): value assigned to the descriptor's `sub` field before export.
        expected_sub (str): expected value of the `sub` attribute in the exported xml.
    """
    self.descriptor.edx_video_id = 'test_video_id'
    self.descriptor.sub = sub

    # Setup VAL encode profile, video and transcripts
    create_profile('mobile')
    create_video({
        'edx_video_id': self.descriptor.edx_video_id,
        'client_video_id': 'test_client_video_id',
        'duration': 111.0,
        'status': 'dummy',
        'encoded_videos': [{
            'profile': 'mobile',
            'url': 'http://example.com/video',
            'file_size': 222,
            'bitrate': 333,
        }],
    })

    for language in languages:
        create_video_transcript(
            video_id=self.descriptor.edx_video_id,
            language_code=language,
            file_format=Transcript.SRT,
            content=ContentFile(TRANSCRIPT_FILE_SRT_DATA)
        )

    # Export the video module into xml
    video_xml = self.descriptor.definition_to_xml(resource_fs=self.file_system)

    # Assert `sub` and `transcripts` attribute in the xml
    self.assertEqual(video_xml.get('sub'), expected_sub)

    expected_transcripts = {
        language: "{edx_video_id}-{language}.srt".format(
            edx_video_id=self.descriptor.edx_video_id,
            language=language
        )
        for language in languages
    }
    self.assertDictEqual(json.loads(video_xml.get('transcripts')), expected_transcripts)

    # Assert transcript content from course OLX
    for language in languages:
        expected_transcript_path = combine(
            combine(self.temp_dir, EXPORT_IMPORT_COURSE_DIR),
            combine(EXPORT_IMPORT_STATIC_DIR, expected_transcripts[language])
        )
        # Use a context manager so the exported file's handle is closed
        # deterministically instead of being leaked once per language.
        with open(expected_transcript_path) as transcript_file:
            expected_transcript_content = File(transcript_file).read()

        transcript = get_video_transcript_data(video_id=self.descriptor.edx_video_id, language_code=language)
        self.assertEqual(transcript['content'], expected_transcript_content)
def test_export_val_data_not_found(self):
"""
Tests that external video export works as expected.

View File

@@ -127,7 +127,7 @@ edx-rest-api-client==1.7.1
edx-search==1.1.0
edx-submissions==2.0.12
edx-user-state-client==1.0.4
edxval==0.1.14
edxval==0.1.15
elasticsearch==1.9.0 # via edx-search
enum34==1.1.6
event-tracking==0.2.4

View File

@@ -149,7 +149,7 @@ edx-search==1.1.0
edx-sphinx-theme==1.3.0
edx-submissions==2.0.12
edx-user-state-client==1.0.4
edxval==0.1.14
edxval==0.1.15
elasticsearch==1.9.0
enum34==1.1.6
event-tracking==0.2.4

View File

@@ -41,6 +41,7 @@ git+https://github.com/edx-solutions/xblock-drag-and-drop-v2@v2.1.6#egg=xblock-d
git+https://github.com/open-craft/xblock-poll@7ba819b968fe8faddb78bb22e1fe7637005eb414#egg=xblock-poll==1.2.7
git+https://github.com/edx/xblock-utils.git@v1.1.1#egg=xblock-utils==1.1.1
-e common/lib/xmodule
amqp==1.4.9
analytics-python==1.1.0
anyjson==0.3.3
@@ -143,7 +144,7 @@ edx-rest-api-client==1.7.1
edx-search==1.1.0
edx-submissions==2.0.12
edx-user-state-client==1.0.4
edxval==0.1.14
edxval==0.1.15
elasticsearch==1.9.0
enum34==1.1.6
event-tracking==0.2.4