Export video transcripts - EDUCATOR-1789

This commit is contained in:
Mushtaq Ali
2018-02-15 15:37:43 +05:00
parent 89d8e7b539
commit 36483f938d
4 changed files with 110 additions and 38 deletions

View File

@@ -15,8 +15,10 @@ the course, section, subsection, unit, etc.
import os
import unittest
import datetime
import shutil
from uuid import uuid4
from tempfile import mkdtemp
from lxml import etree
from mock import ANY, Mock, patch, MagicMock
import ddt
@@ -24,6 +26,7 @@ import ddt
from django.conf import settings
from django.test.utils import override_settings
from fs.osfs import OSFS
from opaque_keys.edx.locator import CourseLocator
from opaque_keys.edx.keys import CourseKey
from xblock.field_data import DictFieldData
@@ -31,7 +34,7 @@ from xblock.fields import ScopeIds
from xmodule.tests import get_test_descriptor_system
from xmodule.validation import StudioValidationMessage
from xmodule.video_module import VideoDescriptor, create_youtube_string
from xmodule.video_module import VideoDescriptor, create_youtube_string, EXPORT_STATIC_DIR
from xmodule.video_module.transcripts_utils import download_youtube_subs, save_to_store
from . import LogicTest
from .test_import import DummySystem
@@ -672,11 +675,18 @@ class VideoExportTestCase(VideoDescriptorTestBase):
"""
Make sure that VideoDescriptor can export itself to XML correctly.
"""
def setUp(self):
    """
    Create a temporary directory and an OSFS rooted at it for the export tests.

    The filesystem is stored on `self.file_system` and is what the tests pass
    to `definition_to_xml` as `resource_fs`.
    """
    super(VideoExportTestCase, self).setUp()
    self.temp_dir = mkdtemp()
    # Register removal immediately after creation so the directory is cleaned
    # up even if a later step of setUp (e.g. building the OSFS) raises.
    self.addCleanup(shutil.rmtree, self.temp_dir)
    self.file_system = OSFS(self.temp_dir)
@patch('xmodule.video_module.video_module.edxval_api')
def test_export_to_xml(self, mock_val_api):
"""
Test that we write the correct XML on export.
"""
edx_video_id = u'test_edx_video_id'
mock_val_api.export_to_xml = Mock(return_value=etree.Element('video_asset'))
self.descriptor.youtube_id_0_75 = 'izygArpw-Qo'
self.descriptor.youtube_id_1_0 = 'p2Q6BrNhdh8'
@@ -691,10 +701,10 @@ class VideoExportTestCase(VideoDescriptorTestBase):
self.descriptor.html5_sources = ['http://www.example.com/source.mp4', 'http://www.example.com/source1.ogg']
self.descriptor.download_video = True
self.descriptor.transcripts = {'ua': 'ukrainian_translation.srt', 'ge': 'german_translation.srt'}
self.descriptor.edx_video_id = 'test_edx_video_id'
self.descriptor.edx_video_id = edx_video_id
self.descriptor.runtime.course_id = MagicMock()
xml = self.descriptor.definition_to_xml(None) # We don't use the `resource_fs` parameter
xml = self.descriptor.definition_to_xml(self.file_system)
parser = etree.XMLParser(remove_blank_text=True)
xml_string = '''\
<video url_name="SampleProblem" start_time="0:00:01" youtube="0.75:izygArpw-Qo,1.00:p2Q6BrNhdh8,1.25:1EeWXzPdhSA,1.50:rABDYkeK0x8" show_captions="false" end_time="0:01:00" download_video="true" download_track="true">
@@ -710,9 +720,10 @@ class VideoExportTestCase(VideoDescriptorTestBase):
expected = etree.XML(xml_string, parser=parser)
self.assertXmlEqual(expected, xml)
mock_val_api.export_to_xml.assert_called_once_with(
[u'test_edx_video_id', u'p2Q6BrNhdh8', 'source', 'source1'],
ANY,
external=False
video_id=edx_video_id,
static_dir=EXPORT_STATIC_DIR,
resource_fs=self.file_system,
course_id=unicode(self.descriptor.runtime.course_id.for_branch(None)),
)
@patch('xmodule.video_module.video_module.edxval_api')
@@ -723,7 +734,7 @@ class VideoExportTestCase(VideoDescriptorTestBase):
self.descriptor.edx_video_id = 'test_edx_video_id'
self.descriptor.runtime.course_id = MagicMock()
xml = self.descriptor.definition_to_xml(None)
xml = self.descriptor.definition_to_xml(self.file_system)
parser = etree.XMLParser(remove_blank_text=True)
xml_string = '<video url_name="SampleProblem" download_video="false"/>'
expected = etree.XML(xml_string, parser=parser)
@@ -746,7 +757,7 @@ class VideoExportTestCase(VideoDescriptorTestBase):
self.descriptor.html5_sources = ['http://www.example.com/source.mp4', 'http://www.example.com/source.ogg']
self.descriptor.download_video = True
xml = self.descriptor.definition_to_xml(None) # We don't use the `resource_fs` parameter
xml = self.descriptor.definition_to_xml(self.file_system)
parser = etree.XMLParser(remove_blank_text=True)
xml_string = '''\
<video url_name="SampleProblem" start_time="0:00:05" youtube="0.75:izygArpw-Qo,1.00:p2Q6BrNhdh8,1.25:1EeWXzPdhSA,1.50:rABDYkeK0x8" show_captions="false" download_video="true" download_track="true">
@@ -763,7 +774,7 @@ class VideoExportTestCase(VideoDescriptorTestBase):
"""
Test XML export with defaults.
"""
xml = self.descriptor.definition_to_xml(None)
xml = self.descriptor.definition_to_xml(self.file_system)
# Check that download_video field is also set to default (False) in xml for backward compatibility
expected = '<video url_name="SampleProblem" download_video="false"/>\n'
self.assertEquals(expected, etree.tostring(xml, pretty_print=True))
@@ -774,7 +785,7 @@ class VideoExportTestCase(VideoDescriptorTestBase):
Test XML export with transcripts being overridden to None.
"""
self.descriptor.transcripts = None
xml = self.descriptor.definition_to_xml(None)
xml = self.descriptor.definition_to_xml(self.file_system)
expected = '<video url_name="SampleProblem" download_video="false"/>\n'
self.assertEquals(expected, etree.tostring(xml, pretty_print=True))
@@ -785,7 +796,7 @@ class VideoExportTestCase(VideoDescriptorTestBase):
The illegal characters in a String field are removed from the string instead.
"""
self.descriptor.display_name = 'Display\x1eName'
xml = self.descriptor.definition_to_xml(None)
xml = self.descriptor.definition_to_xml(self.file_system)
self.assertEqual(xml.get('display_name'), 'DisplayName')
@patch('xmodule.video_module.video_module.edxval_api', None)
@@ -794,7 +805,7 @@ class VideoExportTestCase(VideoDescriptorTestBase):
Test XML export handles the unicode characters.
"""
self.descriptor.display_name = u'这是文'
xml = self.descriptor.definition_to_xml(None)
xml = self.descriptor.definition_to_xml(self.file_system)
self.assertEqual(xml.get('display_name'), u'\u8fd9\u662f\u6587')

View File

@@ -47,6 +47,7 @@ from .transcripts_utils import (
get_video_ids_info,
Transcript,
VideoTranscriptsMixin,
clean_video_id,
)
from .transcripts_model_utils import (
is_val_transcript_feature_enabled_for_course
@@ -98,6 +99,9 @@ log = logging.getLogger(__name__)
_ = lambda text: text
# Name of the directory created inside the export `resource_fs` during video
# export; it is also handed to edxval's `export_to_xml` as `static_dir`.
EXPORT_STATIC_DIR = u'static'
@XBlock.wants('settings', 'completion')
class VideoModule(VideoFields, VideoTranscriptsMixin, VideoStudentViewHandlers, XModule, LicenseMixin):
"""
@@ -711,19 +715,22 @@ class VideoDescriptor(VideoFields, VideoTranscriptsMixin, VideoStudioViewHandler
ele.set('src', self.transcripts[transcript_language])
xml.append(ele)
if edxval_api:
external, video_ids = get_video_ids_info(self.edx_video_id, self.youtube_id_1_0, self.html5_sources)
if video_ids:
try:
xml.append(
edxval_api.export_to_xml(
video_ids,
unicode(self.runtime.course_id.for_branch(None)),
external=external
)
edx_video_id = clean_video_id(self.edx_video_id)
if edx_video_id:
try:
# Create static dir if not created earlier.
resource_fs.makedirs(EXPORT_STATIC_DIR, recreate=True)
xml.append(
edxval_api.export_to_xml(
video_id=edx_video_id,
resource_fs=resource_fs,
static_dir=EXPORT_STATIC_DIR,
course_id=unicode(self.runtime.course_id.for_branch(None))
)
except edxval_api.ValVideoNotFoundError:
pass
)
except edxval_api.ValVideoNotFoundError:
pass
# handle license specifically
self.add_license_to_xml(xml)

View File

@@ -1,14 +1,22 @@
# -*- coding: utf-8 -*-
"""Video xmodule tests in mongo."""
"""
Video xmodule tests in mongo.
"""
import json
from collections import OrderedDict
from uuid import uuid4
from tempfile import mkdtemp
import shutil
import ddt
from django.conf import settings
from django.core.files import File
from django.core.files.base import ContentFile
from django.test import TestCase
from django.test.utils import override_settings
from fs.osfs import OSFS
from fs.path import combine
from edxval.api import (
ValCannotCreateError,
ValVideoNotFoundError,
@@ -16,7 +24,8 @@ from edxval.api import (
create_profile,
create_video,
get_video_info,
get_video_transcript
get_video_transcript,
get_video_transcript_data
)
from lxml import etree
from mock import MagicMock, Mock, patch
@@ -32,6 +41,7 @@ from xmodule.tests.test_import import DummySystem
from xmodule.tests.test_video import VideoDescriptorTestBase, instantiate_descriptor
from xmodule.video_module import VideoDescriptor, bumper_utils, rewrite_video_url, video_utils
from xmodule.video_module.transcripts_utils import Transcript, save_to_store
from xmodule.video_module.video_module import EXPORT_STATIC_DIR
from xmodule.x_module import STUDENT_VIEW
from .helpers import BaseTestXmodule
@@ -43,6 +53,16 @@ MODULESTORES = {
ModuleStoreEnum.Type.split: TEST_DATA_SPLIT_MODULESTORE,
}
# Sample transcript content (numbered cues with timestamps, including non-ASCII
# text) used as the uploaded transcript file data in the VAL export test.
TRANSCRIPT_FILE_DATA = """
1
00:00:14,370 --> 00:00:16,530
I am overwatch.
2
00:00:16,500 --> 00:00:18,600
可以用“我不太懂艺术 但我知道我喜欢什么”做比喻.
"""
@attr(shard=1)
class TestVideoYouTube(TestVideo):
@@ -1509,12 +1529,15 @@ class VideoDescriptorTest(TestCase, VideoDescriptorTestBase):
super(VideoDescriptorTest, self).setUp()
self.descriptor.runtime.handler_url = MagicMock()
self.descriptor.runtime.course_id = MagicMock()
self.temp_dir = mkdtemp()
self.file_system = OSFS(self.temp_dir)
self.addCleanup(shutil.rmtree, self.temp_dir)
def get_video_transcript_data(self, video_id):
    """
    Build sample transcript data for the given video.

    Arguments:
        video_id: identifier stored under the `video_id` key of the result.

    Returns:
        dict with `video_id`, `language_code`, `url`, `provider` and
        `file_format` keys. The `url` is built from `settings.MEDIA_URL`
        rather than a hard-coded media prefix.
    """
    return dict(
        video_id=video_id,
        language_code='ar',
        # `url` must be passed exactly once; a repeated keyword argument is a SyntaxError.
        url='{media_url}ext101.srt'.format(media_url=settings.MEDIA_URL),  # MEDIA_URL is /static/uploads/
        provider='Cielo24',
        file_format='srt',
    )
@@ -1547,7 +1570,14 @@ class VideoDescriptorTest(TestCase, VideoDescriptorTestBase):
)
def test_export_val_data_with_internal(self):
"""
Tests that exported VAL videos are working as expected.
"""
language_code = 'ar'
transcript_file_name = 'test_edx_video_id-ar.srt'
expected_transcript_path = combine(self.temp_dir, combine(EXPORT_STATIC_DIR, transcript_file_name))
self.descriptor.edx_video_id = 'test_edx_video_id'
create_profile('mobile')
create_video({
'edx_video_id': self.descriptor.edx_video_id,
@@ -1561,34 +1591,48 @@ class VideoDescriptorTest(TestCase, VideoDescriptorTestBase):
'bitrate': 333,
}],
})
create_or_update_video_transcript(
transcript_url = create_or_update_video_transcript(
video_id=self.descriptor.edx_video_id,
language_code='ar',
language_code=language_code,
metadata={
'provider': 'Cielo24',
'file_name': 'ext101.srt',
'file_format': 'srt'
}
},
file_data=ContentFile(TRANSCRIPT_FILE_DATA)
)
actual = self.descriptor.definition_to_xml(resource_fs=None)
actual = self.descriptor.definition_to_xml(resource_fs=self.file_system)
expected_str = """
<video download_video="false" url_name="SampleProblem">
<video_asset client_video_id="test_client_video_id" duration="111.0" image="">
<encoded_video profile="mobile" url="http://example.com/video" file_size="222" bitrate="333"/>
<transcripts>
<transcript file_format="srt" file_name="ext101.srt" language_code="ar" provider="Cielo24" video_id="{video_id}"/>
<transcript file_format="srt" file_name='video-transcripts/{transcript_name}' language_code="{language_code}" provider="Cielo24"/>
</transcripts>
</video_asset>
</video>
""".format(video_id=self.descriptor.edx_video_id)
""".format(
transcript_name=transcript_url.split('/')[-1],
language_code=language_code
)
parser = etree.XMLParser(remove_blank_text=True)
expected = etree.XML(expected_str, parser=parser)
self.assertXmlEqual(expected, actual)
# Verify transcript file is created.
self.assertEqual([transcript_file_name], self.file_system.listdir(EXPORT_STATIC_DIR))
# Also verify the content of created transcript file.
expected_transcript_content = File(open(expected_transcript_path)).read()
transcript = get_video_transcript_data(video_id=self.descriptor.edx_video_id, language_code=language_code)
self.assertEqual(transcript['content'], expected_transcript_content)
def test_export_val_data_not_found(self):
"""
Tests that external video export works as expected.
"""
self.descriptor.edx_video_id = 'nonexistent'
actual = self.descriptor.definition_to_xml(resource_fs=None)
actual = self.descriptor.definition_to_xml(resource_fs=self.file_system)
expected_str = """<video download_video="false" url_name="SampleProblem"/>"""
parser = etree.XMLParser(remove_blank_text=True)
expected = etree.XML(expected_str, parser=parser)
@@ -1597,12 +1641,12 @@ class VideoDescriptorTest(TestCase, VideoDescriptorTestBase):
@patch('xmodule.video_module.transcripts_utils.get_video_ids_info')
def test_export_no_video_ids(self, mock_get_video_ids_info):
"""
Tests export when there are no video ids
Tests export when there is no video id. `export_to_xml` only works in case of video id.
"""
mock_get_video_ids_info.return_value = True, []
actual = self.descriptor.definition_to_xml(resource_fs=None)
expected_str = '<video url_name="SampleProblem" download_video="false"><video_asset/></video>'
actual = self.descriptor.definition_to_xml(resource_fs=self.file_system)
expected_str = '<video url_name="SampleProblem" download_video="false"></video>'
parser = etree.XMLParser(remove_blank_text=True)
expected = etree.XML(expected_str, parser=parser)

View File

@@ -579,6 +579,16 @@ ACTIVATION_EMAIL_FROM_ADDRESS = 'test_activate@edx.org'
TEMPLATES[0]['OPTIONS']['debug'] = True
########################## VIDEO TRANSCRIPTS STORAGE ############################
# Video transcript settings: size cap, storage keyword arguments pointed at
# MEDIA_ROOT / MEDIA_URL, and the directory prefix for transcript files.
VIDEO_TRANSCRIPTS_SETTINGS = {
    'VIDEO_TRANSCRIPTS_MAX_BYTES': 3 * 1024 * 1024,  # 3 MB
    'STORAGE_KWARGS': {
        'location': MEDIA_ROOT,
        'base_url': MEDIA_URL,
    },
    'DIRECTORY_PREFIX': 'video-transcripts/',
}
####################### Plugin Settings ##########################
from openedx.core.djangoapps.plugins import plugin_settings, constants as plugin_constants