The purpose of this change is to serve transcripts from edx-val when the requested transcripts are not found in the contentstore, which is where all transcripts have lived to date.
687 lines
25 KiB
Python
687 lines
25 KiB
Python
# -*- coding: utf-8 -*-
|
|
""" Tests for transcripts_utils. """
|
|
import copy
|
|
import ddt
|
|
import textwrap
|
|
import unittest
|
|
from uuid import uuid4
|
|
|
|
from django.conf import settings
|
|
from django.test.utils import override_settings
|
|
from django.utils import translation
|
|
from mock import Mock, patch
|
|
from nose.plugins.skip import SkipTest
|
|
|
|
from contentstore.tests.utils import mock_requests_get
|
|
from xmodule.contentstore.content import StaticContent
|
|
from xmodule.contentstore.django import contentstore
|
|
from xmodule.exceptions import NotFoundError
|
|
from xmodule.modulestore.tests.django_utils import SharedModuleStoreTestCase
|
|
from xmodule.modulestore.tests.factories import CourseFactory
|
|
from xmodule.video_module import transcripts_utils
|
|
|
|
# Work on a deep copy so that pointing the tests at their own, uniquely named
# database never mutates `settings.CONTENTSTORE` itself.
TEST_DATA_CONTENTSTORE = copy.deepcopy(settings.CONTENTSTORE)
TEST_DATA_CONTENTSTORE['DOC_STORE_CONFIG'].update({'db': 'test_xcontent_%s' % uuid4().hex})
|
|
|
|
|
|
class TestGenerateSubs(unittest.TestCase):
    """
    Tests for `generate_subs` function.

    Every test feeds the same five-cue fixture to `generate_subs` with a
    different pair of leading arguments and compares the complete result
    against literal expected timings.
    """
    def setUp(self):
        super(TestGenerateSubs, self).setUp()

        # Fixture shared by all tests; the assertions below expect the
        # 'text' entries to come back unchanged.
        self.source_subs = {
            'start': [100, 200, 240, 390, 1000],
            'end': [200, 240, 380, 1000, 1500],
            'text': ['subs #1', 'subs #2', 'subs #3', 'subs #4', 'subs #5'],
        }

    def test_generate_subs_increase_speed(self):
        # With arguments (2, 1) every timing is expected to be doubled.
        expected = {
            'start': [200, 400, 480, 780, 2000],
            'end': [400, 480, 760, 2000, 3000],
            'text': ['subs #1', 'subs #2', 'subs #3', 'subs #4', 'subs #5'],
        }
        generated = transcripts_utils.generate_subs(2, 1, self.source_subs)
        self.assertDictEqual(generated, expected)

    def test_generate_subs_decrease_speed_1(self):
        # With arguments (0.5, 1) every timing is expected to be halved.
        expected = {
            'start': [50, 100, 120, 195, 500],
            'end': [100, 120, 190, 500, 750],
            'text': ['subs #1', 'subs #2', 'subs #3', 'subs #4', 'subs #5'],
        }
        generated = transcripts_utils.generate_subs(0.5, 1, self.source_subs)
        self.assertDictEqual(generated, expected)

    def test_generate_subs_decrease_speed_2(self):
        """Test for correct division during `generate_subs` process."""
        # Integer arguments (1, 2) must give the same halved timings as
        # (0.5, 1) above, e.g. 390 -> 195.
        expected = {
            'start': [50, 100, 120, 195, 500],
            'end': [100, 120, 190, 500, 750],
            'text': ['subs #1', 'subs #2', 'subs #3', 'subs #4', 'subs #5'],
        }
        generated = transcripts_utils.generate_subs(1, 2, self.source_subs)
        self.assertDictEqual(generated, expected)
|
|
|
|
|
|
@override_settings(CONTENTSTORE=TEST_DATA_CONTENTSTORE)
class TestSaveSubsToStore(SharedModuleStoreTestCase):
    """Tests for `save_subs_to_store` function."""

    # Attributes of the course created once per class in `setUpClass`.
    org = 'MITx'
    number = '999'
    display_name = 'Test course'

    def clear_subs_content(self):
        """Remove the saved and the copied subtitle assets, if they exist."""
        for location in (self.content_location, self.content_copied_location):
            try:
                stored = contentstore().find(location)
                contentstore().delete(stored.location)
            except NotFoundError:
                # Nothing stored at this location, so nothing to remove.
                pass

    @classmethod
    def sub_id_to_location(cls, sub_id):
        """
        Compute the static file location of the subtitle asset for `sub_id`
        within the class-level course.
        """
        asset_name = u'subs_{0}.srt.sjson'.format(sub_id)
        return StaticContent.compute_location(cls.course.id, asset_name)

    @classmethod
    def setUpClass(cls):
        super(TestSaveSubsToStore, cls).setUpClass()
        cls.course = CourseFactory.create(
            org=cls.org,
            number=cls.number,
            display_name=cls.display_name,
        )

        # Well-formed subtitles used by the successful save/copy tests.
        cls.subs = {
            'start': [100, 200, 240, 390, 1000],
            'end': [200, 240, 380, 1000, 1500],
            'text': ['subs #1', 'subs #2', 'subs #3', 'subs #4', 'subs #5'],
        }

        # Prefix it to ensure that unicode filenames are allowed
        cls.subs_id = u'uniçøde_{}'.format(uuid4())
        cls.subs_copied_id = u'cøpy_{}'.format(uuid4())

        cls.content_location = cls.sub_id_to_location(cls.subs_id)
        cls.content_copied_location = cls.sub_id_to_location(cls.subs_copied_id)

        # Incorrect subs: a set can't be serialized.
        cls.unjsonable_subs = {1}

        cls.unjsonable_subs_id = str(uuid4())
        cls.content_location_unjsonable = cls.sub_id_to_location(cls.unjsonable_subs_id)

    def setUp(self):
        super(TestSaveSubsToStore, self).setUp()
        # Start from a clean store and make sure it is cleaned again
        # afterwards, whatever the outcome of the test.
        self.addCleanup(self.clear_subs_content)
        self.clear_subs_content()

    def test_save_unicode_filename(self):
        # Mock a video item
        video_item = Mock(location=Mock(course_key=self.course.id))
        transcripts_utils.save_subs_to_store(self.subs, self.subs_id, self.course)
        transcripts_utils.copy_or_rename_transcript(self.subs_copied_id, self.subs_id, video_item)
        self.assertTrue(contentstore().find(self.content_copied_location))

    def test_save_subs_to_store(self):
        # Precondition: nothing is stored under the target location yet.
        with self.assertRaises(NotFoundError):
            contentstore().find(self.content_location)

        saved_location = transcripts_utils.save_subs_to_store(self.subs, self.subs_id, self.course)

        self.assertTrue(contentstore().find(self.content_location))
        self.assertEqual(saved_location, self.content_location)

    def test_save_unjsonable_subs_to_store(self):
        """
        Ensures that subs, that can't be dumped, can't be found later.
        """
        with self.assertRaises(NotFoundError):
            contentstore().find(self.content_location_unjsonable)

        with self.assertRaises(TypeError):
            transcripts_utils.save_subs_to_store(
                self.unjsonable_subs, self.unjsonable_subs_id, self.course
            )

        # The failed save must not have left anything behind.
        with self.assertRaises(NotFoundError):
            contentstore().find(self.content_location_unjsonable)
|
|
|
|
|
|
class TestYoutubeSubsBase(SharedModuleStoreTestCase):
    """
    Base class for tests of Youtube subs.

    The course is created here, in an undecorated base class, because using
    override_settings and a setUpClass() override in a test class which is
    inherited by another test class doesn't work well with pytest-django.

    `org`, `number` and `display_name` are read from the class, so concrete
    subclasses are expected to define them.
    """
    @classmethod
    def setUpClass(cls):
        super(TestYoutubeSubsBase, cls).setUpClass()
        course_fields = {
            'org': cls.org,
            'number': cls.number,
            'display_name': cls.display_name,
        }
        cls.course = CourseFactory.create(**course_fields)
|
|
|
|
|
|
@override_settings(CONTENTSTORE=TEST_DATA_CONTENTSTORE)
class TestDownloadYoutubeSubs(TestYoutubeSubsBase):
    """Tests for `download_youtube_subs` function."""

    # Attributes of the course created by `TestYoutubeSubsBase.setUpClass`.
    org = 'MITx'
    number = '999'
    display_name = 'Test course'

    def _subs_location(self, subs_id):
        """
        Return the contentstore location of the sjson asset for `subs_id`
        within the test course.
        """
        filename = 'subs_{0}.srt.sjson'.format(subs_id)
        return StaticContent.compute_location(self.course.id, filename)

    def clear_sub_content(self, subs_id):
        """
        Remove, if subtitle content exists.
        """
        try:
            content = contentstore().find(self._subs_location(subs_id))
            contentstore().delete(content.location)
        except NotFoundError:
            # Nothing stored for this id, so nothing to remove.
            pass

    def clear_subs_content(self, youtube_subs):
        """
        Remove, if subtitles content exists.

        youtube_subs: dict of '{speed: youtube_id}' format for different speeds.
        """
        for subs_id in youtube_subs.values():
            self.clear_sub_content(subs_id)

    def test_success_downloading_subs(self):

        response = textwrap.dedent("""<?xml version="1.0" encoding="utf-8" ?>
                <transcript>
                    <text start="0" dur="0.27"></text>
                    <text start="0.27" dur="2.45">Test text 1.</text>
                    <text start="2.72">Test text 2.</text>
                    <text start="5.43" dur="1.73">Test text 3.</text>
                </transcript>
        """)
        good_youtube_sub = 'good_id_2'
        self.clear_sub_content(good_youtube_sub)
        # Registered up front so the asset is removed even if an assertion
        # below fails.
        self.addCleanup(self.clear_sub_content, good_youtube_sub)

        with patch('xmodule.video_module.transcripts_utils.requests.get') as mock_get:
            mock_get.return_value = Mock(status_code=200, text=response, content=response)
            # Check transcripts_utils.GetTranscriptsFromYouTubeException not thrown
            transcripts_utils.download_youtube_subs(good_youtube_sub, self.course, settings)

        mock_get.assert_any_call('http://video.google.com/timedtext', params={'lang': 'en', 'v': 'good_id_2'})

        # Check asset status after import of transcript.
        self.assertTrue(contentstore().find(self._subs_location(good_youtube_sub)))

    def test_subs_for_html5_vid_with_periods(self):
        """
        This is to verify a fix whereby subtitle files uploaded against
        a HTML5 video that contains periods in the name causes
        incorrect subs name parsing
        """
        html5_ids = transcripts_utils.get_html5_ids(['foo.mp4', 'foo.1.bar.mp4', 'foo/bar/baz.1.4.mp4', 'foo'])
        self.assertEqual(4, len(html5_ids))
        self.assertEqual(html5_ids[0], 'foo')
        self.assertEqual(html5_ids[1], 'foo.1.bar')
        self.assertEqual(html5_ids[2], 'baz.1.4')
        self.assertEqual(html5_ids[3], 'foo')

    @patch('xmodule.video_module.transcripts_utils.requests.get')
    def test_fail_downloading_subs(self, mock_get):

        mock_get.return_value = Mock(status_code=404, text='Error 404')

        bad_youtube_sub = 'BAD_YOUTUBE_ID2'
        self.clear_sub_content(bad_youtube_sub)
        # Guarantee cleanup even if the download unexpectedly stores something
        # and the assertions below fail.
        self.addCleanup(self.clear_sub_content, bad_youtube_sub)

        with self.assertRaises(transcripts_utils.GetTranscriptsFromYouTubeException):
            transcripts_utils.download_youtube_subs(bad_youtube_sub, self.course, settings)

        # Check asset status after import of transcript.
        with self.assertRaises(NotFoundError):
            contentstore().find(self._subs_location(bad_youtube_sub))

    def test_success_downloading_chinese_transcripts(self):

        # Disabled 11/14/13
        # This test is flakey because it performs an HTTP request on an external service
        # Re-enable when `requests.get` is patched using `mock.patch`
        raise SkipTest

        # The code below is unreachable while the test is skipped. It is kept
        # runnable for when the test is re-enabled: it previously iterated
        # over an undefined `good_youtube_subs` name.
        good_youtube_sub = 'j_jEn79vS3g'  # Chinese, utf-8
        self.clear_sub_content(good_youtube_sub)
        self.addCleanup(self.clear_sub_content, good_youtube_sub)

        # Check transcripts_utils.GetTranscriptsFromYouTubeException not thrown
        transcripts_utils.download_youtube_subs(good_youtube_sub, self.course, settings)

        # Check asset status after importing subtitles.
        self.assertTrue(contentstore().find(self._subs_location(good_youtube_sub)))

    @patch('xmodule.video_module.transcripts_utils.requests.get')
    def test_get_transcript_name_youtube_server_success(self, mock_get):
        """
        Get transcript name from transcript_list fetch from youtube server api
        depends on language code, default language in YOUTUBE Text Api is "en"
        """
        # Deep copy so that adding the video id does not leak into settings.
        youtube_text_api = copy.deepcopy(settings.YOUTUBE['TEXT_API'])
        youtube_text_api['params']['v'] = 'dummy_video_id'
        response_success = """
        <transcript_list>
            <track id="1" name="Custom" lang_code="en" />
            <track id="0" name="Custom1" lang_code="en-GB"/>
        </transcript_list>
        """
        mock_get.return_value = Mock(status_code=200, text=response_success, content=response_success)

        transcript_name = transcripts_utils.youtube_video_transcript_name(youtube_text_api)
        self.assertEqual(transcript_name, 'Custom')

    @patch('xmodule.video_module.transcripts_utils.requests.get')
    def test_get_transcript_name_youtube_server_no_transcripts(self, mock_get):
        """
        When there are no transcripts of video transcript name will be None
        """
        youtube_text_api = copy.deepcopy(settings.YOUTUBE['TEXT_API'])
        youtube_text_api['params']['v'] = 'dummy_video_id'
        response_success = "<transcript_list></transcript_list>"
        mock_get.return_value = Mock(status_code=200, text=response_success, content=response_success)

        transcript_name = transcripts_utils.youtube_video_transcript_name(youtube_text_api)
        self.assertIsNone(transcript_name)

    @patch('xmodule.video_module.transcripts_utils.requests.get')
    def test_get_transcript_name_youtube_server_language_not_exist(self, mock_get):
        """
        When the language does not exist in transcript_list transcript name will be None
        """
        youtube_text_api = copy.deepcopy(settings.YOUTUBE['TEXT_API'])
        youtube_text_api['params']['v'] = 'dummy_video_id'
        # 'abc' matches none of the lang_code values in the response below.
        youtube_text_api['params']['lang'] = 'abc'
        response_success = """
        <transcript_list>
            <track id="1" name="Custom" lang_code="en" />
            <track id="0" name="Custom1" lang_code="en-GB"/>
        </transcript_list>
        """
        mock_get.return_value = Mock(status_code=200, text=response_success, content=response_success)

        transcript_name = transcripts_utils.youtube_video_transcript_name(youtube_text_api)
        self.assertIsNone(transcript_name)

    @patch('xmodule.video_module.transcripts_utils.requests.get', side_effect=mock_requests_get)
    def test_downloading_subs_using_transcript_name(self, mock_get):
        """
        Download transcript using transcript name in url
        """
        good_youtube_sub = 'good_id_2'
        self.clear_sub_content(good_youtube_sub)
        # Registered up front so the asset is removed even if an assertion
        # below fails.
        self.addCleanup(self.clear_sub_content, good_youtube_sub)

        transcripts_utils.download_youtube_subs(good_youtube_sub, self.course, settings)
        mock_get.assert_any_call(
            'http://video.google.com/timedtext',
            params={'lang': 'en', 'v': 'good_id_2', 'name': 'Custom'}
        )

        # Check asset status after import of transcript.
        self.assertTrue(contentstore().find(self._subs_location(good_youtube_sub)))
|
|
|
|
|
|
class TestGenerateSubsFromSource(TestDownloadYoutubeSubs):
    """
    Tests for `generate_subs_from_source` function.

    NOTE(review): because this class subclasses `TestDownloadYoutubeSubs`, the
    test methods defined there are inherited and collected here as well.
    """

    def test_success_generating_subs(self):
        youtube_subs = {
            0.5: 'JMD_ifUUfsU',
            1.0: 'hI10vDNYz4M',
            2.0: 'AKqURZnYqpk'
        }
        srt_filedata = textwrap.dedent("""
            1
            00:00:10,500 --> 00:00:13,000
            Elephant's Dream

            2
            00:00:15,000 --> 00:00:18,000
            At the left we can see...
        """)
        self.clear_subs_content(youtube_subs)
        # Registered up front so the generated assets are removed even if an
        # assertion below fails.
        self.addCleanup(self.clear_subs_content, youtube_subs)

        # Check transcripts_utils.TranscriptsGenerationException not thrown.
        # Also checks that uppercase file extensions are supported.
        transcripts_utils.generate_subs_from_source(youtube_subs, 'SRT', srt_filedata, self.course)

        # Check assets status after importing subtitles.
        for subs_id in youtube_subs.values():
            filename = 'subs_{0}.srt.sjson'.format(subs_id)
            content_location = StaticContent.compute_location(
                self.course.id, filename
            )
            self.assertTrue(contentstore().find(content_location))

    def test_fail_bad_subs_type(self):
        youtube_subs = {
            0.5: 'JMD_ifUUfsU',
            1.0: 'hI10vDNYz4M',
            2.0: 'AKqURZnYqpk'
        }

        srt_filedata = textwrap.dedent("""
            1
            00:00:10,500 --> 00:00:13,000
            Elephant's Dream

            2
            00:00:15,000 --> 00:00:18,000
            At the left we can see...
        """)

        with self.assertRaises(transcripts_utils.TranscriptsGenerationException) as cm:
            transcripts_utils.generate_subs_from_source(youtube_subs, 'BAD_FORMAT', srt_filedata, self.course)
        # `BaseException.message` is deprecated since Python 2.6 and absent in
        # Python 3; str() yields the same text for a single-argument exception.
        exception_message = str(cm.exception)
        self.assertEqual(exception_message, "We support only SubRip (*.srt) transcripts format.")

    def test_fail_bad_subs_filedata(self):
        youtube_subs = {
            0.5: 'JMD_ifUUfsU',
            1.0: 'hI10vDNYz4M',
            2.0: 'AKqURZnYqpk'
        }

        srt_filedata = """BAD_DATA"""

        with self.assertRaises(transcripts_utils.TranscriptsGenerationException) as cm:
            transcripts_utils.generate_subs_from_source(youtube_subs, 'srt', srt_filedata, self.course)
        # See test_fail_bad_subs_type for why str() is used instead of `.message`.
        exception_message = str(cm.exception)
        self.assertEqual(exception_message, "Something wrong with SubRip transcripts file during parsing.")
|
|
|
|
|
|
class TestGenerateSrtFromSjson(TestDownloadYoutubeSubs):
    """Tests for `generate_srt_from_sjson` function."""

    @staticmethod
    def _sample_sjson_subs():
        """Return a fresh copy of the five-cue fixture used by the success tests."""
        return {
            'start': [100, 200, 240, 390, 54000],
            'end': [200, 240, 380, 1000, 78400],
            'text': ['subs #1', 'subs #2', 'subs #3', 'subs #4', 'subs #5'],
        }

    def _assert_srt_contains(self, speed, expected_subs):
        """
        Generate SRT from the sample fixture with `speed` as second argument
        and check that the result is truthy and contains every expected cue.
        """
        srt_subs = transcripts_utils.generate_srt_from_sjson(self._sample_sjson_subs(), speed)
        self.assertTrue(srt_subs)
        for expected_cue in expected_subs:
            self.assertIn(expected_cue, srt_subs)

    def test_success_generating_subs(self):
        # Second argument 1: timings are expected to appear unscaled.
        self._assert_srt_contains(1, [
            '00:00:00,100 --> 00:00:00,200\nsubs #1',
            '00:00:00,200 --> 00:00:00,240\nsubs #2',
            '00:00:00,240 --> 00:00:00,380\nsubs #3',
            '00:00:00,390 --> 00:00:01,000\nsubs #4',
            '00:00:54,000 --> 00:01:18,400\nsubs #5',
        ])

    def test_success_generating_subs_speed_up(self):
        # Second argument 0.5: timings are expected to be halved.
        self._assert_srt_contains(0.5, [
            '00:00:00,050 --> 00:00:00,100\nsubs #1',
            '00:00:00,100 --> 00:00:00,120\nsubs #2',
            '00:00:00,120 --> 00:00:00,190\nsubs #3',
            '00:00:00,195 --> 00:00:00,500\nsubs #4',
            '00:00:27,000 --> 00:00:39,200\nsubs #5',
        ])

    def test_success_generating_subs_speed_down(self):
        # Second argument 2: timings are expected to be doubled.
        self._assert_srt_contains(2, [
            '00:00:00,200 --> 00:00:00,400\nsubs #1',
            '00:00:00,400 --> 00:00:00,480\nsubs #2',
            '00:00:00,480 --> 00:00:00,760\nsubs #3',
            '00:00:00,780 --> 00:00:02,000\nsubs #4',
            '00:01:48,000 --> 00:02:36,800\nsubs #5',
        ])

    def test_fail_generating_subs(self):
        # 'end' has fewer entries than 'start' and 'text'; a falsy result is
        # expected for such inconsistent input.
        inconsistent_subs = {
            'start': [100, 200],
            'end': [100],
            'text': ['subs #1', 'subs #2'],
        }
        srt_subs = transcripts_utils.generate_srt_from_sjson(inconsistent_subs, 1)
        self.assertFalse(srt_subs)
|
|
|
|
|
|
class TestYoutubeTranscripts(unittest.TestCase):
    """
    Tests for checking right datastructure returning when using youtube api.

    `requests.get` is patched in every test, so no real HTTP request is made.
    """
    @patch('xmodule.video_module.transcripts_utils.requests.get')
    def test_youtube_bad_status_code(self, mock_get):
        # A 404 response is expected to surface as an exception.
        mock_get.return_value = Mock(status_code=404, text='test')
        with self.assertRaises(transcripts_utils.GetTranscriptsFromYouTubeException):
            transcripts_utils.get_transcripts_from_youtube('bad_youtube_id', settings, translation)

    @patch('xmodule.video_module.transcripts_utils.requests.get')
    def test_youtube_empty_text(self, mock_get):
        # A 200 response with an empty body is expected to raise as well.
        mock_get.return_value = Mock(status_code=200, text='')
        with self.assertRaises(transcripts_utils.GetTranscriptsFromYouTubeException):
            transcripts_utils.get_transcripts_from_youtube('bad_youtube_id', settings, translation)

    def test_youtube_good_result(self):
        xml_response = textwrap.dedent("""<?xml version="1.0" encoding="utf-8" ?>
                <transcript>
                    <text start="0" dur="0.27"></text>
                    <text start="0.27" dur="2.45">Test text 1.</text>
                    <text start="2.72">Test text 2.</text>
                    <text start="5.43" dur="1.73">Test text 3.</text>
                </transcript>
        """)
        # The first, empty cue is expected to be absent from the result, and
        # the cue without a `dur` attribute is expected to end where it starts.
        expected_transcripts = {
            'start': [270, 2720, 5430],
            'end': [2720, 2720, 7160],
            'text': ['Test text 1.', 'Test text 2.', 'Test text 3.']
        }
        with patch('xmodule.video_module.transcripts_utils.requests.get') as mock_get:
            mock_get.return_value = Mock(status_code=200, text=xml_response, content=xml_response)
            fetched = transcripts_utils.get_transcripts_from_youtube('good_youtube_id', settings, translation)

        self.assertEqual(fetched, expected_transcripts)
        mock_get.assert_called_with(
            'http://video.google.com/timedtext',
            params={'lang': 'en', 'v': 'good_youtube_id'}
        )
|
|
|
|
|
|
class TestTranscript(unittest.TestCase):
    """
    Tests for Transcript class e.g. different transcript conversions.
    """
    def setUp(self):
        super(TestTranscript, self).setUp()

        # The same two cues expressed in each of the three formats exercised
        # by the conversion tests below.
        self.srt_transcript = textwrap.dedent("""\
            0
            00:00:10,500 --> 00:00:13,000
            Elephant's Dream

            1
            00:00:15,000 --> 00:00:18,000
            At the left we can see...

        """)

        self.sjson_transcript = textwrap.dedent("""\
            {
                "start": [
                    10500,
                    15000
                ],
                "end": [
                    13000,
                    18000
                ],
                "text": [
                    "Elephant's Dream",
                    "At the left we can see..."
                ]
            }
        """)

        self.txt_transcript = u"Elephant's Dream\nAt the left we can see..."

    def test_convert_srt_to_txt(self):
        converted = transcripts_utils.Transcript.convert(self.srt_transcript, 'srt', 'txt')
        self.assertEqual(converted, self.txt_transcript)

    def test_convert_srt_to_srt(self):
        # Converting to the same format is expected to return the input as is.
        converted = transcripts_utils.Transcript.convert(self.srt_transcript, 'srt', 'srt')
        self.assertEqual(converted, self.srt_transcript)

    def test_convert_sjson_to_txt(self):
        converted = transcripts_utils.Transcript.convert(self.sjson_transcript, 'sjson', 'txt')
        self.assertEqual(converted, self.txt_transcript)

    def test_convert_sjson_to_srt(self):
        converted = transcripts_utils.Transcript.convert(self.sjson_transcript, 'sjson', 'srt')
        self.assertEqual(converted, self.srt_transcript)

    def test_convert_srt_to_sjson(self):
        # This direction is expected to be rejected with NotImplementedError.
        with self.assertRaises(NotImplementedError):
            transcripts_utils.Transcript.convert(self.srt_transcript, 'srt', 'sjson')
|
|
|
|
|
|
class TestSubsFilename(unittest.TestCase):
    """
    Tests for subs_filename function.
    """

    def test_unicode(self):
        # Without a second argument the non-ASCII id is wrapped as
        # 'subs_<id>.srt.sjson'.
        plain_name = transcripts_utils.subs_filename(u"˙∆©ƒƒƒ")
        self.assertEqual(plain_name, u'subs_˙∆©ƒƒƒ.srt.sjson')
        # With 'uk' as second argument the same name gains a 'uk_' prefix.
        prefixed_name = transcripts_utils.subs_filename(u"˙∆©ƒƒƒ", 'uk')
        self.assertEqual(prefixed_name, u'uk_subs_˙∆©ƒƒƒ.srt.sjson')
|
|
|
|
|
|
@ddt.ddt
class TestVideoIdsInfo(unittest.TestCase):
    """
    Tests for `get_video_ids_info`.
    """
    @ddt.data(
        # edx_video_id present: it leads the id list and the first element of
        # the expected result is False.
        {
            'edx_video_id': '000-000-000',
            'youtube_id_1_0': '12as34',
            'html5_sources': ['www.abc.com/foo.mp4', 'www.abc.com/bar.webm', 'foo/bar/baz.m3u8'],
            'expected_result': (False, ['000-000-000', '12as34', 'foo', 'bar', 'baz'])
        },
        # Empty edx_video_id: first element is True and the list starts with
        # the youtube id.
        {
            'edx_video_id': '',
            'youtube_id_1_0': '12as34',
            'html5_sources': ['www.abc.com/foo.mp4', 'www.abc.com/bar.webm', 'foo/bar/baz.m3u8'],
            'expected_result': (True, ['12as34', 'foo', 'bar', 'baz'])
        },
        # Neither edx_video_id nor youtube id: only the html5 ids remain.
        {
            'edx_video_id': '',
            'youtube_id_1_0': '',
            'html5_sources': ['www.abc.com/foo.mp4', 'www.abc.com/bar.webm'],
            'expected_result': (True, ['foo', 'bar'])
        },
    )
    @ddt.unpack
    def test_get_video_ids_info(self, edx_video_id, youtube_id_1_0, html5_sources, expected_result):
        """
        Verify that `get_video_ids_info` works as expected.
        """
        video_ids_info = transcripts_utils.get_video_ids_info(edx_video_id, youtube_id_1_0, html5_sources)
        self.assertEqual(video_ids_info, expected_result)
|