408 lines
14 KiB
Python
408 lines
14 KiB
Python
""" Tests for transcripts_utils. """
|
|
import unittest
|
|
from uuid import uuid4
|
|
import copy
|
|
import textwrap
|
|
|
|
from pymongo import MongoClient
|
|
|
|
from django.test.utils import override_settings
|
|
from django.conf import settings
|
|
|
|
from xmodule.modulestore.tests.factories import CourseFactory
|
|
from xmodule.contentstore.content import StaticContent
|
|
from xmodule.modulestore.tests.django_utils import ModuleStoreTestCase
|
|
from xmodule.exceptions import NotFoundError
|
|
from xmodule.contentstore.django import contentstore, _CONTENTSTORE
|
|
from contentstore import transcripts_utils
|
|
|
|
from contentstore.tests.modulestore_config import TEST_MODULESTORE
|
|
TEST_DATA_CONTENTSTORE = copy.deepcopy(settings.CONTENTSTORE)
|
|
TEST_DATA_CONTENTSTORE['DOC_STORE_CONFIG']['db'] = 'test_xcontent_%s' % uuid4().hex
|
|
|
|
|
|
class TestGenerateSubs(unittest.TestCase):
|
|
"""Tests for `generate_subs` function."""
|
|
def setUp(self):
|
|
self.source_subs = {
|
|
'start': [100, 200, 240, 390, 1000],
|
|
'end': [200, 240, 380, 1000, 1500],
|
|
'text': [
|
|
'subs #1',
|
|
'subs #2',
|
|
'subs #3',
|
|
'subs #4',
|
|
'subs #5'
|
|
]
|
|
}
|
|
|
|
def test_generate_subs_increase_speed(self):
|
|
subs = transcripts_utils.generate_subs(2, 1, self.source_subs)
|
|
self.assertDictEqual(
|
|
subs,
|
|
{
|
|
'start': [200, 400, 480, 780, 2000],
|
|
'end': [400, 480, 760, 2000, 3000],
|
|
'text': ['subs #1', 'subs #2', 'subs #3', 'subs #4', 'subs #5']
|
|
}
|
|
)
|
|
|
|
def test_generate_subs_decrease_speed_1(self):
|
|
subs = transcripts_utils.generate_subs(0.5, 1, self.source_subs)
|
|
self.assertDictEqual(
|
|
subs,
|
|
{
|
|
'start': [50, 100, 120, 195, 500],
|
|
'end': [100, 120, 190, 500, 750],
|
|
'text': ['subs #1', 'subs #2', 'subs #3', 'subs #4', 'subs #5']
|
|
}
|
|
)
|
|
|
|
def test_generate_subs_decrease_speed_2(self):
|
|
"""Test for correct devision during `generate_subs` process."""
|
|
subs = transcripts_utils.generate_subs(1, 2, self.source_subs)
|
|
self.assertDictEqual(
|
|
subs,
|
|
{
|
|
'start': [50, 100, 120, 195, 500],
|
|
'end': [100, 120, 190, 500, 750],
|
|
'text': ['subs #1', 'subs #2', 'subs #3', 'subs #4', 'subs #5']
|
|
}
|
|
)
|
|
|
|
|
|
@override_settings(CONTENTSTORE=TEST_DATA_CONTENTSTORE, MODULESTORE=TEST_MODULESTORE)
|
|
class TestSaveSubsToStore(ModuleStoreTestCase):
|
|
"""Tests for `save_subs_to_store` function."""
|
|
|
|
org = 'MITx'
|
|
number = '999'
|
|
display_name = 'Test course'
|
|
|
|
def clear_subs_content(self):
|
|
"""Remove, if subtitles content exists."""
|
|
try:
|
|
content = contentstore().find(self.content_location)
|
|
contentstore().delete(content.get_id())
|
|
except NotFoundError:
|
|
pass
|
|
|
|
def setUp(self):
|
|
|
|
self.course = CourseFactory.create(
|
|
org=self.org, number=self.number, display_name=self.display_name)
|
|
|
|
self.subs = {
|
|
'start': [100, 200, 240, 390, 1000],
|
|
'end': [200, 240, 380, 1000, 1500],
|
|
'text': [
|
|
'subs #1',
|
|
'subs #2',
|
|
'subs #3',
|
|
'subs #4',
|
|
'subs #5'
|
|
]
|
|
}
|
|
|
|
self.subs_id = str(uuid4())
|
|
filename = 'subs_{0}.srt.sjson'.format(self.subs_id)
|
|
self.content_location = StaticContent.compute_location(
|
|
self.org, self.number, filename
|
|
)
|
|
|
|
# incorrect subs
|
|
self.unjsonable_subs = set([1]) # set can't be serialized
|
|
|
|
self.unjsonable_subs_id = str(uuid4())
|
|
filename_unjsonable = 'subs_{0}.srt.sjson'.format(self.unjsonable_subs_id)
|
|
self.content_location_unjsonable = StaticContent.compute_location(
|
|
self.org, self.number, filename_unjsonable
|
|
)
|
|
|
|
self.clear_subs_content()
|
|
|
|
def test_save_subs_to_store(self):
|
|
with self.assertRaises(NotFoundError):
|
|
contentstore().find(self.content_location)
|
|
|
|
result_location = transcripts_utils.save_subs_to_store(
|
|
self.subs,
|
|
self.subs_id,
|
|
self.course)
|
|
|
|
self.assertTrue(contentstore().find(self.content_location))
|
|
self.assertEqual(result_location, self.content_location)
|
|
|
|
def test_save_unjsonable_subs_to_store(self):
|
|
"""
|
|
Assures that subs, that can't be dumped, can't be found later.
|
|
"""
|
|
with self.assertRaises(NotFoundError):
|
|
contentstore().find(self.content_location_unjsonable)
|
|
|
|
with self.assertRaises(TypeError):
|
|
transcripts_utils.save_subs_to_store(
|
|
self.unjsonable_subs,
|
|
self.unjsonable_subs_id,
|
|
self.course)
|
|
|
|
with self.assertRaises(NotFoundError):
|
|
contentstore().find(self.content_location_unjsonable)
|
|
|
|
def tearDown(self):
|
|
self.clear_subs_content()
|
|
MongoClient().drop_database(TEST_DATA_CONTENTSTORE['DOC_STORE_CONFIG']['db'])
|
|
_CONTENTSTORE.clear()
|
|
|
|
|
|
@override_settings(CONTENTSTORE=TEST_DATA_CONTENTSTORE, MODULESTORE=TEST_MODULESTORE)
|
|
class TestDownloadYoutubeSubs(ModuleStoreTestCase):
|
|
"""Tests for `download_youtube_subs` function."""
|
|
|
|
org = 'MITx'
|
|
number = '999'
|
|
display_name = 'Test course'
|
|
|
|
def clear_subs_content(self, youtube_subs):
|
|
"""Remove, if subtitles content exists."""
|
|
for subs_id in youtube_subs.values():
|
|
filename = 'subs_{0}.srt.sjson'.format(subs_id)
|
|
content_location = StaticContent.compute_location(
|
|
self.org, self.number, filename
|
|
)
|
|
try:
|
|
content = contentstore().find(content_location)
|
|
contentstore().delete(content.get_id())
|
|
except NotFoundError:
|
|
pass
|
|
|
|
def setUp(self):
|
|
self.course = CourseFactory.create(
|
|
org=self.org, number=self.number, display_name=self.display_name)
|
|
|
|
def tearDown(self):
|
|
MongoClient().drop_database(TEST_DATA_CONTENTSTORE['DOC_STORE_CONFIG']['db'])
|
|
_CONTENTSTORE.clear()
|
|
|
|
def test_success_downloading_subs(self):
|
|
good_youtube_subs = {
|
|
0.5: 'JMD_ifUUfsU',
|
|
1.0: 'hI10vDNYz4M',
|
|
2.0: 'AKqURZnYqpk'
|
|
}
|
|
self.clear_subs_content(good_youtube_subs)
|
|
|
|
# Check transcripts_utils.GetTranscriptsFromYouTubeException not thrown
|
|
transcripts_utils.download_youtube_subs(good_youtube_subs, self.course)
|
|
|
|
# Check assets status after importing subtitles.
|
|
for subs_id in good_youtube_subs.values():
|
|
filename = 'subs_{0}.srt.sjson'.format(subs_id)
|
|
content_location = StaticContent.compute_location(
|
|
self.org, self.number, filename
|
|
)
|
|
self.assertTrue(contentstore().find(content_location))
|
|
|
|
self.clear_subs_content(good_youtube_subs)
|
|
|
|
def test_fail_downloading_subs(self):
|
|
bad_youtube_subs = {
|
|
0.5: 'BAD_YOUTUBE_ID1',
|
|
1.0: 'BAD_YOUTUBE_ID2',
|
|
2.0: 'BAD_YOUTUBE_ID3'
|
|
}
|
|
self.clear_subs_content(bad_youtube_subs)
|
|
|
|
with self.assertRaises(transcripts_utils.GetTranscriptsFromYouTubeException):
|
|
transcripts_utils.download_youtube_subs(bad_youtube_subs, self.course)
|
|
|
|
# Check assets status after importing subtitles.
|
|
for subs_id in bad_youtube_subs.values():
|
|
filename = 'subs_{0}.srt.sjson'.format(subs_id)
|
|
content_location = StaticContent.compute_location(
|
|
self.org, self.number, filename
|
|
)
|
|
with self.assertRaises(NotFoundError):
|
|
contentstore().find(content_location)
|
|
|
|
self.clear_subs_content(bad_youtube_subs)
|
|
|
|
def test_success_downloading_chinise_transcripts(self):
|
|
good_youtube_subs = {
|
|
1.0: 'j_jEn79vS3g', # Chinese, utf-8
|
|
}
|
|
self.clear_subs_content(good_youtube_subs)
|
|
|
|
# Check transcripts_utils.GetTranscriptsFromYouTubeException not thrown
|
|
transcripts_utils.download_youtube_subs(good_youtube_subs, self.course)
|
|
|
|
# Check assets status after importing subtitles.
|
|
for subs_id in good_youtube_subs.values():
|
|
filename = 'subs_{0}.srt.sjson'.format(subs_id)
|
|
content_location = StaticContent.compute_location(
|
|
self.org, self.number, filename
|
|
)
|
|
self.assertTrue(contentstore().find(content_location))
|
|
|
|
self.clear_subs_content(good_youtube_subs)
|
|
|
|
|
|
class TestGenerateSubsFromSource(TestDownloadYoutubeSubs):
|
|
"""Tests for `generate_subs_from_source` function."""
|
|
|
|
def test_success_generating_subs(self):
|
|
youtube_subs = {
|
|
0.5: 'JMD_ifUUfsU',
|
|
1.0: 'hI10vDNYz4M',
|
|
2.0: 'AKqURZnYqpk'
|
|
}
|
|
srt_filedata = textwrap.dedent("""
|
|
1
|
|
00:00:10,500 --> 00:00:13,000
|
|
Elephant's Dream
|
|
|
|
2
|
|
00:00:15,000 --> 00:00:18,000
|
|
At the left we can see...
|
|
""")
|
|
self.clear_subs_content(youtube_subs)
|
|
|
|
# Check transcripts_utils.TranscriptsGenerationException not thrown
|
|
transcripts_utils.generate_subs_from_source(youtube_subs, 'srt', srt_filedata, self.course)
|
|
|
|
# Check assets status after importing subtitles.
|
|
for subs_id in youtube_subs.values():
|
|
filename = 'subs_{0}.srt.sjson'.format(subs_id)
|
|
content_location = StaticContent.compute_location(
|
|
self.org, self.number, filename
|
|
)
|
|
self.assertTrue(contentstore().find(content_location))
|
|
|
|
self.clear_subs_content(youtube_subs)
|
|
|
|
def test_fail_bad_subs_type(self):
|
|
youtube_subs = {
|
|
0.5: 'JMD_ifUUfsU',
|
|
1.0: 'hI10vDNYz4M',
|
|
2.0: 'AKqURZnYqpk'
|
|
}
|
|
|
|
srt_filedata = textwrap.dedent("""
|
|
1
|
|
00:00:10,500 --> 00:00:13,000
|
|
Elephant's Dream
|
|
|
|
2
|
|
00:00:15,000 --> 00:00:18,000
|
|
At the left we can see...
|
|
""")
|
|
|
|
with self.assertRaises(transcripts_utils.TranscriptsGenerationException) as cm:
|
|
transcripts_utils.generate_subs_from_source(youtube_subs, 'BAD_FORMAT', srt_filedata, self.course)
|
|
exception_message = cm.exception.message
|
|
self.assertEqual(exception_message, "We support only SubRip (*.srt) transcripts format.")
|
|
|
|
def test_fail_bad_subs_filedata(self):
|
|
youtube_subs = {
|
|
0.5: 'JMD_ifUUfsU',
|
|
1.0: 'hI10vDNYz4M',
|
|
2.0: 'AKqURZnYqpk'
|
|
}
|
|
|
|
srt_filedata = """BAD_DATA"""
|
|
|
|
with self.assertRaises(transcripts_utils.TranscriptsGenerationException) as cm:
|
|
transcripts_utils.generate_subs_from_source(youtube_subs, 'srt', srt_filedata, self.course)
|
|
exception_message = cm.exception.message
|
|
self.assertEqual(exception_message, "Something wrong with SubRip transcripts file during parsing.")
|
|
|
|
|
|
class TestGenerateSrtFromSjson(TestDownloadYoutubeSubs):
|
|
"""Tests for `generate_srt_from_sjson` function."""
|
|
|
|
def test_success_generating_subs(self):
|
|
sjson_subs = {
|
|
'start': [100, 200, 240, 390, 54000],
|
|
'end': [200, 240, 380, 1000, 78400],
|
|
'text': [
|
|
'subs #1',
|
|
'subs #2',
|
|
'subs #3',
|
|
'subs #4',
|
|
'subs #5'
|
|
]
|
|
}
|
|
srt_subs = transcripts_utils.generate_srt_from_sjson(sjson_subs, 1)
|
|
self.assertTrue(srt_subs)
|
|
expected_subs = [
|
|
'00:00:00,100 --> 00:00:00,200\nsubs #1',
|
|
'00:00:00,200 --> 00:00:00,240\nsubs #2',
|
|
'00:00:00,240 --> 00:00:00,380\nsubs #3',
|
|
'00:00:00,390 --> 00:00:01,000\nsubs #4',
|
|
'00:00:54,000 --> 00:01:18,400\nsubs #5',
|
|
]
|
|
|
|
for sub in expected_subs:
|
|
self.assertIn(sub, srt_subs)
|
|
|
|
def test_success_generating_subs_speed_up(self):
|
|
sjson_subs = {
|
|
'start': [100, 200, 240, 390, 54000],
|
|
'end': [200, 240, 380, 1000, 78400],
|
|
'text': [
|
|
'subs #1',
|
|
'subs #2',
|
|
'subs #3',
|
|
'subs #4',
|
|
'subs #5'
|
|
]
|
|
}
|
|
srt_subs = transcripts_utils.generate_srt_from_sjson(sjson_subs, 0.5)
|
|
self.assertTrue(srt_subs)
|
|
expected_subs = [
|
|
'00:00:00,050 --> 00:00:00,100\nsubs #1',
|
|
'00:00:00,100 --> 00:00:00,120\nsubs #2',
|
|
'00:00:00,120 --> 00:00:00,190\nsubs #3',
|
|
'00:00:00,195 --> 00:00:00,500\nsubs #4',
|
|
'00:00:27,000 --> 00:00:39,200\nsubs #5',
|
|
]
|
|
for sub in expected_subs:
|
|
self.assertIn(sub, srt_subs)
|
|
|
|
def test_success_generating_subs_speed_down(self):
|
|
sjson_subs = {
|
|
'start': [100, 200, 240, 390, 54000],
|
|
'end': [200, 240, 380, 1000, 78400],
|
|
'text': [
|
|
'subs #1',
|
|
'subs #2',
|
|
'subs #3',
|
|
'subs #4',
|
|
'subs #5'
|
|
]
|
|
}
|
|
srt_subs = transcripts_utils.generate_srt_from_sjson(sjson_subs, 2)
|
|
self.assertTrue(srt_subs)
|
|
|
|
expected_subs = [
|
|
'00:00:00,200 --> 00:00:00,400\nsubs #1',
|
|
'00:00:00,400 --> 00:00:00,480\nsubs #2',
|
|
'00:00:00,480 --> 00:00:00,760\nsubs #3',
|
|
'00:00:00,780 --> 00:00:02,000\nsubs #4',
|
|
'00:01:48,000 --> 00:02:36,800\nsubs #5',
|
|
]
|
|
for sub in expected_subs:
|
|
self.assertIn(sub, srt_subs)
|
|
|
|
def test_fail_generating_subs(self):
|
|
sjson_subs = {
|
|
'start': [100, 200],
|
|
'end': [100],
|
|
'text': [
|
|
'subs #1',
|
|
'subs #2'
|
|
]
|
|
}
|
|
srt_subs = transcripts_utils.generate_srt_from_sjson(sjson_subs, 1)
|
|
self.assertFalse(srt_subs)
|