Merge pull request #6746 from edx/waheed/tnl935-fix-BOM-in-transcript
Fixed a bug where the first line of a transcript was skipped if the file contained a BOM (Byte Order Mark).
This commit is contained in:
@@ -302,7 +302,7 @@ class TestUploadtranscripts(Basetranscripts):
|
||||
"""
|
||||
Test uploading subs containing BOM(Byte Order Mark), e.g. U+FEFF
|
||||
"""
|
||||
filedate = textwrap.dedent("""
|
||||
filedata = textwrap.dedent("""
|
||||
1
|
||||
00:00:10,500 --> 00:00:13,000
|
||||
Test ufeff characters
|
||||
@@ -313,8 +313,8 @@ class TestUploadtranscripts(Basetranscripts):
|
||||
""").encode('utf-8-sig')
|
||||
|
||||
# Verify that ufeff character is in filedata.
|
||||
self.assertIn("ufeff", filedate)
|
||||
self.ufeff_srt_file.write(filedate)
|
||||
self.assertIn("ufeff", filedata)
|
||||
self.ufeff_srt_file.write(filedata)
|
||||
self.ufeff_srt_file.seek(0)
|
||||
|
||||
link = reverse('upload_transcripts')
|
||||
|
||||
@@ -398,10 +398,11 @@ def generate_sjson_for_all_speeds(item, user_filename, result_subs_dict, lang):
|
||||
if not lang:
|
||||
lang = item.transcript_language
|
||||
|
||||
# Used utf-8-sig encoding type instead of utf-8 to remove BOM(Byte Order Mark), e.g. U+FEFF
|
||||
generate_subs_from_source(
|
||||
result_subs_dict,
|
||||
os.path.splitext(user_filename)[1][1:],
|
||||
srt_transcripts.data.decode('utf8'),
|
||||
srt_transcripts.data.decode('utf-8-sig'),
|
||||
item,
|
||||
lang
|
||||
)
|
||||
|
||||
@@ -489,3 +489,23 @@ class VideoEditorTest(CMSVideoBaseTest):
|
||||
self.assertIn(unicode_text, self.video.captions_text)
|
||||
self.assertEqual(self.video.caption_languages.keys(), [u'table', u'uk'])
|
||||
self.assertEqual(self.video.caption_languages.keys()[0], 'table')
|
||||
|
||||
def test_upload_transcript_with_BOM(self):
|
||||
"""
|
||||
Scenario: User can upload transcript file with BOM(Byte Order Mark) in it.
|
||||
Given I have created a Video component
|
||||
And I edit the component
|
||||
And I open tab "Advanced"
|
||||
And I upload transcript file "chinese_transcripts_with_BOM.srt" for "zh" language code
|
||||
And I save changes
|
||||
Then when I view the video it does show the captions
|
||||
And I see "莎拉·佩林 (Sarah Palin)" text in the captions
|
||||
"""
|
||||
self._create_video_component()
|
||||
self.edit_component()
|
||||
self.open_advanced_tab()
|
||||
self.video.upload_translation('chinese_transcripts_with_BOM.srt', 'zh')
|
||||
self.save_unit_settings()
|
||||
self.assertTrue(self.video.is_captions_visible())
|
||||
unicode_text = "莎拉·佩林 (Sarah Palin)".decode('utf-8')
|
||||
self.assertIn(unicode_text, self.video.captions_lines())
|
||||
|
||||
19
common/test/data/uploads/chinese_transcripts_with_BOM.srt
Normal file
19
common/test/data/uploads/chinese_transcripts_with_BOM.srt
Normal file
@@ -0,0 +1,19 @@
|
||||
1
|
||||
00:00:16,850 --> 00:00:23,850
|
||||
莎拉·佩林 (Sarah Palin) 的著作《我行我素》被乔纳森·拉班(Jonathan Raban) 评论为“400页对高尚无知的赞美”
|
||||
|
||||
2
|
||||
00:00:24,040 --> 00:00:30,680
|
||||
他是什么意思呢?拉班所指的那种思想
|
||||
|
||||
3
|
||||
00:00:30,680 --> 00:00:35,660
|
||||
可以用“我不太懂艺术 但我知道我喜欢什么”做比喻
|
||||
|
||||
4
|
||||
00:00:35,660 --> 00:00:42,410
|
||||
他将其描述为“常识性保守派”
|
||||
|
||||
5
|
||||
00:00:42,410 --> 00:00:47,510
|
||||
即占据道德制高点的外行人能比专家更好地评价 比方说
|
||||
Reference in New Issue
Block a user