From d23e772dd099e2bc124a3457bdd5148b776317ff Mon Sep 17 00:00:00 2001 From: Waheed Ahmed Date: Fri, 12 Dec 2014 19:26:08 +0500 Subject: [PATCH] Fixed unicode problem if transcript file is not UTF-8 encoded. TNL-527 --- .../lib/xmodule/xmodule/video_module/video_handlers.py | 9 ++++++++- lms/djangoapps/courseware/tests/test_video_handlers.py | 6 ++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/common/lib/xmodule/xmodule/video_module/video_handlers.py b/common/lib/xmodule/xmodule/video_module/video_handlers.py index 4c6b3340ce..9e9db860ca 100644 --- a/common/lib/xmodule/xmodule/video_module/video_handlers.py +++ b/common/lib/xmodule/xmodule/video_module/video_handlers.py @@ -300,7 +300,14 @@ class VideoStudioViewHandlers(object): if request.method == 'POST': subtitles = request.POST['file'] - save_to_store(subtitles.file.read(), unicode(subtitles.filename), 'application/x-subrip', self.location) + try: + file_data = subtitles.file.read() + unicode(file_data, "utf-8", "strict") + except UnicodeDecodeError: + log.info("Invalid encoding type for transcript file: {}".format(subtitles.filename)) + msg = _("Invalid encoding type, transcripts should be UTF-8 encoded.") + return Response(msg, status=400) + save_to_store(file_data, unicode(subtitles.filename), 'application/x-subrip', self.location) generate_sjson_for_all_speeds(self, unicode(subtitles.filename), {}, language) response = {'filename': unicode(subtitles.filename), 'status': 'Success'} return Response(json.dumps(response), status=201) diff --git a/lms/djangoapps/courseware/tests/test_video_handlers.py b/lms/djangoapps/courseware/tests/test_video_handlers.py index 704d68cd79..0bf1dad60e 100644 --- a/lms/djangoapps/courseware/tests/test_video_handlers.py +++ b/lms/djangoapps/courseware/tests/test_video_handlers.py @@ -577,8 +577,10 @@ class TestStudioTranscriptTranslationPostDispatch(TestVideo): response = self.item_descriptor.studio_transcript(request=request, dispatch='translation/uk') request = Request.blank('/translation/uk', POST={'file': ('filename.srt', SRT_content.decode('utf8').encode('cp1251'))}) - with self.assertRaises(UnicodeDecodeError): # Non-UTF8 file content encoding. - response = self.item_descriptor.studio_transcript(request=request, dispatch='translation/uk') + # Non-UTF8 file content encoding. + response = self.item_descriptor.studio_transcript(request=request, dispatch='translation/uk') + self.assertEqual(response.status_code, 400) + self.assertEqual(response.body, "Invalid encoding type, transcripts should be UTF-8 encoded.") # No language is passed. request = Request.blank('/translation', POST={'file': ('filename', SRT_content)})