From 97399cf97c986159c60052557aabf20118e44574 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9gis=20Behmo?= Date: Wed, 19 Aug 2020 10:00:08 +0200 Subject: [PATCH] Fix TypeError during transcript upload to S3 On a platform that is configured to upload video transcripts to S3 (`DEFAULT_FILE_STORAGE = "storages.backends.s3boto3.S3Boto3Storage"`), uploads from the studio fail with a TypeError: "Unicode-objects must be encoded before hashing" A full stacktrace of the issue can be found here: https://sentry.overhang.io/share/issue/2249b6f67d794c7e986cc288758f4ebe/ This error is triggered by md5 hashing in the botocore library, which itself is used by the S3Boto3Storage storage class. This error does not occur with filesystem-based uploads because they do not perform checksum verification. Why this error does not occur on edx.org is unknown. Similar issues were already fixed in edxval. To address this issue, we encode the transcript file content prior to sending it to S3. --- cms/djangoapps/contentstore/views/transcript_settings.py | 2 +- cms/djangoapps/contentstore/views/transcripts_ajax.py | 4 ++-- common/lib/xmodule/xmodule/video_module/video_handlers.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cms/djangoapps/contentstore/views/transcript_settings.py b/cms/djangoapps/contentstore/views/transcript_settings.py index b6bd83add2..f92722160b 100644 --- a/cms/djangoapps/contentstore/views/transcript_settings.py +++ b/cms/djangoapps/contentstore/views/transcript_settings.py @@ -233,7 +233,7 @@ def transcript_upload_handler(request): content=transcript_file.read().decode('utf-8'), input_format=Transcript.SRT, output_format=Transcript.SJSON - ) + ).encode() create_or_update_video_transcript( video_id=edx_video_id, language_code=language_code, diff --git a/cms/djangoapps/contentstore/views/transcripts_ajax.py b/cms/djangoapps/contentstore/views/transcripts_ajax.py index 378d03dd6a..461fd9deb2 100644 --- 
a/cms/djangoapps/contentstore/views/transcripts_ajax.py +++ b/cms/djangoapps/contentstore/views/transcripts_ajax.py @@ -109,7 +109,7 @@ def save_video_transcript(edx_video_id, input_format, transcript_content, langua content=transcript_content, input_format=input_format, output_format=Transcript.SJSON - ) + ).encode() create_or_update_video_transcript( video_id=edx_video_id, language_code=language_code, @@ -222,7 +222,7 @@ def upload_transcripts(request): content=transcript_file.read().decode('utf-8'), input_format=Transcript.SRT, output_format=Transcript.SJSON - ) + ).encode() transcript_created = create_or_update_video_transcript( video_id=edx_video_id, language_code=u'en', diff --git a/common/lib/xmodule/xmodule/video_module/video_handlers.py b/common/lib/xmodule/xmodule/video_module/video_handlers.py index fde2f0ed8d..7f46e1f610 100644 --- a/common/lib/xmodule/xmodule/video_module/video_handlers.py +++ b/common/lib/xmodule/xmodule/video_module/video_handlers.py @@ -509,7 +509,7 @@ class VideoStudioViewHandlers(object): content=transcript_file.read().decode('utf-8'), input_format=Transcript.SRT, output_format=Transcript.SJSON - ) + ).encode() create_or_update_video_transcript( video_id=edx_video_id, language_code=language_code,