From d5db673a8acc73d8271cf6ed099ebab16d840cfa Mon Sep 17 00:00:00 2001 From: MarCnu Date: Tue, 12 Aug 2014 17:27:36 +0200 Subject: [PATCH 1/2] AUTHORS update --- AUTHORS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/AUTHORS b/AUTHORS index 1556e21249..2feb253c1e 100644 --- a/AUTHORS +++ b/AUTHORS @@ -169,4 +169,5 @@ Clinton Blackburn Dennis Jen Filippo Valsorda Ivica Ceraj -Jason Zhu \ No newline at end of file +Jason Zhu +Marceau Cnudde From 77f7f269336ff90034a9c81916e32873c43b3233 Mon Sep 17 00:00:00 2001 From: MarCnu Date: Fri, 1 Aug 2014 18:10:47 +0200 Subject: [PATCH 2/2] Add HTTP_RANGE compatibility for ContentStore file streaming Currently, users can only download ContentStore files from first byte to last byte. With this change, when a request to the ContentStore includes the HTTP "Range" parameter, it is parsed and StaticContentStream will stream the requested bytes. This change makes possible to stream video files (.mp4 especially) from the ContentStore. Users can now seek a specific time in the video without loading all the file. This is useful for courses with a limited number of students that doesn't require a dedicated video server. --- common/djangoapps/contentserver/middleware.py | 53 ++++++++- common/djangoapps/contentserver/tests/test.py | 96 +++++++++++++++-- .../xmodule/xmodule/contentstore/content.py | 19 +++- .../lib/xmodule/xmodule/tests/test_content.py | 101 +++++++++++++++++- 4 files changed, 259 insertions(+), 10 deletions(-) diff --git a/common/djangoapps/contentserver/middleware.py b/common/djangoapps/contentserver/middleware.py index e711fd2b66..0bcfbe3819 100644 --- a/common/djangoapps/contentserver/middleware.py +++ b/common/djangoapps/contentserver/middleware.py @@ -75,7 +75,58 @@ class StaticContentServer(object): if if_modified_since == last_modified_at_str: return HttpResponseNotModified() - response = HttpResponse(content.stream_data(), content_type=content.content_type) + # *** File streaming within a byte range *** + # If a Range is provided, parse Range attribute of the request + # Add Content-Range in the response if Range is structurally correct + # Request -> Range attribute structure: "Range: bytes=first-[last]" + # Response -> Content-Range attribute structure: "Content-Range: bytes first-last/totalLength" + response = None + if request.META.get('HTTP_RANGE'): + # Data from cache (StaticContent) has no easy byte management, so we use the DB instead (StaticContentStream) + if type(content) == StaticContent: + content = contentstore().find(loc, as_stream=True) + + # Let's parse the Range header, bytes=first-[last] + range_header = request.META['HTTP_RANGE'] + if '=' in range_header: + unit, byte_range = range_header.split('=') + # "Accept-Ranges: bytes" tells the user that only "bytes" ranges are allowed + if unit == 'bytes' and '-' in byte_range: + first, last = byte_range.split('-') + # "first" must be a valid integer + try: + first = int(first) + except ValueError: + pass + if type(first) is int: + # "last" default value is the last byte of the file + # Users can ask "bytes=0-" to request the whole file when they don't know the length + try: + last = int(last) + except ValueError: + last = content.length - 1 + + if 0 <= first <= last < content.length: + # Valid Range attribute + response = HttpResponse(content.stream_data_in_range(first, last)) + response['Content-Range'] = 'bytes {first}-{last}/{length}'.format( + first=first, last=last, length=content.length + ) + response['Content-Length'] = str(last - first + 1) + response.status_code = 206 # HTTP_206_PARTIAL_CONTENT + if not response: + # Malformed Range attribute + response = HttpResponse() + response.status_code = 400 # HTTP_400_BAD_REQUEST + return response + + else: + # No Range attribute + response = HttpResponse(content.stream_data()) + response['Content-Length'] = content.length + + response['Accept-Ranges'] = 'bytes' + response['Content-Type'] = content.content_type response['Last-Modified'] = last_modified_at_str return response diff --git a/common/djangoapps/contentserver/tests/test.py b/common/djangoapps/contentserver/tests/test.py index 465b49709a..066551ac18 100644 --- a/common/djangoapps/contentserver/tests/test.py +++ b/common/djangoapps/contentserver/tests/test.py @@ -48,12 +48,12 @@ class ContentStoreToyCourseTest(ModuleStoreTestCase): # A locked asset self.locked_asset = self.course_key.make_asset_key('asset', 'sample_static.txt') self.url_locked = self.locked_asset.to_deprecated_string() + self.contentstore.set_attr(self.locked_asset, 'locked', True) # An unlocked asset self.unlocked_asset = self.course_key.make_asset_key('asset', 'another_static.txt') self.url_unlocked = self.unlocked_asset.to_deprecated_string() - - self.contentstore.set_attr(self.locked_asset, 'locked', True) + self.length_unlocked = self.contentstore.get_attr(self.unlocked_asset, 'length') def test_unlocked_asset(self): """ @@ -61,7 +61,7 @@ class ContentStoreToyCourseTest(ModuleStoreTestCase): """ self.client.logout() resp = self.client.get(self.url_unlocked) - self.assertEqual(resp.status_code, 200) # pylint: disable=E1103 + self.assertEqual(resp.status_code, 200) # pylint: disable=E1103 def test_locked_asset_not_logged_in(self): """ @@ -70,7 +70,7 @@ class ContentStoreToyCourseTest(ModuleStoreTestCase): """ self.client.logout() resp = self.client.get(self.url_locked) - self.assertEqual(resp.status_code, 403) # pylint: disable=E1103 + self.assertEqual(resp.status_code, 403) # pylint: disable=E1103 def test_locked_asset_not_registered(self): """ @@ -79,7 +79,7 @@ class ContentStoreToyCourseTest(ModuleStoreTestCase): """ self.client.login(username=self.non_staff_usr, password=self.non_staff_pwd) resp = self.client.get(self.url_locked) - self.assertEqual(resp.status_code, 403) # pylint: disable=E1103 + self.assertEqual(resp.status_code, 403) # pylint: disable=E1103 def test_locked_asset_registered(self): """ @@ -91,7 +91,7 @@ class ContentStoreToyCourseTest(ModuleStoreTestCase): self.client.login(username=self.non_staff_usr, password=self.non_staff_pwd) resp = self.client.get(self.url_locked) - self.assertEqual(resp.status_code, 200) # pylint: disable=E1103 + self.assertEqual(resp.status_code, 200) # pylint: disable=E1103 def test_locked_asset_staff(self): """ @@ -99,5 +99,87 @@ class ContentStoreToyCourseTest(ModuleStoreTestCase): """ self.client.login(username=self.staff_usr, password=self.staff_pwd) resp = self.client.get(self.url_locked) - self.assertEqual(resp.status_code, 200) # pylint: disable=E1103 + self.assertEqual(resp.status_code, 200) # pylint: disable=E1103 + def test_range_request_full_file(self): + """ + Test that a range request from byte 0 to last, + outputs partial content status code and valid Content-Range and Content-Length. + """ + resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes=0-') + + self.assertEqual(resp.status_code, 206) # HTTP_206_PARTIAL_CONTENT + self.assertEqual(resp['Content-Range'], 'bytes {first}-{last}/{length}'.format( + first=0, last=self.length_unlocked-1, length=self.length_unlocked) + ) + self.assertEqual(resp['Content-Length'], str(self.length_unlocked)) + + def test_range_request_partial_file(self): + """ + Test that a range request for a partial file, + outputs partial content status code and valid Content-Range and Content-Length. + first_byte and last_byte are chosen to be simple but non trivial values. + """ + first_byte = self.length_unlocked / 4 + last_byte = self.length_unlocked / 2 + resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes={first}-{last}'.format( + first=first_byte, last=last_byte) + ) + + self.assertEqual(resp.status_code, 206) # HTTP_206_PARTIAL_CONTENT + self.assertEqual(resp['Content-Range'], 'bytes {first}-{last}/{length}'.format( + first=first_byte, last=last_byte, length=self.length_unlocked)) + self.assertEqual(resp['Content-Length'], str(last_byte - first_byte + 1)) + + def test_range_request_malformed_missing_equal(self): + """ + Test that a range request with malformed Range (missing '=') outputs status 400. + """ + resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes 0-') + self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST + + def test_range_request_malformed_not_bytes(self): + """ + Test that a range request with malformed Range (not "bytes") outputs status 400. + "Accept-Ranges: bytes" tells the user that only "bytes" ranges are allowed + """ + resp = self.client.get(self.url_unlocked, HTTP_RANGE='bits=0-') + self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST + + def test_range_request_malformed_missing_minus(self): + """ + Test that a range request with malformed Range (missing '-') outputs status 400. + """ + resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes=0') + self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST + + def test_range_request_malformed_first_not_integer(self): + """ + Test that a range request with malformed Range (first is not an integer) outputs status 400. + """ + resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes=one-') + self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST + + def test_range_request_malformed_invalid_range(self): + """ + Test that a range request with malformed Range (first_byte > last_byte) outputs status 400. + """ + first_byte = self.length_unlocked / 2 + last_byte = self.length_unlocked / 4 + resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes={first}-{last}'.format( + first=first_byte, last=last_byte) + ) + + self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST + + def test_range_request_malformed_out_of_bounds(self): + """ + Test that a range request with malformed Range (last_byte == totalLength, offset by 1 error) + outputs status 400. + """ + last_byte = self.length_unlocked + resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes=0-{last}'.format( + last=last_byte) + ) + + self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST diff --git a/common/lib/xmodule/xmodule/contentstore/content.py b/common/lib/xmodule/xmodule/contentstore/content.py index 1d8a2cd23f..c45a56f391 100644 --- a/common/lib/xmodule/xmodule/contentstore/content.py +++ b/common/lib/xmodule/xmodule/contentstore/content.py @@ -5,6 +5,8 @@ XASSET_SRCREF_PREFIX = 'xasset:' XASSET_THUMBNAIL_TAIL_NAME = '.jpg' +STREAM_DATA_CHUNK_SIZE = 1024 + import os import logging import StringIO @@ -164,11 +166,26 @@ class StaticContentStream(StaticContent): def stream_data(self): while True: - chunk = self._stream.read(1024) + chunk = self._stream.read(STREAM_DATA_CHUNK_SIZE) if len(chunk) == 0: break yield chunk + def stream_data_in_range(self, first_byte, last_byte): + """ + Stream the data between first_byte and last_byte (included) + """ + self._stream.seek(first_byte) + position = first_byte + while True: + if last_byte < position + STREAM_DATA_CHUNK_SIZE - 1: + chunk = self._stream.read(last_byte - position + 1) + yield chunk + break + chunk = self._stream.read(STREAM_DATA_CHUNK_SIZE) + position += STREAM_DATA_CHUNK_SIZE + yield chunk + def close(self): self._stream.close() diff --git a/common/lib/xmodule/xmodule/tests/test_content.py b/common/lib/xmodule/xmodule/tests/test_content.py index 271d211422..f4e97f092f 100644 --- a/common/lib/xmodule/xmodule/tests/test_content.py +++ b/common/lib/xmodule/xmodule/tests/test_content.py @@ -1,8 +1,47 @@ import unittest -from xmodule.contentstore.content import StaticContent +from xmodule.contentstore.content import StaticContent, StaticContentStream from xmodule.contentstore.content import ContentStore from opaque_keys.edx.locations import SlashSeparatedCourseKey, AssetLocation +SAMPLE_STRING = """ +This is a sample string with more than 1024 bytes, the default STREAM_DATA_CHUNK_SIZE + +Lorem Ipsum is simply dummy text of the printing and typesetting industry. +Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, +when an unknown printer took a galley of type and scrambled it to make a type +specimen book. It has survived not only five centuries, but also the leap into +electronic typesetting, remaining essentially unchanged. It was popularised in +the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, +nd more recently with desktop publishing software like Aldus PageMaker including +versions of Lorem Ipsum. + +It is a long established fact that a reader will be distracted by the readable +content of a page when looking at its layout. The point of using Lorem Ipsum is +that it has a more-or-less normal distribution of letters, as opposed to using +'Content here, content here', making it look like readable English. Many desktop +ublishing packages and web page editors now use Lorem Ipsum as their default model +text, and a search for 'lorem ipsum' will uncover many web sites still in their infancy. +Various versions have evolved over the years, sometimes by accident, sometimes on purpose +injected humour and the like). + +Lorem Ipsum is simply dummy text of the printing and typesetting industry. +Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, +when an unknown printer took a galley of type and scrambled it to make a type +specimen book. It has survived not only five centuries, but also the leap into +electronic typesetting, remaining essentially unchanged. It was popularised in +the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, +nd more recently with desktop publishing software like Aldus PageMaker including +versions of Lorem Ipsum. + +It is a long established fact that a reader will be distracted by the readable +content of a page when looking at its layout. The point of using Lorem Ipsum is +that it has a more-or-less normal distribution of letters, as opposed to using +'Content here, content here', making it look like readable English. Many desktop +ublishing packages and web page editors now use Lorem Ipsum as their default model +text, and a search for 'lorem ipsum' will uncover many web sites still in their infancy. +Various versions have evolved over the years, sometimes by accident, sometimes on purpose +injected humour and the like). +""" class Content: def __init__(self, location, content_type): @@ -10,6 +49,30 @@ class Content: self.content_type = content_type +class FakeGridFsItem: + """ + This class provides the basic methods to get data from a GridFS item + """ + def __init__(self, string_data): + self.cursor = 0 + self.data = string_data + self.length = len(string_data) + + def seek(self, position): + """ + Set the cursor at "position" + """ + self.cursor = position + + def read(self, chunk_size): + """ + Read "chunk_size" bytes of data at position cursor and move the cursor + """ + chunk = self.data[self.cursor:(self.cursor + chunk_size)] + self.cursor += chunk_size + return chunk + + class ContentTest(unittest.TestCase): def test_thumbnail_none(self): # We had a bug where a thumbnail location of None was getting transformed into a Location tuple, with @@ -46,3 +109,39 @@ class ContentTest(unittest.TestCase): AssetLocation(u'foo', u'bar', None, u'asset', u'images_course_image.jpg', None), asset_location ) + + def test_static_content_stream_stream_data(self): + """ + Test StaticContentStream stream_data function, asserts that we get all the bytes + """ + data = SAMPLE_STRING + item = FakeGridFsItem(data) + static_content_stream = StaticContentStream('loc', 'name', 'type', item, length=item.length) + + total_length = 0 + stream = static_content_stream.stream_data() + for chunck in stream: + total_length += len(chunck) + + self.assertEqual(total_length, static_content_stream.length) + + def test_static_content_stream_stream_data_in_range(self): + """ + Test StaticContentStream stream_data_in_range function, + asserts that we get the requested number of bytes + first_byte and last_byte are chosen to be simple but non trivial values + and to have total_length > STREAM_DATA_CHUNK_SIZE (1024) + """ + data = SAMPLE_STRING + item = FakeGridFsItem(data) + static_content_stream = StaticContentStream('loc', 'name', 'type', item, length=item.length) + + first_byte = 100 + last_byte = 1500 + + total_length = 0 + stream = static_content_stream.stream_data_in_range(first_byte, last_byte) + for chunck in stream: + total_length += len(chunck) + + self.assertEqual(total_length, last_byte - first_byte + 1)