diff --git a/AUTHORS b/AUTHORS index 1556e21249..2feb253c1e 100644 --- a/AUTHORS +++ b/AUTHORS @@ -169,4 +169,5 @@ Clinton Blackburn Dennis Jen Filippo Valsorda Ivica Ceraj -Jason Zhu \ No newline at end of file +Jason Zhu +Marceau Cnudde diff --git a/common/djangoapps/contentserver/middleware.py b/common/djangoapps/contentserver/middleware.py index e711fd2b66..0bcfbe3819 100644 --- a/common/djangoapps/contentserver/middleware.py +++ b/common/djangoapps/contentserver/middleware.py @@ -75,7 +75,58 @@ class StaticContentServer(object): if if_modified_since == last_modified_at_str: return HttpResponseNotModified() - response = HttpResponse(content.stream_data(), content_type=content.content_type) + # *** File streaming within a byte range *** + # If a Range is provided, parse Range attribute of the request + # Add Content-Range in the response if Range is structurally correct + # Request -> Range attribute structure: "Range: bytes=first-[last]" + # Response -> Content-Range attribute structure: "Content-Range: bytes first-last/totalLength" + response = None + if request.META.get('HTTP_RANGE'): + # Data from cache (StaticContent) has no easy byte management, so we use the DB instead (StaticContentStream) + if type(content) == StaticContent: + content = contentstore().find(loc, as_stream=True) + + # Let's parse the Range header, bytes=first-[last] + range_header = request.META['HTTP_RANGE'] + if '=' in range_header: + unit, byte_range = range_header.split('=') + # "Accept-Ranges: bytes" tells the user that only "bytes" ranges are allowed + if unit == 'bytes' and '-' in byte_range: + first, last = byte_range.split('-') + # "first" must be a valid integer + try: + first = int(first) + except ValueError: + pass + if type(first) is int: + # "last" default value is the last byte of the file + # Users can ask "bytes=0-" to request the whole file when they don't know the length + try: + last = int(last) + except ValueError: + last = content.length - 1 + + if 0 <= first <= last < content.length: + # Valid Range attribute + response = HttpResponse(content.stream_data_in_range(first, last)) + response['Content-Range'] = 'bytes {first}-{last}/{length}'.format( + first=first, last=last, length=content.length + ) + response['Content-Length'] = str(last - first + 1) + response.status_code = 206 # HTTP_206_PARTIAL_CONTENT + if not response: + # Malformed Range attribute + response = HttpResponse() + response.status_code = 400 # HTTP_400_BAD_REQUEST + return response + + else: + # No Range attribute + response = HttpResponse(content.stream_data()) + response['Content-Length'] = content.length + + response['Accept-Ranges'] = 'bytes' + response['Content-Type'] = content.content_type response['Last-Modified'] = last_modified_at_str return response diff --git a/common/djangoapps/contentserver/tests/test.py b/common/djangoapps/contentserver/tests/test.py index 465b49709a..066551ac18 100644 --- a/common/djangoapps/contentserver/tests/test.py +++ b/common/djangoapps/contentserver/tests/test.py @@ -48,12 +48,12 @@ class ContentStoreToyCourseTest(ModuleStoreTestCase): # A locked asset self.locked_asset = self.course_key.make_asset_key('asset', 'sample_static.txt') self.url_locked = self.locked_asset.to_deprecated_string() + self.contentstore.set_attr(self.locked_asset, 'locked', True) # An unlocked asset self.unlocked_asset = self.course_key.make_asset_key('asset', 'another_static.txt') self.url_unlocked = self.unlocked_asset.to_deprecated_string() - - self.contentstore.set_attr(self.locked_asset, 'locked', True) + self.length_unlocked = self.contentstore.get_attr(self.unlocked_asset, 'length') def test_unlocked_asset(self): """ @@ -61,7 +61,7 @@ class ContentStoreToyCourseTest(ModuleStoreTestCase): """ self.client.logout() resp = self.client.get(self.url_unlocked) - self.assertEqual(resp.status_code, 200) # pylint: disable=E1103 + self.assertEqual(resp.status_code, 200) # pylint: disable=E1103 def test_locked_asset_not_logged_in(self): """ @@ -70,7 +70,7 @@ class ContentStoreToyCourseTest(ModuleStoreTestCase): """ self.client.logout() resp = self.client.get(self.url_locked) - self.assertEqual(resp.status_code, 403) # pylint: disable=E1103 + self.assertEqual(resp.status_code, 403) # pylint: disable=E1103 def test_locked_asset_not_registered(self): """ @@ -79,7 +79,7 @@ class ContentStoreToyCourseTest(ModuleStoreTestCase): """ self.client.login(username=self.non_staff_usr, password=self.non_staff_pwd) resp = self.client.get(self.url_locked) - self.assertEqual(resp.status_code, 403) # pylint: disable=E1103 + self.assertEqual(resp.status_code, 403) # pylint: disable=E1103 def test_locked_asset_registered(self): """ @@ -91,7 +91,7 @@ class ContentStoreToyCourseTest(ModuleStoreTestCase): self.client.login(username=self.non_staff_usr, password=self.non_staff_pwd) resp = self.client.get(self.url_locked) - self.assertEqual(resp.status_code, 200) # pylint: disable=E1103 + self.assertEqual(resp.status_code, 200) # pylint: disable=E1103 def test_locked_asset_staff(self): """ @@ -99,5 +99,87 @@ class ContentStoreToyCourseTest(ModuleStoreTestCase): """ self.client.login(username=self.staff_usr, password=self.staff_pwd) resp = self.client.get(self.url_locked) - self.assertEqual(resp.status_code, 200) # pylint: disable=E1103 + self.assertEqual(resp.status_code, 200) # pylint: disable=E1103 + def test_range_request_full_file(self): + """ + Test that a range request from byte 0 to last, + outputs partial content status code and valid Content-Range and Content-Length. + """ + resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes=0-') + + self.assertEqual(resp.status_code, 206) # HTTP_206_PARTIAL_CONTENT + self.assertEqual(resp['Content-Range'], 'bytes {first}-{last}/{length}'.format( + first=0, last=self.length_unlocked-1, length=self.length_unlocked) + ) + self.assertEqual(resp['Content-Length'], str(self.length_unlocked)) + + def test_range_request_partial_file(self): + """ + Test that a range request for a partial file, + outputs partial content status code and valid Content-Range and Content-Length. + first_byte and last_byte are chosen to be simple but non trivial values. + """ + first_byte = self.length_unlocked / 4 + last_byte = self.length_unlocked / 2 + resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes={first}-{last}'.format( + first=first_byte, last=last_byte) + ) + + self.assertEqual(resp.status_code, 206) # HTTP_206_PARTIAL_CONTENT + self.assertEqual(resp['Content-Range'], 'bytes {first}-{last}/{length}'.format( + first=first_byte, last=last_byte, length=self.length_unlocked)) + self.assertEqual(resp['Content-Length'], str(last_byte - first_byte + 1)) + + def test_range_request_malformed_missing_equal(self): + """ + Test that a range request with malformed Range (missing '=') outputs status 400. + """ + resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes 0-') + self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST + + def test_range_request_malformed_not_bytes(self): + """ + Test that a range request with malformed Range (not "bytes") outputs status 400. + "Accept-Ranges: bytes" tells the user that only "bytes" ranges are allowed + """ + resp = self.client.get(self.url_unlocked, HTTP_RANGE='bits=0-') + self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST + + def test_range_request_malformed_missing_minus(self): + """ + Test that a range request with malformed Range (missing '-') outputs status 400. + """ + resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes=0') + self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST + + def test_range_request_malformed_first_not_integer(self): + """ + Test that a range request with malformed Range (first is not an integer) outputs status 400. + """ + resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes=one-') + self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST + + def test_range_request_malformed_invalid_range(self): + """ + Test that a range request with malformed Range (first_byte > last_byte) outputs status 400. + """ + first_byte = self.length_unlocked / 2 + last_byte = self.length_unlocked / 4 + resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes={first}-{last}'.format( + first=first_byte, last=last_byte) + ) + + self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST + + def test_range_request_malformed_out_of_bounds(self): + """ + Test that a range request with malformed Range (last_byte == totalLength, offset by 1 error) + outputs status 400. + """ + last_byte = self.length_unlocked + resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes=0-{last}'.format( + last=last_byte) + ) + + self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST diff --git a/common/lib/xmodule/xmodule/contentstore/content.py b/common/lib/xmodule/xmodule/contentstore/content.py index 1d8a2cd23f..c45a56f391 100644 --- a/common/lib/xmodule/xmodule/contentstore/content.py +++ b/common/lib/xmodule/xmodule/contentstore/content.py @@ -5,6 +5,8 @@ XASSET_SRCREF_PREFIX = 'xasset:' XASSET_THUMBNAIL_TAIL_NAME = '.jpg' +STREAM_DATA_CHUNK_SIZE = 1024 + import os import logging import StringIO @@ -164,11 +166,26 @@ class StaticContentStream(StaticContent): def stream_data(self): while True: - chunk = self._stream.read(1024) + chunk = self._stream.read(STREAM_DATA_CHUNK_SIZE) if len(chunk) == 0: break yield chunk + def stream_data_in_range(self, first_byte, last_byte): + """ + Stream the data between first_byte and last_byte (included) + """ + self._stream.seek(first_byte) + position = first_byte + while True: + if last_byte < position + STREAM_DATA_CHUNK_SIZE - 1: + chunk = self._stream.read(last_byte - position + 1) + yield chunk + break + chunk = self._stream.read(STREAM_DATA_CHUNK_SIZE) + position += STREAM_DATA_CHUNK_SIZE + yield chunk + def close(self): self._stream.close() diff --git a/common/lib/xmodule/xmodule/tests/test_content.py b/common/lib/xmodule/xmodule/tests/test_content.py index 271d211422..f4e97f092f 100644 --- a/common/lib/xmodule/xmodule/tests/test_content.py +++ b/common/lib/xmodule/xmodule/tests/test_content.py @@ -1,8 +1,47 @@ import unittest -from xmodule.contentstore.content import StaticContent +from xmodule.contentstore.content import StaticContent, StaticContentStream from xmodule.contentstore.content import ContentStore from opaque_keys.edx.locations import SlashSeparatedCourseKey, AssetLocation +SAMPLE_STRING = """ +This is a sample string with more than 1024 bytes, the default STREAM_DATA_CHUNK_SIZE + +Lorem Ipsum is simply dummy text of the printing and typesetting industry. +Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, +when an unknown printer took a galley of type and scrambled it to make a type +specimen book. It has survived not only five centuries, but also the leap into +electronic typesetting, remaining essentially unchanged. It was popularised in +the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, +nd more recently with desktop publishing software like Aldus PageMaker including +versions of Lorem Ipsum. + +It is a long established fact that a reader will be distracted by the readable +content of a page when looking at its layout. The point of using Lorem Ipsum is +that it has a more-or-less normal distribution of letters, as opposed to using +'Content here, content here', making it look like readable English. Many desktop +ublishing packages and web page editors now use Lorem Ipsum as their default model +text, and a search for 'lorem ipsum' will uncover many web sites still in their infancy. +Various versions have evolved over the years, sometimes by accident, sometimes on purpose +injected humour and the like). + +Lorem Ipsum is simply dummy text of the printing and typesetting industry. +Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, +when an unknown printer took a galley of type and scrambled it to make a type +specimen book. It has survived not only five centuries, but also the leap into +electronic typesetting, remaining essentially unchanged. It was popularised in +the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, +nd more recently with desktop publishing software like Aldus PageMaker including +versions of Lorem Ipsum. + +It is a long established fact that a reader will be distracted by the readable +content of a page when looking at its layout. The point of using Lorem Ipsum is +that it has a more-or-less normal distribution of letters, as opposed to using +'Content here, content here', making it look like readable English. Many desktop +ublishing packages and web page editors now use Lorem Ipsum as their default model +text, and a search for 'lorem ipsum' will uncover many web sites still in their infancy. +Various versions have evolved over the years, sometimes by accident, sometimes on purpose +injected humour and the like). +""" class Content: def __init__(self, location, content_type): @@ -10,6 +49,30 @@ class Content: self.content_type = content_type +class FakeGridFsItem: + """ + This class provides the basic methods to get data from a GridFS item + """ + def __init__(self, string_data): + self.cursor = 0 + self.data = string_data + self.length = len(string_data) + + def seek(self, position): + """ + Set the cursor at "position" + """ + self.cursor = position + + def read(self, chunk_size): + """ + Read "chunk_size" bytes of data at position cursor and move the cursor + """ + chunk = self.data[self.cursor:(self.cursor + chunk_size)] + self.cursor += chunk_size + return chunk + + class ContentTest(unittest.TestCase): def test_thumbnail_none(self): # We had a bug where a thumbnail location of None was getting transformed into a Location tuple, with @@ -46,3 +109,39 @@ class ContentTest(unittest.TestCase): AssetLocation(u'foo', u'bar', None, u'asset', u'images_course_image.jpg', None), asset_location ) + + def test_static_content_stream_stream_data(self): + """ + Test StaticContentStream stream_data function, asserts that we get all the bytes + """ + data = SAMPLE_STRING + item = FakeGridFsItem(data) + static_content_stream = StaticContentStream('loc', 'name', 'type', item, length=item.length) + + total_length = 0 + stream = static_content_stream.stream_data() + for chunck in stream: + total_length += len(chunck) + + self.assertEqual(total_length, static_content_stream.length) + + def test_static_content_stream_stream_data_in_range(self): + """ + Test StaticContentStream stream_data_in_range function, + asserts that we get the requested number of bytes + first_byte and last_byte are chosen to be simple but non trivial values + and to have total_length > STREAM_DATA_CHUNK_SIZE (1024) + """ + data = SAMPLE_STRING + item = FakeGridFsItem(data) + static_content_stream = StaticContentStream('loc', 'name', 'type', item, length=item.length) + + first_byte = 100 + last_byte = 1500 + + total_length = 0 + stream = static_content_stream.stream_data_in_range(first_byte, last_byte) + for chunck in stream: + total_length += len(chunck) + + self.assertEqual(total_length, last_byte - first_byte + 1)