From ec508f591cd75050ff0fb7c83ceb67f8f44e329a Mon Sep 17 00:00:00 2001 From: Usman Khalid <2200617@gmail.com> Date: Tue, 16 Sep 2014 19:15:17 +0500 Subject: [PATCH 1/3] StaticContentServer middleware conforms closer to spec for byte range requests. This commit makes the following changes: 1. If there are syntactic errors in the Range header, it is ignored and the full content is returned. This conforms to the spec. 2. In case of multiple ranges in the header, the full content is returned. This behavior differs from the spec which says that a multipart response should be returned. PLAT-104 --- common/djangoapps/contentserver/middleware.py | 122 +++++++++++++----- common/djangoapps/contentserver/tests/test.py | 49 ++++--- 2 files changed, 122 insertions(+), 49 deletions(-) diff --git a/common/djangoapps/contentserver/middleware.py b/common/djangoapps/contentserver/middleware.py index d32470ea15..27804734d9 100644 --- a/common/djangoapps/contentserver/middleware.py +++ b/common/djangoapps/contentserver/middleware.py @@ -1,3 +1,9 @@ +""" +Middleware to serve assets. +""" + +import logging + from django.http import ( HttpResponse, HttpResponseNotModified, HttpResponseForbidden ) @@ -14,6 +20,7 @@ from xmodule.exceptions import NotFoundError # TODO: Soon as we have a reasonable way to serialize/deserialize AssetKeys, we need # to change this file so instead of using course_id_partial, we're just using asset keys +log = logging.getLogger(__name__) class StaticContentServer(object): def process_request(self, request): @@ -82,53 +89,100 @@ class StaticContentServer(object): # Add Content-Range in the response if Range is structurally correct # Request -> Range attribute structure: "Range: bytes=first-[last]" # Response -> Content-Range attribute structure: "Content-Range: bytes first-last/totalLength" + # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35 response = None if request.META.get('HTTP_RANGE'): # Data from cache (StaticContent) has no easy byte management, so we use the DB instead (StaticContentStream) if type(content) == StaticContent: content = contentstore().find(loc, as_stream=True) - # Let's parse the Range header, bytes=first-[last] - range_header = request.META['HTTP_RANGE'] - if '=' in range_header: - unit, byte_range = range_header.split('=') - # "Accept-Ranges: bytes" tells the user that only "bytes" ranges are allowed - if unit == 'bytes' and '-' in byte_range: - first, last = byte_range.split('-') - # "first" must be a valid integer - try: - first = int(first) - except ValueError: - pass - if type(first) is int: - # "last" default value is the last byte of the file - # Users can ask "bytes=0-" to request the whole file when they don't know the length - try: - last = int(last) - except ValueError: - last = content.length - 1 + header_value = request.META['HTTP_RANGE'] + try: + unit, ranges = parse_range_header(header_value, content.length) + except ValueError as exception: + # If the header field is syntactically invalid it should be ignored. + log.exception( + u"%s in Range header: %s for content: %s", exception.message, header_value, unicode(loc) + ) + else: + if unit != 'bytes': + # Only accept ranges in bytes + log.warning(u"Unknown unit in Range header: %s for content: %s", header_value, unicode(loc)) + elif len(ranges) > 1: + # According to Http/1.1 spec content for multiple ranges should be sent as a multipart message. + # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.16 + # But we send back the full content. + log.warning( + u"More than 1 ranges in Range header: %s for content: %s", header_value, unicode(loc) + ) + else: + first, last = ranges[0] - if 0 <= first <= last < content.length: - # Valid Range attribute - response = HttpResponse(content.stream_data_in_range(first, last)) - response['Content-Range'] = 'bytes {first}-{last}/{length}'.format( - first=first, last=last, length=content.length - ) - response['Content-Length'] = str(last - first + 1) - response.status_code = 206 # HTTP_206_PARTIAL_CONTENT - if not response: - # Malformed Range attribute - response = HttpResponse() - response.status_code = 400 # HTTP_400_BAD_REQUEST - return response + if 0 <= first <= last < content.length: + # If the byte range is satisfiable + response = HttpResponse(content.stream_data_in_range(first, last)) + response['Content-Range'] = 'bytes {first}-{last}/{length}'.format( + first=first, last=last, length=content.length + ) + response['Content-Length'] = str(last - first + 1) + response.status_code = 206 # Partial Content + else: + log.warning( + u"Cannot satisfy ranges in Range header: %s for content: %s", header_value, unicode(loc) + ) + return HttpResponse(status=416) # Requested Range Not Satisfiable - else: - # No Range attribute + # If Range header is absent or syntactically invalid return a full content response. + if response is None: response = HttpResponse(content.stream_data()) response['Content-Length'] = content.length + # "Accept-Ranges: bytes" tells the user that only "bytes" ranges are allowed response['Accept-Ranges'] = 'bytes' response['Content-Type'] = content.content_type response['Last-Modified'] = last_modified_at_str return response + + +def parse_range_header(header_value, content_length): + """ + Returns the unit and a list of (start, end) tuples of ranges. + + Raises ValueError if header is syntactically invalid or does not contain a range. + + See spec for details: http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35 + """ + + unit = None + ranges = [] + + if '=' in header_value: + unit, byte_ranges_string = header_value.split('=') + + # Parse the byte ranges. + for byte_range_string in byte_ranges_string.split(','): + byte_range_string = byte_range_string.strip() + # Case 0: + if '-' not in byte_range_string: # Invalid syntax of header value. + raise ValueError('Invalid syntax.') + # Case 1: -500 + elif byte_range_string.startswith('-'): + first = max(0, (content_length + int(byte_range_string))) + last = content_length - 1 + # Case 2: 500- + elif byte_range_string.endswith('-'): + first = int(byte_range_string[0:-1]) + last = content_length - 1 + # Case 3: 500-999 + else: + first, last = byte_range_string.split('-') + first = int(first) + last = min(int(last), content_length - 1) + + ranges.append((first, last)) + + if len(ranges) == 0: + raise ValueError('Invalid syntax') + + return unit, ranges diff --git a/common/djangoapps/contentserver/tests/test.py b/common/djangoapps/contentserver/tests/test.py index 94d95d4d4a..63a2a0f7cb 100644 --- a/common/djangoapps/contentserver/tests/test.py +++ b/common/djangoapps/contentserver/tests/test.py @@ -137,38 +137,57 @@ class ContentStoreToyCourseTest(ModuleStoreTestCase): first=first_byte, last=last_byte, length=self.length_unlocked)) self.assertEqual(resp['Content-Length'], str(last_byte - first_byte + 1)) + def test_range_request_multiple_ranges(self): + """ + Test that multiple ranges in request outputs the full content. + """ + first_byte = self.length_unlocked / 4 + last_byte = self.length_unlocked / 2 + resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes={first}-{last}, -100'.format( + first=first_byte, last=last_byte) + ) + + self.assertEqual(resp.status_code, 200) + self.assertNotIn('Content-Range', resp) + self.assertEqual(resp['Content-Length'], str(self.length_unlocked)) + def test_range_request_malformed_missing_equal(self): """ - Test that a range request with malformed Range (missing '=') outputs status 400. + Test that a range request with malformed Range (missing '=') outputs a 200 OK full content response. """ resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes 0-') - self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST + self.assertEqual(resp.status_code, 200) + self.assertNotIn('Content-Range', resp) def test_range_request_malformed_not_bytes(self): """ - Test that a range request with malformed Range (not "bytes") outputs status 400. + Test that a range request with malformed Range (not "bytes") outputs a 200 OK full content response. "Accept-Ranges: bytes" tells the user that only "bytes" ranges are allowed """ resp = self.client.get(self.url_unlocked, HTTP_RANGE='bits=0-') - self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST + self.assertEqual(resp.status_code, 200) + self.assertNotIn('Content-Range', resp) def test_range_request_malformed_missing_minus(self): """ - Test that a range request with malformed Range (missing '-') outputs status 400. + Test that a range request with malformed Range (missing '-') outputs a 200 OK full content response. """ resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes=0') - self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST + self.assertEqual(resp.status_code, 200) + self.assertNotIn('Content-Range', resp) def test_range_request_malformed_first_not_integer(self): """ - Test that a range request with malformed Range (first is not an integer) outputs status 400. + Test that a range request with malformed Range (first is not an integer) outputs a 200 OK full content response. """ resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes=one-') - self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST + self.assertEqual(resp.status_code, 200) + self.assertNotIn('Content-Range', resp) def test_range_request_malformed_invalid_range(self): """ - Test that a range request with malformed Range (first_byte > last_byte) outputs status 400. + Test that a range request with malformed Range (first_byte > last_byte) outputs + 416 Requested Range Not Satisfiable. """ first_byte = self.length_unlocked / 2 last_byte = self.length_unlocked / 4 @@ -176,16 +195,16 @@ class ContentStoreToyCourseTest(ModuleStoreTestCase): first=first_byte, last=last_byte) ) - self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST + self.assertEqual(resp.status_code, 416) def test_range_request_malformed_out_of_bounds(self): """ - Test that a range request with malformed Range (last_byte == totalLength, offset by 1 error) - outputs status 400. + Test that a range request with malformed Range (first_byte, last_byte == totalLength, offset by 1 error) + outputs 416 Requested Range Not Satisfiable. """ last_byte = self.length_unlocked - resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes=0-{last}'.format( - last=last_byte) + resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes={first}-{last}'.format( + first=last_byte, last=last_byte) ) - self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST + self.assertEqual(resp.status_code, 416) From 7e29a5a65c8d492676ba0ca509a0aab14538d292 Mon Sep 17 00:00:00 2001 From: Usman Khalid <2200617@gmail.com> Date: Mon, 22 Sep 2014 18:34:27 +0500 Subject: [PATCH 2/3] Added tests for parse_range_header() in contentserver. PLAT-104 --- common/djangoapps/contentserver/tests/test.py | 62 ++++++++++++++++++- 1 file changed, 60 insertions(+), 2 deletions(-) diff --git a/common/djangoapps/contentserver/tests/test.py b/common/djangoapps/contentserver/tests/test.py index 63a2a0f7cb..36411842ae 100644 --- a/common/djangoapps/contentserver/tests/test.py +++ b/common/djangoapps/contentserver/tests/test.py @@ -3,20 +3,22 @@ Tests for StaticContentServer """ import copy import logging +import unittest from uuid import uuid4 from django.conf import settings from django.test.client import Client from django.test.utils import override_settings -from student.models import CourseEnrollment - from xmodule.contentstore.django import contentstore from xmodule.modulestore.django import modulestore from opaque_keys.edx.locations import SlashSeparatedCourseKey from xmodule.modulestore.tests.django_utils import ModuleStoreTestCase from xmodule.modulestore.xml_importer import import_from_xml +from contentserver.middleware import parse_range_header +from student.models import CourseEnrollment + log = logging.getLogger(__name__) TEST_DATA_CONTENTSTORE = copy.deepcopy(settings.CONTENTSTORE) @@ -208,3 +210,59 @@ class ContentStoreToyCourseTest(ModuleStoreTestCase): ) self.assertEqual(resp.status_code, 416) + + +class ParseRangeHeaderTestCase(unittest.TestCase): + """ + Tests for the parse_range_header function. + """ + + def setUp(self): + self.content_length = 10000 + + def test_bytes_unit(self): + unit, __ = parse_range_header('bytes=100-', self.content_length) + self.assertEqual(unit, 'bytes') + + def test_invalid_syntax(self): + self.assertRaisesRegexp(ValueError, 'Invalid syntax', parse_range_header, 'bytes', self.content_length) + self.assertRaisesRegexp(ValueError, 'Invalid syntax', parse_range_header, 'bytes=', self.content_length) + self.assertRaisesRegexp( + ValueError, 'too many values to unpack', parse_range_header, 'bytes=0=', self.content_length + ) + self.assertRaisesRegexp(ValueError, 'Invalid syntax', parse_range_header, 'bytes=0', self.content_length) + self.assertRaisesRegexp(ValueError, 'Invalid syntax', parse_range_header, 'bytes=0-10,0', self.content_length) + + def test_byte_range_spec(self): + __, ranges = parse_range_header('bytes=100-', self.content_length) + self.assertEqual(len(ranges), 1) + self.assertEqual(ranges[0], (100, 9999)) + + __, ranges = parse_range_header('bytes=1000-', self.content_length) + self.assertEqual(len(ranges), 1) + self.assertEqual(ranges[0], (1000, 9999)) + + __, ranges = parse_range_header('bytes=100-199, 200-', self.content_length) + self.assertEqual(len(ranges), 2) + self.assertEqual(ranges, [(100, 199), (200, 9999)]) + + __, ranges = parse_range_header('bytes=100-199, 200-499', self.content_length) + self.assertEqual(len(ranges), 2) + self.assertEqual(ranges, [(100, 199), (200, 499)]) + + self.assertRaisesRegexp( + ValueError, 'invalid literal for int()', parse_range_header, 'bytes=one-20', self.content_length + ) + + def test_suffix_byte_range_spec(self): + __, ranges = parse_range_header('bytes=-100', self.content_length) + self.assertEqual(len(ranges), 1) + self.assertEqual(ranges[0], (9900, 9999)) + + __, ranges = parse_range_header('bytes=-100, -200', self.content_length) + self.assertEqual(len(ranges), 2) + self.assertEqual(ranges, [(9900, 9999), (9800, 9999)]) + + self.assertRaisesRegexp( + ValueError, 'invalid literal for int()', parse_range_header, 'bytes=-one', self.content_length + ) From 5af162ebdf671e83f7f6b8f9e56e3fbb26339d2b Mon Sep 17 00:00:00 2001 From: Usman Khalid <2200617@gmail.com> Date: Tue, 23 Sep 2014 16:42:09 +0500 Subject: [PATCH 3/3] Refactored ContentServer tests to use ddt. PLAT-104 --- common/djangoapps/contentserver/tests/test.py | 117 +++++++----------- 1 file changed, 42 insertions(+), 75 deletions(-) diff --git a/common/djangoapps/contentserver/tests/test.py b/common/djangoapps/contentserver/tests/test.py index 36411842ae..8ecb14cc13 100644 --- a/common/djangoapps/contentserver/tests/test.py +++ b/common/djangoapps/contentserver/tests/test.py @@ -2,6 +2,7 @@ Tests for StaticContentServer """ import copy +import ddt import logging import unittest from uuid import uuid4 @@ -25,6 +26,7 @@ TEST_DATA_CONTENTSTORE = copy.deepcopy(settings.CONTENTSTORE) TEST_DATA_CONTENTSTORE['DOC_STORE_CONFIG']['db'] = 'test_xcontent_%s' % uuid4().hex +@ddt.ddt @override_settings(CONTENTSTORE=TEST_DATA_CONTENTSTORE) class ContentStoreToyCourseTest(ModuleStoreTestCase): """ @@ -153,36 +155,17 @@ class ContentStoreToyCourseTest(ModuleStoreTestCase): self.assertNotIn('Content-Range', resp) self.assertEqual(resp['Content-Length'], str(self.length_unlocked)) - def test_range_request_malformed_missing_equal(self): + @ddt.data( + 'bytes 0-', + 'bits=0-', + 'bytes=0', + 'bytes=one-', + ) + def test_syntax_errors_in_range(self, header_value): """ - Test that a range request with malformed Range (missing '=') outputs a 200 OK full content response. + Test that syntactically invalid Range values result in a 200 OK full content response. """ - resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes 0-') - self.assertEqual(resp.status_code, 200) - self.assertNotIn('Content-Range', resp) - - def test_range_request_malformed_not_bytes(self): - """ - Test that a range request with malformed Range (not "bytes") outputs a 200 OK full content response. - "Accept-Ranges: bytes" tells the user that only "bytes" ranges are allowed - """ - resp = self.client.get(self.url_unlocked, HTTP_RANGE='bits=0-') - self.assertEqual(resp.status_code, 200) - self.assertNotIn('Content-Range', resp) - - def test_range_request_malformed_missing_minus(self): - """ - Test that a range request with malformed Range (missing '-') outputs a 200 OK full content response. - """ - resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes=0') - self.assertEqual(resp.status_code, 200) - self.assertNotIn('Content-Range', resp) - - def test_range_request_malformed_first_not_integer(self): - """ - Test that a range request with malformed Range (first is not an integer) outputs a 200 OK full content response. - """ - resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes=one-') + resp = self.client.get(self.url_unlocked, HTTP_RANGE=header_value) self.assertEqual(resp.status_code, 200) self.assertNotIn('Content-Range', resp) @@ -191,12 +174,9 @@ class ContentStoreToyCourseTest(ModuleStoreTestCase): Test that a range request with malformed Range (first_byte > last_byte) outputs 416 Requested Range Not Satisfiable. """ - first_byte = self.length_unlocked / 2 - last_byte = self.length_unlocked / 4 resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes={first}-{last}'.format( - first=first_byte, last=last_byte) + first=(self.length_unlocked / 2), last=(self.length_unlocked / 4)) ) - self.assertEqual(resp.status_code, 416) def test_range_request_malformed_out_of_bounds(self): @@ -204,14 +184,13 @@ class ContentStoreToyCourseTest(ModuleStoreTestCase): Test that a range request with malformed Range (first_byte, last_byte == totalLength, offset by 1 error) outputs 416 Requested Range Not Satisfiable. """ - last_byte = self.length_unlocked resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes={first}-{last}'.format( - first=last_byte, last=last_byte) + first=(self.length_unlocked), last=(self.length_unlocked)) ) - self.assertEqual(resp.status_code, 416) +@ddt.ddt class ParseRangeHeaderTestCase(unittest.TestCase): """ Tests for the parse_range_header function. @@ -224,45 +203,33 @@ class ParseRangeHeaderTestCase(unittest.TestCase): unit, __ = parse_range_header('bytes=100-', self.content_length) self.assertEqual(unit, 'bytes') - def test_invalid_syntax(self): - self.assertRaisesRegexp(ValueError, 'Invalid syntax', parse_range_header, 'bytes', self.content_length) - self.assertRaisesRegexp(ValueError, 'Invalid syntax', parse_range_header, 'bytes=', self.content_length) + @ddt.data( + ('bytes=100-', 1, [(100, 9999)]), + ('bytes=1000-', 1, [(1000, 9999)]), + ('bytes=100-199, 200-', 2, [(100, 199), (200, 9999)]), + ('bytes=100-199, 200-499', 2, [(100, 199), (200, 499)]), + ('bytes=-100', 1, [(9900, 9999)]), + ('bytes=-100, -200', 2, [(9900, 9999), (9800, 9999)]) + ) + @ddt.unpack + def test_valid_syntax(self, header_value, excepted_ranges_length, expected_ranges): + __, ranges = parse_range_header(header_value, self.content_length) + self.assertEqual(len(ranges), excepted_ranges_length) + self.assertEqual(ranges, expected_ranges) + + @ddt.data( + ('bytes=one-20', ValueError, 'invalid literal for int()'), + ('bytes=-one', ValueError, 'invalid literal for int()'), + ('bytes=-', ValueError, 'invalid literal for int()'), + ('bytes=--', ValueError, 'invalid literal for int()'), + ('bytes', ValueError, 'Invalid syntax'), + ('bytes=', ValueError, 'Invalid syntax'), + ('bytes=0', ValueError, 'Invalid syntax'), + ('bytes=0-10,0', ValueError, 'Invalid syntax'), + ('bytes=0=', ValueError, 'too many values to unpack'), + ) + @ddt.unpack + def test_invalid_syntax(self, header_value, exception_class, exception_message_regex): self.assertRaisesRegexp( - ValueError, 'too many values to unpack', parse_range_header, 'bytes=0=', self.content_length - ) - self.assertRaisesRegexp(ValueError, 'Invalid syntax', parse_range_header, 'bytes=0', self.content_length) - self.assertRaisesRegexp(ValueError, 'Invalid syntax', parse_range_header, 'bytes=0-10,0', self.content_length) - - def test_byte_range_spec(self): - __, ranges = parse_range_header('bytes=100-', self.content_length) - self.assertEqual(len(ranges), 1) - self.assertEqual(ranges[0], (100, 9999)) - - __, ranges = parse_range_header('bytes=1000-', self.content_length) - self.assertEqual(len(ranges), 1) - self.assertEqual(ranges[0], (1000, 9999)) - - __, ranges = parse_range_header('bytes=100-199, 200-', self.content_length) - self.assertEqual(len(ranges), 2) - self.assertEqual(ranges, [(100, 199), (200, 9999)]) - - __, ranges = parse_range_header('bytes=100-199, 200-499', self.content_length) - self.assertEqual(len(ranges), 2) - self.assertEqual(ranges, [(100, 199), (200, 499)]) - - self.assertRaisesRegexp( - ValueError, 'invalid literal for int()', parse_range_header, 'bytes=one-20', self.content_length - ) - - def test_suffix_byte_range_spec(self): - __, ranges = parse_range_header('bytes=-100', self.content_length) - self.assertEqual(len(ranges), 1) - self.assertEqual(ranges[0], (9900, 9999)) - - __, ranges = parse_range_header('bytes=-100, -200', self.content_length) - self.assertEqual(len(ranges), 2) - self.assertEqual(ranges, [(9900, 9999), (9800, 9999)]) - - self.assertRaisesRegexp( - ValueError, 'invalid literal for int()', parse_range_header, 'bytes=-one', self.content_length + exception_class, exception_message_regex, parse_range_header, header_value, self.content_length )