diff --git a/common/djangoapps/contentserver/middleware.py b/common/djangoapps/contentserver/middleware.py index d32470ea15..27804734d9 100644 --- a/common/djangoapps/contentserver/middleware.py +++ b/common/djangoapps/contentserver/middleware.py @@ -1,3 +1,9 @@ +""" +Middleware to serve assets. +""" + +import logging + from django.http import ( HttpResponse, HttpResponseNotModified, HttpResponseForbidden ) @@ -14,6 +20,7 @@ from xmodule.exceptions import NotFoundError # TODO: Soon as we have a reasonable way to serialize/deserialize AssetKeys, we need # to change this file so instead of using course_id_partial, we're just using asset keys +log = logging.getLogger(__name__) class StaticContentServer(object): def process_request(self, request): @@ -82,53 +89,100 @@ class StaticContentServer(object): # Add Content-Range in the response if Range is structurally correct # Request -> Range attribute structure: "Range: bytes=first-[last]" # Response -> Content-Range attribute structure: "Content-Range: bytes first-last/totalLength" + # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35 response = None if request.META.get('HTTP_RANGE'): # Data from cache (StaticContent) has no easy byte management, so we use the DB instead (StaticContentStream) if type(content) == StaticContent: content = contentstore().find(loc, as_stream=True) - # Let's parse the Range header, bytes=first-[last] - range_header = request.META['HTTP_RANGE'] - if '=' in range_header: - unit, byte_range = range_header.split('=') - # "Accept-Ranges: bytes" tells the user that only "bytes" ranges are allowed - if unit == 'bytes' and '-' in byte_range: - first, last = byte_range.split('-') - # "first" must be a valid integer - try: - first = int(first) - except ValueError: - pass - if type(first) is int: - # "last" default value is the last byte of the file - # Users can ask "bytes=0-" to request the whole file when they don't know the length - try: - last = int(last) - except ValueError: - last = content.length - 1 + header_value = request.META['HTTP_RANGE'] + try: + unit, ranges = parse_range_header(header_value, content.length) + except ValueError as exception: + # If the header field is syntactically invalid it should be ignored. + log.exception( + u"%s in Range header: %s for content: %s", exception.message, header_value, unicode(loc) + ) + else: + if unit != 'bytes': + # Only accept ranges in bytes + log.warning(u"Unknown unit in Range header: %s for content: %s", header_value, unicode(loc)) + elif len(ranges) > 1: + # According to Http/1.1 spec content for multiple ranges should be sent as a multipart message. + # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.16 + # But we send back the full content. + log.warning( + u"More than 1 ranges in Range header: %s for content: %s", header_value, unicode(loc) + ) + else: + first, last = ranges[0] - if 0 <= first <= last < content.length: - # Valid Range attribute - response = HttpResponse(content.stream_data_in_range(first, last)) - response['Content-Range'] = 'bytes {first}-{last}/{length}'.format( - first=first, last=last, length=content.length - ) - response['Content-Length'] = str(last - first + 1) - response.status_code = 206 # HTTP_206_PARTIAL_CONTENT - if not response: - # Malformed Range attribute - response = HttpResponse() - response.status_code = 400 # HTTP_400_BAD_REQUEST - return response + if 0 <= first <= last < content.length: + # If the byte range is satisfiable + response = HttpResponse(content.stream_data_in_range(first, last)) + response['Content-Range'] = 'bytes {first}-{last}/{length}'.format( + first=first, last=last, length=content.length + ) + response['Content-Length'] = str(last - first + 1) + response.status_code = 206 # Partial Content + else: + log.warning( + u"Cannot satisfy ranges in Range header: %s for content: %s", header_value, unicode(loc) + ) + return HttpResponse(status=416) # Requested Range Not Satisfiable - else: - # No Range attribute + # If Range header is absent or syntactically invalid return a full content response. + if response is None: response = HttpResponse(content.stream_data()) response['Content-Length'] = content.length + # "Accept-Ranges: bytes" tells the user that only "bytes" ranges are allowed response['Accept-Ranges'] = 'bytes' response['Content-Type'] = content.content_type response['Last-Modified'] = last_modified_at_str return response + + +def parse_range_header(header_value, content_length): + """ + Returns the unit and a list of (start, end) tuples of ranges. + + Raises ValueError if header is syntactically invalid or does not contain a range. + + See spec for details: http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35 + """ + + unit = None + ranges = [] + + if '=' in header_value: + unit, byte_ranges_string = header_value.split('=') + + # Parse the byte ranges. + for byte_range_string in byte_ranges_string.split(','): + byte_range_string = byte_range_string.strip() + # Case 0: + if '-' not in byte_range_string: # Invalid syntax of header value. + raise ValueError('Invalid syntax.') + # Case 1: -500 + elif byte_range_string.startswith('-'): + first = max(0, (content_length + int(byte_range_string))) + last = content_length - 1 + # Case 2: 500- + elif byte_range_string.endswith('-'): + first = int(byte_range_string[0:-1]) + last = content_length - 1 + # Case 3: 500-999 + else: + first, last = byte_range_string.split('-') + first = int(first) + last = min(int(last), content_length - 1) + + ranges.append((first, last)) + + if len(ranges) == 0: + raise ValueError('Invalid syntax') + + return unit, ranges diff --git a/common/djangoapps/contentserver/tests/test.py b/common/djangoapps/contentserver/tests/test.py index 94d95d4d4a..8ecb14cc13 100644 --- a/common/djangoapps/contentserver/tests/test.py +++ b/common/djangoapps/contentserver/tests/test.py @@ -2,27 +2,31 @@ Tests for StaticContentServer """ import copy +import ddt import logging +import unittest from uuid import uuid4 from django.conf import settings from django.test.client import Client from django.test.utils import override_settings -from student.models import CourseEnrollment - from xmodule.contentstore.django import contentstore from xmodule.modulestore.django import modulestore from opaque_keys.edx.locations import SlashSeparatedCourseKey from xmodule.modulestore.tests.django_utils import ModuleStoreTestCase from xmodule.modulestore.xml_importer import import_from_xml +from contentserver.middleware import parse_range_header +from student.models import CourseEnrollment + log = logging.getLogger(__name__) TEST_DATA_CONTENTSTORE = copy.deepcopy(settings.CONTENTSTORE) TEST_DATA_CONTENTSTORE['DOC_STORE_CONFIG']['db'] = 'test_xcontent_%s' % uuid4().hex +@ddt.ddt @override_settings(CONTENTSTORE=TEST_DATA_CONTENTSTORE) class ContentStoreToyCourseTest(ModuleStoreTestCase): """ @@ -137,55 +141,95 @@ class ContentStoreToyCourseTest(ModuleStoreTestCase): first=first_byte, last=last_byte, length=self.length_unlocked)) self.assertEqual(resp['Content-Length'], str(last_byte - first_byte + 1)) - def test_range_request_malformed_missing_equal(self): + def test_range_request_multiple_ranges(self): """ - Test that a range request with malformed Range (missing '=') outputs status 400. + Test that multiple ranges in request outputs the full content. """ - resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes 0-') - self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST - - def test_range_request_malformed_not_bytes(self): - """ - Test that a range request with malformed Range (not "bytes") outputs status 400. - "Accept-Ranges: bytes" tells the user that only "bytes" ranges are allowed - """ - resp = self.client.get(self.url_unlocked, HTTP_RANGE='bits=0-') - self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST - - def test_range_request_malformed_missing_minus(self): - """ - Test that a range request with malformed Range (missing '-') outputs status 400. - """ - resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes=0') - self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST - - def test_range_request_malformed_first_not_integer(self): - """ - Test that a range request with malformed Range (first is not an integer) outputs status 400. - """ - resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes=one-') - self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST - - def test_range_request_malformed_invalid_range(self): - """ - Test that a range request with malformed Range (first_byte > last_byte) outputs status 400. - """ - first_byte = self.length_unlocked / 2 - last_byte = self.length_unlocked / 4 - resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes={first}-{last}'.format( + first_byte = self.length_unlocked / 4 + last_byte = self.length_unlocked / 2 + resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes={first}-{last}, -100'.format( first=first_byte, last=last_byte) ) - self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST + self.assertEqual(resp.status_code, 200) + self.assertNotIn('Content-Range', resp) + self.assertEqual(resp['Content-Length'], str(self.length_unlocked)) + + @ddt.data( + 'bytes 0-', + 'bits=0-', + 'bytes=0', + 'bytes=one-', + ) + def test_syntax_errors_in_range(self, header_value): + """ + Test that syntactically invalid Range values result in a 200 OK full content response. + """ + resp = self.client.get(self.url_unlocked, HTTP_RANGE=header_value) + self.assertEqual(resp.status_code, 200) + self.assertNotIn('Content-Range', resp) + + def test_range_request_malformed_invalid_range(self): + """ + Test that a range request with malformed Range (first_byte > last_byte) outputs + 416 Requested Range Not Satisfiable. + """ + resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes={first}-{last}'.format( + first=(self.length_unlocked / 2), last=(self.length_unlocked / 4)) + ) + self.assertEqual(resp.status_code, 416) def test_range_request_malformed_out_of_bounds(self): """ - Test that a range request with malformed Range (last_byte == totalLength, offset by 1 error) - outputs status 400. + Test that a range request with malformed Range (first_byte, last_byte == totalLength, offset by 1 error) + outputs 416 Requested Range Not Satisfiable. """ - last_byte = self.length_unlocked - resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes=0-{last}'.format( - last=last_byte) + resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes={first}-{last}'.format( + first=(self.length_unlocked), last=(self.length_unlocked)) ) + self.assertEqual(resp.status_code, 416) - self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST + +@ddt.ddt +class ParseRangeHeaderTestCase(unittest.TestCase): + """ + Tests for the parse_range_header function. + """ + + def setUp(self): + self.content_length = 10000 + + def test_bytes_unit(self): + unit, __ = parse_range_header('bytes=100-', self.content_length) + self.assertEqual(unit, 'bytes') + + @ddt.data( + ('bytes=100-', 1, [(100, 9999)]), + ('bytes=1000-', 1, [(1000, 9999)]), + ('bytes=100-199, 200-', 2, [(100, 199), (200, 9999)]), + ('bytes=100-199, 200-499', 2, [(100, 199), (200, 499)]), + ('bytes=-100', 1, [(9900, 9999)]), + ('bytes=-100, -200', 2, [(9900, 9999), (9800, 9999)]) + ) + @ddt.unpack + def test_valid_syntax(self, header_value, excepted_ranges_length, expected_ranges): + __, ranges = parse_range_header(header_value, self.content_length) + self.assertEqual(len(ranges), excepted_ranges_length) + self.assertEqual(ranges, expected_ranges) + + @ddt.data( + ('bytes=one-20', ValueError, 'invalid literal for int()'), + ('bytes=-one', ValueError, 'invalid literal for int()'), + ('bytes=-', ValueError, 'invalid literal for int()'), + ('bytes=--', ValueError, 'invalid literal for int()'), + ('bytes', ValueError, 'Invalid syntax'), + ('bytes=', ValueError, 'Invalid syntax'), + ('bytes=0', ValueError, 'Invalid syntax'), + ('bytes=0-10,0', ValueError, 'Invalid syntax'), + ('bytes=0=', ValueError, 'too many values to unpack'), + ) + @ddt.unpack + def test_invalid_syntax(self, header_value, exception_class, exception_message_regex): + self.assertRaisesRegexp( + exception_class, exception_message_regex, parse_range_header, header_value, self.content_length + )