diff --git a/common/djangoapps/contentserver/middleware.py b/common/djangoapps/contentserver/middleware.py index d32470ea15..27804734d9 100644 --- a/common/djangoapps/contentserver/middleware.py +++ b/common/djangoapps/contentserver/middleware.py @@ -1,3 +1,9 @@ +""" +Middleware to serve assets. +""" + +import logging + from django.http import ( HttpResponse, HttpResponseNotModified, HttpResponseForbidden ) @@ -14,6 +20,7 @@ from xmodule.exceptions import NotFoundError # TODO: Soon as we have a reasonable way to serialize/deserialize AssetKeys, we need # to change this file so instead of using course_id_partial, we're just using asset keys +log = logging.getLogger(__name__) class StaticContentServer(object): def process_request(self, request): @@ -82,53 +89,100 @@ class StaticContentServer(object): # Add Content-Range in the response if Range is structurally correct # Request -> Range attribute structure: "Range: bytes=first-[last]" # Response -> Content-Range attribute structure: "Content-Range: bytes first-last/totalLength" + # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35 response = None if request.META.get('HTTP_RANGE'): # Data from cache (StaticContent) has no easy byte management, so we use the DB instead (StaticContentStream) if type(content) == StaticContent: content = contentstore().find(loc, as_stream=True) - # Let's parse the Range header, bytes=first-[last] - range_header = request.META['HTTP_RANGE'] - if '=' in range_header: - unit, byte_range = range_header.split('=') - # "Accept-Ranges: bytes" tells the user that only "bytes" ranges are allowed - if unit == 'bytes' and '-' in byte_range: - first, last = byte_range.split('-') - # "first" must be a valid integer - try: - first = int(first) - except ValueError: - pass - if type(first) is int: - # "last" default value is the last byte of the file - # Users can ask "bytes=0-" to request the whole file when they don't know the length - try: - last = int(last) - except ValueError: - last = content.length - 1 + header_value = request.META['HTTP_RANGE'] + try: + unit, ranges = parse_range_header(header_value, content.length) + except ValueError as exception: + # If the header field is syntactically invalid it should be ignored. + log.exception( + u"%s in Range header: %s for content: %s", exception.message, header_value, unicode(loc) + ) + else: + if unit != 'bytes': + # Only accept ranges in bytes + log.warning(u"Unknown unit in Range header: %s for content: %s", header_value, unicode(loc)) + elif len(ranges) > 1: + # According to Http/1.1 spec content for multiple ranges should be sent as a multipart message. + # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.16 + # But we send back the full content. + log.warning( + u"More than 1 ranges in Range header: %s for content: %s", header_value, unicode(loc) + ) + else: + first, last = ranges[0] - if 0 <= first <= last < content.length: - # Valid Range attribute - response = HttpResponse(content.stream_data_in_range(first, last)) - response['Content-Range'] = 'bytes {first}-{last}/{length}'.format( - first=first, last=last, length=content.length - ) - response['Content-Length'] = str(last - first + 1) - response.status_code = 206 # HTTP_206_PARTIAL_CONTENT - if not response: - # Malformed Range attribute - response = HttpResponse() - response.status_code = 400 # HTTP_400_BAD_REQUEST - return response + if 0 <= first <= last < content.length: + # If the byte range is satisfiable + response = HttpResponse(content.stream_data_in_range(first, last)) + response['Content-Range'] = 'bytes {first}-{last}/{length}'.format( + first=first, last=last, length=content.length + ) + response['Content-Length'] = str(last - first + 1) + response.status_code = 206 # Partial Content + else: + log.warning( + u"Cannot satisfy ranges in Range header: %s for content: %s", header_value, unicode(loc) + ) + return HttpResponse(status=416) # Requested Range Not Satisfiable - else: - # No Range attribute + # If Range header is absent or syntactically invalid return a full content response. + if response is None: response = HttpResponse(content.stream_data()) response['Content-Length'] = content.length + # "Accept-Ranges: bytes" tells the user that only "bytes" ranges are allowed response['Accept-Ranges'] = 'bytes' response['Content-Type'] = content.content_type response['Last-Modified'] = last_modified_at_str return response + + +def parse_range_header(header_value, content_length): + """ + Returns the unit and a list of (start, end) tuples of ranges. + + Raises ValueError if header is syntactically invalid or does not contain a range. + + See spec for details: http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35 + """ + + unit = None + ranges = [] + + if '=' in header_value: + unit, byte_ranges_string = header_value.split('=') + + # Parse the byte ranges. + for byte_range_string in byte_ranges_string.split(','): + byte_range_string = byte_range_string.strip() + # Case 0: + if '-' not in byte_range_string: # Invalid syntax of header value. + raise ValueError('Invalid syntax.') + # Case 1: -500 + elif byte_range_string.startswith('-'): + first = max(0, (content_length + int(byte_range_string))) + last = content_length - 1 + # Case 2: 500- + elif byte_range_string.endswith('-'): + first = int(byte_range_string[0:-1]) + last = content_length - 1 + # Case 3: 500-999 + else: + first, last = byte_range_string.split('-') + first = int(first) + last = min(int(last), content_length - 1) + + ranges.append((first, last)) + + if len(ranges) == 0: + raise ValueError('Invalid syntax') + + return unit, ranges diff --git a/common/djangoapps/contentserver/tests/test.py b/common/djangoapps/contentserver/tests/test.py index 94d95d4d4a..63a2a0f7cb 100644 --- a/common/djangoapps/contentserver/tests/test.py +++ b/common/djangoapps/contentserver/tests/test.py @@ -137,38 +137,57 @@ class ContentStoreToyCourseTest(ModuleStoreTestCase): first=first_byte, last=last_byte, length=self.length_unlocked)) self.assertEqual(resp['Content-Length'], str(last_byte - first_byte + 1)) + def test_range_request_multiple_ranges(self): + """ + Test that multiple ranges in request outputs the full content. + """ + first_byte = self.length_unlocked / 4 + last_byte = self.length_unlocked / 2 + resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes={first}-{last}, -100'.format( + first=first_byte, last=last_byte) + ) + + self.assertEqual(resp.status_code, 200) + self.assertNotIn('Content-Range', resp) + self.assertEqual(resp['Content-Length'], str(self.length_unlocked)) + def test_range_request_malformed_missing_equal(self): """ - Test that a range request with malformed Range (missing '=') outputs status 400. + Test that a range request with malformed Range (missing '=') outputs a 200 OK full content response. """ resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes 0-') - self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST + self.assertEqual(resp.status_code, 200) + self.assertNotIn('Content-Range', resp) def test_range_request_malformed_not_bytes(self): """ - Test that a range request with malformed Range (not "bytes") outputs status 400. + Test that a range request with malformed Range (not "bytes") outputs a 200 OK full content response. "Accept-Ranges: bytes" tells the user that only "bytes" ranges are allowed """ resp = self.client.get(self.url_unlocked, HTTP_RANGE='bits=0-') - self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST + self.assertEqual(resp.status_code, 200) + self.assertNotIn('Content-Range', resp) def test_range_request_malformed_missing_minus(self): """ - Test that a range request with malformed Range (missing '-') outputs status 400. + Test that a range request with malformed Range (missing '-') outputs a 200 OK full content response. """ resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes=0') - self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST + self.assertEqual(resp.status_code, 200) + self.assertNotIn('Content-Range', resp) def test_range_request_malformed_first_not_integer(self): """ - Test that a range request with malformed Range (first is not an integer) outputs status 400. + Test that a range request with malformed Range (first is not an integer) outputs a 200 OK full content response. """ resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes=one-') - self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST + self.assertEqual(resp.status_code, 200) + self.assertNotIn('Content-Range', resp) def test_range_request_malformed_invalid_range(self): """ - Test that a range request with malformed Range (first_byte > last_byte) outputs status 400. + Test that a range request with malformed Range (first_byte > last_byte) outputs + 416 Requested Range Not Satisfiable. """ first_byte = self.length_unlocked / 2 last_byte = self.length_unlocked / 4 @@ -176,16 +195,16 @@ class ContentStoreToyCourseTest(ModuleStoreTestCase): first=first_byte, last=last_byte) ) - self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST + self.assertEqual(resp.status_code, 416) def test_range_request_malformed_out_of_bounds(self): """ - Test that a range request with malformed Range (last_byte == totalLength, offset by 1 error) - outputs status 400. + Test that a range request with malformed Range (first_byte, last_byte == totalLength, offset by 1 error) + outputs 416 Requested Range Not Satisfiable. """ last_byte = self.length_unlocked - resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes=0-{last}'.format( - last=last_byte) + resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes={first}-{last}'.format( + first=last_byte, last=last_byte) ) - self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST + self.assertEqual(resp.status_code, 416)