Merge pull request #5246 from edx/usman/plat104-static-content-server
Improvements to byte range handling in StaticContentServer.
This commit is contained in:
@@ -1,3 +1,9 @@
|
||||
"""
|
||||
Middleware to serve assets.
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from django.http import (
|
||||
HttpResponse, HttpResponseNotModified, HttpResponseForbidden
|
||||
)
|
||||
@@ -14,6 +20,7 @@ from xmodule.exceptions import NotFoundError
|
||||
# TODO: Soon as we have a reasonable way to serialize/deserialize AssetKeys, we need
|
||||
# to change this file so instead of using course_id_partial, we're just using asset keys
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
class StaticContentServer(object):
|
||||
def process_request(self, request):
|
||||
@@ -82,53 +89,100 @@ class StaticContentServer(object):
|
||||
# Add Content-Range in the response if Range is structurally correct
|
||||
# Request -> Range attribute structure: "Range: bytes=first-[last]"
|
||||
# Response -> Content-Range attribute structure: "Content-Range: bytes first-last/totalLength"
|
||||
# http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35
|
||||
response = None
|
||||
if request.META.get('HTTP_RANGE'):
|
||||
# Data from cache (StaticContent) has no easy byte management, so we use the DB instead (StaticContentStream)
|
||||
if type(content) == StaticContent:
|
||||
content = contentstore().find(loc, as_stream=True)
|
||||
|
||||
# Let's parse the Range header, bytes=first-[last]
|
||||
range_header = request.META['HTTP_RANGE']
|
||||
if '=' in range_header:
|
||||
unit, byte_range = range_header.split('=')
|
||||
# "Accept-Ranges: bytes" tells the user that only "bytes" ranges are allowed
|
||||
if unit == 'bytes' and '-' in byte_range:
|
||||
first, last = byte_range.split('-')
|
||||
# "first" must be a valid integer
|
||||
try:
|
||||
first = int(first)
|
||||
except ValueError:
|
||||
pass
|
||||
if type(first) is int:
|
||||
# "last" default value is the last byte of the file
|
||||
# Users can ask "bytes=0-" to request the whole file when they don't know the length
|
||||
try:
|
||||
last = int(last)
|
||||
except ValueError:
|
||||
last = content.length - 1
|
||||
header_value = request.META['HTTP_RANGE']
|
||||
try:
|
||||
unit, ranges = parse_range_header(header_value, content.length)
|
||||
except ValueError as exception:
|
||||
# If the header field is syntactically invalid it should be ignored.
|
||||
log.exception(
|
||||
u"%s in Range header: %s for content: %s", exception.message, header_value, unicode(loc)
|
||||
)
|
||||
else:
|
||||
if unit != 'bytes':
|
||||
# Only accept ranges in bytes
|
||||
log.warning(u"Unknown unit in Range header: %s for content: %s", header_value, unicode(loc))
|
||||
elif len(ranges) > 1:
|
||||
# According to Http/1.1 spec content for multiple ranges should be sent as a multipart message.
|
||||
# http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.16
|
||||
# But we send back the full content.
|
||||
log.warning(
|
||||
u"More than 1 ranges in Range header: %s for content: %s", header_value, unicode(loc)
|
||||
)
|
||||
else:
|
||||
first, last = ranges[0]
|
||||
|
||||
if 0 <= first <= last < content.length:
|
||||
# Valid Range attribute
|
||||
response = HttpResponse(content.stream_data_in_range(first, last))
|
||||
response['Content-Range'] = 'bytes {first}-{last}/{length}'.format(
|
||||
first=first, last=last, length=content.length
|
||||
)
|
||||
response['Content-Length'] = str(last - first + 1)
|
||||
response.status_code = 206 # HTTP_206_PARTIAL_CONTENT
|
||||
if not response:
|
||||
# Malformed Range attribute
|
||||
response = HttpResponse()
|
||||
response.status_code = 400 # HTTP_400_BAD_REQUEST
|
||||
return response
|
||||
if 0 <= first <= last < content.length:
|
||||
# If the byte range is satisfiable
|
||||
response = HttpResponse(content.stream_data_in_range(first, last))
|
||||
response['Content-Range'] = 'bytes {first}-{last}/{length}'.format(
|
||||
first=first, last=last, length=content.length
|
||||
)
|
||||
response['Content-Length'] = str(last - first + 1)
|
||||
response.status_code = 206 # Partial Content
|
||||
else:
|
||||
log.warning(
|
||||
u"Cannot satisfy ranges in Range header: %s for content: %s", header_value, unicode(loc)
|
||||
)
|
||||
return HttpResponse(status=416) # Requested Range Not Satisfiable
|
||||
|
||||
else:
|
||||
# No Range attribute
|
||||
# If Range header is absent or syntactically invalid return a full content response.
|
||||
if response is None:
|
||||
response = HttpResponse(content.stream_data())
|
||||
response['Content-Length'] = content.length
|
||||
|
||||
# "Accept-Ranges: bytes" tells the user that only "bytes" ranges are allowed
|
||||
response['Accept-Ranges'] = 'bytes'
|
||||
response['Content-Type'] = content.content_type
|
||||
response['Last-Modified'] = last_modified_at_str
|
||||
|
||||
return response
|
||||
|
||||
|
||||
def parse_range_header(header_value, content_length):
|
||||
"""
|
||||
Returns the unit and a list of (start, end) tuples of ranges.
|
||||
|
||||
Raises ValueError if header is syntactically invalid or does not contain a range.
|
||||
|
||||
See spec for details: http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35
|
||||
"""
|
||||
|
||||
unit = None
|
||||
ranges = []
|
||||
|
||||
if '=' in header_value:
|
||||
unit, byte_ranges_string = header_value.split('=')
|
||||
|
||||
# Parse the byte ranges.
|
||||
for byte_range_string in byte_ranges_string.split(','):
|
||||
byte_range_string = byte_range_string.strip()
|
||||
# Case 0:
|
||||
if '-' not in byte_range_string: # Invalid syntax of header value.
|
||||
raise ValueError('Invalid syntax.')
|
||||
# Case 1: -500
|
||||
elif byte_range_string.startswith('-'):
|
||||
first = max(0, (content_length + int(byte_range_string)))
|
||||
last = content_length - 1
|
||||
# Case 2: 500-
|
||||
elif byte_range_string.endswith('-'):
|
||||
first = int(byte_range_string[0:-1])
|
||||
last = content_length - 1
|
||||
# Case 3: 500-999
|
||||
else:
|
||||
first, last = byte_range_string.split('-')
|
||||
first = int(first)
|
||||
last = min(int(last), content_length - 1)
|
||||
|
||||
ranges.append((first, last))
|
||||
|
||||
if len(ranges) == 0:
|
||||
raise ValueError('Invalid syntax')
|
||||
|
||||
return unit, ranges
|
||||
|
||||
@@ -2,27 +2,31 @@
|
||||
Tests for StaticContentServer
|
||||
"""
|
||||
import copy
|
||||
import ddt
|
||||
import logging
|
||||
import unittest
|
||||
from uuid import uuid4
|
||||
|
||||
from django.conf import settings
|
||||
from django.test.client import Client
|
||||
from django.test.utils import override_settings
|
||||
|
||||
from student.models import CourseEnrollment
|
||||
|
||||
from xmodule.contentstore.django import contentstore
|
||||
from xmodule.modulestore.django import modulestore
|
||||
from opaque_keys.edx.locations import SlashSeparatedCourseKey
|
||||
from xmodule.modulestore.tests.django_utils import ModuleStoreTestCase
|
||||
from xmodule.modulestore.xml_importer import import_from_xml
|
||||
|
||||
from contentserver.middleware import parse_range_header
|
||||
from student.models import CourseEnrollment
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
TEST_DATA_CONTENTSTORE = copy.deepcopy(settings.CONTENTSTORE)
|
||||
TEST_DATA_CONTENTSTORE['DOC_STORE_CONFIG']['db'] = 'test_xcontent_%s' % uuid4().hex
|
||||
|
||||
|
||||
@ddt.ddt
|
||||
@override_settings(CONTENTSTORE=TEST_DATA_CONTENTSTORE)
|
||||
class ContentStoreToyCourseTest(ModuleStoreTestCase):
|
||||
"""
|
||||
@@ -137,55 +141,95 @@ class ContentStoreToyCourseTest(ModuleStoreTestCase):
|
||||
first=first_byte, last=last_byte, length=self.length_unlocked))
|
||||
self.assertEqual(resp['Content-Length'], str(last_byte - first_byte + 1))
|
||||
|
||||
def test_range_request_malformed_missing_equal(self):
|
||||
def test_range_request_multiple_ranges(self):
|
||||
"""
|
||||
Test that a range request with malformed Range (missing '=') outputs status 400.
|
||||
Test that multiple ranges in request outputs the full content.
|
||||
"""
|
||||
resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes 0-')
|
||||
self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST
|
||||
|
||||
def test_range_request_malformed_not_bytes(self):
|
||||
"""
|
||||
Test that a range request with malformed Range (not "bytes") outputs status 400.
|
||||
"Accept-Ranges: bytes" tells the user that only "bytes" ranges are allowed
|
||||
"""
|
||||
resp = self.client.get(self.url_unlocked, HTTP_RANGE='bits=0-')
|
||||
self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST
|
||||
|
||||
def test_range_request_malformed_missing_minus(self):
|
||||
"""
|
||||
Test that a range request with malformed Range (missing '-') outputs status 400.
|
||||
"""
|
||||
resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes=0')
|
||||
self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST
|
||||
|
||||
def test_range_request_malformed_first_not_integer(self):
|
||||
"""
|
||||
Test that a range request with malformed Range (first is not an integer) outputs status 400.
|
||||
"""
|
||||
resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes=one-')
|
||||
self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST
|
||||
|
||||
def test_range_request_malformed_invalid_range(self):
|
||||
"""
|
||||
Test that a range request with malformed Range (first_byte > last_byte) outputs status 400.
|
||||
"""
|
||||
first_byte = self.length_unlocked / 2
|
||||
last_byte = self.length_unlocked / 4
|
||||
resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes={first}-{last}'.format(
|
||||
first_byte = self.length_unlocked / 4
|
||||
last_byte = self.length_unlocked / 2
|
||||
resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes={first}-{last}, -100'.format(
|
||||
first=first_byte, last=last_byte)
|
||||
)
|
||||
|
||||
self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST
|
||||
self.assertEqual(resp.status_code, 200)
|
||||
self.assertNotIn('Content-Range', resp)
|
||||
self.assertEqual(resp['Content-Length'], str(self.length_unlocked))
|
||||
|
||||
@ddt.data(
|
||||
'bytes 0-',
|
||||
'bits=0-',
|
||||
'bytes=0',
|
||||
'bytes=one-',
|
||||
)
|
||||
def test_syntax_errors_in_range(self, header_value):
|
||||
"""
|
||||
Test that syntactically invalid Range values result in a 200 OK full content response.
|
||||
"""
|
||||
resp = self.client.get(self.url_unlocked, HTTP_RANGE=header_value)
|
||||
self.assertEqual(resp.status_code, 200)
|
||||
self.assertNotIn('Content-Range', resp)
|
||||
|
||||
def test_range_request_malformed_invalid_range(self):
|
||||
"""
|
||||
Test that a range request with malformed Range (first_byte > last_byte) outputs
|
||||
416 Requested Range Not Satisfiable.
|
||||
"""
|
||||
resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes={first}-{last}'.format(
|
||||
first=(self.length_unlocked / 2), last=(self.length_unlocked / 4))
|
||||
)
|
||||
self.assertEqual(resp.status_code, 416)
|
||||
|
||||
def test_range_request_malformed_out_of_bounds(self):
|
||||
"""
|
||||
Test that a range request with malformed Range (last_byte == totalLength, offset by 1 error)
|
||||
outputs status 400.
|
||||
Test that a range request with malformed Range (first_byte, last_byte == totalLength, offset by 1 error)
|
||||
outputs 416 Requested Range Not Satisfiable.
|
||||
"""
|
||||
last_byte = self.length_unlocked
|
||||
resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes=0-{last}'.format(
|
||||
last=last_byte)
|
||||
resp = self.client.get(self.url_unlocked, HTTP_RANGE='bytes={first}-{last}'.format(
|
||||
first=(self.length_unlocked), last=(self.length_unlocked))
|
||||
)
|
||||
self.assertEqual(resp.status_code, 416)
|
||||
|
||||
self.assertEqual(resp.status_code, 400) # HTTP_400_BAD_REQUEST
|
||||
|
||||
@ddt.ddt
|
||||
class ParseRangeHeaderTestCase(unittest.TestCase):
|
||||
"""
|
||||
Tests for the parse_range_header function.
|
||||
"""
|
||||
|
||||
def setUp(self):
|
||||
self.content_length = 10000
|
||||
|
||||
def test_bytes_unit(self):
|
||||
unit, __ = parse_range_header('bytes=100-', self.content_length)
|
||||
self.assertEqual(unit, 'bytes')
|
||||
|
||||
@ddt.data(
|
||||
('bytes=100-', 1, [(100, 9999)]),
|
||||
('bytes=1000-', 1, [(1000, 9999)]),
|
||||
('bytes=100-199, 200-', 2, [(100, 199), (200, 9999)]),
|
||||
('bytes=100-199, 200-499', 2, [(100, 199), (200, 499)]),
|
||||
('bytes=-100', 1, [(9900, 9999)]),
|
||||
('bytes=-100, -200', 2, [(9900, 9999), (9800, 9999)])
|
||||
)
|
||||
@ddt.unpack
|
||||
def test_valid_syntax(self, header_value, excepted_ranges_length, expected_ranges):
|
||||
__, ranges = parse_range_header(header_value, self.content_length)
|
||||
self.assertEqual(len(ranges), excepted_ranges_length)
|
||||
self.assertEqual(ranges, expected_ranges)
|
||||
|
||||
@ddt.data(
|
||||
('bytes=one-20', ValueError, 'invalid literal for int()'),
|
||||
('bytes=-one', ValueError, 'invalid literal for int()'),
|
||||
('bytes=-', ValueError, 'invalid literal for int()'),
|
||||
('bytes=--', ValueError, 'invalid literal for int()'),
|
||||
('bytes', ValueError, 'Invalid syntax'),
|
||||
('bytes=', ValueError, 'Invalid syntax'),
|
||||
('bytes=0', ValueError, 'Invalid syntax'),
|
||||
('bytes=0-10,0', ValueError, 'Invalid syntax'),
|
||||
('bytes=0=', ValueError, 'too many values to unpack'),
|
||||
)
|
||||
@ddt.unpack
|
||||
def test_invalid_syntax(self, header_value, exception_class, exception_message_regex):
|
||||
self.assertRaisesRegexp(
|
||||
exception_class, exception_message_regex, parse_range_header, header_value, self.content_length
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user