193 lines
9.0 KiB
Python
193 lines
9.0 KiB
Python
"""
|
|
Middleware to serve assets.
|
|
"""
|
|
|
|
import logging
|
|
|
|
from django.http import (
|
|
HttpResponse, HttpResponseNotModified, HttpResponseForbidden
|
|
)
|
|
from student.models import CourseEnrollment
|
|
|
|
from xmodule.assetstore.assetmgr import AssetManager
|
|
from xmodule.contentstore.content import StaticContent, XASSET_LOCATION_TAG
|
|
from xmodule.modulestore import InvalidLocationError
|
|
from opaque_keys import InvalidKeyError
|
|
from opaque_keys.edx.locator import AssetLocator
|
|
from cache_toolbox.core import get_cached_content, set_cached_content
|
|
from xmodule.modulestore.exceptions import ItemNotFoundError
|
|
from xmodule.exceptions import NotFoundError
|
|
|
|
# TODO: As soon as we have a reasonable way to serialize/deserialize AssetKeys, we need
# to change this file so that, instead of using course_id_partial, we're just using asset keys
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
|
|
class StaticContentServer(object):
    """
    Middleware that answers requests for course assets.

    Only paths starting with the asset location tag or the canonical asset
    namespace are handled; every other request is left untouched so the
    rest of the middleware chain can process it.
    """

    # HTTP-date layout (RFC 1123 form, e.g. "Sun, 06 Nov 1994 08:49:37 GMT").
    # Header dates use spaces, not dashes, between day, month and year.
    HTTP_DATE_FORMAT = "%a, %d %b %Y %H:%M:%S GMT"

    def process_request(self, request):
        """
        Serve the asset addressed by ``request.path``, if it is an asset path.

        Returns:
            None when the path is not an asset path, otherwise an HttpResponse:
              * 400 if the path cannot be turned into a location
              * 404 if the asset cannot be found
              * 403 if the asset is locked and the user is anonymous, or is
                neither staff nor enrolled in the asset's course
              * 304 if If-Modified-Since equals the asset's Last-Modified value
              * 206 with a Content-Range header for one satisfiable byte range
              * 416 for a single byte range that cannot be satisfied
              * 200 with the full content in every other case (including a
                Range header that is invalid, uses another unit, or lists
                more than one range)
        """
        # look to see if the request is prefixed with an asset prefix tag
        is_asset_request = (
            request.path.startswith('/' + XASSET_LOCATION_TAG + '/') or
            request.path.startswith('/' + AssetLocator.CANONICAL_NAMESPACE)
        )
        if not is_asset_request:
            return None

        if AssetLocator.CANONICAL_NAMESPACE in request.path:
            request.path = request.path.replace('block/', 'block@', 1)

        try:
            loc = StaticContent.get_location_from_path(request.path)
        except (InvalidLocationError, InvalidKeyError):
            # return a 'Bad Request' to browser as we have a malformed Location
            return HttpResponse(status=400)

        # first look in our cache so we don't have to round-trip to the DB
        content = get_cached_content(loc)
        if content is None:
            # nope, not in cache, let's fetch from DB
            try:
                content = AssetManager.find(loc, as_stream=True)
            except (ItemNotFoundError, NotFoundError):
                return HttpResponse(status=404)

            # since we fetched it from DB, let's cache it going forward, but only if it's < 1MB
            # this is because I haven't been able to find a means to stream data out of memcached
            if content.length is not None and content.length < 1048576:
                # since we've queried as a stream, let's read in the stream into memory to set in cache
                content = content.copy_to_in_mem()
                set_cached_content(content)

        # Check that user has access to content
        if getattr(content, "locked", False):
            if not hasattr(request, "user") or not request.user.is_authenticated():
                return HttpResponseForbidden('Unauthorized')
            if not request.user.is_staff:
                # Deprecated locations are checked with the partial-match
                # enrollment lookup, all others with the exact one.
                if getattr(loc, 'deprecated', False):
                    enrolled = CourseEnrollment.is_enrolled_by_partial(request.user, loc.course_key)
                else:
                    enrolled = CourseEnrollment.is_enrolled(request.user, loc.course_key)
                if not enrolled:
                    return HttpResponseForbidden('Unauthorized')

        # convert over the DB persistent last modified timestamp to a HTTP compatible
        # timestamp, so we can simply compare the strings
        last_modified_at_str = content.last_modified_at.strftime(self.HTTP_DATE_FORMAT)

        # see if the client has cached this content, if so then compare the
        # timestamps, if they are the same then just return a 304 (Not Modified)
        if request.META.get('HTTP_IF_MODIFIED_SINCE') == last_modified_at_str:
            return HttpResponseNotModified()

        # *** File streaming within a byte range ***
        # If a Range is provided, parse Range attribute of the request
        # Add Content-Range in the response if Range is structurally correct
        # Request -> Range attribute structure: "Range: bytes=first-[last]"
        # Response -> Content-Range attribute structure: "Content-Range: bytes first-last/totalLength"
        # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35
        response = None
        if request.META.get('HTTP_RANGE'):
            # Data from cache (StaticContent) has no easy byte management, so we use the DB instead
            # (StaticContentStream).
            # NOTE(review): the exact type comparison is kept on purpose; if the stream class
            # subclasses StaticContent, isinstance() would match both - confirm before changing.
            if type(content) == StaticContent:
                try:
                    content = AssetManager.find(loc, as_stream=True)
                except (ItemNotFoundError, NotFoundError):
                    # The asset was cached but is no longer in the store; answer
                    # the same way as the uncached lookup above.
                    return HttpResponse(status=404)

            header_value = request.META['HTTP_RANGE']
            try:
                unit, ranges = parse_range_header(header_value, content.length)
            except ValueError as exception:
                # If the header field is syntactically invalid it should be ignored.
                log.exception(
                    u"%s in Range header: %s for content: %s", exception.message, header_value, unicode(loc)
                )
            else:
                if unit != 'bytes':
                    # Only accept ranges in bytes
                    log.warning(u"Unknown unit in Range header: %s for content: %s", header_value, unicode(loc))
                elif len(ranges) > 1:
                    # According to Http/1.1 spec content for multiple ranges should be sent as a multipart message.
                    # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.16
                    # But we send back the full content.
                    log.warning(
                        u"More than 1 ranges in Range header: %s for content: %s", header_value, unicode(loc)
                    )
                else:
                    first, last = ranges[0]

                    if 0 <= first <= last < content.length:
                        # If the byte range is satisfiable
                        response = HttpResponse(content.stream_data_in_range(first, last))
                        response['Content-Range'] = 'bytes {first}-{last}/{length}'.format(
                            first=first, last=last, length=content.length
                        )
                        response['Content-Length'] = str(last - first + 1)
                        response.status_code = 206  # Partial Content
                    else:
                        log.warning(
                            u"Cannot satisfy ranges in Range header: %s for content: %s", header_value, unicode(loc)
                        )
                        return HttpResponse(status=416)  # Requested Range Not Satisfiable

        # If Range header is absent or syntactically invalid return a full content response.
        if response is None:
            response = HttpResponse(content.stream_data())
            response['Content-Length'] = content.length

        # "Accept-Ranges: bytes" tells the user that only "bytes" ranges are allowed
        response['Accept-Ranges'] = 'bytes'
        response['Content-Type'] = content.content_type
        response['Last-Modified'] = last_modified_at_str

        return response
|
|
|
|
|
|
def parse_range_header(header_value, content_length):
    """
    Break a Range header value into its unit and its byte ranges.

    Returns a ``(unit, ranges)`` pair where ``ranges`` is a list of
    ``(first, last)`` tuples of inclusive byte offsets. Open-ended forms are
    resolved against ``content_length`` and an explicit last byte is capped at
    ``content_length - 1``. Whether ``first <= last`` holds is not checked
    here; that is left to the caller.

    Raises ValueError if the header is syntactically invalid: no ``=`` (or
    more than one), a range without ``-``, or a bound that is not an integer.

    See spec for details: http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35
    """
    if '=' not in header_value:
        # No "unit=" prefix means there is no range to extract.
        raise ValueError('Invalid syntax')

    unit, range_specs = header_value.split('=')

    # str.split always yields at least one item, so by the end of the loop
    # every header has either raised or contributed at least one range.
    parsed = []
    for spec in range_specs.split(','):
        spec = spec.strip()

        if '-' not in spec:
            # Not a range at all.
            raise ValueError('Invalid syntax.')

        if spec.startswith('-'):
            # Suffix form "-500": the final 500 bytes (int(spec) is negative).
            begin, end = max(0, (content_length + int(spec))), content_length - 1
        elif spec.endswith('-'):
            # Open-ended form "500-": from byte 500 through the end.
            begin, end = int(spec[:-1]), content_length - 1
        else:
            # Explicit form "500-999".
            low, high = spec.split('-')
            begin = int(low)
            end = min(int(high), content_length - 1)

        parsed.append((begin, end))

    return unit, parsed
|