feat: Convert contentserver to view permanently (drop middleware and flag) (#35420)

Final planned portion of https://github.com/openedx/edx-platform/issues/34702 -- waffle flag and middleware are removed.

Commits:

1. Feature change
    - Delete `content_server.use_view` waffle flag in favor of always using view
    - Delete the husk of `StaticContentServerMiddleware` and references to it
    - Update views module docstring
2. Refactor (move)
    - Move contentserver implementation into views.py
3. Lint cleanup
    - Fix import ordering (from refactor debris + amnestied lint)
This commit is contained in:
Tim McCormack
2024-09-06 14:42:32 +00:00
committed by GitHub
parent 8ddbf90e64
commit b615a8767f
5 changed files with 348 additions and 412 deletions

View File

@@ -949,7 +949,6 @@ MIDDLEWARE = [
'openedx.core.djangoapps.cache_toolbox.middleware.CacheBackedAuthenticationMiddleware',
'common.djangoapps.student.middleware.UserStandingMiddleware',
'openedx.core.djangoapps.contentserver.middleware.StaticContentServerMiddleware',
'django.contrib.messages.middleware.MessageMiddleware',
'common.djangoapps.track.middleware.TrackMiddleware',

View File

@@ -2295,7 +2295,6 @@ MIDDLEWARE = [
'openedx.core.djangoapps.safe_sessions.middleware.EmailChangeMiddleware',
'common.djangoapps.student.middleware.UserStandingMiddleware',
'openedx.core.djangoapps.contentserver.middleware.StaticContentServerMiddleware',
# Adds user tags to tracking events
# Must go before TrackMiddleware, to get the context set up

View File

@@ -1,368 +0,0 @@
"""
Middleware to serve assets.
"""
import datetime
import logging
from django.http import (
HttpResponse,
HttpResponseBadRequest,
HttpResponseForbidden,
HttpResponseNotFound,
HttpResponseNotModified,
HttpResponsePermanentRedirect
)
from django.utils.deprecation import MiddlewareMixin
from edx_django_utils.monitoring import set_custom_attribute
from edx_toggles.toggles import WaffleFlag
from opaque_keys import InvalidKeyError
from opaque_keys.edx.locator import AssetLocator
from openedx.core.djangoapps.header_control import force_header_for_response
from common.djangoapps.student.models import CourseEnrollment
from xmodule.assetstore.assetmgr import AssetManager # lint-amnesty, pylint: disable=wrong-import-order
from xmodule.contentstore.content import XASSET_LOCATION_TAG, StaticContent # lint-amnesty, pylint: disable=wrong-import-order
from xmodule.exceptions import NotFoundError # lint-amnesty, pylint: disable=wrong-import-order
from xmodule.modulestore import InvalidLocationError # lint-amnesty, pylint: disable=wrong-import-order
from xmodule.modulestore.exceptions import ItemNotFoundError # lint-amnesty, pylint: disable=wrong-import-order
from .caching import get_cached_content, set_cached_content
from .models import CdnUserAgentsConfig, CourseAssetCacheTtlConfig
log = logging.getLogger(__name__)
# .. toggle_name: content_server.use_view
# .. toggle_implementation: WaffleFlag
# .. toggle_default: False
# .. toggle_description: Deployment flag for switching asset serving from a middleware
# to a view. Intended to be used once in each environment to test the cutover and
# ensure there are no errors or changes in behavior. Once this has been tested,
# the middleware can be fully converted to a view.
# .. toggle_use_cases: temporary
# .. toggle_creation_date: 2024-05-02
# .. toggle_target_removal_date: 2024-07-01
# .. toggle_tickets: https://github.com/openedx/edx-platform/issues/34702
CONTENT_SERVER_USE_VIEW = WaffleFlag('content_server.use_view', module_name=__name__)
# TODO: Soon as we have a reasonable way to serialize/deserialize AssetKeys, we need
# to change this file so instead of using course_id_partial, we're just using asset keys
HTTP_DATE_FORMAT = "%a, %d %b %Y %H:%M:%S GMT"
class StaticContentServerMiddleware(MiddlewareMixin):
"""
Shim to maintain old pattern of serving course assets from a middleware. See views.py.
"""
def process_request(self, request):
"""Intercept asset request or allow view to handle it, depending on config."""
if CONTENT_SERVER_USE_VIEW.is_enabled():
return
else:
set_custom_attribute('content_server.handled_by.middleware', True)
return IMPL.process_request(request)
class StaticContentServer():
"""
Serves course assets to end users. Colloquially referred to as "contentserver."
"""
def is_asset_request(self, request):
"""Determines whether the given request is an asset request"""
# Don't change this without updating urls.py! See docstring of views.py.
return (
request.path.startswith('/' + XASSET_LOCATION_TAG + '/')
or
request.path.startswith('/' + AssetLocator.CANONICAL_NAMESPACE)
or
StaticContent.is_versioned_asset_path(request.path)
)
# pylint: disable=too-many-statements
def process_request(self, request):
"""Process the given request"""
asset_path = request.path
if self.is_asset_request(request): # lint-amnesty, pylint: disable=too-many-nested-blocks
# Make sure we can convert this request into a location.
if AssetLocator.CANONICAL_NAMESPACE in asset_path:
asset_path = asset_path.replace('block/', 'block@', 1)
# If this is a versioned request, pull out the digest and chop off the prefix.
requested_digest = None
if StaticContent.is_versioned_asset_path(asset_path):
requested_digest, asset_path = StaticContent.parse_versioned_asset_path(asset_path)
# Make sure we have a valid location value for this asset.
try:
loc = StaticContent.get_location_from_path(asset_path)
except (InvalidLocationError, InvalidKeyError):
return HttpResponseBadRequest()
# Attempt to load the asset to make sure it exists, and grab the asset digest
# if we're able to load it.
actual_digest = None
try:
content = self.load_asset_from_location(loc)
actual_digest = getattr(content, "content_digest", None)
except (ItemNotFoundError, NotFoundError):
return HttpResponseNotFound()
# If this was a versioned asset, and the digest doesn't match, redirect
# them to the actual version.
if requested_digest is not None and actual_digest is not None and (actual_digest != requested_digest):
actual_asset_path = StaticContent.add_version_to_asset_path(asset_path, actual_digest)
return HttpResponsePermanentRedirect(actual_asset_path)
# Set the basics for this request. Make sure that the course key for this
# asset has a run, which old-style courses do not. Otherwise, this will
# explode when the key is serialized to be sent to NR.
safe_course_key = loc.course_key
if safe_course_key.run is None:
safe_course_key = safe_course_key.replace(run='only')
set_custom_attribute('course_id', safe_course_key)
set_custom_attribute('org', loc.org)
set_custom_attribute('contentserver.path', loc.path)
# Figure out if this is a CDN using us as the origin.
is_from_cdn = StaticContentServer.is_cdn_request(request)
set_custom_attribute('contentserver.from_cdn', is_from_cdn)
# Check if this content is locked or not.
locked = self.is_content_locked(content)
set_custom_attribute('contentserver.locked', locked)
# Check that user has access to the content.
if not self.is_user_authorized(request, content, loc):
return HttpResponseForbidden('Unauthorized')
# Figure out if the client sent us a conditional request, and let them know
# if this asset has changed since then.
last_modified_at_str = content.last_modified_at.strftime(HTTP_DATE_FORMAT)
if 'HTTP_IF_MODIFIED_SINCE' in request.META:
if_modified_since = request.META['HTTP_IF_MODIFIED_SINCE']
if if_modified_since == last_modified_at_str:
return HttpResponseNotModified()
# *** File streaming within a byte range ***
# If a Range is provided, parse Range attribute of the request
# Add Content-Range in the response if Range is structurally correct
# Request -> Range attribute structure: "Range: bytes=first-[last]"
# Response -> Content-Range attribute structure: "Content-Range: bytes first-last/totalLength"
# http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35
response = None
if request.META.get('HTTP_RANGE'):
# If we have a StaticContent, get a StaticContentStream. Can't manipulate the bytes otherwise.
if isinstance(content, StaticContent):
content = AssetManager.find(loc, as_stream=True)
header_value = request.META['HTTP_RANGE']
try:
unit, ranges = parse_range_header(header_value, content.length)
except ValueError as exception:
# If the header field is syntactically invalid it should be ignored.
log.exception(
"%s in Range header: %s for content: %s",
str(exception), header_value, str(loc)
)
else:
if unit != 'bytes':
# Only accept ranges in bytes
log.warning("Unknown unit in Range header: %s for content: %s", header_value, str(loc))
elif len(ranges) > 1:
# According to Http/1.1 spec content for multiple ranges should be sent as a multipart message.
# http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.16
# But we send back the full content.
log.warning(
"More than 1 ranges in Range header: %s for content: %s", header_value, str(loc)
)
else:
first, last = ranges[0]
if 0 <= first <= last < content.length:
# If the byte range is satisfiable
response = HttpResponse(content.stream_data_in_range(first, last))
response['Content-Range'] = 'bytes {first}-{last}/{length}'.format(
first=first, last=last, length=content.length
)
response['Content-Length'] = str(last - first + 1)
response.status_code = 206 # Partial Content
set_custom_attribute('contentserver.ranged', True)
else:
log.warning(
"Cannot satisfy ranges in Range header: %s for content: %s",
header_value, str(loc)
)
return HttpResponse(status=416) # Requested Range Not Satisfiable
# If Range header is absent or syntactically invalid return a full content response.
if response is None:
response = HttpResponse(content.stream_data())
response['Content-Length'] = content.length
set_custom_attribute('contentserver.content_len', content.length)
set_custom_attribute('contentserver.content_type', content.content_type)
# "Accept-Ranges: bytes" tells the user that only "bytes" ranges are allowed
response['Accept-Ranges'] = 'bytes'
response['Content-Type'] = content.content_type
response['X-Frame-Options'] = 'ALLOW'
# Set any caching headers, and do any response cleanup needed. Based on how much
# middleware we have in place, there's no easy way to use the built-in Django
# utilities and properly sanitize and modify a response to ensure that it is as
# cacheable as possible, which is why we do it ourselves.
self.set_caching_headers(content, response)
return response
def set_caching_headers(self, content, response):
"""
Sets caching headers based on whether or not the asset is locked.
"""
is_locked = getattr(content, "locked", False)
# We want to signal to the end user's browser, and to any intermediate proxies/caches,
# whether or not this asset is cacheable. If we have a TTL configured, we inform the
# caller, for unlocked assets, how long they are allowed to cache it. Since locked
# assets should be restricted to enrolled students, we simply send headers that
# indicate there should be no caching whatsoever.
cache_ttl = CourseAssetCacheTtlConfig.get_cache_ttl()
if cache_ttl > 0 and not is_locked:
set_custom_attribute('contentserver.cacheable', True)
response['Expires'] = StaticContentServer.get_expiration_value(datetime.datetime.utcnow(), cache_ttl)
response['Cache-Control'] = "public, max-age={ttl}, s-maxage={ttl}".format(ttl=cache_ttl)
elif is_locked:
set_custom_attribute('contentserver.cacheable', False)
response['Cache-Control'] = "private, no-cache, no-store"
response['Last-Modified'] = content.last_modified_at.strftime(HTTP_DATE_FORMAT)
# Force the Vary header to only vary responses on Origin, so that XHR and browser requests get cached
# separately and don't screw over one another. i.e. a browser request that doesn't send Origin, and
# caches a version of the response without CORS headers, in turn breaking XHR requests.
force_header_for_response(response, 'Vary', 'Origin')
@staticmethod
def is_cdn_request(request):
"""
Attempts to determine whether or not the given request is coming from a CDN.
Currently, this is a static check because edx.org only uses CloudFront, but may
be expanded in the future.
"""
cdn_user_agents = CdnUserAgentsConfig.get_cdn_user_agents()
user_agent = request.META.get('HTTP_USER_AGENT', '')
if user_agent in cdn_user_agents:
# This is a CDN request.
return True
return False
@staticmethod
def get_expiration_value(now, cache_ttl):
"""Generates an RFC1123 datetime string based on a future offset."""
expire_dt = now + datetime.timedelta(seconds=cache_ttl)
return expire_dt.strftime(HTTP_DATE_FORMAT)
def is_content_locked(self, content):
"""
Determines whether or not the given content is locked.
"""
return bool(getattr(content, "locked", False))
def is_user_authorized(self, request, content, location):
"""
Determines whether or not the user for this request is authorized to view the given asset.
"""
if not self.is_content_locked(content):
return True
if not hasattr(request, "user") or not request.user.is_authenticated:
return False
if not request.user.is_staff:
deprecated = getattr(location, 'deprecated', False)
if deprecated and not CourseEnrollment.is_enrolled_by_partial(request.user, location.course_key):
return False
if not deprecated and not CourseEnrollment.is_enrolled(request.user, location.course_key):
return False
return True
def load_asset_from_location(self, location):
"""
Loads an asset based on its location, either retrieving it from a cache
or loading it directly from the contentstore.
"""
# See if we can load this item from cache.
content = get_cached_content(location)
if content is None:
# Not in cache, so just try and load it from the asset manager.
try:
content = AssetManager.find(location, as_stream=True)
except (ItemNotFoundError, NotFoundError): # lint-amnesty, pylint: disable=try-except-raise
raise
# Now that we fetched it, let's go ahead and try to cache it. We cap this at 1MB
# because it's the default for memcached and also we don't want to do too much
# buffering in memory when we're serving an actual request.
if content.length is not None and content.length < 1048576:
content = content.copy_to_in_mem()
set_cached_content(content)
return content
IMPL = StaticContentServer()
def parse_range_header(header_value, content_length):
"""
Returns the unit and a list of (start, end) tuples of ranges.
Raises ValueError if header is syntactically invalid or does not contain a range.
See spec for details: http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35
"""
unit = None
ranges = []
if '=' in header_value:
unit, byte_ranges_string = header_value.split('=')
# Parse the byte ranges.
for byte_range_string in byte_ranges_string.split(','):
byte_range_string = byte_range_string.strip()
# Case 0:
if '-' not in byte_range_string: # Invalid syntax of header value. # lint-amnesty, pylint: disable=no-else-raise
raise ValueError('Invalid syntax.')
# Case 1: -500
elif byte_range_string.startswith('-'):
first = max(0, (content_length + int(byte_range_string)))
last = content_length - 1
# Case 2: 500-
elif byte_range_string.endswith('-'):
first = int(byte_range_string[0:-1])
last = content_length - 1
# Case 3: 500-999
else:
first, last = byte_range_string.split('-')
first = int(first)
last = min(int(last), content_length - 1)
ranges.append((first, last))
if len(ranges) == 0:
raise ValueError('Invalid syntax')
return unit, ranges

View File

@@ -4,7 +4,6 @@ Tests for StaticContentServer
import copy
import datetime
import logging
import unittest
@@ -17,18 +16,18 @@ from django.test import RequestFactory
from django.test.client import Client
from django.test.utils import override_settings
from opaque_keys import InvalidKeyError
from xmodule.contentstore.django import contentstore
from xmodule.contentstore.content import StaticContent, VERSIONED_ASSETS_PREFIX
from xmodule.modulestore.django import modulestore
from xmodule.modulestore.tests.django_utils import TEST_DATA_SPLIT_MODULESTORE, SharedModuleStoreTestCase
from xmodule.modulestore.xml_importer import import_course_from_xml
from xmodule.assetstore.assetmgr import AssetManager
from xmodule.modulestore.exceptions import ItemNotFoundError
from common.djangoapps.student.models import CourseEnrollment
from common.djangoapps.student.tests.factories import UserFactory, AdminFactory
from common.djangoapps.student.tests.factories import AdminFactory, UserFactory
from xmodule.assetstore.assetmgr import AssetManager
from xmodule.contentstore.content import VERSIONED_ASSETS_PREFIX, StaticContent
from xmodule.contentstore.django import contentstore
from xmodule.modulestore.django import modulestore
from xmodule.modulestore.exceptions import ItemNotFoundError
from xmodule.modulestore.tests.django_utils import TEST_DATA_SPLIT_MODULESTORE, SharedModuleStoreTestCase
from xmodule.modulestore.xml_importer import import_course_from_xml
from ..middleware import parse_range_header, HTTP_DATE_FORMAT, StaticContentServer
from ..views import HTTP_DATE_FORMAT, StaticContentServer, parse_range_header
log = logging.getLogger(__name__)

View File

@@ -9,28 +9,41 @@ Django what view handled the request, it does so by looking at the result of the
`resolve` utility, but these URLs get a Resolver404 (because there's no
registered urlpattern).
We'd like to turn this into a proper view:
https://github.com/openedx/edx-platform/issues/34702
We've turned it into a proper view, with a few warts remaining:
The first step, seen here, is to have urlpatterns (redundant with the
middleware's `is_asset_request` method) and a view, but the view just calls into
the same code the middleware uses. The implementation of the middleware has been
moved into StaticContentServerImpl, leaving the middleware as just a shell
around the latter.
A waffle flag chooses whether to allow the middleware to handle the request, or
whether to pass the request along to the view. Why? Because we might be relying
by accident on some weird behavior inherent to misusing a middleware this way,
and we need a way to quickly switch back if we encounter problems.
If the view works, we can move all of StaticContentServerImpl directly into the
view and drop the middleware and the waffle flag.
- The view implementation is all bundled into a StaticContentServer class that
doesn't appear to have any state. The methods could likely just be extracted
as top-level functions.
- All three urlpatterns are registered to the same view, which then has to
re-parse the URL to determine which pattern is in effect. We should probably
have 3 views as entry points.
"""
from django.http import HttpResponseNotFound
import datetime
import logging
from django.http import (
HttpResponse,
HttpResponseBadRequest,
HttpResponseForbidden,
HttpResponseNotFound,
HttpResponseNotModified,
HttpResponsePermanentRedirect
)
from django.views.decorators.http import require_safe
from edx_django_utils.monitoring import set_custom_attribute
from opaque_keys import InvalidKeyError
from opaque_keys.edx.locator import AssetLocator
from .middleware import CONTENT_SERVER_USE_VIEW, IMPL
from common.djangoapps.student.models import CourseEnrollment
from openedx.core.djangoapps.header_control import force_header_for_response
from xmodule.assetstore.assetmgr import AssetManager
from xmodule.contentstore.content import XASSET_LOCATION_TAG, StaticContent
from xmodule.exceptions import NotFoundError
from xmodule.modulestore import InvalidLocationError
from xmodule.modulestore.exceptions import ItemNotFoundError
from .caching import get_cached_content, set_cached_content
from .models import CdnUserAgentsConfig, CourseAssetCacheTtlConfig
@require_safe
@@ -38,21 +51,315 @@ def course_assets_view(request):
"""
Serve course assets to end users. Colloquially referred to as "contentserver."
"""
set_custom_attribute('content_server.handled_by.view', True)
return IMPL.process_request(request)
if not CONTENT_SERVER_USE_VIEW.is_enabled():
# Should never happen; keep track of occurrences.
set_custom_attribute('content_server.view.called_when_disabled', True)
# But handle the request anyhow.
# We'll delegate request handling to an instance of the middleware
# until we can verify that the behavior is identical when requests
# come all the way through to the view.
response = IMPL.process_request(request)
log = logging.getLogger(__name__)
if response is None:
# Shouldn't happen
set_custom_attribute('content_server.view.no_response_from_impl', True)
return HttpResponseNotFound()
else:
return response
# TODO: Soon as we have a reasonable way to serialize/deserialize AssetKeys, we need
# to change this file so instead of using course_id_partial, we're just using asset keys
HTTP_DATE_FORMAT = "%a, %d %b %Y %H:%M:%S GMT"
class StaticContentServer():
"""
Serves course assets to end users. Colloquially referred to as "contentserver."
"""
def is_asset_request(self, request):
"""Determines whether the given request is an asset request"""
# Don't change this without updating urls.py! See docstring of views.py.
return (
request.path.startswith('/' + XASSET_LOCATION_TAG + '/')
or
request.path.startswith('/' + AssetLocator.CANONICAL_NAMESPACE)
or
StaticContent.is_versioned_asset_path(request.path)
)
# pylint: disable=too-many-statements
def process_request(self, request):
"""Process the given request"""
asset_path = request.path
if self.is_asset_request(request): # lint-amnesty, pylint: disable=too-many-nested-blocks
# Make sure we can convert this request into a location.
if AssetLocator.CANONICAL_NAMESPACE in asset_path:
asset_path = asset_path.replace('block/', 'block@', 1)
# If this is a versioned request, pull out the digest and chop off the prefix.
requested_digest = None
if StaticContent.is_versioned_asset_path(asset_path):
requested_digest, asset_path = StaticContent.parse_versioned_asset_path(asset_path)
# Make sure we have a valid location value for this asset.
try:
loc = StaticContent.get_location_from_path(asset_path)
except (InvalidLocationError, InvalidKeyError):
return HttpResponseBadRequest()
# Attempt to load the asset to make sure it exists, and grab the asset digest
# if we're able to load it.
actual_digest = None
try:
content = self.load_asset_from_location(loc)
actual_digest = getattr(content, "content_digest", None)
except (ItemNotFoundError, NotFoundError):
return HttpResponseNotFound()
# If this was a versioned asset, and the digest doesn't match, redirect
# them to the actual version.
if requested_digest is not None and actual_digest is not None and (actual_digest != requested_digest):
actual_asset_path = StaticContent.add_version_to_asset_path(asset_path, actual_digest)
return HttpResponsePermanentRedirect(actual_asset_path)
# Set the basics for this request. Make sure that the course key for this
# asset has a run, which old-style courses do not. Otherwise, this will
# explode when the key is serialized to be sent to NR.
safe_course_key = loc.course_key
if safe_course_key.run is None:
safe_course_key = safe_course_key.replace(run='only')
set_custom_attribute('course_id', safe_course_key)
set_custom_attribute('org', loc.org)
set_custom_attribute('contentserver.path', loc.path)
# Figure out if this is a CDN using us as the origin.
is_from_cdn = StaticContentServer.is_cdn_request(request)
set_custom_attribute('contentserver.from_cdn', is_from_cdn)
# Check if this content is locked or not.
locked = self.is_content_locked(content)
set_custom_attribute('contentserver.locked', locked)
# Check that user has access to the content.
if not self.is_user_authorized(request, content, loc):
return HttpResponseForbidden('Unauthorized')
# Figure out if the client sent us a conditional request, and let them know
# if this asset has changed since then.
last_modified_at_str = content.last_modified_at.strftime(HTTP_DATE_FORMAT)
if 'HTTP_IF_MODIFIED_SINCE' in request.META:
if_modified_since = request.META['HTTP_IF_MODIFIED_SINCE']
if if_modified_since == last_modified_at_str:
return HttpResponseNotModified()
# *** File streaming within a byte range ***
# If a Range is provided, parse Range attribute of the request
# Add Content-Range in the response if Range is structurally correct
# Request -> Range attribute structure: "Range: bytes=first-[last]"
# Response -> Content-Range attribute structure: "Content-Range: bytes first-last/totalLength"
# http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35
response = None
if request.META.get('HTTP_RANGE'):
# If we have a StaticContent, get a StaticContentStream. Can't manipulate the bytes otherwise.
if isinstance(content, StaticContent):
content = AssetManager.find(loc, as_stream=True)
header_value = request.META['HTTP_RANGE']
try:
unit, ranges = parse_range_header(header_value, content.length)
except ValueError as exception:
# If the header field is syntactically invalid it should be ignored.
log.exception(
"%s in Range header: %s for content: %s",
str(exception), header_value, str(loc)
)
else:
if unit != 'bytes':
# Only accept ranges in bytes
log.warning("Unknown unit in Range header: %s for content: %s", header_value, str(loc))
elif len(ranges) > 1:
# According to Http/1.1 spec content for multiple ranges should be sent as a multipart message.
# http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.16
# But we send back the full content.
log.warning(
"More than 1 ranges in Range header: %s for content: %s", header_value, str(loc)
)
else:
first, last = ranges[0]
if 0 <= first <= last < content.length:
# If the byte range is satisfiable
response = HttpResponse(content.stream_data_in_range(first, last))
response['Content-Range'] = 'bytes {first}-{last}/{length}'.format(
first=first, last=last, length=content.length
)
response['Content-Length'] = str(last - first + 1)
response.status_code = 206 # Partial Content
set_custom_attribute('contentserver.ranged', True)
else:
log.warning(
"Cannot satisfy ranges in Range header: %s for content: %s",
header_value, str(loc)
)
return HttpResponse(status=416) # Requested Range Not Satisfiable
# If Range header is absent or syntactically invalid return a full content response.
if response is None:
response = HttpResponse(content.stream_data())
response['Content-Length'] = content.length
set_custom_attribute('contentserver.content_len', content.length)
set_custom_attribute('contentserver.content_type', content.content_type)
# "Accept-Ranges: bytes" tells the user that only "bytes" ranges are allowed
response['Accept-Ranges'] = 'bytes'
response['Content-Type'] = content.content_type
response['X-Frame-Options'] = 'ALLOW'
# Set any caching headers, and do any response cleanup needed. Based on how much
# middleware we have in place, there's no easy way to use the built-in Django
# utilities and properly sanitize and modify a response to ensure that it is as
# cacheable as possible, which is why we do it ourselves.
self.set_caching_headers(content, response)
return response
def set_caching_headers(self, content, response):
"""
Sets caching headers based on whether or not the asset is locked.
"""
is_locked = getattr(content, "locked", False)
# We want to signal to the end user's browser, and to any intermediate proxies/caches,
# whether or not this asset is cacheable. If we have a TTL configured, we inform the
# caller, for unlocked assets, how long they are allowed to cache it. Since locked
# assets should be restricted to enrolled students, we simply send headers that
# indicate there should be no caching whatsoever.
cache_ttl = CourseAssetCacheTtlConfig.get_cache_ttl()
if cache_ttl > 0 and not is_locked:
set_custom_attribute('contentserver.cacheable', True)
response['Expires'] = StaticContentServer.get_expiration_value(datetime.datetime.utcnow(), cache_ttl)
response['Cache-Control'] = "public, max-age={ttl}, s-maxage={ttl}".format(ttl=cache_ttl)
elif is_locked:
set_custom_attribute('contentserver.cacheable', False)
response['Cache-Control'] = "private, no-cache, no-store"
response['Last-Modified'] = content.last_modified_at.strftime(HTTP_DATE_FORMAT)
# Force the Vary header to only vary responses on Origin, so that XHR and browser requests get cached
# separately and don't screw over one another. i.e. a browser request that doesn't send Origin, and
# caches a version of the response without CORS headers, in turn breaking XHR requests.
force_header_for_response(response, 'Vary', 'Origin')
@staticmethod
def is_cdn_request(request):
"""
Attempts to determine whether or not the given request is coming from a CDN.
Currently, this is a static check because edx.org only uses CloudFront, but may
be expanded in the future.
"""
cdn_user_agents = CdnUserAgentsConfig.get_cdn_user_agents()
user_agent = request.META.get('HTTP_USER_AGENT', '')
if user_agent in cdn_user_agents:
# This is a CDN request.
return True
return False
@staticmethod
def get_expiration_value(now, cache_ttl):
"""Generates an RFC1123 datetime string based on a future offset."""
expire_dt = now + datetime.timedelta(seconds=cache_ttl)
return expire_dt.strftime(HTTP_DATE_FORMAT)
def is_content_locked(self, content):
"""
Determines whether or not the given content is locked.
"""
return bool(getattr(content, "locked", False))
def is_user_authorized(self, request, content, location):
"""
Determines whether or not the user for this request is authorized to view the given asset.
"""
if not self.is_content_locked(content):
return True
if not hasattr(request, "user") or not request.user.is_authenticated:
return False
if not request.user.is_staff:
deprecated = getattr(location, 'deprecated', False)
if deprecated and not CourseEnrollment.is_enrolled_by_partial(request.user, location.course_key):
return False
if not deprecated and not CourseEnrollment.is_enrolled(request.user, location.course_key):
return False
return True
def load_asset_from_location(self, location):
"""
Loads an asset based on its location, either retrieving it from a cache
or loading it directly from the contentstore.
"""
# See if we can load this item from cache.
content = get_cached_content(location)
if content is None:
# Not in cache, so just try and load it from the asset manager.
try:
content = AssetManager.find(location, as_stream=True)
except (ItemNotFoundError, NotFoundError): # lint-amnesty, pylint: disable=try-except-raise
raise
# Now that we fetched it, let's go ahead and try to cache it. We cap this at 1MB
# because it's the default for memcached and also we don't want to do too much
# buffering in memory when we're serving an actual request.
if content.length is not None and content.length < 1048576:
content = content.copy_to_in_mem()
set_cached_content(content)
return content
IMPL = StaticContentServer()
def parse_range_header(header_value, content_length):
"""
Returns the unit and a list of (start, end) tuples of ranges.
Raises ValueError if header is syntactically invalid or does not contain a range.
See spec for details: http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35
"""
unit = None
ranges = []
if '=' in header_value:
unit, byte_ranges_string = header_value.split('=')
# Parse the byte ranges.
for byte_range_string in byte_ranges_string.split(','):
byte_range_string = byte_range_string.strip()
# Case 0:
if '-' not in byte_range_string: # Invalid syntax of header value. # lint-amnesty, pylint: disable=no-else-raise
raise ValueError('Invalid syntax.')
# Case 1: -500
elif byte_range_string.startswith('-'):
first = max(0, (content_length + int(byte_range_string)))
last = content_length - 1
# Case 2: 500-
elif byte_range_string.endswith('-'):
first = int(byte_range_string[0:-1])
last = content_length - 1
# Case 3: 500-999
else:
first, last = byte_range_string.split('-')
first = int(first)
last = min(int(last), content_length - 1)
ranges.append((first, last))
if len(ranges) == 0:
raise ValueError('Invalid syntax')
return unit, ranges