diff --git a/common/djangoapps/contentserver/admin.py b/common/djangoapps/contentserver/admin.py index aa385279f8..893b267f0b 100644 --- a/common/djangoapps/contentserver/admin.py +++ b/common/djangoapps/contentserver/admin.py @@ -4,7 +4,7 @@ that gets used when sending cachability headers back with request course assets. """ from django.contrib import admin from config_models.admin import ConfigurationModelAdmin -from .models import CourseAssetCacheTtlConfig +from .models import CourseAssetCacheTtlConfig, CdnUserAgentsConfig class CourseAssetCacheTtlConfigAdmin(ConfigurationModelAdmin): @@ -26,4 +26,24 @@ class CourseAssetCacheTtlConfigAdmin(ConfigurationModelAdmin): return self.list_display +class CdnUserAgentsConfigAdmin(ConfigurationModelAdmin): + """ + Basic configuration for CDN user agent whitelist. + """ + list_display = [ + 'cdn_user_agents' + ] + + def get_list_display(self, request): + """ + Restore default list_display behavior. + + ConfigurationModelAdmin overrides this, but in a way that doesn't + respect the ordering. This lets us customize it the usual Django admin + way. + """ + return self.list_display + + admin.site.register(CourseAssetCacheTtlConfig, CourseAssetCacheTtlConfigAdmin) +admin.site.register(CdnUserAgentsConfig, CdnUserAgentsConfigAdmin) diff --git a/common/djangoapps/contentserver/middleware.py b/common/djangoapps/contentserver/middleware.py index 433967e1cb..bd6088093f 100644 --- a/common/djangoapps/contentserver/middleware.py +++ b/common/djangoapps/contentserver/middleware.py @@ -3,13 +3,14 @@ Middleware to serve assets. """ import logging - import datetime +import newrelic.agent + from django.http import ( HttpResponse, HttpResponseNotModified, HttpResponseForbidden, HttpResponseBadRequest, HttpResponseNotFound) from student.models import CourseEnrollment -from contentserver.models import CourseAssetCacheTtlConfig +from contentserver.models import CourseAssetCacheTtlConfig, CdnUserAgentsConfig from header_control import force_header_for_response from xmodule.assetstore.assetmgr import AssetManager @@ -55,6 +56,19 @@ class StaticContentServer(object): except (ItemNotFoundError, NotFoundError): return HttpResponseNotFound() + # Set the basics for this request. + newrelic.agent.add_custom_parameter('course_id', loc.course_key) + newrelic.agent.add_custom_parameter('org', loc.org) + newrelic.agent.add_custom_parameter('contentserver.path', loc.path) + + # Figure out if this is a CDN using us as the origin. + is_from_cdn = StaticContentServer.is_cdn_request(request) + newrelic.agent.add_custom_parameter('contentserver.from_cdn', True if is_from_cdn else False) + + # Check if this content is locked or not. + locked = self.is_content_locked(content) + newrelic.agent.add_custom_parameter('contentserver.locked', True if locked else False) + # Check that user has access to the content. if not self.is_user_authorized(request, content, loc): return HttpResponseForbidden('Unauthorized') @@ -107,8 +121,11 @@ class StaticContentServer(object): response['Content-Range'] = 'bytes {first}-{last}/{length}'.format( first=first, last=last, length=content.length ) - response['Content-Length'] = str(last - first + 1) + range_len = last - first + 1 + response['Content-Length'] = str(range_len) response.status_code = 206 # Partial Content + + newrelic.agent.add_custom_parameter('contentserver.range_len', range_len) else: log.warning( u"Cannot satisfy ranges in Range header: %s for content: %s", header_value, unicode(loc) @@ -120,6 +137,9 @@ class StaticContentServer(object): response = HttpResponse(content.stream_data()) response['Content-Length'] = content.length + newrelic.agent.add_custom_parameter('contentserver.content_len', content.length) + newrelic.agent.add_custom_parameter('contentserver.content_type', content.content_type) + # "Accept-Ranges: bytes" tells the user that only "bytes" ranges are allowed response['Accept-Ranges'] = 'bytes' response['Content-Type'] = content.content_type @@ -146,9 +166,11 @@ class StaticContentServer(object): # indicate there should be no caching whatsoever. cache_ttl = CourseAssetCacheTtlConfig.get_cache_ttl() if cache_ttl > 0 and not is_locked: + newrelic.agent.add_custom_parameter('contentserver.cacheable', True) response['Expires'] = StaticContentServer.get_expiration_value(datetime.datetime.utcnow(), cache_ttl) response['Cache-Control'] = "public, max-age={ttl}, s-maxage={ttl}".format(ttl=cache_ttl) elif is_locked: + newrelic.agent.add_custom_parameter('contentserver.cacheable', False) response['Cache-Control'] = "private, no-cache, no-store" response['Last-Modified'] = content.last_modified_at.strftime(HTTP_DATE_FORMAT) @@ -158,19 +180,39 @@ class StaticContentServer(object): # caches a version of the response without CORS headers, in turn breaking XHR requests. force_header_for_response(response, 'Vary', 'Origin') + @staticmethod + def is_cdn_request(request): + """ + Attempts to determine whether or not the given request is coming from a CDN. + + Currently, this is a static check because edx.org only uses CloudFront, but may + be expanded in the future. + """ + cdn_user_agents = CdnUserAgentsConfig.get_cdn_user_agents() + user_agent = request.META.get('HTTP_USER_AGENT', '') + if user_agent in cdn_user_agents: + # This is a CDN request. + return True + + return False + @staticmethod def get_expiration_value(now, cache_ttl): """Generates an RFC1123 datetime string based on a future offset.""" expire_dt = now + datetime.timedelta(seconds=cache_ttl) return expire_dt.strftime(HTTP_DATE_FORMAT) + def is_content_locked(self, content): + """ + Determines whether or not the given content is locked. + """ + return getattr(content, "locked", False) + def is_user_authorized(self, request, content, location): """ Determines whether or not the user for this request is authorized to view the given asset. """ - - is_locked = getattr(content, "locked", False) - if not is_locked: + if not self.is_content_locked(content): return True if not hasattr(request, "user") or not request.user.is_authenticated(): diff --git a/common/djangoapps/contentserver/migrations/0002_cdnuseragentsconfig.py b/common/djangoapps/contentserver/migrations/0002_cdnuseragentsconfig.py new file mode 100644 index 0000000000..a078ae35e5 --- /dev/null +++ b/common/djangoapps/contentserver/migrations/0002_cdnuseragentsconfig.py @@ -0,0 +1,27 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from django.db import migrations, models +import django.db.models.deletion +from django.conf import settings + + +class Migration(migrations.Migration): + + dependencies = [ + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ('contentserver', '0001_initial'), + ] + + operations = [ + migrations.CreateModel( + name='CdnUserAgentsConfig', + fields=[ + ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)), + ('change_date', models.DateTimeField(auto_now_add=True, verbose_name='Change date')), + ('enabled', models.BooleanField(default=False, verbose_name='Enabled')), + ('cdn_user_agents', models.TextField(default=b'Amazon CloudFront', help_text=b'A newline-separated list of user agents that should be considered CDNs.')), + ('changed_by', models.ForeignKey(on_delete=django.db.models.deletion.PROTECT, editable=False, to=settings.AUTH_USER_MODEL, null=True, verbose_name='Changed by')), + ], + ), + ] diff --git a/common/djangoapps/contentserver/models.py b/common/djangoapps/contentserver/models.py index 1d8dbfa647..013ff364f1 100644 --- a/common/djangoapps/contentserver/models.py +++ b/common/djangoapps/contentserver/models.py @@ -2,7 +2,7 @@ Models for contentserver """ -from django.db.models.fields import PositiveIntegerField +from django.db.models.fields import PositiveIntegerField, TextField from config_models.models import ConfigurationModel @@ -27,3 +27,26 @@ class CourseAssetCacheTtlConfig(ConfigurationModel): def __unicode__(self): return unicode(repr(self)) + + +class CdnUserAgentsConfig(ConfigurationModel): + """Configuration for the user agents we expect to see from CDNs.""" + + class Meta(object): + app_label = 'contentserver' + + cdn_user_agents = TextField( + default='Amazon CloudFront', + help_text="A newline-separated list of user agents that should be considered CDNs." + ) + + @classmethod + def get_cdn_user_agents(cls): + """Gets the list of CDN user agents, if present""" + return cls.current().cdn_user_agents + + def __repr__(self): + return ''.format(self.get_cdn_user_agents()) + + def __unicode__(self): + return unicode(repr(self)) diff --git a/common/djangoapps/contentserver/test/test_contentserver.py b/common/djangoapps/contentserver/test/test_contentserver.py index d34e714b1f..0181e11560 100644 --- a/common/djangoapps/contentserver/test/test_contentserver.py +++ b/common/djangoapps/contentserver/test/test_contentserver.py @@ -10,6 +10,7 @@ import unittest from uuid import uuid4 from django.conf import settings +from django.test import RequestFactory from django.test.client import Client from django.test.utils import override_settings from mock import patch @@ -271,6 +272,49 @@ class ContentStoreToyCourseTest(SharedModuleStoreTestCase): near_expire_dt = StaticContentServer.get_expiration_value(start_dt, 55) self.assertEqual("Thu, 01 Dec 1983 20:00:55 GMT", near_expire_dt) + @patch('contentserver.models.CdnUserAgentsConfig.get_cdn_user_agents') + def test_cache_is_cdn_with_normal_request(self, mock_get_cdn_user_agents): + """ + Tests that when a normal request is made -- i.e. from an end user with their + browser -- that we don't classify the request as coming from a CDN. + """ + mock_get_cdn_user_agents.return_value = 'Amazon CloudFront' + + request_factory = RequestFactory() + browser_request = request_factory.get('/fake', HTTP_USER_AGENT='Chrome 1234') + + is_from_cdn = StaticContentServer.is_cdn_request(browser_request) + self.assertEqual(is_from_cdn, False) + + @patch('contentserver.models.CdnUserAgentsConfig.get_cdn_user_agents') + def test_cache_is_cdn_with_cdn_request(self, mock_get_cdn_user_agents): + """ + Tests that when a CDN request is made -- i.e. from an edge node back to the + origin -- that we classify the request as coming from a CDN. + """ + mock_get_cdn_user_agents.return_value = 'Amazon CloudFront' + + request_factory = RequestFactory() + browser_request = request_factory.get('/fake', HTTP_USER_AGENT='Amazon CloudFront') + + is_from_cdn = StaticContentServer.is_cdn_request(browser_request) + self.assertEqual(is_from_cdn, True) + + @patch('contentserver.models.CdnUserAgentsConfig.get_cdn_user_agents') + def test_cache_is_cdn_with_cdn_request_multiple_user_agents(self, mock_get_cdn_user_agents): + """ + Tests that when a CDN request is made -- i.e. from an edge node back to the + origin -- that we classify the request as coming from a CDN when multiple UAs + are configured. + """ + mock_get_cdn_user_agents.return_value = 'Amazon CloudFront\nAkamai GHost' + + request_factory = RequestFactory() + browser_request = request_factory.get('/fake', HTTP_USER_AGENT='Amazon CloudFront') + + is_from_cdn = StaticContentServer.is_cdn_request(browser_request) + self.assertEqual(is_from_cdn, True) + @ddt.ddt class ParseRangeHeaderTestCase(unittest.TestCase):