From 849ebc5f222501b7b89e8541d237816925b6900c Mon Sep 17 00:00:00 2001 From: Toby Lawrence Date: Thu, 26 May 2016 14:00:03 -0400 Subject: [PATCH] [PERF-325] Add versioned course asset URLs when canonicalizing asset paths. --- .../contentstore/tests/test_contentstore.py | 4 +- cms/djangoapps/contentstore/tests/utils.py | 2 +- .../contentstore/views/tests/test_assets.py | 8 +- common/djangoapps/contentserver/middleware.py | 35 +- .../contentserver/test/test_contentserver.py | 56 ++- .../test/test_static_replace.py | 389 +++++++++++------- .../xmodule/xmodule/contentstore/content.py | 95 ++++- .../lib/xmodule/xmodule/contentstore/mongo.py | 6 +- .../{sample_static.txt => sample_static.html} | 0 9 files changed, 406 insertions(+), 189 deletions(-) rename common/test/data/toy/static/{sample_static.txt => sample_static.html} (100%) diff --git a/cms/djangoapps/contentstore/tests/test_contentstore.py b/cms/djangoapps/contentstore/tests/test_contentstore.py index 4382c05764..a03958ab34 100644 --- a/cms/djangoapps/contentstore/tests/test_contentstore.py +++ b/cms/djangoapps/contentstore/tests/test_contentstore.py @@ -977,7 +977,7 @@ class MiscCourseTests(ContentStoreTestCase): 3) computing thumbnail location of asset 4) deleting the asset from the course """ - asset_key = self.course.id.make_asset_key('asset', 'sample_static.txt') + asset_key = self.course.id.make_asset_key('asset', 'sample_static.html') content = StaticContent( asset_key, "Fake asset", "application/text", "test", ) @@ -1049,7 +1049,7 @@ class MiscCourseTests(ContentStoreTestCase): draft content is also deleted """ # add an asset - asset_key = self.course.id.make_asset_key('asset', 'sample_static.txt') + asset_key = self.course.id.make_asset_key('asset', 'sample_static.html') content = StaticContent( asset_key, "Fake asset", "application/text", "test", ) diff --git a/cms/djangoapps/contentstore/tests/utils.py b/cms/djangoapps/contentstore/tests/utils.py index 2026295c9e..af5e7b8e52 100644 --- a/cms/djangoapps/contentstore/tests/utils.py +++ b/cms/djangoapps/contentstore/tests/utils.py @@ -121,7 +121,7 @@ class CourseTestCase(ProceduralCourseTestMixin, ModuleStoreTestCase): SEQUENTIAL = 'vertical_sequential' DRAFT_HTML = 'draft_html' DRAFT_VIDEO = 'draft_video' - LOCKED_ASSET_KEY = AssetLocation.from_deprecated_string('/c4x/edX/toy/asset/sample_static.txt') + LOCKED_ASSET_KEY = AssetLocation.from_deprecated_string('/c4x/edX/toy/asset/sample_static.html') def import_and_populate_course(self): """ diff --git a/cms/djangoapps/contentstore/views/tests/test_assets.py b/cms/djangoapps/contentstore/views/tests/test_assets.py index 78ce5b4050..797b76be59 100644 --- a/cms/djangoapps/contentstore/views/tests/test_assets.py +++ b/cms/djangoapps/contentstore/views/tests/test_assets.py @@ -126,7 +126,7 @@ class BasicAssetsTestCase(AssetsTestCase): ) course = module_store.get_course(course_id) - filename = 'sample_static.txt' + filename = 'sample_static.html' html_src_attribute = '"/static/{}"'.format(filename) asset_url = replace_static_urls(html_src_attribute, course_id=course.id) url = asset_url.replace('"', '') @@ -379,7 +379,7 @@ class LockAssetTestCase(AssetsTestCase): """ def verify_asset_locked_state(locked): """ Helper method to verify lock state in the contentstore """ - asset_location = StaticContent.get_location_from_path('/c4x/edX/toy/asset/sample_static.txt') + asset_location = StaticContent.get_location_from_path('/c4x/edX/toy/asset/sample_static.html') content = contentstore().find(asset_location) self.assertEqual(content.locked, locked) @@ -387,14 +387,14 @@ class LockAssetTestCase(AssetsTestCase): """ Helper method for posting asset update. """ content_type = 'application/txt' upload_date = datetime(2013, 6, 1, 10, 30, tzinfo=UTC) - asset_location = course.id.make_asset_key('asset', 'sample_static.txt') + asset_location = course.id.make_asset_key('asset', 'sample_static.html') url = reverse_course_url('assets_handler', course.id, kwargs={'asset_key_string': unicode(asset_location)}) resp = self.client.post( url, # pylint: disable=protected-access json.dumps(assets._get_asset_json( - "sample_static.txt", content_type, upload_date, asset_location, None, lock)), + "sample_static.html", content_type, upload_date, asset_location, None, lock)), "application/json" ) diff --git a/common/djangoapps/contentserver/middleware.py b/common/djangoapps/contentserver/middleware.py index 8b96c6b536..7c0875bbf8 100644 --- a/common/djangoapps/contentserver/middleware.py +++ b/common/djangoapps/contentserver/middleware.py @@ -3,12 +3,11 @@ Middleware to serve assets. """ import logging - import datetime import newrelic.agent from django.http import ( HttpResponse, HttpResponseNotModified, HttpResponseForbidden, - HttpResponseBadRequest, HttpResponseNotFound) + HttpResponseBadRequest, HttpResponseNotFound, HttpResponsePermanentRedirect) from student.models import CourseEnrollment from contentserver.models import CourseAssetCacheTtlConfig, CdnUserAgentsConfig @@ -30,32 +29,54 @@ HTTP_DATE_FORMAT = "%a, %d %b %Y %H:%M:%S GMT" class StaticContentServer(object): + """ + Serves course assets to end users. Colloquially referred to as "contentserver." + """ def is_asset_request(self, request): """Determines whether the given request is an asset request""" return ( request.path.startswith('/' + XASSET_LOCATION_TAG + '/') or request.path.startswith('/' + AssetLocator.CANONICAL_NAMESPACE) + or + StaticContent.is_versioned_asset_path(request.path) ) def process_request(self, request): """Process the given request""" + asset_path = request.path + if self.is_asset_request(request): # Make sure we can convert this request into a location. - if AssetLocator.CANONICAL_NAMESPACE in request.path: - request.path = request.path.replace('block/', 'block@', 1) + if AssetLocator.CANONICAL_NAMESPACE in asset_path: + asset_path = asset_path.replace('block/', 'block@', 1) + + # If this is a versioned request, pull out the digest and chop off the prefix. + requested_digest = None + if StaticContent.is_versioned_asset_path(asset_path): + requested_digest, asset_path = StaticContent.parse_versioned_asset_path(asset_path) + + # Make sure we have a valid location value for this asset. try: - loc = StaticContent.get_location_from_path(request.path) + loc = StaticContent.get_location_from_path(asset_path) except (InvalidLocationError, InvalidKeyError): return HttpResponseBadRequest() - # Try and load the asset. - content = None + # Attempt to load the asset to make sure it exists, and grab the asset digest + # if we're able to load it. + actual_digest = None try: content = self.load_asset_from_location(loc) + actual_digest = content.content_digest except (ItemNotFoundError, NotFoundError): return HttpResponseNotFound() + # If this was a versioned asset, and the digest doesn't match, redirect + # them to the actual version. + if requested_digest is not None and (actual_digest != requested_digest): + actual_asset_path = StaticContent.add_version_to_asset_path(asset_path, actual_digest) + return HttpResponsePermanentRedirect(actual_asset_path) + # Set the basics for this request. Make sure that the course key for this # asset has a run, which old-style courses do not. Otherwise, this will # explode when the key is serialized to be sent to NR. diff --git a/common/djangoapps/contentserver/test/test_contentserver.py b/common/djangoapps/contentserver/test/test_contentserver.py index c68bc5a63c..52bdf83d2d 100644 --- a/common/djangoapps/contentserver/test/test_contentserver.py +++ b/common/djangoapps/contentserver/test/test_contentserver.py @@ -16,9 +16,13 @@ from django.test.utils import override_settings from mock import patch from xmodule.contentstore.django import contentstore +from xmodule.contentstore.content import StaticContent from xmodule.modulestore.django import modulestore from xmodule.modulestore.tests.django_utils import SharedModuleStoreTestCase from xmodule.modulestore.xml_importer import import_course_from_xml +from xmodule.assetstore.assetmgr import AssetManager +from opaque_keys import InvalidKeyError +from xmodule.modulestore.exceptions import ItemNotFoundError from contentserver.middleware import parse_range_header, HTTP_DATE_FORMAT, StaticContentServer from student.models import CourseEnrollment @@ -28,9 +32,24 @@ log = logging.getLogger(__name__) TEST_DATA_CONTENTSTORE = copy.deepcopy(settings.CONTENTSTORE) TEST_DATA_CONTENTSTORE['DOC_STORE_CONFIG']['db'] = 'test_xcontent_%s' % uuid4().hex - TEST_DATA_DIR = settings.COMMON_TEST_DATA_ROOT +FAKE_MD5_HASH = 'ffffffffffffffffffffffffffffffff' + + +def get_versioned_asset_url(asset_path): + """ + Creates a versioned asset URL. + """ + try: + locator = StaticContent.get_location_from_path(asset_path) + content = AssetManager.find(locator, as_stream=True) + return StaticContent.add_version_to_asset_path(asset_path, content.content_digest) + except (InvalidKeyError, ItemNotFoundError): + pass + + return asset_path + @ddt.ddt @override_settings(CONTENTSTORE=TEST_DATA_CONTENTSTORE) @@ -54,13 +73,15 @@ class ContentStoreToyCourseTest(SharedModuleStoreTestCase): ) # A locked asset - cls.locked_asset = cls.course_key.make_asset_key('asset', 'sample_static.txt') + cls.locked_asset = cls.course_key.make_asset_key('asset', 'sample_static.html') cls.url_locked = unicode(cls.locked_asset) + cls.url_locked_versioned = get_versioned_asset_url(cls.url_locked) cls.contentstore.set_attr(cls.locked_asset, 'locked', True) # An unlocked asset cls.unlocked_asset = cls.course_key.make_asset_key('asset', 'another_static.txt') cls.url_unlocked = unicode(cls.unlocked_asset) + cls.url_unlocked_versioned = get_versioned_asset_url(cls.url_unlocked) cls.length_unlocked = cls.contentstore.get_attr(cls.unlocked_asset, 'length') def setUp(self): @@ -81,6 +102,37 @@ class ContentStoreToyCourseTest(SharedModuleStoreTestCase): resp = self.client.get(self.url_unlocked) self.assertEqual(resp.status_code, 200) + def test_unlocked_versioned_asset(self): + """ + Test that unlocked assets that are versioned are being served. + """ + self.client.logout() + resp = self.client.get(self.url_unlocked_versioned) + self.assertEqual(resp.status_code, 200) + + def test_unlocked_versioned_asset_with_nonexistent_version(self): + """ + Test that unlocked assets that are versioned, but have a nonexistent version, + are sent back as a 301 redirect which tells the caller the correct URL. + """ + url_unlocked_versioned_old = StaticContent.add_version_to_asset_path(self.url_unlocked, FAKE_MD5_HASH) + + self.client.logout() + resp = self.client.get(url_unlocked_versioned_old) + self.assertEqual(resp.status_code, 301) + self.assertTrue(resp.url.endswith(self.url_unlocked_versioned)) # pylint: disable=no-member + + def test_locked_versioned_asset(self): + """ + Test that locked assets that are versioned are being served. + """ + CourseEnrollment.enroll(self.non_staff_usr, self.course_key) + self.assertTrue(CourseEnrollment.is_enrolled(self.non_staff_usr, self.course_key)) + + self.client.login(username=self.non_staff_usr, password='test') + resp = self.client.get(self.url_locked_versioned) + self.assertEqual(resp.status_code, 200) + def test_locked_asset_not_logged_in(self): """ Test that locked assets behave appropriately in case the user is not diff --git a/common/djangoapps/static_replace/test/test_static_replace.py b/common/djangoapps/static_replace/test/test_static_replace.py index bc203a85bc..b0a74e3315 100644 --- a/common/djangoapps/static_replace/test/test_static_replace.py +++ b/common/djangoapps/static_replace/test/test_static_replace.py @@ -1,12 +1,13 @@ +# -*- coding: utf-8 -*- """Tests for static_replace""" -from urllib import quote_plus - import ddt import re + +from django.utils.http import urlquote, urlencode +from urlparse import urlparse, urlunparse, parse_qsl from PIL import Image from cStringIO import StringIO - from nose.tools import assert_equals, assert_true, assert_false # pylint: disable=no-name-in-module from static_replace import ( replace_static_urls, @@ -16,7 +17,6 @@ from static_replace import ( make_static_urls_absolute ) from mock import patch, Mock - from opaque_keys.edx.locations import SlashSeparatedCourseKey from xmodule.contentstore.content import StaticContent from xmodule.contentstore.django import contentstore @@ -25,12 +25,29 @@ from xmodule.modulestore.mongo import MongoModuleStore from xmodule.modulestore.tests.django_utils import SharedModuleStoreTestCase from xmodule.modulestore.tests.factories import CourseFactory, check_mongo_calls from xmodule.modulestore.xml import XMLModuleStore +from xmodule.modulestore.exceptions import ItemNotFoundError +from xmodule.exceptions import NotFoundError +from xmodule.assetstore.assetmgr import AssetManager DATA_DIRECTORY = 'data_dir' COURSE_KEY = SlashSeparatedCourseKey('org', 'course', 'run') STATIC_SOURCE = '"/static/file.png"' +def encode_unicode_characters_in_url(url): + """ + Encodes all Unicode characters to their percent-encoding representation + in both the path portion and query parameter portion of the given URL. + """ + scheme, netloc, path, params, query, fragment = urlparse(url) + query_params = parse_qsl(query) + updated_query_params = [] + for query_name, query_val in query_params: + updated_query_params.append((query_name, urlquote(query_val))) + + return urlunparse((scheme, netloc, urlquote(path, '/:+@'), params, urlencode(query_params), fragment)) + + def test_multi_replace(): course_source = '"/course/file.png"' @@ -202,7 +219,7 @@ class CanonicalContentTest(SharedModuleStoreTestCase): cls.courses[prefix] = CourseFactory.create(org='a', course='b', run=prefix) # Create an unlocked image. - unlock_content = cls.create_image(prefix, (32, 32), 'blue', '{}_unlock.png') + unlock_content = cls.create_image(prefix, (32, 32), 'blue', u'{}_ünlöck.png') # Create a locked image. lock_content = cls.create_image(prefix, (32, 32), 'green', '{}_lock.png', locked=True) @@ -212,14 +229,14 @@ class CanonicalContentTest(SharedModuleStoreTestCase): contentstore().generate_thumbnail(lock_content, dimensions=(16, 16)) # Create an unlocked image in a subdirectory. - cls.create_image(prefix, (1, 1), 'red', 'special/{}_unlock.png') + cls.create_image(prefix, (1, 1), 'red', u'special/{}_ünlöck.png') # Create a locked image in a subdirectory. cls.create_image(prefix, (1, 1), 'yellow', 'special/{}_lock.png', locked=True) # Create an unlocked image with funky characters in the name. - cls.create_image(prefix, (1, 1), 'black', 'weird {}_unlock.png') - cls.create_image(prefix, (1, 1), 'black', 'special/weird {}_unlock.png') + cls.create_image(prefix, (1, 1), 'black', u'weird {}_ünlöck.png') + cls.create_image(prefix, (1, 1), 'black', u'special/weird {}_ünlöck.png') # Create an HTML file to test extension exclusion, and create a control file. cls.create_arbitrary_content(prefix, '{}_not_excluded.htm') @@ -227,6 +244,24 @@ class CanonicalContentTest(SharedModuleStoreTestCase): cls.create_arbitrary_content(prefix, 'special/{}_not_excluded.htm') cls.create_arbitrary_content(prefix, 'special/{}_excluded.html') + @classmethod + def get_content_digest_for_asset_path(cls, prefix, path): + """ + Takes an unprocessed asset path, parses it just enough to try and find the + asset it refers to, and returns the content digest of that asset if it exists. + """ + + # Parse the path as if it was potentially a relative URL with query parameters, + # or an absolute URL, etc. Only keep the path because that's all we need. + _, _, relative_path, _, _, _ = urlparse(path) + asset_key = StaticContent.get_asset_key_from_path(cls.courses[prefix].id, relative_path) + + try: + content = AssetManager.find(asset_key, as_stream=True) + return content.content_digest + except (ItemNotFoundError, NotFoundError): + return None + @classmethod def create_image(cls, prefix, dimensions, color, name, locked=False): """ @@ -277,100 +312,100 @@ class CanonicalContentTest(SharedModuleStoreTestCase): @ddt.data( # No leading slash. - (u'', u'{prfx}_unlock.png', u'/{asset}@{prfx}_unlock.png', 1), + (u'', u'{prfx}_ünlöck.png', u'/{asset}@{prfx}_ünlöck.png', 1), (u'', u'{prfx}_lock.png', u'/{asset}@{prfx}_lock.png', 1), - (u'', u'weird {prfx}_unlock.png', u'/{asset}@weird_{prfx}_unlock.png', 1), - (u'', u'{prfx}_excluded.html', u'/{asset}@{prfx}_excluded.html', 1), + (u'', u'weird {prfx}_ünlöck.png', u'/{asset}@weird_{prfx}_ünlöck.png', 1), + (u'', u'{prfx}_excluded.html', u'/{base_asset}@{prfx}_excluded.html', 1), (u'', u'{prfx}_not_excluded.htm', u'/{asset}@{prfx}_not_excluded.htm', 1), - (u'dev', u'{prfx}_unlock.png', u'//dev/{asset}@{prfx}_unlock.png', 1), + (u'dev', u'{prfx}_ünlöck.png', u'//dev/{asset}@{prfx}_ünlöck.png', 1), (u'dev', u'{prfx}_lock.png', u'/{asset}@{prfx}_lock.png', 1), - (u'dev', u'weird {prfx}_unlock.png', u'//dev/{asset}@weird_{prfx}_unlock.png', 1), - (u'dev', u'{prfx}_excluded.html', u'/{asset}@{prfx}_excluded.html', 1), + (u'dev', u'weird {prfx}_ünlöck.png', u'//dev/{asset}@weird_{prfx}_ünlöck.png', 1), + (u'dev', u'{prfx}_excluded.html', u'/{base_asset}@{prfx}_excluded.html', 1), (u'dev', u'{prfx}_not_excluded.htm', u'//dev/{asset}@{prfx}_not_excluded.htm', 1), # No leading slash with subdirectory. This ensures we properly substitute slashes. - (u'', u'special/{prfx}_unlock.png', u'/{asset}@special_{prfx}_unlock.png', 1), + (u'', u'special/{prfx}_ünlöck.png', u'/{asset}@special_{prfx}_ünlöck.png', 1), (u'', u'special/{prfx}_lock.png', u'/{asset}@special_{prfx}_lock.png', 1), - (u'', u'special/weird {prfx}_unlock.png', u'/{asset}@special_weird_{prfx}_unlock.png', 1), - (u'', u'special/{prfx}_excluded.html', u'/{asset}@special_{prfx}_excluded.html', 1), + (u'', u'special/weird {prfx}_ünlöck.png', u'/{asset}@special_weird_{prfx}_ünlöck.png', 1), + (u'', u'special/{prfx}_excluded.html', u'/{base_asset}@special_{prfx}_excluded.html', 1), (u'', u'special/{prfx}_not_excluded.htm', u'/{asset}@special_{prfx}_not_excluded.htm', 1), - (u'dev', u'special/{prfx}_unlock.png', u'//dev/{asset}@special_{prfx}_unlock.png', 1), + (u'dev', u'special/{prfx}_ünlöck.png', u'//dev/{asset}@special_{prfx}_ünlöck.png', 1), (u'dev', u'special/{prfx}_lock.png', u'/{asset}@special_{prfx}_lock.png', 1), - (u'dev', u'special/weird {prfx}_unlock.png', u'//dev/{asset}@special_weird_{prfx}_unlock.png', 1), - (u'dev', u'special/{prfx}_excluded.html', u'/{asset}@special_{prfx}_excluded.html', 1), + (u'dev', u'special/weird {prfx}_ünlöck.png', u'//dev/{asset}@special_weird_{prfx}_ünlöck.png', 1), + (u'dev', u'special/{prfx}_excluded.html', u'/{base_asset}@special_{prfx}_excluded.html', 1), (u'dev', u'special/{prfx}_not_excluded.htm', u'//dev/{asset}@special_{prfx}_not_excluded.htm', 1), # Leading slash. - (u'', u'/{prfx}_unlock.png', u'/{asset}@{prfx}_unlock.png', 1), + (u'', u'/{prfx}_ünlöck.png', u'/{asset}@{prfx}_ünlöck.png', 1), (u'', u'/{prfx}_lock.png', u'/{asset}@{prfx}_lock.png', 1), - (u'', u'/weird {prfx}_unlock.png', u'/{asset}@weird_{prfx}_unlock.png', 1), - (u'', u'/{prfx}_excluded.html', u'/{asset}@{prfx}_excluded.html', 1), + (u'', u'/weird {prfx}_ünlöck.png', u'/{asset}@weird_{prfx}_ünlöck.png', 1), + (u'', u'/{prfx}_excluded.html', u'/{base_asset}@{prfx}_excluded.html', 1), (u'', u'/{prfx}_not_excluded.htm', u'/{asset}@{prfx}_not_excluded.htm', 1), - (u'dev', u'/{prfx}_unlock.png', u'//dev/{asset}@{prfx}_unlock.png', 1), + (u'dev', u'/{prfx}_ünlöck.png', u'//dev/{asset}@{prfx}_ünlöck.png', 1), (u'dev', u'/{prfx}_lock.png', u'/{asset}@{prfx}_lock.png', 1), - (u'dev', u'/weird {prfx}_unlock.png', u'//dev/{asset}@weird_{prfx}_unlock.png', 1), - (u'dev', u'/{prfx}_excluded.html', u'/{asset}@{prfx}_excluded.html', 1), + (u'dev', u'/weird {prfx}_ünlöck.png', u'//dev/{asset}@weird_{prfx}_ünlöck.png', 1), + (u'dev', u'/{prfx}_excluded.html', u'/{base_asset}@{prfx}_excluded.html', 1), (u'dev', u'/{prfx}_not_excluded.htm', u'//dev/{asset}@{prfx}_not_excluded.htm', 1), # Leading slash with subdirectory. This ensures we properly substitute slashes. - (u'', u'/special/{prfx}_unlock.png', u'/{asset}@special_{prfx}_unlock.png', 1), + (u'', u'/special/{prfx}_ünlöck.png', u'/{asset}@special_{prfx}_ünlöck.png', 1), (u'', u'/special/{prfx}_lock.png', u'/{asset}@special_{prfx}_lock.png', 1), - (u'', u'/special/weird {prfx}_unlock.png', u'/{asset}@special_weird_{prfx}_unlock.png', 1), - (u'', u'/special/{prfx}_excluded.html', u'/{asset}@special_{prfx}_excluded.html', 1), + (u'', u'/special/weird {prfx}_ünlöck.png', u'/{asset}@special_weird_{prfx}_ünlöck.png', 1), + (u'', u'/special/{prfx}_excluded.html', u'/{base_asset}@special_{prfx}_excluded.html', 1), (u'', u'/special/{prfx}_not_excluded.htm', u'/{asset}@special_{prfx}_not_excluded.htm', 1), - (u'dev', u'/special/{prfx}_unlock.png', u'//dev/{asset}@special_{prfx}_unlock.png', 1), + (u'dev', u'/special/{prfx}_ünlöck.png', u'//dev/{asset}@special_{prfx}_ünlöck.png', 1), (u'dev', u'/special/{prfx}_lock.png', u'/{asset}@special_{prfx}_lock.png', 1), - (u'dev', u'/special/weird {prfx}_unlock.png', u'//dev/{asset}@special_weird_{prfx}_unlock.png', 1), - (u'dev', u'/special/{prfx}_excluded.html', u'/{asset}@special_{prfx}_excluded.html', 1), + (u'dev', u'/special/weird {prfx}_ünlöck.png', u'//dev/{asset}@special_weird_{prfx}_ünlöck.png', 1), + (u'dev', u'/special/{prfx}_excluded.html', u'/{base_asset}@special_{prfx}_excluded.html', 1), (u'dev', u'/special/{prfx}_not_excluded.htm', u'//dev/{asset}@special_{prfx}_not_excluded.htm', 1), # Static path. - (u'', u'/static/{prfx}_unlock.png', u'/{asset}@{prfx}_unlock.png', 1), + (u'', u'/static/{prfx}_ünlöck.png', u'/{asset}@{prfx}_ünlöck.png', 1), (u'', u'/static/{prfx}_lock.png', u'/{asset}@{prfx}_lock.png', 1), - (u'', u'/static/weird {prfx}_unlock.png', u'/{asset}@weird_{prfx}_unlock.png', 1), - (u'', u'/static/{prfx}_excluded.html', u'/{asset}@{prfx}_excluded.html', 1), + (u'', u'/static/weird {prfx}_ünlöck.png', u'/{asset}@weird_{prfx}_ünlöck.png', 1), + (u'', u'/static/{prfx}_excluded.html', u'/{base_asset}@{prfx}_excluded.html', 1), (u'', u'/static/{prfx}_not_excluded.htm', u'/{asset}@{prfx}_not_excluded.htm', 1), - (u'dev', u'/static/{prfx}_unlock.png', u'//dev/{asset}@{prfx}_unlock.png', 1), + (u'dev', u'/static/{prfx}_ünlöck.png', u'//dev/{asset}@{prfx}_ünlöck.png', 1), (u'dev', u'/static/{prfx}_lock.png', u'/{asset}@{prfx}_lock.png', 1), - (u'dev', u'/static/weird {prfx}_unlock.png', u'//dev/{asset}@weird_{prfx}_unlock.png', 1), - (u'dev', u'/static/{prfx}_excluded.html', u'/{asset}@{prfx}_excluded.html', 1), + (u'dev', u'/static/weird {prfx}_ünlöck.png', u'//dev/{asset}@weird_{prfx}_ünlöck.png', 1), + (u'dev', u'/static/{prfx}_excluded.html', u'/{base_asset}@{prfx}_excluded.html', 1), (u'dev', u'/static/{prfx}_not_excluded.htm', u'//dev/{asset}@{prfx}_not_excluded.htm', 1), # Static path with subdirectory. This ensures we properly substitute slashes. - (u'', u'/static/special/{prfx}_unlock.png', u'/{asset}@special_{prfx}_unlock.png', 1), + (u'', u'/static/special/{prfx}_ünlöck.png', u'/{asset}@special_{prfx}_ünlöck.png', 1), (u'', u'/static/special/{prfx}_lock.png', u'/{asset}@special_{prfx}_lock.png', 1), - (u'', u'/static/special/weird {prfx}_unlock.png', u'/{asset}@special_weird_{prfx}_unlock.png', 1), - (u'', u'/static/special/{prfx}_excluded.html', u'/{asset}@special_{prfx}_excluded.html', 1), + (u'', u'/static/special/weird {prfx}_ünlöck.png', u'/{asset}@special_weird_{prfx}_ünlöck.png', 1), + (u'', u'/static/special/{prfx}_excluded.html', u'/{base_asset}@special_{prfx}_excluded.html', 1), (u'', u'/static/special/{prfx}_not_excluded.htm', u'/{asset}@special_{prfx}_not_excluded.htm', 1), - (u'dev', u'/static/special/{prfx}_unlock.png', u'//dev/{asset}@special_{prfx}_unlock.png', 1), + (u'dev', u'/static/special/{prfx}_ünlöck.png', u'//dev/{asset}@special_{prfx}_ünlöck.png', 1), (u'dev', u'/static/special/{prfx}_lock.png', u'/{asset}@special_{prfx}_lock.png', 1), - (u'dev', u'/static/special/weird {prfx}_unlock.png', u'//dev/{asset}@special_weird_{prfx}_unlock.png', 1), - (u'dev', u'/static/special/{prfx}_excluded.html', u'/{asset}@special_{prfx}_excluded.html', 1), + (u'dev', u'/static/special/weird {prfx}_ünlöck.png', u'//dev/{asset}@special_weird_{prfx}_ünlöck.png', 1), + (u'dev', u'/static/special/{prfx}_excluded.html', u'/{base_asset}@special_{prfx}_excluded.html', 1), (u'dev', u'/static/special/{prfx}_not_excluded.htm', u'//dev/{asset}@special_{prfx}_not_excluded.htm', 1), # Static path with query parameter. ( u'', - u'/static/{prfx}_unlock.png?foo=/static/{prfx}_lock.png', - u'/{asset}@{prfx}_unlock.png?foo={encoded_asset}{prfx}_lock.png', + u'/static/{prfx}_ünlöck.png?foo=/static/{prfx}_lock.png', + u'/{asset}@{prfx}_ünlöck.png?foo={encoded_asset}{prfx}_lock.png', 2 ), ( u'', - u'/static/{prfx}_lock.png?foo=/static/{prfx}_unlock.png', - u'/{asset}@{prfx}_lock.png?foo={encoded_asset}{prfx}_unlock.png', + u'/static/{prfx}_lock.png?foo=/static/{prfx}_ünlöck.png', + u'/{asset}@{prfx}_lock.png?foo={encoded_asset}{prfx}_ünlöck.png', 2 ), ( u'', u'/static/{prfx}_excluded.html?foo=/static/{prfx}_excluded.html', - u'/{asset}@{prfx}_excluded.html?foo={encoded_asset}{prfx}_excluded.html', + u'/{base_asset}@{prfx}_excluded.html?foo={encoded_base_asset}{prfx}_excluded.html', 2 ), ( u'', u'/static/{prfx}_excluded.html?foo=/static/{prfx}_not_excluded.htm', - u'/{asset}@{prfx}_excluded.html?foo={encoded_asset}{prfx}_not_excluded.htm', + u'/{base_asset}@{prfx}_excluded.html?foo={encoded_asset}{prfx}_not_excluded.htm', 2 ), ( u'', u'/static/{prfx}_not_excluded.htm?foo=/static/{prfx}_excluded.html', - u'/{asset}@{prfx}_not_excluded.htm?foo={encoded_asset}{prfx}_excluded.html', + u'/{asset}@{prfx}_not_excluded.htm?foo={encoded_base_asset}{prfx}_excluded.html', 2 ), ( @@ -381,32 +416,32 @@ class CanonicalContentTest(SharedModuleStoreTestCase): ), ( u'dev', - u'/static/{prfx}_unlock.png?foo=/static/{prfx}_lock.png', - u'//dev/{asset}@{prfx}_unlock.png?foo={encoded_asset}{prfx}_lock.png', + u'/static/{prfx}_ünlöck.png?foo=/static/{prfx}_lock.png', + u'//dev/{asset}@{prfx}_ünlöck.png?foo={encoded_asset}{prfx}_lock.png', 2 ), ( u'dev', - u'/static/{prfx}_lock.png?foo=/static/{prfx}_unlock.png', - u'/{asset}@{prfx}_lock.png?foo={encoded_base_url}{encoded_asset}{prfx}_unlock.png', + u'/static/{prfx}_lock.png?foo=/static/{prfx}_ünlöck.png', + u'/{asset}@{prfx}_lock.png?foo={encoded_base_url}{encoded_asset}{prfx}_ünlöck.png', 2 ), ( u'dev', u'/static/{prfx}_excluded.html?foo=/static/{prfx}_excluded.html', - u'/{asset}@{prfx}_excluded.html?foo={encoded_asset}{prfx}_excluded.html', + u'/{base_asset}@{prfx}_excluded.html?foo={encoded_base_asset}{prfx}_excluded.html', 2 ), ( u'dev', u'/static/{prfx}_excluded.html?foo=/static/{prfx}_not_excluded.htm', - u'/{asset}@{prfx}_excluded.html?foo={encoded_base_url}{encoded_asset}{prfx}_not_excluded.htm', + u'/{base_asset}@{prfx}_excluded.html?foo={encoded_base_url}{encoded_asset}{prfx}_not_excluded.htm', 2 ), ( u'dev', u'/static/{prfx}_not_excluded.htm?foo=/static/{prfx}_excluded.html', - u'//dev/{asset}@{prfx}_not_excluded.htm?foo={encoded_asset}{prfx}_excluded.html', + u'//dev/{asset}@{prfx}_not_excluded.htm?foo={encoded_base_asset}{prfx}_excluded.html', 2 ), ( @@ -416,163 +451,189 @@ class CanonicalContentTest(SharedModuleStoreTestCase): 2 ), # Already asset key. - (u'', u'/{asset}@{prfx}_unlock.png', u'/{asset}@{prfx}_unlock.png', 1), - (u'', u'/{asset}@{prfx}_lock.png', u'/{asset}@{prfx}_lock.png', 1), - (u'', u'/{asset}@weird_{prfx}_unlock.png', u'/{asset}@weird_{prfx}_unlock.png', 1), - (u'', u'/{asset}@{prfx}_excluded.html', u'/{asset}@{prfx}_excluded.html', 1), - (u'', u'/{asset}@{prfx}_not_excluded.htm', u'/{asset}@{prfx}_not_excluded.htm', 1), - (u'dev', u'/{asset}@{prfx}_unlock.png', u'//dev/{asset}@{prfx}_unlock.png', 1), - (u'dev', u'/{asset}@{prfx}_lock.png', u'/{asset}@{prfx}_lock.png', 1), - (u'dev', u'/{asset}@weird_{prfx}_unlock.png', u'//dev/{asset}@weird_{prfx}_unlock.png', 1), - (u'dev', u'/{asset}@{prfx}_excluded.html', u'/{asset}@{prfx}_excluded.html', 1), - (u'dev', u'/{asset}@{prfx}_not_excluded.htm', u'//dev/{asset}@{prfx}_not_excluded.htm', 1), + (u'', u'/{base_asset}@{prfx}_ünlöck.png', u'/{asset}@{prfx}_ünlöck.png', 1), + (u'', u'/{base_asset}@{prfx}_lock.png', u'/{asset}@{prfx}_lock.png', 1), + (u'', u'/{base_asset}@weird_{prfx}_ünlöck.png', u'/{asset}@weird_{prfx}_ünlöck.png', 1), + (u'', u'/{base_asset}@{prfx}_excluded.html', u'/{base_asset}@{prfx}_excluded.html', 1), + (u'', u'/{base_asset}@{prfx}_not_excluded.htm', u'/{asset}@{prfx}_not_excluded.htm', 1), + (u'dev', u'/{base_asset}@{prfx}_ünlöck.png', u'//dev/{asset}@{prfx}_ünlöck.png', 1), + (u'dev', u'/{base_asset}@{prfx}_lock.png', u'/{asset}@{prfx}_lock.png', 1), + (u'dev', u'/{base_asset}@weird_{prfx}_ünlöck.png', u'//dev/{asset}@weird_{prfx}_ünlöck.png', 1), + (u'dev', u'/{base_asset}@{prfx}_excluded.html', u'/{base_asset}@{prfx}_excluded.html', 1), + (u'dev', u'/{base_asset}@{prfx}_not_excluded.htm', u'//dev/{asset}@{prfx}_not_excluded.htm', 1), # Old, c4x-style path. - (u'', u'/{c4x}/{prfx}_unlock.png', u'/{c4x}/{prfx}_unlock.png', 1), + (u'', u'/{c4x}/{prfx}_ünlöck.png', u'/{c4x}/{prfx}_ünlöck.png', 1), (u'', u'/{c4x}/{prfx}_lock.png', u'/{c4x}/{prfx}_lock.png', 1), (u'', u'/{c4x}/weird_{prfx}_lock.png', u'/{c4x}/weird_{prfx}_lock.png', 1), (u'', u'/{c4x}/{prfx}_excluded.html', u'/{c4x}/{prfx}_excluded.html', 1), (u'', u'/{c4x}/{prfx}_not_excluded.htm', u'/{c4x}/{prfx}_not_excluded.htm', 1), - (u'dev', u'/{c4x}/{prfx}_unlock.png', u'/{c4x}/{prfx}_unlock.png', 1), + (u'dev', u'/{c4x}/{prfx}_ünlöck.png', u'/{c4x}/{prfx}_ünlöck.png', 1), (u'dev', u'/{c4x}/{prfx}_lock.png', u'/{c4x}/{prfx}_lock.png', 1), - (u'dev', u'/{c4x}/weird_{prfx}_unlock.png', u'/{c4x}/weird_{prfx}_unlock.png', 1), + (u'dev', u'/{c4x}/weird_{prfx}_ünlöck.png', u'/{c4x}/weird_{prfx}_ünlöck.png', 1), (u'dev', u'/{c4x}/{prfx}_excluded.html', u'/{c4x}/{prfx}_excluded.html', 1), (u'dev', u'/{c4x}/{prfx}_not_excluded.htm', u'/{c4x}/{prfx}_not_excluded.htm', 1), # Thumbnails. - (u'', u'/{th_key}@{prfx}_unlock-{th_ext}', u'/{th_key}@{prfx}_unlock-{th_ext}', 1), - (u'', u'/{th_key}@{prfx}_lock-{th_ext}', u'/{th_key}@{prfx}_lock-{th_ext}', 1), - (u'dev', u'/{th_key}@{prfx}_unlock-{th_ext}', u'//dev/{th_key}@{prfx}_unlock-{th_ext}', 1), - (u'dev', u'/{th_key}@{prfx}_lock-{th_ext}', u'//dev/{th_key}@{prfx}_lock-{th_ext}', 1), + (u'', u'/{base_th_key}@{prfx}_ünlöck-{th_ext}', u'/{th_key}@{prfx}_ünlöck-{th_ext}', 1), + (u'', u'/{base_th_key}@{prfx}_lock-{th_ext}', u'/{th_key}@{prfx}_lock-{th_ext}', 1), + (u'dev', u'/{base_th_key}@{prfx}_ünlöck-{th_ext}', u'//dev/{th_key}@{prfx}_ünlöck-{th_ext}', 1), + (u'dev', u'/{base_th_key}@{prfx}_lock-{th_ext}', u'//dev/{th_key}@{prfx}_lock-{th_ext}', 1), ) @ddt.unpack def test_canonical_asset_path_with_new_style_assets(self, base_url, start, expected, mongo_calls): exts = ['.html', '.tm'] - prefix = 'split' - encoded_base_url = quote_plus('//' + base_url) - c4x = 'c4x/a/b/asset' - asset_key = 'asset-v1:a+b+{}+type@asset+block'.format(prefix) - encoded_asset_key = quote_plus('/asset-v1:a+b+{}+type@asset+block@'.format(prefix)) - th_key = 'asset-v1:a+b+{}+type@thumbnail+block'.format(prefix) - th_ext = 'png-16x16.jpg' + prefix = u'split' + encoded_base_url = urlquote(u'//' + base_url) + c4x = u'c4x/a/b/asset' + base_asset_key = u'asset-v1:a+b+{}+type@asset+block'.format(prefix) + adjusted_asset_key = base_asset_key + encoded_asset_key = urlquote(u'/asset-v1:a+b+{}+type@asset+block@'.format(prefix)) + encoded_base_asset_key = encoded_asset_key + base_th_key = u'asset-v1:a+b+{}+type@thumbnail+block'.format(prefix) + adjusted_th_key = base_th_key + th_ext = u'png-16x16.jpg' start = start.format( prfx=prefix, c4x=c4x, - asset=asset_key, + base_asset=base_asset_key, + asset=adjusted_asset_key, encoded_base_url=encoded_base_url, encoded_asset=encoded_asset_key, - th_key=th_key, + base_th_key=base_th_key, + th_key=adjusted_th_key, th_ext=th_ext ) + + # Adjust for content digest. This gets dicey quickly and we have to order our steps: + # - replace format markets because they have curly braces + # - encode Unicode characters to percent-encoded + # - finally shove back in our regex patterns + digest = CanonicalContentTest.get_content_digest_for_asset_path(prefix, start) + if digest: + adjusted_asset_key = u'assets/courseware/MARK/asset-v1:a+b+{}+type@asset+block'.format(prefix) + adjusted_th_key = u'assets/courseware/MARK/asset-v1:a+b+{}+type@thumbnail+block'.format(prefix) + encoded_asset_key = u'/assets/courseware/MARK/asset-v1:a+b+{}+type@asset+block@'.format(prefix) + encoded_asset_key = urlquote(encoded_asset_key) + expected = expected.format( prfx=prefix, c4x=c4x, - asset=asset_key, + base_asset=base_asset_key, + asset=adjusted_asset_key, encoded_base_url=encoded_base_url, encoded_asset=encoded_asset_key, - th_key=th_key, - th_ext=th_ext + base_th_key=base_th_key, + th_key=adjusted_th_key, + th_ext=th_ext, + encoded_base_asset=encoded_base_asset_key, ) + expected = encode_unicode_characters_in_url(expected) + expected = expected.replace('MARK', '[a-f0-9]{32}') + expected = expected.replace('+', r'\+').replace('?', r'\?') + with check_mongo_calls(mongo_calls): asset_path = StaticContent.get_canonicalized_asset_path(self.courses[prefix].id, start, base_url, exts) - self.assertEqual(asset_path, expected) + print expected + print asset_path + self.assertIsNotNone(re.match(expected, asset_path)) @ddt.data( # No leading slash. - (u'', u'{prfx}_unlock.png', u'/{c4x}/{prfx}_unlock.png', 1), + (u'', u'{prfx}_ünlöck.png', u'/{c4x}/{prfx}_ünlöck.png', 1), (u'', u'{prfx}_lock.png', u'/{c4x}/{prfx}_lock.png', 1), - (u'', u'weird {prfx}_unlock.png', u'/{c4x}/weird_{prfx}_unlock.png', 1), - (u'', u'{prfx}_excluded.html', u'/{c4x}/{prfx}_excluded.html', 1), + (u'', u'weird {prfx}_ünlöck.png', u'/{c4x}/weird_{prfx}_ünlöck.png', 1), + (u'', u'{prfx}_excluded.html', u'/{base_c4x}/{prfx}_excluded.html', 1), (u'', u'{prfx}_not_excluded.htm', u'/{c4x}/{prfx}_not_excluded.htm', 1), - (u'dev', u'{prfx}_unlock.png', u'//dev/{c4x}/{prfx}_unlock.png', 1), + (u'dev', u'{prfx}_ünlöck.png', u'//dev/{c4x}/{prfx}_ünlöck.png', 1), (u'dev', u'{prfx}_lock.png', u'/{c4x}/{prfx}_lock.png', 1), - (u'dev', u'weird {prfx}_unlock.png', u'//dev/{c4x}/weird_{prfx}_unlock.png', 1), - (u'dev', u'{prfx}_excluded.html', u'/{c4x}/{prfx}_excluded.html', 1), + (u'dev', u'weird {prfx}_ünlöck.png', u'//dev/{c4x}/weird_{prfx}_ünlöck.png', 1), + (u'dev', u'{prfx}_excluded.html', u'/{base_c4x}/{prfx}_excluded.html', 1), (u'dev', u'{prfx}_not_excluded.htm', u'//dev/{c4x}/{prfx}_not_excluded.htm', 1), # No leading slash with subdirectory. This ensures we probably substitute slashes. - (u'', u'special/{prfx}_unlock.png', u'/{c4x}/special_{prfx}_unlock.png', 1), + (u'', u'special/{prfx}_ünlöck.png', u'/{c4x}/special_{prfx}_ünlöck.png', 1), (u'', u'special/{prfx}_lock.png', u'/{c4x}/special_{prfx}_lock.png', 1), - (u'', u'special/weird {prfx}_unlock.png', u'/{c4x}/special_weird_{prfx}_unlock.png', 1), - (u'', u'special/{prfx}_excluded.html', u'/{c4x}/special_{prfx}_excluded.html', 1), + (u'', u'special/weird {prfx}_ünlöck.png', u'/{c4x}/special_weird_{prfx}_ünlöck.png', 1), + (u'', u'special/{prfx}_excluded.html', u'/{base_c4x}/special_{prfx}_excluded.html', 1), (u'', u'special/{prfx}_not_excluded.htm', u'/{c4x}/special_{prfx}_not_excluded.htm', 1), - (u'dev', u'special/{prfx}_unlock.png', u'//dev/{c4x}/special_{prfx}_unlock.png', 1), + (u'dev', u'special/{prfx}_ünlöck.png', u'//dev/{c4x}/special_{prfx}_ünlöck.png', 1), (u'dev', u'special/{prfx}_lock.png', u'/{c4x}/special_{prfx}_lock.png', 1), - (u'dev', u'special/weird {prfx}_unlock.png', u'//dev/{c4x}/special_weird_{prfx}_unlock.png', 1), - (u'dev', u'special/{prfx}_excluded.html', u'/{c4x}/special_{prfx}_excluded.html', 1), + (u'dev', u'special/weird {prfx}_ünlöck.png', u'//dev/{c4x}/special_weird_{prfx}_ünlöck.png', 1), + (u'dev', u'special/{prfx}_excluded.html', u'/{base_c4x}/special_{prfx}_excluded.html', 1), (u'dev', u'special/{prfx}_not_excluded.htm', u'//dev/{c4x}/special_{prfx}_not_excluded.htm', 1), # Leading slash. - (u'', u'/{prfx}_unlock.png', u'/{c4x}/{prfx}_unlock.png', 1), + (u'', u'/{prfx}_ünlöck.png', u'/{c4x}/{prfx}_ünlöck.png', 1), (u'', u'/{prfx}_lock.png', u'/{c4x}/{prfx}_lock.png', 1), - (u'', u'/weird {prfx}_unlock.png', u'/{c4x}/weird_{prfx}_unlock.png', 1), - (u'', u'/{prfx}_excluded.html', u'/{c4x}/{prfx}_excluded.html', 1), + (u'', u'/weird {prfx}_ünlöck.png', u'/{c4x}/weird_{prfx}_ünlöck.png', 1), + (u'', u'/{prfx}_excluded.html', u'/{base_c4x}/{prfx}_excluded.html', 1), (u'', u'/{prfx}_not_excluded.htm', u'/{c4x}/{prfx}_not_excluded.htm', 1), - (u'dev', u'/{prfx}_unlock.png', u'//dev/{c4x}/{prfx}_unlock.png', 1), + (u'dev', u'/{prfx}_ünlöck.png', u'//dev/{c4x}/{prfx}_ünlöck.png', 1), (u'dev', u'/{prfx}_lock.png', u'/{c4x}/{prfx}_lock.png', 1), - (u'dev', u'/weird {prfx}_unlock.png', u'//dev/{c4x}/weird_{prfx}_unlock.png', 1), - (u'dev', u'/{prfx}_excluded.html', u'/{c4x}/{prfx}_excluded.html', 1), + (u'dev', u'/weird {prfx}_ünlöck.png', u'//dev/{c4x}/weird_{prfx}_ünlöck.png', 1), + (u'dev', u'/{prfx}_excluded.html', u'/{base_c4x}/{prfx}_excluded.html', 1), (u'dev', u'/{prfx}_not_excluded.htm', u'//dev/{c4x}/{prfx}_not_excluded.htm', 1), # Leading slash with subdirectory. This ensures we properly substitute slashes. - (u'', u'/special/{prfx}_unlock.png', u'/{c4x}/special_{prfx}_unlock.png', 1), + (u'', u'/special/{prfx}_ünlöck.png', u'/{c4x}/special_{prfx}_ünlöck.png', 1), (u'', u'/special/{prfx}_lock.png', u'/{c4x}/special_{prfx}_lock.png', 1), - (u'', u'/special/weird {prfx}_unlock.png', u'/{c4x}/special_weird_{prfx}_unlock.png', 1), - (u'', u'/special/{prfx}_excluded.html', u'/{c4x}/special_{prfx}_excluded.html', 1), + (u'', u'/special/weird {prfx}_ünlöck.png', u'/{c4x}/special_weird_{prfx}_ünlöck.png', 1), + (u'', u'/special/{prfx}_excluded.html', u'/{base_c4x}/special_{prfx}_excluded.html', 1), (u'', u'/special/{prfx}_not_excluded.htm', u'/{c4x}/special_{prfx}_not_excluded.htm', 1), - (u'dev', u'/special/{prfx}_unlock.png', u'//dev/{c4x}/special_{prfx}_unlock.png', 1), + (u'dev', u'/special/{prfx}_ünlöck.png', u'//dev/{c4x}/special_{prfx}_ünlöck.png', 1), (u'dev', u'/special/{prfx}_lock.png', u'/{c4x}/special_{prfx}_lock.png', 1), - (u'dev', u'/special/weird {prfx}_unlock.png', u'//dev/{c4x}/special_weird_{prfx}_unlock.png', 1), - (u'dev', u'/special/{prfx}_excluded.html', u'/{c4x}/special_{prfx}_excluded.html', 1), + (u'dev', u'/special/weird {prfx}_ünlöck.png', u'//dev/{c4x}/special_weird_{prfx}_ünlöck.png', 1), + (u'dev', u'/special/{prfx}_excluded.html', u'/{base_c4x}/special_{prfx}_excluded.html', 1), (u'dev', u'/special/{prfx}_not_excluded.htm', u'//dev/{c4x}/special_{prfx}_not_excluded.htm', 1), # Static path. - (u'', u'/static/{prfx}_unlock.png', u'/{c4x}/{prfx}_unlock.png', 1), + (u'', u'/static/{prfx}_ünlöck.png', u'/{c4x}/{prfx}_ünlöck.png', 1), (u'', u'/static/{prfx}_lock.png', u'/{c4x}/{prfx}_lock.png', 1), - (u'', u'/static/weird {prfx}_unlock.png', u'/{c4x}/weird_{prfx}_unlock.png', 1), - (u'', u'/static/{prfx}_excluded.html', u'/{c4x}/{prfx}_excluded.html', 1), + (u'', u'/static/weird {prfx}_ünlöck.png', u'/{c4x}/weird_{prfx}_ünlöck.png', 1), + (u'', u'/static/{prfx}_excluded.html', u'/{base_c4x}/{prfx}_excluded.html', 1), (u'', u'/static/{prfx}_not_excluded.htm', u'/{c4x}/{prfx}_not_excluded.htm', 1), - (u'dev', u'/static/{prfx}_unlock.png', u'//dev/{c4x}/{prfx}_unlock.png', 1), + (u'dev', u'/static/{prfx}_ünlöck.png', u'//dev/{c4x}/{prfx}_ünlöck.png', 1), (u'dev', u'/static/{prfx}_lock.png', u'/{c4x}/{prfx}_lock.png', 1), - (u'dev', u'/static/weird {prfx}_unlock.png', u'//dev/{c4x}/weird_{prfx}_unlock.png', 1), - (u'dev', u'/static/{prfx}_excluded.html', u'/{c4x}/{prfx}_excluded.html', 1), + (u'dev', u'/static/weird {prfx}_ünlöck.png', u'//dev/{c4x}/weird_{prfx}_ünlöck.png', 1), + (u'dev', u'/static/{prfx}_excluded.html', u'/{base_c4x}/{prfx}_excluded.html', 1), (u'dev', u'/static/{prfx}_not_excluded.htm', u'//dev/{c4x}/{prfx}_not_excluded.htm', 1), # Static path with subdirectory. This ensures we properly substitute slashes. - (u'', u'/static/special/{prfx}_unlock.png', u'/{c4x}/special_{prfx}_unlock.png', 1), + (u'', u'/static/special/{prfx}_ünlöck.png', u'/{c4x}/special_{prfx}_ünlöck.png', 1), (u'', u'/static/special/{prfx}_lock.png', u'/{c4x}/special_{prfx}_lock.png', 1), - (u'', u'/static/special/weird {prfx}_unlock.png', u'/{c4x}/special_weird_{prfx}_unlock.png', 1), - (u'', u'/static/special/{prfx}_excluded.html', u'/{c4x}/special_{prfx}_excluded.html', 1), + (u'', u'/static/special/weird {prfx}_ünlöck.png', u'/{c4x}/special_weird_{prfx}_ünlöck.png', 1), + (u'', u'/static/special/{prfx}_excluded.html', u'/{base_c4x}/special_{prfx}_excluded.html', 1), (u'', u'/static/special/{prfx}_not_excluded.htm', u'/{c4x}/special_{prfx}_not_excluded.htm', 1), - (u'dev', u'/static/special/{prfx}_unlock.png', u'//dev/{c4x}/special_{prfx}_unlock.png', 1), + (u'dev', u'/static/special/{prfx}_ünlöck.png', u'//dev/{c4x}/special_{prfx}_ünlöck.png', 1), (u'dev', u'/static/special/{prfx}_lock.png', u'/{c4x}/special_{prfx}_lock.png', 1), - (u'dev', u'/static/special/weird {prfx}_unlock.png', u'//dev/{c4x}/special_weird_{prfx}_unlock.png', 1), - (u'dev', u'/static/special/{prfx}_excluded.html', u'/{c4x}/special_{prfx}_excluded.html', 1), + (u'dev', u'/static/special/weird {prfx}_ünlöck.png', u'//dev/{c4x}/special_weird_{prfx}_ünlöck.png', 1), + (u'dev', u'/static/special/{prfx}_excluded.html', u'/{base_c4x}/special_{prfx}_excluded.html', 1), (u'dev', u'/static/special/{prfx}_not_excluded.htm', u'//dev/{c4x}/special_{prfx}_not_excluded.htm', 1), # Static path with query parameter. ( u'', - u'/static/{prfx}_unlock.png?foo=/static/{prfx}_lock.png', - u'/{c4x}/{prfx}_unlock.png?foo={encoded_c4x}{prfx}_lock.png', + u'/static/{prfx}_ünlöck.png?foo=/static/{prfx}_lock.png', + u'/{c4x}/{prfx}_ünlöck.png?foo={encoded_c4x}{prfx}_lock.png', 2 ), ( u'', - u'/static/{prfx}_lock.png?foo=/static/{prfx}_unlock.png', - u'/{c4x}/{prfx}_lock.png?foo={encoded_c4x}{prfx}_unlock.png', + u'/static/{prfx}_lock.png?foo=/static/{prfx}_ünlöck.png', + u'/{c4x}/{prfx}_lock.png?foo={encoded_c4x}{prfx}_ünlöck.png', 2 ), ( u'', u'/static/{prfx}_excluded.html?foo=/static/{prfx}_excluded.html', - u'/{c4x}/{prfx}_excluded.html?foo={encoded_c4x}{prfx}_excluded.html', + u'/{base_c4x}/{prfx}_excluded.html?foo={encoded_base_c4x}{prfx}_excluded.html', 2 ), ( u'', u'/static/{prfx}_excluded.html?foo=/static/{prfx}_not_excluded.htm', - u'/{c4x}/{prfx}_excluded.html?foo={encoded_c4x}{prfx}_not_excluded.htm', + u'/{base_c4x}/{prfx}_excluded.html?foo={encoded_c4x}{prfx}_not_excluded.htm', 2 ), ( u'', u'/static/{prfx}_not_excluded.htm?foo=/static/{prfx}_excluded.html', - u'/{c4x}/{prfx}_not_excluded.htm?foo={encoded_c4x}{prfx}_excluded.html', + u'/{c4x}/{prfx}_not_excluded.htm?foo={encoded_base_c4x}{prfx}_excluded.html', 2 ), ( @@ -583,32 +644,32 @@ class CanonicalContentTest(SharedModuleStoreTestCase): ), ( u'dev', - u'/static/{prfx}_unlock.png?foo=/static/{prfx}_lock.png', - u'//dev/{c4x}/{prfx}_unlock.png?foo={encoded_c4x}{prfx}_lock.png', + u'/static/{prfx}_ünlöck.png?foo=/static/{prfx}_lock.png', + u'//dev/{c4x}/{prfx}_ünlöck.png?foo={encoded_c4x}{prfx}_lock.png', 2 ), ( u'dev', - u'/static/{prfx}_lock.png?foo=/static/{prfx}_unlock.png', - u'/{c4x}/{prfx}_lock.png?foo={encoded_base_url}{encoded_c4x}{prfx}_unlock.png', + u'/static/{prfx}_lock.png?foo=/static/{prfx}_ünlöck.png', + u'/{c4x}/{prfx}_lock.png?foo={encoded_base_url}{encoded_c4x}{prfx}_ünlöck.png', 2 ), ( u'dev', u'/static/{prfx}_excluded.html?foo=/static/{prfx}_excluded.html', - u'/{c4x}/{prfx}_excluded.html?foo={encoded_c4x}{prfx}_excluded.html', + u'/{base_c4x}/{prfx}_excluded.html?foo={encoded_base_c4x}{prfx}_excluded.html', 2 ), ( u'dev', u'/static/{prfx}_excluded.html?foo=/static/{prfx}_not_excluded.htm', - u'/{c4x}/{prfx}_excluded.html?foo={encoded_base_url}{encoded_c4x}{prfx}_not_excluded.htm', + u'/{base_c4x}/{prfx}_excluded.html?foo={encoded_base_url}{encoded_c4x}{prfx}_not_excluded.htm', 2 ), ( u'dev', u'/static/{prfx}_not_excluded.htm?foo=/static/{prfx}_excluded.html', - u'//dev/{c4x}/{prfx}_not_excluded.htm?foo={encoded_c4x}{prfx}_excluded.html', + u'//dev/{c4x}/{prfx}_not_excluded.htm?foo={encoded_base_c4x}{prfx}_excluded.html', 2 ), ( @@ -618,38 +679,58 @@ class CanonicalContentTest(SharedModuleStoreTestCase): 2 ), # Old, c4x-style path. - (u'', u'/{c4x}/{prfx}_unlock.png', u'/{c4x}/{prfx}_unlock.png', 1), + (u'', u'/{c4x}/{prfx}_ünlöck.png', u'/{c4x}/{prfx}_ünlöck.png', 1), (u'', u'/{c4x}/{prfx}_lock.png', u'/{c4x}/{prfx}_lock.png', 1), (u'', u'/{c4x}/weird_{prfx}_lock.png', u'/{c4x}/weird_{prfx}_lock.png', 1), - (u'', u'/{c4x}/{prfx}_excluded.html', u'/{c4x}/{prfx}_excluded.html', 1), + (u'', u'/{c4x}/{prfx}_excluded.html', u'/{base_c4x}/{prfx}_excluded.html', 1), (u'', u'/{c4x}/{prfx}_not_excluded.htm', u'/{c4x}/{prfx}_not_excluded.htm', 1), - (u'dev', u'/{c4x}/{prfx}_unlock.png', u'//dev/{c4x}/{prfx}_unlock.png', 1), + (u'dev', u'/{c4x}/{prfx}_ünlöck.png', u'//dev/{c4x}/{prfx}_ünlöck.png', 1), (u'dev', u'/{c4x}/{prfx}_lock.png', u'/{c4x}/{prfx}_lock.png', 1), - (u'dev', u'/{c4x}/weird_{prfx}_unlock.png', u'//dev/{c4x}/weird_{prfx}_unlock.png', 1), - (u'dev', u'/{c4x}/{prfx}_excluded.html', u'/{c4x}/{prfx}_excluded.html', 1), + (u'dev', u'/{c4x}/weird_{prfx}_ünlöck.png', u'//dev/{c4x}/weird_{prfx}_ünlöck.png', 1), + (u'dev', u'/{c4x}/{prfx}_excluded.html', u'/{base_c4x}/{prfx}_excluded.html', 1), (u'dev', u'/{c4x}/{prfx}_not_excluded.htm', u'//dev/{c4x}/{prfx}_not_excluded.htm', 1), ) @ddt.unpack def test_canonical_asset_path_with_c4x_style_assets(self, base_url, start, expected, mongo_calls): exts = ['.html', '.tm'] prefix = 'old' - c4x_block = 'c4x/a/b/asset' - encoded_c4x_block = quote_plus('/' + c4x_block + '/') - encoded_base_url = quote_plus('//' + base_url) + base_c4x_block = 'c4x/a/b/asset' + adjusted_c4x_block = base_c4x_block + encoded_c4x_block = urlquote('/' + base_c4x_block + '/') + encoded_base_url = urlquote('//' + base_url) + encoded_base_c4x_block = encoded_c4x_block start = start.format( prfx=prefix, encoded_base_url=encoded_base_url, - c4x=c4x_block, - encoded_c4x=encoded_c4x_block - ) - expected = expected.format( - prfx=prefix, - encoded_base_url=encoded_base_url, - c4x=c4x_block, + c4x=base_c4x_block, encoded_c4x=encoded_c4x_block ) + # Adjust for content digest. This gets dicey quickly and we have to order our steps: + # - replace format markets because they have curly braces + # - encode Unicode characters to percent-encoded + # - finally shove back in our regex patterns + digest = CanonicalContentTest.get_content_digest_for_asset_path(prefix, start) + if digest: + adjusted_c4x_block = 'assets/courseware/MARK/c4x/a/b/asset' + encoded_c4x_block = urlquote('/' + adjusted_c4x_block + '/') + + expected = expected.format( + prfx=prefix, + encoded_base_url=encoded_base_url, + base_c4x=base_c4x_block, + c4x=adjusted_c4x_block, + encoded_c4x=encoded_c4x_block, + encoded_base_c4x=encoded_base_c4x_block, + ) + + expected = encode_unicode_characters_in_url(expected) + expected = expected.replace('MARK', '[a-f0-9]{32}') + expected = expected.replace('+', r'\+').replace('?', r'\?') + with check_mongo_calls(mongo_calls): asset_path = StaticContent.get_canonicalized_asset_path(self.courses[prefix].id, start, base_url, exts) - self.assertEqual(asset_path, expected) + print expected + print asset_path + self.assertIsNotNone(re.match(expected, asset_path)) diff --git a/common/lib/xmodule/xmodule/contentstore/content.py b/common/lib/xmodule/xmodule/contentstore/content.py index 8b5ae95ea0..660169f0f5 100644 --- a/common/lib/xmodule/xmodule/contentstore/content.py +++ b/common/lib/xmodule/xmodule/contentstore/content.py @@ -5,16 +5,16 @@ from xmodule.assetstore.assetmgr import AssetManager XASSET_LOCATION_TAG = 'c4x' XASSET_SRCREF_PREFIX = 'xasset:' - XASSET_THUMBNAIL_TAIL_NAME = '.jpg' - STREAM_DATA_CHUNK_SIZE = 1024 +VERSIONED_ASSETS_PREFIX = '/assets/courseware' +VERSIONED_ASSETS_PATTERN = r'/assets/courseware/([a-f0-9]{32})' import os import logging import StringIO from urlparse import urlparse, urlunparse, parse_qsl -from urllib import urlencode +from urllib import urlencode, quote_plus from opaque_keys.edx.locator import AssetLocator from opaque_keys.edx.keys import CourseKey, AssetKey @@ -26,7 +26,7 @@ from PIL import Image class StaticContent(object): def __init__(self, loc, name, content_type, data, last_modified_at=None, thumbnail_location=None, import_path=None, - length=None, locked=False): + length=None, locked=False, content_digest=None): self.location = loc self.name = name # a display string which can be edited, and thus not part of the location which needs to be fixed self.content_type = content_type @@ -38,6 +38,7 @@ class StaticContent(object): # cycles self.import_path = import_path self.locked = locked + self.content_digest = content_digest @property def is_thumbnail(self): @@ -145,6 +146,40 @@ class StaticContent(object): # try stripping off the leading slash and try again return AssetKey.from_string(path[1:]) + @staticmethod + def is_versioned_asset_path(path): + """Determines whether the given asset path is versioned.""" + return path.startswith(VERSIONED_ASSETS_PREFIX) + + @staticmethod + def parse_versioned_asset_path(path): + """ + Examines an asset path and breaks it apart if it is versioned, + returning both the asset digest and the unversioned asset path, + which will normally be an AssetKey. + """ + asset_digest = None + asset_path = path + if StaticContent.is_versioned_asset_path(asset_path): + result = re.match(VERSIONED_ASSETS_PATTERN, asset_path) + if result is not None: + asset_digest = result.groups()[0] + asset_path = re.sub(VERSIONED_ASSETS_PATTERN, '', asset_path) + + return (asset_digest, asset_path) + + @staticmethod + def add_version_to_asset_path(path, version): + """ + Adds a prefix to an asset path indicating the asset's version. + """ + + # Don't version an already-versioned path. + if StaticContent.is_versioned_asset_path(path): + return path + + return VERSIONED_ASSETS_PREFIX + '/' + version + path + @staticmethod def get_asset_key_from_path(course_key, path): """ @@ -172,7 +207,16 @@ class StaticContent(object): return StaticContent.compute_location(course_key, path) @staticmethod - def get_canonicalized_asset_path(course_key, path, base_url, excluded_exts): + def is_excluded_asset_type(path, excluded_exts): + """ + Check if this is an allowed file extension to serve. + + Some files aren't served through the CDN in order to avoid same-origin policy/CORS-related issues. + """ + return any(path.lower().endswith(excluded_ext.lower()) for excluded_ext in excluded_exts) + + @staticmethod + def get_canonicalized_asset_path(course_key, path, base_url, excluded_exts, encode=True): """ Returns a fully-qualified path to a piece of static content. @@ -188,25 +232,27 @@ class StaticContent(object): """ # Break down the input path. - _, _, relative_path, params, query_string, fragment = urlparse(path) + _, _, relative_path, params, query_string, _ = urlparse(path) # Convert our path to an asset key if it isn't one already. asset_key = StaticContent.get_asset_key_from_path(course_key, relative_path) # Check the status of the asset to see if this can be served via CDN aka publicly. serve_from_cdn = False + content_digest = None try: content = AssetManager.find(asset_key, as_stream=True) - is_locked = getattr(content, "locked", True) - serve_from_cdn = not is_locked + serve_from_cdn = not getattr(content, "locked", True) + content_digest = content.content_digest except (ItemNotFoundError, NotFoundError): # If we can't find the item, just treat it as if it's locked. serve_from_cdn = False - # See if this is an allowed file extension to serve. Some files aren't served through the - # CDN in order to avoid same-origin policy/CORS-related issues. - if any(relative_path.lower().endswith(excluded_ext.lower()) for excluded_ext in excluded_exts): + # Do a generic check to see if anything about this asset disqualifies it from being CDN'd. + is_excluded = False + if StaticContent.is_excluded_asset_type(relative_path, excluded_exts): serve_from_cdn = False + is_excluded = True # Update any query parameter values that have asset paths in them. This is for assets that # require their own after-the-fact values, like a Flash file that needs the path of a config @@ -215,15 +261,29 @@ class StaticContent(object): updated_query_params = [] for query_name, query_val in query_params: if query_val.startswith("/static/"): - new_val = StaticContent.get_canonicalized_asset_path(course_key, query_val, base_url, excluded_exts) + new_val = StaticContent.get_canonicalized_asset_path( + course_key, query_val, base_url, excluded_exts, encode=False) updated_query_params.append((query_name, new_val)) else: - updated_query_params.append((query_name, query_val)) + # Make sure we're encoding Unicode strings down to their byte string + # representation so that `urlencode` can handle it. + updated_query_params.append((query_name, query_val.encode('utf-8'))) serialized_asset_key = StaticContent.serialize_asset_key_with_slash(asset_key) base_url = base_url if serve_from_cdn else '' + asset_path = serialized_asset_key - return urlunparse((None, base_url, serialized_asset_key, params, urlencode(updated_query_params), fragment)) + # If the content has a digest (i.e. md5sum) value specified, create a versioned path to the asset using it. + if not is_excluded and content_digest: + asset_path = StaticContent.add_version_to_asset_path(serialized_asset_key, content_digest) + + # Only encode this if told to. Important so that we don't double encode + # when working with paths that are in query parameters. + asset_path = asset_path.encode('utf-8') + if encode: + asset_path = quote_plus(asset_path, '/:+@') + + return urlunparse((None, base_url.encode('utf-8'), asset_path, params, urlencode(updated_query_params), None)) def stream_data(self): yield self._data @@ -242,10 +302,10 @@ class StaticContent(object): class StaticContentStream(StaticContent): def __init__(self, loc, name, content_type, stream, last_modified_at=None, thumbnail_location=None, import_path=None, - length=None, locked=False): + length=None, locked=False, content_digest=None): super(StaticContentStream, self).__init__(loc, name, content_type, None, last_modified_at=last_modified_at, thumbnail_location=thumbnail_location, import_path=import_path, - length=length, locked=locked) + length=length, locked=locked, content_digest=content_digest) self._stream = stream def stream_data(self): @@ -277,7 +337,8 @@ class StaticContentStream(StaticContent): self._stream.seek(0) content = StaticContent(self.location, self.name, self.content_type, self._stream.read(), last_modified_at=self.last_modified_at, thumbnail_location=self.thumbnail_location, - import_path=self.import_path, length=self.length, locked=self.locked) + import_path=self.import_path, length=self.length, locked=self.locked, + content_digest=self.content_digest) return content diff --git a/common/lib/xmodule/xmodule/contentstore/mongo.py b/common/lib/xmodule/xmodule/contentstore/mongo.py index 8fc505e121..1d1f2a0314 100644 --- a/common/lib/xmodule/xmodule/contentstore/mongo.py +++ b/common/lib/xmodule/xmodule/contentstore/mongo.py @@ -128,7 +128,8 @@ class MongoContentStore(ContentStore): location, fp.displayname, fp.content_type, fp, last_modified_at=fp.uploadDate, thumbnail_location=thumbnail_location, import_path=getattr(fp, 'import_path', None), - length=fp.length, locked=getattr(fp, 'locked', False) + length=fp.length, locked=getattr(fp, 'locked', False), + content_digest=getattr(fp, 'md5', None), ) else: with self.fs.get(content_id) as fp: @@ -142,7 +143,8 @@ class MongoContentStore(ContentStore): location, fp.displayname, fp.content_type, fp.read(), last_modified_at=fp.uploadDate, thumbnail_location=thumbnail_location, import_path=getattr(fp, 'import_path', None), - length=fp.length, locked=getattr(fp, 'locked', False) + length=fp.length, locked=getattr(fp, 'locked', False), + content_digest=getattr(fp, 'md5', None), ) except NoFile: if throw_on_not_found: diff --git a/common/test/data/toy/static/sample_static.txt b/common/test/data/toy/static/sample_static.html similarity index 100% rename from common/test/data/toy/static/sample_static.txt rename to common/test/data/toy/static/sample_static.html