From 6740e75c0fdc7ba095baf88e9f5e4f3e15cfd8ba Mon Sep 17 00:00:00 2001 From: Tim McCormack <59623490+timmc-edx@users.noreply.github.com> Date: Wed, 21 May 2025 14:14:51 -0400 Subject: [PATCH] Merge commit from fork Allow overriding but prevent download by default. Also, extract `PYTHON_LIB_FILENAME` Django setting as a shared function and document the setting. --- .../contentstore/tests/test_import.py | 3 +- common/test/data/toy/static/python_lib.zip | Bin 0 -> 208 bytes .../contentserver/test/test_contentserver.py | 82 ++++++++++++++++++ .../core/djangoapps/contentserver/views.py | 52 +++++++++-- openedx/core/lib/xblock_serializer/utils.py | 6 +- xmodule/util/sandboxing.py | 14 ++- 6 files changed, 146 insertions(+), 11 deletions(-) create mode 100644 common/test/data/toy/static/python_lib.zip diff --git a/cms/djangoapps/contentstore/tests/test_import.py b/cms/djangoapps/contentstore/tests/test_import.py index 73b65197da..260636b51b 100644 --- a/cms/djangoapps/contentstore/tests/test_import.py +++ b/cms/djangoapps/contentstore/tests/test_import.py @@ -146,6 +146,7 @@ class ContentStoreImportTest(ModuleStoreTestCase): import_course_from_xml( module_store, self.user.id, TEST_DATA_DIR, ['toy'], static_content_store=content_store, do_import_static=False, + do_import_python_lib=False, # python_lib.zip is special-cased -- exclude it too create_if_not_present=True, verbose=True ) @@ -153,7 +154,7 @@ class ContentStoreImportTest(ModuleStoreTestCase): # make sure we have NO assets in our contentstore all_assets, count = content_store.get_all_content_for_course(course.id) - self.assertEqual(len(all_assets), 0) + self.assertEqual(all_assets, []) self.assertEqual(count, 0) def test_no_static_link_rewrites_on_import(self): diff --git a/common/test/data/toy/static/python_lib.zip b/common/test/data/toy/static/python_lib.zip new file mode 100644 index 0000000000000000000000000000000000000000..5854e82b68d9af5b4142c3f1a5b9bebfb7684285 GIT binary patch literal 208 zcmWIWW@h1H00A}cg;9rIq&lgA*g(w1Aj6Pel$es4m#$Y(85+XLz-(S7oB_h672FJr zEH9WD7{EkIYMMf3aeQi7YMw@grWKb05EP}BlosVFR4Aw_7%AA=Dj0ADcr!A|G2^mJ l0^|e+Mj+nO2x1}I%nGp?&DH>KRyL4IMj#9T(pex50|1QbDPjNs literal 0 HcmV?d00001 diff --git a/openedx/core/djangoapps/contentserver/test/test_contentserver.py b/openedx/core/djangoapps/contentserver/test/test_contentserver.py index 4c0180c402..df29b64d27 100644 --- a/openedx/core/djangoapps/contentserver/test/test_contentserver.py +++ b/openedx/core/djangoapps/contentserver/test/test_contentserver.py @@ -102,6 +102,11 @@ class ContentStoreToyCourseTest(SharedModuleStoreTestCase): cls.url_unlocked_versioned_old_style = get_old_style_versioned_asset_url(cls.url_unlocked) cls.length_unlocked = cls.contentstore.get_attr(cls.unlocked_asset, 'length') + # Special case: python_lib.zip + cls.pylib_asset = cls.course_key.make_asset_key('asset', 'python_lib.zip') + cls.url_pylib = '/' + str(cls.pylib_asset) + cls.contentstore.set_attr(cls.pylib_asset, 'locked', False) + def setUp(self): """ Create user and login. @@ -208,6 +213,83 @@ class ContentStoreToyCourseTest(SharedModuleStoreTestCase): resp = self.client.get(self.url_locked) assert resp.status_code == 200 + def test_python_lib_zip_staff(self): + """ + Test that staff can download python_lib.zip. + """ + self.client.login(username=self.staff_usr, password=self.TEST_PASSWORD) + resp = self.client.get(self.url_pylib) + assert resp.status_code == 200 + assert resp['Cache-Control'] == 'private, no-cache, no-store' + + def test_python_lib_zip_not_staff(self): + """ + Test that python_lib.zip cannot be downloaded by non-staff by default. + """ + self.client.login(username=self.non_staff_usr, password=self.TEST_PASSWORD) + resp = self.client.get(self.url_pylib) + assert resp.status_code == 403 + # We should be sending a no-cache header, but the contentserver + # currently doesn't set caching headers for "unauthorized" responses. So + # this test allows either in order to make the transition easier if we + # fix that. + assert 'Cache-Control' not in resp or resp['Cache-Control'] == 'private, no-cache, no-store' + + @patch( + 'openedx.core.djangoapps.contentserver.views.COURSE_CODE_LIBRARY_DOWNLOAD_ALLOWED.is_enabled', + return_value=True, + ) + def test_python_lib_zip_not_staff_but_course_allows_it(self, mock_download_allowed_flag): + """ + Test that python_lib.zip can be downloaded by non-staff when flag enabled. + """ + self.client.login(username=self.non_staff_usr, password=self.TEST_PASSWORD) + resp = self.client.get(self.url_pylib) + assert resp.status_code == 200 + assert resp['Cache-Control'] == 'private, no-cache, no-store' + + mock_download_allowed_flag.assert_called_once_with(self.course_key) + + @patch( + 'openedx.core.djangoapps.contentserver.views.COURSE_CODE_LIBRARY_DOWNLOAD_ALLOWED.is_enabled', + return_value=True, + ) + @patch( + 'openedx.core.djangoapps.contentserver.views.is_content_locked', + return_value=True, + ) + def test_python_lib_zip_can_be_locked(self, mock_is_locked, mock_download_allowed_flag): + """ + Even when python_lib.zip download is broadly allowed, it can be locked. + """ + self.client.login(username=self.non_staff_usr, password=self.TEST_PASSWORD) + resp = self.client.get(self.url_pylib) + assert resp.status_code == 403 + assert 'Cache-Control' not in resp or resp['Cache-Control'] == 'private, no-cache, no-store' + + assert mock_is_locked.call_count == 2 # for auth check, then caching check + mock_download_allowed_flag.assert_called_once_with(self.course_key) + + @ddt.data(True, False) + def test_python_lib_zip_uses_studio_read_check(self, allow): + """ + Specifically check that python_lib.zip is gated on studio read access. + + Ideally this test would actually check access for a course team member + who is *not* site staff/superuser, but that would require more + complicated setup. + """ + self.client.login(username=self.non_staff_usr, password=self.TEST_PASSWORD) + with patch('openedx.core.djangoapps.contentserver.views.has_studio_read_access', return_value=allow): + resp = self.client.get(self.url_pylib) + + if allow: + assert resp.status_code == 200 + assert resp['Cache-Control'] == 'private, no-cache, no-store' + else: + assert resp.status_code == 403 + assert 'Cache-Control' not in resp or resp['Cache-Control'] == 'private, no-cache, no-store' + def test_range_request_full_file(self): """ Test that a range request from byte 0 to last, diff --git a/openedx/core/djangoapps/contentserver/views.py b/openedx/core/djangoapps/contentserver/views.py index 3a267f0852..cbdb4124fe 100644 --- a/openedx/core/djangoapps/contentserver/views.py +++ b/openedx/core/djangoapps/contentserver/views.py @@ -21,13 +21,16 @@ from edx_django_utils.monitoring import set_custom_attribute from opaque_keys import InvalidKeyError from opaque_keys.edx.locator import AssetLocator +from common.djangoapps.student.auth import has_studio_read_access from common.djangoapps.student.models import CourseEnrollment from openedx.core.djangoapps.header_control import force_header_for_response +from openedx.core.djangoapps.waffle_utils import CourseWaffleFlag from xmodule.assetstore.assetmgr import AssetManager from xmodule.contentstore.content import XASSET_LOCATION_TAG, StaticContent from xmodule.exceptions import NotFoundError from xmodule.modulestore import InvalidLocationError from xmodule.modulestore.exceptions import ItemNotFoundError +from xmodule.util.sandboxing import course_code_library_asset_name from .caching import get_cached_content, set_cached_content from .models import CdnUserAgentsConfig, CourseAssetCacheTtlConfig @@ -197,17 +200,21 @@ def process_request(request): # middleware we have in place, there's no easy way to use the built-in Django # utilities and properly sanitize and modify a response to ensure that it is as # cacheable as possible, which is why we do it ourselves. - set_caching_headers(content, response) + set_caching_headers(content, loc, response) return response -def set_caching_headers(content, response): +def set_caching_headers(content, location, response): """ - Sets caching headers based on whether or not the asset is locked. + Sets caching headers based on whether or not the asset is restricted. """ - is_locked = getattr(content, "locked", False) + is_pylib = location.path == course_code_library_asset_name() + + # All classes of asset that have any kind of access control should be marked + # as non-cacheable. + is_restricted = is_locked or is_pylib # We want to signal to the end user's browser, and to any intermediate proxies/caches, # whether or not this asset is cacheable. If we have a TTL configured, we inform the @@ -215,12 +222,12 @@ def set_caching_headers(content, response): # assets should be restricted to enrolled students, we simply send headers that # indicate there should be no caching whatsoever. cache_ttl = CourseAssetCacheTtlConfig.get_cache_ttl() - if cache_ttl > 0 and not is_locked: + if cache_ttl > 0 and not is_restricted: set_custom_attribute('contentserver.cacheable', True) response['Expires'] = get_expiration_value(datetime.datetime.utcnow(), cache_ttl) response['Cache-Control'] = "public, max-age={ttl}, s-maxage={ttl}".format(ttl=cache_ttl) - elif is_locked: + elif is_restricted: set_custom_attribute('contentserver.cacheable', False) response['Cache-Control'] = "private, no-cache, no-store" @@ -264,10 +271,43 @@ def is_content_locked(content): return bool(getattr(content, "locked", False)) +# .. toggle_name: course_assets.allow_download_code_library +# .. toggle_implementation: CourseWaffleFlag +# .. toggle_default: False +# .. toggle_description: Whether to allow learners to download the course code library +# that is used for custom Python-graded problem blocks. (This is conventionally +# ``python_lib.zip``, but configurable with Django setting ``PYTHON_LIB_FILENAME``). +# This file may contain custom grading code or problem answers that should not be +# revealed to learners. +# .. toggle_warning: This flag is only intended as a temporary override for use +# in rollout, to be removed before Ulmo. Courses that rely on learners being able +# to download the code library should find an alternative workflow, or the toggle +# should be re-documented as permanent. +# .. toggle_use_cases: temporary +# .. toggle_creation_date: 2025-05-01 +# .. toggle_target_removal_date: 2025-10-01 +COURSE_CODE_LIBRARY_DOWNLOAD_ALLOWED = CourseWaffleFlag( + 'course_assets.allow_download_code_library', module_name=__name__, +) + + def is_user_authorized(request, content, location): """ Determines whether or not the user for this request is authorized to view the given asset. + + Any asset classes that have restrictions placed on them should also + be marked as no-cache in `set_caching_headers`. """ + # Special-case python_lib.zip, since it often contains grading code that + # shouldn't be revealed to learners. + if location.path == course_code_library_asset_name(): + if has_studio_read_access(request.user, location.course_key) or \ + COURSE_CODE_LIBRARY_DOWNLOAD_ALLOWED.is_enabled(location.course_key): + # Fall through to other access checks + pass + else: + return False + if not is_content_locked(content): return True diff --git a/openedx/core/lib/xblock_serializer/utils.py b/openedx/core/lib/xblock_serializer/utils.py index e78c900b18..6f48eef391 100644 --- a/openedx/core/lib/xblock_serializer/utils.py +++ b/openedx/core/lib/xblock_serializer/utils.py @@ -2,11 +2,11 @@ Helper functions for XBlock serialization """ from __future__ import annotations + import logging import re from contextlib import contextmanager -from django.conf import settings from fs.memoryfs import MemoryFS from fs.wrapfs import WrapFS from opaque_keys import InvalidKeyError @@ -17,7 +17,7 @@ from xmodule.assetstore.assetmgr import AssetManager from xmodule.contentstore.content import StaticContent from xmodule.exceptions import NotFoundError from xmodule.modulestore.exceptions import ItemNotFoundError -from xmodule.util.sandboxing import DEFAULT_PYTHON_LIB_FILENAME +from xmodule.util.sandboxing import course_code_library_asset_name from xmodule.xml_block import XmlMixin from .data import StaticFile @@ -105,7 +105,7 @@ def get_python_lib_zip_if_using(olx: str, course_id: CourseKey) -> StaticFile | using python_lib.zip """ if _has_python_script(olx): - python_lib_filename = getattr(settings, 'PYTHON_LIB_FILENAME', DEFAULT_PYTHON_LIB_FILENAME) + python_lib_filename = course_code_library_asset_name() asset_key = StaticContent.get_asset_key_from_path(course_id, python_lib_filename) # Now, it seems like this capa problem uses python_lib.zip - but does it exist in the course? if AssetManager.find(asset_key, throw_on_not_found=False): diff --git a/xmodule/util/sandboxing.py b/xmodule/util/sandboxing.py index 12c4243acf..a8883ba3e9 100644 --- a/xmodule/util/sandboxing.py +++ b/xmodule/util/sandboxing.py @@ -7,6 +7,18 @@ from django.conf import settings DEFAULT_PYTHON_LIB_FILENAME = 'python_lib.zip' +def course_code_library_asset_name(): + """ + Return the asset name to use for course code libraries, defaulting to python_lib.zip. + """ + # .. setting_name: PYTHON_LIB_FILENAME + # .. setting_default: python_lib.zip + # .. setting_description: Name of the course file to make available to code in + # custom Python-graded problems. By default, this file will not be downloadable + # by learners. + return getattr(settings, 'PYTHON_LIB_FILENAME', DEFAULT_PYTHON_LIB_FILENAME) + + def can_execute_unsafe_code(course_id): """ Determine if this course is allowed to run unsafe code. @@ -34,7 +46,7 @@ def can_execute_unsafe_code(course_id): def get_python_lib_zip(contentstore, course_id): """Return the bytes of the course code library file, if it exists.""" - python_lib_filename = getattr(settings, 'PYTHON_LIB_FILENAME', DEFAULT_PYTHON_LIB_FILENAME) + python_lib_filename = course_code_library_asset_name() asset_key = course_id.make_asset_key("asset", python_lib_filename) zip_lib = contentstore().find(asset_key, throw_on_not_found=False) if zip_lib is not None: