From 02fc9c928f7635d3298557c2034b825d7c4104aa Mon Sep 17 00:00:00 2001 From: Raymond Zhou <56318341+rayzhou-bit@users.noreply.github.com> Date: Thu, 6 Feb 2025 12:47:17 -0500 Subject: [PATCH] 2u/course optimizer (#35887) --- cms/djangoapps/contentstore/core/__init__.py | 0 .../core/course_optimizer_provider.py | 279 ++++++++++++++ .../contentstore/core/tests/__init__.py | 0 .../tests/test_course_optimizer_provider.py | 219 +++++++++++ .../how-tos/test_course_related_view_auth.rst | 64 ++++ .../rest_api/v0/serializers/__init__.py | 1 + .../v0/serializers/course_optimizer.py | 48 +++ .../v0/tests/test_course_optimizer.py | 79 ++++ .../contentstore/rest_api/v0/urls.py | 14 +- .../rest_api/v0/views/__init__.py | 3 +- .../rest_api/v0/views/course_optimizer.py | 144 ++++++++ cms/djangoapps/contentstore/tasks.py | 340 +++++++++++++++++- .../contentstore/tests/test_tasks.py | 298 ++++++++++++++- cms/djangoapps/contentstore/utils.py | 14 + cms/static/sass/elements/_header.scss | 3 +- cms/templates/widgets/header.html | 8 +- 16 files changed, 1505 insertions(+), 9 deletions(-) create mode 100644 cms/djangoapps/contentstore/core/__init__.py create mode 100644 cms/djangoapps/contentstore/core/course_optimizer_provider.py create mode 100644 cms/djangoapps/contentstore/core/tests/__init__.py create mode 100644 cms/djangoapps/contentstore/core/tests/test_course_optimizer_provider.py create mode 100644 cms/djangoapps/contentstore/docs/how-tos/test_course_related_view_auth.rst create mode 100644 cms/djangoapps/contentstore/rest_api/v0/serializers/course_optimizer.py create mode 100644 cms/djangoapps/contentstore/rest_api/v0/tests/test_course_optimizer.py create mode 100644 cms/djangoapps/contentstore/rest_api/v0/views/course_optimizer.py diff --git a/cms/djangoapps/contentstore/core/__init__.py b/cms/djangoapps/contentstore/core/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/cms/djangoapps/contentstore/core/course_optimizer_provider.py b/cms/djangoapps/contentstore/core/course_optimizer_provider.py new file mode 100644 index 0000000000..c52ee68656 --- /dev/null +++ b/cms/djangoapps/contentstore/core/course_optimizer_provider.py @@ -0,0 +1,279 @@ +""" +Logic for handling actions in Studio related to Course Optimizer. +""" +import json +from user_tasks.conf import settings as user_tasks_settings +from user_tasks.models import UserTaskArtifact, UserTaskStatus + +from cms.djangoapps.contentstore.tasks import CourseLinkCheckTask +from cms.djangoapps.contentstore.xblock_storage_handlers.view_handlers import get_xblock +from cms.djangoapps.contentstore.xblock_storage_handlers.xblock_helpers import usage_key_with_run + + +# Restricts status in the REST API to only those which the requesting user has permission to view. +# These can be overwritten in django settings. +# By default, these should be the UserTaskStatus statuses: +# 'Pending', 'In Progress', 'Succeeded', 'Failed', 'Canceled', 'Retrying' +STATUS_FILTERS = user_tasks_settings.USER_TASKS_STATUS_FILTERS + + +def get_link_check_data(request, course_id): + """ + Retrives data and formats it for the link check get request. + """ + task_status = _latest_task_status(request, course_id) + status = None + created_at = None + broken_links_dto = None + error = None + if task_status is None: + # The task hasn't been initialized yet; did we store info in the session already? + try: + session_status = request.session['link_check_status'] + status = session_status[course_id] + except KeyError: + status = 'Uninitiated' + else: + status = task_status.state + created_at = task_status.created + if task_status.state == UserTaskStatus.SUCCEEDED: + artifact = UserTaskArtifact.objects.get(status=task_status, name='BrokenLinks') + with artifact.file as file: + content = file.read() + json_content = json.loads(content) + broken_links_dto = generate_broken_links_descriptor(json_content, request.user) + elif task_status.state in (UserTaskStatus.FAILED, UserTaskStatus.CANCELED): + errors = UserTaskArtifact.objects.filter(status=task_status, name='Error') + if errors: + error = errors[0].text + try: + error = json.loads(error) + except ValueError: + # Wasn't JSON, just use the value as a string + pass + + data = { + 'LinkCheckStatus': status, + **({'LinkCheckCreatedAt': created_at} if created_at else {}), + **({'LinkCheckOutput': broken_links_dto} if broken_links_dto else {}), + **({'LinkCheckError': error} if error else {}) + } + return data + + +def _latest_task_status(request, course_key_string, view_func=None): + """ + Get the most recent link check status update for the specified course + key. + """ + args = {'course_key_string': course_key_string} + name = CourseLinkCheckTask.generate_name(args) + task_status = UserTaskStatus.objects.filter(name=name) + for status_filter in STATUS_FILTERS: + task_status = status_filter().filter_queryset(request, task_status, view_func) + return task_status.order_by('-created').first() + + +def generate_broken_links_descriptor(json_content, request_user): + """ + Returns a Data Transfer Object for frontend given a list of broken links. + + ** Example json_content structure ** + Note: is_locked is true if the link is a studio link and returns 403 + [ + ['block_id_1', 'link_1', is_locked], + ['block_id_1', 'link_2', is_locked], + ['block_id_2', 'link_3', is_locked], + ... + ] + + ** Example DTO structure ** + { + 'sections': [ + { + 'id': 'section_id', + 'displayName': 'section name', + 'subsections': [ + { + 'id': 'subsection_id', + 'displayName': 'subsection name', + 'units': [ + { + 'id': 'unit_id', + 'displayName': 'unit name', + 'blocks': [ + { + 'id': 'block_id', + 'displayName': 'block name', + 'url': 'url/to/block', + 'brokenLinks: [], + 'lockedLinks: [], + }, + ..., + ] + }, + ..., + ] + }, + ..., + ] + }, + ..., + ] + } + """ + xblock_node_tree = {} # tree representation of xblock relationships + xblock_dictionary = {} # dictionary of xblock attributes + + for item in json_content: + block_id, link, *rest = item + if rest: + is_locked_flag = bool(rest[0]) + else: + is_locked_flag = False + + usage_key = usage_key_with_run(block_id) + block = get_xblock(usage_key, request_user) + xblock_node_tree, xblock_dictionary = _update_node_tree_and_dictionary( + block=block, + link=link, + is_locked=is_locked_flag, + node_tree=xblock_node_tree, + dictionary=xblock_dictionary + ) + + return _create_dto_recursive(xblock_node_tree, xblock_dictionary) + + +def _update_node_tree_and_dictionary(block, link, is_locked, node_tree, dictionary): + """ + Inserts a block into the node tree and add its attributes to the dictionary. + + ** Example node tree structure ** + { + 'section_id1': { + 'subsection_id1': { + 'unit_id1': { + 'block_id1': {}, + 'block_id2': {}, + ..., + }, + 'unit_id2': { + 'block_id3': {}, + ..., + }, + ..., + }, + ..., + }, + ..., + } + + ** Example dictionary structure ** + { + 'xblock_id: { + 'display_name': 'xblock name', + 'category': 'chapter' + }, + 'html_block_id': { + 'display_name': 'xblock name', + 'category': 'chapter', + 'url': 'url_1', + 'locked_links': [...], + 'broken_links': [...] + } + ..., + } + """ + updated_tree, updated_dictionary = node_tree, dictionary + + path = _get_node_path(block) + current_node = updated_tree + xblock_id = '' + + # Traverse the path and build the tree structure + for xblock in path: + xblock_id = xblock.location.block_id + updated_dictionary.setdefault( + xblock_id, + { + 'display_name': xblock.display_name, + 'category': getattr(xblock, 'category', ''), + } + ) + # Sets new current node and creates the node if it doesn't exist + current_node = current_node.setdefault(xblock_id, {}) + + # Add block-level details for the last xblock in the path (URL and broken/locked links) + updated_dictionary[xblock_id].setdefault( + 'url', + f'/course/{block.course_id}/editor/{block.category}/{block.location}' + ) + + if is_locked: + updated_dictionary[xblock_id].setdefault('locked_links', []).append(link) + else: + updated_dictionary[xblock_id].setdefault('broken_links', []).append(link) + + return updated_tree, updated_dictionary + + +def _get_node_path(block): + """ + Retrieves the path from the course root node to a specific block, excluding the root. + + ** Example Path structure ** + [chapter_node, sequential_node, vertical_node, html_node] + """ + path = [] + current_node = block + + while current_node.get_parent(): + path.append(current_node) + current_node = current_node.get_parent() + + return list(reversed(path)) + + +CATEGORY_TO_LEVEL_MAP = { + "chapter": "sections", + "sequential": "subsections", + "vertical": "units" +} + + +def _create_dto_recursive(xblock_node, xblock_dictionary): + """ + Recursively build the Data Transfer Object by using + the structure from the node tree and data from the dictionary. + """ + # Exit condition when there are no more child nodes (at block level) + if not xblock_node: + return None + + level = None + xblock_children = [] + + for xblock_id, node in xblock_node.items(): + child_blocks = _create_dto_recursive(node, xblock_dictionary) + xblock_data = xblock_dictionary.get(xblock_id, {}) + + xblock_entry = { + 'id': xblock_id, + 'displayName': xblock_data.get('display_name', ''), + } + if child_blocks is None: # Leaf node + level = 'blocks' + xblock_entry.update({ + 'url': xblock_data.get('url', ''), + 'brokenLinks': xblock_data.get('broken_links', []), + 'lockedLinks': xblock_data.get('locked_links', []), + }) + else: # Non-leaf node + category = xblock_data.get('category', None) + level = CATEGORY_TO_LEVEL_MAP.get(category, None) + xblock_entry.update(child_blocks) + + xblock_children.append(xblock_entry) + + return {level: xblock_children} if level else None diff --git a/cms/djangoapps/contentstore/core/tests/__init__.py b/cms/djangoapps/contentstore/core/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/cms/djangoapps/contentstore/core/tests/test_course_optimizer_provider.py b/cms/djangoapps/contentstore/core/tests/test_course_optimizer_provider.py new file mode 100644 index 0000000000..5dd098956e --- /dev/null +++ b/cms/djangoapps/contentstore/core/tests/test_course_optimizer_provider.py @@ -0,0 +1,219 @@ +""" +Tests for course optimizer +""" +from unittest.mock import Mock + +from cms.djangoapps.contentstore.tests.utils import CourseTestCase +from cms.djangoapps.contentstore.core.course_optimizer_provider import ( + _update_node_tree_and_dictionary, + _create_dto_recursive +) + + +class TestLinkCheckProvider(CourseTestCase): + """ + Tests for functions that generate a json structure of locked and broken links + to send to the frontend. + """ + def setUp(self): + """Setup course blocks for tests""" + super().setUp() + self.mock_course = Mock() + self.mock_section = Mock( + location=Mock(block_id='chapter_1'), + display_name='Section Name', + category='chapter' + ) + self.mock_subsection = Mock( + location=Mock(block_id='sequential_1'), + display_name='Subsection Name', + category='sequential' + ) + self.mock_unit = Mock( + location=Mock(block_id='vertical_1'), + display_name='Unit Name', + category='vertical' + ) + self.mock_block = Mock( + location=Mock(block_id='block_1'), + display_name='Block Name', + course_id=self.course.id, + category='html' + ) + self.mock_course.get_parent.return_value = None + self.mock_section.get_parent.return_value = self.mock_course + self.mock_subsection.get_parent.return_value = self.mock_section + self.mock_unit.get_parent.return_value = self.mock_subsection + self.mock_block.get_parent.return_value = self.mock_unit + + def test_update_node_tree_and_dictionary_returns_node_tree(self): + """ + Verify _update_node_tree_and_dictionary creates a node tree structure + when passed a block level xblock. + """ + expected_tree = { + 'chapter_1': { + 'sequential_1': { + 'vertical_1': { + 'block_1': {} + } + } + } + } + result_tree, result_dictionary = _update_node_tree_and_dictionary( + self.mock_block, 'example_link', True, {}, {} + ) + + self.assertEqual(expected_tree, result_tree) + + def test_update_node_tree_and_dictionary_returns_dictionary(self): + """ + Verify _update_node_tree_and_dictionary creates a dictionary of parent xblock entries + when passed a block level xblock. + """ + expected_dictionary = { + 'chapter_1': { + 'display_name': 'Section Name', + 'category': 'chapter' + }, + 'sequential_1': { + 'display_name': 'Subsection Name', + 'category': 'sequential' + }, + 'vertical_1': { + 'display_name': 'Unit Name', + 'category': 'vertical' + }, + 'block_1': { + 'display_name': 'Block Name', + 'category': 'html', + 'url': f'/course/{self.course.id}/editor/html/{self.mock_block.location}', + 'locked_links': ['example_link'] + } + } + result_tree, result_dictionary = _update_node_tree_and_dictionary( + self.mock_block, 'example_link', True, {}, {} + ) + + self.assertEqual(expected_dictionary, result_dictionary) + + def test_create_dto_recursive_returns_for_empty_node(self): + """ + Test _create_dto_recursive behavior at the end of recursion. + Function should return None when given empty node tree and empty dictionary. + """ + expected = _create_dto_recursive({}, {}) + self.assertEqual(None, expected) + + def test_create_dto_recursive_returns_for_leaf_node(self): + """ + Test _create_dto_recursive behavior at the step before the end of recursion. + When evaluating a leaf node in the node tree, the function should return broken links + and locked links data from the leaf node. + """ + expected_result = { + 'blocks': [ + { + 'id': 'block_1', + 'displayName': 'Block Name', + 'url': '/block/1', + 'brokenLinks': ['broken_link_1', 'broken_link_2'], + 'lockedLinks': ['locked_link'] + } + ] + } + + mock_node_tree = { + 'block_1': {} + } + mock_dictionary = { + 'chapter_1': { + 'display_name': 'Section Name', + 'category': 'chapter' + }, + 'sequential_1': { + 'display_name': 'Subsection Name', + 'category': 'sequential' + }, + 'vertical_1': { + 'display_name': 'Unit Name', + 'category': 'vertical' + }, + 'block_1': { + 'display_name': 'Block Name', + 'url': '/block/1', + 'broken_links': ['broken_link_1', 'broken_link_2'], + 'locked_links': ['locked_link'] + } + } + expected = _create_dto_recursive(mock_node_tree, mock_dictionary) + self.assertEqual(expected_result, expected) + + def test_create_dto_recursive_returns_for_full_tree(self): + """ + Test _create_dto_recursive behavior when recursing many times. + When evaluating a fully mocked node tree and dictionary, the function should return + a full json DTO prepared for frontend. + """ + expected_result = { + 'sections': [ + { + 'id': 'chapter_1', + 'displayName': 'Section Name', + 'subsections': [ + { + 'id': 'sequential_1', + 'displayName': 'Subsection Name', + 'units': [ + { + 'id': 'vertical_1', + 'displayName': 'Unit Name', + 'blocks': [ + { + 'id': 'block_1', + 'displayName': 'Block Name', + 'url': '/block/1', + 'brokenLinks': ['broken_link_1', 'broken_link_2'], + 'lockedLinks': ['locked_link'] + } + ] + } + ] + } + ] + } + ] + } + + mock_node_tree = { + 'chapter_1': { + 'sequential_1': { + 'vertical_1': { + 'block_1': {} + } + } + } + } + mock_dictionary = { + 'chapter_1': { + 'display_name': 'Section Name', + 'category': 'chapter' + }, + 'sequential_1': { + 'display_name': 'Subsection Name', + 'category': 'sequential' + }, + 'vertical_1': { + 'display_name': 'Unit Name', + 'category': 'vertical' + }, + 'block_1': { + 'display_name': 'Block Name', + 'url': '/block/1', + 'broken_links': ['broken_link_1', 'broken_link_2'], + 'locked_links': ['locked_link'] + } + } + expected = _create_dto_recursive(mock_node_tree, mock_dictionary) + + self.assertEqual(expected_result, expected) diff --git a/cms/djangoapps/contentstore/docs/how-tos/test_course_related_view_auth.rst b/cms/djangoapps/contentstore/docs/how-tos/test_course_related_view_auth.rst new file mode 100644 index 0000000000..b9b797ac43 --- /dev/null +++ b/cms/djangoapps/contentstore/docs/how-tos/test_course_related_view_auth.rst @@ -0,0 +1,64 @@ +============================================== +How to test View Auth for course-related views +============================================== + +What to test +------------ +Each view endpoint that exposes an internal API endpoint - like in files in the rest_api folder - must +be tested for the following. + +- Only authenticated users can access the endpoint. +- Only users with the correct permissions (authorization) can access the endpoint. +- All data and params that are part of the request are properly validated. + +How to test +----------- +The `AuthorizeStaffTestCase` class provides a set of tests that can be used to test the authorization +of a view. If you inherit from this class, these tests will be automatically run. For details, +please look at the source code of the `AuthorizeStaffTestCase` class. + +A lot of these tests can be easily implemented by inheriting from the `AuthorizeStaffTestCase`. +This parent class assumes that the view is for a specific course and that only users who have access +to the course can access the view. (They are either staff or instructors for the course, or global admin). + +Here is an example of how to test a view that requires a user to be authenticated and have access to a course. + +.. code-block:: python + + from cms.djangoapps.contentstore.tests.test_utils import AuthorizeStaffTestCase + from django.test import TestCase + from xmodule.modulestore.tests.django_utils import ModuleStoreTestCase + from django.urls import reverse + + class TestMyGetView(AuthorizeStaffTestCase, ModuleStoreTestCase, TestCase): + def make_request(self, course_id=None, data=None): + url = self.get_url(self.course.id) + response = self.client.get(url, data) + return response + + def get_url(self, course_key): + url = reverse( + 'cms.djangoapps.contentstore:v0:my_get_view', + kwargs={'course_id': self.course.id} + ) + return url + +As you can see, you need to inherit from `AuthorizeStaffTestCase` and `ModuleStoreTestCase`, and then either +`TestCase` or `APITestCase` depending on the type of view you are testing. For cookie-based +authentication, `TestCase` is sufficient, for Oauth2 use `ApiTestCase`. + +The only two methods you need to implement are `make_request` and `get_url`. The `make_request` method +should make the request to the view and return the response. The `get_url` method should return the URL +for the view you are testing. + +Overwriting Tests +----------------- +If you need different behavior you can overwrite the tests from the parent class. +For example, if students should have access to the view, simply implement the +`test_student` method in your test class. + +Adding other tests +------------------ +If you want to test other things in the view - let's say validation - +it's easy to just add another `test_...` function to your test class +and you can use the `make_request` method to make the request. diff --git a/cms/djangoapps/contentstore/rest_api/v0/serializers/__init__.py b/cms/djangoapps/contentstore/rest_api/v0/serializers/__init__.py index 33931a4a19..171f746be4 100644 --- a/cms/djangoapps/contentstore/rest_api/v0/serializers/__init__.py +++ b/cms/djangoapps/contentstore/rest_api/v0/serializers/__init__.py @@ -4,6 +4,7 @@ Serializers for v0 contentstore API. from .advanced_settings import AdvancedSettingsFieldSerializer, CourseAdvancedSettingsSerializer from .assets import AssetSerializer from .authoring_grading import CourseGradingModelSerializer +from .course_optimizer import LinkCheckSerializer from .tabs import CourseTabSerializer, CourseTabUpdateSerializer, TabIDLocatorSerializer from .transcripts import TranscriptSerializer, YoutubeTranscriptCheckSerializer, YoutubeTranscriptUploadSerializer from .xblock import XblockSerializer diff --git a/cms/djangoapps/contentstore/rest_api/v0/serializers/course_optimizer.py b/cms/djangoapps/contentstore/rest_api/v0/serializers/course_optimizer.py new file mode 100644 index 0000000000..a7b38e0727 --- /dev/null +++ b/cms/djangoapps/contentstore/rest_api/v0/serializers/course_optimizer.py @@ -0,0 +1,48 @@ +""" +API Serializers for Course Optimizer +""" + +from rest_framework import serializers + + +class LinkCheckBlockSerializer(serializers.Serializer): + """ Serializer for broken links block model data """ + id = serializers.CharField(required=True, allow_null=False, allow_blank=False) + displayName = serializers.CharField(required=True, allow_null=False, allow_blank=True) + url = serializers.CharField(required=True, allow_null=False, allow_blank=False) + brokenLinks = serializers.ListField(required=False) + lockedLinks = serializers.ListField(required=False) + + +class LinkCheckUnitSerializer(serializers.Serializer): + """ Serializer for broken links unit model data """ + id = serializers.CharField(required=True, allow_null=False, allow_blank=False) + displayName = serializers.CharField(required=True, allow_null=False, allow_blank=True) + blocks = LinkCheckBlockSerializer(many=True) + + +class LinkCheckSubsectionSerializer(serializers.Serializer): + """ Serializer for broken links subsection model data """ + id = serializers.CharField(required=True, allow_null=False, allow_blank=False) + displayName = serializers.CharField(required=True, allow_null=False, allow_blank=True) + units = LinkCheckUnitSerializer(many=True) + + +class LinkCheckSectionSerializer(serializers.Serializer): + """ Serializer for broken links section model data """ + id = serializers.CharField(required=True, allow_null=False, allow_blank=False) + displayName = serializers.CharField(required=True, allow_null=False, allow_blank=True) + subsections = LinkCheckSubsectionSerializer(many=True) + + +class LinkCheckOutputSerializer(serializers.Serializer): + """ Serializer for broken links output model data """ + sections = LinkCheckSectionSerializer(many=True) + + +class LinkCheckSerializer(serializers.Serializer): + """ Serializer for broken links """ + LinkCheckStatus = serializers.CharField(required=True) + LinkCheckCreatedAt = serializers.DateTimeField(required=False) + LinkCheckOutput = LinkCheckOutputSerializer(required=False) + LinkCheckError = serializers.CharField(required=False) diff --git a/cms/djangoapps/contentstore/rest_api/v0/tests/test_course_optimizer.py b/cms/djangoapps/contentstore/rest_api/v0/tests/test_course_optimizer.py new file mode 100644 index 0000000000..14d5a20fb4 --- /dev/null +++ b/cms/djangoapps/contentstore/rest_api/v0/tests/test_course_optimizer.py @@ -0,0 +1,79 @@ +""" +Unit tests for course optimizer +""" +from django.test import TestCase +from xmodule.modulestore.tests.django_utils import ModuleStoreTestCase +from django.urls import reverse + +from cms.djangoapps.contentstore.tests.test_utils import AuthorizeStaffTestCase + + +class TestGetLinkCheckStatus(AuthorizeStaffTestCase, ModuleStoreTestCase, TestCase): + ''' + Authentication and Authorization Tests for CourseOptimizer. + For concrete tests that are run, check `AuthorizeStaffTestCase`. + ''' + def make_request(self, course_id=None, data=None, **kwargs): + url = self.get_url(self.course.id) + response = self.client.get(url, data) + return response + + def get_url(self, course_key): + url = reverse( + 'cms.djangoapps.contentstore:v0:link_check_status', + kwargs={'course_id': self.course.id} + ) + return url + + def test_produces_4xx_when_invalid_course_id(self): + ''' + Test course_id validation + ''' + response = self.make_request(course_id='invalid_course_id') + self.assertIn(response.status_code, range(400, 500)) + + def test_produces_4xx_when_additional_kwargs(self): + ''' + Test additional kwargs validation + ''' + response = self.make_request(course_id=self.course.id, malicious_kwarg='malicious_kwarg') + self.assertIn(response.status_code, range(400, 500)) + + +class TestPostLinkCheck(AuthorizeStaffTestCase, ModuleStoreTestCase, TestCase): + ''' + Authentication and Authorization Tests for CourseOptimizer. + For concrete tests that are run, check `AuthorizeStaffTestCase`. + ''' + def make_request(self, course_id=None, data=None, **kwargs): + url = self.get_url(self.course.id) + response = self.client.post(url, data) + return response + + def get_url(self, course_key): + url = reverse( + 'cms.djangoapps.contentstore:v0:link_check', + kwargs={'course_id': self.course.id} + ) + return url + + def test_produces_4xx_when_invalid_course_id(self): + ''' + Test course_id validation + ''' + response = self.make_request(course_id='invalid_course_id') + self.assertIn(response.status_code, range(400, 500)) + + def test_produces_4xx_when_additional_kwargs(self): + ''' + Test additional kwargs validation + ''' + response = self.make_request(course_id=self.course.id, malicious_kwarg='malicious_kwarg') + self.assertIn(response.status_code, range(400, 500)) + + def test_produces_4xx_when_unexpected_data(self): + ''' + Test validation when request contains unexpected data + ''' + response = self.make_request(course_id=self.course.id, data={'unexpected_data': 'unexpected_data'}) + self.assertIn(response.status_code, range(400, 500)) diff --git a/cms/djangoapps/contentstore/rest_api/v0/urls.py b/cms/djangoapps/contentstore/rest_api/v0/urls.py index 6e0c11d22b..9d7006a708 100644 --- a/cms/djangoapps/contentstore/rest_api/v0/urls.py +++ b/cms/djangoapps/contentstore/rest_api/v0/urls.py @@ -7,14 +7,16 @@ from openedx.core.constants import COURSE_ID_PATTERN from .views import ( AdvancedCourseSettingsView, + APIHeartBeatView, AuthoringGradingView, CourseTabSettingsView, CourseTabListView, CourseTabReorderView, + LinkCheckView, + LinkCheckStatusView, TranscriptView, YoutubeTranscriptCheckView, YoutubeTranscriptUploadView, - APIHeartBeatView ) from .views import assets from .views import authoring_videos @@ -102,4 +104,14 @@ urlpatterns = [ fr'^youtube_transcripts/{settings.COURSE_ID_PATTERN}/upload?$', YoutubeTranscriptUploadView.as_view(), name='cms_api_youtube_transcripts_upload' ), + + # Course Optimizer + re_path( + fr'^link_check/{settings.COURSE_ID_PATTERN}$', + LinkCheckView.as_view(), name='link_check' + ), + re_path( + fr'^link_check_status/{settings.COURSE_ID_PATTERN}$', + LinkCheckStatusView.as_view(), name='link_check_status' + ), ] diff --git a/cms/djangoapps/contentstore/rest_api/v0/views/__init__.py b/cms/djangoapps/contentstore/rest_api/v0/views/__init__.py index 00d22a1ea7..2ce3ea22ea 100644 --- a/cms/djangoapps/contentstore/rest_api/v0/views/__init__.py +++ b/cms/djangoapps/contentstore/rest_api/v0/views/__init__.py @@ -2,7 +2,8 @@ Views for v0 contentstore API. """ from .advanced_settings import AdvancedCourseSettingsView +from .api_heartbeat import APIHeartBeatView from .authoring_grading import AuthoringGradingView +from .course_optimizer import LinkCheckView, LinkCheckStatusView from .tabs import CourseTabSettingsView, CourseTabListView, CourseTabReorderView from .transcripts import TranscriptView, YoutubeTranscriptCheckView, YoutubeTranscriptUploadView -from .api_heartbeat import APIHeartBeatView diff --git a/cms/djangoapps/contentstore/rest_api/v0/views/course_optimizer.py b/cms/djangoapps/contentstore/rest_api/v0/views/course_optimizer.py new file mode 100644 index 0000000000..9aa23838e6 --- /dev/null +++ b/cms/djangoapps/contentstore/rest_api/v0/views/course_optimizer.py @@ -0,0 +1,144 @@ +""" API Views for Course Optimizer. """ +import edx_api_doc_tools as apidocs +from opaque_keys.edx.keys import CourseKey +from rest_framework.views import APIView +from rest_framework.request import Request +from rest_framework.response import Response +from user_tasks.models import UserTaskStatus + +from cms.djangoapps.contentstore.core.course_optimizer_provider import get_link_check_data +from cms.djangoapps.contentstore.rest_api.v0.serializers.course_optimizer import LinkCheckSerializer +from cms.djangoapps.contentstore.tasks import check_broken_links +from common.djangoapps.student.auth import has_course_author_access, has_studio_read_access +from common.djangoapps.util.json_request import JsonResponse +from openedx.core.lib.api.view_utils import DeveloperErrorViewMixin, verify_course_exists, view_auth_classes + + +@view_auth_classes(is_authenticated=True) +class LinkCheckView(DeveloperErrorViewMixin, APIView): + """ + View for queueing a celery task to scan a course for broken links. + """ + @apidocs.schema( + parameters=[ + apidocs.string_parameter("course_id", apidocs.ParameterLocation.PATH, description="Course ID"), + ], + responses={ + 200: "Celery task queued.", + 401: "The requester is not authenticated.", + 403: "The requester cannot access the specified course.", + 404: "The requested course does not exist.", + }, + ) + @verify_course_exists() + def post(self, request: Request, course_id: str): + """ + Queue celery task to scan a course for broken links. + + **Example Request** + POST /api/contentstore/v0/link_check/{course_id} + + **Response Values** + ```json + { + "LinkCheckStatus": "Pending" + } + """ + course_key = CourseKey.from_string(course_id) + + if not has_studio_read_access(request.user, course_key): + self.permission_denied(request) + + check_broken_links.delay(request.user.id, course_id, request.LANGUAGE_CODE) + return JsonResponse({'LinkCheckStatus': UserTaskStatus.PENDING}) + + +@view_auth_classes() +class LinkCheckStatusView(DeveloperErrorViewMixin, APIView): + """ + View for checking the status of the celery task and returning the results. + """ + @apidocs.schema( + parameters=[ + apidocs.string_parameter("course_id", apidocs.ParameterLocation.PATH, description="Course ID"), + ], + responses={ + 200: "OK", + 401: "The requester is not authenticated.", + 403: "The requester cannot access the specified course.", + 404: "The requested course does not exist.", + }, + ) + def get(self, request: Request, course_id: str): + """ + GET handler to return the status of the link_check task from UserTaskStatus. + If no task has been started for the course, return 'Uninitiated'. + If link_check task was successful, an output result is also returned. + + For reference, the following status are in UserTaskStatus: + 'Pending', 'In Progress' (sent to frontend as 'In-Progress'), + 'Succeeded', 'Failed', 'Canceled', 'Retrying' + This function adds a status for when status from UserTaskStatus is None: + 'Uninitiated' + + **Example Request** + GET /api/contentstore/v0/link_check_status/{course_id} + + **Example Response** + ```json + { + "LinkCheckStatus": "Succeeded", + "LinkCheckCreatedAt": "2025-02-05T14:32:01.294587Z", + "LinkCheckOutput": { + sections: [ + { + id: , + displayName: , + subsections: [ + { + id: , + displayName: , + units: [ + { + id: , + displayName: , + blocks: [ + { + id: , + url: , + brokenLinks: [ + , + , + , + ..., + ], + lockedLinks: [ + , + , + , + ..., + ], + }, + { }, + ], + }, + { }, + ], + }, + { }, + ], + }, + } + """ + course_key = CourseKey.from_string(course_id) + if not has_course_author_access(request.user, course_key): + print('missing course author access') + self.permission_denied(request) + + data = get_link_check_data(request, course_id) + serializer = LinkCheckSerializer(data) + + return Response(serializer.data) diff --git a/cms/djangoapps/contentstore/tasks.py b/cms/djangoapps/contentstore/tasks.py index bb220c3717..1c1b5a44ab 100644 --- a/cms/djangoapps/contentstore/tasks.py +++ b/cms/djangoapps/contentstore/tasks.py @@ -7,6 +7,9 @@ import json import os import shutil import tarfile +import re +import aiohttp +import asyncio from datetime import datetime from tempfile import NamedTemporaryFile, mkdtemp @@ -53,8 +56,10 @@ from cms.djangoapps.contentstore.utils import ( translation_language, delete_course ) +from cms.djangoapps.contentstore.xblock_storage_handlers.view_handlers import get_block_info from cms.djangoapps.models.settings.course_metadata import CourseMetadata from common.djangoapps.course_action_state.models import CourseRerunState +from common.djangoapps.static_replace import replace_static_urls from common.djangoapps.student.auth import has_course_author_access from common.djangoapps.student.roles import CourseInstructorRole, CourseStaffRole, LibraryUserRole from common.djangoapps.util.monitoring import monitor_import_failure @@ -890,7 +895,7 @@ def copy_v1_user_roles_into_v2_library(v2_library_key, v1_library_key): def _create_copy_content_task(v2_library_key, v1_library_key): """ spin up a celery task to import the V1 Library's content into the V2 library. - This utalizes the fact that course and v1 library content is stored almost identically. + This utilizes the fact that course and v1 library content is stored almost identically. """ return v2contentlib_api.import_blocks_create_task( v2_library_key, v1_library_key, @@ -1066,3 +1071,336 @@ def undo_all_library_source_blocks_ids_for_course(course_key_string, v1_to_v2_li store.update_item(draft_library_source_block, None) # return success return + + +class CourseLinkCheckTask(UserTask): # pylint: disable=abstract-method + """ + Base class for course link check tasks. + """ + + @staticmethod + def calculate_total_steps(arguments_dict): + """ + Get the number of in-progress steps in the link check process, as shown in the UI. + + For reference, these are: + 1. Scanning + """ + return 1 + + @classmethod + def generate_name(cls, arguments_dict): + """ + Create a name for this particular task instance. + + Arguments: + arguments_dict (dict): The arguments given to the task function + + Returns: + str: The generated name + """ + key = arguments_dict['course_key_string'] + return f'Broken link check of {key}' + + +# -------------- Course optimizer functions ------------------ + + +@shared_task(base=CourseLinkCheckTask, bind=True) +# Note: The decorator @set_code_owner_attribute cannot be used here because the UserTaskMixin +# does stack inspection and can't handle additional decorators. +def check_broken_links(self, user_id, course_key_string, language): + """ + Checks for broken links in a course and store the results in a file. + """ + set_code_owner_attribute_from_module(__name__) + return _check_broken_links(self, user_id, course_key_string, language) + + +def _check_broken_links(task_instance, user_id, course_key_string, language): + """ + Checks for broken links in a course and store the results in a file. + """ + user = _validate_user(task_instance, user_id, language) + + task_instance.status.set_state('Scanning') + course_key = CourseKey.from_string(course_key_string) + + url_list = _scan_course_for_links(course_key) + validated_url_list = asyncio.run(_validate_urls_access_in_batches(url_list, course_key, batch_size=100)) + broken_or_locked_urls, retry_list = _filter_by_status(validated_url_list) + + if retry_list: + retry_results = _retry_validation(retry_list, course_key, retry_count=3) + broken_or_locked_urls.extend(retry_results) + + try: + task_instance.status.increment_completed_steps() + + file_name = str(course_key) + broken_links_file = NamedTemporaryFile(prefix=file_name + '.', suffix='.json') + LOGGER.debug(f'[Link Check] json file being generated at {broken_links_file.name}') + + with open(broken_links_file.name, 'w') as file: + json.dump(broken_or_locked_urls, file, indent=4) + + _write_broken_links_to_file(broken_or_locked_urls, broken_links_file) + + artifact = UserTaskArtifact(status=task_instance.status, name='BrokenLinks') + _save_broken_links_file(artifact, broken_links_file) + + # catch all exceptions so we can record useful error messages + except Exception as e: # pylint: disable=broad-except + LOGGER.exception('Error checking links for course %s', course_key, exc_info=True) + if task_instance.status.state != UserTaskStatus.FAILED: + task_instance.status.fail({'raw_error_msg': str(e)}) + + +def _validate_user(task, user_id, language): + """Validate if the user exists. Otherwise log an unknown user id error.""" + try: + return User.objects.get(pk=user_id) + except User.DoesNotExist as exc: + with translation_language(language): + task.status.fail(UserErrors.UNKNOWN_USER_ID.format(user_id)) + return + + +def _scan_course_for_links(course_key): + """ + Scans a course for links found in the data contents of blocks. + + Returns: + list: block id and URL pairs + + Example return: + [ + [block_id1, url1], + [block_id2, url2], + ... + ] + """ + verticals = modulestore().get_items( + course_key, + qualifiers={'category': 'vertical'}, + revision=ModuleStoreEnum.RevisionOption.published_only + ) + blocks = [] + urls_to_validate = [] + + for vertical in verticals: + blocks.extend(vertical.get_children()) + + for block in blocks: + block_id = str(block.usage_key) + block_info = get_block_info(block) + block_data = block_info['data'] + + url_list = _get_urls(block_data) + urls_to_validate += [[block_id, url] for url in url_list] + + return urls_to_validate + + +def _get_urls(content): + """ + Finds and returns a list of URLs in the given content. + Includes strings following 'href=' and 'src='. + Excludes strings that are only '#'. + + Arguments: + content (str): entire content of a block + + Returns: + list: urls + """ + regex = r'\s+(?:href|src)=["\'](?!#)([^"\']*)["\']' + url_list = re.findall(regex, content) + return url_list + + +async def _validate_urls_access_in_batches(url_list, course_key, batch_size=100): + """ + Returns the statuses of a list of URL requests. + + Arguments: + url_list (list): block id and URL pairs + + Returns: + list: dictionary containing URL, associated block id, and request status + """ + responses = [] + url_count = len(url_list) + + for i in range(0, url_count, batch_size): + batch = url_list[i:i + batch_size] + batch_results = await _validate_batch(batch, course_key) + responses.extend(batch_results) + LOGGER.debug(f'[Link Check] request batch {i // batch_size + 1} of {url_count // batch_size + 1}') + + return responses + + +async def _validate_batch(batch, course_key): + """Validate a batch of URLs""" + async with aiohttp.ClientSession() as session: + tasks = [_validate_url_access(session, url_data, course_key) for url_data in batch] + batch_results = await asyncio.gather(*tasks) + return batch_results + + +async def _validate_url_access(session, url_data, course_key): + """ + Validates a URL. + + Arguments: + url_data (list): block id and URL pairs + course_key (str): locator id for a course + + Returns: + dict: URL, associated block id, and request status + + Example return: + { + 'block_id': block_id1, + 'url': url1, + 'status': status + } + """ + block_id, url = url_data + result = {'block_id': block_id, 'url': url} + standardized_url = _convert_to_standard_url(url, course_key) + try: + async with session.get(standardized_url, timeout=5) as response: + result.update({'status': response.status}) + except Exception as e: # lint-amnesty, pylint: disable=broad-except + result.update({'status': None}) + LOGGER.debug(f'[Link Check] Request error when validating {url}: {str(e)}') + return result + + +def _convert_to_standard_url(url, course_key): + """ + Returns standard URLs when given studio URLs. Otherwise returns the URL as is. + + Example URLs: + /assets/courseware/v1/506da5d6f866e8f0be44c5df8b6e6b2a/... + ...asset-v1:edX+DemoX+Demo_Course+type@asset+block/getting-started_x250.png + /static/getting-started_x250.png + /container/block-v1:edX+DemoX+Demo_Course+type@vertical+block@2152d4a4aadc4cb0af5256394a3d1fc7 + """ + if _is_studio_url_without_base(url): + if url.startswith('/static/'): + processed_url = replace_static_urls(f'\"{url}\"', course_id=course_key)[1:-1] + return 'https://' + settings.CMS_BASE + processed_url + elif url.startswith('/'): + return 'https://' + settings.CMS_BASE + url + else: + return 'https://' + settings.CMS_BASE + '/container/' + url + else: + return url + + +def _is_studio_url(url): + """Returns True if url is a studio url.""" + return _is_studio_url_with_base(url) or _is_studio_url_without_base(url) + + +def _is_studio_url_with_base(url): + """Returns True if url is a studio url with cms base.""" + return url.startswith('http://' + settings.CMS_BASE) or url.startswith('https://' + settings.CMS_BASE) + + +def _is_studio_url_without_base(url): + """Returns True if url is a studio url without cms base.""" + return not url.startswith('http://') and not url.startswith('https://') + + +def _filter_by_status(results): + """ + Filter results by status. + + Statuses: + 200: OK. No need to do more + 403: Forbidden. Record as locked link. + None: Error. Retry up to 3 times. + Other: Failure. Record as broken link. + + Arguments: + results (list): URL, associated block id, and request status + + Returns: + filtered_results (list): list of block id, URL and if URL is locked + retry_list (list): block id and url pairs + + Example return: + [ + [block_id1, filtered_results_url1, is_locked], + ... + ], + [ + [block_id1, retry_url1], + ... + ] + """ + filtered_results = [] + retry_list = [] + for result in results: + status, block_id, url = result['status'], result['block_id'], result['url'] + if status is None: + retry_list.append([block_id, url]) + elif status == 200: + continue + elif status == 403 and _is_studio_url(url): + filtered_results.append([block_id, url, True]) + else: + filtered_results.append([block_id, url, False]) + + return filtered_results, retry_list + + +def _retry_validation(url_list, course_key, retry_count=3): + """ + Retry validation for URLs that failed due to connection error. + + Returns: + list: URLs that could not be validated due to being locked or due to persistent connection problems + """ + results = [] + retry_list = url_list + for i in range(retry_count): + if retry_list: + LOGGER.debug(f'[Link Check] retry attempt #{i + 1}') + retry_list = _retry_validation_and_filter_results(course_key, results, retry_list) + results.extend(retry_list) + + return results + + +def _retry_validation_and_filter_results(course_key, results, retry_list): + """ + Validates URLs and then filter them by status. + + Arguments: + retry_list: list of urls to retry + + Returns: + list: URLs that did not pass validation and should be retried + """ + validated_url_list = asyncio.run( + _validate_urls_access_in_batches(retry_list, course_key, batch_size=100) + ) + filtered_url_list, retry_list = _filter_by_status(validated_url_list) + results.extend(filtered_url_list) + return retry_list + + +def _save_broken_links_file(artifact, file_to_save): + artifact.file.save(name=os.path.basename(file_to_save.name), content=File(file_to_save)) + artifact.save() + return True + + +def _write_broken_links_to_file(broken_or_locked_urls, broken_links_file): + with open(broken_links_file.name, 'w') as file: + json.dump(broken_or_locked_urls, file, indent=4) diff --git a/cms/djangoapps/contentstore/tests/test_tasks.py b/cms/djangoapps/contentstore/tests/test_tasks.py index cf82a6d165..09c2f07e1b 100644 --- a/cms/djangoapps/contentstore/tests/test_tasks.py +++ b/cms/djangoapps/contentstore/tests/test_tasks.py @@ -1,31 +1,48 @@ """ Unit tests for course import and export Celery tasks """ - - import copy import json +import logging from unittest import mock +from unittest.mock import AsyncMock, patch, MagicMock from uuid import uuid4 +from celery import Task +import pytest from django.conf import settings from django.contrib.auth.models import User # lint-amnesty, pylint: disable=imported-auth-user from django.test.utils import override_settings from edx_toggles.toggles.testutils import override_waffle_flag +from opaque_keys.edx.keys import CourseKey from opaque_keys.edx.locator import CourseLocator from organizations.models import OrganizationCourse from organizations.tests.factories import OrganizationFactory from user_tasks.models import UserTaskArtifact, UserTaskStatus -from cms.djangoapps.contentstore.tasks import export_olx, update_special_exams_and_publish, rerun_course from cms.djangoapps.contentstore.tests.test_libraries import LibraryTestCase from cms.djangoapps.contentstore.tests.utils import CourseTestCase from common.djangoapps.course_action_state.models import CourseRerunState from common.djangoapps.student.tests.factories import UserFactory from openedx.core.djangoapps.course_apps.toggles import EXAMS_IDA from openedx.core.djangoapps.embargo.models import Country, CountryAccessRule, RestrictedCourse +from xmodule.modulestore import ModuleStoreEnum from xmodule.modulestore.django import modulestore # lint-amnesty, pylint: disable=wrong-import-order -from xmodule.modulestore.tests.django_utils import TEST_DATA_SPLIT_MODULESTORE +from xmodule.modulestore.tests.django_utils import TEST_DATA_SPLIT_MODULESTORE, ModuleStoreTestCase +from xmodule.modulestore.tests.factories import CourseFactory # lint-amnesty, pylint: disable=wrong-import-order +from ..tasks import ( + export_olx, + update_special_exams_and_publish, + rerun_course, + _validate_urls_access_in_batches, + _filter_by_status, + _get_urls, + _check_broken_links, + _is_studio_url, + _scan_course_for_links +) + +logging = logging.getLogger(__name__) TEST_DATA_CONTENTSTORE = copy.deepcopy(settings.CONTENTSTORE) TEST_DATA_CONTENTSTORE['DOC_STORE_CONFIG']['db'] = 'test_xcontent_%s' % uuid4().hex @@ -199,3 +216,276 @@ class RegisterExamsTaskTestCase(CourseTestCase): # pylint: disable=missing-clas _mock_register_exams_proctoring.side_effect = Exception('boom!') update_special_exams_and_publish(str(self.course.id)) course_publish.assert_called() + + +class MockCourseLinkCheckTask(Task): + def __init__(self): + self.status = mock.Mock() + + +############## Course Optimizer tests ############## + + +class CheckBrokenLinksTaskTest(ModuleStoreTestCase): + """Tests for CheckBrokenLinksTask""" + def setUp(self): + super().setUp() + self.store = modulestore()._get_modulestore_by_type(ModuleStoreEnum.Type.mongo) # lint-amnesty, pylint: disable=protected-access + self.test_course = CourseFactory.create( + org="test", course="course1", display_name="run1" + ) + self.mock_urls = [ + ["block-v1:edX+DemoX+Demo_Course+type@vertical+block@1", "http://example.com/valid"], + ["block-v1:edX+DemoX+Demo_Course+type@vertical+block@2", "http://example.com/invalid"] + ] + self.expected_file_contents = [ + ['block-v1:edX+DemoX+Demo_Course+type@vertical+block@1', 'http://example.com/valid', False], + ['block-v1:edX+DemoX+Demo_Course+type@vertical+block@2', 'http://example.com/invalid', False] + ] + + @mock.patch('cms.djangoapps.contentstore.tasks.UserTaskArtifact', autospec=True) + @mock.patch('cms.djangoapps.contentstore.tasks._scan_course_for_links') + @mock.patch('cms.djangoapps.contentstore.tasks._save_broken_links_file', autospec=True) + @mock.patch('cms.djangoapps.contentstore.tasks._write_broken_links_to_file', autospec=True) + def test_check_broken_links_stores_broken_and_locked_urls( + self, + mock_write_broken_links_to_file, + mock_save_broken_links_file, + mock_scan_course_for_links, + mock_user_task_artifact + ): + ''' + The test verifies that the check_broken_links task correctly + stores broken or locked URLs in the course. + The expected behavior is that the after scanning the course, + validating the URLs, and filtering the results, the task stores the results in a + JSON file. + + Note that this test mocks all validation functions and therefore + does not test link validation or any of its support functions. + ''' + # Arrange + mock_user = UserFactory.create(username='student', password='password') + mock_course_key_string = "course-v1:edX+DemoX+Demo_Course" + mock_task = MockCourseLinkCheckTask() + mock_scan_course_for_links.return_value = self.mock_urls + + # Act + _check_broken_links(mock_task, mock_user.id, mock_course_key_string, 'en') # pylint: disable=no-value-for-parameter + + # Assert + ### Check that UserTaskArtifact was called with the correct arguments + mock_user_task_artifact.assert_called_once_with(status=mock.ANY, name='BrokenLinks') + + ### Check that the correct links are written to the file + mock_write_broken_links_to_file.assert_called_once_with(self.expected_file_contents, mock.ANY) + + ### Check that _save_broken_links_file was called with the correct arguments + mock_save_broken_links_file.assert_called_once_with(mock_user_task_artifact.return_value, mock.ANY) + + def test_hash_tags_stripped_from_url_lists(self): + NUM_HASH_TAG_LINES = 2 + url_list = ''' + href='#' # 1 of 2 lines that will be stripped + href='http://google.com' + src='#' # 2 of 2 lines that will be stripped + href='https://microsoft.com' + src="/static/resource_name" + ''' + + # Correct for the two carriage returns surrounding the ''' marks + original_lines = len(url_list.splitlines()) - 2 + + processed_url_list = _get_urls(url_list) + processed_lines = len(processed_url_list) + + assert processed_lines == original_lines - NUM_HASH_TAG_LINES, \ + f'Processed URL list lines = {processed_lines}; expected {original_lines - 2}' + + def test_http_url_not_recognized_as_studio_url_scheme(self): + self.assertFalse(_is_studio_url('http://www.google.com')) + + def test_https_url_not_recognized_as_studio_url_scheme(self): + self.assertFalse(_is_studio_url('https://www.google.com')) + + def test_http_with_studio_base_url_recognized_as_studio_url_scheme(self): + self.assertTrue(_is_studio_url(f'http://{settings.CMS_BASE}/testurl')) + + def test_https_with_studio_base_url_recognized_as_studio_url_scheme(self): + self.assertTrue(_is_studio_url(f'https://{settings.CMS_BASE}/testurl')) + + def test_container_url_without_url_base_is_recognized_as_studio_url_scheme(self): + self.assertTrue(_is_studio_url('container/test')) + + def test_slash_url_without_url_base_is_recognized_as_studio_url_scheme(self): + self.assertTrue(_is_studio_url('/static/test')) + + @mock.patch('cms.djangoapps.contentstore.tasks.ModuleStoreEnum', autospec=True) + @mock.patch('cms.djangoapps.contentstore.tasks.modulestore', autospec=True) + def test_course_scan_occurs_on_published_version(self, mock_modulestore, mock_module_store_enum): + """_scan_course_for_links should only scan published courses""" + mock_modulestore_instance = mock.Mock() + mock_modulestore.return_value = mock_modulestore_instance + mock_modulestore_instance.get_items.return_value = [] + + mock_course_key_string = CourseKey.from_string("course-v1:edX+DemoX+Demo_Course") + mock_module_store_enum.RevisionOption.published_only = "mock_published_only" + + _scan_course_for_links(mock_course_key_string) + + mock_modulestore_instance.get_items.assert_called_once_with( + mock_course_key_string, + qualifiers={'category': 'vertical'}, + revision=mock_module_store_enum.RevisionOption.published_only + ) + + @mock.patch('cms.djangoapps.contentstore.tasks._get_urls', autospec=True) + def test_number_of_scanned_blocks_equals_blocks_in_course(self, mock_get_urls): + """ + _scan_course_for_links should call _get_urls once per block in course. + """ + expected_blocks = self.store.get_items(self.test_course.id) + + _scan_course_for_links(self.test_course.id) + self.assertEqual(len(expected_blocks), mock_get_urls.call_count) + + @pytest.mark.asyncio + async def test_every_detected_link_is_validated(self): + ''' + The call to _validate_urls_access_in_batches() should call _validate_batch() three times, once for each + of the three batches of length 2 in url_list. The lambda function supplied for _validate_batch will + simply return the set of urls fed to _validate_batch(), and _validate_urls_access_in_batches() will + aggregate these into a list identical to the original url_list. + + What this shows is that each url submitted to _validate_urls_access_in_batches() is ending up as an argument + to one of the generated _validate_batch() calls, and that no input URL is left unprocessed. + ''' + url_list = ['1', '2', '3', '4', '5'] + course_key = 'course-v1:edX+DemoX+Demo_Course' + batch_size = 2 + with patch("cms.djangoapps.contentstore.tasks._validate_batch", new_callable=AsyncMock) as mock_validate_batch: + mock_validate_batch.side_effect = lambda x, y: x + validated_urls = await _validate_urls_access_in_batches(url_list, course_key, batch_size) + mock_validate_batch.assert_called() + assert mock_validate_batch.call_count == 3 # two full batches and one partial batch + assert validated_urls == url_list, \ + f"List of validated urls {validated_urls} is not identical to sourced urls {url_list}" + + @pytest.mark.asyncio + async def test_all_links_are_validated_with_batch_validation(self): + ''' + Here the focus is not on batching, but rather that when validation occurs it does so on the intended + URL strings + ''' + with patch("cms.djangoapps.contentstore.tasks._validate_url_access", new_callable=AsyncMock) as mock_validate: + mock_validate.return_value = {"status": 200} + + url_list = ['1', '2', '3', '4', '5'] + course_key = 'course-v1:edX+DemoX+Demo_Course' + batch_size = 2 + await _validate_urls_access_in_batches(url_list, course_key, batch_size) + args_list = mock_validate.call_args_list + urls = [call_args.args[1] for call_args in args_list] # The middle argument in each of the function calls + for i in range(1, len(url_list) + 1): + assert str(i) in urls, f'{i} not supplied as a url for validation in batches function' + + def test_no_retries_on_403_access_denied_links(self): + ''' + No mocking required here. Will populate "filtering_input" with simulated results for link checks where + some links time out, some links receive 403 errors, and some receive 200 success. This test then + ensures that "_filter_by_status()" tallies the three categories as expected, and formats the result + as expected. + ''' + url_list = ['1', '2', '3', '4', '5'] + filtering_input = [] + for i in range(1, len(url_list) + 1): # Notch out one of the URLs, having it return a '403' status code + filtering_input.append({ + 'block_id': f'block_{i}', + 'url': str(i), + 'status': 200 + }) + filtering_input[2]['status'] = 403 + filtering_input[3]['status'] = 500 + filtering_input[4]['status'] = None + + broken_or_locked_urls, retry_list = _filter_by_status(filtering_input) + assert len(broken_or_locked_urls) == 2 # The inputs with status = 403 and 500 + assert len(retry_list) == 1 # The input with status = None + assert retry_list[0][1] == '5' # The only URL fit for a retry operation (status == None) + + @patch("cms.djangoapps.contentstore.tasks._validate_user", return_value=MagicMock()) + @patch("cms.djangoapps.contentstore.tasks._scan_course_for_links", return_value=["url1", "url2"]) + @patch( + "cms.djangoapps.contentstore.tasks._validate_urls_access_in_batches", + return_value=[{"url": "url1", "status": "ok"}] + ) + @patch( + "cms.djangoapps.contentstore.tasks._filter_by_status", + return_value=(["block_1", "url1", True], ["block_2", "url2"]) + ) + @patch("cms.djangoapps.contentstore.tasks._retry_validation", return_value=['block_2', 'url2']) + def test_check_broken_links_calls_expected_support_functions( + self, + mock_retry_validation, + mock_filter, + mock_validate_urls, + mock_scan_course, + mock_validate_user + ): + # Parameters for the function + user_id = 1234 + language = "en" + course_key_string = "course-v1:edX+DemoX+2025" + + # Mocking self and status attributes for the test + class MockStatus: + """Mock for status attributes""" + def __init__(self): + self.state = "READY" + + def set_state(self, state): + self.state = state + + def increment_completed_steps(self): + pass + + def fail(self, error_details): + self.state = "FAILED" + + class MockSelf: + def __init__(self): + self.status = MockStatus() + + mock_self = MockSelf() + + _check_broken_links(mock_self, user_id, course_key_string, language) + + # Prepare expected results based on mock settings + url_list = mock_scan_course.return_value + validated_url_list = mock_validate_urls.return_value + broken_or_locked_urls, retry_list = mock_filter.return_value + course_key = CourseKey.from_string(course_key_string) + + if retry_list: + retry_results = mock_retry_validation.return_value + broken_or_locked_urls.extend(retry_results) + + # Perform verifications + try: + mock_self.status.increment_completed_steps() + mock_retry_validation.assert_called_once_with( + mock_filter.return_value[1], course_key, retry_count=3 + ) + except Exception as e: # pylint: disable=broad-except + logging.exception("Error checking links for course %s", course_key_string, exc_info=True) + if mock_self.status.state != "FAILED": + mock_self.status.fail({"raw_error_msg": str(e)}) + assert False, "Exception should not occur" + + # Assertions to confirm patched calls were invoked + mock_validate_user.assert_called_once_with(mock_self, user_id, language) + mock_scan_course.assert_called_once_with(course_key) + mock_validate_urls.assert_called_once_with(url_list, course_key, batch_size=100) + mock_filter.assert_called_once_with(validated_url_list) + if retry_list: + mock_retry_validation.assert_called_once_with(retry_list, course_key, retry_count=3) diff --git a/cms/djangoapps/contentstore/utils.py b/cms/djangoapps/contentstore/utils.py index a220b8d913..2cbc28730c 100644 --- a/cms/djangoapps/contentstore/utils.py +++ b/cms/djangoapps/contentstore/utils.py @@ -36,6 +36,7 @@ from pytz import UTC from xblock.fields import Scope from cms.djangoapps.contentstore.toggles import ( + enable_course_optimizer, exam_setting_view_enabled, libraries_v1_enabled, libraries_v2_enabled, @@ -390,6 +391,19 @@ def get_export_url(course_locator) -> str: return export_url +def get_optimizer_url(course_locator) -> str: + """ + Gets course authoring microfrontend URL for optimizer page view. + """ + optimizer_url = None + if enable_course_optimizer(course_locator): + mfe_base_url = get_course_authoring_url(course_locator) + course_mfe_url = f'{mfe_base_url}/course/{course_locator}/optimizer' + if mfe_base_url: + optimizer_url = course_mfe_url + return optimizer_url + + def get_files_uploads_url(course_locator) -> str: """ Gets course authoring microfrontend URL for files and uploads page view. diff --git a/cms/static/sass/elements/_header.scss b/cms/static/sass/elements/_header.scss index c17338acad..61abb5002a 100644 --- a/cms/static/sass/elements/_header.scss +++ b/cms/static/sass/elements/_header.scss @@ -388,7 +388,8 @@ body.course.view-export-git .nav-course-tools-export-git, body.course.view-team .nav-library-settings .title, body.course.view-team .nav-library-settings-team, body.course.view-checklists .nav-course-tools .title, -body.course.view-checklists .nav-course-tools-checklists { +body.course.view-checklists .nav-course-tools-checklists, +.nav-course-tools-optimizer { color: theme-color("primary"); a { diff --git a/cms/templates/widgets/header.html b/cms/templates/widgets/header.html index 8b14398fc3..c71f49775a 100644 --- a/cms/templates/widgets/header.html +++ b/cms/templates/widgets/header.html @@ -8,7 +8,7 @@ from urllib.parse import quote_plus from common.djangoapps.student.auth import has_studio_advanced_settings_access from cms.djangoapps.contentstore import toggles - from cms.djangoapps.contentstore.utils import get_pages_and_resources_url, get_course_outline_url, get_updates_url, get_files_uploads_url, get_video_uploads_url, get_schedule_details_url, get_grading_url, get_advanced_settings_url, get_import_url, get_export_url, get_studio_home_url, get_course_team_url + from cms.djangoapps.contentstore.utils import get_pages_and_resources_url, get_course_outline_url, get_updates_url, get_files_uploads_url, get_video_uploads_url, get_schedule_details_url, get_grading_url, get_advanced_settings_url, get_import_url, get_export_url, get_studio_home_url, get_course_team_url, get_optimizer_url from openedx.core.djangoapps.discussions.config.waffle import ENABLE_PAGES_AND_RESOURCES_MICROFRONTEND from openedx.core.djangoapps.lang_pref.api import header_language_selector_is_enabled, released_languages %> @@ -66,6 +66,7 @@ advanced_settings_mfe_enabled = toggles.use_new_advanced_settings_page(context_course.id) import_mfe_enabled = toggles.use_new_import_page(context_course.id) export_mfe_enabled = toggles.use_new_export_page(context_course.id) + optimizer_enabled = toggles.enable_course_optimizer(context_course.id) %>

@@ -255,6 +256,11 @@ + % if optimizer_enabled: + + % endif