From 466aaad85d8a860e2f01499508c4734740d43c0d Mon Sep 17 00:00:00 2001 From: Devasia Joseph Date: Fri, 1 Aug 2025 17:12:01 +0530 Subject: [PATCH] feat: Enhance course optimizer to detect previous run links and expand scanning scope --- .../core/course_optimizer_provider.py | 340 ++++++++++++++++-- .../tests/test_course_optimizer_provider.py | 156 +++++++- .../v0/serializers/course_optimizer.py | 13 + .../rest_api/v0/views/course_optimizer.py | 93 +++-- .../v1/serializers/course_waffle_flags.py | 8 + .../views/tests/test_course_waffle_flags.py | 47 ++- cms/djangoapps/contentstore/tasks.py | 189 +++++++++- .../contentstore/tests/test_tasks.py | 2 +- cms/djangoapps/contentstore/toggles.py | 23 ++ cms/djangoapps/contentstore/utils.py | 30 ++ cms/lib/spectacular.py | 3 +- 11 files changed, 808 insertions(+), 96 deletions(-) diff --git a/cms/djangoapps/contentstore/core/course_optimizer_provider.py b/cms/djangoapps/contentstore/core/course_optimizer_provider.py index 7b44e05a87..476abcd8b0 100644 --- a/cms/djangoapps/contentstore/core/course_optimizer_provider.py +++ b/cms/djangoapps/contentstore/core/course_optimizer_provider.py @@ -2,15 +2,19 @@ Logic for handling actions in Studio related to Course Optimizer. """ import json + +from opaque_keys.edx.keys import CourseKey from user_tasks.conf import settings as user_tasks_settings from user_tasks.models import UserTaskArtifact, UserTaskStatus -from cms.djangoapps.contentstore.tasks import CourseLinkCheckTask, LinkState +from cms.djangoapps.contentstore.tasks import CourseLinkCheckTask, LinkState, _get_urls from cms.djangoapps.contentstore.xblock_storage_handlers.view_handlers import get_xblock from cms.djangoapps.contentstore.xblock_storage_handlers.xblock_helpers import usage_key_with_run +from lms.djangoapps.courseware.courses import get_course_info_usage_key +from openedx.core.lib.xblock_utils import get_course_update_items from xmodule.modulestore import ModuleStoreEnum from xmodule.modulestore.django import modulestore - +from xmodule.tabs import StaticTab # Restricts status in the REST API to only those which the requesting user has permission to view. # These can be overwritten in django settings. @@ -23,6 +27,7 @@ def get_link_check_data(request, course_id): """ Retrives data and formats it for the link check get request. """ + course_key = CourseKey.from_string(course_id) task_status = _latest_task_status(request, course_id) status = None created_at = None @@ -43,7 +48,7 @@ def get_link_check_data(request, course_id): with artifact.file as file: content = file.read() json_content = json.loads(content) - broken_links_dto = generate_broken_links_descriptor(json_content, request.user) + broken_links_dto = generate_broken_links_descriptor(json_content, request.user, course_key) elif task_status.state in (UserTaskStatus.FAILED, UserTaskStatus.CANCELED): errors = UserTaskArtifact.objects.filter(status=task_status, name='Error') if errors: @@ -53,7 +58,6 @@ def get_link_check_data(request, course_id): except ValueError: # Wasn't JSON, just use the value as a string pass - data = { 'LinkCheckStatus': status, **({'LinkCheckCreatedAt': created_at} if created_at else {}), @@ -76,13 +80,16 @@ def _latest_task_status(request, course_key_string, view_func=None): return task_status.order_by('-created').first() -def generate_broken_links_descriptor(json_content, request_user): +def generate_broken_links_descriptor(json_content, request_user, course_key): """ Returns a Data Transfer Object for frontend given a list of broken links. + Includes ALL link types: broken, locked, external-forbidden, and previous run links. + Now also includes course updates, handouts, and custom pages. ** Example json_content structure ** Note: link_state is locked if the link is a studio link and returns 403 link_state is external-forbidden if the link is not a studio link and returns 403 + link_state is previous-run if the link points to a previous course run [ ['block_id_1', 'link_1', link_state], ['block_id_1', 'link_2', link_state], @@ -111,6 +118,7 @@ def generate_broken_links_descriptor(json_content, request_user): 'url': 'url/to/block', 'brokenLinks: [], 'lockedLinks: [], + 'previousRunLinks: [] }, ..., ] @@ -122,30 +130,40 @@ def generate_broken_links_descriptor(json_content, request_user): ] }, ..., + ], + 'course_updates': [ + { + 'name': 'published_date', + 'url': 'url', + 'brokenLinks': [], + 'lockedLinks': [], + 'externalForbiddenLinks': [], + 'previousRunLinks': [] + }, + ... + { + 'name': 'handouts', + 'url': 'url', + 'brokenLinks': [], + 'lockedLinks': [], + 'externalForbiddenLinks': [], + 'previousRunLinks': [] + } + ], + 'custom_pages': [ + { + 'name': 'page_name', + 'url': 'url', + 'brokenLinks': [], + 'lockedLinks': [], + 'externalForbiddenLinks': [], + 'previousRunLinks': [] + }, + ... ] } """ - xblock_node_tree = {} # tree representation of xblock relationships - xblock_dictionary = {} # dictionary of xblock attributes - - for item in json_content: - block_id, link, *rest = item - if rest: - link_state = rest[0] - else: - link_state = '' - - usage_key = usage_key_with_run(block_id) - block = get_xblock(usage_key, request_user) - xblock_node_tree, xblock_dictionary = _update_node_tree_and_dictionary( - block=block, - link=link, - link_state=link_state, - node_tree=xblock_node_tree, - dictionary=xblock_dictionary - ) - - return _create_dto_recursive(xblock_node_tree, xblock_dictionary) + return _generate_enhanced_links_descriptor(json_content, request_user, course_key) def _update_node_tree_and_dictionary(block, link, link_state, node_tree, dictionary): @@ -221,6 +239,8 @@ def _update_node_tree_and_dictionary(block, link, link_state, node_tree, diction updated_dictionary[xblock_id].setdefault('locked_links', []).append(link) elif link_state == LinkState.EXTERNAL_FORBIDDEN: updated_dictionary[xblock_id].setdefault('external_forbidden_links', []).append(link) + elif link_state == LinkState.PREVIOUS_RUN: + updated_dictionary[xblock_id].setdefault('previous_run_links', []).append(link) else: updated_dictionary[xblock_id].setdefault('broken_links', []).append(link) @@ -277,7 +297,8 @@ def _create_dto_recursive(xblock_node, xblock_dictionary, parent_id=None): 'url': xblock_data.get('url', ''), 'brokenLinks': xblock_data.get('broken_links', []), 'lockedLinks': xblock_data.get('locked_links', []), - 'externalForbiddenLinks': xblock_data.get('external_forbidden_links', []) + 'externalForbiddenLinks': xblock_data.get('external_forbidden_links', []), + 'previousRunLinks': xblock_data.get('previous_run_links', []) }) else: # Non-leaf node category = xblock_data.get('category', None) @@ -317,3 +338,268 @@ def sort_course_sections(course_key, data): ] return data + + +def _generate_links_descriptor_for_content(json_content, request_user): + """ + Creates a content tree of all links in a course and their states + Returns a structure containing all broken links and locked links for a course. + """ + xblock_node_tree = {} + xblock_dictionary = {} + + for item in json_content: + block_id, link, *rest = item + if rest: + link_state = rest[0] + else: + link_state = "" + + usage_key = usage_key_with_run(block_id) + block = get_xblock(usage_key, request_user) + xblock_node_tree, xblock_dictionary = _update_node_tree_and_dictionary( + block=block, + link=link, + link_state=link_state, + node_tree=xblock_node_tree, + dictionary=xblock_dictionary, + ) + + result = _create_dto_recursive(xblock_node_tree, xblock_dictionary) + # Ensure we always return a valid structure with sections + if not isinstance(result, dict): + result = {"sections": []} + + return result + + +def _generate_enhanced_links_descriptor(json_content, request_user, course_key): + """ + Generate enhanced link descriptor that includes course updates, handouts, and custom pages. + """ + + content_links = [] + course_updates_links = [] + handouts_links = [] + custom_pages_links = [] + course = modulestore().get_course(course_key) + + for item in json_content: + block_id, link, *rest = item + if "course_info" in block_id and "updates" in block_id: + course_updates_links.append(item) + elif "course_info" in block_id and "handouts" in block_id: + handouts_links.append(item) + elif "static_tab" in block_id: + custom_pages_links.append(item) + else: + content_links.append(item) + + main_content = _generate_links_descriptor_for_content(content_links, request_user) + + if main_content is None: + main_content = {"sections": []} + + course_updates_data = ( + _generate_course_updates_structure(course, course_updates_links) + if course_updates_links and course else [] + ) + + handouts_data = ( + _generate_handouts_structure(course, handouts_links) + if handouts_links and course else [] + ) + + custom_pages_data = ( + _generate_custom_pages_structure(course, custom_pages_links) + if custom_pages_links and course else [] + ) + + result = main_content.copy() + result["course_updates"] = course_updates_data + handouts_data + result["custom_pages"] = custom_pages_data + return result + + +def _generate_enhanced_content_structure(course, content_links, content_type): + """ + Unified function to generate structure for enhanced content (updates, handouts, custom pages). + + Args: + course: Course object + content_links: List of link items for this content type + content_type: 'updates', 'handouts', or 'custom_pages' + + Returns: + List of content items with categorized links + """ + result = [] + try: + if content_type == "custom_pages": + result = _generate_custom_pages_content(course, content_links) + elif content_type == "updates": + result = _generate_course_updates_content(course, content_links) + elif content_type == "handouts": + result = _generate_handouts_content(course, content_links) + return result + except Exception as e: # pylint: disable=broad-exception-caught + return result + + +def _generate_course_updates_content(course, updates_links): + """Generate course updates content with categorized links.""" + store = modulestore() + usage_key = get_course_info_usage_key(course, "updates") + updates_block = store.get_item(usage_key) + course_updates = [] + + if not (updates_block and hasattr(updates_block, "data")): + return course_updates + + update_items = get_course_update_items(updates_block) + if not update_items: + return course_updates + + # Create link state mapping + link_state_map = { + item[1]: item[2] if len(item) >= 3 else LinkState.BROKEN + for item in updates_links if len(item) >= 2 + } + + for update in update_items: + if update.get("status") != "deleted": + update_content = update.get("content", "") + update_links = _get_urls(update_content) if update_content else [] + + # Match links with their states + update_link_data = _create_empty_links_data() + for link in update_links: + link_state = link_state_map.get(link) + if link_state is not None: + _categorize_link_by_state(link, link_state, update_link_data) + + course_updates.append( + { + "name": update.get("date", "Unknown Date"), + "url": f"/course/{str(course.id)}/course_info", + **update_link_data, + } + ) + + return course_updates + + +def _generate_handouts_content(course, handouts_links): + """Generate handouts content with categorized links.""" + store = modulestore() + usage_key = get_course_info_usage_key(course, "handouts") + handouts_block = store.get_item(usage_key) + course_handouts = [] + + if not ( + handouts_block + and hasattr(handouts_block, "data") + and handouts_block.data + ): + return course_handouts + + # Create link state mapping for handouts + link_state_map = { + item[1]: item[2] if len(item) >= 3 else LinkState.BROKEN + for item in handouts_links if len(item) >= 2 + } + + links_data = _create_empty_links_data() + for link, link_state in link_state_map.items(): + _categorize_link_by_state(link, link_state, links_data) + + course_handouts = [ + { + "name": "handouts", + "url": f"/course/{str(course.id)}/course_info", + **links_data, + } + ] + return course_handouts + + +def _generate_custom_pages_content(course, custom_pages_links): + """Generate custom pages content with categorized links.""" + custom_pages = [] + + if not course or not hasattr(course, "tabs"): + return custom_pages + + # Group links by block_id and categorize them + links_by_page = {} + for item in custom_pages_links: + if len(item) >= 2: + block_id, link = item[0], item[1] + link_state = item[2] if len(item) >= 3 else LinkState.BROKEN + links_by_page.setdefault(block_id, _create_empty_links_data()) + _categorize_link_by_state(link, link_state, links_by_page[block_id]) + + # Process static tabs and add their pages + for tab in course.tabs: + if isinstance(tab, StaticTab): + block_id = str(course.id.make_usage_key("static_tab", tab.url_slug)) + custom_pages.append({ + "name": tab.name, + "url": f"/course/{str(course.id)}/custom-pages", + **links_by_page.get(block_id, _create_empty_links_data()), + }) + + return custom_pages + + +def _generate_course_updates_structure(course, updates_links): + """Generate structure for course updates.""" + return _generate_enhanced_content_structure(course, updates_links, "updates") + + +def _generate_handouts_structure(course, handouts_links): + """Generate structure for course handouts.""" + return _generate_enhanced_content_structure(course, handouts_links, "handouts") + + +def _generate_custom_pages_structure(course, custom_pages_links): + """Generate structure for custom pages (static tabs).""" + return _generate_enhanced_content_structure( + course, custom_pages_links, "custom_pages" + ) + + +def _categorize_link_by_state(link, link_state, links_data): + """ + Helper function to categorize a link into the appropriate list based on its state. + + Args: + link (str): The URL link to categorize + link_state (str): The state of the link (broken, locked, external-forbidden, previous-run) + links_data (dict): Dictionary containing the categorized link lists + """ + state_to_key = { + LinkState.BROKEN: "brokenLinks", + LinkState.LOCKED: "lockedLinks", + LinkState.EXTERNAL_FORBIDDEN: "externalForbiddenLinks", + LinkState.PREVIOUS_RUN: "previousRunLinks" + } + + key = state_to_key.get(link_state) + if key: + links_data[key].append(link) + + +def _create_empty_links_data(): + """ + Helper function to create an empty links data structure. + + Returns: + dict: Dictionary with empty lists for each link type + """ + return { + "brokenLinks": [], + "lockedLinks": [], + "externalForbiddenLinks": [], + "previousRunLinks": [], + } diff --git a/cms/djangoapps/contentstore/core/tests/test_course_optimizer_provider.py b/cms/djangoapps/contentstore/core/tests/test_course_optimizer_provider.py index ca0b73af71..8cb7c38bc1 100644 --- a/cms/djangoapps/contentstore/core/tests/test_course_optimizer_provider.py +++ b/cms/djangoapps/contentstore/core/tests/test_course_optimizer_provider.py @@ -1,16 +1,22 @@ """ Tests for course optimizer """ + from unittest import mock from unittest.mock import Mock -from cms.djangoapps.contentstore.tests.utils import CourseTestCase +from opaque_keys.edx.keys import CourseKey + from cms.djangoapps.contentstore.core.course_optimizer_provider import ( - _update_node_tree_and_dictionary, _create_dto_recursive, + _update_node_tree_and_dictionary, + generate_broken_links_descriptor, sort_course_sections ) -from cms.djangoapps.contentstore.tasks import LinkState +from cms.djangoapps.contentstore.tasks import LinkState, _get_urls +from cms.djangoapps.contentstore.tests.utils import CourseTestCase +from cms.djangoapps.contentstore.utils import _contains_previous_course_reference +from xmodule.tabs import StaticTab class TestLinkCheckProvider(CourseTestCase): @@ -123,6 +129,7 @@ class TestLinkCheckProvider(CourseTestCase): 'brokenLinks': ['broken_link_1', 'broken_link_2'], 'lockedLinks': ['locked_link'], 'externalForbiddenLinks': ['forbidden_link_1'], + 'previousRunLinks': [], } ] } @@ -181,6 +188,7 @@ class TestLinkCheckProvider(CourseTestCase): 'brokenLinks': ['broken_link_1', 'broken_link_2'], 'lockedLinks': ['locked_link'], 'externalForbiddenLinks': ['forbidden_link_1'], + 'previousRunLinks': [], } ] } @@ -295,3 +303,145 @@ class TestLinkCheckProvider(CourseTestCase): ] assert result["LinkCheckOutput"]["sections"] == expected_sections + + def test_prev_run_link_detection(self): + """Test the core logic of separating previous run links from regular links.""" + + previous_course_key = CourseKey.from_string( + "course-v1:edX+DemoX+Demo_Course_2023" + ) + + test_cases = [ + (f"/courses/{previous_course_key}/info", True), + (f"/courses/{previous_course_key}/courseware", True), + (f"/courses/{str(previous_course_key).upper()}/page", True), + # Should NOT match + ("/courses/course-v1:edX+DemoX+Demo_Course_2024/info", False), + ("/static/image.png", False), + ("/assets/courseware/file.pdf", False), + ("", False), + (" ", False), + ] + + for url, expected_match in test_cases: + with self.subTest(url=url, expected=expected_match): + result = _contains_previous_course_reference(url, previous_course_key) + self.assertEqual( + result, + expected_match, + f"URL '{url}' should {'match' if expected_match else 'not match'} previous course", + ) + + def test_enhanced_url_detection_edge_cases(self): + """Test edge cases for enhanced URL detection.""" + + test_cases = [ + ("", []), # Empty content + ("No URLs here", []), # Content without URLs + ( + "Visit https://example.com today!", + ["https://example.com"], + ), # URL in text + ('href="#anchor"', []), # Should exclude fragments + ('src="data:image/png;base64,123"', []), # Should exclude data URLs + ( + "Multiple URLs: http://site1.com and https://site2.com", + ["http://site1.com", "https://site2.com"], + ), # Multiple URLs + ( + "URL with params: https://example.com/page?param=value&other=123", + ["https://example.com/page?param=value&other=123"], + ), # URL with parameters + ] + + for content, expected_urls in test_cases: + with self.subTest(content=content): + urls = _get_urls(content) + for expected_url in expected_urls: + self.assertIn( + expected_url, + urls, + f"Should find '{expected_url}' in content: {content}", + ) + + def test_course_updates_and_custom_pages_structure(self): + """Test that course_updates and custom_pages are properly structured in the response.""" + + json_content = [ + # Regular course content + [ + "course-v1:Test+Course+2024+type@html+block@content1", + "http://content-link.com", + "broken", + ], + [ + "course-v1:Test+Course+2024+type@vertical+block@unit1", + "http://unit-link.com", + "locked", + ], + # Course updates + [ + "course-v1:Test+Course+2024+type@course_info+block@updates", + "http://update1.com", + "broken", + ], + [ + "course-v1:Test+Course+2024+type@course_info+block@updates", + "http://update2.com", + "locked", + ], + # Handouts (should be merged into course_updates) + [ + "course-v1:Test+Course+2024+type@course_info+block@handouts", + "http://handout.com", + "broken", + ], + # Custom pages (static tabs) + [ + "course-v1:Test+Course+2024+type@static_tab+block@page1", + "http://page1.com", + "broken", + ], + [ + "course-v1:Test+Course+2024+type@static_tab+block@page2", + "http://page2.com", + "external-forbidden", + ], + ] + + with mock.patch( + "cms.djangoapps.contentstore.core.course_optimizer_provider._generate_links_descriptor_for_content" + ) as mock_content, mock.patch( + "cms.djangoapps.contentstore.core.course_optimizer_provider.modulestore" + ) as mock_modulestore: + + mock_content.return_value = {"sections": []} + mock_course = self.mock_course + mock_tab1 = StaticTab(name="Page1", url_slug="page1") + mock_tab2 = StaticTab(name="Page2", url_slug="page2") + mock_course.tabs = [mock_tab1, mock_tab2] + mock_course.id = CourseKey.from_string("course-v1:Test+Course+2024") + mock_modulestore.return_value.get_course.return_value = mock_course + + course_key = CourseKey.from_string("course-v1:Test+Course+2024") + result = generate_broken_links_descriptor( + json_content, self.user, course_key + ) + + # Verify top-level structure + self.assertIn("sections", result) + self.assertIn("course_updates", result) + self.assertIn("custom_pages", result) + self.assertNotIn("handouts", result) + + # Course updates should include both updates and handouts + self.assertGreaterEqual( + len(result["course_updates"]), + 1, + "Should have course updates/handouts", + ) + + # Custom pages should have custom pages data + self.assertGreaterEqual( + len(result["custom_pages"]), 1, "Should have custom pages" + ) diff --git a/cms/djangoapps/contentstore/rest_api/v0/serializers/course_optimizer.py b/cms/djangoapps/contentstore/rest_api/v0/serializers/course_optimizer.py index 7411192d16..e6879203f7 100644 --- a/cms/djangoapps/contentstore/rest_api/v0/serializers/course_optimizer.py +++ b/cms/djangoapps/contentstore/rest_api/v0/serializers/course_optimizer.py @@ -13,6 +13,7 @@ class LinkCheckBlockSerializer(serializers.Serializer): brokenLinks = serializers.ListField(required=False) lockedLinks = serializers.ListField(required=False) externalForbiddenLinks = serializers.ListField(required=False) + previousRunLinks = serializers.ListField(required=False) class LinkCheckUnitSerializer(serializers.Serializer): @@ -36,9 +37,21 @@ class LinkCheckSectionSerializer(serializers.Serializer): subsections = LinkCheckSubsectionSerializer(many=True) +class LinkCheckContentItemSerializer(serializers.Serializer): + """ Serializer for course content items like updates, handouts, and custom pages """ + name = serializers.CharField(required=True, allow_null=False, allow_blank=False) + url = serializers.CharField(required=True, allow_null=False, allow_blank=False) + brokenLinks = serializers.ListField(required=False) + lockedLinks = serializers.ListField(required=False) + externalForbiddenLinks = serializers.ListField(required=False) + previousRunLinks = serializers.ListField(required=False) + + class LinkCheckOutputSerializer(serializers.Serializer): """ Serializer for broken links output model data """ sections = LinkCheckSectionSerializer(many=True) + course_updates = LinkCheckContentItemSerializer(many=True, required=False) + custom_pages = LinkCheckContentItemSerializer(many=True, required=False) class LinkCheckSerializer(serializers.Serializer): diff --git a/cms/djangoapps/contentstore/rest_api/v0/views/course_optimizer.py b/cms/djangoapps/contentstore/rest_api/v0/views/course_optimizer.py index 24c8dd0d18..67bcd8309a 100644 --- a/cms/djangoapps/contentstore/rest_api/v0/views/course_optimizer.py +++ b/cms/djangoapps/contentstore/rest_api/v0/views/course_optimizer.py @@ -71,53 +71,49 @@ class LinkCheckStatusView(DeveloperErrorViewMixin, APIView): ) def get(self, request: Request, course_id: str): """ - GET handler to return the status of the link_check task from UserTaskStatus. - If no task has been started for the course, return 'Uninitiated'. - If link_check task was successful, an output result is also returned. + **Use Case** - For reference, the following status are in UserTaskStatus: - 'Pending', 'In Progress' (sent to frontend as 'In-Progress'), - 'Succeeded', 'Failed', 'Canceled', 'Retrying' - This function adds a status for when status from UserTaskStatus is None: - 'Uninitiated' + GET handler to return the status of the link_check task from UserTaskStatus. + If no task has been started for the course, return 'Uninitiated'. + If link_check task was successful, an output result is also returned. + + For reference, the following status are in UserTaskStatus: + 'Pending', 'In Progress' (sent to frontend as 'In-Progress'), + 'Succeeded', 'Failed', 'Canceled', 'Retrying' + This function adds a status for when status from UserTaskStatus is None: + 'Uninitiated' **Example Request** + GET /api/contentstore/v0/link_check_status/{course_id} **Example Response** + ```json { "LinkCheckStatus": "Succeeded", "LinkCheckCreatedAt": "2025-02-05T14:32:01.294587Z", "LinkCheckOutput": { - sections: [ + "sections": [ { - id: , - displayName: , - subsections: [ + "id": , + "displayName": , + "subsections": [ { - id: , - displayName: , - units: [ + "id": , + "displayName": , + "units": [ { - id: , - displayName: , - blocks: [ + "id": , + "displayName": , + "blocks": [ { - id: , - url: , - brokenLinks: [ - , - , - , - ..., - ], - lockedLinks: [ - , - , - , - ..., - ], + "id": , + "url": , + "brokenLinks": [, ...], + "lockedLinks": [, ...], + "externalForbiddenLinks": [, ...], + "previousRunLinks": [, ...] }, { }, ], @@ -130,6 +126,39 @@ class LinkCheckStatusView(DeveloperErrorViewMixin, APIView): }, { }, ], + "course_updates": [ + { + "name": , + "url": , + "brokenLinks": [, ...], + "lockedLinks": [, ...], + "externalForbiddenLinks": [, ...], + "previousRunLinks": [, ...] + }, + ..., + { }, + ..., + { + "name": "handouts", + "url": , + "brokenLinks": [, ...], + "lockedLinks": [, ...], + "externalForbiddenLinks": [, ...], + "previousRunLinks": [, ...] + } + ], + "custom_pages": [ + { + "name": , + "url": , + "brokenLinks": [, ...], + "lockedLinks": [, ...], + "externalForbiddenLinks": [, ...], + "previousRunLinks": [, ...] + }, + ..., + { }, + ] }, } """ diff --git a/cms/djangoapps/contentstore/rest_api/v1/serializers/course_waffle_flags.py b/cms/djangoapps/contentstore/rest_api/v1/serializers/course_waffle_flags.py index dca8e25cb4..3efb7b6226 100644 --- a/cms/djangoapps/contentstore/rest_api/v1/serializers/course_waffle_flags.py +++ b/cms/djangoapps/contentstore/rest_api/v1/serializers/course_waffle_flags.py @@ -30,6 +30,7 @@ class CourseWaffleFlagsSerializer(serializers.Serializer): enable_course_optimizer = serializers.SerializerMethodField() use_react_markdown_editor = serializers.SerializerMethodField() use_video_gallery_flow = serializers.SerializerMethodField() + enable_course_optimizer_check_prev_run_links = serializers.SerializerMethodField() def get_course_key(self): """ @@ -167,3 +168,10 @@ class CourseWaffleFlagsSerializer(serializers.Serializer): Method to get the use_video_gallery_flow waffle flag """ return toggles.use_video_gallery_flow() + + def get_enable_course_optimizer_check_prev_run_links(self, obj): + """ + Method to get the enable_course_optimizer_check_prev_run_links waffle flag + """ + course_key = self.get_course_key() + return toggles.enable_course_optimizer_check_prev_run_links(course_key) diff --git a/cms/djangoapps/contentstore/rest_api/v1/views/tests/test_course_waffle_flags.py b/cms/djangoapps/contentstore/rest_api/v1/views/tests/test_course_waffle_flags.py index ad5696834a..f45cc48810 100644 --- a/cms/djangoapps/contentstore/rest_api/v1/views/tests/test_course_waffle_flags.py +++ b/cms/djangoapps/contentstore/rest_api/v1/views/tests/test_course_waffle_flags.py @@ -1,6 +1,7 @@ """ Unit tests for the course waffle flags view """ + from django.urls import reverse from cms.djangoapps.contentstore import toggles @@ -13,28 +14,30 @@ class CourseWaffleFlagsViewTest(CourseTestCase): Basic test for the CourseWaffleFlagsView endpoint, which returns waffle flag states for a specific course or globally if no course ID is provided. """ + maxDiff = None # Show the whole dictionary in the diff defaults = { - 'enable_course_optimizer': False, - 'use_new_advanced_settings_page': True, - 'use_new_certificates_page': True, - 'use_new_course_outline_page': True, - 'use_new_course_team_page': True, - 'use_new_custom_pages': True, - 'use_new_export_page': True, - 'use_new_files_uploads_page': True, - 'use_new_grading_page': True, - 'use_new_group_configurations_page': True, - 'use_new_home_page': True, - 'use_new_import_page': True, - 'use_new_schedule_details_page': True, - 'use_new_textbooks_page': True, - 'use_new_unit_page': True, - 'use_new_updates_page': True, - 'use_new_video_uploads_page': False, - 'use_react_markdown_editor': False, - 'use_video_gallery_flow': False, + "enable_course_optimizer": False, + "use_new_advanced_settings_page": True, + "use_new_certificates_page": True, + "use_new_course_outline_page": True, + "use_new_course_team_page": True, + "use_new_custom_pages": True, + "use_new_export_page": True, + "use_new_files_uploads_page": True, + "use_new_grading_page": True, + "use_new_group_configurations_page": True, + "use_new_home_page": True, + "use_new_import_page": True, + "use_new_schedule_details_page": True, + "use_new_textbooks_page": True, + "use_new_unit_page": True, + "use_new_updates_page": True, + "use_new_video_uploads_page": False, + "use_react_markdown_editor": False, + "use_video_gallery_flow": False, + "enable_course_optimizer_check_prev_run_links": False, } def setUp(self): @@ -44,6 +47,11 @@ class CourseWaffleFlagsViewTest(CourseTestCase): course_id=self.course.id, enabled=True, ) + WaffleFlagCourseOverrideModel.objects.create( + waffle_flag=toggles.ENABLE_COURSE_OPTIMIZER_CHECK_PREV_RUN_LINKS.name, + course_id=self.course.id, + enabled=True, + ) def test_global_defaults(self): url = reverse("cms.djangoapps.contentstore:v1:course_waffle_flags") @@ -59,4 +67,5 @@ class CourseWaffleFlagsViewTest(CourseTestCase): assert response.data == { **self.defaults, "enable_course_optimizer": True, + "enable_course_optimizer_check_prev_run_links": True, } diff --git a/cms/djangoapps/contentstore/tasks.py b/cms/djangoapps/contentstore/tasks.py index 239fb89840..06af6c4611 100644 --- a/cms/djangoapps/contentstore/tasks.py +++ b/cms/djangoapps/contentstore/tasks.py @@ -28,13 +28,13 @@ from edx_django_utils.monitoring import ( set_code_owner_attribute, set_code_owner_attribute_from_module, set_custom_attribute, - set_custom_attributes_for_course_key, + set_custom_attributes_for_course_key ) from olxcleaner.exceptions import ErrorLevel from olxcleaner.reporting import report_error_summary, report_errors from opaque_keys import InvalidKeyError from opaque_keys.edx.keys import CourseKey, UsageKey -from opaque_keys.edx.locator import LibraryLocator, LibraryContainerLocator +from opaque_keys.edx.locator import LibraryContainerLocator, LibraryLocator from organizations.api import add_organization_course, ensure_organization from organizations.exceptions import InvalidOrganizationException from organizations.models import Organization @@ -47,16 +47,19 @@ import cms.djangoapps.contentstore.errors as UserErrors from cms.djangoapps.contentstore.courseware_index import ( CoursewareSearchIndexer, LibrarySearchIndexer, - SearchIndexingError, + SearchIndexingError ) from cms.djangoapps.contentstore.storage import course_import_export_storage +from cms.djangoapps.contentstore.toggles import enable_course_optimizer_check_prev_run_links from cms.djangoapps.contentstore.utils import ( IMPORTABLE_FILE_TYPES, + _contains_previous_course_reference, + _get_previous_run_course_key, create_or_update_xblock_upstream_link, delete_course, initialize_permissions, reverse_usage_url, - translation_language, + translation_language ) from cms.djangoapps.contentstore.xblock_storage_handlers.view_handlers import get_block_info from cms.djangoapps.models.settings.course_metadata import CourseMetadata @@ -65,6 +68,7 @@ from common.djangoapps.static_replace import replace_static_urls from common.djangoapps.student.auth import has_course_author_access from common.djangoapps.student.roles import CourseInstructorRole, CourseStaffRole, LibraryUserRole from common.djangoapps.util.monitoring import monitor_import_failure +from lms.djangoapps.courseware.courses import get_course_info_usage_key from openedx.core.djangoapps.content.learning_sequences.api import key_supports_outlines from openedx.core.djangoapps.content_libraries import api as v2contentlib_api from openedx.core.djangoapps.content_tagging.api import make_copied_tags_editable @@ -75,6 +79,7 @@ from openedx.core.djangoapps.discussions.tasks import update_unit_discussion_sta from openedx.core.djangoapps.embargo.models import CountryAccessRule, RestrictedCourse from openedx.core.lib import ensure_cms from openedx.core.lib.extract_archive import safe_extractall +from openedx.core.lib.xblock_utils import get_course_update_items from xmodule.contentstore.django import contentstore from xmodule.course_block import CourseFields from xmodule.exceptions import SerializationError @@ -83,8 +88,9 @@ from xmodule.modulestore.django import modulestore from xmodule.modulestore.exceptions import DuplicateCourseError, InvalidProctoringProvider, ItemNotFoundError from xmodule.modulestore.xml_exporter import export_course_to_xml, export_library_to_xml from xmodule.modulestore.xml_importer import CourseImportException, import_course_from_xml, import_library_from_xml +from xmodule.tabs import StaticTab -from .models import ContainerLink, LearningContextLinksStatus, LearningContextLinksStatusChoices, ComponentLink +from .models import ComponentLink, ContainerLink, LearningContextLinksStatus, LearningContextLinksStatusChoices from .outlines import update_outline_from_modulestore from .outlines_regenerate import CourseOutlineRegenerate from .toggles import bypass_olx_failure_enabled @@ -116,6 +122,7 @@ class LinkState: BROKEN = 'broken' LOCKED = 'locked' EXTERNAL_FORBIDDEN = 'external-forbidden' + PREVIOUS_RUN = 'previous-run' def clone_instance(instance, field_values): @@ -1137,7 +1144,8 @@ def check_broken_links(self, user_id, course_key_string, language): def _check_broken_links(task_instance, user_id, course_key_string, language): """ - Checks for broken links in a course and store the results in a file. + Checks for broken links in a course and stores the results in a file. + Also checks for previous run links if the feature is enabled. """ user = _validate_user(task_instance, user_id, language) @@ -1145,13 +1153,29 @@ def _check_broken_links(task_instance, user_id, course_key_string, language): course_key = CourseKey.from_string(course_key_string) url_list = _scan_course_for_links(course_key) - validated_url_list = asyncio.run(_validate_urls_access_in_batches(url_list, course_key, batch_size=100)) + previous_run_links = [] + urls_to_validate = url_list + + if enable_course_optimizer_check_prev_run_links(course_key): + previous_run_course_key = _get_previous_run_course_key(course_key) + if previous_run_course_key: + + # Separate previous run links from regular links BEFORE validation + urls_to_validate = [] + for block_id, url in url_list: + if _contains_previous_course_reference(url, previous_run_course_key): + previous_run_links.append([block_id, url, LinkState.PREVIOUS_RUN]) + else: + urls_to_validate.append([block_id, url]) + + validated_url_list = asyncio.run(_validate_urls_access_in_batches(urls_to_validate, course_key, batch_size=100)) broken_or_locked_urls, retry_list = _filter_by_status(validated_url_list) if retry_list: retry_results = _retry_validation(retry_list, course_key, retry_count=3) broken_or_locked_urls.extend(retry_results) + all_links = broken_or_locked_urls + previous_run_links try: task_instance.status.increment_completed_steps() @@ -1160,9 +1184,9 @@ def _check_broken_links(task_instance, user_id, course_key_string, language): LOGGER.debug(f'[Link Check] json file being generated at {broken_links_file.name}') with open(broken_links_file.name, 'w') as file: - json.dump(broken_or_locked_urls, file, indent=4) + json.dump(all_links, file, indent=4) - _write_broken_links_to_file(broken_or_locked_urls, broken_links_file) + _write_broken_links_to_file(all_links, broken_links_file) artifact = UserTaskArtifact(status=task_instance.status, name='BrokenLinks') _save_broken_links_file(artifact, broken_links_file) @@ -1186,7 +1210,8 @@ def _validate_user(task, user_id, language): def _scan_course_for_links(course_key): """ - Scans a course for links found in the data contents of blocks. + Scans a course for links found in the data contents of + blocks, course updates, handouts, and custom pages. Returns: list: block id and URL pairs @@ -1205,6 +1230,7 @@ def _scan_course_for_links(course_key): ) blocks = [] urls_to_validate = [] + course = modulestore().get_course(course_key) for vertical in verticals: blocks.extend(vertical.get_children()) @@ -1220,13 +1246,31 @@ def _scan_course_for_links(course_key): url_list = _get_urls(block_data) urls_to_validate += [[block_id, url] for url in url_list] + course_updates_data = _scan_course_updates_for_links(course) + handouts_data = _scan_course_handouts_for_links(course) + custom_pages_data = _scan_custom_pages_for_links(course) + + for update in course_updates_data: + for url in update['urls']: + urls_to_validate.append([update['block_id'], url]) + + for handout in handouts_data: + for url in handout['urls']: + urls_to_validate.append([handout['block_id'], url]) + + for page in custom_pages_data: + for url in page['urls']: + urls_to_validate.append([page['block_id'], url]) + return urls_to_validate def _get_urls(content): """ Finds and returns a list of URLs in the given content. - Includes strings following 'href=' and 'src='. + Uses multiple regex patterns to find URLs in various contexts: + - URLs in href and src attributes + - Standalone URLs starting with http(s):// Excludes strings that are only '#' or start with 'data:'. Arguments: @@ -1235,11 +1279,130 @@ def _get_urls(content): Returns: list: urls """ - regex = r'\s+(?:href|src)=["\'](?!#|data:)([^"\']*)["\']' - url_list = re.findall(regex, content) + url_list = set() + + # Regex to match URLs in href and src attributes, or standalone URLs + regex = ( + r'(?:href|src)=["\'](?!#|data:)([^"\']+)["\']' + r'|(?:^|[\s\'"(<>])((?:https?://|http://|https://|www\.)[^\s\'")<>]+)(?=[\s\'")<>]|$)' + ) + + # Update list to include URLs found in the content + matches = re.findall(regex, content, re.IGNORECASE) + for match in matches: + url = match[0] or match[1] + if url: + url_list.add(url) + return url_list +def _scan_course_updates_for_links(course): + """ + Scans course updates for links. + + Returns: + list: course update data with links + """ + course_updates = [] + try: + store = modulestore() + usage_key = get_course_info_usage_key(course, "updates") + updates_block = store.get_item(usage_key) + + if updates_block and hasattr(updates_block, "data"): + update_items = get_course_update_items(updates_block) + + for update in update_items: + if update.get("status") != "deleted": + update_content = update.get("content", "") + url_list = _get_urls(update_content) + + course_updates.append( + { + "name": update.get("date", "Unknown"), + "block_id": str(usage_key), + "urls": url_list, + } + ) + + return course_updates + + return course_updates + except Exception as e: # pylint: disable=broad-exception-caught + LOGGER.debug(f"Error scanning course updates: {e}") + return course_updates + + +def _scan_course_handouts_for_links(course): + """ + Scans course handouts for links. + + Returns: + list: handouts data with links + """ + + course_handouts = [] + try: + store = modulestore() + usage_key = get_course_info_usage_key(course, "handouts") + handouts_block = store.get_item(usage_key) + + if handouts_block and hasattr(handouts_block, "data") and handouts_block.data: + url_list = _get_urls(handouts_block.data) + course_handouts.append( + {"name": "handouts", "block_id": str(usage_key), "urls": url_list} + ) + + return course_handouts + except Exception as e: # pylint: disable=broad-exception-caught + LOGGER.debug(f"Error scanning course handouts: {e}") + return course_handouts + + +def _scan_custom_pages_for_links(course): + """ + Scans custom pages (static tabs) for links. + + Returns: + list: custom pages data with links + """ + + custom_pages = [] + try: + store = modulestore() + course_key = course.id + + for tab in course.tabs: + if isinstance(tab, StaticTab): + try: + # Get the static tab content + # tab_locator = course_key.make_usage_key("static_tab", tab.url_slug) + static_tab_loc = course_key.make_usage_key( + "static_tab", tab.url_slug + ) + static_tab_block = store.get_item(static_tab_loc) + + if static_tab_block and hasattr(static_tab_block, "data"): + url_list = _get_urls(static_tab_block.data) + + custom_pages.append( + { + "name": tab.name, + "block_id": str(static_tab_loc), + "urls": url_list, + } + ) + except Exception as e: # pylint: disable=broad-exception-caught + LOGGER.debug(f"Error scanning static tab {tab.name}: {e}") + continue + + return custom_pages + except Exception as e: # pylint: disable=broad-exception-caught + LOGGER.debug(f"Error scanning custom pages: {e}") + return custom_pages + + async def _validate_urls_access_in_batches(url_list, course_key, batch_size=100): """ Returns the statuses of a list of URL requests. diff --git a/cms/djangoapps/contentstore/tests/test_tasks.py b/cms/djangoapps/contentstore/tests/test_tasks.py index 8634e7c6e5..fe7c33ac0d 100644 --- a/cms/djangoapps/contentstore/tests/test_tasks.py +++ b/cms/djangoapps/contentstore/tests/test_tasks.py @@ -667,4 +667,4 @@ class CheckBrokenLinksTaskTest(ModuleStoreTestCase): "https://validsite.com", "https://another-valid.com" ] - self.assertEqual(_get_urls(content), expected) + self.assertEqual(_get_urls(content), set(expected)) diff --git a/cms/djangoapps/contentstore/toggles.py b/cms/djangoapps/contentstore/toggles.py index 232bfc45d2..58b036a353 100644 --- a/cms/djangoapps/contentstore/toggles.py +++ b/cms/djangoapps/contentstore/toggles.py @@ -659,3 +659,26 @@ def use_legacy_logged_out_home(): If not, then we should just go to the login page w/ redirect to studio course listing. """ return LEGACY_STUDIO_LOGGED_OUT_HOME.is_enabled() + + +# .. toggle_name: contentstore.enable_course_optimizer_check_prev_run_links +# .. toggle_implementation: CourseWaffleFlag +# .. toggle_default: False +# .. toggle_description: When enabled, allows the Course Optimizer to detect and update links pointing to previous course runs. +# This feature enables instructors to automatically fix internal course links that still point to old course runs +# after creating a course rerun. +# .. toggle_use_cases: temporary, open_edx +# .. toggle_creation_date: 2025-07-21 +# .. toggle_target_removal_date: None +ENABLE_COURSE_OPTIMIZER_CHECK_PREV_RUN_LINKS = CourseWaffleFlag( + f'{CONTENTSTORE_NAMESPACE}.enable_course_optimizer_check_prev_run_links', + __name__, + CONTENTSTORE_LOG_PREFIX, +) + + +def enable_course_optimizer_check_prev_run_links(course_key): + """ + Returns a boolean if previous run course optimizer feature is enabled for the given course. + """ + return ENABLE_COURSE_OPTIMIZER_CHECK_PREV_RUN_LINKS.is_enabled(course_key) diff --git a/cms/djangoapps/contentstore/utils.py b/cms/djangoapps/contentstore/utils.py index c4049a818f..0d6bf190fa 100644 --- a/cms/djangoapps/contentstore/utils.py +++ b/cms/djangoapps/contentstore/utils.py @@ -2435,3 +2435,33 @@ def create_or_update_xblock_upstream_link(xblock, course_key: CourseKey, created # It is possible that the upstream is a container and UsageKeyV2 parse failed # Create upstream container link and raise InvalidKeyError if xblock.upstream is a valid key. _create_or_update_container_link(course_key, created, xblock) + + +def _get_previous_run_course_key(course_key): + """ + Retrieves the course key of the previous run for a given course. + """ + try: + rerun_state = CourseRerunState.objects.get(course_key=course_key) + except CourseRerunState.DoesNotExist: + log.warning(f'[Link Check] No rerun state found for course {course_key}. Cannot find previous run.') + return None + + return rerun_state.source_course_key + + +def _contains_previous_course_reference(url, previous_course_key): + """ + Checks if a URL contains references to the previous course. + + Arguments: + url: The URL to check + previous_course_key: The previous course key to look for + + Returns: + bool: True if URL contains reference to previous course + """ + if not previous_course_key: + return False + + return str(previous_course_key).lower() in url.lower() diff --git a/cms/lib/spectacular.py b/cms/lib/spectacular.py index 0a4ac831d3..2e05290fc0 100644 --- a/cms/lib/spectacular.py +++ b/cms/lib/spectacular.py @@ -10,7 +10,8 @@ def cms_api_filter(endpoints): """ filtered = [] CMS_PATH_PATTERN = re.compile( - r"^/api/contentstore/v0/(xblock|videos|video_transcripts|file_assets|youtube_transcripts)" + r"^/api/contentstore/v0/(xblock|videos|video_transcripts|file_assets|" + r"youtube_transcripts|link_check|link_check_status)" ) for path, path_regex, method, callback in endpoints: