feat: Enhance course optimizer to detect previous run links and expand scanning scope

2025-08-01 17:12:01 +05:30
parent f4d52e070b
commit 466aaad85d
11 changed files with 808 additions and 96 deletions
--- a/cms/djangoapps/contentstore/core/course_optimizer_provider.py
+++ b/cms/djangoapps/contentstore/core/course_optimizer_provider.py
@@ -2,15 +2,19 @@
 Logic for handling actions in Studio related to Course Optimizer.
 """
 import json
+
+from opaque_keys.edx.keys import CourseKey
 from user_tasks.conf import settings as user_tasks_settings
 from user_tasks.models import UserTaskArtifact, UserTaskStatus

-from cms.djangoapps.contentstore.tasks import CourseLinkCheckTask, LinkState
+from cms.djangoapps.contentstore.tasks import CourseLinkCheckTask, LinkState, _get_urls
 from cms.djangoapps.contentstore.xblock_storage_handlers.view_handlers import get_xblock
 from cms.djangoapps.contentstore.xblock_storage_handlers.xblock_helpers import usage_key_with_run
+from lms.djangoapps.courseware.courses import get_course_info_usage_key
+from openedx.core.lib.xblock_utils import get_course_update_items
 from xmodule.modulestore import ModuleStoreEnum
 from xmodule.modulestore.django import modulestore
-
+from xmodule.tabs import StaticTab

 # Restricts status in the REST API to only those which the requesting user has permission to view.
 #   These can be overwritten in django settings.
@@ -23,6 +27,7 @@ def get_link_check_data(request, course_id):
    """
    Retrives data and formats it for the link check get request.
    """
+    course_key = CourseKey.from_string(course_id)
    task_status = _latest_task_status(request, course_id)
    status = None
    created_at = None
@@ -43,7 +48,7 @@ def get_link_check_data(request, course_id):
            with artifact.file as file:
                content = file.read()
                json_content = json.loads(content)
-                broken_links_dto = generate_broken_links_descriptor(json_content, request.user)
+                broken_links_dto = generate_broken_links_descriptor(json_content, request.user, course_key)
        elif task_status.state in (UserTaskStatus.FAILED, UserTaskStatus.CANCELED):
            errors = UserTaskArtifact.objects.filter(status=task_status, name='Error')
            if errors:
@@ -53,7 +58,6 @@ def get_link_check_data(request, course_id):
                except ValueError:
                    # Wasn't JSON, just use the value as a string
                    pass
-
    data = {
        'LinkCheckStatus': status,
        **({'LinkCheckCreatedAt': created_at} if created_at else {}),
@@ -76,13 +80,16 @@ def _latest_task_status(request, course_key_string, view_func=None):
    return task_status.order_by('-created').first()


-def generate_broken_links_descriptor(json_content, request_user):
+def generate_broken_links_descriptor(json_content, request_user, course_key):
    """
    Returns a Data Transfer Object for frontend given a list of broken links.
+    Includes ALL link types: broken, locked, external-forbidden, and previous run links.
+    Now also includes course updates, handouts, and custom pages.

    ** Example json_content structure **
        Note: link_state is locked if the link is a studio link and returns 403
              link_state is external-forbidden if the link is not a studio link and returns 403
+              link_state is previous-run if the link points to a previous course run
    [
        ['block_id_1', 'link_1', link_state],
        ['block_id_1', 'link_2', link_state],
@@ -111,6 +118,7 @@ def generate_broken_links_descriptor(json_content, request_user):
                                        'url': 'url/to/block',
                                        'brokenLinks: [],
                                        'lockedLinks: [],
+                                        'previousRunLinks: []
                                    },
                                    ...,
                                ]
@@ -122,30 +130,40 @@ def generate_broken_links_descriptor(json_content, request_user):
                ]
            },
            ...,
+        ],
+        'course_updates': [
+            {
+                'name': 'published_date',
+                'url': 'url',
+                'brokenLinks': [],
+                'lockedLinks': [],
+                'externalForbiddenLinks': [],
+                'previousRunLinks': []
+            },
+            ...
+            {
+                'name': 'handouts',
+                'url': 'url',
+                'brokenLinks': [],
+                'lockedLinks': [],
+                'externalForbiddenLinks': [],
+                'previousRunLinks': []
+            }
+        ],
+        'custom_pages': [
+            {
+                'name': 'page_name',
+                'url': 'url',
+                'brokenLinks': [],
+                'lockedLinks': [],
+                'externalForbiddenLinks': [],
+                'previousRunLinks': []
+            },
+            ...
        ]
    }
    """
-    xblock_node_tree = {}   # tree representation of xblock relationships
-    xblock_dictionary = {}  # dictionary of xblock attributes
-
-    for item in json_content:
-        block_id, link, *rest = item
-        if rest:
-            link_state = rest[0]
-        else:
-            link_state = ''
-
-        usage_key = usage_key_with_run(block_id)
-        block = get_xblock(usage_key, request_user)
-        xblock_node_tree, xblock_dictionary = _update_node_tree_and_dictionary(
-            block=block,
-            link=link,
-            link_state=link_state,
-            node_tree=xblock_node_tree,
-            dictionary=xblock_dictionary
-        )
-
-    return _create_dto_recursive(xblock_node_tree, xblock_dictionary)
+    return _generate_enhanced_links_descriptor(json_content, request_user, course_key)


 def _update_node_tree_and_dictionary(block, link, link_state, node_tree, dictionary):
@@ -221,6 +239,8 @@ def _update_node_tree_and_dictionary(block, link, link_state, node_tree, diction
        updated_dictionary[xblock_id].setdefault('locked_links', []).append(link)
    elif link_state == LinkState.EXTERNAL_FORBIDDEN:
        updated_dictionary[xblock_id].setdefault('external_forbidden_links', []).append(link)
+    elif link_state == LinkState.PREVIOUS_RUN:
+        updated_dictionary[xblock_id].setdefault('previous_run_links', []).append(link)
    else:
        updated_dictionary[xblock_id].setdefault('broken_links', []).append(link)

@@ -277,7 +297,8 @@ def _create_dto_recursive(xblock_node, xblock_dictionary, parent_id=None):
                'url': xblock_data.get('url', ''),
                'brokenLinks': xblock_data.get('broken_links', []),
                'lockedLinks': xblock_data.get('locked_links', []),
-                'externalForbiddenLinks': xblock_data.get('external_forbidden_links', [])
+                'externalForbiddenLinks': xblock_data.get('external_forbidden_links', []),
+                'previousRunLinks': xblock_data.get('previous_run_links', [])
            })
        else:   # Non-leaf node
            category = xblock_data.get('category', None)
@@ -317,3 +338,268 @@ def sort_course_sections(course_key, data):
    ]

    return data
+
+
+def _generate_links_descriptor_for_content(json_content, request_user):
+    """
+    Creates a content tree of all links in a course and their states
+    Returns a structure containing all broken links and locked links for a course.
+    """
+    xblock_node_tree = {}
+    xblock_dictionary = {}
+
+    for item in json_content:
+        block_id, link, *rest = item
+        if rest:
+            link_state = rest[0]
+        else:
+            link_state = ""
+
+        usage_key = usage_key_with_run(block_id)
+        block = get_xblock(usage_key, request_user)
+        xblock_node_tree, xblock_dictionary = _update_node_tree_and_dictionary(
+            block=block,
+            link=link,
+            link_state=link_state,
+            node_tree=xblock_node_tree,
+            dictionary=xblock_dictionary,
+        )
+
+    result = _create_dto_recursive(xblock_node_tree, xblock_dictionary)
+    # Ensure we always return a valid structure with sections
+    if not isinstance(result, dict):
+        result = {"sections": []}
+
+    return result
+
+
+def _generate_enhanced_links_descriptor(json_content, request_user, course_key):
+    """
+    Generate enhanced link descriptor that includes course updates, handouts, and custom pages.
+    """
+
+    content_links = []
+    course_updates_links = []
+    handouts_links = []
+    custom_pages_links = []
+    course = modulestore().get_course(course_key)
+
+    for item in json_content:
+        block_id, link, *rest = item
+        if "course_info" in block_id and "updates" in block_id:
+            course_updates_links.append(item)
+        elif "course_info" in block_id and "handouts" in block_id:
+            handouts_links.append(item)
+        elif "static_tab" in block_id:
+            custom_pages_links.append(item)
+        else:
+            content_links.append(item)
+
+    main_content = _generate_links_descriptor_for_content(content_links, request_user)
+
+    if main_content is None:
+        main_content = {"sections": []}
+
+    course_updates_data = (
+        _generate_course_updates_structure(course, course_updates_links)
+        if course_updates_links and course else []
+    )
+
+    handouts_data = (
+        _generate_handouts_structure(course, handouts_links)
+        if handouts_links and course else []
+    )
+
+    custom_pages_data = (
+        _generate_custom_pages_structure(course, custom_pages_links)
+        if custom_pages_links and course else []
+    )
+
+    result = main_content.copy()
+    result["course_updates"] = course_updates_data + handouts_data
+    result["custom_pages"] = custom_pages_data
+    return result
+
+
+def _generate_enhanced_content_structure(course, content_links, content_type):
+    """
+    Unified function to generate structure for enhanced content (updates, handouts, custom pages).
+
+    Args:
+        course: Course object
+        content_links: List of link items for this content type
+        content_type: 'updates', 'handouts', or 'custom_pages'
+
+    Returns:
+        List of content items with categorized links
+    """
+    result = []
+    try:
+        if content_type == "custom_pages":
+            result = _generate_custom_pages_content(course, content_links)
+        elif content_type == "updates":
+            result = _generate_course_updates_content(course, content_links)
+        elif content_type == "handouts":
+            result = _generate_handouts_content(course, content_links)
+        return result
+    except Exception as e:   # pylint: disable=broad-exception-caught
+        return result
+
+
+def _generate_course_updates_content(course, updates_links):
+    """Generate course updates content with categorized links."""
+    store = modulestore()
+    usage_key = get_course_info_usage_key(course, "updates")
+    updates_block = store.get_item(usage_key)
+    course_updates = []
+
+    if not (updates_block and hasattr(updates_block, "data")):
+        return course_updates
+
+    update_items = get_course_update_items(updates_block)
+    if not update_items:
+        return course_updates
+
+    # Create link state mapping
+    link_state_map = {
+        item[1]: item[2] if len(item) >= 3 else LinkState.BROKEN
+        for item in updates_links if len(item) >= 2
+    }
+
+    for update in update_items:
+        if update.get("status") != "deleted":
+            update_content = update.get("content", "")
+            update_links = _get_urls(update_content) if update_content else []
+
+            # Match links with their states
+            update_link_data = _create_empty_links_data()
+            for link in update_links:
+                link_state = link_state_map.get(link)
+                if link_state is not None:
+                    _categorize_link_by_state(link, link_state, update_link_data)
+
+            course_updates.append(
+                {
+                    "name": update.get("date", "Unknown Date"),
+                    "url": f"/course/{str(course.id)}/course_info",
+                    **update_link_data,
+                }
+            )
+
+    return course_updates
+
+
+def _generate_handouts_content(course, handouts_links):
+    """Generate handouts content with categorized links."""
+    store = modulestore()
+    usage_key = get_course_info_usage_key(course, "handouts")
+    handouts_block = store.get_item(usage_key)
+    course_handouts = []
+
+    if not (
+        handouts_block
+        and hasattr(handouts_block, "data")
+        and handouts_block.data
+    ):
+        return course_handouts
+
+    # Create link state mapping for handouts
+    link_state_map = {
+        item[1]: item[2] if len(item) >= 3 else LinkState.BROKEN
+        for item in handouts_links if len(item) >= 2
+    }
+
+    links_data = _create_empty_links_data()
+    for link, link_state in link_state_map.items():
+        _categorize_link_by_state(link, link_state, links_data)
+
+    course_handouts = [
+        {
+            "name": "handouts",
+            "url": f"/course/{str(course.id)}/course_info",
+            **links_data,
+        }
+    ]
+    return course_handouts
+
+
+def _generate_custom_pages_content(course, custom_pages_links):
+    """Generate custom pages content with categorized links."""
+    custom_pages = []
+
+    if not course or not hasattr(course, "tabs"):
+        return custom_pages
+
+    # Group links by block_id and categorize them
+    links_by_page = {}
+    for item in custom_pages_links:
+        if len(item) >= 2:
+            block_id, link = item[0], item[1]
+            link_state = item[2] if len(item) >= 3 else LinkState.BROKEN
+            links_by_page.setdefault(block_id, _create_empty_links_data())
+            _categorize_link_by_state(link, link_state, links_by_page[block_id])
+
+    # Process static tabs and add their pages
+    for tab in course.tabs:
+        if isinstance(tab, StaticTab):
+            block_id = str(course.id.make_usage_key("static_tab", tab.url_slug))
+            custom_pages.append({
+                "name": tab.name,
+                "url": f"/course/{str(course.id)}/custom-pages",
+                **links_by_page.get(block_id, _create_empty_links_data()),
+            })
+
+    return custom_pages
+
+
+def _generate_course_updates_structure(course, updates_links):
+    """Generate structure for course updates."""
+    return _generate_enhanced_content_structure(course, updates_links, "updates")
+
+
+def _generate_handouts_structure(course, handouts_links):
+    """Generate structure for course handouts."""
+    return _generate_enhanced_content_structure(course, handouts_links, "handouts")
+
+
+def _generate_custom_pages_structure(course, custom_pages_links):
+    """Generate structure for custom pages (static tabs)."""
+    return _generate_enhanced_content_structure(
+        course, custom_pages_links, "custom_pages"
+    )
+
+
+def _categorize_link_by_state(link, link_state, links_data):
+    """
+    Helper function to categorize a link into the appropriate list based on its state.
+
+    Args:
+        link (str): The URL link to categorize
+        link_state (str): The state of the link (broken, locked, external-forbidden, previous-run)
+        links_data (dict): Dictionary containing the categorized link lists
+    """
+    state_to_key = {
+        LinkState.BROKEN: "brokenLinks",
+        LinkState.LOCKED: "lockedLinks",
+        LinkState.EXTERNAL_FORBIDDEN: "externalForbiddenLinks",
+        LinkState.PREVIOUS_RUN: "previousRunLinks"
+    }
+
+    key = state_to_key.get(link_state)
+    if key:
+        links_data[key].append(link)
+
+
+def _create_empty_links_data():
+    """
+    Helper function to create an empty links data structure.
+
+    Returns:
+        dict: Dictionary with empty lists for each link type
+    """
+    return {
+        "brokenLinks": [],
+        "lockedLinks": [],
+        "externalForbiddenLinks": [],
+        "previousRunLinks": [],
+    }
--- a/cms/djangoapps/contentstore/core/tests/test_course_optimizer_provider.py
+++ b/cms/djangoapps/contentstore/core/tests/test_course_optimizer_provider.py
@@ -1,16 +1,22 @@
 """
 Tests for course optimizer
 """
+
 from unittest import mock
 from unittest.mock import Mock

-from cms.djangoapps.contentstore.tests.utils import CourseTestCase
+from opaque_keys.edx.keys import CourseKey
+
 from cms.djangoapps.contentstore.core.course_optimizer_provider import (
-    _update_node_tree_and_dictionary,
    _create_dto_recursive,
+    _update_node_tree_and_dictionary,
+    generate_broken_links_descriptor,
    sort_course_sections
 )
-from cms.djangoapps.contentstore.tasks import LinkState
+from cms.djangoapps.contentstore.tasks import LinkState, _get_urls
+from cms.djangoapps.contentstore.tests.utils import CourseTestCase
+from cms.djangoapps.contentstore.utils import _contains_previous_course_reference
+from xmodule.tabs import StaticTab


 class TestLinkCheckProvider(CourseTestCase):
@@ -123,6 +129,7 @@ class TestLinkCheckProvider(CourseTestCase):
                    'brokenLinks': ['broken_link_1', 'broken_link_2'],
                    'lockedLinks': ['locked_link'],
                    'externalForbiddenLinks': ['forbidden_link_1'],
+                    'previousRunLinks': [],
                }
            ]
        }
@@ -181,6 +188,7 @@ class TestLinkCheckProvider(CourseTestCase):
                                            'brokenLinks': ['broken_link_1', 'broken_link_2'],
                                            'lockedLinks': ['locked_link'],
                                            'externalForbiddenLinks': ['forbidden_link_1'],
+                                            'previousRunLinks': [],
                                        }
                                    ]
                                }
@@ -295,3 +303,145 @@ class TestLinkCheckProvider(CourseTestCase):
        ]

        assert result["LinkCheckOutput"]["sections"] == expected_sections
+
+    def test_prev_run_link_detection(self):
+        """Test the core logic of separating previous run links from regular links."""
+
+        previous_course_key = CourseKey.from_string(
+            "course-v1:edX+DemoX+Demo_Course_2023"
+        )
+
+        test_cases = [
+            (f"/courses/{previous_course_key}/info", True),
+            (f"/courses/{previous_course_key}/courseware", True),
+            (f"/courses/{str(previous_course_key).upper()}/page", True),
+            # Should NOT match
+            ("/courses/course-v1:edX+DemoX+Demo_Course_2024/info", False),
+            ("/static/image.png", False),
+            ("/assets/courseware/file.pdf", False),
+            ("", False),
+            ("   ", False),
+        ]
+
+        for url, expected_match in test_cases:
+            with self.subTest(url=url, expected=expected_match):
+                result = _contains_previous_course_reference(url, previous_course_key)
+                self.assertEqual(
+                    result,
+                    expected_match,
+                    f"URL '{url}' should {'match' if expected_match else 'not match'} previous course",
+                )
+
+    def test_enhanced_url_detection_edge_cases(self):
+        """Test edge cases for enhanced URL detection."""
+
+        test_cases = [
+            ("", []),  # Empty content
+            ("No URLs here", []),  # Content without URLs
+            (
+                "Visit https://example.com today!",
+                ["https://example.com"],
+            ),  # URL in text
+            ('href="#anchor"', []),  # Should exclude fragments
+            ('src="data:image/png;base64,123"', []),  # Should exclude data URLs
+            (
+                "Multiple URLs: http://site1.com and https://site2.com",
+                ["http://site1.com", "https://site2.com"],
+            ),  # Multiple URLs
+            (
+                "URL with params: https://example.com/page?param=value&other=123",
+                ["https://example.com/page?param=value&other=123"],
+            ),  # URL with parameters
+        ]
+
+        for content, expected_urls in test_cases:
+            with self.subTest(content=content):
+                urls = _get_urls(content)
+                for expected_url in expected_urls:
+                    self.assertIn(
+                        expected_url,
+                        urls,
+                        f"Should find '{expected_url}' in content: {content}",
+                    )
+
+    def test_course_updates_and_custom_pages_structure(self):
+        """Test that course_updates and custom_pages are properly structured in the response."""
+
+        json_content = [
+            # Regular course content
+            [
+                "course-v1:Test+Course+2024+type@html+block@content1",
+                "http://content-link.com",
+                "broken",
+            ],
+            [
+                "course-v1:Test+Course+2024+type@vertical+block@unit1",
+                "http://unit-link.com",
+                "locked",
+            ],
+            # Course updates
+            [
+                "course-v1:Test+Course+2024+type@course_info+block@updates",
+                "http://update1.com",
+                "broken",
+            ],
+            [
+                "course-v1:Test+Course+2024+type@course_info+block@updates",
+                "http://update2.com",
+                "locked",
+            ],
+            # Handouts (should be merged into course_updates)
+            [
+                "course-v1:Test+Course+2024+type@course_info+block@handouts",
+                "http://handout.com",
+                "broken",
+            ],
+            # Custom pages (static tabs)
+            [
+                "course-v1:Test+Course+2024+type@static_tab+block@page1",
+                "http://page1.com",
+                "broken",
+            ],
+            [
+                "course-v1:Test+Course+2024+type@static_tab+block@page2",
+                "http://page2.com",
+                "external-forbidden",
+            ],
+        ]
+
+        with mock.patch(
+            "cms.djangoapps.contentstore.core.course_optimizer_provider._generate_links_descriptor_for_content"
+        ) as mock_content, mock.patch(
+            "cms.djangoapps.contentstore.core.course_optimizer_provider.modulestore"
+        ) as mock_modulestore:
+
+            mock_content.return_value = {"sections": []}
+            mock_course = self.mock_course
+            mock_tab1 = StaticTab(name="Page1", url_slug="page1")
+            mock_tab2 = StaticTab(name="Page2", url_slug="page2")
+            mock_course.tabs = [mock_tab1, mock_tab2]
+            mock_course.id = CourseKey.from_string("course-v1:Test+Course+2024")
+            mock_modulestore.return_value.get_course.return_value = mock_course
+
+            course_key = CourseKey.from_string("course-v1:Test+Course+2024")
+            result = generate_broken_links_descriptor(
+                json_content, self.user, course_key
+            )
+
+            # Verify top-level structure
+            self.assertIn("sections", result)
+            self.assertIn("course_updates", result)
+            self.assertIn("custom_pages", result)
+            self.assertNotIn("handouts", result)
+
+            # Course updates should include both updates and handouts
+            self.assertGreaterEqual(
+                len(result["course_updates"]),
+                1,
+                "Should have course updates/handouts",
+            )
+
+            # Custom pages should have custom pages data
+            self.assertGreaterEqual(
+                len(result["custom_pages"]), 1, "Should have custom pages"
+            )
--- a/cms/djangoapps/contentstore/rest_api/v0/serializers/course_optimizer.py
+++ b/cms/djangoapps/contentstore/rest_api/v0/serializers/course_optimizer.py
@@ -13,6 +13,7 @@ class LinkCheckBlockSerializer(serializers.Serializer):
    brokenLinks = serializers.ListField(required=False)
    lockedLinks = serializers.ListField(required=False)
    externalForbiddenLinks = serializers.ListField(required=False)
+    previousRunLinks = serializers.ListField(required=False)


 class LinkCheckUnitSerializer(serializers.Serializer):
@@ -36,9 +37,21 @@ class LinkCheckSectionSerializer(serializers.Serializer):
    subsections = LinkCheckSubsectionSerializer(many=True)


+class LinkCheckContentItemSerializer(serializers.Serializer):
+    """ Serializer for course content items like updates, handouts, and custom pages """
+    name = serializers.CharField(required=True, allow_null=False, allow_blank=False)
+    url = serializers.CharField(required=True, allow_null=False, allow_blank=False)
+    brokenLinks = serializers.ListField(required=False)
+    lockedLinks = serializers.ListField(required=False)
+    externalForbiddenLinks = serializers.ListField(required=False)
+    previousRunLinks = serializers.ListField(required=False)
+
+
 class LinkCheckOutputSerializer(serializers.Serializer):
    """ Serializer for broken links output model data """
    sections = LinkCheckSectionSerializer(many=True)
+    course_updates = LinkCheckContentItemSerializer(many=True, required=False)
+    custom_pages = LinkCheckContentItemSerializer(many=True, required=False)


 class LinkCheckSerializer(serializers.Serializer):
--- a/cms/djangoapps/contentstore/rest_api/v0/views/course_optimizer.py
+++ b/cms/djangoapps/contentstore/rest_api/v0/views/course_optimizer.py
@@ -71,53 +71,49 @@ class LinkCheckStatusView(DeveloperErrorViewMixin, APIView):
    )
    def get(self, request: Request, course_id: str):
        """
-        GET handler to return the status of the link_check task from UserTaskStatus.
-        If no task has been started for the course, return 'Uninitiated'.
-        If link_check task was successful, an output result is also returned.
+        **Use Case**

-        For reference, the following status are in UserTaskStatus:
-            'Pending', 'In Progress' (sent to frontend as 'In-Progress'),
-            'Succeeded', 'Failed', 'Canceled', 'Retrying'
-        This function adds a status for when status from UserTaskStatus is None:
-            'Uninitiated'
+            GET handler to return the status of the link_check task from UserTaskStatus.
+            If no task has been started for the course, return 'Uninitiated'.
+            If link_check task was successful, an output result is also returned.
+
+            For reference, the following status are in UserTaskStatus:
+                'Pending', 'In Progress' (sent to frontend as 'In-Progress'),
+                'Succeeded', 'Failed', 'Canceled', 'Retrying'
+            This function adds a status for when status from UserTaskStatus is None:
+                'Uninitiated'

        **Example Request**
+
            GET /api/contentstore/v0/link_check_status/{course_id}

        **Example Response**
+
        ```json
        {
            "LinkCheckStatus": "Succeeded",
            "LinkCheckCreatedAt": "2025-02-05T14:32:01.294587Z",
            "LinkCheckOutput": {
-                sections: [
+                "sections": [
                    {
-                        id: <string>,
-                        displayName: <string>,
-                        subsections: [
+                        "id": <string>,
+                        "displayName": <string>,
+                        "subsections": [
                            {
-                                id: <string>,
-                                displayName: <string>,
-                                units: [
+                                "id": <string>,
+                                "displayName": <string>,
+                                "units": [
                                    {
-                                        id: <string>,
-                                        displayName: <string>,
-                                        blocks: [
+                                        "id": <string>,
+                                        "displayName": <string>,
+                                        "blocks": [
                                            {
-                                                id: <string>,
-                                                url: <string>,
-                                                brokenLinks: [
-                                                    <string>,
-                                                    <string>,
-                                                    <string>,
-                                                    ...,
-                                                ],
-                                                lockedLinks: [
-                                                    <string>,
-                                                    <string>,
-                                                    <string>,
-                                                    ...,
-                                                ],
+                                                "id": <string>,
+                                                "url": <string>,
+                                                "brokenLinks": [<string>, ...],
+                                                "lockedLinks": [<string>, ...],
+                                                "externalForbiddenLinks": [<string>, ...],
+                                                "previousRunLinks": [<string>, ...]
                                            },
                                            { <another block> },
                                        ],
@@ -130,6 +126,39 @@ class LinkCheckStatusView(DeveloperErrorViewMixin, APIView):
                    },
                    { <another section> },
                ],
+                "course_updates": [
+                    {
+                        "name": <string>,
+                        "url": <string>,
+                        "brokenLinks": [<string>, ...],
+                        "lockedLinks": [<string>, ...],
+                        "externalForbiddenLinks": [<string>, ...],
+                        "previousRunLinks": [<string>, ...]
+                    },
+                    ...,
+                    { <another course-updates> },
+                    ...,
+                    {
+                        "name": "handouts",
+                        "url": <string>,
+                        "brokenLinks": [<string>, ...],
+                        "lockedLinks": [<string>, ...],
+                        "externalForbiddenLinks": [<string>, ...],
+                        "previousRunLinks": [<string>, ...]
+                    }
+                ],
+                "custom_pages": [
+                    {
+                        "name": <string>,
+                        "url": <string>,
+                        "brokenLinks": [<string>, ...],
+                        "lockedLinks": [<string>, ...],
+                        "externalForbiddenLinks": [<string>, ...],
+                        "previousRunLinks": [<string>, ...]
+                    },
+                    ...,
+                    { <another page> },
+                ]
            },
        }
        """
--- a/cms/djangoapps/contentstore/rest_api/v1/serializers/course_waffle_flags.py
+++ b/cms/djangoapps/contentstore/rest_api/v1/serializers/course_waffle_flags.py
@@ -30,6 +30,7 @@ class CourseWaffleFlagsSerializer(serializers.Serializer):
    enable_course_optimizer = serializers.SerializerMethodField()
    use_react_markdown_editor = serializers.SerializerMethodField()
    use_video_gallery_flow = serializers.SerializerMethodField()
+    enable_course_optimizer_check_prev_run_links = serializers.SerializerMethodField()

    def get_course_key(self):
        """
@@ -167,3 +168,10 @@ class CourseWaffleFlagsSerializer(serializers.Serializer):
        Method to get the use_video_gallery_flow waffle flag
        """
        return toggles.use_video_gallery_flow()
+
+    def get_enable_course_optimizer_check_prev_run_links(self, obj):
+        """
+        Method to get the enable_course_optimizer_check_prev_run_links waffle flag
+        """
+        course_key = self.get_course_key()
+        return toggles.enable_course_optimizer_check_prev_run_links(course_key)
--- a/cms/djangoapps/contentstore/rest_api/v1/views/tests/test_course_waffle_flags.py
+++ b/cms/djangoapps/contentstore/rest_api/v1/views/tests/test_course_waffle_flags.py
@@ -1,6 +1,7 @@
 """
 Unit tests for the course waffle flags view
 """
+
 from django.urls import reverse

 from cms.djangoapps.contentstore import toggles
@@ -13,28 +14,30 @@ class CourseWaffleFlagsViewTest(CourseTestCase):
    Basic test for the CourseWaffleFlagsView endpoint, which returns waffle flag states
    for a specific course or globally if no course ID is provided.
    """
+
    maxDiff = None  # Show the whole dictionary in the diff

    defaults = {
-        'enable_course_optimizer': False,
-        'use_new_advanced_settings_page': True,
-        'use_new_certificates_page': True,
-        'use_new_course_outline_page': True,
-        'use_new_course_team_page': True,
-        'use_new_custom_pages': True,
-        'use_new_export_page': True,
-        'use_new_files_uploads_page': True,
-        'use_new_grading_page': True,
-        'use_new_group_configurations_page': True,
-        'use_new_home_page': True,
-        'use_new_import_page': True,
-        'use_new_schedule_details_page': True,
-        'use_new_textbooks_page': True,
-        'use_new_unit_page': True,
-        'use_new_updates_page': True,
-        'use_new_video_uploads_page': False,
-        'use_react_markdown_editor': False,
-        'use_video_gallery_flow': False,
+        "enable_course_optimizer": False,
+        "use_new_advanced_settings_page": True,
+        "use_new_certificates_page": True,
+        "use_new_course_outline_page": True,
+        "use_new_course_team_page": True,
+        "use_new_custom_pages": True,
+        "use_new_export_page": True,
+        "use_new_files_uploads_page": True,
+        "use_new_grading_page": True,
+        "use_new_group_configurations_page": True,
+        "use_new_home_page": True,
+        "use_new_import_page": True,
+        "use_new_schedule_details_page": True,
+        "use_new_textbooks_page": True,
+        "use_new_unit_page": True,
+        "use_new_updates_page": True,
+        "use_new_video_uploads_page": False,
+        "use_react_markdown_editor": False,
+        "use_video_gallery_flow": False,
+        "enable_course_optimizer_check_prev_run_links": False,
    }

    def setUp(self):
@@ -44,6 +47,11 @@ class CourseWaffleFlagsViewTest(CourseTestCase):
            course_id=self.course.id,
            enabled=True,
        )
+        WaffleFlagCourseOverrideModel.objects.create(
+            waffle_flag=toggles.ENABLE_COURSE_OPTIMIZER_CHECK_PREV_RUN_LINKS.name,
+            course_id=self.course.id,
+            enabled=True,
+        )

    def test_global_defaults(self):
        url = reverse("cms.djangoapps.contentstore:v1:course_waffle_flags")
@@ -59,4 +67,5 @@ class CourseWaffleFlagsViewTest(CourseTestCase):
        assert response.data == {
            **self.defaults,
            "enable_course_optimizer": True,
+            "enable_course_optimizer_check_prev_run_links": True,
        }
--- a/cms/djangoapps/contentstore/tasks.py
+++ b/cms/djangoapps/contentstore/tasks.py
@@ -28,13 +28,13 @@ from edx_django_utils.monitoring import (
    set_code_owner_attribute,
    set_code_owner_attribute_from_module,
    set_custom_attribute,
-    set_custom_attributes_for_course_key,
+    set_custom_attributes_for_course_key
 )
 from olxcleaner.exceptions import ErrorLevel
 from olxcleaner.reporting import report_error_summary, report_errors
 from opaque_keys import InvalidKeyError
 from opaque_keys.edx.keys import CourseKey, UsageKey
-from opaque_keys.edx.locator import LibraryLocator, LibraryContainerLocator
+from opaque_keys.edx.locator import LibraryContainerLocator, LibraryLocator
 from organizations.api import add_organization_course, ensure_organization
 from organizations.exceptions import InvalidOrganizationException
 from organizations.models import Organization
@@ -47,16 +47,19 @@ import cms.djangoapps.contentstore.errors as UserErrors
 from cms.djangoapps.contentstore.courseware_index import (
    CoursewareSearchIndexer,
    LibrarySearchIndexer,
-    SearchIndexingError,
+    SearchIndexingError
 )
 from cms.djangoapps.contentstore.storage import course_import_export_storage
+from cms.djangoapps.contentstore.toggles import enable_course_optimizer_check_prev_run_links
 from cms.djangoapps.contentstore.utils import (
    IMPORTABLE_FILE_TYPES,
+    _contains_previous_course_reference,
+    _get_previous_run_course_key,
    create_or_update_xblock_upstream_link,
    delete_course,
    initialize_permissions,
    reverse_usage_url,
-    translation_language,
+    translation_language
 )
 from cms.djangoapps.contentstore.xblock_storage_handlers.view_handlers import get_block_info
 from cms.djangoapps.models.settings.course_metadata import CourseMetadata
@@ -65,6 +68,7 @@ from common.djangoapps.static_replace import replace_static_urls
 from common.djangoapps.student.auth import has_course_author_access
 from common.djangoapps.student.roles import CourseInstructorRole, CourseStaffRole, LibraryUserRole
 from common.djangoapps.util.monitoring import monitor_import_failure
+from lms.djangoapps.courseware.courses import get_course_info_usage_key
 from openedx.core.djangoapps.content.learning_sequences.api import key_supports_outlines
 from openedx.core.djangoapps.content_libraries import api as v2contentlib_api
 from openedx.core.djangoapps.content_tagging.api import make_copied_tags_editable
@@ -75,6 +79,7 @@ from openedx.core.djangoapps.discussions.tasks import update_unit_discussion_sta
 from openedx.core.djangoapps.embargo.models import CountryAccessRule, RestrictedCourse
 from openedx.core.lib import ensure_cms
 from openedx.core.lib.extract_archive import safe_extractall
+from openedx.core.lib.xblock_utils import get_course_update_items
 from xmodule.contentstore.django import contentstore
 from xmodule.course_block import CourseFields
 from xmodule.exceptions import SerializationError
@@ -83,8 +88,9 @@ from xmodule.modulestore.django import modulestore
 from xmodule.modulestore.exceptions import DuplicateCourseError, InvalidProctoringProvider, ItemNotFoundError
 from xmodule.modulestore.xml_exporter import export_course_to_xml, export_library_to_xml
 from xmodule.modulestore.xml_importer import CourseImportException, import_course_from_xml, import_library_from_xml
+from xmodule.tabs import StaticTab

-from .models import ContainerLink, LearningContextLinksStatus, LearningContextLinksStatusChoices, ComponentLink
+from .models import ComponentLink, ContainerLink, LearningContextLinksStatus, LearningContextLinksStatusChoices
 from .outlines import update_outline_from_modulestore
 from .outlines_regenerate import CourseOutlineRegenerate
 from .toggles import bypass_olx_failure_enabled
@@ -116,6 +122,7 @@ class LinkState:
    BROKEN = 'broken'
    LOCKED = 'locked'
    EXTERNAL_FORBIDDEN = 'external-forbidden'
+    PREVIOUS_RUN = 'previous-run'


 def clone_instance(instance, field_values):
@@ -1137,7 +1144,8 @@ def check_broken_links(self, user_id, course_key_string, language):

 def _check_broken_links(task_instance, user_id, course_key_string, language):
    """
-    Checks for broken links in a course and store the results in a file.
+    Checks for broken links in a course and stores the results in a file.
+    Also checks for previous run links if the feature is enabled.
    """
    user = _validate_user(task_instance, user_id, language)

@@ -1145,13 +1153,29 @@ def _check_broken_links(task_instance, user_id, course_key_string, language):
    course_key = CourseKey.from_string(course_key_string)

    url_list = _scan_course_for_links(course_key)
-    validated_url_list = asyncio.run(_validate_urls_access_in_batches(url_list, course_key, batch_size=100))
+    previous_run_links = []
+    urls_to_validate = url_list
+
+    if enable_course_optimizer_check_prev_run_links(course_key):
+        previous_run_course_key = _get_previous_run_course_key(course_key)
+        if previous_run_course_key:
+
+            # Separate previous run links from regular links BEFORE validation
+            urls_to_validate = []
+            for block_id, url in url_list:
+                if _contains_previous_course_reference(url, previous_run_course_key):
+                    previous_run_links.append([block_id, url, LinkState.PREVIOUS_RUN])
+                else:
+                    urls_to_validate.append([block_id, url])
+
+    validated_url_list = asyncio.run(_validate_urls_access_in_batches(urls_to_validate, course_key, batch_size=100))
    broken_or_locked_urls, retry_list = _filter_by_status(validated_url_list)

    if retry_list:
        retry_results = _retry_validation(retry_list, course_key, retry_count=3)
        broken_or_locked_urls.extend(retry_results)

+    all_links = broken_or_locked_urls + previous_run_links
    try:
        task_instance.status.increment_completed_steps()

@@ -1160,9 +1184,9 @@ def _check_broken_links(task_instance, user_id, course_key_string, language):
        LOGGER.debug(f'[Link Check] json file being generated at {broken_links_file.name}')

        with open(broken_links_file.name, 'w') as file:
-            json.dump(broken_or_locked_urls, file, indent=4)
+            json.dump(all_links, file, indent=4)

-        _write_broken_links_to_file(broken_or_locked_urls, broken_links_file)
+        _write_broken_links_to_file(all_links, broken_links_file)

        artifact = UserTaskArtifact(status=task_instance.status, name='BrokenLinks')
        _save_broken_links_file(artifact, broken_links_file)
@@ -1186,7 +1210,8 @@ def _validate_user(task, user_id, language):

 def _scan_course_for_links(course_key):
    """
-    Scans a course for links found in the data contents of blocks.
+    Scans a course for links found in the data contents of
+    blocks, course updates, handouts, and custom pages.

    Returns:
        list: block id and URL pairs
@@ -1205,6 +1230,7 @@ def _scan_course_for_links(course_key):
    )
    blocks = []
    urls_to_validate = []
+    course = modulestore().get_course(course_key)

    for vertical in verticals:
        blocks.extend(vertical.get_children())
@@ -1220,13 +1246,31 @@ def _scan_course_for_links(course_key):
        url_list = _get_urls(block_data)
        urls_to_validate += [[block_id, url] for url in url_list]

+    course_updates_data = _scan_course_updates_for_links(course)
+    handouts_data = _scan_course_handouts_for_links(course)
+    custom_pages_data = _scan_custom_pages_for_links(course)
+
+    for update in course_updates_data:
+        for url in update['urls']:
+            urls_to_validate.append([update['block_id'], url])
+
+    for handout in handouts_data:
+        for url in handout['urls']:
+            urls_to_validate.append([handout['block_id'], url])
+
+    for page in custom_pages_data:
+        for url in page['urls']:
+            urls_to_validate.append([page['block_id'], url])
+
    return urls_to_validate


 def _get_urls(content):
    """
    Finds and returns a list of URLs in the given content.
-    Includes strings following 'href=' and 'src='.
+    Uses multiple regex patterns to find URLs in various contexts:
+    - URLs in href and src attributes
+    - Standalone URLs starting with http(s)://
    Excludes strings that are only '#' or start with 'data:'.

    Arguments:
@@ -1235,11 +1279,130 @@ def _get_urls(content):
    Returns:
        list: urls
    """
-    regex = r'\s+(?:href|src)=["\'](?!#|data:)([^"\']*)["\']'
-    url_list = re.findall(regex, content)
+    url_list = set()
+
+    # Regex to match URLs in href and src attributes, or standalone URLs
+    regex = (
+        r'(?:href|src)=["\'](?!#|data:)([^"\']+)["\']'
+        r'|(?:^|[\s\'"(<>])((?:https?://|http://|https://|www\.)[^\s\'")<>]+)(?=[\s\'")<>]|$)'
+    )
+
+    # Update list to include URLs found in the content
+    matches = re.findall(regex, content, re.IGNORECASE)
+    for match in matches:
+        url = match[0] or match[1]
+        if url:
+            url_list.add(url)
+
    return url_list


+def _scan_course_updates_for_links(course):
+    """
+    Scans course updates for links.
+
+    Returns:
+        list: course update data with links
+    """
+    course_updates = []
+    try:
+        store = modulestore()
+        usage_key = get_course_info_usage_key(course, "updates")
+        updates_block = store.get_item(usage_key)
+
+        if updates_block and hasattr(updates_block, "data"):
+            update_items = get_course_update_items(updates_block)
+
+            for update in update_items:
+                if update.get("status") != "deleted":
+                    update_content = update.get("content", "")
+                    url_list = _get_urls(update_content)
+
+                    course_updates.append(
+                        {
+                            "name": update.get("date", "Unknown"),
+                            "block_id": str(usage_key),
+                            "urls": url_list,
+                        }
+                    )
+
+            return course_updates
+
+        return course_updates
+    except Exception as e:  # pylint: disable=broad-exception-caught
+        LOGGER.debug(f"Error scanning course updates: {e}")
+        return course_updates
+
+
+def _scan_course_handouts_for_links(course):
+    """
+    Scans course handouts for links.
+
+    Returns:
+        list: handouts data with links
+    """
+
+    course_handouts = []
+    try:
+        store = modulestore()
+        usage_key = get_course_info_usage_key(course, "handouts")
+        handouts_block = store.get_item(usage_key)
+
+        if handouts_block and hasattr(handouts_block, "data") and handouts_block.data:
+            url_list = _get_urls(handouts_block.data)
+            course_handouts.append(
+                {"name": "handouts", "block_id": str(usage_key), "urls": url_list}
+            )
+
+        return course_handouts
+    except Exception as e:  # pylint: disable=broad-exception-caught
+        LOGGER.debug(f"Error scanning course handouts: {e}")
+        return course_handouts
+
+
+def _scan_custom_pages_for_links(course):
+    """
+    Scans custom pages (static tabs) for links.
+
+    Returns:
+        list: custom pages data with links
+    """
+
+    custom_pages = []
+    try:
+        store = modulestore()
+        course_key = course.id
+
+        for tab in course.tabs:
+            if isinstance(tab, StaticTab):
+                try:
+                    # Get the static tab content
+                    # tab_locator = course_key.make_usage_key("static_tab", tab.url_slug)
+                    static_tab_loc = course_key.make_usage_key(
+                        "static_tab", tab.url_slug
+                    )
+                    static_tab_block = store.get_item(static_tab_loc)
+
+                    if static_tab_block and hasattr(static_tab_block, "data"):
+                        url_list = _get_urls(static_tab_block.data)
+
+                        custom_pages.append(
+                            {
+                                "name": tab.name,
+                                "block_id": str(static_tab_loc),
+                                "urls": url_list,
+                            }
+                        )
+                except Exception as e:  # pylint: disable=broad-exception-caught
+                    LOGGER.debug(f"Error scanning static tab {tab.name}: {e}")
+                    continue
+
+        return custom_pages
+    except Exception as e:  # pylint: disable=broad-exception-caught
+        LOGGER.debug(f"Error scanning custom pages: {e}")
+        return custom_pages
+
+
 async def _validate_urls_access_in_batches(url_list, course_key, batch_size=100):
    """
    Returns the statuses of a list of URL requests.
--- a/cms/djangoapps/contentstore/tests/test_tasks.py
+++ b/cms/djangoapps/contentstore/tests/test_tasks.py
@@ -667,4 +667,4 @@ class CheckBrokenLinksTaskTest(ModuleStoreTestCase):
            "https://validsite.com",
            "https://another-valid.com"
        ]
-        self.assertEqual(_get_urls(content), expected)
+        self.assertEqual(_get_urls(content), set(expected))
--- a/cms/djangoapps/contentstore/toggles.py
+++ b/cms/djangoapps/contentstore/toggles.py
@@ -659,3 +659,26 @@ def use_legacy_logged_out_home():
    If not, then we should just go to the login page w/ redirect to studio course listing.
    """
    return LEGACY_STUDIO_LOGGED_OUT_HOME.is_enabled()
+
+
+# .. toggle_name: contentstore.enable_course_optimizer_check_prev_run_links
+# .. toggle_implementation: CourseWaffleFlag
+# .. toggle_default: False
+# .. toggle_description: When enabled, allows the Course Optimizer to detect and update links pointing to previous course runs.
+#   This feature enables instructors to automatically fix internal course links that still point to old course runs
+#   after creating a course rerun.
+# .. toggle_use_cases: temporary, open_edx
+# .. toggle_creation_date: 2025-07-21
+# .. toggle_target_removal_date: None
+ENABLE_COURSE_OPTIMIZER_CHECK_PREV_RUN_LINKS = CourseWaffleFlag(
+    f'{CONTENTSTORE_NAMESPACE}.enable_course_optimizer_check_prev_run_links',
+    __name__,
+    CONTENTSTORE_LOG_PREFIX,
+)
+
+
+def enable_course_optimizer_check_prev_run_links(course_key):
+    """
+    Returns a boolean if previous run course optimizer feature is enabled for the given course.
+    """
+    return ENABLE_COURSE_OPTIMIZER_CHECK_PREV_RUN_LINKS.is_enabled(course_key)
--- a/cms/djangoapps/contentstore/utils.py
+++ b/cms/djangoapps/contentstore/utils.py
@@ -2435,3 +2435,33 @@ def create_or_update_xblock_upstream_link(xblock, course_key: CourseKey, created
        # It is possible that the upstream is a container and UsageKeyV2 parse failed
        # Create upstream container link and raise InvalidKeyError if xblock.upstream is a valid key.
        _create_or_update_container_link(course_key, created, xblock)
+
+
+def _get_previous_run_course_key(course_key):
+    """
+    Retrieves the course key of the previous run for a given course.
+    """
+    try:
+        rerun_state = CourseRerunState.objects.get(course_key=course_key)
+    except CourseRerunState.DoesNotExist:
+        log.warning(f'[Link Check] No rerun state found for course {course_key}. Cannot find previous run.')
+        return None
+
+    return rerun_state.source_course_key
+
+
+def _contains_previous_course_reference(url, previous_course_key):
+    """
+    Checks if a URL contains references to the previous course.
+
+    Arguments:
+        url: The URL to check
+        previous_course_key: The previous course key to look for
+
+    Returns:
+        bool: True if URL contains reference to previous course
+    """
+    if not previous_course_key:
+        return False
+
+    return str(previous_course_key).lower() in url.lower()
--- a/cms/lib/spectacular.py
+++ b/cms/lib/spectacular.py
@@ -10,7 +10,8 @@ def cms_api_filter(endpoints):
    """
    filtered = []
    CMS_PATH_PATTERN = re.compile(
-        r"^/api/contentstore/v0/(xblock|videos|video_transcripts|file_assets|youtube_transcripts)"
+        r"^/api/contentstore/v0/(xblock|videos|video_transcripts|file_assets|"
+        r"youtube_transcripts|link_check|link_check_status)"
    )

    for path, path_regex, method, callback in endpoints: