feat: Enhance course optimizer to detect previous run links and expand scanning scope

This commit is contained in:
Devasia Joseph
2025-08-01 17:12:01 +05:30
committed by Muhammad Faraz Maqsood
parent f4d52e070b
commit 466aaad85d
11 changed files with 808 additions and 96 deletions

View File

@@ -2,15 +2,19 @@
Logic for handling actions in Studio related to Course Optimizer.
"""
import json
from opaque_keys.edx.keys import CourseKey
from user_tasks.conf import settings as user_tasks_settings
from user_tasks.models import UserTaskArtifact, UserTaskStatus
from cms.djangoapps.contentstore.tasks import CourseLinkCheckTask, LinkState
from cms.djangoapps.contentstore.tasks import CourseLinkCheckTask, LinkState, _get_urls
from cms.djangoapps.contentstore.xblock_storage_handlers.view_handlers import get_xblock
from cms.djangoapps.contentstore.xblock_storage_handlers.xblock_helpers import usage_key_with_run
from lms.djangoapps.courseware.courses import get_course_info_usage_key
from openedx.core.lib.xblock_utils import get_course_update_items
from xmodule.modulestore import ModuleStoreEnum
from xmodule.modulestore.django import modulestore
from xmodule.tabs import StaticTab
# Restricts status in the REST API to only those which the requesting user has permission to view.
# These can be overwritten in django settings.
@@ -23,6 +27,7 @@ def get_link_check_data(request, course_id):
"""
Retrieves data and formats it for the link check get request.
"""
course_key = CourseKey.from_string(course_id)
task_status = _latest_task_status(request, course_id)
status = None
created_at = None
@@ -43,7 +48,7 @@ def get_link_check_data(request, course_id):
with artifact.file as file:
content = file.read()
json_content = json.loads(content)
broken_links_dto = generate_broken_links_descriptor(json_content, request.user)
broken_links_dto = generate_broken_links_descriptor(json_content, request.user, course_key)
elif task_status.state in (UserTaskStatus.FAILED, UserTaskStatus.CANCELED):
errors = UserTaskArtifact.objects.filter(status=task_status, name='Error')
if errors:
@@ -53,7 +58,6 @@ def get_link_check_data(request, course_id):
except ValueError:
# Wasn't JSON, just use the value as a string
pass
data = {
'LinkCheckStatus': status,
**({'LinkCheckCreatedAt': created_at} if created_at else {}),
@@ -76,13 +80,16 @@ def _latest_task_status(request, course_key_string, view_func=None):
return task_status.order_by('-created').first()
def generate_broken_links_descriptor(json_content, request_user):
def generate_broken_links_descriptor(json_content, request_user, course_key):
"""
Returns a Data Transfer Object for frontend given a list of broken links.
Includes ALL link types: broken, locked, external-forbidden, and previous run links.
Now also includes course updates, handouts, and custom pages.
** Example json_content structure **
Note: link_state is locked if the link is a studio link and returns 403
link_state is external-forbidden if the link is not a studio link and returns 403
link_state is previous-run if the link points to a previous course run
[
['block_id_1', 'link_1', link_state],
['block_id_1', 'link_2', link_state],
@@ -111,6 +118,7 @@ def generate_broken_links_descriptor(json_content, request_user):
'url': 'url/to/block',
'brokenLinks': [],
'lockedLinks': [],
'previousRunLinks': []
},
...,
]
@@ -122,30 +130,40 @@ def generate_broken_links_descriptor(json_content, request_user):
]
},
...,
],
'course_updates': [
{
'name': 'published_date',
'url': 'url',
'brokenLinks': [],
'lockedLinks': [],
'externalForbiddenLinks': [],
'previousRunLinks': []
},
...
{
'name': 'handouts',
'url': 'url',
'brokenLinks': [],
'lockedLinks': [],
'externalForbiddenLinks': [],
'previousRunLinks': []
}
],
'custom_pages': [
{
'name': 'page_name',
'url': 'url',
'brokenLinks': [],
'lockedLinks': [],
'externalForbiddenLinks': [],
'previousRunLinks': []
},
...
]
}
"""
xblock_node_tree = {} # tree representation of xblock relationships
xblock_dictionary = {} # dictionary of xblock attributes
for item in json_content:
block_id, link, *rest = item
if rest:
link_state = rest[0]
else:
link_state = ''
usage_key = usage_key_with_run(block_id)
block = get_xblock(usage_key, request_user)
xblock_node_tree, xblock_dictionary = _update_node_tree_and_dictionary(
block=block,
link=link,
link_state=link_state,
node_tree=xblock_node_tree,
dictionary=xblock_dictionary
)
return _create_dto_recursive(xblock_node_tree, xblock_dictionary)
return _generate_enhanced_links_descriptor(json_content, request_user, course_key)
def _update_node_tree_and_dictionary(block, link, link_state, node_tree, dictionary):
@@ -221,6 +239,8 @@ def _update_node_tree_and_dictionary(block, link, link_state, node_tree, diction
updated_dictionary[xblock_id].setdefault('locked_links', []).append(link)
elif link_state == LinkState.EXTERNAL_FORBIDDEN:
updated_dictionary[xblock_id].setdefault('external_forbidden_links', []).append(link)
elif link_state == LinkState.PREVIOUS_RUN:
updated_dictionary[xblock_id].setdefault('previous_run_links', []).append(link)
else:
updated_dictionary[xblock_id].setdefault('broken_links', []).append(link)
@@ -277,7 +297,8 @@ def _create_dto_recursive(xblock_node, xblock_dictionary, parent_id=None):
'url': xblock_data.get('url', ''),
'brokenLinks': xblock_data.get('broken_links', []),
'lockedLinks': xblock_data.get('locked_links', []),
'externalForbiddenLinks': xblock_data.get('external_forbidden_links', [])
'externalForbiddenLinks': xblock_data.get('external_forbidden_links', []),
'previousRunLinks': xblock_data.get('previous_run_links', [])
})
else: # Non-leaf node
category = xblock_data.get('category', None)
@@ -317,3 +338,268 @@ def sort_course_sections(course_key, data):
]
return data
def _generate_links_descriptor_for_content(json_content, request_user):
    """
    Build the nested links DTO for links found in regular course content.

    Each entry of ``json_content`` is ``[block_id, link]`` or
    ``[block_id, link, link_state]``; when no state is present an empty
    string is passed on in its place.

    Returns:
        dict: the result of ``_create_dto_recursive``, or ``{"sections": []}``
        when that result is not a dict.
    """
    node_tree = {}
    block_attributes = {}

    for entry in json_content:
        block_id, link, *extra = entry
        link_state = extra[0] if extra else ""

        node_tree, block_attributes = _update_node_tree_and_dictionary(
            block=get_xblock(usage_key_with_run(block_id), request_user),
            link=link,
            link_state=link_state,
            node_tree=node_tree,
            dictionary=block_attributes,
        )

    dto = _create_dto_recursive(node_tree, block_attributes)

    # Always hand back a dict that has a "sections" entry to work with.
    return dto if isinstance(dto, dict) else {"sections": []}
def _classify_link_block(block_id):
    """
    Return the report bucket for a block id: 'updates', 'handouts', 'custom_pages' or 'content'.

    The id is parsed as a usage key so that only the block type and block name
    decide the bucket; text in the org/course/run part (for example a run named
    "2024_updates") can no longer misroute a link. Ids that do not parse as a
    usage key fall back to the substring checks.
    """
    # Local imports keep this block self-contained; opaque_keys is already a dependency of this module.
    from opaque_keys import InvalidKeyError  # pylint: disable=import-outside-toplevel
    from opaque_keys.edx.keys import UsageKey  # pylint: disable=import-outside-toplevel

    try:
        usage_key = UsageKey.from_string(block_id)
    except InvalidKeyError:
        if "course_info" in block_id and "updates" in block_id:
            return "updates"
        if "course_info" in block_id and "handouts" in block_id:
            return "handouts"
        if "static_tab" in block_id:
            return "custom_pages"
        return "content"

    if usage_key.block_type == "course_info" and usage_key.block_id in ("updates", "handouts"):
        return usage_key.block_id
    if usage_key.block_type == "static_tab":
        return "custom_pages"
    return "content"


def _generate_enhanced_links_descriptor(json_content, request_user, course_key):
    """
    Generate enhanced link descriptor that includes course updates, handouts, and custom pages.

    Every item of ``json_content`` is routed by the block it was found in: the
    ``updates`` or ``handouts`` course_info block, a static tab, or regular
    course content.

    Args:
        json_content (list): ``[block_id, link]`` or ``[block_id, link, link_state]`` items.
        request_user: user passed on when loading the content blocks.
        course_key: key of the course whose links are being reported.

    Returns:
        dict: a copy of the content DTO extended with ``course_updates``
        (update entries followed by the handouts entry) and ``custom_pages``.
    """
    buckets = {"content": [], "updates": [], "handouts": [], "custom_pages": []}

    course = modulestore().get_course(course_key)

    for item in json_content:
        # Unpacking (rather than indexing) keeps rejecting items that lack a link.
        block_id, _link, *_ = item
        buckets[_classify_link_block(block_id)].append(item)

    main_content = _generate_links_descriptor_for_content(buckets["content"], request_user)
    if main_content is None:
        main_content = {"sections": []}

    # The extra structures need the course object; without it (or without links) they stay empty.
    course_updates_data = (
        _generate_course_updates_structure(course, buckets["updates"])
        if buckets["updates"] and course else []
    )
    handouts_data = (
        _generate_handouts_structure(course, buckets["handouts"])
        if buckets["handouts"] and course else []
    )
    custom_pages_data = (
        _generate_custom_pages_structure(course, buckets["custom_pages"])
        if buckets["custom_pages"] and course else []
    )

    result = main_content.copy()
    # Handouts are reported in the same list as course updates.
    result["course_updates"] = course_updates_data + handouts_data
    result["custom_pages"] = custom_pages_data
    return result
def _generate_enhanced_content_structure(course, content_links, content_type):
"""
Unified function to generate structure for enhanced content (updates, handouts, custom pages).
Args:
course: Course object
content_links: List of link items for this content type
content_type: 'updates', 'handouts', or 'custom_pages'
Returns:
List of content items with categorized links
"""
result = []
try:
if content_type == "custom_pages":
result = _generate_custom_pages_content(course, content_links)
elif content_type == "updates":
result = _generate_course_updates_content(course, content_links)
elif content_type == "handouts":
result = _generate_handouts_content(course, content_links)
return result
except Exception as e: # pylint: disable=broad-exception-caught
return result
def _generate_course_updates_content(course, updates_links):
    """
    Build one report entry per non-deleted course update.

    Each entry carries the update's date as its name, the course_info URL and
    those links found in the update's content that also appear in
    ``updates_links``, grouped by link state.
    """
    updates_block = modulestore().get_item(get_course_info_usage_key(course, "updates"))
    if not updates_block or not hasattr(updates_block, "data"):
        return []

    update_items = get_course_update_items(updates_block)
    if not update_items:
        return []

    # link -> state; items without an explicit state count as broken.
    state_by_link = {}
    for item in updates_links:
        if len(item) < 2:
            continue
        state_by_link[item[1]] = item[2] if len(item) >= 3 else LinkState.BROKEN

    entries = []
    for update in update_items:
        if update.get("status") == "deleted":
            continue

        body = update.get("content", "")
        found_links = _get_urls(body) if body else []

        # Only links that were reported for the updates block are listed.
        categorized = _create_empty_links_data()
        for link in found_links:
            state = state_by_link.get(link)
            if state is not None:
                _categorize_link_by_state(link, state, categorized)

        entries.append(
            {
                "name": update.get("date", "Unknown Date"),
                "url": f"/course/{str(course.id)}/course_info",
                **categorized,
            }
        )

    return entries
def _generate_handouts_content(course, handouts_links):
    """
    Build the single "handouts" report entry with its links grouped by state.

    Returns an empty list when the handouts block is missing or has no data.
    """
    handouts_block = modulestore().get_item(get_course_info_usage_key(course, "handouts"))

    has_content = handouts_block and hasattr(handouts_block, "data") and handouts_block.data
    if not has_content:
        return []

    # link -> state; items without an explicit state count as broken.
    state_by_link = {}
    for item in handouts_links:
        if len(item) >= 2:
            state_by_link[item[1]] = item[2] if len(item) >= 3 else LinkState.BROKEN

    categorized = _create_empty_links_data()
    for link, state in state_by_link.items():
        _categorize_link_by_state(link, state, categorized)

    return [
        {
            "name": "handouts",
            "url": f"/course/{str(course.id)}/course_info",
            **categorized,
        }
    ]
def _generate_custom_pages_content(course, custom_pages_links):
"""Generate custom pages content with categorized links."""
custom_pages = []
if not course or not hasattr(course, "tabs"):
return custom_pages
# Group links by block_id and categorize them
links_by_page = {}
for item in custom_pages_links:
if len(item) >= 2:
block_id, link = item[0], item[1]
link_state = item[2] if len(item) >= 3 else LinkState.BROKEN
links_by_page.setdefault(block_id, _create_empty_links_data())
_categorize_link_by_state(link, link_state, links_by_page[block_id])
# Process static tabs and add their pages
for tab in course.tabs:
if isinstance(tab, StaticTab):
block_id = str(course.id.make_usage_key("static_tab", tab.url_slug))
custom_pages.append({
"name": tab.name,
"url": f"/course/{str(course.id)}/custom-pages",
**links_by_page.get(block_id, _create_empty_links_data()),
})
return custom_pages
def _generate_course_updates_structure(course, updates_links):
    """Return the course updates structure via the shared enhanced-content generator."""
    return _generate_enhanced_content_structure(
        course,
        updates_links,
        content_type="updates",
    )
def _generate_handouts_structure(course, handouts_links):
    """Return the course handouts structure via the shared enhanced-content generator."""
    return _generate_enhanced_content_structure(
        course,
        handouts_links,
        content_type="handouts",
    )
def _generate_custom_pages_structure(course, custom_pages_links):
    """Return the custom pages (static tabs) structure via the shared enhanced-content generator."""
    return _generate_enhanced_content_structure(
        course,
        custom_pages_links,
        content_type="custom_pages",
    )
def _categorize_link_by_state(link, link_state, links_data):
    """
    Append ``link`` to the list in ``links_data`` that matches ``link_state``.

    A state that has no matching list leaves ``links_data`` unchanged.

    Args:
        link (str): The URL link to categorize
        link_state (str): The state of the link (broken, locked, external-forbidden, previous-run)
        links_data (dict): Dictionary containing the categorized link lists
    """
    list_name_by_state = dict((
        (LinkState.BROKEN, "brokenLinks"),
        (LinkState.LOCKED, "lockedLinks"),
        (LinkState.EXTERNAL_FORBIDDEN, "externalForbiddenLinks"),
        (LinkState.PREVIOUS_RUN, "previousRunLinks"),
    ))

    target = list_name_by_state.get(link_state)
    if not target:
        return
    links_data[target].append(link)
def _create_empty_links_data():
"""
Helper function to create an empty links data structure.
Returns:
dict: Dictionary with empty lists for each link type
"""
return {
"brokenLinks": [],
"lockedLinks": [],
"externalForbiddenLinks": [],
"previousRunLinks": [],
}

View File

@@ -1,16 +1,22 @@
"""
Tests for course optimizer
"""
from unittest import mock
from unittest.mock import Mock
from cms.djangoapps.contentstore.tests.utils import CourseTestCase
from opaque_keys.edx.keys import CourseKey
from cms.djangoapps.contentstore.core.course_optimizer_provider import (
_update_node_tree_and_dictionary,
_create_dto_recursive,
_update_node_tree_and_dictionary,
generate_broken_links_descriptor,
sort_course_sections
)
from cms.djangoapps.contentstore.tasks import LinkState
from cms.djangoapps.contentstore.tasks import LinkState, _get_urls
from cms.djangoapps.contentstore.tests.utils import CourseTestCase
from cms.djangoapps.contentstore.utils import _contains_previous_course_reference
from xmodule.tabs import StaticTab
class TestLinkCheckProvider(CourseTestCase):
@@ -123,6 +129,7 @@ class TestLinkCheckProvider(CourseTestCase):
'brokenLinks': ['broken_link_1', 'broken_link_2'],
'lockedLinks': ['locked_link'],
'externalForbiddenLinks': ['forbidden_link_1'],
'previousRunLinks': [],
}
]
}
@@ -181,6 +188,7 @@ class TestLinkCheckProvider(CourseTestCase):
'brokenLinks': ['broken_link_1', 'broken_link_2'],
'lockedLinks': ['locked_link'],
'externalForbiddenLinks': ['forbidden_link_1'],
'previousRunLinks': [],
}
]
}
@@ -295,3 +303,145 @@ class TestLinkCheckProvider(CourseTestCase):
]
assert result["LinkCheckOutput"]["sections"] == expected_sections
def test_prev_run_link_detection(self):
    """Check which URLs are recognised as references to the previous course run."""
    previous_course_key = CourseKey.from_string(
        "course-v1:edX+DemoX+Demo_Course_2023"
    )

    previous_run_urls = [
        f"/courses/{previous_course_key}/info",
        f"/courses/{previous_course_key}/courseware",
        f"/courses/{str(previous_course_key).upper()}/page",
    ]
    # Should NOT match
    unrelated_urls = [
        "/courses/course-v1:edX+DemoX+Demo_Course_2024/info",
        "/static/image.png",
        "/assets/courseware/file.pdf",
        "",
        "   ",
    ]
    test_cases = [(url, True) for url in previous_run_urls]
    test_cases += [(url, False) for url in unrelated_urls]

    for url, expected_match in test_cases:
        with self.subTest(url=url, expected=expected_match):
            self.assertEqual(
                _contains_previous_course_reference(url, previous_course_key),
                expected_match,
                f"URL '{url}' should {'match' if expected_match else 'not match'} previous course",
            )
def test_enhanced_url_detection_edge_cases(self):
    """
    Test edge cases for enhanced URL detection.

    Cases that expect no URL assert that nothing was detected; a loop over
    an empty expectation list alone would verify nothing.
    """
    test_cases = [
        ("", []),  # Empty content
        ("No URLs here", []),  # Content without URLs
        (
            "Visit https://example.com today!",
            ["https://example.com"],
        ),  # URL in text
        ('href="#anchor"', []),  # Should exclude fragments
        ('src="data:image/png;base64,123"', []),  # Should exclude data URLs
        (
            "Multiple URLs: http://site1.com and https://site2.com",
            ["http://site1.com", "https://site2.com"],
        ),  # Multiple URLs
        (
            "URL with params: https://example.com/page?param=value&other=123",
            ["https://example.com/page?param=value&other=123"],
        ),  # URL with parameters
    ]

    for content, expected_urls in test_cases:
        with self.subTest(content=content):
            urls = _get_urls(content)
            if not expected_urls:
                # Exclusion cases: the result must be empty.
                self.assertFalse(
                    urls,
                    f"Should find no URLs in content: {content}",
                )
            for expected_url in expected_urls:
                self.assertIn(
                    expected_url,
                    urls,
                    f"Should find '{expected_url}' in content: {content}",
                )
def test_course_updates_and_custom_pages_structure(self):
    """Check that the descriptor exposes sections, course_updates and custom_pages (and no handouts key)."""
    course_id = "course-v1:Test+Course+2024"
    provider = "cms.djangoapps.contentstore.core.course_optimizer_provider"

    regular_content = [
        [f"{course_id}+type@html+block@content1", "http://content-link.com", "broken"],
        [f"{course_id}+type@vertical+block@unit1", "http://unit-link.com", "locked"],
    ]
    course_updates = [
        [f"{course_id}+type@course_info+block@updates", "http://update1.com", "broken"],
        [f"{course_id}+type@course_info+block@updates", "http://update2.com", "locked"],
    ]
    # Handouts (should be merged into course_updates)
    handouts = [
        [f"{course_id}+type@course_info+block@handouts", "http://handout.com", "broken"],
    ]
    # Custom pages (static tabs)
    custom_pages = [
        [f"{course_id}+type@static_tab+block@page1", "http://page1.com", "broken"],
        [f"{course_id}+type@static_tab+block@page2", "http://page2.com", "external-forbidden"],
    ]
    json_content = regular_content + course_updates + handouts + custom_pages

    with mock.patch(
        f"{provider}._generate_links_descriptor_for_content"
    ) as mock_content, mock.patch(
        f"{provider}.modulestore"
    ) as mock_modulestore:
        mock_content.return_value = {"sections": []}

        mock_course = self.mock_course
        mock_course.tabs = [
            StaticTab(name="Page1", url_slug="page1"),
            StaticTab(name="Page2", url_slug="page2"),
        ]
        mock_course.id = CourseKey.from_string(course_id)
        mock_modulestore.return_value.get_course.return_value = mock_course

        result = generate_broken_links_descriptor(
            json_content, self.user, CourseKey.from_string(course_id)
        )

        # Verify top-level structure
        for expected_key in ("sections", "course_updates", "custom_pages"):
            self.assertIn(expected_key, result)
        self.assertNotIn("handouts", result)

        # Course updates should include both updates and handouts
        self.assertGreaterEqual(
            len(result["course_updates"]),
            1,
            "Should have course updates/handouts",
        )

        # Custom pages should have custom pages data
        self.assertGreaterEqual(
            len(result["custom_pages"]), 1, "Should have custom pages"
        )

View File

@@ -13,6 +13,7 @@ class LinkCheckBlockSerializer(serializers.Serializer):
brokenLinks = serializers.ListField(required=False)
lockedLinks = serializers.ListField(required=False)
externalForbiddenLinks = serializers.ListField(required=False)
previousRunLinks = serializers.ListField(required=False)
class LinkCheckUnitSerializer(serializers.Serializer):
@@ -36,9 +37,21 @@ class LinkCheckSectionSerializer(serializers.Serializer):
subsections = LinkCheckSubsectionSerializer(many=True)
class LinkCheckContentItemSerializer(serializers.Serializer):
    """ Serializer for a named course content item (update, handouts or custom page) and its links """
    # Identification of the item: both values are mandatory and must be non-empty.
    name = serializers.CharField(allow_blank=False, allow_null=False, required=True)
    url = serializers.CharField(allow_blank=False, allow_null=False, required=True)

    # One optional list per link state.
    brokenLinks = serializers.ListField(required=False)
    lockedLinks = serializers.ListField(required=False)
    externalForbiddenLinks = serializers.ListField(required=False)
    previousRunLinks = serializers.ListField(required=False)
class LinkCheckOutputSerializer(serializers.Serializer):
    """ Serializer for the link check output: course sections plus optional updates and custom pages """
    # Course outline content is always present.
    sections = LinkCheckSectionSerializer(many=True)

    # Content outside the outline; either list may be omitted.
    course_updates = LinkCheckContentItemSerializer(required=False, many=True)
    custom_pages = LinkCheckContentItemSerializer(required=False, many=True)
class LinkCheckSerializer(serializers.Serializer):

View File

@@ -71,53 +71,49 @@ class LinkCheckStatusView(DeveloperErrorViewMixin, APIView):
)
def get(self, request: Request, course_id: str):
"""
GET handler to return the status of the link_check task from UserTaskStatus.
If no task has been started for the course, return 'Uninitiated'.
If link_check task was successful, an output result is also returned.
**Use Case**
For reference, the following status are in UserTaskStatus:
'Pending', 'In Progress' (sent to frontend as 'In-Progress'),
'Succeeded', 'Failed', 'Canceled', 'Retrying'
This function adds a status for when status from UserTaskStatus is None:
'Uninitiated'
GET handler to return the status of the link_check task from UserTaskStatus.
If no task has been started for the course, return 'Uninitiated'.
If link_check task was successful, an output result is also returned.
For reference, the following status are in UserTaskStatus:
'Pending', 'In Progress' (sent to frontend as 'In-Progress'),
'Succeeded', 'Failed', 'Canceled', 'Retrying'
This function adds a status for when status from UserTaskStatus is None:
'Uninitiated'
**Example Request**
GET /api/contentstore/v0/link_check_status/{course_id}
**Example Response**
```json
{
"LinkCheckStatus": "Succeeded",
"LinkCheckCreatedAt": "2025-02-05T14:32:01.294587Z",
"LinkCheckOutput": {
sections: [
"sections": [
{
id: <string>,
displayName: <string>,
subsections: [
"id": <string>,
"displayName": <string>,
"subsections": [
{
id: <string>,
displayName: <string>,
units: [
"id": <string>,
"displayName": <string>,
"units": [
{
id: <string>,
displayName: <string>,
blocks: [
"id": <string>,
"displayName": <string>,
"blocks": [
{
id: <string>,
url: <string>,
brokenLinks: [
<string>,
<string>,
<string>,
...,
],
lockedLinks: [
<string>,
<string>,
<string>,
...,
],
"id": <string>,
"url": <string>,
"brokenLinks": [<string>, ...],
"lockedLinks": [<string>, ...],
"externalForbiddenLinks": [<string>, ...],
"previousRunLinks": [<string>, ...]
},
{ <another block> },
],
@@ -130,6 +126,39 @@ class LinkCheckStatusView(DeveloperErrorViewMixin, APIView):
},
{ <another section> },
],
"course_updates": [
{
"name": <string>,
"url": <string>,
"brokenLinks": [<string>, ...],
"lockedLinks": [<string>, ...],
"externalForbiddenLinks": [<string>, ...],
"previousRunLinks": [<string>, ...]
},
...,
{ <another course-updates> },
...,
{
"name": "handouts",
"url": <string>,
"brokenLinks": [<string>, ...],
"lockedLinks": [<string>, ...],
"externalForbiddenLinks": [<string>, ...],
"previousRunLinks": [<string>, ...]
}
],
"custom_pages": [
{
"name": <string>,
"url": <string>,
"brokenLinks": [<string>, ...],
"lockedLinks": [<string>, ...],
"externalForbiddenLinks": [<string>, ...],
"previousRunLinks": [<string>, ...]
},
...,
{ <another page> },
]
},
}
"""

View File

@@ -30,6 +30,7 @@ class CourseWaffleFlagsSerializer(serializers.Serializer):
enable_course_optimizer = serializers.SerializerMethodField()
use_react_markdown_editor = serializers.SerializerMethodField()
use_video_gallery_flow = serializers.SerializerMethodField()
enable_course_optimizer_check_prev_run_links = serializers.SerializerMethodField()
def get_course_key(self):
"""
@@ -167,3 +168,10 @@ class CourseWaffleFlagsSerializer(serializers.Serializer):
Method to get the use_video_gallery_flow waffle flag
"""
return toggles.use_video_gallery_flow()
def get_enable_course_optimizer_check_prev_run_links(self, obj):
    """
    Return the enable_course_optimizer_check_prev_run_links waffle flag value
    for the course key this serializer resolves.
    """
    return toggles.enable_course_optimizer_check_prev_run_links(self.get_course_key())

View File

@@ -1,6 +1,7 @@
"""
Unit tests for the course waffle flags view
"""
from django.urls import reverse
from cms.djangoapps.contentstore import toggles
@@ -13,28 +14,30 @@ class CourseWaffleFlagsViewTest(CourseTestCase):
Basic test for the CourseWaffleFlagsView endpoint, which returns waffle flag states
for a specific course or globally if no course ID is provided.
"""
maxDiff = None # Show the whole dictionary in the diff
defaults = {
'enable_course_optimizer': False,
'use_new_advanced_settings_page': True,
'use_new_certificates_page': True,
'use_new_course_outline_page': True,
'use_new_course_team_page': True,
'use_new_custom_pages': True,
'use_new_export_page': True,
'use_new_files_uploads_page': True,
'use_new_grading_page': True,
'use_new_group_configurations_page': True,
'use_new_home_page': True,
'use_new_import_page': True,
'use_new_schedule_details_page': True,
'use_new_textbooks_page': True,
'use_new_unit_page': True,
'use_new_updates_page': True,
'use_new_video_uploads_page': False,
'use_react_markdown_editor': False,
'use_video_gallery_flow': False,
"enable_course_optimizer": False,
"use_new_advanced_settings_page": True,
"use_new_certificates_page": True,
"use_new_course_outline_page": True,
"use_new_course_team_page": True,
"use_new_custom_pages": True,
"use_new_export_page": True,
"use_new_files_uploads_page": True,
"use_new_grading_page": True,
"use_new_group_configurations_page": True,
"use_new_home_page": True,
"use_new_import_page": True,
"use_new_schedule_details_page": True,
"use_new_textbooks_page": True,
"use_new_unit_page": True,
"use_new_updates_page": True,
"use_new_video_uploads_page": False,
"use_react_markdown_editor": False,
"use_video_gallery_flow": False,
"enable_course_optimizer_check_prev_run_links": False,
}
def setUp(self):
@@ -44,6 +47,11 @@ class CourseWaffleFlagsViewTest(CourseTestCase):
course_id=self.course.id,
enabled=True,
)
WaffleFlagCourseOverrideModel.objects.create(
waffle_flag=toggles.ENABLE_COURSE_OPTIMIZER_CHECK_PREV_RUN_LINKS.name,
course_id=self.course.id,
enabled=True,
)
def test_global_defaults(self):
url = reverse("cms.djangoapps.contentstore:v1:course_waffle_flags")
@@ -59,4 +67,5 @@ class CourseWaffleFlagsViewTest(CourseTestCase):
assert response.data == {
**self.defaults,
"enable_course_optimizer": True,
"enable_course_optimizer_check_prev_run_links": True,
}

View File

@@ -28,13 +28,13 @@ from edx_django_utils.monitoring import (
set_code_owner_attribute,
set_code_owner_attribute_from_module,
set_custom_attribute,
set_custom_attributes_for_course_key,
set_custom_attributes_for_course_key
)
from olxcleaner.exceptions import ErrorLevel
from olxcleaner.reporting import report_error_summary, report_errors
from opaque_keys import InvalidKeyError
from opaque_keys.edx.keys import CourseKey, UsageKey
from opaque_keys.edx.locator import LibraryLocator, LibraryContainerLocator
from opaque_keys.edx.locator import LibraryContainerLocator, LibraryLocator
from organizations.api import add_organization_course, ensure_organization
from organizations.exceptions import InvalidOrganizationException
from organizations.models import Organization
@@ -47,16 +47,19 @@ import cms.djangoapps.contentstore.errors as UserErrors
from cms.djangoapps.contentstore.courseware_index import (
CoursewareSearchIndexer,
LibrarySearchIndexer,
SearchIndexingError,
SearchIndexingError
)
from cms.djangoapps.contentstore.storage import course_import_export_storage
from cms.djangoapps.contentstore.toggles import enable_course_optimizer_check_prev_run_links
from cms.djangoapps.contentstore.utils import (
IMPORTABLE_FILE_TYPES,
_contains_previous_course_reference,
_get_previous_run_course_key,
create_or_update_xblock_upstream_link,
delete_course,
initialize_permissions,
reverse_usage_url,
translation_language,
translation_language
)
from cms.djangoapps.contentstore.xblock_storage_handlers.view_handlers import get_block_info
from cms.djangoapps.models.settings.course_metadata import CourseMetadata
@@ -65,6 +68,7 @@ from common.djangoapps.static_replace import replace_static_urls
from common.djangoapps.student.auth import has_course_author_access
from common.djangoapps.student.roles import CourseInstructorRole, CourseStaffRole, LibraryUserRole
from common.djangoapps.util.monitoring import monitor_import_failure
from lms.djangoapps.courseware.courses import get_course_info_usage_key
from openedx.core.djangoapps.content.learning_sequences.api import key_supports_outlines
from openedx.core.djangoapps.content_libraries import api as v2contentlib_api
from openedx.core.djangoapps.content_tagging.api import make_copied_tags_editable
@@ -75,6 +79,7 @@ from openedx.core.djangoapps.discussions.tasks import update_unit_discussion_sta
from openedx.core.djangoapps.embargo.models import CountryAccessRule, RestrictedCourse
from openedx.core.lib import ensure_cms
from openedx.core.lib.extract_archive import safe_extractall
from openedx.core.lib.xblock_utils import get_course_update_items
from xmodule.contentstore.django import contentstore
from xmodule.course_block import CourseFields
from xmodule.exceptions import SerializationError
@@ -83,8 +88,9 @@ from xmodule.modulestore.django import modulestore
from xmodule.modulestore.exceptions import DuplicateCourseError, InvalidProctoringProvider, ItemNotFoundError
from xmodule.modulestore.xml_exporter import export_course_to_xml, export_library_to_xml
from xmodule.modulestore.xml_importer import CourseImportException, import_course_from_xml, import_library_from_xml
from xmodule.tabs import StaticTab
from .models import ContainerLink, LearningContextLinksStatus, LearningContextLinksStatusChoices, ComponentLink
from .models import ComponentLink, ContainerLink, LearningContextLinksStatus, LearningContextLinksStatusChoices
from .outlines import update_outline_from_modulestore
from .outlines_regenerate import CourseOutlineRegenerate
from .toggles import bypass_olx_failure_enabled
@@ -116,6 +122,7 @@ class LinkState:
BROKEN = 'broken'
LOCKED = 'locked'
EXTERNAL_FORBIDDEN = 'external-forbidden'
PREVIOUS_RUN = 'previous-run'
def clone_instance(instance, field_values):
@@ -1137,7 +1144,8 @@ def check_broken_links(self, user_id, course_key_string, language):
def _check_broken_links(task_instance, user_id, course_key_string, language):
"""
Checks for broken links in a course and store the results in a file.
Checks for broken links in a course and stores the results in a file.
Also checks for previous run links if the feature is enabled.
"""
user = _validate_user(task_instance, user_id, language)
@@ -1145,13 +1153,29 @@ def _check_broken_links(task_instance, user_id, course_key_string, language):
course_key = CourseKey.from_string(course_key_string)
url_list = _scan_course_for_links(course_key)
validated_url_list = asyncio.run(_validate_urls_access_in_batches(url_list, course_key, batch_size=100))
previous_run_links = []
urls_to_validate = url_list
if enable_course_optimizer_check_prev_run_links(course_key):
previous_run_course_key = _get_previous_run_course_key(course_key)
if previous_run_course_key:
# Separate previous run links from regular links BEFORE validation
urls_to_validate = []
for block_id, url in url_list:
if _contains_previous_course_reference(url, previous_run_course_key):
previous_run_links.append([block_id, url, LinkState.PREVIOUS_RUN])
else:
urls_to_validate.append([block_id, url])
validated_url_list = asyncio.run(_validate_urls_access_in_batches(urls_to_validate, course_key, batch_size=100))
broken_or_locked_urls, retry_list = _filter_by_status(validated_url_list)
if retry_list:
retry_results = _retry_validation(retry_list, course_key, retry_count=3)
broken_or_locked_urls.extend(retry_results)
all_links = broken_or_locked_urls + previous_run_links
try:
task_instance.status.increment_completed_steps()
@@ -1160,9 +1184,9 @@ def _check_broken_links(task_instance, user_id, course_key_string, language):
LOGGER.debug(f'[Link Check] json file being generated at {broken_links_file.name}')
with open(broken_links_file.name, 'w') as file:
json.dump(broken_or_locked_urls, file, indent=4)
json.dump(all_links, file, indent=4)
_write_broken_links_to_file(broken_or_locked_urls, broken_links_file)
_write_broken_links_to_file(all_links, broken_links_file)
artifact = UserTaskArtifact(status=task_instance.status, name='BrokenLinks')
_save_broken_links_file(artifact, broken_links_file)
@@ -1186,7 +1210,8 @@ def _validate_user(task, user_id, language):
def _scan_course_for_links(course_key):
"""
Scans a course for links found in the data contents of blocks.
Scans a course for links found in the data contents of
blocks, course updates, handouts, and custom pages.
Returns:
list: block id and URL pairs
@@ -1205,6 +1230,7 @@ def _scan_course_for_links(course_key):
)
blocks = []
urls_to_validate = []
course = modulestore().get_course(course_key)
for vertical in verticals:
blocks.extend(vertical.get_children())
@@ -1220,13 +1246,31 @@ def _scan_course_for_links(course_key):
url_list = _get_urls(block_data)
urls_to_validate += [[block_id, url] for url in url_list]
course_updates_data = _scan_course_updates_for_links(course)
handouts_data = _scan_course_handouts_for_links(course)
custom_pages_data = _scan_custom_pages_for_links(course)
for update in course_updates_data:
for url in update['urls']:
urls_to_validate.append([update['block_id'], url])
for handout in handouts_data:
for url in handout['urls']:
urls_to_validate.append([handout['block_id'], url])
for page in custom_pages_data:
for url in page['urls']:
urls_to_validate.append([page['block_id'], url])
return urls_to_validate
def _get_urls(content):
"""
Finds and returns a list of URLs in the given content.
Includes strings following 'href=' and 'src='.
Uses multiple regex patterns to find URLs in various contexts:
- URLs in href and src attributes
- Standalone URLs starting with http(s)://
Excludes strings that are only '#' or start with 'data:'.
Arguments:
@@ -1235,11 +1279,130 @@ def _get_urls(content):
Returns:
set: unique urls found in the content
"""
regex = r'\s+(?:href|src)=["\'](?!#|data:)([^"\']*)["\']'
url_list = re.findall(regex, content)
url_list = set()
# Regex to match URLs in href and src attributes, or standalone URLs
regex = (
r'(?:href|src)=["\'](?!#|data:)([^"\']+)["\']'
r'|(?:^|[\s\'"(<>])((?:https?://|http://|https://|www\.)[^\s\'")<>]+)(?=[\s\'")<>]|$)'
)
# Update list to include URLs found in the content
matches = re.findall(regex, content, re.IGNORECASE)
for match in matches:
url = match[0] or match[1]
if url:
url_list.add(url)
return url_list
def _scan_course_updates_for_links(course):
    """
    Collects the URLs referenced by every non-deleted course update.

    Any exception raised while loading or reading the updates block is logged
    at debug level; whatever was gathered up to that point is returned.

    Arguments:
        course: course whose "updates" course-info block is scanned

    Returns:
        list: one dict per update with the keys "name" (the update's date, or
        "Unknown"), "block_id" (usage key of the updates block, as a string)
        and "urls" (the result of _get_urls on the update's content)
    """
    found = []
    try:
        ms = modulestore()
        updates_key = get_course_info_usage_key(course, "updates")
        block = ms.get_item(updates_key)
        # Nothing to scan when the block is missing or carries no data field.
        if not block or not hasattr(block, "data"):
            return found

        for item in get_course_update_items(block):
            if item.get("status") == "deleted":
                continue
            urls = _get_urls(item.get("content", ""))
            found.append(
                {
                    "name": item.get("date", "Unknown"),
                    "block_id": str(updates_key),
                    "urls": urls,
                }
            )
    except Exception as e:  # pylint: disable=broad-exception-caught
        LOGGER.debug(f"Error scanning course updates: {e}")
    return found
def _scan_course_handouts_for_links(course):
    """
    Collects the URLs referenced by the course handouts block.

    Any exception raised while loading or reading the handouts block is logged
    at debug level and an empty list is returned.

    Arguments:
        course: course whose "handouts" course-info block is scanned

    Returns:
        list: empty, or a single dict with the keys "name" ("handouts"),
        "block_id" (usage key of the handouts block, as a string) and "urls"
        (the result of _get_urls on the block's data)
    """
    results = []
    try:
        ms = modulestore()
        handouts_key = get_course_info_usage_key(course, "handouts")
        block = ms.get_item(handouts_key)
        # Skip a missing block as well as one with no (or empty) data.
        if not (block and hasattr(block, "data") and block.data):
            return results

        urls = _get_urls(block.data)
        results.append(
            {
                "name": "handouts",
                "block_id": str(handouts_key),
                "urls": urls,
            }
        )
    except Exception as e:  # pylint: disable=broad-exception-caught
        LOGGER.debug(f"Error scanning course handouts: {e}")
    return results
def _scan_custom_pages_for_links(course):
    """
    Collects the URLs referenced by the course's custom pages (static tabs).

    A failure on one tab is logged at debug level and that tab is skipped;
    a failure outside the per-tab handling is logged at debug level and the
    pages gathered so far are returned.

    Arguments:
        course: course whose tabs are inspected; only StaticTab instances are scanned

    Returns:
        list: one dict per scanned tab with the keys "name" (the tab name),
        "block_id" (usage key of the static_tab block, as a string) and
        "urls" (the result of _get_urls on the block's data)
    """
    pages = []
    try:
        ms = modulestore()
        course_key = course.id
        static_tabs = (tab for tab in course.tabs if isinstance(tab, StaticTab))
        for tab in static_tabs:
            try:
                # Static tab content lives in a "static_tab" block keyed by the tab's slug.
                tab_key = course_key.make_usage_key("static_tab", tab.url_slug)
                block = ms.get_item(tab_key)
                if not block or not hasattr(block, "data"):
                    continue
                urls = _get_urls(block.data)
                pages.append(
                    {
                        "name": tab.name,
                        "block_id": str(tab_key),
                        "urls": urls,
                    }
                )
            except Exception as e:  # pylint: disable=broad-exception-caught
                LOGGER.debug(f"Error scanning static tab {tab.name}: {e}")
    except Exception as e:  # pylint: disable=broad-exception-caught
        LOGGER.debug(f"Error scanning custom pages: {e}")
    return pages
async def _validate_urls_access_in_batches(url_list, course_key, batch_size=100):
"""
Returns the statuses of a list of URL requests.

View File

@@ -667,4 +667,4 @@ class CheckBrokenLinksTaskTest(ModuleStoreTestCase):
"https://validsite.com",
"https://another-valid.com"
]
self.assertEqual(_get_urls(content), expected)
self.assertEqual(_get_urls(content), set(expected))

View File

@@ -659,3 +659,26 @@ def use_legacy_logged_out_home():
If not, then we should just go to the login page w/ redirect to studio course listing.
"""
return LEGACY_STUDIO_LOGGED_OUT_HOME.is_enabled()
# .. toggle_name: contentstore.enable_course_optimizer_check_prev_run_links
# .. toggle_implementation: CourseWaffleFlag
# .. toggle_default: False
# .. toggle_description: When enabled, allows the Course Optimizer to detect and update links pointing to
#   previous course runs. This feature enables instructors to automatically fix internal course links that
#   still point to old course runs after creating a course rerun.
# .. toggle_use_cases: temporary, open_edx
# .. toggle_creation_date: 2025-07-21
# .. toggle_target_removal_date: None
ENABLE_COURSE_OPTIMIZER_CHECK_PREV_RUN_LINKS = CourseWaffleFlag(
    f'{CONTENTSTORE_NAMESPACE}.enable_course_optimizer_check_prev_run_links',
    __name__,
    CONTENTSTORE_LOG_PREFIX,
)
def enable_course_optimizer_check_prev_run_links(course_key):
    """
    Tells whether the Course Optimizer previous-run link check is enabled for a course.

    Arguments:
        course_key: key of the course the waffle flag is evaluated for

    Returns:
        the result of ENABLE_COURSE_OPTIMIZER_CHECK_PREV_RUN_LINKS.is_enabled for that course
    """
    flag = ENABLE_COURSE_OPTIMIZER_CHECK_PREV_RUN_LINKS
    return flag.is_enabled(course_key)

View File

@@ -2435,3 +2435,33 @@ def create_or_update_xblock_upstream_link(xblock, course_key: CourseKey, created
# It is possible that the upstream is a container and UsageKeyV2 parse failed
# Create upstream container link and raise InvalidKeyError if xblock.upstream is a valid key.
_create_or_update_container_link(course_key, created, xblock)
def _get_previous_run_course_key(course_key):
    """
    Looks up the course that the given course was re-run from.

    Arguments:
        course_key: key of the course whose CourseRerunState record is fetched

    Returns:
        the ``source_course_key`` of the matching CourseRerunState record, or
        None (after logging a warning) when no such record exists
    """
    try:
        record = CourseRerunState.objects.get(course_key=course_key)
    except CourseRerunState.DoesNotExist:
        log.warning(f'[Link Check] No rerun state found for course {course_key}. Cannot find previous run.')
        return None
    else:
        return record.source_course_key
def _contains_previous_course_reference(url, previous_course_key):
"""
Checks if a URL contains references to the previous course.
Arguments:
url: The URL to check
previous_course_key: The previous course key to look for
Returns:
bool: True if URL contains reference to previous course
"""
if not previous_course_key:
return False
return str(previous_course_key).lower() in url.lower()

View File

@@ -10,7 +10,8 @@ def cms_api_filter(endpoints):
"""
filtered = []
CMS_PATH_PATTERN = re.compile(
r"^/api/contentstore/v0/(xblock|videos|video_transcripts|file_assets|youtube_transcripts)"
r"^/api/contentstore/v0/(xblock|videos|video_transcripts|file_assets|"
r"youtube_transcripts|link_check|link_check_status)"
)
for path, path_regex, method, callback in endpoints: