diff --git a/cms/djangoapps/contentstore/core/course_optimizer_provider.py b/cms/djangoapps/contentstore/core/course_optimizer_provider.py index 16aec9075d..c2fa91c9e3 100644 --- a/cms/djangoapps/contentstore/core/course_optimizer_provider.py +++ b/cms/djangoapps/contentstore/core/course_optimizer_provider.py @@ -7,8 +7,13 @@ from opaque_keys.edx.keys import CourseKey from user_tasks.conf import settings as user_tasks_settings from user_tasks.models import UserTaskArtifact, UserTaskStatus -from cms.djangoapps.contentstore.tasks import CourseLinkCheckTask, LinkState, extract_content_URLs_from_course -from cms.djangoapps.contentstore.utils import create_course_info_usage_key +from cms.djangoapps.contentstore.tasks import ( + CourseLinkCheckTask, + CourseLinkUpdateTask, + LinkState, + extract_content_URLs_from_course +) +from cms.djangoapps.contentstore.utils import create_course_info_usage_key, get_previous_run_course_key from cms.djangoapps.contentstore.xblock_storage_handlers.view_handlers import get_xblock from cms.djangoapps.contentstore.xblock_storage_handlers.xblock_helpers import usage_key_with_run from openedx.core.lib.xblock_utils import get_course_update_items @@ -118,7 +123,13 @@ def generate_broken_links_descriptor(json_content, request_user, course_key): 'url': 'url/to/block', 'brokenLinks: [], 'lockedLinks: [], - 'previousRunLinks: [] + 'previousRunLinks: [ + { + 'originalLink': 'http://...', + 'isUpdated': true, + 'updatedLink': 'http://...' + } + ] }, ..., ] @@ -138,7 +149,13 @@ def generate_broken_links_descriptor(json_content, request_user, course_key): 'brokenLinks': [], 'lockedLinks': [], 'externalForbiddenLinks': [], - 'previousRunLinks': [] + 'previousRunLinks': [ + { + 'originalLink': 'http://...', + 'isUpdated': true, + 'updatedLink': 'http://...' + } + ] }, ... 
{ @@ -147,7 +164,13 @@ def generate_broken_links_descriptor(json_content, request_user, course_key): 'brokenLinks': [], 'lockedLinks': [], 'externalForbiddenLinks': [], - 'previousRunLinks': [] + 'previousRunLinks': [ + { + 'originalLink': 'http://...', + 'isUpdated': true, + 'updatedLink': 'http://...' + } + ] } ], 'custom_pages': [ @@ -157,7 +180,13 @@ def generate_broken_links_descriptor(json_content, request_user, course_key): 'brokenLinks': [], 'lockedLinks': [], 'externalForbiddenLinks': [], - 'previousRunLinks': [] + 'previousRunLinks': [ + { + 'originalLink': 'http://...', + 'isUpdated': true, + 'updatedLink': 'http://...' + } + ] }, ... ] @@ -166,7 +195,7 @@ def generate_broken_links_descriptor(json_content, request_user, course_key): return _generate_enhanced_links_descriptor(json_content, request_user, course_key) -def _update_node_tree_and_dictionary(block, link, link_state, node_tree, dictionary): +def _update_node_tree_and_dictionary(block, link, link_state, node_tree, dictionary, course_key=None): """ Inserts a block into the node tree and add its attributes to the dictionary. 
@@ -215,7 +244,7 @@ def _update_node_tree_and_dictionary(block, link, link_state, node_tree, diction # Traverse the path and build the tree structure for xblock in path: - xblock_id = xblock.location.block_id + xblock_id = xblock.location updated_dictionary.setdefault( xblock_id, { @@ -240,7 +269,7 @@ def _update_node_tree_and_dictionary(block, link, link_state, node_tree, diction elif link_state == LinkState.EXTERNAL_FORBIDDEN: updated_dictionary[xblock_id].setdefault('external_forbidden_links', []).append(link) elif link_state == LinkState.PREVIOUS_RUN: - updated_dictionary[xblock_id].setdefault('previous_run_links', []).append(link) + _add_previous_run_link(updated_dictionary, xblock_id, link, course_key) else: updated_dictionary[xblock_id].setdefault('broken_links', []).append(link) @@ -325,11 +354,11 @@ def sort_course_sections(course_key, data): revision=ModuleStoreEnum.RevisionOption.published_only ) + # Return unchanged data if course_blocks or required keys are missing if not course_blocks or 'LinkCheckOutput' not in data or 'sections' not in data['LinkCheckOutput']: - return data # Return unchanged data if course_blocks or required keys are missing - - sorted_section_ids = [section.location.block_id for section in course_blocks[0].get_children()] + return data + sorted_section_ids = [section.location for section in course_blocks[0].get_children()] sections_map = {section['id']: section for section in data['LinkCheckOutput']['sections']} data['LinkCheckOutput']['sections'] = [ sections_map[section_id] @@ -340,7 +369,7 @@ def sort_course_sections(course_key, data): return data -def _generate_links_descriptor_for_content(json_content, request_user): +def _generate_links_descriptor_for_content(json_content, request_user, course_key=None): """ Creates a content tree of all links in a course and their states Returns a structure containing all broken links and locked links for a course. 
@@ -363,6 +392,7 @@ def _generate_links_descriptor_for_content(json_content, request_user): link_state=link_state, node_tree=xblock_node_tree, dictionary=xblock_dictionary, + course_key=course_key, ) result = _create_dto_recursive(xblock_node_tree, xblock_dictionary) @@ -386,7 +416,7 @@ def _generate_enhanced_links_descriptor(json_content, request_user, course_key): for item in json_content: block_id, link, *rest = item - if "course_info" in block_id and "updates" in block_id: + if isinstance(block_id, int): course_updates_links.append(item) elif "course_info" in block_id and "handouts" in block_id: handouts_links.append(item) @@ -396,22 +426,22 @@ def _generate_enhanced_links_descriptor(json_content, request_user, course_key): content_links.append(item) try: - main_content = _generate_links_descriptor_for_content(content_links, request_user) + main_content = _generate_links_descriptor_for_content(content_links, request_user, course_key) except Exception: # pylint: disable=broad-exception-caught main_content = {"sections": []} course_updates_data = ( - _generate_course_updates_structure(course, course_updates_links) + _generate_enhanced_content_structure(course, course_updates_links, "updates", course_key) if course_updates_links and course else [] ) handouts_data = ( - _generate_handouts_structure(course, handouts_links) + _generate_enhanced_content_structure(course, handouts_links, "handouts", course_key) if handouts_links and course else [] ) custom_pages_data = ( - _generate_custom_pages_structure(course, custom_pages_links) + _generate_enhanced_content_structure(course, custom_pages_links, "custom_pages", course_key) if custom_pages_links and course else [] ) @@ -421,7 +451,7 @@ def _generate_enhanced_links_descriptor(json_content, request_user, course_key): return result -def _generate_enhanced_content_structure(course, content_links, content_type): +def _generate_enhanced_content_structure(course, content_links, content_type, course_key=None): """ Unified 
function to generate structure for enhanced content (updates, handouts, custom pages). @@ -429,24 +459,25 @@ def _generate_enhanced_content_structure(course, content_links, content_type): course: Course object content_links: List of link items for this content type content_type: 'updates', 'handouts', or 'custom_pages' + course_key: Course key to check for link updates (optional) Returns: List of content items with categorized links """ - result = [] - try: - if content_type == "custom_pages": - result = _generate_custom_pages_content(course, content_links) - elif content_type == "updates": - result = _generate_course_updates_content(course, content_links) - elif content_type == "handouts": - result = _generate_handouts_content(course, content_links) - return result - except Exception as e: # pylint: disable=broad-exception-caught - return result + generators = { + "custom_pages": _generate_custom_pages_content, + "updates": _generate_course_updates_content, + "handouts": _generate_handouts_content, + } + + generator = generators.get(content_type) + if generator: + return generator(course, content_links, course_key) + + return [] -def _generate_course_updates_content(course, updates_links): +def _generate_course_updates_content(course, updates_links, course_key=None): """Generate course updates content with categorized links.""" store = modulestore() usage_key = create_course_info_usage_key(course, "updates") @@ -460,23 +491,10 @@ def _generate_course_updates_content(course, updates_links): if not update_items: return course_updates - # Create link state mapping - link_state_map = { - item[1]: item[2] if len(item) >= 3 else LinkState.BROKEN - for item in updates_links if len(item) >= 2 - } - for update in update_items: if update.get("status") != "deleted": update_content = update.get("content", "") - update_links = extract_content_URLs_from_course(update_content) if update_content else [] - - # Match links with their states - update_link_data = 
_create_empty_links_data() - for link in update_links: - link_state = link_state_map.get(link) - if link_state is not None: - _categorize_link_by_state(link, link_state, update_link_data) + update_link_data = _process_content_links(update_content, updates_links, course_key) course_updates.append( { @@ -490,7 +508,7 @@ def _generate_course_updates_content(course, updates_links): return course_updates -def _generate_handouts_content(course, handouts_links): +def _generate_handouts_content(course, handouts_links, course_key=None): """Generate handouts content with categorized links.""" store = modulestore() usage_key = create_course_info_usage_key(course, "handouts") @@ -504,15 +522,7 @@ def _generate_handouts_content(course, handouts_links): ): return course_handouts - # Create link state mapping for handouts - link_state_map = { - item[1]: item[2] if len(item) >= 3 else LinkState.BROKEN - for item in handouts_links if len(item) >= 2 - } - - links_data = _create_empty_links_data() - for link, link_state in link_state_map.items(): - _categorize_link_by_state(link, link_state, links_data) + links_data = _process_content_links(handouts_block.data, handouts_links, course_key) course_handouts = [ { @@ -525,7 +535,7 @@ def _generate_handouts_content(course, handouts_links): return course_handouts -def _generate_custom_pages_content(course, custom_pages_links): +def _generate_custom_pages_content(course, custom_pages_links, course_key=None): """Generate custom pages content with categorized links.""" custom_pages = [] @@ -539,7 +549,7 @@ def _generate_custom_pages_content(course, custom_pages_links): block_id, link = item[0], item[1] link_state = item[2] if len(item) >= 3 else LinkState.BROKEN links_by_page.setdefault(block_id, _create_empty_links_data()) - _categorize_link_by_state(link, link_state, links_by_page[block_id]) + _categorize_link_by_state(link, link_state, links_by_page[block_id], course_key) # Process static tabs and add their pages for tab in course.tabs: @@ 
-555,24 +565,7 @@ def _generate_custom_pages_content(course, custom_pages_links): return custom_pages -def _generate_course_updates_structure(course, updates_links): - """Generate structure for course updates.""" - return _generate_enhanced_content_structure(course, updates_links, "updates") - - -def _generate_handouts_structure(course, handouts_links): - """Generate structure for course handouts.""" - return _generate_enhanced_content_structure(course, handouts_links, "handouts") - - -def _generate_custom_pages_structure(course, custom_pages_links): - """Generate structure for custom pages (static tabs).""" - return _generate_enhanced_content_structure( - course, custom_pages_links, "custom_pages" - ) - - -def _categorize_link_by_state(link, link_state, links_data): +def _categorize_link_by_state(link, link_state, links_data, course_key=None): """ Helper function to categorize a link into the appropriate list based on its state. @@ -580,6 +573,7 @@ def _categorize_link_by_state(link, link_state, links_data): link (str): The URL link to categorize link_state (str): The state of the link (broken, locked, external-forbidden, previous-run) links_data (dict): Dictionary containing the categorized link lists + course_key: Course key to check for link updates (optional) """ state_to_key = { LinkState.BROKEN: "brokenLinks", @@ -590,7 +584,11 @@ def _categorize_link_by_state(link, link_state, links_data): key = state_to_key.get(link_state) if key: - links_data[key].append(link) + if key == "previousRunLinks": + data = _generate_link_update_info(link, course_key) + links_data[key].append(data) + else: + links_data[key].append(link) def _create_empty_links_data(): @@ -606,3 +604,267 @@ def _create_empty_links_data(): "externalForbiddenLinks": [], "previousRunLinks": [], } + + +def get_course_link_update_data(request, course_id): + """ + Retrieves data and formats it for the course link update status request. 
+ """ + status = None + results = [] + task_status = _latest_course_link_update_task_status(request, course_id) + + if task_status is None: + status = "uninitiated" + else: + status = task_status.state + + if task_status.state == UserTaskStatus.SUCCEEDED: + try: + artifact = UserTaskArtifact.objects.get( + status=task_status, name="LinkUpdateResults" + ) + with artifact.file as file: + content = file.read() + results = json.loads(content) + except (UserTaskArtifact.DoesNotExist, ValueError): + # If no artifact found or invalid JSON, just return empty results + results = [] + + data = { + "status": status, + **({"results": results}), + } + return data + + +def _latest_course_link_update_task_status(request, course_id, view_func=None): + """ + Get the most recent course link update status for the specified course key. + """ + + args = {"course_id": course_id} + name = CourseLinkUpdateTask.generate_name(args) + task_status = UserTaskStatus.objects.filter(name=name) + for status_filter in STATUS_FILTERS: + task_status = status_filter().filter_queryset(request, task_status, view_func) + return task_status.order_by("-created").first() + + +def _get_link_update_status(original_url, course_key): + """ + Check whether a given link has been updated based on the latest link update results. 
+ + Args: + original_url (str): The original URL to check + course_key: The course key + + Returns: + dict: Dictionary with 'originalLink', 'isUpdated', and 'updatedLink' keys + """ + def _create_response(original_link, is_updated, updated_link=None): + """Helper to create consistent response format.""" + return { + "originalLink": original_link, + "isUpdated": is_updated, + "updatedLink": updated_link, + } + + try: + # Check if URL contains current course key (indicates it's been updated) + current_course_str = str(course_key) + if current_course_str in original_url: + prev_run_key = get_previous_run_course_key(course_key) + if prev_run_key: + reconstructed_original = original_url.replace(current_course_str, str(prev_run_key)) + return _create_response(reconstructed_original, True, original_url) + return _create_response(original_url, True, original_url) + + update_results = _get_update_results(course_key) + if not update_results: + return _create_response(original_url, False, None) + + for result in update_results: + if not result.get("success", False): + continue + + result_original = result.get("original_url", "") + result_new = result.get("new_url", "") + + # Direct match with original URL + if result_original == original_url: + return _create_response(original_url, True, result_new) + + # Check if current URL is an updated URL + if result_new == original_url: + return _create_response(result_original, True, original_url) + + # Check if URLs match through reconstruction + if _urls_match_through_reconstruction(original_url, result_new, course_key): + return _create_response(original_url, True, result_new) + + return _create_response(original_url, False, None) + + except Exception: # pylint: disable=broad-except + return _create_response(original_url, False, None) + + +def _get_update_results(course_key): + """ + Helper function to get update results from the latest link update task. 
+ + Returns: + list: Update results or empty list if not found + """ + try: + task_status = _latest_course_link_update_task_status(None, str(course_key)) + + if not task_status or task_status.state != UserTaskStatus.SUCCEEDED: + return [] + + artifact = UserTaskArtifact.objects.get( + status=task_status, name="LinkUpdateResults" + ) + with artifact.file as file: + content = file.read() + return json.loads(content) + + except (UserTaskArtifact.DoesNotExist, ValueError, json.JSONDecodeError): + return [] + + +def _is_previous_run_link(link, course_key): + """ + Check if a link is a previous run link by checking if it contains a previous course key + or if it has update results indicating it was updated. + + Args: + link: The URL to check + course_key: The current course key + + Returns: + bool: True if the link appears to be a previous run link + """ + try: + if str(course_key) in link: + return True + + prev_run_key = get_previous_run_course_key(course_key) + if prev_run_key and str(prev_run_key) in link: + return True + + update_results = _get_update_results(course_key) + for result in update_results: + if not result.get("success", False): + continue + if link in [result.get("original_url", ""), result.get("new_url", "")]: + return True + + return False + except Exception: # pylint: disable=broad-except + return False + + +def _urls_match_through_reconstruction(original_url, new_url, course_key): + """ + Check if an original URL matches a new URL through course key reconstruction. 
+ + Args: + original_url (str): The original URL from broken links + new_url (str): The new URL from update results + course_key: The current course key + + Returns: + bool: True if they match through reconstruction + """ + try: + prev_run_key = get_previous_run_course_key(course_key) + if not prev_run_key: + return False + + # Reconstruct what the original URL would have been + reconstructed_original = new_url.replace(str(course_key), str(prev_run_key)) + return reconstructed_original == original_url + + except Exception: # pylint: disable=broad-except + return False + + +def _process_content_links(content_text, all_links, course_key=None): + """ + Helper function to process links in content and categorize them by state. + + Args: + content_text: The text content to extract links from + all_links: List of tuples containing (url, state) or (url, state, extra_info) + course_key: Course key to check for link updates (optional) + + Returns: + dict: Categorized link data + """ + if not content_text: + return _create_empty_links_data() + + content_links = extract_content_URLs_from_course(content_text) + if not content_links: + return _create_empty_links_data() + + # Create link state mapping + link_state_map = { + item[1]: item[2] if len(item) >= 3 else LinkState.BROKEN + for item in all_links if len(item) >= 2 + } + + # Categorize links by state + link_data = _create_empty_links_data() + for link in content_links: + link_state = link_state_map.get(link) + if link_state is not None: + _categorize_link_by_state(link, link_state, link_data, course_key) + else: + # Check if this link is a previous run link that might have been updated + if course_key and _is_previous_run_link(link, course_key): + _categorize_link_by_state(link, LinkState.PREVIOUS_RUN, link_data, course_key) + + return link_data + + +def _generate_link_update_info(link, course_key=None): + """ + Create a previous run link data with appropriate update status. 
+ + Args: + link: The link URL + course_key: Course key to check for updates (optional) + + Returns: + dict: Previous run link data with originalLink, isUpdated, and updatedLink + """ + if course_key: + updated_info = _get_link_update_status(link, course_key) + if updated_info: + return { + 'originalLink': updated_info['originalLink'], + 'isUpdated': updated_info['isUpdated'], + 'updatedLink': updated_info['updatedLink'] + } + + return { + 'originalLink': link, + 'isUpdated': False, + 'updatedLink': None + } + + +def _add_previous_run_link(dictionary, xblock_id, link, course_key): + """ + Helper function to add a previous run link with appropriate update status. + + Args: + dictionary: The xblock dictionary to update + xblock_id: The ID of the xblock + link: The link URL + course_key: Course key to check for updates (optional) + """ + data = _generate_link_update_info(link, course_key) + dictionary[xblock_id].setdefault('previous_run_links', []).append(data) diff --git a/cms/djangoapps/contentstore/core/tests/test_course_optimizer_provider.py b/cms/djangoapps/contentstore/core/tests/test_course_optimizer_provider.py index 9ce568fc98..26a657a98e 100644 --- a/cms/djangoapps/contentstore/core/tests/test_course_optimizer_provider.py +++ b/cms/djangoapps/contentstore/core/tests/test_course_optimizer_provider.py @@ -61,10 +61,10 @@ class TestLinkCheckProvider(CourseTestCase): when passed a block level xblock. """ expected_tree = { - 'chapter_1': { - 'sequential_1': { - 'vertical_1': { - 'block_1': {} + self.mock_section.location: { + self.mock_subsection.location: { + self.mock_unit.location: { + self.mock_block.location: {} } } } @@ -81,19 +81,19 @@ class TestLinkCheckProvider(CourseTestCase): when passed a block level xblock. 
""" expected_dictionary = { - 'chapter_1': { + self.mock_section.location: { 'display_name': 'Section Name', 'category': 'chapter' }, - 'sequential_1': { + self.mock_subsection.location: { 'display_name': 'Subsection Name', 'category': 'sequential' }, - 'vertical_1': { + self.mock_unit.location: { 'display_name': 'Unit Name', 'category': 'vertical' }, - 'block_1': { + self.mock_block.location: { 'display_name': 'Block Name', 'category': 'html', 'url': f'/course/{self.course.id}/editor/html/{self.mock_block.location}', @@ -274,11 +274,16 @@ class TestLinkCheckProvider(CourseTestCase): def test_sorts_sections_correctly(self, mock_modulestore): """Test that the function correctly sorts sections based on published course structure.""" + # Create mock location objects that will match the section IDs in data + mock_location2 = "section2" + mock_location3 = "section3" + mock_location1 = "section1" + mock_course_block = Mock() mock_course_block.get_children.return_value = [ - Mock(location=Mock(block_id="section2")), - Mock(location=Mock(block_id="section3")), - Mock(location=Mock(block_id="section1")), + Mock(location=mock_location2), + Mock(location=mock_location3), + Mock(location=mock_location1), ] mock_modulestore_instance = Mock() @@ -301,8 +306,7 @@ class TestLinkCheckProvider(CourseTestCase): {"id": "section3", "name": "Bonus"}, {"id": "section1", "name": "Intro"}, ] - - assert result["LinkCheckOutput"]["sections"] == expected_sections + self.assertEqual(result["LinkCheckOutput"]["sections"], expected_sections) def test_prev_run_link_detection(self): """Test the core logic of separating previous run links from regular links.""" @@ -366,46 +370,47 @@ class TestLinkCheckProvider(CourseTestCase): def test_course_updates_and_custom_pages_structure(self): """Test that course_updates and custom_pages are properly structured in the response.""" + course_key = self.course.id + # Test data that represents the broken links JSON structure json_content = [ - # Regular course 
content [ - "course-v1:Test+Course+2024+type@html+block@content1", + str(self.mock_block.location), "http://content-link.com", - "broken", + LinkState.BROKEN, ], [ - "course-v1:Test+Course+2024+type@vertical+block@unit1", + str(self.mock_unit.location), "http://unit-link.com", - "locked", + LinkState.LOCKED, ], # Course updates [ - "course-v1:Test+Course+2024+type@course_info+block@updates", + f"{course_key}+type@course_info+block@updates", "http://update1.com", - "broken", + LinkState.BROKEN, ], [ - "course-v1:Test+Course+2024+type@course_info+block@updates", + f"{course_key}+type@course_info+block@updates", "http://update2.com", - "locked", + LinkState.LOCKED, ], # Handouts (should be merged into course_updates) [ - "course-v1:Test+Course+2024+type@course_info+block@handouts", + f"{course_key}+type@course_info+block@handouts", "http://handout.com", - "broken", + LinkState.BROKEN, ], # Custom pages (static tabs) [ - "course-v1:Test+Course+2024+type@static_tab+block@page1", + f"{course_key}+type@static_tab+block@page1", "http://page1.com", - "broken", + LinkState.BROKEN, ], [ - "course-v1:Test+Course+2024+type@static_tab+block@page2", + f"{course_key}+type@static_tab+block@page2", "http://page2.com", - "external-forbidden", + LinkState.EXTERNAL_FORBIDDEN, ], ] @@ -413,17 +418,42 @@ class TestLinkCheckProvider(CourseTestCase): "cms.djangoapps.contentstore.core.course_optimizer_provider._generate_links_descriptor_for_content" ) as mock_content, mock.patch( "cms.djangoapps.contentstore.core.course_optimizer_provider.modulestore" - ) as mock_modulestore: + ) as mock_modulestore, mock.patch( + "cms.djangoapps.contentstore.core.course_optimizer_provider.create_course_info_usage_key" + ) as mock_create_usage_key, mock.patch( + "cms.djangoapps.contentstore.core.course_optimizer_provider.get_course_update_items" + ) as mock_get_update_items, mock.patch( + "cms.djangoapps.contentstore.core.course_optimizer_provider.extract_content_URLs_from_course" + ) as mock_extract_urls: 
mock_content.return_value = {"sections": []} mock_course = self.mock_course - mock_tab1 = StaticTab(name="Page1", url_slug="page1") - mock_tab2 = StaticTab(name="Page2", url_slug="page2") + mock_tab1 = StaticTab(name="Test Page 1", url_slug="page1") + mock_tab2 = StaticTab(name="Test Page 2", url_slug="page2") mock_course.tabs = [mock_tab1, mock_tab2] - mock_course.id = CourseKey.from_string("course-v1:Test+Course+2024") + mock_course.id = course_key mock_modulestore.return_value.get_course.return_value = mock_course - - course_key = CourseKey.from_string("course-v1:Test+Course+2024") + mock_updates_usage_key = Mock() + mock_handouts_usage_key = Mock() + mock_create_usage_key.side_effect = lambda course, info_type: ( + mock_updates_usage_key if info_type == "updates" else mock_handouts_usage_key + ) + mock_updates_block = Mock() + mock_updates_block.data = "Check out this update" + mock_handouts_block = Mock() + mock_handouts_block.data = "Download handout" + mock_get_item_mapping = { + mock_updates_usage_key: mock_updates_block, + mock_handouts_usage_key: mock_handouts_block, + } + mock_modulestore.return_value.get_item.side_effect = ( + lambda usage_key: mock_get_item_mapping.get(usage_key, Mock()) + ) + mock_get_update_items.return_value = [ + {"id": "update1", "date": "2024-01-01", "content": "Update content 1", "status": "visible"}, + {"id": "update2", "date": "2024-01-02", "content": "Update content 2", "status": "visible"} + ] + mock_extract_urls.return_value = ["http://update1.com", "http://update2.com"] result = generate_broken_links_descriptor( json_content, self.user, course_key ) diff --git a/cms/djangoapps/contentstore/rest_api/v0/serializers/course_optimizer.py b/cms/djangoapps/contentstore/rest_api/v0/serializers/course_optimizer.py index 9faef425e4..c1c81b9d6b 100644 --- a/cms/djangoapps/contentstore/rest_api/v0/serializers/course_optimizer.py +++ b/cms/djangoapps/contentstore/rest_api/v0/serializers/course_optimizer.py @@ -50,3 +50,62 @@ class 
LinkCheckSerializer(serializers.Serializer): LinkCheckCreatedAt = serializers.DateTimeField(required=False) LinkCheckOutput = LinkCheckOutputSerializer(required=False) LinkCheckError = serializers.CharField(required=False) + + +class CourseRerunLinkDataSerializer(serializers.Serializer): + """ Serializer for individual course rerun link data """ + url = serializers.CharField(required=True, allow_null=False, allow_blank=False) + type = serializers.CharField(required=True, allow_null=False, allow_blank=False) + id = serializers.CharField(required=True, allow_null=False, allow_blank=False) + + +class CourseRerunLinkUpdateRequestSerializer(serializers.Serializer): + """Serializer for course rerun link update request.""" + + ACTION_CHOICES = ("all", "single") + + action = serializers.ChoiceField(choices=ACTION_CHOICES, required=True) + data = CourseRerunLinkDataSerializer(many=True, required=False) + + def validate(self, attrs): + """ + Validate that 'data' is provided when action is 'single'. 
+ """ + action = attrs.get("action") + data = attrs.get("data") + + if action == "single" and not data: + raise serializers.ValidationError( + {"data": "This field is required when action is 'single'."} + ) + + return attrs + + +class CourseRerunLinkUpdateResultSerializer(serializers.Serializer): + """ Serializer for individual course rerun link update result """ + new_url = serializers.CharField(required=True, allow_null=False, allow_blank=False) + original_url = serializers.CharField(required=False, allow_null=True, allow_blank=True) + type = serializers.CharField(required=True, allow_null=False, allow_blank=True) + id = serializers.CharField(required=True, allow_null=False, allow_blank=False) + success = serializers.BooleanField(required=True) + error_message = serializers.CharField(required=False, allow_null=True, allow_blank=True) + + def to_representation(self, instance): + """ + Override to exclude error_message field when success is True or error_message is null/empty + """ + data = super().to_representation(instance) + if data.get('success') is True or not data.get('error_message'): + data.pop('error_message', None) + + return data + + +class CourseRerunLinkUpdateStatusSerializer(serializers.Serializer): + """ Serializer for course rerun link update status """ + status = serializers.ChoiceField( + choices=['pending', 'in_progress', 'completed', 'failed', 'uninitiated'], + required=True + ) + results = CourseRerunLinkUpdateResultSerializer(many=True, required=False) diff --git a/cms/djangoapps/contentstore/rest_api/v0/tests/test_course_rerun_link_update.py b/cms/djangoapps/contentstore/rest_api/v0/tests/test_course_rerun_link_update.py new file mode 100644 index 0000000000..fa1489545f --- /dev/null +++ b/cms/djangoapps/contentstore/rest_api/v0/tests/test_course_rerun_link_update.py @@ -0,0 +1,160 @@ +""" +Unit tests for Course Rerun Link Update API +""" + +import json +from unittest.mock import Mock, patch + +from django.urls import reverse +from 
class TestCourseLinkUpdateAPI(CourseTestCase):
    """
    Tests for the Course Rerun Link Update API endpoints.

    The feature toggle and the Celery task are patched at the names the view
    module imports, so no task is ever queued by these tests.
    """

    def setUp(self):
        super().setUp()
        self.sample_links_data = [
            {
                "url": "http://localhost:18000/course/course-v1:edX+DemoX+Demo_Course_2023/course",
                "type": "course_content",
                "id": "block-v1:edX+DemoX+Demo_Course+type@html+block@intro",
            },
            {
                "url": "http://localhost:18000/course/course-v1:edX+DemoX+Demo_Course_2023/progress",
                "type": "course_updates",
                "id": "1",
            },
            {
                "url": "http://localhost:18000/course/course-v1:edX+DemoX+Demo_Course_2023/handouts",
                "type": "handouts",
                "id": "block-v1:edX+DemoX+Demo_Course+type@course_info+block@handouts",
            },
        ]

        # Dotted paths handed to ``patch``; built once from the two modules involved.
        views_module = "cms.djangoapps.contentstore.rest_api.v0.views.course_optimizer"
        provider_module = "cms.djangoapps.contentstore.core.course_optimizer_provider"
        self.enable_optimizer_patch = (
            f"{views_module}.enable_course_optimizer_check_prev_run_links"
        )
        self.update_links_patch = f"{views_module}.update_course_rerun_links"
        self.task_status_patch = (
            f"{provider_module}._latest_course_link_update_task_status"
        )
        self.user_task_artifact_patch = f"{provider_module}.UserTaskArtifact"

    def make_post_request(self, course_id=None, data=None, **kwargs):
        """
        POST ``data`` as JSON to the link update endpoint and return the response.

        ``course_id`` defaults to the test course. Extra keyword arguments are
        forwarded to the test client (they used to be accepted but ignored).
        """
        url = self.get_update_url(course_id or self.course.id)
        # ``is not None`` so that an explicitly empty payload is still serialized.
        body = json.dumps(data) if data is not None else None
        return self.client.post(
            url,
            data=body,
            content_type="application/json",
            **kwargs,
        )

    def get_update_url(self, course_key):
        """Get the update endpoint URL"""
        return reverse(
            "cms.djangoapps.contentstore:v0:rerun_link_update",
            kwargs={"course_id": str(course_key)},
        )

    def get_status_url(self, course_key):
        """Get the status endpoint URL"""
        return reverse(
            "cms.djangoapps.contentstore:v0:rerun_link_update_status",
            kwargs={"course_id": str(course_key)},
        )

    def test_post_update_all_links_success(self):
        """Test successful request to update all links"""
        with patch(self.enable_optimizer_patch, return_value=True), \
                patch(self.update_links_patch) as mock_task:
            mock_task.delay.return_value = Mock()

            response = self.make_post_request(data={"action": "all"})

            self.assertEqual(response.status_code, 200)
            self.assertIn("status", response.json())
            mock_task.delay.assert_called_once()

    def test_post_update_single_links_success(self):
        """Test successful request to update single links"""
        with patch(self.enable_optimizer_patch, return_value=True), \
                patch(self.update_links_patch) as mock_task:
            mock_task.delay.return_value = Mock()

            data = {
                "action": "single",
                "data": [
                    {
                        "url": "http://localhost:18000/course/course-v1:edX+DemoX+Demo_Course/course",
                        "type": "course_content",
                        "id": "block-v1:edX+DemoX+Demo_Course+type@html+block@abc123",
                    },
                    {
                        "url": "http://localhost:18000/course/course-v1:edX+DemoX+Demo_Course/progress",
                        "type": "course_updates",
                        "id": "1",
                    },
                ],
            }
            response = self.make_post_request(data=data)

            self.assertEqual(response.status_code, 200)
            self.assertIn("status", response.json())
            mock_task.delay.assert_called_once()

    def test_post_update_missing_action_returns_400(self):
        """Test that missing action parameter returns 400"""
        with patch(self.enable_optimizer_patch, return_value=True):
            response = self.make_post_request(data={})

            self.assertEqual(response.status_code, 400)
            self.assertIn("error", response.json())
            self.assertIn("action", response.json()["error"])

    def test_error_handling_workflow(self):
        """Test error handling in the complete workflow"""
        with patch(self.enable_optimizer_patch, return_value=True), \
                patch(self.update_links_patch) as mock_task:
            # Step 1: Start task
            mock_task.delay.return_value = Mock()

            response = self.make_post_request(data={"action": "all"})
            self.assertEqual(response.status_code, 200)

            # Step 2: Check failed status. The artifact model is patched but never
            # inspected, so it is not bound to a name.
            with patch(self.task_status_patch) as mock_status, \
                    patch(self.user_task_artifact_patch):
                mock_task_status = Mock()
                mock_task_status.state = UserTaskStatus.FAILED
                mock_status.return_value = mock_task_status

                status_url = self.get_status_url(self.course.id)
                status_response = self.client.get(status_url)

                self.assertEqual(status_response.status_code, 200)
                status_data = status_response.json()
                self.assertEqual(status_data["status"], "Failed")
                self.assertEqual(status_data["results"], [])
YoutubeTranscriptUploadView, @@ -114,4 +116,13 @@ urlpatterns = [ fr'^link_check_status/{settings.COURSE_ID_PATTERN}$', LinkCheckStatusView.as_view(), name='link_check_status' ), + + re_path( + fr'^rerun_link_update/{settings.COURSE_ID_PATTERN}$', + RerunLinkUpdateView.as_view(), name='rerun_link_update' + ), + re_path( + fr'^rerun_link_update_status/{settings.COURSE_ID_PATTERN}$', + RerunLinkUpdateStatusView.as_view(), name='rerun_link_update_status' + ), ] diff --git a/cms/djangoapps/contentstore/rest_api/v0/views/__init__.py b/cms/djangoapps/contentstore/rest_api/v0/views/__init__.py index 2ce3ea22ea..5714754b19 100644 --- a/cms/djangoapps/contentstore/rest_api/v0/views/__init__.py +++ b/cms/djangoapps/contentstore/rest_api/v0/views/__init__.py @@ -4,6 +4,6 @@ Views for v0 contentstore API. from .advanced_settings import AdvancedCourseSettingsView from .api_heartbeat import APIHeartBeatView from .authoring_grading import AuthoringGradingView -from .course_optimizer import LinkCheckView, LinkCheckStatusView -from .tabs import CourseTabSettingsView, CourseTabListView, CourseTabReorderView +from .course_optimizer import LinkCheckStatusView, LinkCheckView, RerunLinkUpdateStatusView, RerunLinkUpdateView +from .tabs import CourseTabListView, CourseTabReorderView, CourseTabSettingsView from .transcripts import TranscriptView, YoutubeTranscriptCheckView, YoutubeTranscriptUploadView diff --git a/cms/djangoapps/contentstore/rest_api/v0/views/course_optimizer.py b/cms/djangoapps/contentstore/rest_api/v0/views/course_optimizer.py index b98255ebd3..bd37ae8379 100644 --- a/cms/djangoapps/contentstore/rest_api/v0/views/course_optimizer.py +++ b/cms/djangoapps/contentstore/rest_api/v0/views/course_optimizer.py @@ -1,17 +1,33 @@ -""" API Views for Course Optimizer. 
""" +"""API Views for Course Optimizer.""" + import edx_api_doc_tools as apidocs +from opaque_keys import InvalidKeyError from opaque_keys.edx.keys import CourseKey -from rest_framework.views import APIView +from rest_framework import status from rest_framework.request import Request from rest_framework.response import Response +from rest_framework.views import APIView from user_tasks.models import UserTaskStatus -from cms.djangoapps.contentstore.core.course_optimizer_provider import get_link_check_data, sort_course_sections -from cms.djangoapps.contentstore.rest_api.v0.serializers.course_optimizer import LinkCheckSerializer -from cms.djangoapps.contentstore.tasks import check_broken_links +from cms.djangoapps.contentstore.core.course_optimizer_provider import ( + get_course_link_update_data, + get_link_check_data, + sort_course_sections, +) +from cms.djangoapps.contentstore.rest_api.v0.serializers.course_optimizer import ( + CourseRerunLinkUpdateStatusSerializer, + LinkCheckSerializer, + CourseRerunLinkUpdateRequestSerializer, +) +from cms.djangoapps.contentstore.tasks import check_broken_links, update_course_rerun_links +from cms.djangoapps.contentstore.toggles import enable_course_optimizer_check_prev_run_links from common.djangoapps.student.auth import has_course_author_access, has_studio_read_access from common.djangoapps.util.json_request import JsonResponse -from openedx.core.lib.api.view_utils import DeveloperErrorViewMixin, verify_course_exists, view_auth_classes +from openedx.core.lib.api.view_utils import ( + DeveloperErrorViewMixin, + verify_course_exists, + view_auth_classes, +) @view_auth_classes(is_authenticated=True) @@ -113,7 +129,14 @@ class LinkCheckStatusView(DeveloperErrorViewMixin, APIView): "brokenLinks": [, ...], "lockedLinks": [, ...], "externalForbiddenLinks": [, ...], - "previousRunLinks": [, ...] + "previousRunLinks": [ + { + "originalLink": , + "isUpdated": , + "updatedLink": + }, + ... 
@view_auth_classes(is_authenticated=True)
class RerunLinkUpdateView(DeveloperErrorViewMixin, APIView):
    """
    View for queueing a celery task to update course links to the latest re-run.
    """

    @apidocs.schema(
        parameters=[
            apidocs.string_parameter(
                "course_id", apidocs.ParameterLocation.PATH, description="Course ID"
            )
        ],
        body=CourseRerunLinkUpdateRequestSerializer,
        responses={
            200: "Celery task queued.",
            400: "Bad request - invalid action or missing data.",
            401: "The requester is not authenticated.",
            403: "The requester cannot access the specified course.",
            404: "The requested course does not exist.",
        },
    )
    @verify_course_exists()
    def post(self, request: Request, course_id: str):
        """
        Queue celery task to update course links to the latest re-run.

        **Example Request - Update All Links**
            POST /api/contentstore/v0/rerun_link_update/{course_id}
            ```json
            {
                "action": "all"
            }
            ```

        **Example Request - Update Single Links**
            POST /api/contentstore/v0/rerun_link_update/{course_id}
            ```json
            {
                "action": "single",
                "data": [
                    {
                        "url": "http://localhost:18000/course/course-v1:edX+DemoX+Demo_Course/course",
                        "type": "course_updates",
                        "id": "block_id_123"
                    }
                ]
            }
            ```

        **Response Values**
            ```json
            {
                "status": "pending"
            }
            ```
        """
        try:
            course_key = CourseKey.from_string(course_id)
        except (InvalidKeyError, IndexError):
            return JsonResponse(
                {"error": "Invalid course id, it does not exist"},
                status=status.HTTP_404_NOT_FOUND,
            )

        # Check course author permissions
        if not has_course_author_access(request.user, course_key):
            self.permission_denied(request)

        if not enable_course_optimizer_check_prev_run_links(course_key):
            return JsonResponse(
                {
                    "error": "Course optimizer check for previous run links is not enabled."
                },
                status=status.HTTP_400_BAD_REQUEST,
            )

        # A JSON body that is not an object (for example a bare list) has no
        # ``get``; treat it as an empty payload so the caller receives the 400
        # below instead of an unhandled AttributeError.
        payload = request.data if isinstance(request.data, dict) else {}

        action = payload.get("action")
        if action not in ("all", "single"):
            return JsonResponse(
                {"error": 'Invalid or missing action. Must be "all" or "single".'},
                status=status.HTTP_400_BAD_REQUEST,
            )

        links = payload.get("data", [])
        if action == "single" and (not links or not isinstance(links, list)):
            return JsonResponse(
                {
                    'data': "This field is required when action is 'single'."
                },
                status=status.HTTP_400_BAD_REQUEST,
            )

        update_course_rerun_links.delay(
            request.user.id,
            course_id,
            action,
            links,
            request.LANGUAGE_CODE,
        )

        return JsonResponse({"status": UserTaskStatus.PENDING})
+ + Possible statuses: + 'pending', 'in_progress', 'completed', 'failed', 'uninitiated' + + **Example Request** + + GET /api/contentstore/v0/rerun_link_update_status/{course_id} + + **Example Response - Task In Progress** + + ```json + { + "status": "pending" + } + ``` + + **Example Response - Task Completed** + + ```json + { + "status": "completed", + "results": [ + { + "id": "block_id_123", + "type": "course_updates", + "new_url": "http://localhost:18000/course/course-v1:edX+DemoX+2024_Q2/course", + "success": true + }, + { + "id": "block_id_456", + "type": "course_updates", + "new_url": "http://localhost:18000/course/course-v1:edX+DemoX+2024_Q2/progress", + "success": true + } + ] + } + ``` + + **Example Response - Task Failed** + + ```json + { + "status": "failed", + "error": "Target course run not found or inaccessible" + } + ``` + """ + try: + course_key = CourseKey.from_string(course_id) + except (InvalidKeyError, IndexError): + return JsonResponse( + {"error": "Invalid course id, it does not exist"}, + status=status.HTTP_404_NOT_FOUND, + ) + + # Check course author permissions + if not has_course_author_access(request.user, course_key): + self.permission_denied(request) + + if not enable_course_optimizer_check_prev_run_links(course_key): + return JsonResponse( + { + "error": "Course optimizer check for previous run links is not enabled." + }, + status=status.HTTP_400_BAD_REQUEST, + ) + + data = get_course_link_update_data(request, course_id) + serializer = CourseRerunLinkUpdateStatusSerializer(data) return Response(serializer.data) diff --git a/cms/djangoapps/contentstore/tasks.py b/cms/djangoapps/contentstore/tasks.py index 419d04f571..fd7c9b9652 100644 --- a/cms/djangoapps/contentstore/tasks.py +++ b/cms/djangoapps/contentstore/tasks.py @@ -1261,7 +1261,7 @@ def _scan_course_for_links(course_key): # and it doesn't contain user-facing links to scan. 
def _update_course_rerun_links(
    task_instance, user_id, course_id, action, data, language
):
    """
    Update links that still reference the previous course run.

    Progress is reported on ``task_instance.status`` in two steps ("Scanning",
    then "Updating"). The per-link results are stored as a JSON
    ``UserTaskArtifact`` named "LinkUpdateResults", and the course's broken
    links file is refreshed afterwards. Any exception marks the task as failed.

    Args:
        task_instance: The Celery task instance
        user_id: ID of the user requesting the update
        course_id: String representation of the course key
        action: 'all' or 'single'
        data: List of specific links to update (when action='single')
        language: Language code for translations
    """
    user = _validate_user(task_instance, user_id, language)
    if not user:
        return

    task_instance.status.set_state(UserTaskStatus.IN_PROGRESS)
    try:
        # Resolve the keys inside the ``try`` so that a malformed course id or a
        # failing previous-run lookup fails the task instead of leaving it
        # in progress forever.
        course_key = CourseKey.from_string(course_id)
        prev_run_course_key = get_previous_run_course_key(course_key)

        task_instance.status.set_state("Scanning")

        if action == "all":
            # Keep only the course-specific links that need updating
            links_to_update = [
                {
                    "id": block_id,
                    "url": url,
                    "type": _determine_link_type(block_id),
                }
                for block_id, url in _scan_course_for_links(course_key)
                if _course_link_update_required(url, course_key, prev_run_course_key)
            ]
        else:
            # Process only the links supplied by the caller
            links_to_update = data or []

        task_instance.status.increment_completed_steps()

        task_instance.status.set_state("Updating")

        updated_links = []
        for link_data in links_to_update:
            # Start from the "failed" shape; promoted to success below.
            result = {
                "original_url": link_data.get("url", ""),
                "new_url": link_data.get("url", ""),
                "type": link_data.get("type", "unknown"),
                "id": link_data.get("id", ""),
                "success": False,
            }
            try:
                result["new_url"] = _update_link_to_latest_rerun(
                    link_data, course_key, prev_run_course_key, user
                )
                result["success"] = True
            except Exception as e:  # pylint: disable=broad-except
                # One bad link must not abort the rest of the batch.
                LOGGER.error(
                    f'Failed to update link {link_data.get("url", "")}: {str(e)}'
                )
                result["error_message"] = str(e)
            updated_links.append(result)

        task_instance.status.increment_completed_steps()

        file_name = f"{str(course_key)}_link_updates"
        # The context manager guarantees the temporary file is closed (and so
        # removed) once the artifact has been copied into storage.
        with NamedTemporaryFile(prefix=file_name + ".", suffix=".json") as results_file:
            results_file.write(json.dumps(updated_links, indent=4).encode("utf-8"))
            results_file.flush()
            results_file.seek(0)

            artifact = UserTaskArtifact(
                status=task_instance.status, name="LinkUpdateResults"
            )
            artifact.file.save(
                name=os.path.basename(results_file.name), content=File(results_file)
            )
            artifact.save()

        # Update the existing broken links file to reflect the updated links
        _update_broken_links_file_with_updated_links(course_key, updated_links)

        task_instance.status.succeed()

    except Exception as e:  # pylint: disable=broad-except
        # ``course_id`` is used here because ``course_key`` may not be bound yet.
        LOGGER.exception("Error updating links for course %s", course_id)
        if task_instance.status.state != UserTaskStatus.FAILED:
            task_instance.status.fail({"raw_error_msg": str(e)})
def _update_link_to_latest_rerun(link_data, course_key, prev_run_course_key, user):
    """
    Updates a single link to point to the latest course re-run.

    The previous run's course key string inside the URL is replaced with the
    current course key string, and the block that holds the link is updated.
    The URL is returned unchanged, and no block is touched, when it is empty,
    when there is no distinct previous run, when the previous run belongs to a
    different org/course, or when the URL does not mention the previous run.

    Args:
        link_data: Dictionary containing link information ('url', 'id', 'type')
        course_key: The current course key
        prev_run_course_key: The previous course run key (may be None)
        user: The authenticated user making the request

    Returns:
        str: The updated URL, or the original URL when nothing was changed
    """
    original_url = link_data.get("url", "")
    if not original_url:
        return original_url

    # Nothing to rewrite without a previous run that differs from this one.
    if not prev_run_course_key or prev_run_course_key == course_key:
        return original_url

    # Only rewrite between runs of the same org and course.
    if (
        prev_run_course_key.org != course_key.org
        or prev_run_course_key.course != course_key.course
    ):
        return original_url

    new_url = original_url.replace(str(prev_run_course_key), str(course_key))
    if new_url == original_url:
        # The URL does not reference the previous run. Skipping here avoids a
        # no-op modulestore write (and publish) for content that did not change.
        return original_url

    block_id = link_data.get("id", "")
    link_type = link_data.get("type", "course_content")

    # condition because we're showing handouts as updates
    if link_type == "course_updates" and "handouts" in str(block_id):
        link_type = "handouts"

    _update_block_content_with_new_url(
        block_id, original_url, new_url, link_type, course_key, user
    )

    return new_url
def _update_handouts_link(block_id, old_url, new_url, course_key, user):
    """
    Replace ``old_url`` with ``new_url`` in the course handouts block.

    The handouts block is looked up from ``course_key``; it is saved only when
    it has a ``data`` field that contains ``old_url``.

    Args:
        block_id: The ID of the block containing the link (not used for the lookup)
        old_url: The original URL to replace
        new_url: The new URL to use
        course_key: The current course key
        user: The authenticated user making the request
    """
    store = modulestore()
    handouts_key = course_key.make_usage_key("course_info", "handouts")
    handouts_block = store.get_item(handouts_key)

    # Leave the block alone unless it actually contains the old URL.
    if not hasattr(handouts_block, "data") or old_url not in handouts_block.data:
        return

    handouts_block.data = handouts_block.data.replace(old_url, new_url)
    store.update_item(handouts_block, user.id)
    LOGGER.info(f"Updated handouts with new URL: {old_url} -> {new_url}")
def _update_course_content_link(block_id, old_url, new_url, course_key, user):
    """
    Replace ``old_url`` with ``new_url`` in a course content block and publish it.

    The block is saved and published only when it has a ``data`` field that
    contains ``old_url``. An unparseable ``block_id`` is logged as a warning and
    otherwise ignored.

    Args:
        block_id: The ID of the block containing the link (usage key string)
        old_url: The original URL to replace
        new_url: The new URL to use
        course_key: The current course key
        user: The authenticated user making the request
    """
    store = modulestore()
    try:
        content_block = store.get_item(UsageKey.from_string(block_id))

        # Leave the block alone unless it actually contains the old URL.
        if not hasattr(content_block, "data") or old_url not in content_block.data:
            return

        content_block.data = content_block.data.replace(old_url, new_url)
        store.update_item(content_block, user.id)
        store.publish(content_block.location, user.id)
        LOGGER.info(
            f"Updated block {block_id} data with new URL: {old_url} -> {new_url}"
        )
    except InvalidKeyError:
        LOGGER.warning(f"Invalid usage key for block: {block_id}")
def _successful_link_updates(updated_links, course_key):
    """Return normalized entries for the update results that succeeded and have an original URL."""
    successful_results = []
    for result in updated_links:
        if not result.get("success"):
            continue
        original_url = (
            result.get("original_url")
            or _get_original_url_from_updated_result(result, course_key)
        )
        if not original_url:
            continue
        result_id = result.get("id")
        successful_results.append(
            {
                "original_url": original_url,
                "new_url": result.get("new_url"),
                "type": result.get("type"),
                "id": str(result_id) if result_id is not None else None,
            }
        )
    return successful_results


def _apply_link_updates(existing_broken_links, successful_results):
    """Return the broken-link rows with updated URLs substituted where a result matches."""
    updated_broken_links = []
    for link in existing_broken_links:
        # Rows that are not at least [block_id, url, state] are passed through untouched.
        if len(link) < 3:
            updated_broken_links.append(link)
            continue

        block_id, url, link_state = link[0], link[1], link[2]
        replacement = link
        for res in successful_results:
            if res["original_url"] != url:
                continue
            if _update_result_applies_to_block(res, block_id) and res.get('id') == str(block_id):
                replacement = [block_id, res["new_url"], link_state]
                break
        updated_broken_links.append(replacement)
    return updated_broken_links


def _update_broken_links_file_with_updated_links(course_key, updated_links):
    """
    Updates the existing broken links file to reflect the status of updated links.

    The newest "BrokenLinks" artifact whose task name contains the course key is
    loaded, every row whose URL and block id match a successful update result
    gets the new URL, and the artifact's file is replaced with the result. All
    failures are logged and swallowed: refreshing this file is best effort and
    must not fail the link update task.

    Args:
        course_key: The current course key
        updated_links: List of updated link results from the link update task
    """
    try:
        # Find the latest broken links task artifact for this course
        latest_artifact = UserTaskArtifact.objects.filter(
            name="BrokenLinks", status__name__contains=str(course_key)
        ).order_by("-created").first()

        if not latest_artifact or not latest_artifact.file:
            LOGGER.debug(f"No broken links file found for course {course_key}")
            return

        # Read the existing broken links file
        try:
            with latest_artifact.file.open("r") as file:
                existing_broken_links = json.load(file)
        except (json.JSONDecodeError, IOError) as e:
            LOGGER.error(
                f"Failed to read broken links file for course {course_key}: {e}"
            )
            return

        successful_results = _successful_link_updates(updated_links, course_key)
        updated_broken_links = _apply_link_updates(
            existing_broken_links, successful_results
        )

        file_name = f"{course_key}_updated"
        # The context manager guarantees the temporary file is closed (and so
        # removed) once its content has been copied into the artifact storage.
        with NamedTemporaryFile(prefix=file_name + ".", suffix=".json") as updated_file:
            updated_file.write(
                json.dumps(updated_broken_links, indent=4).encode("utf-8")
            )
            updated_file.flush()
            updated_file.seek(0)

            # Update the existing artifact with the new file
            latest_artifact.file.save(
                name=os.path.basename(updated_file.name), content=File(updated_file)
            )
        latest_artifact.save()

        LOGGER.info(f"Successfully updated broken links file for course {course_key}")

    except Exception as e:  # pylint: disable=broad-except
        LOGGER.error(f"Failed to update broken links file for course {course_key}: {e}")
+ """ + try: + result_type = (result_entry.get("type") or "course_content").lower() + result_id = result_entry.get("id") + block_id_str = str(block_id) if block_id is not None else "" + result_id_str = str(result_id) if result_id is not None else None + + if result_id_str and block_id_str == result_id_str: + return True + + is_course_info = "course_info" in block_id_str + is_updates_section = "updates" in block_id_str + is_handouts_section = "handouts" in block_id_str + is_static_tab = "static_tab" in block_id_str + + block_category = ( + "course_updates" if is_course_info and is_updates_section else + "handouts" if is_course_info and is_handouts_section else + "custom_pages" if is_static_tab else + "course_content" + ) + + return block_category == result_type + except Exception: # pylint: disable=broad-except + return False diff --git a/cms/djangoapps/contentstore/toggles.py b/cms/djangoapps/contentstore/toggles.py index 21d0b90c23..c287f8c4db 100644 --- a/cms/djangoapps/contentstore/toggles.py +++ b/cms/djangoapps/contentstore/toggles.py @@ -669,7 +669,7 @@ def use_legacy_logged_out_home(): # after creating a course rerun. # .. toggle_use_cases: temporary # .. toggle_creation_date: 2025-07-21 -# .. toggle_target_removal_date: None +# .. toggle_target_removal_date: 2026-02-25 ENABLE_COURSE_OPTIMIZER_CHECK_PREV_RUN_LINKS = CourseWaffleFlag( f'{CONTENTSTORE_NAMESPACE}.enable_course_optimizer_check_prev_run_links', __name__,