feat: reindex course and recreate upstream links post import and course re-run (#37237)

Index the newly created course after an import or a course re-run. Also recreate upstream links after a course re-run.

We make use of the newly created COURSE_RERUN_COMPLETED signal to run post-re-run processes.
Navin Karkera
2025-08-21 23:45:55 +05:30 (committed by GitHub)
parent d132efa08d, commit 617b6447cc
9 changed files with 80 additions and 34 deletions


@@ -23,6 +23,7 @@ from openedx_events.content_authoring.data import (
from openedx_events.content_authoring.signals import (
    COURSE_CATALOG_INFO_CHANGED,
    COURSE_IMPORT_COMPLETED,
+    COURSE_RERUN_COMPLETED,
    LIBRARY_BLOCK_DELETED,
    LIBRARY_CONTAINER_DELETED,
    XBLOCK_CREATED,
@@ -304,10 +305,10 @@ def delete_upstream_downstream_link_handler(**kwargs):
    ).delete()


-@receiver(COURSE_IMPORT_COMPLETED)
-def handle_new_course_import(**kwargs):
+@receiver([COURSE_IMPORT_COMPLETED, COURSE_RERUN_COMPLETED])
+def handle_upstream_links_on_signal(**kwargs):
    """
-    Automatically create upstream->downstream links for course in database on new import.
+    Automatically create upstream->downstream links for course in database on new import or rerun.
    """
    course_data = kwargs.get("course", None)
    if not course_data or not isinstance(course_data, CourseData):
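
The list form of @receiver used above is a stock Django feature: one callable can subscribe to several signals at once, which is what lets a single handler cover both the import and the re-run events. A minimal standalone sketch of the mechanism (the signals and handler below are illustrative, not part of this PR):

from django.dispatch import Signal, receiver

signal_a = Signal()  # stand-ins for COURSE_IMPORT_COMPLETED / COURSE_RERUN_COMPLETED
signal_b = Signal()

@receiver([signal_a, signal_b])
def shared_handler(sender, **kwargs):
    # Django invokes this for either signal; the payload arrives in kwargs.
    print(f"received {kwargs} from {sender!r}")

signal_a.send(sender=None, course="demo")
signal_b.send(sender=None, course="demo")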


@@ -35,6 +35,8 @@ from olxcleaner.reporting import report_error_summary, report_errors
from opaque_keys import InvalidKeyError
from opaque_keys.edx.keys import CourseKey, UsageKey
from opaque_keys.edx.locator import LibraryContainerLocator, LibraryLocator, BlockUsageLocator
+from openedx_events.content_authoring.data import CourseData
+from openedx_events.content_authoring.signals import COURSE_RERUN_COMPLETED
from organizations.api import add_organization_course, ensure_organization
from organizations.exceptions import InvalidOrganizationException
from organizations.models import Organization
@@ -176,6 +178,12 @@ def rerun_course(source_course_key_string, destination_course_key_string, user_i
        # update state: Succeeded
        CourseRerunState.objects.succeeded(course_key=destination_course_key)

+        COURSE_RERUN_COMPLETED.send_event(
+            time=datetime.now(timezone.utc),
+            course=CourseData(
+                course_key=destination_course_key
+            )
+        )

        # call edxval to attach videos to the rerun
        copy_course_videos(source_course_key, destination_course_key)
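
The event is emitted only after CourseRerunState.objects.succeeded, so receivers can assume the destination course already exists. send_event checks the keyword arguments against the event's declared payload (course must be a CourseData) and then delivers them through the regular Django signal machinery. A hedged round-trip sketch, assuming a configured Django/openedx-events environment; the handler and course key are illustrative:

from datetime import datetime, timezone

from opaque_keys.edx.keys import CourseKey
from openedx_events.content_authoring.data import CourseData
from openedx_events.content_authoring.signals import COURSE_RERUN_COMPLETED

def on_rerun(**kwargs):
    # The payload arrives as kwargs; "course" is the CourseData instance sent below.
    print(kwargs["course"].course_key)

COURSE_RERUN_COMPLETED.connect(on_rerun)
COURSE_RERUN_COMPLETED.send_event(
    time=datetime.now(timezone.utc),
    course=CourseData(course_key=CourseKey.from_string("course-v1:OrgX+Demo+2025")),
)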


@@ -18,7 +18,7 @@ from meilisearch import Client as MeilisearchClient
from meilisearch.errors import MeilisearchApiError, MeilisearchError
from meilisearch.models.task import TaskInfo
from opaque_keys import OpaqueKey
-from opaque_keys.edx.keys import UsageKey
+from opaque_keys.edx.keys import CourseKey, UsageKey
from opaque_keys.edx.locator import (
    LibraryCollectionLocator,
    LibraryContainerLocator,
@@ -397,6 +397,34 @@ def init_index(status_cb: Callable[[str], None] | None = None, warn_cb: Callable
    reset_index(status_cb)


+def index_course(course_key: CourseKey, index_name: str | None = None) -> list:
+    """
+    Rebuilds the index for a given course.
+    """
+    store = modulestore()
+    client = _get_meilisearch_client()
+    docs = []
+    if index_name is None:
+        index_name = STUDIO_INDEX_NAME
+    # Pre-fetch the course with all of its children:
+    course = store.get_course(course_key, depth=None)
+
+    def add_with_children(block):
+        """ Recursively index the given XBlock/component """
+        doc = searchable_doc_for_course_block(block)
+        doc.update(searchable_doc_tags(block.usage_key))
+        docs.append(doc)  # pylint: disable=cell-var-from-loop
+        _recurse_children(block, add_with_children)  # pylint: disable=cell-var-from-loop
+
+    # Index course children
+    _recurse_children(course, add_with_children)
+
+    if docs:
+        # Add all the docs in this course at once (usually faster than adding one at a time):
+        _wait_for_meili_task(client.index(index_name).add_documents(docs))
+    return docs


def rebuild_index(status_cb: Callable[[str], None] | None = None, incremental=False) -> None:  # lint-amnesty, pylint: disable=too-many-statements
    """
    Rebuild the Meilisearch index from scratch
@@ -405,7 +433,6 @@ def rebuild_index(status_cb: Callable[[str], None] | None = None, incremental=Fa
        status_cb = log.info

    client = _get_meilisearch_client()
-    store = modulestore()

    # Get the lists of libraries
    status_cb("Counting libraries...")
@@ -559,26 +586,6 @@ def rebuild_index(status_cb: Callable[[str], None] | None = None, incremental=Fa
status_cb("Indexing courses...")
# To reduce memory usage on large instances, split up the CourseOverviews into pages of 1,000 courses:
def index_course(course: CourseOverview) -> list:
docs = []
# Pre-fetch the course with all of its children:
course = store.get_course(course.id, depth=None)
def add_with_children(block):
""" Recursively index the given XBlock/component """
doc = searchable_doc_for_course_block(block)
doc.update(searchable_doc_tags(block.usage_key))
docs.append(doc) # pylint: disable=cell-var-from-loop
_recurse_children(block, add_with_children) # pylint: disable=cell-var-from-loop
# Index course children
_recurse_children(course, add_with_children)
if docs:
# Add all the docs in this course at once (usually faster than adding one at a time):
_wait_for_meili_task(client.index(index_name).add_documents(docs))
return docs
paginator = Paginator(CourseOverview.objects.only('id', 'display_name'), 1000)
for p in paginator.page_range:
for course in paginator.page(p).object_list:
@@ -588,7 +595,7 @@ def rebuild_index(status_cb: Callable[[str], None] | None = None, incremental=Fa
            if course.id in keys_indexed:
                num_contexts_done += 1
                continue
-            course_docs = index_course(course)
+            course_docs = index_course(course.id, index_name)
            if incremental:
                IncrementalIndexCompleted.objects.get_or_create(context_key=course.id)
            num_contexts_done += 1
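
Hoisting index_course from a closure inside rebuild_index to module level gives single-course callers an entry point, while rebuild_index now just passes course.id and the target index name. A hedged usage sketch; the api import path is assumed from the task module shown below, and the course key is illustrative:

from opaque_keys.edx.keys import CourseKey

from openedx.core.djangoapps.content.search import api  # import path assumed

course_key = CourseKey.from_string("course-v1:OrgX+Demo+2025")  # illustrative key
docs = api.index_course(course_key)  # index_name=None falls back to STUDIO_INDEX_NAME
print(f"indexed {len(docs)} documents for {course_key}")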


@@ -12,6 +12,7 @@ from opaque_keys.edx.locator import LibraryCollectionLocator, LibraryContainerLo
from openedx_events.content_authoring.data import (
    ContentLibraryData,
    ContentObjectChangedData,
+    CourseData,
    LibraryBlockData,
    LibraryCollectionData,
    LibraryContainerData,
@@ -20,21 +21,23 @@ from openedx_events.content_authoring.data import (
from openedx_events.content_authoring.signals import (
    CONTENT_LIBRARY_DELETED,
    CONTENT_LIBRARY_UPDATED,
+    CONTENT_OBJECT_ASSOCIATIONS_CHANGED,
+    COURSE_IMPORT_COMPLETED,
+    COURSE_RERUN_COMPLETED,
    LIBRARY_BLOCK_CREATED,
    LIBRARY_BLOCK_DELETED,
-    LIBRARY_BLOCK_UPDATED,
    LIBRARY_BLOCK_PUBLISHED,
+    LIBRARY_BLOCK_UPDATED,
    LIBRARY_COLLECTION_CREATED,
    LIBRARY_COLLECTION_DELETED,
    LIBRARY_COLLECTION_UPDATED,
    LIBRARY_CONTAINER_CREATED,
    LIBRARY_CONTAINER_DELETED,
-    LIBRARY_CONTAINER_UPDATED,
    LIBRARY_CONTAINER_PUBLISHED,
+    LIBRARY_CONTAINER_UPDATED,
    XBLOCK_CREATED,
    XBLOCK_DELETED,
    XBLOCK_UPDATED,
-    CONTENT_OBJECT_ASSOCIATIONS_CHANGED,
)
from openedx.core.djangoapps.content.course_overviews.models import CourseOverview
@@ -54,6 +57,7 @@ from .tasks import (
    update_content_library_index_docs,
    update_library_collection_index_doc,
    update_library_container_index_doc,
+    upsert_course_blocks_docs,
    upsert_library_block_index_doc,
    upsert_xblock_index_doc,
)
@@ -327,3 +331,16 @@ def library_container_deleted(**kwargs) -> None:
# TODO: post-Teak, move all the celery tasks directly inline into these handlers? Because now the
# events are emitted in an [async] worker, so it doesn't matter if the handlers are synchronous.
# See https://github.com/openedx/edx-platform/pull/36640 discussion.


+@receiver([COURSE_IMPORT_COMPLETED, COURSE_RERUN_COMPLETED])
+def handle_reindex_on_signal(**kwargs):
+    """
+    Automatically update Meilisearch index for course in database on new import or rerun.
+    """
+    course_data = kwargs.get("course", None)
+    if not course_data or not isinstance(course_data, CourseData):
+        log.error("Received null or incorrect data for event")
+        return
+
+    upsert_course_blocks_docs.delay(str(course_data.course_key))
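
Taken together, the chain is: COURSE_IMPORT_COMPLETED or COURSE_RERUN_COMPLETED fires, this handler validates the CourseData payload, and the actual indexing is deferred to the upsert_course_blocks_docs celery task (added below), which calls api.index_course for the whole course.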


@@ -10,7 +10,7 @@ from celery import shared_task
from celery_utils.logged_task import LoggedTask
from edx_django_utils.monitoring import set_code_owner_attribute
from meilisearch.errors import MeilisearchError
-from opaque_keys.edx.keys import UsageKey
+from opaque_keys.edx.keys import CourseKey, UsageKey
from opaque_keys.edx.locator import (
    LibraryCollectionLocator,
    LibraryContainerLocator,
@@ -36,6 +36,19 @@ def upsert_xblock_index_doc(usage_key_str: str, recursive: bool) -> None:
    api.upsert_xblock_index_doc(usage_key, recursive)


+@shared_task(base=LoggedTask, autoretry_for=(MeilisearchError, ConnectionError))
+@set_code_owner_attribute
+def upsert_course_blocks_docs(course_key_str: str) -> None:
+    """
+    Celery task to update the content index document for all XBlocks in a course.
+    """
+    course_key = CourseKey.from_string(course_key_str)
+    log.info("Updating content index documents for XBlocks in course with id: %s", course_key)
+    api.index_course(course_key)


@shared_task(base=LoggedTask, autoretry_for=(MeilisearchError, ConnectionError))
@set_code_owner_attribute
def delete_xblock_index_doc(usage_key_str: str) -> None:
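
Like the neighboring tasks, the new task takes the course key as a string (celery arguments must be serializable) and parses it back into a CourseKey. A hedged sketch of invoking it, with an illustrative key; upsert_course_blocks_docs is the task defined above:

from opaque_keys.edx.keys import CourseKey

course_key = CourseKey.from_string("course-v1:OrgX+Demo+2025")  # illustrative key

# Asynchronously via a worker, as the signal handler does:
upsert_course_blocks_docs.delay(str(course_key))

# Or eagerly in-process (useful in tests; no worker required):
upsert_course_blocks_docs.apply(args=[str(course_key)])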


@@ -833,7 +833,7 @@ openedx-django-require==3.0.0
# via -r requirements/edx/kernel.in
openedx-django-wiki==3.1.1
# via -r requirements/edx/kernel.in
-openedx-events==10.4.0
+openedx-events==10.5.0
# via
# -r requirements/edx/kernel.in
# edx-enterprise


@@ -1387,7 +1387,7 @@ openedx-django-wiki==3.1.1
# via
# -r requirements/edx/doc.txt
# -r requirements/edx/testing.txt
-openedx-events==10.4.0
+openedx-events==10.5.0
# via
# -r requirements/edx/doc.txt
# -r requirements/edx/testing.txt


@@ -1009,7 +1009,7 @@ openedx-django-require==3.0.0
# via -r requirements/edx/base.txt
openedx-django-wiki==3.1.1
# via -r requirements/edx/base.txt
-openedx-events==10.4.0
+openedx-events==10.5.0
# via
# -r requirements/edx/base.txt
# edx-enterprise


@@ -1055,7 +1055,7 @@ openedx-django-require==3.0.0
# via -r requirements/edx/base.txt
openedx-django-wiki==3.1.1
# via -r requirements/edx/base.txt
-openedx-events==10.4.0
+openedx-events==10.5.0
# via
# -r requirements/edx/base.txt
# edx-enterprise