feat: reindex course and recreate upstream links post import and course re-run (#37237)

Index the newly created course after an import or a course re-run. Also recreate upstream links after a course re-run.

We make use of the newly created COURSE_RERUN_COMPLETED signal to run post-re-run processes.
Navin Karkera
2025-08-21 23:45:55 +05:30 (committed by GitHub)
parent d132efa08d, commit 617b6447cc
9 changed files with 80 additions and 34 deletions


@@ -23,6 +23,7 @@ from openedx_events.content_authoring.data import (
from openedx_events.content_authoring.signals import (
    COURSE_CATALOG_INFO_CHANGED,
    COURSE_IMPORT_COMPLETED,
+    COURSE_RERUN_COMPLETED,
    LIBRARY_BLOCK_DELETED,
    LIBRARY_CONTAINER_DELETED,
    XBLOCK_CREATED,
@@ -304,10 +305,10 @@ def delete_upstream_downstream_link_handler(**kwargs):
    ).delete()


-@receiver(COURSE_IMPORT_COMPLETED)
-def handle_new_course_import(**kwargs):
+@receiver([COURSE_IMPORT_COMPLETED, COURSE_RERUN_COMPLETED])
+def handle_upstream_links_on_signal(**kwargs):
    """
-    Automatically create upstream->downstream links for course in database on new import.
+    Automatically create upstream->downstream links for course in database on new import or rerun.
    """
    course_data = kwargs.get("course", None)
    if not course_data or not isinstance(course_data, CourseData):
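
The list form of @receiver used above is a stock Django feature: one callable can subscribe to several signals at once, which is what lets a single handler cover both the import and the re-run events. A minimal standalone sketch of the mechanism (the signals and handler below are illustrative, not part of this PR):

from django.dispatch import Signal, receiver

signal_a = Signal()  # stand-ins for COURSE_IMPORT_COMPLETED / COURSE_RERUN_COMPLETED
signal_b = Signal()

@receiver([signal_a, signal_b])
def shared_handler(sender, **kwargs):
    # Django invokes this for either signal; the payload arrives in kwargs.
    print(f"received {kwargs} from {sender!r}")

signal_a.send(sender=None, course="demo")
signal_b.send(sender=None, course="demo")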


@@ -35,6 +35,8 @@ from olxcleaner.reporting import report_error_summary, report_errors
from opaque_keys import InvalidKeyError
from opaque_keys.edx.keys import CourseKey, UsageKey
from opaque_keys.edx.locator import LibraryContainerLocator, LibraryLocator, BlockUsageLocator
+from openedx_events.content_authoring.data import CourseData
+from openedx_events.content_authoring.signals import COURSE_RERUN_COMPLETED
from organizations.api import add_organization_course, ensure_organization
from organizations.exceptions import InvalidOrganizationException
from organizations.models import Organization
@@ -176,6 +178,12 @@ def rerun_course(source_course_key_string, destination_course_key_string, user_i
        # update state: Succeeded
        CourseRerunState.objects.succeeded(course_key=destination_course_key)

+        COURSE_RERUN_COMPLETED.send_event(
+            time=datetime.now(timezone.utc),
+            course=CourseData(
+                course_key=destination_course_key
+            )
+        )

        # call edxval to attach videos to the rerun
        copy_course_videos(source_course_key, destination_course_key)
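
The event is emitted only after CourseRerunState.objects.succeeded, so receivers can assume the destination course already exists. send_event checks the keyword arguments against the event's declared payload (course must be a CourseData) and then delivers them through the regular Django signal machinery. A hedged round-trip sketch, assuming a configured Django/openedx-events environment; the handler and course key are illustrative:

from datetime import datetime, timezone

from opaque_keys.edx.keys import CourseKey
from openedx_events.content_authoring.data import CourseData
from openedx_events.content_authoring.signals import COURSE_RERUN_COMPLETED

def on_rerun(**kwargs):
    # The payload arrives as kwargs; "course" is the CourseData instance sent below.
    print(kwargs["course"].course_key)

COURSE_RERUN_COMPLETED.connect(on_rerun)
COURSE_RERUN_COMPLETED.send_event(
    time=datetime.now(timezone.utc),
    course=CourseData(course_key=CourseKey.from_string("course-v1:OrgX+Demo+2025")),
)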


@@ -18,7 +18,7 @@ from meilisearch import Client as MeilisearchClient
from meilisearch.errors import MeilisearchApiError, MeilisearchError
from meilisearch.models.task import TaskInfo
from opaque_keys import OpaqueKey
-from opaque_keys.edx.keys import UsageKey
+from opaque_keys.edx.keys import CourseKey, UsageKey
from opaque_keys.edx.locator import (
    LibraryCollectionLocator,
    LibraryContainerLocator,
@@ -397,6 +397,34 @@ def init_index(status_cb: Callable[[str], None] | None = None, warn_cb: Callable
    reset_index(status_cb)


+def index_course(course_key: CourseKey, index_name: str | None = None) -> list:
+    """
+    Rebuilds the index for a given course.
+    """
+    store = modulestore()
+    client = _get_meilisearch_client()
+    docs = []
+    if index_name is None:
+        index_name = STUDIO_INDEX_NAME
+    # Pre-fetch the course with all of its children:
+    course = store.get_course(course_key, depth=None)
+
+    def add_with_children(block):
+        """ Recursively index the given XBlock/component """
+        doc = searchable_doc_for_course_block(block)
+        doc.update(searchable_doc_tags(block.usage_key))
+        docs.append(doc)  # pylint: disable=cell-var-from-loop
+        _recurse_children(block, add_with_children)  # pylint: disable=cell-var-from-loop
+
+    # Index course children
+    _recurse_children(course, add_with_children)
+
+    if docs:
+        # Add all the docs in this course at once (usually faster than adding one at a time):
+        _wait_for_meili_task(client.index(index_name).add_documents(docs))
+    return docs


def rebuild_index(status_cb: Callable[[str], None] | None = None, incremental=False) -> None:  # lint-amnesty, pylint: disable=too-many-statements
    """
    Rebuild the Meilisearch index from scratch
@@ -405,7 +433,6 @@ def rebuild_index(status_cb: Callable[[str], None] | None = None, incremental=Fa
        status_cb = log.info

    client = _get_meilisearch_client()
-    store = modulestore()

    # Get the lists of libraries
    status_cb("Counting libraries...")
@@ -559,26 +586,6 @@ def rebuild_index(status_cb: Callable[[str], None] | None = None, incremental=Fa
status_cb("Indexing courses...")
# To reduce memory usage on large instances, split up the CourseOverviews into pages of 1,000 courses:
def index_course(course: CourseOverview) -> list:
docs = []
# Pre-fetch the course with all of its children:
course = store.get_course(course.id, depth=None)
def add_with_children(block):
""" Recursively index the given XBlock/component """
doc = searchable_doc_for_course_block(block)
doc.update(searchable_doc_tags(block.usage_key))
docs.append(doc) # pylint: disable=cell-var-from-loop
_recurse_children(block, add_with_children) # pylint: disable=cell-var-from-loop
# Index course children
_recurse_children(course, add_with_children)
if docs:
# Add all the docs in this course at once (usually faster than adding one at a time):
_wait_for_meili_task(client.index(index_name).add_documents(docs))
return docs
paginator = Paginator(CourseOverview.objects.only('id', 'display_name'), 1000)
for p in paginator.page_range:
for course in paginator.page(p).object_list:
@@ -588,7 +595,7 @@ def rebuild_index(status_cb: Callable[[str], None] | None = None, incremental=Fa
            if course.id in keys_indexed:
                num_contexts_done += 1
                continue
-            course_docs = index_course(course)
+            course_docs = index_course(course.id, index_name)
            if incremental:
                IncrementalIndexCompleted.objects.get_or_create(context_key=course.id)
            num_contexts_done += 1
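
Hoisting index_course from a closure inside rebuild_index to module level gives single-course callers an entry point, while rebuild_index now just passes course.id and the target index name. A hedged usage sketch; the api import path is assumed from the task module shown below, and the course key is illustrative:

from opaque_keys.edx.keys import CourseKey

from openedx.core.djangoapps.content.search import api  # import path assumed

course_key = CourseKey.from_string("course-v1:OrgX+Demo+2025")  # illustrative key
docs = api.index_course(course_key)  # index_name=None falls back to STUDIO_INDEX_NAME
print(f"indexed {len(docs)} documents for {course_key}")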


@@ -12,6 +12,7 @@ from opaque_keys.edx.locator import LibraryCollectionLocator, LibraryContainerLo
from openedx_events.content_authoring.data import (
    ContentLibraryData,
    ContentObjectChangedData,
+    CourseData,
    LibraryBlockData,
    LibraryCollectionData,
    LibraryContainerData,
@@ -20,21 +21,23 @@ from openedx_events.content_authoring.data import (
from openedx_events.content_authoring.signals import (
    CONTENT_LIBRARY_DELETED,
    CONTENT_LIBRARY_UPDATED,
+    CONTENT_OBJECT_ASSOCIATIONS_CHANGED,
+    COURSE_IMPORT_COMPLETED,
+    COURSE_RERUN_COMPLETED,
    LIBRARY_BLOCK_CREATED,
    LIBRARY_BLOCK_DELETED,
-    LIBRARY_BLOCK_UPDATED,
    LIBRARY_BLOCK_PUBLISHED,
+    LIBRARY_BLOCK_UPDATED,
    LIBRARY_COLLECTION_CREATED,
    LIBRARY_COLLECTION_DELETED,
    LIBRARY_COLLECTION_UPDATED,
    LIBRARY_CONTAINER_CREATED,
    LIBRARY_CONTAINER_DELETED,
-    LIBRARY_CONTAINER_UPDATED,
    LIBRARY_CONTAINER_PUBLISHED,
+    LIBRARY_CONTAINER_UPDATED,
    XBLOCK_CREATED,
    XBLOCK_DELETED,
    XBLOCK_UPDATED,
-    CONTENT_OBJECT_ASSOCIATIONS_CHANGED,
)
from openedx.core.djangoapps.content.course_overviews.models import CourseOverview
@@ -54,6 +57,7 @@ from .tasks import (
    update_content_library_index_docs,
    update_library_collection_index_doc,
    update_library_container_index_doc,
+    upsert_course_blocks_docs,
    upsert_library_block_index_doc,
    upsert_xblock_index_doc,
)
@@ -327,3 +331,16 @@ def library_container_deleted(**kwargs) -> None:
# TODO: post-Teak, move all the celery tasks directly inline into these handlers? Because now the
# events are emitted in an [async] worker, so it doesn't matter if the handlers are synchronous.
# See https://github.com/openedx/edx-platform/pull/36640 discussion.


+@receiver([COURSE_IMPORT_COMPLETED, COURSE_RERUN_COMPLETED])
+def handle_reindex_on_signal(**kwargs):
+    """
+    Automatically update Meilisearch index for course in database on new import or rerun.
+    """
+    course_data = kwargs.get("course", None)
+    if not course_data or not isinstance(course_data, CourseData):
+        log.error("Received null or incorrect data for event")
+        return
+
+    upsert_course_blocks_docs.delay(str(course_data.course_key))
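
Taken together, the chain is: COURSE_IMPORT_COMPLETED or COURSE_RERUN_COMPLETED fires, this handler validates the CourseData payload, and the actual indexing is deferred to the upsert_course_blocks_docs celery task (added below), which calls api.index_course for the whole course.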


@@ -10,7 +10,7 @@ from celery import shared_task
from celery_utils.logged_task import LoggedTask
from edx_django_utils.monitoring import set_code_owner_attribute
from meilisearch.errors import MeilisearchError
-from opaque_keys.edx.keys import UsageKey
+from opaque_keys.edx.keys import CourseKey, UsageKey
from opaque_keys.edx.locator import (
    LibraryCollectionLocator,
    LibraryContainerLocator,
@@ -36,6 +36,19 @@ def upsert_xblock_index_doc(usage_key_str: str, recursive: bool) -> None:
    api.upsert_xblock_index_doc(usage_key, recursive)


+@shared_task(base=LoggedTask, autoretry_for=(MeilisearchError, ConnectionError))
+@set_code_owner_attribute
+def upsert_course_blocks_docs(course_key_str: str) -> None:
+    """
+    Celery task to update the content index document for all XBlocks in a course.
+    """
+    course_key = CourseKey.from_string(course_key_str)
+    log.info("Updating content index documents for XBlocks in course with id: %s", course_key)
+    api.index_course(course_key)


@shared_task(base=LoggedTask, autoretry_for=(MeilisearchError, ConnectionError))
@set_code_owner_attribute
def delete_xblock_index_doc(usage_key_str: str) -> None:
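
Like the neighboring tasks, the new task takes the course key as a string (celery arguments must be serializable) and parses it back into a CourseKey. A hedged sketch of invoking it, with an illustrative key; upsert_course_blocks_docs is the task defined above:

from opaque_keys.edx.keys import CourseKey

course_key = CourseKey.from_string("course-v1:OrgX+Demo+2025")  # illustrative key

# Asynchronously via a worker, as the signal handler does:
upsert_course_blocks_docs.delay(str(course_key))

# Or eagerly in-process (useful in tests; no worker required):
upsert_course_blocks_docs.apply(args=[str(course_key)])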


@@ -833,7 +833,7 @@ openedx-django-require==3.0.0
# via -r requirements/edx/kernel.in
openedx-django-wiki==3.1.1
# via -r requirements/edx/kernel.in
-openedx-events==10.4.0
+openedx-events==10.5.0
# via
# -r requirements/edx/kernel.in
# edx-enterprise


@@ -1387,7 +1387,7 @@ openedx-django-wiki==3.1.1
# via
# -r requirements/edx/doc.txt
# -r requirements/edx/testing.txt
-openedx-events==10.4.0
+openedx-events==10.5.0
# via
# -r requirements/edx/doc.txt
# -r requirements/edx/testing.txt


@@ -1009,7 +1009,7 @@ openedx-django-require==3.0.0
# via -r requirements/edx/base.txt
openedx-django-wiki==3.1.1
# via -r requirements/edx/base.txt
-openedx-events==10.4.0
+openedx-events==10.5.0
# via
# -r requirements/edx/base.txt
# edx-enterprise


@@ -1055,7 +1055,7 @@ openedx-django-require==3.0.0
# via -r requirements/edx/base.txt
openedx-django-wiki==3.1.1
# via -r requirements/edx/base.txt
-openedx-events==10.4.0
+openedx-events==10.5.0
# via
# -r requirements/edx/base.txt
# edx-enterprise