feat: index library collections in studio meilisearch index (#35324)
This commit is contained in:
@@ -19,6 +19,7 @@ from meilisearch.errors import MeilisearchError
|
||||
from meilisearch.models.task import TaskInfo
|
||||
from opaque_keys.edx.keys import UsageKey
|
||||
from opaque_keys.edx.locator import LibraryLocatorV2
|
||||
from openedx_learning.api import authoring as authoring_api
|
||||
from common.djangoapps.student.roles import GlobalStaff
|
||||
from rest_framework.request import Request
|
||||
from common.djangoapps.student.role_helpers import get_course_roles
|
||||
@@ -31,8 +32,9 @@ from .documents import (
|
||||
Fields,
|
||||
meili_id_from_opaque_key,
|
||||
searchable_doc_for_course_block,
|
||||
searchable_doc_for_collection,
|
||||
searchable_doc_for_library_block,
|
||||
searchable_doc_tags
|
||||
searchable_doc_tags,
|
||||
)
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
@@ -294,12 +296,16 @@ def rebuild_index(status_cb: Callable[[str], None] | None = None) -> None:
|
||||
status_cb("Counting courses...")
|
||||
num_courses = CourseOverview.objects.count()
|
||||
|
||||
# Get the list of collections
|
||||
status_cb("Counting collections...")
|
||||
num_collections = authoring_api.get_collections().count()
|
||||
|
||||
# Some counters so we can track our progress as indexing progresses:
|
||||
num_contexts = num_courses + num_libraries
|
||||
num_contexts = num_courses + num_libraries + num_collections
|
||||
num_contexts_done = 0 # How many courses/libraries we've indexed
|
||||
num_blocks_done = 0 # How many individual components/XBlocks we've indexed
|
||||
|
||||
status_cb(f"Found {num_courses} courses and {num_libraries} libraries.")
|
||||
status_cb(f"Found {num_courses} courses, {num_libraries} libraries and {num_collections} collections.")
|
||||
with _using_temp_index(status_cb) as temp_index_name:
|
||||
############## Configure the index ##############
|
||||
|
||||
@@ -332,6 +338,7 @@ def rebuild_index(status_cb: Callable[[str], None] | None = None) -> None:
|
||||
Fields.block_id,
|
||||
Fields.content,
|
||||
Fields.tags,
|
||||
Fields.description,
|
||||
# If we don't list the following sub-fields _explicitly_, they're only sometimes searchable - that is, they
|
||||
# are searchable only if at least one document in the index has a value. If we didn't list them here and,
|
||||
# say, there were no tags.level3 tags in the index, the client would get an error if trying to search for
|
||||
@@ -363,8 +370,8 @@ def rebuild_index(status_cb: Callable[[str], None] | None = None) -> None:
|
||||
|
||||
############## Libraries ##############
|
||||
status_cb("Indexing libraries...")
|
||||
for lib_key in lib_keys:
|
||||
status_cb(f"{num_contexts_done + 1}/{num_contexts}. Now indexing library {lib_key}")
|
||||
|
||||
def index_library(lib_key: str) -> list:
|
||||
docs = []
|
||||
for component in lib_api.get_library_components(lib_key):
|
||||
try:
|
||||
@@ -375,48 +382,88 @@ def rebuild_index(status_cb: Callable[[str], None] | None = None) -> None:
|
||||
docs.append(doc)
|
||||
except Exception as err: # pylint: disable=broad-except
|
||||
status_cb(f"Error indexing library component {component}: {err}")
|
||||
finally:
|
||||
num_blocks_done += 1
|
||||
if docs:
|
||||
try:
|
||||
# Add all the docs in this library at once (usually faster than adding one at a time):
|
||||
_wait_for_meili_task(client.index(temp_index_name).add_documents(docs))
|
||||
except (TypeError, KeyError, MeilisearchError) as err:
|
||||
status_cb(f"Error indexing library {lib_key}: {err}")
|
||||
return docs
|
||||
|
||||
for lib_key in lib_keys:
|
||||
status_cb(f"{num_contexts_done + 1}/{num_contexts}. Now indexing library {lib_key}")
|
||||
lib_docs = index_library(lib_key)
|
||||
num_blocks_done += len(lib_docs)
|
||||
num_contexts_done += 1
|
||||
|
||||
############## Courses ##############
|
||||
status_cb("Indexing courses...")
|
||||
# To reduce memory usage on large instances, split up the CourseOverviews into pages of 1,000 courses:
|
||||
|
||||
def index_course(course: CourseOverview) -> list:
    """
    Build the search documents for one course and add them to the temp index.

    The course is re-fetched through ``store.get_course`` with ``depth=None``
    so that it arrives with all of its children. One document is produced per
    visited block by combining ``searchable_doc_for_course_block`` with
    ``searchable_doc_tags``. If any documents were produced, they are sent in
    a single ``add_documents`` call and the resulting task is handed to
    ``_wait_for_meili_task``.

    Returns the list of documents that were built (possibly empty).
    """
    # Pre-fetch the course with all of its children:
    loaded_course = store.get_course(course.id, depth=None)
    course_docs = []

    def collect_block(xblock):
        """ Build the document for this XBlock/component, then recurse into its children """
        block_doc = searchable_doc_for_course_block(xblock)
        block_doc.update(searchable_doc_tags(xblock.usage_key))
        course_docs.append(block_doc)
        _recurse_children(xblock, collect_block)

    # Index course children
    _recurse_children(loaded_course, collect_block)

    if not course_docs:
        return course_docs

    # One bulk request per course (usually faster than adding one document at a time):
    _wait_for_meili_task(client.index(temp_index_name).add_documents(course_docs))
    return course_docs
|
||||
|
||||
paginator = Paginator(CourseOverview.objects.only('id', 'display_name'), 1000)
|
||||
for p in paginator.page_range:
|
||||
for course in paginator.page(p).object_list:
|
||||
status_cb(
|
||||
f"{num_contexts_done + 1}/{num_contexts}. Now indexing course {course.display_name} ({course.id})"
|
||||
)
|
||||
docs = []
|
||||
|
||||
# Pre-fetch the course with all of its children:
|
||||
course = store.get_course(course.id, depth=None)
|
||||
|
||||
def add_with_children(block):
|
||||
""" Recursively index the given XBlock/component """
|
||||
doc = searchable_doc_for_course_block(block)
|
||||
doc.update(searchable_doc_tags(block.usage_key))
|
||||
docs.append(doc) # pylint: disable=cell-var-from-loop
|
||||
_recurse_children(block, add_with_children) # pylint: disable=cell-var-from-loop
|
||||
|
||||
# Index course children
|
||||
_recurse_children(course, add_with_children)
|
||||
|
||||
if docs:
|
||||
# Add all the docs in this course at once (usually faster than adding one at a time):
|
||||
_wait_for_meili_task(client.index(temp_index_name).add_documents(docs))
|
||||
course_docs = index_course(course)
|
||||
num_contexts_done += 1
|
||||
num_blocks_done += len(docs)
|
||||
num_blocks_done += len(course_docs)
|
||||
|
||||
status_cb(f"Done! {num_blocks_done} blocks indexed across {num_contexts_done} courses and libraries.")
|
||||
############## Collections ##############
|
||||
status_cb("Indexing collections...")
|
||||
|
||||
def index_collection_batch(batch, num_contexts_done) -> int:
    """
    Build and upload the search documents for one batch of collections.

    ``batch`` is iterated once; for each collection a progress line is sent to
    ``status_cb`` and a document is generated with
    ``searchable_doc_for_collection``. A collection whose document cannot be
    generated is reported through ``status_cb`` and left out of the upload.
    ``num_contexts_done`` is incremented once per collection either way.

    Returns the updated ``num_contexts_done``.
    """
    batch_docs = []
    for collection in batch:
        status_cb(
            f"{num_contexts_done + 1}/{num_contexts}. "
            f"Now indexing collection {collection.title} ({collection.id})"
        )
        try:
            collection_doc = searchable_doc_for_collection(collection)
        except Exception as err:  # pylint: disable=broad-except
            status_cb(f"Error indexing collection {collection}: {err}")
        else:
            # Uncomment below line once collections are tagged.
            # collection_doc.update(searchable_doc_tags(collection.id))
            batch_docs.append(collection_doc)
        # Failed collections count as processed too, so the progress counter keeps moving.
        num_contexts_done += 1

    if not batch_docs:
        return num_contexts_done

    try:
        # Add the whole batch at once (usually faster than adding one at a time):
        _wait_for_meili_task(client.index(temp_index_name).add_documents(batch_docs))
    except (TypeError, KeyError, MeilisearchError) as err:
        # NOTE(review): `p` is not a parameter of this helper; presumably it is the
        # page-loop variable of the enclosing scope at call time - confirm.
        status_cb(f"Error indexing collection batch {p}: {err}")
    return num_contexts_done
|
||||
|
||||
# To reduce memory usage on large instances, split up the Collections into pages of 100 collections:
|
||||
paginator = Paginator(authoring_api.get_collections(enabled=True), 100)
|
||||
for p in paginator.page_range:
|
||||
num_contexts_done = index_collection_batch(paginator.page(p).object_list, num_contexts_done)
|
||||
|
||||
status_cb(f"Done! {num_blocks_done} blocks indexed across {num_contexts_done} courses, collections and libraries.")
|
||||
|
||||
|
||||
def upsert_xblock_index_doc(usage_key: UsageKey, recursive: bool = True) -> None:
|
||||
|
||||
@@ -13,6 +13,7 @@ from openedx.core.djangoapps.content.search.models import SearchAccess
|
||||
from openedx.core.djangoapps.content_libraries import api as lib_api
|
||||
from openedx.core.djangoapps.content_tagging import api as tagging_api
|
||||
from openedx.core.djangoapps.xblock import api as xblock_api
|
||||
from openedx_learning.api.authoring_models import LearningPackage
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
@@ -27,6 +28,7 @@ class Fields:
|
||||
type = "type" # DocType.course_block or DocType.library_block (see below)
|
||||
block_id = "block_id" # The block_id part of the usage key. Sometimes human-readable, sometimes a random hex ID
|
||||
display_name = "display_name"
|
||||
description = "description"
|
||||
modified = "modified"
|
||||
created = "created"
|
||||
last_published = "last_published"
|
||||
@@ -66,6 +68,7 @@ class DocType:
|
||||
"""
|
||||
course_block = "course_block"
|
||||
library_block = "library_block"
|
||||
collection = "collection"
|
||||
|
||||
|
||||
def meili_id_from_opaque_key(usage_key: UsageKey) -> str:
|
||||
@@ -276,3 +279,38 @@ def searchable_doc_for_course_block(block) -> dict:
|
||||
doc.update(_fields_from_block(block))
|
||||
|
||||
return doc
|
||||
|
||||
|
||||
def searchable_doc_for_collection(collection) -> dict:
    """
    Generate a dictionary document suitable for ingestion into a search engine
    like Meilisearch or Elasticsearch, so that the given collection can be
    found using faceted search.

    The document's context key defaults to the key of the collection's
    learning package. When that learning package has a related content
    library, the library key (as a string) replaces it and the library's org
    is added; otherwise a warning is logged and no org field is set.
    """
    learning_package = collection.learning_package

    doc = {
        Fields.id: collection.id,
        Fields.type: DocType.collection,
        Fields.display_name: collection.title,
        Fields.description: collection.description,
        Fields.created: collection.created.timestamp(),
        Fields.modified: collection.modified.timestamp(),
        # Fallback context key; overridden below when a related library exists.
        # Mostly contentlibrary.library_key == learning_package.key
        Fields.context_key: learning_package.key,
    }

    # Just in case learning_package is not related to a library
    try:
        library_key = learning_package.contentlibrary.library_key
        org_name = str(library_key.org)
        doc[Fields.context_key] = str(library_key)
        doc[Fields.org] = org_name
    except LearningPackage.contentlibrary.RelatedObjectDoesNotExist:
        log.warning(f"Related library not found for {collection}")

    # The access id is derived from whichever context key ended up in the document.
    doc[Fields.access_id] = _meili_access_id_from_context_key(doc[Fields.context_key])
    # Add the breadcrumbs.
    doc[Fields.breadcrumbs] = [{"display_name": learning_package.title}]

    return doc
|
||||
|
||||
@@ -6,12 +6,13 @@ from __future__ import annotations
|
||||
import copy
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from unittest.mock import MagicMock, call, patch
|
||||
from unittest.mock import MagicMock, Mock, call, patch
|
||||
from opaque_keys.edx.keys import UsageKey
|
||||
|
||||
import ddt
|
||||
from django.test import override_settings
|
||||
from freezegun import freeze_time
|
||||
from openedx_learning.api import authoring as authoring_api
|
||||
from organizations.tests.factories import OrganizationFactory
|
||||
|
||||
from common.djangoapps.student.tests.factories import UserFactory
|
||||
@@ -174,6 +175,28 @@ class TestSearchApi(ModuleStoreTestCase):
|
||||
tagging_api.add_tag_to_taxonomy(self.taxonomyB, "three")
|
||||
tagging_api.add_tag_to_taxonomy(self.taxonomyB, "four")
|
||||
|
||||
# Create a collection:
|
||||
self.learning_package = authoring_api.get_learning_package_by_key(self.library.key)
|
||||
self.collection_dict = {
|
||||
'id': 1,
|
||||
'type': 'collection',
|
||||
'display_name': 'my_collection',
|
||||
'description': 'my collection description',
|
||||
'context_key': 'lib:org1:lib',
|
||||
'org': 'org1',
|
||||
'created': created_date.timestamp(),
|
||||
'modified': created_date.timestamp(),
|
||||
"access_id": lib_access.id,
|
||||
'breadcrumbs': [{'display_name': 'Library'}]
|
||||
}
|
||||
with freeze_time(created_date):
|
||||
self.collection = authoring_api.create_collection(
|
||||
learning_package_id=self.learning_package.id,
|
||||
title="my_collection",
|
||||
created_by=None,
|
||||
description="my collection description"
|
||||
)
|
||||
|
||||
@override_settings(MEILISEARCH_ENABLED=False)
|
||||
def test_reindex_meilisearch_disabled(self, mock_meilisearch):
|
||||
with self.assertRaises(RuntimeError):
|
||||
@@ -199,10 +222,27 @@ class TestSearchApi(ModuleStoreTestCase):
|
||||
[
|
||||
call([doc_sequential, doc_vertical]),
|
||||
call([doc_problem1, doc_problem2]),
|
||||
call([self.collection_dict]),
|
||||
],
|
||||
any_order=True,
|
||||
)
|
||||
|
||||
@override_settings(MEILISEARCH_ENABLED=True)
@patch(
    "openedx.core.djangoapps.content.search.api.searchable_doc_for_collection",
    Mock(side_effect=Exception("Failed to generate document")),
)
def test_reindex_meilisearch_collection_error(self, mock_meilisearch):
    """
    When generating a collection's document raises, the rebuild must not
    upload that collection and must report the error via the status callback.
    """
    status_cb = Mock()
    api.rebuild_index(status_cb)

    add_documents = mock_meilisearch.return_value.index.return_value.add_documents
    unexpected_upload = call([self.collection_dict])
    assert unexpected_upload not in add_documents.mock_calls

    status_cb.assert_any_call(
        f"Error indexing collection {self.collection}: Failed to generate document"
    )
|
||||
|
||||
@override_settings(MEILISEARCH_ENABLED=True)
|
||||
def test_reindex_meilisearch_library_block_error(self, mock_meilisearch):
|
||||
|
||||
|
||||
@@ -1,8 +1,12 @@
|
||||
"""
|
||||
Tests for the Studio content search documents (what gets stored in the index)
|
||||
"""
|
||||
from datetime import datetime, timezone
|
||||
from organizations.models import Organization
|
||||
|
||||
from freezegun import freeze_time
|
||||
from openedx_learning.api import authoring as authoring_api
|
||||
|
||||
from openedx.core.djangoapps.content_tagging import api as tagging_api
|
||||
from openedx.core.djangolib.testing.utils import skip_unless_cms
|
||||
from xmodule.modulestore.django import modulestore
|
||||
@@ -11,10 +15,12 @@ from xmodule.modulestore.tests.factories import BlockFactory, ToyCourseFactory
|
||||
|
||||
try:
|
||||
# This import errors in the lms because content.search is not an installed app there.
|
||||
from ..documents import searchable_doc_for_course_block, searchable_doc_tags
|
||||
from ..documents import searchable_doc_for_course_block, searchable_doc_tags, searchable_doc_for_collection
|
||||
from ..models import SearchAccess
|
||||
except RuntimeError:
|
||||
searchable_doc_for_course_block = lambda x: x
|
||||
searchable_doc_tags = lambda x: x
|
||||
searchable_doc_for_collection = lambda x: x
|
||||
SearchAccess = {}
|
||||
|
||||
|
||||
@@ -198,3 +204,30 @@ class StudioDocumentsTest(SharedModuleStoreTestCase):
|
||||
"content": {},
|
||||
# This video has no tags.
|
||||
}
|
||||
|
||||
def test_collection_with_no_library(self):
    """
    A collection whose learning package has no related library is documented
    with the learning package's own key as context key and no "org" field.
    """
    frozen_now = datetime(2023, 4, 5, 6, 7, 8, tzinfo=timezone.utc)
    # Freeze the clock so the created/modified timestamps are predictable.
    with freeze_time(frozen_now):
        learning_package = authoring_api.create_learning_package(
            key="course-v1:edX+toy+2012_Fall",
            title="some learning_package",
            description="some description",
        )
        collection = authoring_api.create_collection(
            learning_package_id=learning_package.id,
            title="my_collection",
            created_by=None,
            description="my collection description"
        )

    expected_timestamp = frozen_now.timestamp()
    expected_doc = {
        "id": collection.id,
        "type": "collection",
        "display_name": collection.title,
        "description": collection.description,
        "context_key": learning_package.key,
        "access_id": self.toy_course_access_id,
        "breadcrumbs": [{"display_name": learning_package.title}],
        "created": expected_timestamp,
        "modified": expected_timestamp,
    }

    assert searchable_doc_for_collection(collection) == expected_doc
|
||||
|
||||
@@ -93,7 +93,7 @@ libsass==0.10.0
|
||||
click==8.1.6
|
||||
|
||||
# pinning this version to avoid updates while the library is being developed
|
||||
openedx-learning==0.11.1
|
||||
openedx-learning==0.11.2
|
||||
|
||||
# Open AI version 1.0.0 dropped support for openai.ChatCompletion which is currently in use in enterprise.
|
||||
openai<=0.28.1
|
||||
|
||||
@@ -823,7 +823,7 @@ openedx-filters==1.9.0
|
||||
# -r requirements/edx/kernel.in
|
||||
# lti-consumer-xblock
|
||||
# ora2
|
||||
openedx-learning==0.11.1
|
||||
openedx-learning==0.11.2
|
||||
# via
|
||||
# -c requirements/edx/../constraints.txt
|
||||
# -r requirements/edx/kernel.in
|
||||
|
||||
@@ -1372,7 +1372,7 @@ openedx-filters==1.9.0
|
||||
# -r requirements/edx/testing.txt
|
||||
# lti-consumer-xblock
|
||||
# ora2
|
||||
openedx-learning==0.11.1
|
||||
openedx-learning==0.11.2
|
||||
# via
|
||||
# -c requirements/edx/../constraints.txt
|
||||
# -r requirements/edx/doc.txt
|
||||
|
||||
@@ -982,7 +982,7 @@ openedx-filters==1.9.0
|
||||
# -r requirements/edx/base.txt
|
||||
# lti-consumer-xblock
|
||||
# ora2
|
||||
openedx-learning==0.11.1
|
||||
openedx-learning==0.11.2
|
||||
# via
|
||||
# -c requirements/edx/../constraints.txt
|
||||
# -r requirements/edx/base.txt
|
||||
|
||||
@@ -1033,7 +1033,7 @@ openedx-filters==1.9.0
|
||||
# -r requirements/edx/base.txt
|
||||
# lti-consumer-xblock
|
||||
# ora2
|
||||
openedx-learning==0.11.1
|
||||
openedx-learning==0.11.2
|
||||
# via
|
||||
# -c requirements/edx/../constraints.txt
|
||||
# -r requirements/edx/base.txt
|
||||
|
||||
Reference in New Issue
Block a user