feat: adds sortable fields to studio content search index (#35103)

This commit is contained in:
Jillian
2024-07-26 03:48:49 +09:30
committed by GitHub
parent 78b691b56a
commit 6b5d812d38
10 changed files with 131 additions and 13 deletions

View File

@@ -322,6 +322,7 @@ def rebuild_index(status_cb: Callable[[str], None] | None = None) -> None:
Fields.tags + "." + Fields.tags_level3,
Fields.type,
Fields.access_id,
Fields.last_published,
])
# Mark which attributes are used for keyword search, in order of importance:
client.index(temp_index_name).update_searchable_attributes([
@@ -340,6 +341,24 @@ def rebuild_index(status_cb: Callable[[str], None] | None = None) -> None:
Fields.tags + "." + Fields.tags_level2,
Fields.tags + "." + Fields.tags_level3,
])
# Mark which attributes can be used for sorting search results:
client.index(temp_index_name).update_sortable_attributes([
Fields.display_name,
Fields.created,
Fields.modified,
Fields.last_published,
])
# Update the search ranking rules to let the (optional) "sort" parameter take precedence over keyword relevance.
# cf https://www.meilisearch.com/docs/learn/core_concepts/relevancy
client.index(temp_index_name).update_ranking_rules([
"sort",
"words",
"typo",
"proximity",
"attribute",
"exactness",
])
############## Libraries ##############
status_cb("Indexing libraries...")

View File

@@ -27,6 +27,9 @@ class Fields:
type = "type" # DocType.course_block or DocType.library_block (see below)
block_id = "block_id" # The block_id part of the usage key. Sometimes human-readable, sometimes a random hex ID
display_name = "display_name"
modified = "modified"
created = "created"
last_published = "last_published"
block_type = "block_type"
context_key = "context_key"
org = "org"
@@ -221,6 +224,9 @@ def searchable_doc_for_library_block(xblock_metadata: lib_api.LibraryXBlockMetad
Generate a dictionary document suitable for ingestion into a search engine
like Meilisearch or Elasticsearch, so that the given library block can be
found using faceted search.
Datetime fields (created, modified, last_published) are serialized to POSIX timestamps so that they can be used to
sort the search results.
"""
library_name = lib_api.get_library(xblock_metadata.usage_key.context_key).title
block = xblock_api.load_block(xblock_metadata.usage_key, user=None)
@@ -228,7 +234,10 @@ def searchable_doc_for_library_block(xblock_metadata: lib_api.LibraryXBlockMetad
doc = {
Fields.id: meili_id_from_opaque_key(xblock_metadata.usage_key),
Fields.type: DocType.library_block,
Fields.breadcrumbs: []
Fields.breadcrumbs: [],
Fields.created: xblock_metadata.created.timestamp(),
Fields.modified: xblock_metadata.modified.timestamp(),
Fields.last_published: xblock_metadata.last_published.timestamp() if xblock_metadata.last_published else None,
}
doc.update(_fields_from_block(block))

View File

@@ -12,6 +12,7 @@ from openedx_events.content_authoring.signals import (
CONTENT_LIBRARY_UPDATED,
LIBRARY_BLOCK_CREATED,
LIBRARY_BLOCK_DELETED,
LIBRARY_BLOCK_UPDATED,
XBLOCK_CREATED,
XBLOCK_DELETED,
XBLOCK_UPDATED,
@@ -96,6 +97,7 @@ def xblock_deleted_handler(**kwargs) -> None:
@receiver(LIBRARY_BLOCK_CREATED)
@receiver(LIBRARY_BLOCK_UPDATED)
@only_if_meilisearch_enabled
def library_block_updated_handler(**kwargs) -> None:
"""

View File

@@ -5,11 +5,13 @@ from __future__ import annotations
import copy
from datetime import datetime, timezone
from unittest.mock import MagicMock, call, patch
from opaque_keys.edx.keys import UsageKey
import ddt
from django.test import override_settings
from freezegun import freeze_time
from organizations.tests.factories import OrganizationFactory
from common.djangoapps.student.tests.factories import UserFactory
@@ -118,8 +120,17 @@ class TestSearchApi(ModuleStoreTestCase):
title="Library",
)
lib_access, _ = SearchAccess.objects.get_or_create(context_key=self.library.key)
# Populate it with a problem:
self.problem1 = library_api.create_library_block(self.library.key, "problem", "p1")
# Populate it with 2 problems, freezing the date so we can verify created date serializes correctly.
created_date = datetime(2023, 4, 5, 6, 7, 8, tzinfo=timezone.utc)
with freeze_time(created_date):
self.problem1 = library_api.create_library_block(self.library.key, "problem", "p1")
self.problem2 = library_api.create_library_block(self.library.key, "problem", "p2")
# Update problem1, freezing the date so we can verify modified date serializes correctly.
modified_date = datetime(2024, 5, 6, 7, 8, 9, tzinfo=timezone.utc)
with freeze_time(modified_date):
library_api.set_library_block_olx(self.problem1.usage_key, "<problem />")
self.doc_problem1 = {
"id": "lborg1libproblemp1-a698218e",
"usage_key": "lb:org1:lib:problem:p1",
@@ -132,8 +143,10 @@ class TestSearchApi(ModuleStoreTestCase):
"content": {"problem_types": [], "capa_content": " "},
"type": "library_block",
"access_id": lib_access.id,
"last_published": None,
"created": created_date.timestamp(),
"modified": modified_date.timestamp(),
}
self.problem2 = library_api.create_library_block(self.library.key, "problem", "p2")
self.doc_problem2 = {
"id": "lborg1libproblemp2-b2f65e29",
"usage_key": "lb:org1:lib:problem:p2",
@@ -146,6 +159,9 @@ class TestSearchApi(ModuleStoreTestCase):
"content": {"problem_types": [], "capa_content": " "},
"type": "library_block",
"access_id": lib_access.id,
"last_published": None,
"created": created_date.timestamp(),
"modified": created_date.timestamp(),
}
# Create a couple of taxonomies with tags
@@ -223,6 +239,22 @@ class TestSearchApi(ModuleStoreTestCase):
any_order=True,
)
# Check that the sorting-related settings were updated to support sorting on the expected fields
mock_meilisearch.return_value.index.return_value.update_sortable_attributes.assert_called_with([
"display_name",
"created",
"modified",
"last_published",
])
mock_meilisearch.return_value.index.return_value.update_ranking_rules.assert_called_with([
"sort",
"words",
"typo",
"proximity",
"attribute",
"exactness",
])
@ddt.data(
True,
False

View File

@@ -1,9 +1,11 @@
"""
Tests for the search index update handlers
"""
from datetime import datetime, timezone
from unittest.mock import MagicMock, patch
from django.test import LiveServerTestCase, override_settings
from freezegun import freeze_time
from organizations.tests.factories import OrganizationFactory
from common.djangoapps.student.tests.factories import UserFactory
@@ -132,7 +134,10 @@ class TestUpdateIndexHandlers(ModuleStoreTestCase, LiveServerTestCase):
)
lib_access, _ = SearchAccess.objects.get_or_create(context_key=library.key)
problem = library_api.create_library_block(library.key, "problem", "Problem1")
# Populate it with a problem, freezing the date so we can verify created date serializes correctly.
created_date = datetime(2023, 4, 5, 6, 7, 8, tzinfo=timezone.utc)
with freeze_time(created_date):
problem = library_api.create_library_block(library.key, "problem", "Problem1")
doc_problem = {
"id": "lborgalib_aproblemproblem1-ca3186e9",
"type": "library_block",
@@ -145,6 +150,9 @@ class TestUpdateIndexHandlers(ModuleStoreTestCase, LiveServerTestCase):
"breadcrumbs": [{"display_name": "Library Org A"}],
"content": {"problem_types": [], "capa_content": " "},
"access_id": lib_access.id,
"last_published": None,
"created": created_date.timestamp(),
"modified": created_date.timestamp(),
}
meilisearch_client.return_value.index.return_value.update_documents.assert_called_with([doc_problem])
@@ -152,10 +160,24 @@ class TestUpdateIndexHandlers(ModuleStoreTestCase, LiveServerTestCase):
# Rename the content library
library_api.update_library(library.key, title="Updated Library Org A")
# The breadcrumbs should be updated
# The breadcrumbs should be updated (but nothing else)
doc_problem["breadcrumbs"][0]["display_name"] = "Updated Library Org A"
meilisearch_client.return_value.index.return_value.update_documents.assert_called_with([doc_problem])
# Edit the problem block, freezing the date so we can verify modified date serializes correctly
modified_date = datetime(2024, 5, 6, 7, 8, 9, tzinfo=timezone.utc)
with freeze_time(modified_date):
library_api.set_library_block_olx(problem.usage_key, "<problem />")
doc_problem["modified"] = modified_date.timestamp()
meilisearch_client.return_value.index.return_value.update_documents.assert_called_with([doc_problem])
# Publish the content library, freezing the date so we can verify last_published date serializes correctly
published_date = datetime(2024, 6, 7, 8, 9, 10, tzinfo=timezone.utc)
with freeze_time(published_date):
library_api.publish_changes(library.key)
doc_problem["last_published"] = published_date.timestamp()
meilisearch_client.return_value.index.return_value.update_documents.assert_called_with([doc_problem])
# Delete the Library Block
library_api.delete_library_block(problem.usage_key)

View File

@@ -194,7 +194,10 @@ class LibraryXBlockMetadata:
Class that represents the metadata about an XBlock in a content library.
"""
usage_key = attr.ib(type=LibraryUsageLocatorV2)
created = attr.ib(type=datetime)
modified = attr.ib(type=datetime)
display_name = attr.ib("")
last_published = attr.ib(default=None, type=datetime)
has_unpublished_changes = attr.ib(False)
tags_count = attr.ib(0)
@@ -203,6 +206,8 @@ class LibraryXBlockMetadata:
"""
Construct a LibraryXBlockMetadata from a Component object.
"""
last_publish_log = authoring_api.get_last_publish(component.pk)
return cls(
usage_key=LibraryUsageLocatorV2(
library_key,
@@ -210,6 +215,9 @@ class LibraryXBlockMetadata:
component.local_key,
),
display_name=component.versioning.draft.title,
created=component.created,
modified=component.versioning.draft.created,
last_published=None if last_publish_log is None else last_publish_log.published_at,
has_unpublished_changes=component.versioning.has_unpublished_changes
)
@@ -660,13 +668,11 @@ def get_library_block(usage_key) -> LibraryXBlockMetadata:
if not draft_version:
raise ContentLibraryBlockNotFound(usage_key)
published_version = component.versioning.published
return LibraryXBlockMetadata(
usage_key=usage_key,
display_name=draft_version.title,
has_unpublished_changes=(draft_version != published_version),
xblock_metadata = LibraryXBlockMetadata.from_component(
library_key=usage_key.context_key,
component=component,
)
return xblock_metadata
def set_library_block_olx(usage_key, new_olx_str):

View File

@@ -6,6 +6,9 @@ import logging
from django.core.exceptions import PermissionDenied
from openedx_events.content_authoring.data import LibraryBlockData
from openedx_events.content_authoring.signals import LIBRARY_BLOCK_UPDATED
from openedx.core.djangoapps.content_libraries import api, permissions
from openedx.core.djangoapps.content_libraries.models import ContentLibrary
from openedx.core.djangoapps.xblock.api import LearningContext
@@ -93,3 +96,16 @@ class LibraryContextImpl(LearningContext):
type_name=usage_key.block_type,
local_key=usage_key.block_id,
)
def send_block_updated_event(self, usage_key):
"""
Send a "block updated" event for the library block with the given usage_key.
usage_key: the UsageKeyV2 subclass used for this learning context
"""
LIBRARY_BLOCK_UPDATED.send_event(
library_block=LibraryBlockData(
library_key=usage_key.lib_key,
usage_key=usage_key,
)
)

View File

@@ -441,7 +441,7 @@ class TestContentTagChildrenExport(TaggedCourseMixin): # type: ignore[misc]
"""
Test if we can export a library
"""
with self.assertNumQueries(8):
with self.assertNumQueries(11):
tagged_library = build_object_tree_with_objecttags(self.library.key, self.all_library_object_tags)
assert tagged_library == self.expected_library_tagged_xblock

View File

@@ -58,3 +58,10 @@ class LearningContext:
Retuns None if the usage key doesn't exist in this context.
"""
raise NotImplementedError
def send_block_updated_event(self, usage_key):
"""
Send a "block updated" event for the block with the given usage_key in this context.
usage_key: the UsageKeyV2 subclass used for this learning context
"""

View File

@@ -21,6 +21,7 @@ from xblock.fields import Scope
from opaque_keys import InvalidKeyError
from opaque_keys.edx.keys import UsageKey
from openedx.core.djangoapps.xblock.learning_context.manager import get_learning_context_impl
from openedx.core.lib.api.view_utils import view_auth_classes
from ..api import (
get_block_metadata,
@@ -254,6 +255,10 @@ class BlockFieldsView(APIView):
# Save after the callback so any changes made in the callback will get persisted.
block.save()
# Signal that we've modified this block
context_impl = get_learning_context_impl(usage_key)
context_impl.send_updated_event(usage_key)
return Response({
"id": str(block.location),
"data": data,