feat: container support for tags [FC-0083] (#36484)

* feat: container tag support

* fix: fixes from review

* docs: fix typo
This commit is contained in:
Rômulo Penido
2025-04-10 02:44:54 -03:00
committed by GitHub
parent 1e6b40ac9a
commit a0d99315ad
7 changed files with 183 additions and 79 deletions

View File

@@ -17,6 +17,7 @@ from django.core.paginator import Paginator
from meilisearch import Client as MeilisearchClient
from meilisearch.errors import MeilisearchApiError, MeilisearchError
from meilisearch.models.task import TaskInfo
from opaque_keys import OpaqueKey
from opaque_keys.edx.keys import UsageKey
from opaque_keys.edx.locator import LibraryContainerLocator, LibraryLocatorV2, LibraryCollectionLocator
from openedx_learning.api import authoring as authoring_api
@@ -42,7 +43,7 @@ from .documents import (
searchable_doc_for_collection,
searchable_doc_for_container,
searchable_doc_for_library_block,
searchable_doc_for_usage_key,
searchable_doc_for_key,
searchable_doc_collections,
searchable_doc_tags,
searchable_doc_tags_for_collection,
@@ -486,8 +487,7 @@ def rebuild_index(status_cb: Callable[[str], None] | None = None, incremental=Fa
container,
)
doc = searchable_doc_for_container(container_key)
# TODO: when we add container tags
# doc.update(searchable_doc_tags_for_container(container_key))
doc.update(searchable_doc_tags(container_key))
docs.append(doc)
except Exception as err: # pylint: disable=broad-except
status_cb(f"Error indexing container {container.key}: {err}")
@@ -511,7 +511,7 @@ def rebuild_index(status_cb: Callable[[str], None] | None = None, incremental=Fa
collections = authoring_api.get_collections(library.learning_package_id, enabled=True)
num_collections = collections.count()
num_collections_done = 0
status_cb(f"{num_collections_done + 1}/{num_collections}. Now indexing collections in library {lib_key}")
status_cb(f"{num_collections_done}/{num_collections}. Now indexing collections in library {lib_key}")
paginator = Paginator(collections, 100)
for p in paginator.page_range:
num_collections_done = index_collection_batch(
@@ -620,7 +620,7 @@ def delete_index_doc(usage_key: UsageKey) -> None:
Args:
usage_key (UsageKey): The usage key of the XBlock to be removed from the index
"""
doc = searchable_doc_for_usage_key(usage_key)
doc = searchable_doc_for_key(usage_key)
_delete_index_doc(doc[Fields.id])
@@ -811,12 +811,12 @@ def upsert_content_library_index_docs(library_key: LibraryLocatorV2) -> None:
_update_index_docs(docs)
def upsert_block_tags_index_docs(usage_key: UsageKey):
def upsert_content_object_tags_index_doc(key: OpaqueKey):
"""
Updates the tags data in documents for the given Course/Library block
Updates the tags data in document for the given Course/Library item
"""
doc = {Fields.id: meili_id_from_opaque_key(usage_key)}
doc.update(searchable_doc_tags(usage_key))
doc = {Fields.id: meili_id_from_opaque_key(key)}
doc.update(searchable_doc_tags(key))
_update_index_docs([doc])

View File

@@ -8,6 +8,7 @@ from hashlib import blake2b
from django.core.exceptions import ObjectDoesNotExist
from django.utils.text import slugify
from opaque_keys import OpaqueKey
from opaque_keys.edx.keys import LearningContextKey, UsageKey
from opaque_keys.edx.locator import LibraryContainerLocator, LibraryLocatorV2
from openedx_learning.api import authoring as authoring_api
@@ -113,7 +114,7 @@ class PublishStatus:
modified = "modified"
def meili_id_from_opaque_key(usage_key: UsageKey) -> str:
def meili_id_from_opaque_key(key: OpaqueKey) -> str:
"""
Meilisearch requires each document to have a primary key that's either an
integer or a string composed of alphanumeric characters (a-z A-Z 0-9),
@@ -124,7 +125,7 @@ def meili_id_from_opaque_key(usage_key: UsageKey) -> str:
we could use PublishableEntity's primary key / UUID instead.
"""
# The slugified key _may_ not be unique so we append a hashed string to make it unique:
key_str = str(usage_key)
key_str = str(key)
key_bin = key_str.encode()
suffix = blake2b(key_bin, digest_size=4, usedforsecurity=False).hexdigest()
@@ -140,12 +141,12 @@ def _meili_access_id_from_context_key(context_key: LearningContextKey) -> int:
return access.id
def searchable_doc_for_usage_key(usage_key: UsageKey) -> dict:
def searchable_doc_for_key(key: OpaqueKey) -> dict:
"""
Generates a base document identified by its usage key.
Generates a base document identified by its opaque key.
"""
return {
Fields.id: meili_id_from_opaque_key(usage_key),
Fields.id: meili_id_from_opaque_key(key),
}
@@ -244,7 +245,7 @@ def _fields_from_block(block) -> dict:
return block_data
def _tags_for_content_object(object_id: UsageKey | LearningContextKey) -> dict:
def _tags_for_content_object(object_id: OpaqueKey) -> dict:
"""
Given an XBlock, course, library, etc., get the tag data for its index doc.
@@ -406,7 +407,7 @@ def searchable_doc_for_library_block(xblock_metadata: lib_api.LibraryXBlockMetad
block_published = None
publish_status = PublishStatus.never
doc = searchable_doc_for_usage_key(xblock_metadata.usage_key)
doc = searchable_doc_for_key(xblock_metadata.usage_key)
doc.update({
Fields.type: DocType.library_block,
Fields.breadcrumbs: [],
@@ -427,13 +428,13 @@ def searchable_doc_for_library_block(xblock_metadata: lib_api.LibraryXBlockMetad
return doc
def searchable_doc_tags(usage_key: UsageKey) -> dict:
def searchable_doc_tags(key: OpaqueKey) -> dict:
"""
Generate a dictionary document suitable for ingestion into a search engine
like Meilisearch or Elasticsearch, with the tags data for the given content object.
"""
doc = searchable_doc_for_usage_key(usage_key)
doc.update(_tags_for_content_object(usage_key))
doc = searchable_doc_for_key(key)
doc.update(_tags_for_content_object(key))
return doc
@@ -443,7 +444,7 @@ def searchable_doc_collections(usage_key: UsageKey) -> dict:
Generate a dictionary document suitable for ingestion into a search engine
like Meilisearch or Elasticsearch, with the collections data for the given content object.
"""
doc = searchable_doc_for_usage_key(usage_key)
doc = searchable_doc_for_key(usage_key)
doc.update(_collections_for_content_object(usage_key))
return doc
@@ -461,7 +462,7 @@ def searchable_doc_tags_for_collection(
library_key,
collection_key,
)
doc = searchable_doc_for_usage_key(collection_usage_key)
doc = searchable_doc_for_key(collection_usage_key)
doc.update(_tags_for_content_object(collection_usage_key))
return doc
@@ -473,7 +474,7 @@ def searchable_doc_for_course_block(block) -> dict:
like Meilisearch or Elasticsearch, so that the given course block can be
found using faceted search.
"""
doc = searchable_doc_for_usage_key(block.usage_key)
doc = searchable_doc_for_key(block.usage_key)
doc.update({
Fields.type: DocType.course_block,
})
@@ -503,7 +504,7 @@ def searchable_doc_for_collection(
collection_key,
)
doc = searchable_doc_for_usage_key(collection_usage_key)
doc = searchable_doc_for_key(collection_usage_key)
try:
collection = collection or lib_api.get_library_collection_from_usage_key(collection_usage_key)

View File

@@ -8,7 +8,7 @@ from django.db.models.signals import post_delete
from django.dispatch import receiver
from opaque_keys import InvalidKeyError
from opaque_keys.edx.keys import UsageKey
from opaque_keys.edx.locator import LibraryCollectionLocator
from opaque_keys.edx.locator import LibraryCollectionLocator, LibraryContainerLocator
from openedx_events.content_authoring.data import (
ContentLibraryData,
ContentObjectChangedData,
@@ -41,7 +41,7 @@ from openedx.core.djangoapps.content.search.models import SearchAccess
from .api import (
only_if_meilisearch_enabled,
upsert_block_collections_index_docs,
upsert_block_tags_index_docs,
upsert_content_object_tags_index_doc,
upsert_collection_tags_index_docs,
)
from .tasks import (
@@ -211,15 +211,20 @@ def content_object_associations_changed_handler(**kwargs) -> None:
return
try:
# Check if valid if course or library block
# Check if valid course or library block
usage_key = UsageKey.from_string(str(content_object.object_id))
except InvalidKeyError:
try:
# Check if valid if library collection
# Check if valid library collection
usage_key = LibraryCollectionLocator.from_string(str(content_object.object_id))
except InvalidKeyError:
log.error("Received invalid content object id")
return
try:
# Check if valid library container
usage_key = LibraryContainerLocator.from_string(str(content_object.object_id))
except InvalidKeyError:
# Invalid content object id
log.error("Received invalid content object id")
return
# This event's changes may contain both "tags" and "collections", but this will happen rarely, if ever.
# So we allow a potential double "upsert" here.
@@ -227,7 +232,7 @@ def content_object_associations_changed_handler(**kwargs) -> None:
if isinstance(usage_key, LibraryCollectionLocator):
upsert_collection_tags_index_docs(usage_key)
else:
upsert_block_tags_index_docs(usage_key)
upsert_content_object_tags_index_doc(usage_key)
if not content_object.changes or "collections" in content_object.changes:
upsert_block_collections_index_docs(usage_key)

View File

@@ -234,7 +234,6 @@ class TestSearchApi(ModuleStoreTestCase):
"modified": created_date.timestamp(),
"access_id": lib_access.id,
"breadcrumbs": [{"display_name": "Library"}],
# "tags" should be here but we haven't implemented them yet
# "published" is not set since we haven't published it yet
}
@@ -262,6 +261,7 @@ class TestSearchApi(ModuleStoreTestCase):
doc_collection = copy.deepcopy(self.collection_dict)
doc_collection["tags"] = {}
doc_unit = copy.deepcopy(self.unit_dict)
doc_unit["tags"] = {}
api.rebuild_index()
assert mock_meilisearch.return_value.index.return_value.add_documents.call_count == 4
@@ -292,6 +292,7 @@ class TestSearchApi(ModuleStoreTestCase):
doc_collection = copy.deepcopy(self.collection_dict)
doc_collection["tags"] = {}
doc_unit = copy.deepcopy(self.unit_dict)
doc_unit["tags"] = {}
api.rebuild_index(incremental=True)
assert mock_meilisearch.return_value.index.return_value.add_documents.call_count == 4
@@ -472,8 +473,7 @@ class TestSearchApi(ModuleStoreTestCase):
"""
Test indexing an XBlock with tags.
"""
# Tag XBlock (these internally call `upsert_block_tags_index_docs`)
# Tag XBlock (these internally call `upsert_content_object_tags_index_doc`)
tagging_api.tag_object(str(self.sequential.usage_key), self.taxonomyA, ["one", "two"])
tagging_api.tag_object(str(self.sequential.usage_key), self.taxonomyB, ["three", "four"])
@@ -866,3 +866,43 @@ class TestSearchApi(ModuleStoreTestCase):
mock_meilisearch.return_value.index.return_value.update_documents.assert_called_once_with([
doc_problem_without_collection,
])
@override_settings(MEILISEARCH_ENABLED=True)
def test_index_library_container_metadata(self, mock_meilisearch):
"""
Test indexing a Library Container.
"""
api.upsert_library_container_index_doc(self.unit.container_key)
mock_meilisearch.return_value.index.return_value.update_documents.assert_called_once_with([self.unit_dict])
@override_settings(MEILISEARCH_ENABLED=True)
def test_index_tags_in_containers(self, mock_meilisearch):
# Tag collection
tagging_api.tag_object(self.unit_key, self.taxonomyA, ["one", "two"])
tagging_api.tag_object(self.unit_key, self.taxonomyB, ["three", "four"])
# Build expected docs with tags at each stage
doc_unit_with_tags1 = {
"id": "lctorg1libunitunit-1-e4527f7c",
"tags": {
'taxonomy': ['A'],
'level0': ['A > one', 'A > two']
}
}
doc_unit_with_tags2 = {
"id": "lctorg1libunitunit-1-e4527f7c",
"tags": {
'taxonomy': ['A', 'B'],
'level0': ['A > one', 'A > two', 'B > four', 'B > three']
}
}
assert mock_meilisearch.return_value.index.return_value.update_documents.call_count == 2
mock_meilisearch.return_value.index.return_value.update_documents.assert_has_calls(
[
call([doc_unit_with_tags1]),
call([doc_unit_with_tags2]),
],
any_order=True,
)

View File

@@ -87,6 +87,14 @@ class StudioDocumentsTest(SharedModuleStoreTestCase):
"html",
"text2",
)
cls.container = library_api.create_container(
cls.library.key,
container_type=library_api.ContainerType.Unit,
slug="unit1",
title="A Unit in the Search Index",
user_id=None,
)
cls.container_usage_key = "lct:edX:2012_Fall:unit:unit1"
# Add the problem block to the collection
library_api.update_library_collection_components(
@@ -116,6 +124,7 @@ class StudioDocumentsTest(SharedModuleStoreTestCase):
tagging_api.tag_object(str(cls.html_block_key), cls.difficulty_tags, tags=["Normal"])
tagging_api.tag_object(str(cls.library_block.usage_key), cls.difficulty_tags, tags=["Normal"])
tagging_api.tag_object(cls.collection_usage_key, cls.difficulty_tags, tags=["Normal"])
tagging_api.tag_object(cls.container_usage_key, cls.difficulty_tags, tags=["Normal"])
@property
def toy_course_access_id(self):
@@ -501,17 +510,8 @@ class StudioDocumentsTest(SharedModuleStoreTestCase):
"""
Test creating a search document for a draft-only container
"""
created_date = datetime(2023, 4, 5, 6, 7, 8, tzinfo=timezone.utc)
with freeze_time(created_date):
container_meta = library_api.create_container(
self.library.key,
container_type=library_api.ContainerType.Unit,
slug="unit1",
title="A Unit in the Search Index",
user_id=None,
)
doc = searchable_doc_for_container(container_meta.container_key)
doc = searchable_doc_for_container(self.container.container_key)
doc.update(searchable_doc_tags(self.container.container_key))
assert doc == {
"id": "lctedx2012_fallunitunit1-edd13a0c",
@@ -529,7 +529,10 @@ class StudioDocumentsTest(SharedModuleStoreTestCase):
"breadcrumbs": [{"display_name": "some content_library"}],
"created": 1680674828.0,
"modified": 1680674828.0,
# "tags" should be here but we haven't implemented them yet
"tags": {
"taxonomy": ["Difficulty"],
"level0": ["Difficulty > Normal"]
},
# "published" is not set since we haven't published it yet
}
@@ -537,23 +540,17 @@ class StudioDocumentsTest(SharedModuleStoreTestCase):
"""
Test creating a search document for a published container
"""
created_date = datetime(2023, 4, 5, 6, 7, 8, tzinfo=timezone.utc)
with freeze_time(created_date):
container_meta = library_api.create_container(
self.library.key,
container_type=library_api.ContainerType.Unit,
slug="unit1",
title="A Unit in the Search Index",
user_id=None,
)
with freeze_time(self.container.created):
# Create a container with a block in it
library_api.update_container_children(
container_meta.container_key,
self.container.container_key,
[self.library_block.usage_key],
user_id=None,
)
library_api.publish_changes(self.library.key)
doc = searchable_doc_for_container(container_meta.container_key)
doc = searchable_doc_for_container(self.container.container_key)
doc.update(searchable_doc_tags(self.container.container_key))
assert doc == {
"id": "lctedx2012_fallunitunit1-edd13a0c",
@@ -571,29 +568,22 @@ class StudioDocumentsTest(SharedModuleStoreTestCase):
"breadcrumbs": [{"display_name": "some content_library"}],
"created": 1680674828.0,
"modified": 1680674828.0,
"tags": {
"taxonomy": ["Difficulty"],
"level0": ["Difficulty > Normal"]
},
"published": {"num_children": 1},
# "tags" should be here but we haven't implemented them yet
# "published" is not set since we haven't published it yet
}
def test_published_container_with_changes(self):
"""
Test creating a search document for a published container
"""
created_date = datetime(2023, 4, 5, 6, 7, 8, tzinfo=timezone.utc)
with freeze_time(created_date):
container_meta = library_api.create_container(
self.library.key,
container_type=library_api.ContainerType.Unit,
slug="unit1",
title="A Unit in the Search Index",
user_id=None,
)
library_api.update_container_children(
container_meta.container_key,
[self.library_block.usage_key],
user_id=None,
)
library_api.update_container_children(
self.container.container_key,
[self.library_block.usage_key],
user_id=None,
)
library_api.publish_changes(self.library.key)
block_2 = library_api.create_library_block(
self.library.key,
@@ -602,15 +592,16 @@ class StudioDocumentsTest(SharedModuleStoreTestCase):
)
# Add another component after publish
with freeze_time(created_date):
with freeze_time(self.container.created):
library_api.update_container_children(
container_meta.container_key,
self.container.container_key,
[block_2.usage_key],
user_id=None,
entities_action=authoring_api.ChildrenEntitiesAction.APPEND,
)
doc = searchable_doc_for_container(container_meta.container_key)
doc = searchable_doc_for_container(self.container.container_key)
doc.update(searchable_doc_tags(self.container.container_key))
assert doc == {
"id": "lctedx2012_fallunitunit1-edd13a0c",
@@ -628,9 +619,11 @@ class StudioDocumentsTest(SharedModuleStoreTestCase):
"breadcrumbs": [{"display_name": "some content_library"}],
"created": 1680674828.0,
"modified": 1680674828.0,
"tags": {
"taxonomy": ["Difficulty"],
"level0": ["Difficulty > Normal"]
},
"published": {"num_children": 1},
# "tags" should be here but we haven't implemented them yet
# "published" is not set since we haven't published it yet
}
def test_mathjax_plain_text_conversion_for_search(self):

View File

@@ -14,7 +14,7 @@ import ddt
from django.contrib.auth import get_user_model
from django.core.files.uploadedfile import SimpleUploadedFile
from edx_django_utils.cache import RequestCache
from opaque_keys.edx.locator import BlockUsageLocator, CourseLocator, LibraryCollectionLocator
from opaque_keys.edx.locator import BlockUsageLocator, CourseLocator, LibraryCollectionLocator, LibraryContainerLocator
from openedx_tagging.core.tagging.models import Tag, Taxonomy
from openedx_tagging.core.tagging.models.system_defined import SystemDefinedTaxonomy
from openedx_tagging.core.tagging.rest_api.v1.serializers import TaxonomySerializer
@@ -114,6 +114,9 @@ class TestTaxonomyObjectsMixin:
def _setUp_collection(self):
self.collection_key = str(LibraryCollectionLocator(self.content_libraryA.key, 'test-collection'))
def _setUp_container(self):
self.container_key = str(LibraryContainerLocator(self.content_libraryA.key, 'unit', 'unit1'))
def _setUp_users(self):
"""
Create users for testing
@@ -288,6 +291,7 @@ class TestTaxonomyObjectsMixin:
self._setUp_users()
self._setUp_taxonomies()
self._setUp_collection()
self._setUp_container()
# Clear all request caches in between test runs to keep query counts consistent.
RequestCache.clear_all_namespaces()
@@ -1701,6 +1705,50 @@ class TestObjectTagViewSet(TestObjectTagMixin, APITestCase):
assert status.is_success(new_response.status_code)
assert new_response.data == response.data
@ddt.data(
# staffA and staff are staff in collection and can tag using enabled taxonomies
("user", "tA1", ["Tag 1"], status.HTTP_403_FORBIDDEN),
("staffA", "tA1", ["Tag 1"], status.HTTP_200_OK),
("staff", "tA1", ["Tag 1"], status.HTTP_200_OK),
("user", "tA1", [], status.HTTP_403_FORBIDDEN),
("staffA", "tA1", [], status.HTTP_200_OK),
("staff", "tA1", [], status.HTTP_200_OK),
("user", "multiple_taxonomy", ["Tag 1", "Tag 2"], status.HTTP_403_FORBIDDEN),
("staffA", "multiple_taxonomy", ["Tag 1", "Tag 2"], status.HTTP_200_OK),
("staff", "multiple_taxonomy", ["Tag 1", "Tag 2"], status.HTTP_200_OK),
("user", "open_taxonomy", ["tag1"], status.HTTP_403_FORBIDDEN),
("staffA", "open_taxonomy", ["tag1"], status.HTTP_200_OK),
("staff", "open_taxonomy", ["tag1"], status.HTTP_200_OK),
)
@ddt.unpack
def test_tag_container(self, user_attr, taxonomy_attr, tag_values, expected_status):
"""
Tests that only staff and org level users can tag containers
"""
user = getattr(self, user_attr)
self.client.force_authenticate(user=user)
taxonomy = getattr(self, taxonomy_attr)
response = self._call_put_request(self.container_key, taxonomy.pk, tag_values)
assert response.status_code == expected_status
if status.is_success(expected_status):
tags_by_taxonomy = response.data[str(self.container_key)]["taxonomies"]
if tag_values:
response_taxonomy = tags_by_taxonomy[0]
assert response_taxonomy["name"] == taxonomy.name
response_tags = response_taxonomy["tags"]
assert [t["value"] for t in response_tags] == tag_values
else:
assert tags_by_taxonomy == [] # No tags are set from any taxonomy
# Check that re-fetching the tags returns what we set
url = OBJECT_TAG_UPDATE_URL.format(object_id=self.container_key)
new_response = self.client.get(url, format="json")
assert status.is_success(new_response.status_code)
assert new_response.data == response.data
@ddt.data(
"staffA",
"staff",

View File

@@ -6,7 +6,7 @@ import ddt
from django.test.testcases import TestCase
from fs.osfs import OSFS
from opaque_keys.edx.keys import CourseKey, UsageKey
from opaque_keys.edx.locator import LibraryLocatorV2, LibraryCollectionLocator
from opaque_keys.edx.locator import LibraryLocatorV2, LibraryCollectionLocator, LibraryContainerLocator
from openedx_tagging.core.tagging.models import ObjectTag
from organizations.models import Organization
from .test_objecttag_export_helpers import TestGetAllObjectTagsMixin, TaggedCourseMixin
@@ -397,6 +397,23 @@ class TestAPIObjectTags(TestGetAllObjectTagsMixin, TestCase):
self.taxonomy_3.id: self.taxonomy_3,
}
def test_tag_container(self):
unit_key = LibraryContainerLocator.from_string('lct:orgA:libX:unit:unit1')
api.tag_object(
object_id=str(unit_key),
taxonomy=self.taxonomy_3,
tags=["Tag 3.1"],
)
with self.assertNumQueries(1):
object_tags, taxonomies = api.get_all_object_tags(unit_key)
assert object_tags == {'lct:orgA:libX:unit:unit1': {3: ['Tag 3.1']}}
assert taxonomies == {
self.taxonomy_3.id: self.taxonomy_3,
}
class TestExportImportTags(TaggedCourseMixin):
"""