From 3233fa92d48e635878a8388da51e9a0e14eca008 Mon Sep 17 00:00:00 2001 From: Sid Verma Date: Thu, 6 Aug 2020 11:56:40 +0530 Subject: [PATCH] Index xblocks in elasticsearch --- .../core/djangoapps/content_libraries/api.py | 20 +- .../content_libraries/libraries_index.py | 323 +++++++++++------- .../content_libraries/library_bundle.py | 19 +- .../commands/reindex_content_library.py | 14 +- .../djangoapps/content_libraries/signals.py | 8 +- .../content_libraries/tests/base.py | 4 +- .../tests/test_libraries_index.py | 129 +++++-- 7 files changed, 358 insertions(+), 159 deletions(-) diff --git a/openedx/core/djangoapps/content_libraries/api.py b/openedx/core/djangoapps/content_libraries/api.py index 7b1111dba7..5ad5bc9e73 100644 --- a/openedx/core/djangoapps/content_libraries/api.py +++ b/openedx/core/djangoapps/content_libraries/api.py @@ -58,7 +58,7 @@ from xblock.exceptions import XBlockNotFoundError from openedx.core.djangoapps.content_libraries import permissions from openedx.core.djangoapps.content_libraries.constants import DRAFT_NAME from openedx.core.djangoapps.content_libraries.library_bundle import LibraryBundle -from openedx.core.djangoapps.content_libraries.libraries_index import ContentLibraryIndexer, LibraryNotIndexedException +from openedx.core.djangoapps.content_libraries.libraries_index import ContentLibraryIndexer, ItemNotIndexedException from openedx.core.djangoapps.content_libraries.models import ContentLibrary, ContentLibraryPermission from openedx.core.djangoapps.content_libraries.signals import ( CONTENT_LIBRARY_CREATED, @@ -240,8 +240,8 @@ def get_metadata_from_index(queryset, text_search=None): if ContentLibraryIndexer.indexing_is_enabled(): try: library_keys = [lib.library_key for lib in queryset] - metadata = ContentLibraryIndexer.get_libraries(library_keys, text_search=text_search) - except (LibraryNotIndexedException, KeyError, ElasticConnectionError) as e: + metadata = ContentLibraryIndexer.get_items(library_keys, text_search=text_search) + except (ItemNotIndexedException, KeyError, ElasticConnectionError) as e: log.exception(e) # If ContentLibraryIndex is not available, we query blockstore for a limited set of metadata @@ -588,7 +588,7 @@ def set_library_block_olx(usage_key, new_olx_str): write_draft_file(draft.uuid, metadata.def_key.olx_path, new_olx_str.encode('utf-8')) # Clear the bundle cache so everyone sees the new block immediately: BundleCache(metadata.def_key.bundle_uuid, draft_name=DRAFT_NAME).clear() - LIBRARY_BLOCK_UPDATED.send(sender=None, library_key=usage_key.context_key) + LIBRARY_BLOCK_UPDATED.send(sender=None, library_key=usage_key.context_key, usage_key=usage_key) def create_library_block(library_key, block_type, definition_id): @@ -630,7 +630,7 @@ def create_library_block(library_key, block_type, definition_id): # Clear the bundle cache so everyone sees the new block immediately: BundleCache(ref.bundle_uuid, draft_name=DRAFT_NAME).clear() # Now return the metadata about the new block: - LIBRARY_BLOCK_CREATED.send(sender=None, library_key=ref.library_key) + LIBRARY_BLOCK_CREATED.send(sender=None, library_key=ref.library_key, usage_key=usage_key) return get_library_block(usage_key) @@ -683,7 +683,7 @@ def delete_library_block(usage_key, remove_from_parent=True): pass # Clear the bundle cache so everyone sees the deleted block immediately: lib_bundle.cache.clear() - LIBRARY_BLOCK_DELETED.send(sender=None, library_key=lib_bundle.library_key) + LIBRARY_BLOCK_DELETED.send(sender=None, library_key=lib_bundle.library_key, usage_key=usage_key) def create_library_block_child(parent_usage_key, block_type, definition_id): @@ -755,7 +755,7 @@ def add_library_block_static_asset_file(usage_key, file_name, file_content): file_metadata = blockstore_cache.get_bundle_file_metadata_with_cache( bundle_uuid=def_key.bundle_uuid, path=file_path, draft_name=DRAFT_NAME, ) - LIBRARY_BLOCK_UPDATED.send(sender=None, library_key=lib_bundle.library_key) + LIBRARY_BLOCK_UPDATED.send(sender=None, library_key=lib_bundle.library_key, usage_key=usage_key) return LibraryXBlockStaticFile(path=file_metadata.path, url=file_metadata.url, size=file_metadata.size) @@ -776,7 +776,7 @@ def delete_library_block_static_asset_file(usage_key, file_name): write_draft_file(draft.uuid, file_path, contents=None) # Clear the bundle cache so everyone sees the new file immediately: lib_bundle.cache.clear() - LIBRARY_BLOCK_UPDATED.send(sender=None, library_key=lib_bundle.library_key) + LIBRARY_BLOCK_UPDATED.send(sender=None, library_key=lib_bundle.library_key, usage_key=usage_key) def get_allowed_block_types(library_key): # pylint: disable=unused-argument @@ -903,7 +903,7 @@ def publish_changes(library_key): return # If there is no draft, no action is needed. LibraryBundle(library_key, ref.bundle_uuid).cache.clear() LibraryBundle(library_key, ref.bundle_uuid, draft_name=DRAFT_NAME).cache.clear() - CONTENT_LIBRARY_UPDATED.send(sender=None, library_key=library_key) + CONTENT_LIBRARY_UPDATED.send(sender=None, library_key=library_key, update_blocks=True) def revert_changes(library_key): @@ -919,4 +919,4 @@ def revert_changes(library_key): else: return # If there is no draft, no action is needed. LibraryBundle(library_key, ref.bundle_uuid, draft_name=DRAFT_NAME).cache.clear() - CONTENT_LIBRARY_UPDATED.send(sender=None, library_key=library_key) + CONTENT_LIBRARY_UPDATED.send(sender=None, library_key=library_key, update_blocks=True) diff --git a/openedx/core/djangoapps/content_libraries/libraries_index.py b/openedx/core/djangoapps/content_libraries/libraries_index.py index bbdbddd970..d0f0e32d2e 100644 --- a/openedx/core/djangoapps/content_libraries/libraries_index.py +++ b/openedx/core/djangoapps/content_libraries/libraries_index.py @@ -1,12 +1,14 @@ """ Code to allow indexing content libraries """ import logging +from abc import ABC, abstractmethod from django.conf import settings from django.dispatch import receiver from elasticsearch.exceptions import ConnectionError as ElasticConnectionError from search.elastic import ElasticSearchEngine, _translate_hits, _process_field_filters, RESERVED_CHARACTERS from search.search_engine_base import SearchEngine +from opaque_keys.edx.locator import LibraryUsageLocatorV2 from openedx.core.djangoapps.content_libraries.constants import DRAFT_NAME from openedx.core.djangoapps.content_libraries.signals import ( @@ -26,109 +28,88 @@ log = logging.getLogger(__name__) MAX_SIZE = 10000 # 10000 is the maximum records elastic is able to return in a single result. Defaults to 10. -class LibraryNotIndexedException(Exception): +class ItemNotIndexedException(Exception): """ - Library supplied wasn't indexed in ElasticSearch + Item wasn't indexed in ElasticSearch """ -class ContentLibraryIndexer: - """ - Class to perform indexing for blockstore-based content libraries - """ - - INDEX_NAME = "content_library_index" - LIBRARY_DOCUMENT_TYPE = "content_library" +class SearchIndexerBase(ABC): + INDEX_NAME = None + DOCUMENT_TYPE = None + ENABLE_INDEXING_KEY = None SEARCH_KWARGS = { # Set this to True or 'wait_for' if immediate refresh is required after any update. # See elastic docs for more information. 'refresh': False } - SCHEMA_VERSION = 0 + @classmethod + @abstractmethod + def get_item_definition(cls, item): + """ + Returns a serializable dictionary which can be stored in elasticsearch. + """ @classmethod - def index_libraries(cls, library_keys): + def index_items(cls, items): """ Index the specified libraries. If they already exist, replace them with new ones. """ searcher = SearchEngine.get_search_engine(cls.INDEX_NAME) - - library_dicts = [] - - for library_key in library_keys: - ref = ContentLibrary.objects.get_by_key(library_key) - lib_bundle = LibraryBundle(library_key, ref.bundle_uuid, draft_name=DRAFT_NAME) - num_blocks = len(lib_bundle.get_top_level_usages()) - last_published = lib_bundle.get_last_published_time() - last_published_str = None - if last_published: - last_published_str = last_published.strftime('%Y-%m-%dT%H:%M:%SZ') - (has_unpublished_changes, has_unpublished_deletes) = lib_bundle.has_changes() - - bundle_metadata = get_bundle(ref.bundle_uuid) - - # NOTE: Increment ContentLibraryIndexer.SCHEMA_VERSION if the following schema is updated to avoid dealing - # with outdated indexes which might cause errors due to missing/invalid attributes. - library_dict = { - "schema_version": ContentLibraryIndexer.SCHEMA_VERSION, - "id": str(library_key), - "uuid": str(bundle_metadata.uuid), - "title": bundle_metadata.title, - "description": bundle_metadata.description, - "num_blocks": num_blocks, - "version": bundle_metadata.latest_version, - "last_published": last_published_str, - "has_unpublished_changes": has_unpublished_changes, - "has_unpublished_deletes": has_unpublished_deletes, - # only 'content' field is analyzed by elastisearch, and allows text-search - "content": { - "id": str(library_key), - "title": bundle_metadata.title, - "description": bundle_metadata.description, - }, - } - library_dicts.append(library_dict) - return searcher.index(cls.LIBRARY_DOCUMENT_TYPE, library_dicts, **cls.SEARCH_KWARGS) + items = [cls.get_item_definition(item) for item in items] + return searcher.index(cls.DOCUMENT_TYPE, items, **cls.SEARCH_KWARGS) @classmethod - def get_libraries(cls, library_keys, text_search=None): + def search(cls, **kwargs): """ - Retrieve a list of libraries from the index + Search the index with the given kwargs """ searcher = SearchEngine.get_search_engine(cls.INDEX_NAME) - library_keys_str = [str(key) for key in library_keys] + response = searcher.search(doc_type=cls.DOCUMENT_TYPE, field_dictionary=kwargs, size=MAX_SIZE) + return response["results"] + + @classmethod + def get_items(cls, ids, text_search=None): + """ + Retrieve a list of items from the index + """ + searcher = SearchEngine.get_search_engine(cls.INDEX_NAME) + ids_str = [str(i) for i in ids] response = searcher.search( - doc_type=cls.LIBRARY_DOCUMENT_TYPE, + doc_type=cls.DOCUMENT_TYPE, field_dictionary={ - "id": library_keys_str, - "schema_version": ContentLibraryIndexer.SCHEMA_VERSION + "id": ids_str, + "schema_version": cls.SCHEMA_VERSION }, size=MAX_SIZE, ) - if len(response["results"]) != len(library_keys_str): - missing = set(library_keys_str) - set([result["data"]["id"] for result in response["results"]]) - raise LibraryNotIndexedException("Keys not found in index: {}".format(missing)) + if len(response["results"]) != len(ids_str): + missing = set(ids_str) - set([result["data"]["id"] for result in response["results"]]) + missing = set(ids_str) - set([result["data"]["id"] for result in response["results"]]) + raise ItemNotIndexedException("Keys not found in index: {}".format(missing)) if text_search: # Elastic is hit twice if text_search is valid # Once above to identify unindexed libraries, and now to filter with text_search if isinstance(searcher, ElasticSearchEngine): response = _translate_hits(searcher._es.search( - doc_type=cls.LIBRARY_DOCUMENT_TYPE, + doc_type=cls.DOCUMENT_TYPE, index=searcher.index_name, - body=build_elastic_query(library_keys_str, text_search), + body=cls.build_elastic_query(ids_str, text_search), + size=MAX_SIZE )) else: response = searcher.search( - doc_type=cls.LIBRARY_DOCUMENT_TYPE, - field_dictionary={"id": library_keys_str}, - query_string=text_search + doc_type=cls.DOCUMENT_TYPE, + field_dictionary={"id": ids_str}, + query_string=text_search, + size=MAX_SIZE ) # Search results may not retain the original order of keys - we use this - # dict to construct a list in the original order of library_keys + # dict to construct a list in the original order of ids response_dict = { result["data"]["id"]: result["data"] for result in response["results"] @@ -138,34 +119,159 @@ class ContentLibraryIndexer: response_dict[key] if key in response_dict else None - for key in library_keys_str + for key in ids_str ] @classmethod - def remove_libraries(cls, library_keys): + def remove_items(cls, ids): """ - Remove the provided library_keys from the index + Remove the provided ids from the index """ searcher = SearchEngine.get_search_engine(cls.INDEX_NAME) - ids_str = [str(key) for key in library_keys] - searcher.remove(cls.LIBRARY_DOCUMENT_TYPE, ids_str, **cls.SEARCH_KWARGS) + ids_str = [str(i) for i in ids] + searcher.remove(cls.DOCUMENT_TYPE, ids_str, **cls.SEARCH_KWARGS) @classmethod - def remove_all_libraries(cls): + def remove_all_items(cls): """ - Remove all libraries from the index + Remove all items from the index """ searcher = SearchEngine.get_search_engine(cls.INDEX_NAME) - response = searcher.search(doc_type=cls.LIBRARY_DOCUMENT_TYPE, filter_dictionary={}, size=MAX_SIZE) + response = searcher.search(doc_type=cls.DOCUMENT_TYPE, filter_dictionary={}, size=MAX_SIZE) ids = [result["data"]["id"] for result in response["results"]] - searcher.remove(cls.LIBRARY_DOCUMENT_TYPE, ids, **cls.SEARCH_KWARGS) + searcher.remove(cls.DOCUMENT_TYPE, ids, **cls.SEARCH_KWARGS) @classmethod def indexing_is_enabled(cls): """ Checks to see if the indexing feature is enabled """ - return settings.FEATURES.get("ENABLE_CONTENT_LIBRARY_INDEX", False) + return settings.FEATURES.get(cls.ENABLE_INDEXING_KEY, False) + + @staticmethod + def build_elastic_query(ids_str, text_search): + """ + Build and return an elastic query for doing text search on a library + """ + # Remove reserved characters (and ") from the text to prevent unexpected errors. + text_search_normalised = text_search.translate(text_search.maketrans('', '', RESERVED_CHARACTERS + '"')) + text_search_normalised = text_search.replace('-',' ') + # Wrap with asterix to enable partial matches + text_search_normalised = "*{}*".format(text_search_normalised) + return { + 'query': { + 'filtered': { + 'query': { + 'bool': { + 'should': [ + { + 'query_string': { + 'query': text_search_normalised, + "fields": ["content.*"], + "minimum_should_match": "100%", + }, + }, + # Add a special wildcard search for id, as it contains a ":" character which is filtered out + # in query_string + { + 'wildcard': { + 'id': { + 'value': '*{}*'.format(text_search), + } + }, + }, + ], + }, + }, + 'filter': { + 'terms': { + 'id': ids_str + } + } + }, + }, + } + + +class ContentLibraryIndexer(SearchIndexerBase): + """ + Class to perform indexing for blockstore-based content libraries + """ + + INDEX_NAME = "content_library_index" + ENABLE_INDEXING_KEY = "ENABLE_CONTENT_LIBRARY_INDEX" + DOCUMENT_TYPE = "content_library" + SCHEMA_VERSION = 0 + + @classmethod + def get_item_definition(cls, library_key): + ref = ContentLibrary.objects.get_by_key(library_key) + lib_bundle = LibraryBundle(library_key, ref.bundle_uuid, draft_name=DRAFT_NAME) + num_blocks = len(lib_bundle.get_top_level_usages()) + last_published = lib_bundle.get_last_published_time() + last_published_str = None + if last_published: + last_published_str = last_published.strftime('%Y-%m-%dT%H:%M:%SZ') + (has_unpublished_changes, has_unpublished_deletes) = lib_bundle.has_changes() + + bundle_metadata = get_bundle(ref.bundle_uuid) + + # NOTE: Increment ContentLibraryIndexer.SCHEMA_VERSION if the following schema is updated to avoid dealing + # with outdated indexes which might cause errors due to missing/invalid attributes. + return { + "schema_version": ContentLibraryIndexer.SCHEMA_VERSION, + "id": str(library_key), + "uuid": str(bundle_metadata.uuid), + "title": bundle_metadata.title, + "description": bundle_metadata.description, + "num_blocks": num_blocks, + "version": bundle_metadata.latest_version, + "last_published": last_published_str, + "has_unpublished_changes": has_unpublished_changes, + "has_unpublished_deletes": has_unpublished_deletes, + # only 'content' field is analyzed by elastisearch, and allows text-search + "content": { + "id": str(library_key), + "title": bundle_metadata.title, + "description": bundle_metadata.description, + }, + } + + +class LibraryBlockIndexer(SearchIndexerBase): + """ + Class to perform indexing on the XBlocks in content libraries. + """ + + INDEX_NAME = "content_library_index" + ENABLE_INDEXING_KEY = "ENABLE_CONTENT_LIBRARY_INDEX" + DOCUMENT_TYPE = "content_library_block" + SCHEMA_VERSION = 0 + + @classmethod + def get_item_definition(cls, usage_key): + from openedx.core.djangoapps.content_libraries.api import get_block_display_name, _lookup_usage_key + + def_key, lib_bundle = _lookup_usage_key(usage_key) + is_child = usage_key in lib_bundle.get_bundle_includes().keys() + + # NOTE: Increment ContentLibraryIndexer.SCHEMA_VERSION if the following schema is updated to avoid dealing + # with outdated indexes which might cause errors due to missing/invalid attributes. + return { + "schema_version": ContentLibraryIndexer.SCHEMA_VERSION, + "id": str(usage_key), + "library_key": str(lib_bundle.library_key), + "is_child": is_child, + "def_key": str(def_key), + "display_name": get_block_display_name(def_key), + "block_type": def_key.block_type, + "has_unpublished_changes": lib_bundle.does_definition_have_unpublished_changes(def_key), + # only 'content' field is analyzed by elastisearch, and allows text-search + "content": { + "id": str(usage_key), + "display_name": get_block_display_name(def_key), + }, + } @receiver(CONTENT_LIBRARY_CREATED) @@ -179,7 +285,11 @@ def index_library(sender, library_key, **kwargs): # pylint: disable=unused-argu """ if ContentLibraryIndexer.indexing_is_enabled(): try: - ContentLibraryIndexer.index_libraries([library_key]) + ContentLibraryIndexer.index_items([library_key]) + if kwargs.get('update_blocks', False): + blocks = LibraryBlockIndexer.search(library_key=str(library_key)) + usage_keys = [LibraryUsageLocatorV2.from_string(block['data']['id']) for block in blocks] + LibraryBlockIndexer.index_items(usage_keys) except ElasticConnectionError as e: log.exception(e) @@ -191,50 +301,33 @@ def remove_library_index(sender, library_key, **kwargs): # pylint: disable=unus """ if ContentLibraryIndexer.indexing_is_enabled(): try: - ContentLibraryIndexer.remove_libraries([library_key]) + ContentLibraryIndexer.remove_items([library_key]) + blocks = LibraryBlockIndexer.search(library_key=str(library_key)) + LibraryBlockIndexer.remove_items([block['data']['id'] for block in blocks]) except ElasticConnectionError as e: log.exception(e) -def build_elastic_query(library_keys_str, text_search): +@receiver(LIBRARY_BLOCK_CREATED) +@receiver(LIBRARY_BLOCK_UPDATED) +def index_block(sender, usage_key, **kwargs): # pylint: disable=unused-argument """ - Build and return an elastic query for doing text search on a library + Index block metadata when created """ - # Remove reserved characters (and ") from the text to prevent unexpected errors. - text_search_normalised = text_search.translate(text_search.maketrans('', '', RESERVED_CHARACTERS + '"')) - text_search_normalised = text_search.replace('-',' ') - # Wrap with asterix to enable partial matches - text_search_normalised = "*{}*".format(text_search_normalised) - return { - 'query': { - 'filtered': { - 'query': { - 'bool': { - 'should': [ - { - 'query_string': { - 'query': text_search_normalised, - "fields": ["content.*"], - "minimum_should_match": "100%", - }, - }, - # Add a special wildcard search for id, as it contains a ":" character which is filtered out - # in query_string - { - 'wildcard': { - 'id': { - 'value': '*{}*'.format(text_search), - } - }, - }, - ], - }, - }, - 'filter': { - 'terms': { - 'id': library_keys_str - } - } - }, - }, - } + if LibraryBlockIndexer.indexing_is_enabled(): + try: + LibraryBlockIndexer.index_items([usage_key]) + except ConnectionError as e: + log.exception(e) + + +@receiver(LIBRARY_BLOCK_DELETED) +def remove_block_index(sender, usage_key, **kwargs): # pylint: disable=unused-argument + """ + Remove the block from the index when deleted + """ + if LibraryBlockIndexer.indexing_is_enabled(): + try: + LibraryBlockIndexer.remove_items([usage_key]) + except ConnectionError as e: + log.exception(e) diff --git a/openedx/core/djangoapps/content_libraries/library_bundle.py b/openedx/core/djangoapps/content_libraries/library_bundle.py index 8374d18503..9a566652a9 100644 --- a/openedx/core/djangoapps/content_libraries/library_bundle.py +++ b/openedx/core/djangoapps/content_libraries/library_bundle.py @@ -162,16 +162,23 @@ class LibraryBundle(object): except KeyError: return None + def get_all_usages(self): + """ + Get usage keys of all the blocks in this bundle + """ + usage_keys = [] + for olx_file_path in self.get_olx_files(): + block_type, usage_id, _unused = olx_file_path.split('/') + usage_key = LibraryUsageLocatorV2(self.library_key, block_type, usage_id) + usage_keys.append(usage_key) + + return usage_keys + def get_top_level_usages(self): """ Get the set of usage keys in this bundle that have no parent. """ - own_usage_keys = [] - for olx_file_path in self.get_olx_files(): - block_type, usage_id, _unused = olx_file_path.split('/') - usage_key = LibraryUsageLocatorV2(self.library_key, block_type, usage_id) - own_usage_keys.append(usage_key) - + own_usage_keys = self.get_all_usages() usage_keys_with_parents = self.get_bundle_includes().keys() return [usage_key for usage_key in own_usage_keys if usage_key not in usage_keys_with_parents] diff --git a/openedx/core/djangoapps/content_libraries/management/commands/reindex_content_library.py b/openedx/core/djangoapps/content_libraries/management/commands/reindex_content_library.py index 205ebae552..8a3f3fa2c4 100644 --- a/openedx/core/djangoapps/content_libraries/management/commands/reindex_content_library.py +++ b/openedx/core/djangoapps/content_libraries/management/commands/reindex_content_library.py @@ -7,7 +7,9 @@ from textwrap import dedent from django.core.management import BaseCommand from opaque_keys.edx.locator import LibraryLocatorV2 -from openedx.core.djangoapps.content_libraries.libraries_index import ContentLibraryIndexer +from openedx.core.djangoapps.content_libraries.api import DRAFT_NAME +from openedx.core.djangoapps.content_libraries.libraries_index import ContentLibraryIndexer, LibraryBlockIndexer +from openedx.core.djangoapps.content_libraries.library_bundle import LibraryBundle from openedx.core.djangoapps.content_libraries.models import ContentLibrary from cms.djangoapps.contentstore.management.commands.prompt import query_yes_no @@ -57,7 +59,8 @@ class Command(BaseCommand): if options['clear-all']: if options['force'] or query_yes_no(self.CONFIRMATION_PROMPT_CLEAR, default="no"): logging.info("Removing all libraries from the index") - ContentLibraryIndexer.remove_all_libraries() + ContentLibraryIndexer.remove_all_items() + LibraryBlockIndexer.remove_all_items() return if options['all']: @@ -70,4 +73,9 @@ class Command(BaseCommand): logging.info("Indexing libraries: {}".format(options['library_ids'])) library_keys = list(map(LibraryLocatorV2.from_string, options['library_ids'])) - ContentLibraryIndexer.index_libraries(library_keys) + ContentLibraryIndexer.index_items(library_keys) + + for library_key in library_keys: + ref = ContentLibrary.objects.get_by_key(library_key) + lib_bundle = LibraryBundle(library_key, ref.bundle_uuid, draft_name=DRAFT_NAME) + LibraryBlockIndexer.index_items(lib_bundle.get_all_usages()) diff --git a/openedx/core/djangoapps/content_libraries/signals.py b/openedx/core/djangoapps/content_libraries/signals.py index 6401b50773..90b3228fe2 100644 --- a/openedx/core/djangoapps/content_libraries/signals.py +++ b/openedx/core/djangoapps/content_libraries/signals.py @@ -5,8 +5,8 @@ Content libraries related signals. from django.dispatch import Signal CONTENT_LIBRARY_CREATED = Signal(providing_args=['library_key']) -CONTENT_LIBRARY_UPDATED = Signal(providing_args=['library_key']) +CONTENT_LIBRARY_UPDATED = Signal(providing_args=['library_key', 'update_blocks']) CONTENT_LIBRARY_DELETED = Signal(providing_args=['library_key']) -LIBRARY_BLOCK_CREATED = Signal(providing_args=['library_key']) -LIBRARY_BLOCK_DELETED = Signal(providing_args=['library_key']) -LIBRARY_BLOCK_UPDATED = Signal(providing_args=['library_key']) +LIBRARY_BLOCK_CREATED = Signal(providing_args=['library_key', 'usage_key']) +LIBRARY_BLOCK_DELETED = Signal(providing_args=['library_key', 'usage_key']) +LIBRARY_BLOCK_UPDATED = Signal(providing_args=['library_key', 'usage_key']) diff --git a/openedx/core/djangoapps/content_libraries/tests/base.py b/openedx/core/djangoapps/content_libraries/tests/base.py index 6881f22d46..12f8bb9ea6 100644 --- a/openedx/core/djangoapps/content_libraries/tests/base.py +++ b/openedx/core/djangoapps/content_libraries/tests/base.py @@ -56,12 +56,12 @@ def elasticsearch_test(func): 'host': settings.TEST_ELASTICSEARCH_HOST, 'port': settings.TEST_ELASTICSEARCH_PORT, }])(func) - func = patch("openedx.core.djangoapps.content_libraries.libraries_index.ContentLibraryIndexer.SEARCH_KWARGS", new={ + func = patch("openedx.core.djangoapps.content_libraries.libraries_index.SearchIndexerBase.SEARCH_KWARGS", new={ 'refresh': 'wait_for' })(func) return func else: - return patch("openedx.core.djangoapps.content_libraries.libraries_index.ContentLibraryIndexer.SEARCH_KWARGS", new={})(func) + return patch("openedx.core.djangoapps.content_libraries.libraries_index.SearchIndexerBase.SEARCH_KWARGS", new={})(func) @requires_blockstore diff --git a/openedx/core/djangoapps/content_libraries/tests/test_libraries_index.py b/openedx/core/djangoapps/content_libraries/tests/test_libraries_index.py index e06a98c28d..2c7cf5bcc7 100644 --- a/openedx/core/djangoapps/content_libraries/tests/test_libraries_index.py +++ b/openedx/core/djangoapps/content_libraries/tests/test_libraries_index.py @@ -6,10 +6,14 @@ from django.conf import settings from django.core.management import call_command from django.test.utils import override_settings from mock import patch -from opaque_keys.edx.locator import LibraryLocatorV2 +from opaque_keys.edx.locator import LibraryLocatorV2, LibraryUsageLocatorV2 from search.search_engine_base import SearchEngine -from openedx.core.djangoapps.content_libraries.libraries_index import ContentLibraryIndexer, LibraryNotIndexedException +from openedx.core.djangoapps.content_libraries.libraries_index import ( + ContentLibraryIndexer, + LibraryBlockIndexer, + ItemNotIndexedException, +) from openedx.core.djangoapps.content_libraries.tests.base import ContentLibrariesRestApiTest, elasticsearch_test @@ -23,7 +27,8 @@ class ContentLibraryIndexerIndexer(ContentLibrariesRestApiTest): @elasticsearch_test def setUp(self): super().setUp() - ContentLibraryIndexer.remove_all_libraries() + ContentLibraryIndexer.remove_all_items() + LibraryBlockIndexer.remove_all_items() self.searcher = SearchEngine.get_search_engine(ContentLibraryIndexer.INDEX_NAME) def test_index_libraries(self): @@ -35,7 +40,7 @@ class ContentLibraryIndexerIndexer(ContentLibrariesRestApiTest): for result in [result1, result2]: library_key = LibraryLocatorV2.from_string(result['id']) - response = ContentLibraryIndexer.get_libraries([library_key])[0] + response = ContentLibraryIndexer.get_items([library_key])[0] self.assertEqual(response['id'], result['id']) self.assertEqual(response['title'], result['title']) @@ -54,31 +59,31 @@ class ContentLibraryIndexerIndexer(ContentLibrariesRestApiTest): result = self._create_library(slug="test-lib-schemaupdates-1", title="Title 1", description="Description") library_key = LibraryLocatorV2.from_string(result['id']) - ContentLibraryIndexer.get_libraries([library_key]) + ContentLibraryIndexer.get_items([library_key]) with patch("openedx.core.djangoapps.content_libraries.libraries_index.ContentLibraryIndexer.SCHEMA_VERSION", new=1): - with self.assertRaises(LibraryNotIndexedException): - ContentLibraryIndexer.get_libraries([library_key]) + with self.assertRaises(ItemNotIndexedException): + ContentLibraryIndexer.get_items([library_key]) call_command("reindex_content_library", all=True, force=True) - ContentLibraryIndexer.get_libraries([library_key]) + ContentLibraryIndexer.get_items([library_key]) def test_remove_all_libraries(self): """ - Test if remove_all_libraries() deletes all libraries + Test if remove_all_items() deletes all libraries """ lib1 = self._create_library(slug="test-lib-rm-all-1", title="Title 1", description="Description") lib2 = self._create_library(slug="test-lib-rm-all-2", title="Title 1", description="Description") library_key1 = LibraryLocatorV2.from_string(lib1['id']) library_key2 = LibraryLocatorV2.from_string(lib2['id']) - ContentLibraryIndexer.get_libraries([library_key1, library_key2]) + self.assertEqual(len(ContentLibraryIndexer.get_items([library_key1, library_key2])), 2) - ContentLibraryIndexer.remove_all_libraries() - with self.assertRaises(LibraryNotIndexedException): - ContentLibraryIndexer.get_libraries([library_key1, library_key2]) + ContentLibraryIndexer.remove_all_items() + with self.assertRaises(ItemNotIndexedException): + ContentLibraryIndexer.get_items([library_key1, library_key2]) def test_update_libraries(self): """ @@ -89,7 +94,7 @@ class ContentLibraryIndexerIndexer(ContentLibrariesRestApiTest): self._update_library(lib['id'], title="New Title", description="New Title") - response = ContentLibraryIndexer.get_libraries([library_key])[0] + response = ContentLibraryIndexer.get_items([library_key])[0] self.assertEqual(response['id'], lib['id']) self.assertEqual(response['title'], "New Title") @@ -102,8 +107,8 @@ class ContentLibraryIndexerIndexer(ContentLibrariesRestApiTest): self.assertEqual(response['has_unpublished_deletes'], False) self._delete_library(lib['id']) - with self.assertRaises(LibraryNotIndexedException): - ContentLibraryIndexer.get_libraries([library_key]) + with self.assertRaises(ItemNotIndexedException): + ContentLibraryIndexer.get_items([library_key]) def test_update_library_blocks(self): """ @@ -113,9 +118,9 @@ class ContentLibraryIndexerIndexer(ContentLibrariesRestApiTest): """ Commit library changes, and verify that there are no uncommited changes anymore """ - last_published = ContentLibraryIndexer.get_libraries([library_key])[0]['last_published'] + last_published = ContentLibraryIndexer.get_items([library_key])[0]['last_published'] self._commit_library_changes(str(library_key)) - response = ContentLibraryIndexer.get_libraries([library_key])[0] + response = ContentLibraryIndexer.get_items([library_key])[0] self.assertEqual(response['has_unpublished_changes'], False) self.assertEqual(response['has_unpublished_deletes'], False) self.assertGreaterEqual(response['last_published'], last_published) @@ -125,7 +130,7 @@ class ContentLibraryIndexerIndexer(ContentLibrariesRestApiTest): """ Verify uncommitted changes and deletes in the index """ - response = ContentLibraryIndexer.get_libraries([library_key])[0] + response = ContentLibraryIndexer.get_items([library_key])[0] self.assertEqual(response['has_unpublished_changes'], has_unpublished_changes) self.assertEqual(response['has_unpublished_deletes'], has_unpublished_deletes) return response @@ -177,3 +182,89 @@ class ContentLibraryIndexerIndexer(ContentLibrariesRestApiTest): #Verify reverting uncommitted changes self._revert_library_changes(lib["id"]) verify_uncommitted_libraries(library_key, False, False) + + +@override_settings(FEATURES={**settings.FEATURES, 'ENABLE_CONTENT_LIBRARY_INDEX': True}) +@elasticsearch_test +class LibraryBlockIndexerTest(ContentLibrariesRestApiTest): + """ + Tests the operation of LibraryBlockIndexer + """ + + @elasticsearch_test + def setUp(self): + super().setUp() + ContentLibraryIndexer.remove_all_items() + LibraryBlockIndexer.remove_all_items() + self.searcher = SearchEngine.get_search_engine(LibraryBlockIndexer.INDEX_NAME) + + def test_index_block(self): + """ + Test if libraries are being indexed correctly + """ + lib = self._create_library(slug="test-lib-index-1", title="Title 1", description="Description") + block1 = self._add_block_to_library(lib['id'], "problem", "problem1") + block2 = self._add_block_to_library(lib['id'], "problem", "problem2") + + self.assertEqual(len(LibraryBlockIndexer.search()), 2) + + for block in [block1, block2]: + usage_key = LibraryUsageLocatorV2.from_string(block['id']) + response = LibraryBlockIndexer.get_items([usage_key])[0] + + self.assertEqual(response['id'], block['id']) + self.assertEqual(response['def_key'], block['def_key']) + self.assertEqual(response['block_type'], block['block_type']) + self.assertEqual(response['display_name'], block['display_name']) + self.assertEqual(response['has_unpublished_changes'], block['has_unpublished_changes']) + + def test_remove_all_items(self): + """ + Test if remove_all_items() deletes all libraries + """ + lib1 = self._create_library(slug="test-lib-rm-all", title="Title 1", description="Description") + self._add_block_to_library(lib1['id'], "problem", "problem1") + self._add_block_to_library(lib1['id'], "problem", "problem2") + self.assertEqual(len(LibraryBlockIndexer.search()), 2) + + LibraryBlockIndexer.remove_all_items() + self.assertEqual(len(LibraryBlockIndexer.search()), 0) + + def test_crud_block(self): + """ + Test that CRUD operations on blocks are reflected in the index + """ + lib = self._create_library(slug="test-lib-crud-block", title="Title", description="Description") + block = self._add_block_to_library(lib['id'], "problem", "problem1") + + # Update OLX, verify updates in index + self._set_library_block_olx(block["id"], '') + response = LibraryBlockIndexer.get_items([block['id']])[0] + self.assertEqual(response['display_name'], "new_name") + self.assertEqual(response['has_unpublished_changes'], True) + + # Verify has_unpublished_changes after committing library + self._commit_library_changes(lib['id']) + response = LibraryBlockIndexer.get_items([block['id']])[0] + self.assertEqual(response['has_unpublished_changes'], False) + + # Verify has_unpublished_changes after reverting library + self._set_library_block_asset(block["id"], "whatever.png", b"data") + response = LibraryBlockIndexer.get_items([block['id']])[0] + self.assertEqual(response['has_unpublished_changes'], True) + + self._revert_library_changes(lib['id']) + response = LibraryBlockIndexer.get_items([block['id']])[0] + self.assertEqual(response['has_unpublished_changes'], False) + + # Verify that deleting block removes it from index + self._delete_library_block(block['id']) + with self.assertRaises(ItemNotIndexedException): + LibraryBlockIndexer.get_items([block['id']]) + + # Verify that deleting a library removes its blocks from index too + self._add_block_to_library(lib['id'], "problem", "problem1") + LibraryBlockIndexer.get_items([block['id']]) + self._delete_library(lib['id']) + with self.assertRaises(ItemNotIndexedException): + LibraryBlockIndexer.get_items([block['id']])