Index xblocks in elasticsearch

This commit is contained in:
Sid Verma
2020-08-06 11:56:40 +05:30
committed by Kyle McCormick
parent 735bc412d4
commit 3233fa92d4
7 changed files with 358 additions and 159 deletions

View File

@@ -58,7 +58,7 @@ from xblock.exceptions import XBlockNotFoundError
from openedx.core.djangoapps.content_libraries import permissions
from openedx.core.djangoapps.content_libraries.constants import DRAFT_NAME
from openedx.core.djangoapps.content_libraries.library_bundle import LibraryBundle
from openedx.core.djangoapps.content_libraries.libraries_index import ContentLibraryIndexer, LibraryNotIndexedException
from openedx.core.djangoapps.content_libraries.libraries_index import ContentLibraryIndexer, ItemNotIndexedException
from openedx.core.djangoapps.content_libraries.models import ContentLibrary, ContentLibraryPermission
from openedx.core.djangoapps.content_libraries.signals import (
CONTENT_LIBRARY_CREATED,
@@ -240,8 +240,8 @@ def get_metadata_from_index(queryset, text_search=None):
if ContentLibraryIndexer.indexing_is_enabled():
try:
library_keys = [lib.library_key for lib in queryset]
metadata = ContentLibraryIndexer.get_libraries(library_keys, text_search=text_search)
except (LibraryNotIndexedException, KeyError, ElasticConnectionError) as e:
metadata = ContentLibraryIndexer.get_items(library_keys, text_search=text_search)
except (ItemNotIndexedException, KeyError, ElasticConnectionError) as e:
log.exception(e)
# If ContentLibraryIndex is not available, we query blockstore for a limited set of metadata
@@ -588,7 +588,7 @@ def set_library_block_olx(usage_key, new_olx_str):
write_draft_file(draft.uuid, metadata.def_key.olx_path, new_olx_str.encode('utf-8'))
# Clear the bundle cache so everyone sees the new block immediately:
BundleCache(metadata.def_key.bundle_uuid, draft_name=DRAFT_NAME).clear()
LIBRARY_BLOCK_UPDATED.send(sender=None, library_key=usage_key.context_key)
LIBRARY_BLOCK_UPDATED.send(sender=None, library_key=usage_key.context_key, usage_key=usage_key)
def create_library_block(library_key, block_type, definition_id):
@@ -630,7 +630,7 @@ def create_library_block(library_key, block_type, definition_id):
# Clear the bundle cache so everyone sees the new block immediately:
BundleCache(ref.bundle_uuid, draft_name=DRAFT_NAME).clear()
# Now return the metadata about the new block:
LIBRARY_BLOCK_CREATED.send(sender=None, library_key=ref.library_key)
LIBRARY_BLOCK_CREATED.send(sender=None, library_key=ref.library_key, usage_key=usage_key)
return get_library_block(usage_key)
@@ -683,7 +683,7 @@ def delete_library_block(usage_key, remove_from_parent=True):
pass
# Clear the bundle cache so everyone sees the deleted block immediately:
lib_bundle.cache.clear()
LIBRARY_BLOCK_DELETED.send(sender=None, library_key=lib_bundle.library_key)
LIBRARY_BLOCK_DELETED.send(sender=None, library_key=lib_bundle.library_key, usage_key=usage_key)
def create_library_block_child(parent_usage_key, block_type, definition_id):
@@ -755,7 +755,7 @@ def add_library_block_static_asset_file(usage_key, file_name, file_content):
file_metadata = blockstore_cache.get_bundle_file_metadata_with_cache(
bundle_uuid=def_key.bundle_uuid, path=file_path, draft_name=DRAFT_NAME,
)
LIBRARY_BLOCK_UPDATED.send(sender=None, library_key=lib_bundle.library_key)
LIBRARY_BLOCK_UPDATED.send(sender=None, library_key=lib_bundle.library_key, usage_key=usage_key)
return LibraryXBlockStaticFile(path=file_metadata.path, url=file_metadata.url, size=file_metadata.size)
@@ -776,7 +776,7 @@ def delete_library_block_static_asset_file(usage_key, file_name):
write_draft_file(draft.uuid, file_path, contents=None)
# Clear the bundle cache so everyone sees the new file immediately:
lib_bundle.cache.clear()
LIBRARY_BLOCK_UPDATED.send(sender=None, library_key=lib_bundle.library_key)
LIBRARY_BLOCK_UPDATED.send(sender=None, library_key=lib_bundle.library_key, usage_key=usage_key)
def get_allowed_block_types(library_key): # pylint: disable=unused-argument
@@ -903,7 +903,7 @@ def publish_changes(library_key):
return # If there is no draft, no action is needed.
LibraryBundle(library_key, ref.bundle_uuid).cache.clear()
LibraryBundle(library_key, ref.bundle_uuid, draft_name=DRAFT_NAME).cache.clear()
CONTENT_LIBRARY_UPDATED.send(sender=None, library_key=library_key)
CONTENT_LIBRARY_UPDATED.send(sender=None, library_key=library_key, update_blocks=True)
def revert_changes(library_key):
@@ -919,4 +919,4 @@ def revert_changes(library_key):
else:
return # If there is no draft, no action is needed.
LibraryBundle(library_key, ref.bundle_uuid, draft_name=DRAFT_NAME).cache.clear()
CONTENT_LIBRARY_UPDATED.send(sender=None, library_key=library_key)
CONTENT_LIBRARY_UPDATED.send(sender=None, library_key=library_key, update_blocks=True)

View File

@@ -1,12 +1,14 @@
""" Code to allow indexing content libraries """
import logging
from abc import ABC, abstractmethod
from django.conf import settings
from django.dispatch import receiver
from elasticsearch.exceptions import ConnectionError as ElasticConnectionError
from search.elastic import ElasticSearchEngine, _translate_hits, _process_field_filters, RESERVED_CHARACTERS
from search.search_engine_base import SearchEngine
from opaque_keys.edx.locator import LibraryUsageLocatorV2
from openedx.core.djangoapps.content_libraries.constants import DRAFT_NAME
from openedx.core.djangoapps.content_libraries.signals import (
@@ -26,109 +28,88 @@ log = logging.getLogger(__name__)
MAX_SIZE = 10000 # 10000 is the maximum records elastic is able to return in a single result. Defaults to 10.
class LibraryNotIndexedException(Exception):
class ItemNotIndexedException(Exception):
"""
Library supplied wasn't indexed in ElasticSearch
Item wasn't indexed in ElasticSearch
"""
class ContentLibraryIndexer:
"""
Class to perform indexing for blockstore-based content libraries
"""
INDEX_NAME = "content_library_index"
LIBRARY_DOCUMENT_TYPE = "content_library"
class SearchIndexerBase(ABC):
INDEX_NAME = None
DOCUMENT_TYPE = None
ENABLE_INDEXING_KEY = None
SEARCH_KWARGS = {
# Set this to True or 'wait_for' if immediate refresh is required after any update.
# See elastic docs for more information.
'refresh': False
}
SCHEMA_VERSION = 0
@classmethod
@abstractmethod
def get_item_definition(cls, item):
"""
Returns a serializable dictionary which can be stored in elasticsearch.
"""
@classmethod
def index_libraries(cls, library_keys):
def index_items(cls, items):
"""
Index the specified libraries. If they already exist, replace them with new ones.
"""
searcher = SearchEngine.get_search_engine(cls.INDEX_NAME)
library_dicts = []
for library_key in library_keys:
ref = ContentLibrary.objects.get_by_key(library_key)
lib_bundle = LibraryBundle(library_key, ref.bundle_uuid, draft_name=DRAFT_NAME)
num_blocks = len(lib_bundle.get_top_level_usages())
last_published = lib_bundle.get_last_published_time()
last_published_str = None
if last_published:
last_published_str = last_published.strftime('%Y-%m-%dT%H:%M:%SZ')
(has_unpublished_changes, has_unpublished_deletes) = lib_bundle.has_changes()
bundle_metadata = get_bundle(ref.bundle_uuid)
# NOTE: Increment ContentLibraryIndexer.SCHEMA_VERSION if the following schema is updated to avoid dealing
# with outdated indexes which might cause errors due to missing/invalid attributes.
library_dict = {
"schema_version": ContentLibraryIndexer.SCHEMA_VERSION,
"id": str(library_key),
"uuid": str(bundle_metadata.uuid),
"title": bundle_metadata.title,
"description": bundle_metadata.description,
"num_blocks": num_blocks,
"version": bundle_metadata.latest_version,
"last_published": last_published_str,
"has_unpublished_changes": has_unpublished_changes,
"has_unpublished_deletes": has_unpublished_deletes,
# only 'content' field is analyzed by elastisearch, and allows text-search
"content": {
"id": str(library_key),
"title": bundle_metadata.title,
"description": bundle_metadata.description,
},
}
library_dicts.append(library_dict)
return searcher.index(cls.LIBRARY_DOCUMENT_TYPE, library_dicts, **cls.SEARCH_KWARGS)
items = [cls.get_item_definition(item) for item in items]
return searcher.index(cls.DOCUMENT_TYPE, items, **cls.SEARCH_KWARGS)
@classmethod
def get_libraries(cls, library_keys, text_search=None):
def search(cls, **kwargs):
"""
Retrieve a list of libraries from the index
Search the index with the given kwargs
"""
searcher = SearchEngine.get_search_engine(cls.INDEX_NAME)
library_keys_str = [str(key) for key in library_keys]
response = searcher.search(doc_type=cls.DOCUMENT_TYPE, field_dictionary=kwargs, size=MAX_SIZE)
return response["results"]
@classmethod
def get_items(cls, ids, text_search=None):
"""
Retrieve a list of items from the index
"""
searcher = SearchEngine.get_search_engine(cls.INDEX_NAME)
ids_str = [str(i) for i in ids]
response = searcher.search(
doc_type=cls.LIBRARY_DOCUMENT_TYPE,
doc_type=cls.DOCUMENT_TYPE,
field_dictionary={
"id": library_keys_str,
"schema_version": ContentLibraryIndexer.SCHEMA_VERSION
"id": ids_str,
"schema_version": cls.SCHEMA_VERSION
},
size=MAX_SIZE,
)
if len(response["results"]) != len(library_keys_str):
missing = set(library_keys_str) - set([result["data"]["id"] for result in response["results"]])
raise LibraryNotIndexedException("Keys not found in index: {}".format(missing))
if len(response["results"]) != len(ids_str):
missing = set(ids_str) - set([result["data"]["id"] for result in response["results"]])
missing = set(ids_str) - set([result["data"]["id"] for result in response["results"]])
raise ItemNotIndexedException("Keys not found in index: {}".format(missing))
if text_search:
# Elastic is hit twice if text_search is valid
# Once above to identify unindexed libraries, and now to filter with text_search
if isinstance(searcher, ElasticSearchEngine):
response = _translate_hits(searcher._es.search(
doc_type=cls.LIBRARY_DOCUMENT_TYPE,
doc_type=cls.DOCUMENT_TYPE,
index=searcher.index_name,
body=build_elastic_query(library_keys_str, text_search),
body=cls.build_elastic_query(ids_str, text_search),
size=MAX_SIZE
))
else:
response = searcher.search(
doc_type=cls.LIBRARY_DOCUMENT_TYPE,
field_dictionary={"id": library_keys_str},
query_string=text_search
doc_type=cls.DOCUMENT_TYPE,
field_dictionary={"id": ids_str},
query_string=text_search,
size=MAX_SIZE
)
# Search results may not retain the original order of keys - we use this
# dict to construct a list in the original order of library_keys
# dict to construct a list in the original order of ids
response_dict = {
result["data"]["id"]: result["data"]
for result in response["results"]
@@ -138,34 +119,159 @@ class ContentLibraryIndexer:
response_dict[key]
if key in response_dict
else None
for key in library_keys_str
for key in ids_str
]
@classmethod
def remove_libraries(cls, library_keys):
def remove_items(cls, ids):
"""
Remove the provided library_keys from the index
Remove the provided ids from the index
"""
searcher = SearchEngine.get_search_engine(cls.INDEX_NAME)
ids_str = [str(key) for key in library_keys]
searcher.remove(cls.LIBRARY_DOCUMENT_TYPE, ids_str, **cls.SEARCH_KWARGS)
ids_str = [str(i) for i in ids]
searcher.remove(cls.DOCUMENT_TYPE, ids_str, **cls.SEARCH_KWARGS)
@classmethod
def remove_all_libraries(cls):
def remove_all_items(cls):
"""
Remove all libraries from the index
Remove all items from the index
"""
searcher = SearchEngine.get_search_engine(cls.INDEX_NAME)
response = searcher.search(doc_type=cls.LIBRARY_DOCUMENT_TYPE, filter_dictionary={}, size=MAX_SIZE)
response = searcher.search(doc_type=cls.DOCUMENT_TYPE, filter_dictionary={}, size=MAX_SIZE)
ids = [result["data"]["id"] for result in response["results"]]
searcher.remove(cls.LIBRARY_DOCUMENT_TYPE, ids, **cls.SEARCH_KWARGS)
searcher.remove(cls.DOCUMENT_TYPE, ids, **cls.SEARCH_KWARGS)
@classmethod
def indexing_is_enabled(cls):
"""
Checks to see if the indexing feature is enabled
"""
return settings.FEATURES.get("ENABLE_CONTENT_LIBRARY_INDEX", False)
return settings.FEATURES.get(cls.ENABLE_INDEXING_KEY, False)
@staticmethod
def build_elastic_query(ids_str, text_search):
    """
    Build and return an elastic query body for doing text search on indexed items.

    Arguments:
        ids_str: list of document id strings the search is restricted to.
        text_search: free-text search string supplied by the user.

    Returns:
        A dict suitable for use as the ``body`` of a raw elasticsearch query.
    """
    # Remove reserved characters (and ") from the text to prevent unexpected errors.
    text_search_normalised = text_search.translate(text_search.maketrans('', '', RESERVED_CHARACTERS + '"'))
    # Fix: chain onto the normalised text. The previous code re-read the raw
    # `text_search` here, silently discarding the translate() result above.
    text_search_normalised = text_search_normalised.replace('-', ' ')
    # Wrap with asterisks to enable partial matches
    text_search_normalised = "*{}*".format(text_search_normalised)
    return {
        'query': {
            'filtered': {
                'query': {
                    'bool': {
                        'should': [
                            {
                                'query_string': {
                                    'query': text_search_normalised,
                                    "fields": ["content.*"],
                                    "minimum_should_match": "100%",
                                },
                            },
                            # Add a special wildcard search for id, as it contains a ":" character
                            # which is filtered out in query_string
                            {
                                'wildcard': {
                                    'id': {
                                        'value': '*{}*'.format(text_search),
                                    }
                                },
                            },
                        ],
                    },
                },
                'filter': {
                    'terms': {
                        'id': ids_str
                    }
                }
            },
        },
    }
class ContentLibraryIndexer(SearchIndexerBase):
"""
Class to perform indexing for blockstore-based content libraries
"""
INDEX_NAME = "content_library_index"
ENABLE_INDEXING_KEY = "ENABLE_CONTENT_LIBRARY_INDEX"
DOCUMENT_TYPE = "content_library"
SCHEMA_VERSION = 0
@classmethod
def get_item_definition(cls, library_key):
    """
    Build the elasticsearch document describing a single content library.
    """
    library = ContentLibrary.objects.get_by_key(library_key)
    bundle = LibraryBundle(library_key, library.bundle_uuid, draft_name=DRAFT_NAME)
    block_count = len(bundle.get_top_level_usages())
    published_at = bundle.get_last_published_time()
    published_at_str = published_at.strftime('%Y-%m-%dT%H:%M:%SZ') if published_at else None
    has_changes, has_deletes = bundle.has_changes()
    metadata = get_bundle(library.bundle_uuid)
    key_str = str(library_key)
    # NOTE: Increment ContentLibraryIndexer.SCHEMA_VERSION if the following schema is updated to avoid dealing
    # with outdated indexes which might cause errors due to missing/invalid attributes.
    return {
        "schema_version": ContentLibraryIndexer.SCHEMA_VERSION,
        "id": key_str,
        "uuid": str(metadata.uuid),
        "title": metadata.title,
        "description": metadata.description,
        "num_blocks": block_count,
        "version": metadata.latest_version,
        "last_published": published_at_str,
        "has_unpublished_changes": has_changes,
        "has_unpublished_deletes": has_deletes,
        # only 'content' field is analyzed by elasticsearch, and allows text-search
        "content": {
            "id": key_str,
            "title": metadata.title,
            "description": metadata.description,
        },
    }
class LibraryBlockIndexer(SearchIndexerBase):
"""
Class to perform indexing on the XBlocks in content libraries.
"""
INDEX_NAME = "content_library_index"
ENABLE_INDEXING_KEY = "ENABLE_CONTENT_LIBRARY_INDEX"
DOCUMENT_TYPE = "content_library_block"
SCHEMA_VERSION = 0
@classmethod
def get_item_definition(cls, usage_key):
    """
    Build the elasticsearch document describing a single library XBlock.
    """
    # Imported lazily to avoid a circular import with the api module.
    from openedx.core.djangoapps.content_libraries.api import get_block_display_name, _lookup_usage_key

    def_key, lib_bundle = _lookup_usage_key(usage_key)
    is_child = usage_key in lib_bundle.get_bundle_includes().keys()
    # Hoisted: the display name was previously computed twice below.
    display_name = get_block_display_name(def_key)
    # NOTE: Increment LibraryBlockIndexer.SCHEMA_VERSION if the following schema is updated to avoid dealing
    # with outdated indexes which might cause errors due to missing/invalid attributes.
    return {
        # Fix: use this indexer's own schema version. The previous code stored
        # ContentLibraryIndexer.SCHEMA_VERSION, so bumping either class's version
        # would break the schema_version filtering in get_items() for the other.
        "schema_version": LibraryBlockIndexer.SCHEMA_VERSION,
        "id": str(usage_key),
        "library_key": str(lib_bundle.library_key),
        "is_child": is_child,
        "def_key": str(def_key),
        "display_name": display_name,
        "block_type": def_key.block_type,
        "has_unpublished_changes": lib_bundle.does_definition_have_unpublished_changes(def_key),
        # only 'content' field is analyzed by elasticsearch, and allows text-search
        "content": {
            "id": str(usage_key),
            "display_name": display_name,
        },
    }
@receiver(CONTENT_LIBRARY_CREATED)
@@ -179,7 +285,11 @@ def index_library(sender, library_key, **kwargs): # pylint: disable=unused-argu
"""
if ContentLibraryIndexer.indexing_is_enabled():
try:
ContentLibraryIndexer.index_libraries([library_key])
ContentLibraryIndexer.index_items([library_key])
if kwargs.get('update_blocks', False):
blocks = LibraryBlockIndexer.search(library_key=str(library_key))
usage_keys = [LibraryUsageLocatorV2.from_string(block['data']['id']) for block in blocks]
LibraryBlockIndexer.index_items(usage_keys)
except ElasticConnectionError as e:
log.exception(e)
@@ -191,50 +301,33 @@ def remove_library_index(sender, library_key, **kwargs): # pylint: disable=unus
"""
if ContentLibraryIndexer.indexing_is_enabled():
try:
ContentLibraryIndexer.remove_libraries([library_key])
ContentLibraryIndexer.remove_items([library_key])
blocks = LibraryBlockIndexer.search(library_key=str(library_key))
LibraryBlockIndexer.remove_items([block['data']['id'] for block in blocks])
except ElasticConnectionError as e:
log.exception(e)
def build_elastic_query(library_keys_str, text_search):
@receiver(LIBRARY_BLOCK_CREATED)
@receiver(LIBRARY_BLOCK_UPDATED)
def index_block(sender, usage_key, **kwargs): # pylint: disable=unused-argument
"""
Build and return an elastic query for doing text search on a library
Index block metadata when created
"""
# Remove reserved characters (and ") from the text to prevent unexpected errors.
text_search_normalised = text_search.translate(text_search.maketrans('', '', RESERVED_CHARACTERS + '"'))
text_search_normalised = text_search.replace('-',' ')
# Wrap with asterix to enable partial matches
text_search_normalised = "*{}*".format(text_search_normalised)
return {
'query': {
'filtered': {
'query': {
'bool': {
'should': [
{
'query_string': {
'query': text_search_normalised,
"fields": ["content.*"],
"minimum_should_match": "100%",
},
},
# Add a special wildcard search for id, as it contains a ":" character which is filtered out
# in query_string
{
'wildcard': {
'id': {
'value': '*{}*'.format(text_search),
}
},
},
],
},
},
'filter': {
'terms': {
'id': library_keys_str
}
}
},
},
}
if LibraryBlockIndexer.indexing_is_enabled():
try:
LibraryBlockIndexer.index_items([usage_key])
except ConnectionError as e:
log.exception(e)
@receiver(LIBRARY_BLOCK_DELETED)
def remove_block_index(sender, usage_key, **kwargs):  # pylint: disable=unused-argument
    """
    Remove the block from the index when deleted
    """
    if LibraryBlockIndexer.indexing_is_enabled():
        try:
            LibraryBlockIndexer.remove_items([usage_key])
        # Fix: catch the elasticsearch client's ConnectionError (imported as
        # ElasticConnectionError). The builtin ConnectionError is unrelated to it,
        # so ES connectivity failures were escaping this signal handler. This also
        # matches the handling in remove_library_index.
        except ElasticConnectionError as e:
            log.exception(e)

View File

@@ -162,16 +162,23 @@ class LibraryBundle(object):
except KeyError:
return None
def get_all_usages(self):
    """
    Get usage keys of all the blocks in this bundle
    """
    # Each OLX path is "<block_type>/<usage_id>/<filename>"; the strict 3-way
    # unpack below intentionally raises ValueError on malformed paths.
    return [
        LibraryUsageLocatorV2(self.library_key, block_type, usage_id)
        for block_type, usage_id, _unused in (
            olx_file_path.split('/') for olx_file_path in self.get_olx_files()
        )
    ]
def get_top_level_usages(self):
"""
Get the set of usage keys in this bundle that have no parent.
"""
own_usage_keys = []
for olx_file_path in self.get_olx_files():
block_type, usage_id, _unused = olx_file_path.split('/')
usage_key = LibraryUsageLocatorV2(self.library_key, block_type, usage_id)
own_usage_keys.append(usage_key)
own_usage_keys = self.get_all_usages()
usage_keys_with_parents = self.get_bundle_includes().keys()
return [usage_key for usage_key in own_usage_keys if usage_key not in usage_keys_with_parents]

View File

@@ -7,7 +7,9 @@ from textwrap import dedent
from django.core.management import BaseCommand
from opaque_keys.edx.locator import LibraryLocatorV2
from openedx.core.djangoapps.content_libraries.libraries_index import ContentLibraryIndexer
from openedx.core.djangoapps.content_libraries.api import DRAFT_NAME
from openedx.core.djangoapps.content_libraries.libraries_index import ContentLibraryIndexer, LibraryBlockIndexer
from openedx.core.djangoapps.content_libraries.library_bundle import LibraryBundle
from openedx.core.djangoapps.content_libraries.models import ContentLibrary
from cms.djangoapps.contentstore.management.commands.prompt import query_yes_no
@@ -57,7 +59,8 @@ class Command(BaseCommand):
if options['clear-all']:
if options['force'] or query_yes_no(self.CONFIRMATION_PROMPT_CLEAR, default="no"):
logging.info("Removing all libraries from the index")
ContentLibraryIndexer.remove_all_libraries()
ContentLibraryIndexer.remove_all_items()
LibraryBlockIndexer.remove_all_items()
return
if options['all']:
@@ -70,4 +73,9 @@ class Command(BaseCommand):
logging.info("Indexing libraries: {}".format(options['library_ids']))
library_keys = list(map(LibraryLocatorV2.from_string, options['library_ids']))
ContentLibraryIndexer.index_libraries(library_keys)
ContentLibraryIndexer.index_items(library_keys)
for library_key in library_keys:
ref = ContentLibrary.objects.get_by_key(library_key)
lib_bundle = LibraryBundle(library_key, ref.bundle_uuid, draft_name=DRAFT_NAME)
LibraryBlockIndexer.index_items(lib_bundle.get_all_usages())

View File

@@ -5,8 +5,8 @@ Content libraries related signals.
from django.dispatch import Signal
CONTENT_LIBRARY_CREATED = Signal(providing_args=['library_key'])
CONTENT_LIBRARY_UPDATED = Signal(providing_args=['library_key'])
CONTENT_LIBRARY_UPDATED = Signal(providing_args=['library_key', 'update_blocks'])
CONTENT_LIBRARY_DELETED = Signal(providing_args=['library_key'])
LIBRARY_BLOCK_CREATED = Signal(providing_args=['library_key'])
LIBRARY_BLOCK_DELETED = Signal(providing_args=['library_key'])
LIBRARY_BLOCK_UPDATED = Signal(providing_args=['library_key'])
LIBRARY_BLOCK_CREATED = Signal(providing_args=['library_key', 'usage_key'])
LIBRARY_BLOCK_DELETED = Signal(providing_args=['library_key', 'usage_key'])
LIBRARY_BLOCK_UPDATED = Signal(providing_args=['library_key', 'usage_key'])

View File

@@ -56,12 +56,12 @@ def elasticsearch_test(func):
'host': settings.TEST_ELASTICSEARCH_HOST,
'port': settings.TEST_ELASTICSEARCH_PORT,
}])(func)
func = patch("openedx.core.djangoapps.content_libraries.libraries_index.ContentLibraryIndexer.SEARCH_KWARGS", new={
func = patch("openedx.core.djangoapps.content_libraries.libraries_index.SearchIndexerBase.SEARCH_KWARGS", new={
'refresh': 'wait_for'
})(func)
return func
else:
return patch("openedx.core.djangoapps.content_libraries.libraries_index.ContentLibraryIndexer.SEARCH_KWARGS", new={})(func)
return patch("openedx.core.djangoapps.content_libraries.libraries_index.SearchIndexerBase.SEARCH_KWARGS", new={})(func)
@requires_blockstore

View File

@@ -6,10 +6,14 @@ from django.conf import settings
from django.core.management import call_command
from django.test.utils import override_settings
from mock import patch
from opaque_keys.edx.locator import LibraryLocatorV2
from opaque_keys.edx.locator import LibraryLocatorV2, LibraryUsageLocatorV2
from search.search_engine_base import SearchEngine
from openedx.core.djangoapps.content_libraries.libraries_index import ContentLibraryIndexer, LibraryNotIndexedException
from openedx.core.djangoapps.content_libraries.libraries_index import (
ContentLibraryIndexer,
LibraryBlockIndexer,
ItemNotIndexedException,
)
from openedx.core.djangoapps.content_libraries.tests.base import ContentLibrariesRestApiTest, elasticsearch_test
@@ -23,7 +27,8 @@ class ContentLibraryIndexerIndexer(ContentLibrariesRestApiTest):
@elasticsearch_test
def setUp(self):
super().setUp()
ContentLibraryIndexer.remove_all_libraries()
ContentLibraryIndexer.remove_all_items()
LibraryBlockIndexer.remove_all_items()
self.searcher = SearchEngine.get_search_engine(ContentLibraryIndexer.INDEX_NAME)
def test_index_libraries(self):
@@ -35,7 +40,7 @@ class ContentLibraryIndexerIndexer(ContentLibrariesRestApiTest):
for result in [result1, result2]:
library_key = LibraryLocatorV2.from_string(result['id'])
response = ContentLibraryIndexer.get_libraries([library_key])[0]
response = ContentLibraryIndexer.get_items([library_key])[0]
self.assertEqual(response['id'], result['id'])
self.assertEqual(response['title'], result['title'])
@@ -54,31 +59,31 @@ class ContentLibraryIndexerIndexer(ContentLibrariesRestApiTest):
result = self._create_library(slug="test-lib-schemaupdates-1", title="Title 1", description="Description")
library_key = LibraryLocatorV2.from_string(result['id'])
ContentLibraryIndexer.get_libraries([library_key])
ContentLibraryIndexer.get_items([library_key])
with patch("openedx.core.djangoapps.content_libraries.libraries_index.ContentLibraryIndexer.SCHEMA_VERSION",
new=1):
with self.assertRaises(LibraryNotIndexedException):
ContentLibraryIndexer.get_libraries([library_key])
with self.assertRaises(ItemNotIndexedException):
ContentLibraryIndexer.get_items([library_key])
call_command("reindex_content_library", all=True, force=True)
ContentLibraryIndexer.get_libraries([library_key])
ContentLibraryIndexer.get_items([library_key])
def test_remove_all_libraries(self):
"""
Test if remove_all_libraries() deletes all libraries
Test if remove_all_items() deletes all libraries
"""
lib1 = self._create_library(slug="test-lib-rm-all-1", title="Title 1", description="Description")
lib2 = self._create_library(slug="test-lib-rm-all-2", title="Title 1", description="Description")
library_key1 = LibraryLocatorV2.from_string(lib1['id'])
library_key2 = LibraryLocatorV2.from_string(lib2['id'])
ContentLibraryIndexer.get_libraries([library_key1, library_key2])
self.assertEqual(len(ContentLibraryIndexer.get_items([library_key1, library_key2])), 2)
ContentLibraryIndexer.remove_all_libraries()
with self.assertRaises(LibraryNotIndexedException):
ContentLibraryIndexer.get_libraries([library_key1, library_key2])
ContentLibraryIndexer.remove_all_items()
with self.assertRaises(ItemNotIndexedException):
ContentLibraryIndexer.get_items([library_key1, library_key2])
def test_update_libraries(self):
"""
@@ -89,7 +94,7 @@ class ContentLibraryIndexerIndexer(ContentLibrariesRestApiTest):
self._update_library(lib['id'], title="New Title", description="New Title")
response = ContentLibraryIndexer.get_libraries([library_key])[0]
response = ContentLibraryIndexer.get_items([library_key])[0]
self.assertEqual(response['id'], lib['id'])
self.assertEqual(response['title'], "New Title")
@@ -102,8 +107,8 @@ class ContentLibraryIndexerIndexer(ContentLibrariesRestApiTest):
self.assertEqual(response['has_unpublished_deletes'], False)
self._delete_library(lib['id'])
with self.assertRaises(LibraryNotIndexedException):
ContentLibraryIndexer.get_libraries([library_key])
with self.assertRaises(ItemNotIndexedException):
ContentLibraryIndexer.get_items([library_key])
def test_update_library_blocks(self):
"""
@@ -113,9 +118,9 @@ class ContentLibraryIndexerIndexer(ContentLibrariesRestApiTest):
"""
Commit library changes, and verify that there are no uncommited changes anymore
"""
last_published = ContentLibraryIndexer.get_libraries([library_key])[0]['last_published']
last_published = ContentLibraryIndexer.get_items([library_key])[0]['last_published']
self._commit_library_changes(str(library_key))
response = ContentLibraryIndexer.get_libraries([library_key])[0]
response = ContentLibraryIndexer.get_items([library_key])[0]
self.assertEqual(response['has_unpublished_changes'], False)
self.assertEqual(response['has_unpublished_deletes'], False)
self.assertGreaterEqual(response['last_published'], last_published)
@@ -125,7 +130,7 @@ class ContentLibraryIndexerIndexer(ContentLibrariesRestApiTest):
"""
Verify uncommitted changes and deletes in the index
"""
response = ContentLibraryIndexer.get_libraries([library_key])[0]
response = ContentLibraryIndexer.get_items([library_key])[0]
self.assertEqual(response['has_unpublished_changes'], has_unpublished_changes)
self.assertEqual(response['has_unpublished_deletes'], has_unpublished_deletes)
return response
@@ -177,3 +182,89 @@ class ContentLibraryIndexerIndexer(ContentLibrariesRestApiTest):
#Verify reverting uncommitted changes
self._revert_library_changes(lib["id"])
verify_uncommitted_libraries(library_key, False, False)
@override_settings(FEATURES={**settings.FEATURES, 'ENABLE_CONTENT_LIBRARY_INDEX': True})
@elasticsearch_test
class LibraryBlockIndexerTest(ContentLibrariesRestApiTest):
"""
Tests the operation of LibraryBlockIndexer
"""
@elasticsearch_test
def setUp(self):
super().setUp()
ContentLibraryIndexer.remove_all_items()
LibraryBlockIndexer.remove_all_items()
self.searcher = SearchEngine.get_search_engine(LibraryBlockIndexer.INDEX_NAME)
def test_index_block(self):
"""
Test if libraries are being indexed correctly
"""
lib = self._create_library(slug="test-lib-index-1", title="Title 1", description="Description")
block1 = self._add_block_to_library(lib['id'], "problem", "problem1")
block2 = self._add_block_to_library(lib['id'], "problem", "problem2")
self.assertEqual(len(LibraryBlockIndexer.search()), 2)
for block in [block1, block2]:
usage_key = LibraryUsageLocatorV2.from_string(block['id'])
response = LibraryBlockIndexer.get_items([usage_key])[0]
self.assertEqual(response['id'], block['id'])
self.assertEqual(response['def_key'], block['def_key'])
self.assertEqual(response['block_type'], block['block_type'])
self.assertEqual(response['display_name'], block['display_name'])
self.assertEqual(response['has_unpublished_changes'], block['has_unpublished_changes'])
def test_remove_all_items(self):
"""
Test if remove_all_items() deletes all libraries
"""
lib1 = self._create_library(slug="test-lib-rm-all", title="Title 1", description="Description")
self._add_block_to_library(lib1['id'], "problem", "problem1")
self._add_block_to_library(lib1['id'], "problem", "problem2")
self.assertEqual(len(LibraryBlockIndexer.search()), 2)
LibraryBlockIndexer.remove_all_items()
self.assertEqual(len(LibraryBlockIndexer.search()), 0)
def test_crud_block(self):
"""
Test that CRUD operations on blocks are reflected in the index
"""
lib = self._create_library(slug="test-lib-crud-block", title="Title", description="Description")
block = self._add_block_to_library(lib['id'], "problem", "problem1")
# Update OLX, verify updates in index
self._set_library_block_olx(block["id"], '<problem display_name="new_name"/>')
response = LibraryBlockIndexer.get_items([block['id']])[0]
self.assertEqual(response['display_name'], "new_name")
self.assertEqual(response['has_unpublished_changes'], True)
# Verify has_unpublished_changes after committing library
self._commit_library_changes(lib['id'])
response = LibraryBlockIndexer.get_items([block['id']])[0]
self.assertEqual(response['has_unpublished_changes'], False)
# Verify has_unpublished_changes after reverting library
self._set_library_block_asset(block["id"], "whatever.png", b"data")
response = LibraryBlockIndexer.get_items([block['id']])[0]
self.assertEqual(response['has_unpublished_changes'], True)
self._revert_library_changes(lib['id'])
response = LibraryBlockIndexer.get_items([block['id']])[0]
self.assertEqual(response['has_unpublished_changes'], False)
# Verify that deleting block removes it from index
self._delete_library_block(block['id'])
with self.assertRaises(ItemNotIndexedException):
LibraryBlockIndexer.get_items([block['id']])
# Verify that deleting a library removes its blocks from index too
self._add_block_to_library(lib['id'], "problem", "problem1")
LibraryBlockIndexer.get_items([block['id']])
self._delete_library(lib['id'])
with self.assertRaises(ItemNotIndexedException):
LibraryBlockIndexer.get_items([block['id']])