Update Library v2 Indexes to work with ElasticSearch 7. Repair blockstore integration tests. (#26230)

This commit is contained in:
Fox Piacenti
2021-02-08 10:55:47 -06:00
committed by GitHub
parent 367d2a07c9
commit c840e6f9bb
3 changed files with 53 additions and 50 deletions

View File

@@ -33,7 +33,6 @@ class SearchIndexerBase(ABC):
Abstract Base Class for implementing library search indexers.
"""
INDEX_NAME = None
DOCUMENT_TYPE = None
ENABLE_INDEXING_KEY = None
SCHEMA_VERSION = 0
SEARCH_KWARGS = {
@@ -56,7 +55,7 @@ class SearchIndexerBase(ABC):
"""
searcher = SearchEngine.get_search_engine(cls.INDEX_NAME)
items = [cls.get_item_definition(item) for item in items]
return searcher.index(cls.DOCUMENT_TYPE, items, **cls.SEARCH_KWARGS)
return searcher.index(items, **cls.SEARCH_KWARGS)
@classmethod
def get_items(cls, ids=None, filter_terms=None, text_search=None):
@@ -79,7 +78,7 @@ class SearchIndexerBase(ABC):
response = cls._perform_elastic_search(filter_terms, text_search)
else:
searcher = SearchEngine.get_search_engine(cls.INDEX_NAME)
response = searcher.search(doc_type=cls.DOCUMENT_TYPE, field_dictionary=filter_terms, size=MAX_SIZE)
response = searcher.search(field_dictionary=filter_terms, size=MAX_SIZE)
response = [result["data"] for result in response["results"]]
return sorted(response, key=lambda i: i["id"])
@@ -91,7 +90,7 @@ class SearchIndexerBase(ABC):
"""
searcher = SearchEngine.get_search_engine(cls.INDEX_NAME)
ids_str = [str(i) for i in ids]
searcher.remove(cls.DOCUMENT_TYPE, ids_str, **cls.SEARCH_KWARGS)
searcher.remove(ids_str, **cls.SEARCH_KWARGS)
@classmethod
def remove_all_items(cls):
@@ -99,9 +98,9 @@ class SearchIndexerBase(ABC):
Remove all items from the index
"""
searcher = SearchEngine.get_search_engine(cls.INDEX_NAME)
response = searcher.search(doc_type=cls.DOCUMENT_TYPE, filter_dictionary={}, size=MAX_SIZE)
response = searcher.search(filter_dictionary={}, size=MAX_SIZE)
ids = [result["data"]["id"] for result in response["results"]]
searcher.remove(cls.DOCUMENT_TYPE, ids, **cls.SEARCH_KWARGS)
searcher.remove(ids, **cls.SEARCH_KWARGS)
@classmethod
def indexing_is_enabled(cls):
@@ -117,7 +116,6 @@ class SearchIndexerBase(ABC):
"""
searcher = SearchEngine.get_search_engine(cls.INDEX_NAME)
return _translate_hits(searcher._es.search( # pylint: disable=protected-access
doc_type=cls.DOCUMENT_TYPE,
index=searcher.index_name,
body=cls.build_elastic_query(filter_terms, text_search),
size=MAX_SIZE
@@ -130,7 +128,7 @@ class SearchIndexerBase(ABC):
"""
# Remove reserved characters (and ") from the text to prevent unexpected errors.
text_search_normalised = text_search.translate(text_search.maketrans('', '', RESERVED_CHARACTERS + '"'))
text_search_normalised = text_search.replace('-', ' ')
text_search_normalised = text_search_normalised.replace('-', ' ')
# Wrap with asterisks to enable partial matches
text_search_normalised = "*{}*".format(text_search_normalised)
terms = [
@@ -143,32 +141,19 @@ class SearchIndexerBase(ABC):
]
return {
'query': {
'filtered': {
'query': {
'bool': {
'should': [
{
'query_string': {
'query': text_search_normalised,
"fields": ["content.*"],
"minimum_should_match": "100%",
},
},
# Add a special wildcard search for id, as it contains a ":" character which is
# filtered out in query_string
{
'wildcard': {
'id': {
'value': '*{}*'.format(text_search),
}
},
},
],
'bool': {
'must': [
{
'query_string': {
'query': text_search_normalised,
"fields": ["content.*"],
'minimum_should_match': '100%',
},
},
},
],
'filter': {
'bool': {
'must': terms
'must': terms,
}
}
},
@@ -183,7 +168,6 @@ class ContentLibraryIndexer(SearchIndexerBase):
INDEX_NAME = "content_library_index"
ENABLE_INDEXING_KEY = "ENABLE_CONTENT_LIBRARY_INDEX"
DOCUMENT_TYPE = "content_library"
SCHEMA_VERSION = 0
@classmethod
@@ -212,7 +196,7 @@ class ContentLibraryIndexer(SearchIndexerBase):
"last_published": last_published_str,
"has_unpublished_changes": has_unpublished_changes,
"has_unpublished_deletes": has_unpublished_deletes,
# only 'content' field is analyzed by elastisearch, and allows text-search
# only 'content' field is analyzed by elasticsearch, and allows text-search
"content": {
"id": str(item),
"title": bundle_metadata.title,
@@ -226,9 +210,8 @@ class LibraryBlockIndexer(SearchIndexerBase):
Class to perform indexing on the XBlocks in content libraries.
"""
INDEX_NAME = "content_library_index"
INDEX_NAME = "content_library_block_index"
ENABLE_INDEXING_KEY = "ENABLE_CONTENT_LIBRARY_INDEX"
DOCUMENT_TYPE = "content_library_block"
SCHEMA_VERSION = 0
@classmethod

View File

@@ -68,7 +68,6 @@ def elasticsearch_test(func):
def mock_perform(cls, filter_terms, text_search):
# pylint: disable=no-member
return SearchEngine.get_search_engine(cls.INDEX_NAME).search(
doc_type=cls.DOCUMENT_TYPE,
field_dictionary=filter_terms,
query_string=text_search,
size=MAX_SIZE

View File

@@ -11,6 +11,7 @@ from django.test.utils import override_settings
from mock import patch
from organizations.models import Organization
from openedx.core.djangoapps.content_libraries.libraries_index import LibraryBlockIndexer, ContentLibraryIndexer
from openedx.core.djangoapps.content_libraries.tests.base import ContentLibrariesRestApiTest, elasticsearch_test
from openedx.core.djangoapps.content_libraries.constants import VIDEO, COMPLEX, PROBLEM, CC_4_BY, ALL_RIGHTS_RESERVED
from common.djangoapps.student.tests.factories import UserFactory
@@ -42,6 +43,12 @@ class ContentLibrariesTest(ContentLibrariesRestApiTest):
and cached forever.
"""
def setUp(self):
    """
    Run the parent setup, then empty both library search indexes.

    The indexes are only cleared when ``settings.ENABLE_ELASTICSEARCH_FOR_TESTS``
    is truthy; otherwise nothing beyond the parent setup is done.
    """
    super().setUp()
    if not settings.ENABLE_ELASTICSEARCH_FOR_TESTS:
        return
    # Same order as before: the library index first, then the block index.
    for indexer in (ContentLibraryIndexer, LibraryBlockIndexer):
        indexer.remove_all_items()
def test_library_crud(self):
"""
Test Create, Read, Update, and Delete of a Content Library
@@ -218,30 +225,44 @@ class ContentLibrariesTest(ContentLibrariesRestApiTest):
"""
Test the filters in the list libraries API
"""
suffix = str(is_indexing_enabled)
with override_settings(FEATURES={**settings.FEATURES, 'ENABLE_CONTENT_LIBRARY_INDEX': is_indexing_enabled}):
self._create_library(slug="test-lib1", title="Foo", description="Bar", library_type=VIDEO)
self._create_library(slug="test-lib2", title="Library-Title-2", description="Bar2")
self._create_library(slug="l3", title="Library-Title-3", description="Description", library_type=VIDEO)
self._create_library(
slug=f"test-lib-filter-{suffix}-1", title="Fob", description=f"Bar-{suffix}", library_type=VIDEO,
)
self._create_library(
slug=f"test-lib-filter-{suffix}-2", title=f"Library-Title-{suffix}-2", description=f"Bar-{suffix}-2",
)
self._create_library(
slug=f"l3{suffix}", title=f"Library-Title-{suffix}-3", description="Description", library_type=VIDEO,
)
Organization.objects.get_or_create(
short_name="org-test",
short_name=f"org-test-{suffix}",
defaults={"name": "Content Libraries Tachyon Exploration & Survey Team"},
)
self._create_library(
slug="l4", title="Library-Title-4", description="Library-Description", org='org-test',
slug=f"l4-{suffix}", title=f"Library-Title-{suffix}-4",
description="Library-Description", org=f'org-test-{suffix}',
library_type=VIDEO,
)
self._create_library(slug="l5", title="Library-Title-5", description="Library-Description", org='org-test')
self._create_library(
slug="l5", title=f"Library-Title-{suffix}-5", description="Library-Description",
org=f'org-test-{suffix}',
)
self.assertEqual(len(self._list_libraries()), 5)
self.assertEqual(len(self._list_libraries({'org': 'org-test'})), 2)
self.assertEqual(len(self._list_libraries({'text_search': 'test-lib'})), 2)
self.assertEqual(len(self._list_libraries({'text_search': 'test-lib', 'type': VIDEO})), 1)
self.assertEqual(len(self._list_libraries({'text_search': 'library-title'})), 4)
self.assertEqual(len(self._list_libraries({'text_search': 'library-title', 'type': VIDEO})), 2)
self.assertEqual(len(self._list_libraries({'text_search': 'bar'})), 2)
self.assertEqual(len(self._list_libraries({'text_search': 'org-tes'})), 2)
self.assertEqual(len(self._list_libraries({'org': 'org-test', 'text_search': 'library-title-4'})), 1)
self.assertEqual(len(self._list_libraries({'org': f'org-test-{suffix}'})), 2)
self.assertEqual(len(self._list_libraries({'text_search': f'test-lib-filter-{suffix}'})), 2)
self.assertEqual(len(self._list_libraries({'text_search': f'test-lib-filter-{suffix}', 'type': VIDEO})), 1)
self.assertEqual(len(self._list_libraries({'text_search': f'library-title-{suffix}'})), 4)
self.assertEqual(len(self._list_libraries({'text_search': f'library-title-{suffix}', 'type': VIDEO})), 2)
self.assertEqual(len(self._list_libraries({'text_search': f'bar-{suffix}'})), 2)
self.assertEqual(len(self._list_libraries({'text_search': f'org-test-{suffix}'})), 2)
self.assertEqual(
len(self._list_libraries({'org': f'org-test-{suffix}', 'text_search': f'library-title-{suffix}-4'})),
1,
)
self.assertEqual(len(self._list_libraries({'type': VIDEO})), 3)
# General Content Library XBlock tests: