Add changes to support ES7 and edx-search 2.0 back into the tree.

This commit is contained in:
Diana Huang
2020-11-13 14:59:50 -05:00
parent 8654feb7e1
commit afeea75d9e
13 changed files with 96 additions and 74 deletions

View File

@@ -64,7 +64,6 @@ class SearchIndexerBase(object, metaclass=ABCMeta):
"""
INDEX_NAME = None
DOCUMENT_TYPE = None
ENABLE_INDEXING_KEY = None
INDEX_EVENT = {
@@ -106,12 +105,11 @@ class SearchIndexerBase(object, metaclass=ABCMeta):
as we find items we can shorten the set of items to keep
"""
response = searcher.search(
doc_type=cls.DOCUMENT_TYPE,
field_dictionary=cls._get_location_info(structure_key),
exclude_dictionary={"id": list(exclude_items)}
)
result_ids = [result["data"]["id"] for result in response["results"]]
searcher.remove(cls.DOCUMENT_TYPE, result_ids)
searcher.remove(result_ids)
@classmethod
def index(cls, modulestore, structure_key, triggered_at=None, reindex_age=REINDEX_AGE):
@@ -256,7 +254,7 @@ class SearchIndexerBase(object, metaclass=ABCMeta):
# Now index the content
for item in structure.get_children():
prepare_item_index(item, groups_usage_info=groups_usage_info)
searcher.index(cls.DOCUMENT_TYPE, items_index)
searcher.index(items_index)
cls.remove_deleted_items(searcher, structure_key, indexed_items)
except Exception as err: # pylint: disable=broad-except
# broad exception so that index operation does not prevent the rest of the application from working
@@ -340,8 +338,7 @@ class CoursewareSearchIndexer(SearchIndexerBase):
"""
Class to perform indexing for courseware search from different modulestores
"""
INDEX_NAME = "courseware_index"
DOCUMENT_TYPE = "courseware_content"
INDEX_NAME = "courseware_content"
ENABLE_INDEXING_KEY = 'ENABLE_COURSEWARE_INDEX'
INDEX_EVENT = {
@@ -373,6 +370,24 @@ class CoursewareSearchIndexer(SearchIndexerBase):
"""
return cls._do_reindex(modulestore, course_key)
@classmethod
def _do_reindex(cls, modulestore, structure_key):
    """
    (Re)index the course content of the given structure.

    Course about information (the course_info index) is indexed together with
    the courseware_content index, so whenever the parent implementation reports
    a non-zero indexed count, the course_info reindex event is tracked as well.

    Returns the value produced by the parent ``_do_reindex``.
    """
    count = super()._do_reindex(modulestore, structure_key)
    if not count:
        # Nothing was indexed, so there is no course_info reindex to report.
        return count

    about_event = CourseAboutSearchIndexer.INDEX_EVENT
    cls._track_index_request(about_event['name'], about_event['category'], count)
    return count
@classmethod
def fetch_group_usage(cls, modulestore, structure):
groups_usage_dict = {}
@@ -430,7 +445,6 @@ class LibrarySearchIndexer(SearchIndexerBase):
Base class to perform indexing for library search from different modulestores
"""
INDEX_NAME = "library_index"
DOCUMENT_TYPE = "library_content"
ENABLE_INDEXING_KEY = 'ENABLE_LIBRARY_INDEX'
INDEX_EVENT = {
@@ -531,12 +545,16 @@ class AboutInfo(object):
FROM_COURSE_MODE = from_course_mode
class CourseAboutSearchIndexer(object):
class CourseAboutSearchIndexer(CoursewareSearchIndexer):
"""
Class to perform indexing of about information from course object
"""
DISCOVERY_DOCUMENT_TYPE = "course_info"
INDEX_NAME = CoursewareSearchIndexer.INDEX_NAME
INDEX_NAME = "course_info"
INDEX_EVENT = {
'name': 'edx.course_info.index.reindexed',
'category': 'course_info'
}
# List of properties to add to the index - each item in the list is an instance of AboutInfo object
ABOUT_INFORMATION_TO_INCLUDE = [
@@ -626,7 +644,7 @@ class CourseAboutSearchIndexer(object):
# Broad exception handler to protect around and report problems with indexing
try:
searcher.index(cls.DISCOVERY_DOCUMENT_TYPE, [course_info])
searcher.index([course_info])
except:
log.exception(
u"Course discovery indexing error encountered, course discovery index may be out of date %s",
@@ -651,9 +669,6 @@ class CourseAboutSearchIndexer(object):
if not searcher:
return
response = searcher.search(
doc_type=cls.DISCOVERY_DOCUMENT_TYPE,
field_dictionary=cls._get_location_info(structure_key)
)
response = searcher.search(field_dictionary=cls._get_location_info(structure_key))
result_ids = [result["data"]["id"] for result in response["results"]]
searcher.remove(cls.DISCOVERY_DOCUMENT_TYPE, result_ids)
searcher.remove(result_ids)

View File

@@ -12,7 +12,7 @@ from opaque_keys.edx.locator import CourseLocator
from search.search_engine_base import SearchEngine
from six.moves import map
from cms.djangoapps.contentstore.courseware_index import CoursewareSearchIndexer
from cms.djangoapps.contentstore.courseware_index import CoursewareSearchIndexer, CourseAboutSearchIndexer
from xmodule.modulestore.django import modulestore
from .prompt import query_yes_no
@@ -71,29 +71,23 @@ class Command(BaseCommand):
store = modulestore()
if index_all_courses_option:
index_name = CoursewareSearchIndexer.INDEX_NAME
doc_type = CoursewareSearchIndexer.DOCUMENT_TYPE
index_names = (CoursewareSearchIndexer.INDEX_NAME, CourseAboutSearchIndexer.INDEX_NAME)
if setup_option:
try:
# try getting the ElasticSearch engine
searcher = SearchEngine.get_search_engine(index_name)
except exceptions.ElasticsearchException as exc:
logging.exception(u'Search Engine error - %s', exc)
return
for index_name in index_names:
try:
searcher = SearchEngine.get_search_engine(index_name)
except exceptions.ElasticsearchException as exc:
logging.exception(u'Search Engine error - %s', exc)
return
index_exists = searcher._es.indices.exists(index=index_name) # pylint: disable=protected-access
doc_type_exists = searcher._es.indices.exists_type( # pylint: disable=protected-access
index=index_name,
doc_type=doc_type
)
index_exists = searcher._es.indices.exists(index=index_name) # pylint: disable=protected-access
index_mapping = searcher._es.indices.get_mapping( # pylint: disable=protected-access
index=index_name,
doc_type=doc_type
) if index_exists and doc_type_exists else {}
index_mapping = searcher._es.indices.get_mapping( # pylint: disable=protected-access
index=index_name,
) if index_exists else {}
if index_exists and index_mapping:
return
if index_exists and index_mapping:
return
# if reindexing is done during devstack setup step, don't prompt the user
if setup_option or query_yes_no(self.CONFIRMATION_PROMPT, default="no"):

View File

@@ -10,7 +10,11 @@ from django.core.cache import cache
from django.dispatch import receiver
from pytz import UTC
from cms.djangoapps.contentstore.courseware_index import CoursewareSearchIndexer, LibrarySearchIndexer
from cms.djangoapps.contentstore.courseware_index import (
CoursewareSearchIndexer,
CourseAboutSearchIndexer,
LibrarySearchIndexer
)
from cms.djangoapps.contentstore.proctoring import register_special_exams
from lms.djangoapps.grades.api import task_compute_all_grades_for_course
from openedx.core.djangoapps.credit.signals import on_course_publish
@@ -62,7 +66,7 @@ def listen_for_course_publish(sender, course_key, **kwargs): # pylint: disable=
# Finally call into the course search subsystem
# to kick off an indexing action
if CoursewareSearchIndexer.indexing_is_enabled():
if CoursewareSearchIndexer.indexing_is_enabled() and CourseAboutSearchIndexer.indexing_is_enabled():
# import here, because signal is registered at startup, but items in tasks are not yet able to be loaded
from cms.djangoapps.contentstore.tasks import update_search_index

View File

@@ -137,7 +137,6 @@ class MixedWithOptionsTestCase(MixedSplitTestCase):
}
INDEX_NAME = None
DOCUMENT_TYPE = None
def setup_course_base(self, store):
""" base version of setup_course_base is a no-op """
@@ -155,7 +154,7 @@ class MixedWithOptionsTestCase(MixedSplitTestCase):
def search(self, field_dictionary=None, query_string=None):
""" Performs index search according to passed parameters """
fields = field_dictionary if field_dictionary else self._get_default_search()
return self.searcher.search(query_string=query_string, field_dictionary=fields, doc_type=self.DOCUMENT_TYPE)
return self.searcher.search(query_string=query_string, field_dictionary=fields)
def _perform_test_using_store(self, store_type, test_to_perform):
""" Helper method to run a test function that uses a specific store """
@@ -248,7 +247,6 @@ class TestCoursewareSearchIndexer(MixedWithOptionsTestCase):
)
INDEX_NAME = CoursewareSearchIndexer.INDEX_NAME
DOCUMENT_TYPE = CoursewareSearchIndexer.DOCUMENT_TYPE
def reindex_course(self, store):
""" kick off complete reindex of the course """
@@ -315,7 +313,7 @@ class TestCoursewareSearchIndexer(MixedWithOptionsTestCase):
"""
Test that course will also be deleted from search_index after course deletion.
"""
self.DOCUMENT_TYPE = 'course_info' # pylint: disable=invalid-name
self.searcher = SearchEngine.get_search_engine(CourseAboutSearchIndexer.INDEX_NAME)
response = self.search()
self.assertEqual(response["total"], 0)
@@ -422,34 +420,43 @@ class TestCoursewareSearchIndexer(MixedWithOptionsTestCase):
self.assertEqual(indexed_count, 7)
def _test_course_about_property_index(self, store):
""" Test that informational properties in the course object end up in the course_info index """
"""
Test that informational properties in the course object end up in the course_info index.
"""
self.searcher = SearchEngine.get_search_engine(CourseAboutSearchIndexer.INDEX_NAME)
display_name = "Help, I need somebody!"
self.course.display_name = display_name
self.update_item(store, self.course)
self.reindex_course(store)
response = self.searcher.search(
doc_type=CourseAboutSearchIndexer.DISCOVERY_DOCUMENT_TYPE,
field_dictionary={"course": six.text_type(self.course.id)}
)
self.assertEqual(response["total"], 1)
self.assertEqual(response["results"][0]["data"]["content"]["display_name"], display_name)
def _test_course_about_store_index(self, store):
""" Test that informational properties in the about store end up in the course_info index """
"""
Test that informational properties in the about store end up in
the course_info index.
"""
self.searcher = SearchEngine.get_search_engine(CourseAboutSearchIndexer.INDEX_NAME)
short_description = "Not just anybody"
CourseDetails.update_about_item(
self.course, "short_description", short_description, ModuleStoreEnum.UserID.test, store
)
self.reindex_course(store)
response = self.searcher.search(
doc_type=CourseAboutSearchIndexer.DISCOVERY_DOCUMENT_TYPE,
field_dictionary={"course": six.text_type(self.course.id)}
)
self.assertEqual(response["total"], 1)
self.assertEqual(response["results"][0]["data"]["content"]["short_description"], short_description)
def _test_course_about_mode_index(self, store):
""" Test that informational properties in the course modes store end up in the course_info index """
"""
Test that informational properties in the course modes store end up in
the course_info index.
"""
self.searcher = SearchEngine.get_search_engine(CourseAboutSearchIndexer.INDEX_NAME)
honour_mode = CourseModeFactory(
course_id=self.course.id,
mode_slug=CourseMode.HONOR,
@@ -466,7 +473,6 @@ class TestCoursewareSearchIndexer(MixedWithOptionsTestCase):
self.reindex_course(store)
response = self.searcher.search(
doc_type=CourseAboutSearchIndexer.DISCOVERY_DOCUMENT_TYPE,
field_dictionary={"course": six.text_type(self.course.id)}
)
self.assertEqual(response["total"], 1)
@@ -586,13 +592,15 @@ class TestLargeCourseDeletions(MixedWithOptionsTestCase):
WORKS_WITH_STORES = (ModuleStoreEnum.Type.mongo, ModuleStoreEnum.Type.split)
def _clean_course_id(self):
""" Clean all documents from the index that have a specific course provided """
"""
Clean all documents from the index that have a specific course provided.
"""
if self.course_id:
response = self.searcher.search(field_dictionary={"course": self.course_id})
while response["total"] > 0:
for item in response["results"]:
self.searcher.remove(CoursewareSearchIndexer.DOCUMENT_TYPE, item["data"]["id"])
self.searcher.remove(item["data"]["id"])
response = self.searcher.search(field_dictionary={"course": self.course_id})
self.course_id = None
@@ -725,10 +733,12 @@ class TestTaskExecution(SharedModuleStoreTestCase):
super(TestTaskExecution, cls).tearDownClass()
def test_task_indexing_course(self):
""" Making sure that the receiver correctly fires off the task when invoked by signal """
"""
Making sure that the receiver correctly fires off the task when invoked
by signal.
"""
searcher = SearchEngine.get_search_engine(CoursewareSearchIndexer.INDEX_NAME)
response = searcher.search(
doc_type=CoursewareSearchIndexer.DOCUMENT_TYPE,
field_dictionary={"course": six.text_type(self.course.id)}
)
self.assertEqual(response["total"], 0)
@@ -737,7 +747,6 @@ class TestTaskExecution(SharedModuleStoreTestCase):
# Note that this test will only succeed if celery is working in inline mode
response = searcher.search(
doc_type=CoursewareSearchIndexer.DOCUMENT_TYPE,
field_dictionary={"course": six.text_type(self.course.id)}
)
self.assertEqual(response["total"], 3)
@@ -807,7 +816,6 @@ class TestLibrarySearchIndexer(MixedWithOptionsTestCase):
)
INDEX_NAME = LibrarySearchIndexer.INDEX_NAME
DOCUMENT_TYPE = LibrarySearchIndexer.DOCUMENT_TYPE
def _get_default_search(self):
""" Returns field_dictionary for default search """
@@ -1223,22 +1231,24 @@ class GroupConfigurationSearchMongo(CourseTestCase, MixedWithOptionsTestCase):
"""
Return content values from args tuple in a mocked calls list.
"""
kall = mock_index.call_args
args, kwargs = kall # pylint: disable=unused-variable
return args[1]
call = mock_index.call_args
(indexed_content, ), kwargs = call # pylint: disable=unused-variable
return indexed_content
def reindex_course(self, store):
    """
    Kick off a complete reindex of ``self.course`` using the given store and
    return the indexer's result.
    """
    course_key = self.course.id
    return CoursewareSearchIndexer.do_course_reindex(store, course_key)
def test_content_group_gets_indexed(self):
""" indexing course with content groups added test """
"""
Indexing course with content groups added test.
"""
# Only published modules should be in the index
added_to_index = self.reindex_course(self.store)
self.assertEqual(added_to_index, 16)
response = self.searcher.search(field_dictionary={"course": six.text_type(self.course.id)})
self.assertEqual(response["total"], 17)
self.assertEqual(response["total"], 16)
group_access_content = {'group_access': {666: [1]}}

View File

@@ -478,7 +478,8 @@ if FEATURES['ENABLE_COURSEWARE_INDEX'] or FEATURES['ENABLE_LIBRARY_INDEX'] or FE
# Use ElasticSearch for the search engine
SEARCH_ENGINE = "search.elastic.ElasticSearchEngine"
ELASTIC_SEARCH_CONFIG = ENV_TOKENS.get('ELASTIC_SEARCH_CONFIG', [{}])
# TODO: Once we have successfully upgraded to ES7, switch this back to ELASTIC_SEARCH_CONFIG.
ELASTIC_SEARCH_CONFIG = ENV_TOKENS.get('ELASTIC_SEARCH_CONFIG_ES7', [{}])
XBLOCK_SETTINGS = ENV_TOKENS.get('XBLOCK_SETTINGS', {})
XBLOCK_SETTINGS.setdefault("VideoBlock", {})["licensing_enabled"] = FEATURES.get("LICENSING", False)

View File

@@ -286,7 +286,7 @@ class CourseDetailViewTestCase(CourseApiTestViewMixin, SharedModuleStoreTestCase
'enrollment_end': {'type': 'date'}
})
@override_settings(SEARCH_ENGINE="search.tests.mock_search_engine.MockSearchEngine")
@override_settings(COURSEWARE_INDEX_NAME=TEST_INDEX_NAME)
@override_settings(COURSEWARE_INFO_INDEX_NAME=TEST_INDEX_NAME)
class CourseListSearchViewTest(CourseApiTestViewMixin, ModuleStoreTestCase, SearcherMixin):
"""
Tests the search functionality of the courses API.

View File

@@ -104,7 +104,7 @@ class CourseTeamIndexer(object):
"""
search_engine = cls.engine()
serialized_course_team = CourseTeamIndexer(course_team).data()
search_engine.index(cls.DOCUMENT_TYPE_NAME, [serialized_course_team])
search_engine.index([serialized_course_team])
@classmethod
@if_search_enabled
@@ -112,7 +112,7 @@ class CourseTeamIndexer(object):
"""
Remove course_team from the index (if feature is enabled).
"""
cls.engine().remove(cls.DOCUMENT_TYPE_NAME, [course_team.team_id])
cls.engine().remove([course_team.team_id])
@classmethod
@if_search_enabled

View File

@@ -73,7 +73,7 @@ from .toggles import are_team_submissions_enabled
TEAM_MEMBERSHIPS_PER_PAGE = 5
TOPICS_PER_PAGE = 12
MAXIMUM_SEARCH_SIZE = 100000
MAXIMUM_SEARCH_SIZE = 10000
log = logging.getLogger(__name__)

View File

@@ -724,7 +724,8 @@ if FEATURES.get('ENABLE_COURSEWARE_SEARCH') or \
SEARCH_ENGINE = "search.elastic.ElasticSearchEngine"
SEARCH_FILTER_GENERATOR = ENV_TOKENS.get('SEARCH_FILTER_GENERATOR', SEARCH_FILTER_GENERATOR)
ELASTIC_SEARCH_CONFIG = ENV_TOKENS.get('ELASTIC_SEARCH_CONFIG', [{}])
# TODO: Once we have successfully upgraded to ES7, switch this back to ELASTIC_SEARCH_CONFIG.
ELASTIC_SEARCH_CONFIG = ENV_TOKENS.get('ELASTIC_SEARCH_CONFIG_ES7', [{}])
# Facebook app
FACEBOOK_API_VERSION = AUTH_TOKENS.get("FACEBOOK_API_VERSION")

View File

@@ -37,9 +37,6 @@ drf-yasg<1.17.1
# for them.
edx-enterprise==3.12.0
# v2 requires the ES7 upgrade work to be complete
edx-search<2.0.0
# We expect v2.0.0 to introduce large breaking changes in the feature toggle API
edx-toggles<2.0.0

View File

@@ -107,15 +107,15 @@ edx-proctoring-proctortrack==1.0.5 # via -r requirements/edx/base.in
edx-proctoring==2.4.8 # via -r requirements/edx/base.in, edx-proctoring-proctortrack
edx-rbac==1.3.3 # via edx-enterprise
edx-rest-api-client==5.2.1 # via -r requirements/edx/base.in, edx-enterprise, edx-proctoring
edx-search==1.4.1 # via -c requirements/edx/../constraints.txt, -r requirements/edx/base.in
edx-sga==0.13.0 # via -r requirements/edx/base.in
edx-submissions==3.2.2 # via -r requirements/edx/base.in, ora2
edx-tincan-py35==0.0.9 # via edx-enterprise
edx-toggles==1.2.0 # via -c requirements/edx/../constraints.txt, -r requirements/edx/base.in, edx-completion
edx-user-state-client==1.2.0 # via -r requirements/edx/base.in
edx-search==2.0.0 # via -r requirements/edx/base.in
edx-when==1.3.0 # via -r requirements/edx/base.in, edx-proctoring
edxval==1.4.4 # via -r requirements/edx/base.in
elasticsearch==1.9.0 # via edx-search
elasticsearch==7.9.1 # via edx-search
enmerkar-underscore==1.0.0 # via -r requirements/edx/base.in
enmerkar==0.7.1 # via enmerkar-underscore
event-tracking==1.0.0 # via -r requirements/edx/base.in, edx-proctoring, edx-search

View File

@@ -119,7 +119,7 @@ edx-proctoring-proctortrack==1.0.5 # via -r requirements/edx/testing.txt
edx-proctoring==2.4.8 # via -r requirements/edx/testing.txt, edx-proctoring-proctortrack
edx-rbac==1.3.3 # via -r requirements/edx/testing.txt, edx-enterprise
edx-rest-api-client==5.2.1 # via -r requirements/edx/testing.txt, edx-enterprise, edx-proctoring
edx-search==1.4.1 # via -c requirements/edx/../constraints.txt, -r requirements/edx/testing.txt
edx-search==2.0.0 # via -r requirements/edx/testing.txt
edx-sga==0.13.0 # via -r requirements/edx/testing.txt
edx-sphinx-theme==1.5.0 # via -r requirements/edx/development.in
edx-submissions==3.2.2 # via -r requirements/edx/testing.txt, ora2
@@ -128,7 +128,7 @@ edx-toggles==1.2.0 # via -c requirements/edx/../constraints.txt, -r requi
edx-user-state-client==1.2.0 # via -r requirements/edx/testing.txt
edx-when==1.3.0 # via -r requirements/edx/testing.txt, edx-proctoring
edxval==1.4.4 # via -r requirements/edx/testing.txt
elasticsearch==1.9.0 # via -r requirements/edx/testing.txt, edx-search
elasticsearch==7.9.1 # via -r requirements/edx/testing.txt, edx-search
enmerkar-underscore==1.0.0 # via -r requirements/edx/testing.txt
enmerkar==0.7.1 # via -r requirements/edx/testing.txt, enmerkar-underscore
event-tracking==1.0.0 # via -r requirements/edx/testing.txt, edx-proctoring, edx-search

View File

@@ -116,7 +116,7 @@ edx-proctoring-proctortrack==1.0.5 # via -r requirements/edx/base.txt
edx-proctoring==2.4.8 # via -r requirements/edx/base.txt, edx-proctoring-proctortrack
edx-rbac==1.3.3 # via -r requirements/edx/base.txt, edx-enterprise
edx-rest-api-client==5.2.1 # via -r requirements/edx/base.txt, edx-enterprise, edx-proctoring
edx-search==1.4.1 # via -c requirements/edx/../constraints.txt, -r requirements/edx/base.txt
edx-search==2.0.0 # via -r requirements/edx/base.txt
edx-sga==0.13.0 # via -r requirements/edx/base.txt
edx-submissions==3.2.2 # via -r requirements/edx/base.txt, ora2
edx-tincan-py35==0.0.9 # via -r requirements/edx/base.txt, edx-enterprise
@@ -124,7 +124,7 @@ edx-toggles==1.2.0 # via -c requirements/edx/../constraints.txt, -r requi
edx-user-state-client==1.2.0 # via -r requirements/edx/base.txt
edx-when==1.3.0 # via -r requirements/edx/base.txt, edx-proctoring
edxval==1.4.4 # via -r requirements/edx/base.txt
elasticsearch==1.9.0 # via -r requirements/edx/base.txt, edx-search
elasticsearch==7.9.1 # via -r requirements/edx/base.txt, edx-search
enmerkar-underscore==1.0.0 # via -r requirements/edx/base.txt
enmerkar==0.7.1 # via -r requirements/edx/base.txt, enmerkar-underscore
event-tracking==1.0.0 # via -r requirements/edx/base.txt, edx-proctoring, edx-search