SOL-974 make index and remove calls use ES bulk API
This commit is contained in:
@@ -111,8 +111,7 @@ class SearchIndexerBase(object):
|
||||
exclude_dictionary={"id": list(exclude_items)}
|
||||
)
|
||||
result_ids = [result["data"]["id"] for result in response["results"]]
|
||||
for result_id in result_ids:
|
||||
searcher.remove(cls.DOCUMENT_TYPE, result_id)
|
||||
searcher.remove(cls.DOCUMENT_TYPE, result_ids)
|
||||
|
||||
@classmethod
|
||||
def index(cls, modulestore, structure_key, triggered_at=None, reindex_age=REINDEX_AGE):
|
||||
@@ -142,7 +141,7 @@ class SearchIndexerBase(object):
|
||||
structure_key = cls.normalize_structure_key(structure_key)
|
||||
location_info = cls._get_location_info(structure_key)
|
||||
|
||||
# Wrap counter in dictionary - otherwise we seem to lose scope inside the embedded function `index_item`
|
||||
# Wrap counter in dictionary - otherwise we seem to lose scope inside the embedded function `prepare_item_index`
|
||||
indexed_count = {
|
||||
"count": 0
|
||||
}
|
||||
@@ -153,15 +152,20 @@ class SearchIndexerBase(object):
|
||||
# list - those are ready to be destroyed
|
||||
indexed_items = set()
|
||||
|
||||
# items_index is a list of all the items index dictionaries.
|
||||
# it is used to collect all indexes and index them using bulk API,
|
||||
# instead of per item index API call.
|
||||
items_index = []
|
||||
|
||||
def get_item_location(item):
|
||||
"""
|
||||
Gets the version agnostic item location
|
||||
"""
|
||||
return item.location.version_agnostic().replace(branch=None)
|
||||
|
||||
def index_item(item, skip_index=False, groups_usage_info=None):
|
||||
def prepare_item_index(item, skip_index=False, groups_usage_info=None):
|
||||
"""
|
||||
Add this item to the search index and indexed_items list
|
||||
Add this item to the items_index and indexed_items list
|
||||
|
||||
Arguments:
|
||||
item - item to add to index, its children will be processed recursively
|
||||
@@ -212,7 +216,7 @@ class SearchIndexerBase(object):
|
||||
for child_item in item.get_children():
|
||||
if modulestore.has_published_version(child_item):
|
||||
children_groups_usage.append(
|
||||
index_item(
|
||||
prepare_item_index(
|
||||
child_item,
|
||||
skip_index=skip_child_index,
|
||||
groups_usage_info=groups_usage_info
|
||||
@@ -234,7 +238,7 @@ class SearchIndexerBase(object):
|
||||
item_index['start_date'] = item.start
|
||||
item_index['content_groups'] = item_content_groups if item_content_groups else None
|
||||
item_index.update(cls.supplemental_fields(item))
|
||||
searcher.index(cls.DOCUMENT_TYPE, item_index)
|
||||
items_index.append(item_index)
|
||||
indexed_count["count"] += 1
|
||||
return item_content_groups
|
||||
except Exception as err: # pylint: disable=broad-except
|
||||
@@ -252,7 +256,8 @@ class SearchIndexerBase(object):
|
||||
|
||||
# Now index the content
|
||||
for item in structure.get_children():
|
||||
index_item(item, groups_usage_info=groups_usage_info)
|
||||
prepare_item_index(item, groups_usage_info=groups_usage_info)
|
||||
searcher.index(cls.DOCUMENT_TYPE, items_index)
|
||||
cls.remove_deleted_items(searcher, structure_key, indexed_items)
|
||||
except Exception as err: # pylint: disable=broad-except
|
||||
# broad exception so that index operation does not prevent the rest of the application from working
|
||||
@@ -623,7 +628,7 @@ class CourseAboutSearchIndexer(object):
|
||||
|
||||
# Broad exception handler to protect around and report problems with indexing
|
||||
try:
|
||||
searcher.index(cls.DISCOVERY_DOCUMENT_TYPE, course_info)
|
||||
searcher.index(cls.DISCOVERY_DOCUMENT_TYPE, [course_info])
|
||||
except: # pylint: disable=bare-except
|
||||
log.exception(
|
||||
"Course discovery indexing error encountered, course discovery index may be out of date %s",
|
||||
|
||||
@@ -1167,95 +1167,91 @@ class GroupConfigurationSearchMongo(CourseTestCase, MixedWithOptionsTestCase):
|
||||
|
||||
def _html_group_result(self, html_unit, content_groups):
|
||||
"""
|
||||
Return call object with arguments and content group for html_unit.
|
||||
Return object with arguments and content group for html_unit.
|
||||
"""
|
||||
return call(
|
||||
'courseware_content',
|
||||
{
|
||||
'course_name': unicode(self.course.display_name),
|
||||
'id': unicode(html_unit.location),
|
||||
'content': {'html_content': '', 'display_name': unicode(html_unit.display_name)},
|
||||
'course': unicode(self.course.id),
|
||||
'location': [
|
||||
unicode(self.chapter.display_name),
|
||||
unicode(self.sequential.display_name),
|
||||
unicode(html_unit.parent.display_name)
|
||||
],
|
||||
'content_type': 'Text',
|
||||
'org': self.course.org,
|
||||
'content_groups': content_groups,
|
||||
'start_date': datetime(2015, 4, 1, 0, 0, tzinfo=tzutc())
|
||||
}
|
||||
)
|
||||
return {
|
||||
'course_name': self.course.display_name,
|
||||
'id': unicode(html_unit.location),
|
||||
'content': {'html_content': '', 'display_name': html_unit.display_name},
|
||||
'course': unicode(self.course.id),
|
||||
'location': [
|
||||
self.chapter.display_name,
|
||||
self.sequential.display_name,
|
||||
html_unit.parent.display_name
|
||||
],
|
||||
'content_type': 'Text',
|
||||
'org': self.course.org,
|
||||
'content_groups': content_groups,
|
||||
'start_date': datetime(2015, 4, 1, 0, 0, tzinfo=tzutc())
|
||||
}
|
||||
|
||||
def _html_experiment_group_result(self, html_unit, content_groups):
|
||||
"""
|
||||
Return call object with arguments and content group for html_unit.
|
||||
Return object with arguments and content group for html_unit.
|
||||
"""
|
||||
return call(
|
||||
'courseware_content',
|
||||
{
|
||||
'course_name': unicode(self.course.display_name),
|
||||
'id': unicode(html_unit.location),
|
||||
'content': {'html_content': '', 'display_name': unicode(html_unit.display_name)},
|
||||
'course': unicode(self.course.id),
|
||||
'location': [
|
||||
unicode(self.chapter.display_name),
|
||||
unicode(self.sequential2.display_name),
|
||||
unicode(self.vertical3.display_name)
|
||||
],
|
||||
'content_type': 'Text',
|
||||
'org': self.course.org,
|
||||
'content_groups': content_groups,
|
||||
'start_date': datetime(2015, 4, 1, 0, 0, tzinfo=tzutc())
|
||||
}
|
||||
)
|
||||
return {
|
||||
'course_name': self.course.display_name,
|
||||
'id': unicode(html_unit.location),
|
||||
'content': {'html_content': '', 'display_name': html_unit.display_name},
|
||||
'course': unicode(self.course.id),
|
||||
'location': [
|
||||
self.chapter.display_name,
|
||||
self.sequential2.display_name,
|
||||
self.vertical3.display_name
|
||||
],
|
||||
'content_type': 'Text',
|
||||
'org': self.course.org,
|
||||
'content_groups': content_groups,
|
||||
'start_date': datetime(2015, 4, 1, 0, 0, tzinfo=tzutc())
|
||||
}
|
||||
|
||||
def _vertical_experiment_group_result(self, vertical, content_groups):
|
||||
"""
|
||||
Return call object with arguments and content group for split_test vertical.
|
||||
Return object with arguments and content group for split_test vertical.
|
||||
"""
|
||||
return call(
|
||||
'courseware_content',
|
||||
{
|
||||
'start_date': datetime(2015, 4, 1, 0, 0, tzinfo=tzutc()),
|
||||
'content': {'display_name': unicode(vertical.display_name)},
|
||||
'course': unicode(self.course.id),
|
||||
'location': [
|
||||
unicode(self.chapter.display_name),
|
||||
unicode(self.sequential2.display_name),
|
||||
unicode(vertical.parent.display_name)
|
||||
],
|
||||
'content_type': 'Sequence',
|
||||
'content_groups': content_groups,
|
||||
'id': unicode(vertical.location),
|
||||
'course_name': unicode(self.course.display_name),
|
||||
'org': self.course.org
|
||||
}
|
||||
)
|
||||
return {
|
||||
'start_date': datetime(2015, 4, 1, 0, 0, tzinfo=tzutc()),
|
||||
'content': {'display_name': vertical.display_name},
|
||||
'course': unicode(self.course.id),
|
||||
'location': [
|
||||
self.chapter.display_name,
|
||||
self.sequential2.display_name,
|
||||
vertical.parent.display_name
|
||||
],
|
||||
'content_type': 'Sequence',
|
||||
'content_groups': content_groups,
|
||||
'id': unicode(vertical.location),
|
||||
'course_name': self.course.display_name,
|
||||
'org': self.course.org
|
||||
}
|
||||
|
||||
def _html_nogroup_result(self, html_unit):
|
||||
"""
|
||||
Return call object with arguments and content group set to empty array for html_unit.
|
||||
Return object with arguments and content group set to empty array for html_unit.
|
||||
"""
|
||||
return call(
|
||||
'courseware_content',
|
||||
{
|
||||
'course_name': unicode(self.course.display_name),
|
||||
'id': unicode(html_unit.location),
|
||||
'content': {'html_content': '', 'display_name': unicode(html_unit.display_name)},
|
||||
'course': unicode(self.course.id),
|
||||
'location': [
|
||||
unicode(self.chapter.display_name),
|
||||
unicode(self.sequential.display_name),
|
||||
unicode(html_unit.parent.display_name)
|
||||
],
|
||||
'content_type': 'Text',
|
||||
'org': self.course.org,
|
||||
'content_groups': None,
|
||||
'start_date': datetime(2015, 4, 1, 0, 0, tzinfo=tzutc())
|
||||
}
|
||||
)
|
||||
return {
|
||||
'course_name': self.course.display_name,
|
||||
'id': unicode(html_unit.location),
|
||||
'content': {'html_content': '', 'display_name': html_unit.display_name},
|
||||
'course': unicode(self.course.id),
|
||||
'location': [
|
||||
self.chapter.display_name,
|
||||
self.sequential.display_name,
|
||||
html_unit.parent.display_name
|
||||
],
|
||||
'content_type': 'Text',
|
||||
'org': self.course.org,
|
||||
'content_groups': None,
|
||||
'start_date': datetime(2015, 4, 1, 0, 0, tzinfo=tzutc())
|
||||
}
|
||||
|
||||
def _get_index_values_from_call_args(self, mock_index):
|
||||
"""
|
||||
Return content values from args tuple in a mocked calls list.
|
||||
"""
|
||||
kall = mock_index.call_args
|
||||
args, kwargs = kall # pylint: disable=unused-variable
|
||||
return args[1]
|
||||
|
||||
def reindex_course(self, store):
|
||||
""" kick off complete reindex of the course """
|
||||
@@ -1283,46 +1279,47 @@ class GroupConfigurationSearchMongo(CourseTestCase, MixedWithOptionsTestCase):
|
||||
with patch(settings.SEARCH_ENGINE + '.index') as mock_index:
|
||||
self.reindex_course(self.store)
|
||||
self.assertTrue(mock_index.called)
|
||||
self.assertIn(self._html_group_result(self.html_unit1, [1]), mock_index.mock_calls)
|
||||
self.assertIn(self._html_experiment_group_result(self.html_unit4, [unicode(2)]), mock_index.mock_calls)
|
||||
self.assertIn(self._html_experiment_group_result(self.html_unit5, [unicode(3)]), mock_index.mock_calls)
|
||||
self.assertIn(self._html_experiment_group_result(self.html_unit6, [unicode(4)]), mock_index.mock_calls)
|
||||
self.assertNotIn(self._html_experiment_group_result(self.html_unit6, [unicode(5)]), mock_index.mock_calls)
|
||||
indexed_content = self._get_index_values_from_call_args(mock_index)
|
||||
self.assertIn(self._html_group_result(self.html_unit1, [1]), indexed_content)
|
||||
self.assertIn(self._html_experiment_group_result(self.html_unit4, [unicode(2)]), indexed_content)
|
||||
self.assertIn(self._html_experiment_group_result(self.html_unit5, [unicode(3)]), indexed_content)
|
||||
self.assertIn(self._html_experiment_group_result(self.html_unit6, [unicode(4)]), indexed_content)
|
||||
self.assertNotIn(self._html_experiment_group_result(self.html_unit6, [unicode(5)]), indexed_content)
|
||||
self.assertIn(
|
||||
self._vertical_experiment_group_result(self.condition_0_vertical, [unicode(2)]),
|
||||
mock_index.mock_calls
|
||||
indexed_content
|
||||
)
|
||||
self.assertNotIn(
|
||||
self._vertical_experiment_group_result(self.condition_1_vertical, [unicode(2)]),
|
||||
mock_index.mock_calls
|
||||
indexed_content
|
||||
)
|
||||
self.assertNotIn(
|
||||
self._vertical_experiment_group_result(self.condition_2_vertical, [unicode(2)]),
|
||||
mock_index.mock_calls
|
||||
indexed_content
|
||||
)
|
||||
self.assertNotIn(
|
||||
self._vertical_experiment_group_result(self.condition_0_vertical, [unicode(3)]),
|
||||
mock_index.mock_calls
|
||||
indexed_content
|
||||
)
|
||||
self.assertIn(
|
||||
self._vertical_experiment_group_result(self.condition_1_vertical, [unicode(3)]),
|
||||
mock_index.mock_calls
|
||||
indexed_content
|
||||
)
|
||||
self.assertNotIn(
|
||||
self._vertical_experiment_group_result(self.condition_2_vertical, [unicode(3)]),
|
||||
mock_index.mock_calls
|
||||
indexed_content
|
||||
)
|
||||
self.assertNotIn(
|
||||
self._vertical_experiment_group_result(self.condition_0_vertical, [unicode(4)]),
|
||||
mock_index.mock_calls
|
||||
indexed_content
|
||||
)
|
||||
self.assertNotIn(
|
||||
self._vertical_experiment_group_result(self.condition_1_vertical, [unicode(4)]),
|
||||
mock_index.mock_calls
|
||||
indexed_content
|
||||
)
|
||||
self.assertIn(
|
||||
self._vertical_experiment_group_result(self.condition_2_vertical, [unicode(4)]),
|
||||
mock_index.mock_calls
|
||||
indexed_content
|
||||
)
|
||||
mock_index.reset_mock()
|
||||
|
||||
@@ -1332,7 +1329,8 @@ class GroupConfigurationSearchMongo(CourseTestCase, MixedWithOptionsTestCase):
|
||||
with patch(settings.SEARCH_ENGINE + '.index') as mock_index:
|
||||
self.reindex_course(self.store)
|
||||
self.assertTrue(mock_index.called)
|
||||
self.assertIn(self._html_nogroup_result(self.html_unit1), mock_index.mock_calls)
|
||||
indexed_content = self._get_index_values_from_call_args(mock_index)
|
||||
self.assertIn(self._html_nogroup_result(self.html_unit1), indexed_content)
|
||||
mock_index.reset_mock()
|
||||
|
||||
def test_content_group_not_indexed_on_delete(self):
|
||||
@@ -1351,7 +1349,8 @@ class GroupConfigurationSearchMongo(CourseTestCase, MixedWithOptionsTestCase):
|
||||
with patch(settings.SEARCH_ENGINE + '.index') as mock_index:
|
||||
self.reindex_course(self.store)
|
||||
self.assertTrue(mock_index.called)
|
||||
self.assertIn(self._html_group_result(self.html_unit1, [1]), mock_index.mock_calls)
|
||||
indexed_content = self._get_index_values_from_call_args(mock_index)
|
||||
self.assertIn(self._html_group_result(self.html_unit1, [1]), indexed_content)
|
||||
mock_index.reset_mock()
|
||||
|
||||
empty_group_access = {'group_access': {}}
|
||||
@@ -1367,7 +1366,8 @@ class GroupConfigurationSearchMongo(CourseTestCase, MixedWithOptionsTestCase):
|
||||
with patch(settings.SEARCH_ENGINE + '.index') as mock_index:
|
||||
self.reindex_course(self.store)
|
||||
self.assertTrue(mock_index.called)
|
||||
self.assertIn(self._html_nogroup_result(self.html_unit1), mock_index.mock_calls)
|
||||
indexed_content = self._get_index_values_from_call_args(mock_index)
|
||||
self.assertIn(self._html_nogroup_result(self.html_unit1), indexed_content)
|
||||
mock_index.reset_mock()
|
||||
|
||||
def test_group_indexed_only_on_assigned_html_block(self):
|
||||
@@ -1383,8 +1383,9 @@ class GroupConfigurationSearchMongo(CourseTestCase, MixedWithOptionsTestCase):
|
||||
with patch(settings.SEARCH_ENGINE + '.index') as mock_index:
|
||||
self.reindex_course(self.store)
|
||||
self.assertTrue(mock_index.called)
|
||||
self.assertIn(self._html_group_result(self.html_unit1, [1]), mock_index.mock_calls)
|
||||
self.assertIn(self._html_nogroup_result(self.html_unit2), mock_index.mock_calls)
|
||||
indexed_content = self._get_index_values_from_call_args(mock_index)
|
||||
self.assertIn(self._html_group_result(self.html_unit1, [1]), indexed_content)
|
||||
self.assertIn(self._html_nogroup_result(self.html_unit2), indexed_content)
|
||||
mock_index.reset_mock()
|
||||
|
||||
def test_different_groups_indexed_on_assigned_html_blocks(self):
|
||||
@@ -1407,8 +1408,9 @@ class GroupConfigurationSearchMongo(CourseTestCase, MixedWithOptionsTestCase):
|
||||
with patch(settings.SEARCH_ENGINE + '.index') as mock_index:
|
||||
self.reindex_course(self.store)
|
||||
self.assertTrue(mock_index.called)
|
||||
self.assertIn(self._html_group_result(self.html_unit1, [1]), mock_index.mock_calls)
|
||||
self.assertIn(self._html_group_result(self.html_unit2, [0]), mock_index.mock_calls)
|
||||
indexed_content = self._get_index_values_from_call_args(mock_index)
|
||||
self.assertIn(self._html_group_result(self.html_unit1, [1]), indexed_content)
|
||||
self.assertIn(self._html_group_result(self.html_unit2, [0]), indexed_content)
|
||||
mock_index.reset_mock()
|
||||
|
||||
def test_different_groups_indexed_on_same_vertical_html_blocks(self):
|
||||
@@ -1435,8 +1437,9 @@ class GroupConfigurationSearchMongo(CourseTestCase, MixedWithOptionsTestCase):
|
||||
with patch(settings.SEARCH_ENGINE + '.index') as mock_index:
|
||||
self.reindex_course(self.store)
|
||||
self.assertTrue(mock_index.called)
|
||||
self.assertIn(self._html_group_result(self.html_unit2, [1]), mock_index.mock_calls)
|
||||
self.assertIn(self._html_group_result(self.html_unit3, [0]), mock_index.mock_calls)
|
||||
indexed_content = self._get_index_values_from_call_args(mock_index)
|
||||
self.assertIn(self._html_group_result(self.html_unit2, [1]), indexed_content)
|
||||
self.assertIn(self._html_group_result(self.html_unit3, [0]), indexed_content)
|
||||
mock_index.reset_mock()
|
||||
|
||||
|
||||
|
||||
@@ -47,7 +47,7 @@ git+https://github.com/edx/ease.git@release-2015-07-14#egg=ease==0.1.3
|
||||
git+https://github.com/edx/edx-oauth2-provider.git@0.5.2#egg=oauth2-provider==0.5.2
|
||||
-e git+https://github.com/edx/edx-val.git@v0.0.5#egg=edx-val
|
||||
-e git+https://github.com/pmitros/RecommenderXBlock.git@518234bc354edbfc2651b9e534ddb54f96080779#egg=recommender-xblock
|
||||
-e git+https://github.com/edx/edx-search.git@release-2015-07-03#egg=edx-search
|
||||
-e git+https://github.com/edx/edx-search.git@release-2015-07-14#egg=edx-search
|
||||
-e git+https://github.com/edx/edx-milestones.git@release-2015-06-17#egg=edx-milestones
|
||||
git+https://github.com/edx/edx-lint.git@ed8c8d2a0267d4d42f43642d193e25f8bd575d9b#egg=edx_lint==0.2.3
|
||||
-e git+https://github.com/edx/xblock-utils.git@213a97a50276d6a2504d8133650b2930ead357a0#egg=xblock-utils
|
||||
|
||||
Reference in New Issue
Block a user