From 862262bd2d63d5fd6df8654bfb71f3d50e78d164 Mon Sep 17 00:00:00 2001 From: Dino Cikatic Date: Thu, 2 Jul 2015 16:35:47 +0200 Subject: [PATCH] SOL-974 make index and remove calls use ES bulk API --- .../contentstore/courseware_index.py | 23 +- .../tests/test_courseware_index.py | 201 +++++++++--------- requirements/edx/github.txt | 2 +- 3 files changed, 117 insertions(+), 109 deletions(-) diff --git a/cms/djangoapps/contentstore/courseware_index.py b/cms/djangoapps/contentstore/courseware_index.py index f134a5993c..bcf26f3572 100644 --- a/cms/djangoapps/contentstore/courseware_index.py +++ b/cms/djangoapps/contentstore/courseware_index.py @@ -111,8 +111,7 @@ class SearchIndexerBase(object): exclude_dictionary={"id": list(exclude_items)} ) result_ids = [result["data"]["id"] for result in response["results"]] - for result_id in result_ids: - searcher.remove(cls.DOCUMENT_TYPE, result_id) + searcher.remove(cls.DOCUMENT_TYPE, result_ids) @classmethod def index(cls, modulestore, structure_key, triggered_at=None, reindex_age=REINDEX_AGE): @@ -142,7 +141,7 @@ class SearchIndexerBase(object): structure_key = cls.normalize_structure_key(structure_key) location_info = cls._get_location_info(structure_key) - # Wrap counter in dictionary - otherwise we seem to lose scope inside the embedded function `index_item` + # Wrap counter in dictionary - otherwise we seem to lose scope inside the embedded function `prepare_item_index` indexed_count = { "count": 0 } @@ -153,15 +152,20 @@ class SearchIndexerBase(object): # list - those are ready to be destroyed indexed_items = set() + # items_index is a list of all the items index dictionaries. + # it is used to collect all indexes and index them using bulk API, + # instead of per item index API call. + items_index = [] + def get_item_location(item): """ Gets the version agnostic item location """ return item.location.version_agnostic().replace(branch=None) - def index_item(item, skip_index=False, groups_usage_info=None): + def prepare_item_index(item, skip_index=False, groups_usage_info=None): """ - Add this item to the search index and indexed_items list + Add this item to the items_index and indexed_items list Arguments: item - item to add to index, its children will be processed recursively @@ -212,7 +216,7 @@ class SearchIndexerBase(object): for child_item in item.get_children(): if modulestore.has_published_version(child_item): children_groups_usage.append( - index_item( + prepare_item_index( child_item, skip_index=skip_child_index, groups_usage_info=groups_usage_info @@ -234,7 +238,7 @@ class SearchIndexerBase(object): item_index['start_date'] = item.start item_index['content_groups'] = item_content_groups if item_content_groups else None item_index.update(cls.supplemental_fields(item)) - searcher.index(cls.DOCUMENT_TYPE, item_index) + items_index.append(item_index) indexed_count["count"] += 1 return item_content_groups except Exception as err: # pylint: disable=broad-except @@ -252,7 +256,8 @@ class SearchIndexerBase(object): # Now index the content for item in structure.get_children(): - index_item(item, groups_usage_info=groups_usage_info) + prepare_item_index(item, groups_usage_info=groups_usage_info) + searcher.index(cls.DOCUMENT_TYPE, items_index) cls.remove_deleted_items(searcher, structure_key, indexed_items) except Exception as err: # pylint: disable=broad-except # broad exception so that index operation does not prevent the rest of the application from working @@ -623,7 +628,7 @@ class CourseAboutSearchIndexer(object): # Broad exception handler to protect around and report problems with indexing try: - searcher.index(cls.DISCOVERY_DOCUMENT_TYPE, course_info) + searcher.index(cls.DISCOVERY_DOCUMENT_TYPE, [course_info]) except: # pylint: disable=bare-except log.exception( "Course discovery indexing error encountered, course discovery index may be out of date %s", diff --git a/cms/djangoapps/contentstore/tests/test_courseware_index.py b/cms/djangoapps/contentstore/tests/test_courseware_index.py index 527192937b..c2afffc2e8 100644 --- a/cms/djangoapps/contentstore/tests/test_courseware_index.py +++ b/cms/djangoapps/contentstore/tests/test_courseware_index.py @@ -1167,95 +1167,91 @@ class GroupConfigurationSearchMongo(CourseTestCase, MixedWithOptionsTestCase): def _html_group_result(self, html_unit, content_groups): """ - Return call object with arguments and content group for html_unit. + Return object with arguments and content group for html_unit. """ - return call( - 'courseware_content', - { - 'course_name': unicode(self.course.display_name), - 'id': unicode(html_unit.location), - 'content': {'html_content': '', 'display_name': unicode(html_unit.display_name)}, - 'course': unicode(self.course.id), - 'location': [ - unicode(self.chapter.display_name), - unicode(self.sequential.display_name), - unicode(html_unit.parent.display_name) - ], - 'content_type': 'Text', - 'org': self.course.org, - 'content_groups': content_groups, - 'start_date': datetime(2015, 4, 1, 0, 0, tzinfo=tzutc()) - } - ) + return { + 'course_name': self.course.display_name, + 'id': unicode(html_unit.location), + 'content': {'html_content': '', 'display_name': html_unit.display_name}, + 'course': unicode(self.course.id), + 'location': [ + self.chapter.display_name, + self.sequential.display_name, + html_unit.parent.display_name + ], + 'content_type': 'Text', + 'org': self.course.org, + 'content_groups': content_groups, + 'start_date': datetime(2015, 4, 1, 0, 0, tzinfo=tzutc()) + } def _html_experiment_group_result(self, html_unit, content_groups): """ - Return call object with arguments and content group for html_unit. + Return object with arguments and content group for html_unit. """ - return call( - 'courseware_content', - { - 'course_name': unicode(self.course.display_name), - 'id': unicode(html_unit.location), - 'content': {'html_content': '', 'display_name': unicode(html_unit.display_name)}, - 'course': unicode(self.course.id), - 'location': [ - unicode(self.chapter.display_name), - unicode(self.sequential2.display_name), - unicode(self.vertical3.display_name) - ], - 'content_type': 'Text', - 'org': self.course.org, - 'content_groups': content_groups, - 'start_date': datetime(2015, 4, 1, 0, 0, tzinfo=tzutc()) - } - ) + return { + 'course_name': self.course.display_name, + 'id': unicode(html_unit.location), + 'content': {'html_content': '', 'display_name': html_unit.display_name}, + 'course': unicode(self.course.id), + 'location': [ + self.chapter.display_name, + self.sequential2.display_name, + self.vertical3.display_name + ], + 'content_type': 'Text', + 'org': self.course.org, + 'content_groups': content_groups, + 'start_date': datetime(2015, 4, 1, 0, 0, tzinfo=tzutc()) + } def _vertical_experiment_group_result(self, vertical, content_groups): """ - Return call object with arguments and content group for split_test vertical. + Return object with arguments and content group for split_test vertical. """ - return call( - 'courseware_content', - { - 'start_date': datetime(2015, 4, 1, 0, 0, tzinfo=tzutc()), - 'content': {'display_name': unicode(vertical.display_name)}, - 'course': unicode(self.course.id), - 'location': [ - unicode(self.chapter.display_name), - unicode(self.sequential2.display_name), - unicode(vertical.parent.display_name) - ], - 'content_type': 'Sequence', - 'content_groups': content_groups, - 'id': unicode(vertical.location), - 'course_name': unicode(self.course.display_name), - 'org': self.course.org - } - ) + return { + 'start_date': datetime(2015, 4, 1, 0, 0, tzinfo=tzutc()), + 'content': {'display_name': vertical.display_name}, + 'course': unicode(self.course.id), + 'location': [ + self.chapter.display_name, + self.sequential2.display_name, + vertical.parent.display_name + ], + 'content_type': 'Sequence', + 'content_groups': content_groups, + 'id': unicode(vertical.location), + 'course_name': self.course.display_name, + 'org': self.course.org + } def _html_nogroup_result(self, html_unit): """ - Return call object with arguments and content group set to empty array for html_unit. + Return object with arguments and content group set to empty array for html_unit. """ - return call( - 'courseware_content', - { - 'course_name': unicode(self.course.display_name), - 'id': unicode(html_unit.location), - 'content': {'html_content': '', 'display_name': unicode(html_unit.display_name)}, - 'course': unicode(self.course.id), - 'location': [ - unicode(self.chapter.display_name), - unicode(self.sequential.display_name), - unicode(html_unit.parent.display_name) - ], - 'content_type': 'Text', - 'org': self.course.org, - 'content_groups': None, - 'start_date': datetime(2015, 4, 1, 0, 0, tzinfo=tzutc()) - } - ) + return { + 'course_name': self.course.display_name, + 'id': unicode(html_unit.location), + 'content': {'html_content': '', 'display_name': html_unit.display_name}, + 'course': unicode(self.course.id), + 'location': [ + self.chapter.display_name, + self.sequential.display_name, + html_unit.parent.display_name + ], + 'content_type': 'Text', + 'org': self.course.org, + 'content_groups': None, + 'start_date': datetime(2015, 4, 1, 0, 0, tzinfo=tzutc()) + } + + def _get_index_values_from_call_args(self, mock_index): + """ + Return content values from args tuple in a mocked calls list. + """ + kall = mock_index.call_args + args, kwargs = kall # pylint: disable=unused-variable + return args[1] def reindex_course(self, store): """ kick off complete reindex of the course """ @@ -1283,46 +1279,47 @@ class GroupConfigurationSearchMongo(CourseTestCase, MixedWithOptionsTestCase): with patch(settings.SEARCH_ENGINE + '.index') as mock_index: self.reindex_course(self.store) self.assertTrue(mock_index.called) - self.assertIn(self._html_group_result(self.html_unit1, [1]), mock_index.mock_calls) - self.assertIn(self._html_experiment_group_result(self.html_unit4, [unicode(2)]), mock_index.mock_calls) - self.assertIn(self._html_experiment_group_result(self.html_unit5, [unicode(3)]), mock_index.mock_calls) - self.assertIn(self._html_experiment_group_result(self.html_unit6, [unicode(4)]), mock_index.mock_calls) - self.assertNotIn(self._html_experiment_group_result(self.html_unit6, [unicode(5)]), mock_index.mock_calls) + indexed_content = self._get_index_values_from_call_args(mock_index) + self.assertIn(self._html_group_result(self.html_unit1, [1]), indexed_content) + self.assertIn(self._html_experiment_group_result(self.html_unit4, [unicode(2)]), indexed_content) + self.assertIn(self._html_experiment_group_result(self.html_unit5, [unicode(3)]), indexed_content) + self.assertIn(self._html_experiment_group_result(self.html_unit6, [unicode(4)]), indexed_content) + self.assertNotIn(self._html_experiment_group_result(self.html_unit6, [unicode(5)]), indexed_content) self.assertIn( self._vertical_experiment_group_result(self.condition_0_vertical, [unicode(2)]), - mock_index.mock_calls + indexed_content ) self.assertNotIn( self._vertical_experiment_group_result(self.condition_1_vertical, [unicode(2)]), - mock_index.mock_calls + indexed_content ) self.assertNotIn( self._vertical_experiment_group_result(self.condition_2_vertical, [unicode(2)]), - mock_index.mock_calls + indexed_content ) self.assertNotIn( self._vertical_experiment_group_result(self.condition_0_vertical, [unicode(3)]), - mock_index.mock_calls + indexed_content ) self.assertIn( self._vertical_experiment_group_result(self.condition_1_vertical, [unicode(3)]), - mock_index.mock_calls + indexed_content ) self.assertNotIn( self._vertical_experiment_group_result(self.condition_2_vertical, [unicode(3)]), - mock_index.mock_calls + indexed_content ) self.assertNotIn( self._vertical_experiment_group_result(self.condition_0_vertical, [unicode(4)]), - mock_index.mock_calls + indexed_content ) self.assertNotIn( self._vertical_experiment_group_result(self.condition_1_vertical, [unicode(4)]), - mock_index.mock_calls + indexed_content ) self.assertIn( self._vertical_experiment_group_result(self.condition_2_vertical, [unicode(4)]), - mock_index.mock_calls + indexed_content ) mock_index.reset_mock() @@ -1332,7 +1329,8 @@ class GroupConfigurationSearchMongo(CourseTestCase, MixedWithOptionsTestCase): with patch(settings.SEARCH_ENGINE + '.index') as mock_index: self.reindex_course(self.store) self.assertTrue(mock_index.called) - self.assertIn(self._html_nogroup_result(self.html_unit1), mock_index.mock_calls) + indexed_content = self._get_index_values_from_call_args(mock_index) + self.assertIn(self._html_nogroup_result(self.html_unit1), indexed_content) mock_index.reset_mock() def test_content_group_not_indexed_on_delete(self): @@ -1351,7 +1349,8 @@ class GroupConfigurationSearchMongo(CourseTestCase, MixedWithOptionsTestCase): with patch(settings.SEARCH_ENGINE + '.index') as mock_index: self.reindex_course(self.store) self.assertTrue(mock_index.called) - self.assertIn(self._html_group_result(self.html_unit1, [1]), mock_index.mock_calls) + indexed_content = self._get_index_values_from_call_args(mock_index) + self.assertIn(self._html_group_result(self.html_unit1, [1]), indexed_content) mock_index.reset_mock() empty_group_access = {'group_access': {}} @@ -1367,7 +1366,8 @@ class GroupConfigurationSearchMongo(CourseTestCase, MixedWithOptionsTestCase): with patch(settings.SEARCH_ENGINE + '.index') as mock_index: self.reindex_course(self.store) self.assertTrue(mock_index.called) - self.assertIn(self._html_nogroup_result(self.html_unit1), mock_index.mock_calls) + indexed_content = self._get_index_values_from_call_args(mock_index) + self.assertIn(self._html_nogroup_result(self.html_unit1), indexed_content) mock_index.reset_mock() def test_group_indexed_only_on_assigned_html_block(self): @@ -1383,8 +1383,9 @@ class GroupConfigurationSearchMongo(CourseTestCase, MixedWithOptionsTestCase): with patch(settings.SEARCH_ENGINE + '.index') as mock_index: self.reindex_course(self.store) self.assertTrue(mock_index.called) - self.assertIn(self._html_group_result(self.html_unit1, [1]), mock_index.mock_calls) - self.assertIn(self._html_nogroup_result(self.html_unit2), mock_index.mock_calls) + indexed_content = self._get_index_values_from_call_args(mock_index) + self.assertIn(self._html_group_result(self.html_unit1, [1]), indexed_content) + self.assertIn(self._html_nogroup_result(self.html_unit2), indexed_content) mock_index.reset_mock() def test_different_groups_indexed_on_assigned_html_blocks(self): @@ -1407,8 +1408,9 @@ class GroupConfigurationSearchMongo(CourseTestCase, MixedWithOptionsTestCase): with patch(settings.SEARCH_ENGINE + '.index') as mock_index: self.reindex_course(self.store) self.assertTrue(mock_index.called) - self.assertIn(self._html_group_result(self.html_unit1, [1]), mock_index.mock_calls) - self.assertIn(self._html_group_result(self.html_unit2, [0]), mock_index.mock_calls) + indexed_content = self._get_index_values_from_call_args(mock_index) + self.assertIn(self._html_group_result(self.html_unit1, [1]), indexed_content) + self.assertIn(self._html_group_result(self.html_unit2, [0]), indexed_content) mock_index.reset_mock() def test_different_groups_indexed_on_same_vertical_html_blocks(self): @@ -1435,8 +1437,9 @@ class GroupConfigurationSearchMongo(CourseTestCase, MixedWithOptionsTestCase): with patch(settings.SEARCH_ENGINE + '.index') as mock_index: self.reindex_course(self.store) self.assertTrue(mock_index.called) - self.assertIn(self._html_group_result(self.html_unit2, [1]), mock_index.mock_calls) - self.assertIn(self._html_group_result(self.html_unit3, [0]), mock_index.mock_calls) + indexed_content = self._get_index_values_from_call_args(mock_index) + self.assertIn(self._html_group_result(self.html_unit2, [1]), indexed_content) + self.assertIn(self._html_group_result(self.html_unit3, [0]), indexed_content) mock_index.reset_mock() diff --git a/requirements/edx/github.txt b/requirements/edx/github.txt index c06889992e..bdaecf83d5 100644 --- a/requirements/edx/github.txt +++ b/requirements/edx/github.txt @@ -47,7 +47,7 @@ git+https://github.com/edx/ease.git@release-2015-07-14#egg=ease==0.1.3 git+https://github.com/edx/edx-oauth2-provider.git@0.5.2#egg=oauth2-provider==0.5.2 -e git+https://github.com/edx/edx-val.git@v0.0.5#egg=edx-val -e git+https://github.com/pmitros/RecommenderXBlock.git@518234bc354edbfc2651b9e534ddb54f96080779#egg=recommender-xblock --e git+https://github.com/edx/edx-search.git@release-2015-07-03#egg=edx-search +-e git+https://github.com/edx/edx-search.git@release-2015-07-14#egg=edx-search -e git+https://github.com/edx/edx-milestones.git@release-2015-06-17#egg=edx-milestones git+https://github.com/edx/edx-lint.git@ed8c8d2a0267d4d42f43642d193e25f8bd575d9b#egg=edx_lint==0.2.3 -e git+https://github.com/edx/xblock-utils.git@213a97a50276d6a2504d8133650b2930ead357a0#egg=xblock-utils