From e37a98d1be812df05394e642a751d66c360dbac6 Mon Sep 17 00:00:00 2001 From: Davorin Sego Date: Thu, 12 Feb 2015 17:52:58 +0100 Subject: [PATCH] Indexing moved to celery task. Reorganised indexing to always allow for full course reindex. Indexing code no longer needs to be in common, because it is triggered by signal instead of being called.g --- cms/djangoapps/contentstore/__init__.py | 2 + .../contentstore/courseware_index.py | 194 ++++++ cms/djangoapps/contentstore/signals.py | 19 + cms/djangoapps/contentstore/tasks.py | 42 +- .../tests/test_courseware_index.py | 565 ++++++++++++++++++ .../contentstore/tests/test_import.py | 2 +- .../contentstore/tests/test_libraries.py | 1 + cms/djangoapps/contentstore/views/course.py | 2 +- .../views/tests/test_course_index.py | 98 ++- cms/envs/test.py | 4 - .../xmodule/modulestore/courseware_index.py | 176 ------ .../xmodule/modulestore/mongo/draft.py | 8 - .../modulestore/split_mongo/split_draft.py | 8 - common/lib/xmodule/xmodule/vertical_block.py | 20 + .../tests/lms/test_lms_courseware_search.py | 68 ++- requirements/edx/github.txt | 2 +- 16 files changed, 916 insertions(+), 295 deletions(-) create mode 100644 cms/djangoapps/contentstore/courseware_index.py create mode 100644 cms/djangoapps/contentstore/signals.py create mode 100644 cms/djangoapps/contentstore/tests/test_courseware_index.py delete mode 100644 common/lib/xmodule/xmodule/modulestore/courseware_index.py diff --git a/cms/djangoapps/contentstore/__init__.py b/cms/djangoapps/contentstore/__init__.py index e69de29bb2..d3060371e5 100644 --- a/cms/djangoapps/contentstore/__init__.py +++ b/cms/djangoapps/contentstore/__init__.py @@ -0,0 +1,2 @@ +""" module init will register signal handlers """ +import contentstore.signals diff --git a/cms/djangoapps/contentstore/courseware_index.py b/cms/djangoapps/contentstore/courseware_index.py new file mode 100644 index 0000000000..9d8742f302 --- /dev/null +++ b/cms/djangoapps/contentstore/courseware_index.py @@ -0,0 +1,194 @@ +""" Code to allow module store to interface with courseware index """ +from __future__ import absolute_import + +from datetime import timedelta +import logging + +from django.conf import settings +from django.utils.translation import ugettext as _ +from eventtracking import tracker +from xmodule.modulestore import ModuleStoreEnum +from search.search_engine_base import SearchEngine + + +# Use default index and document names for now +INDEX_NAME = "courseware_index" +DOCUMENT_TYPE = "courseware_content" + +# REINDEX_AGE is the default amount of time that we look back for changes +# that might have happened. If we are provided with a time at which the +# indexing is triggered, then we know it is safe to only index items +# recently changed at that time. This is the time period that represents +# how far back from the trigger point to look back in order to index +REINDEX_AGE = timedelta(0, 60) # 60 seconds + +log = logging.getLogger('edx.modulestore') + + +def indexing_is_enabled(): + """ + Checks to see if the indexing feature is enabled + """ + return settings.FEATURES.get('ENABLE_COURSEWARE_INDEX', False) + + +class SearchIndexingError(Exception): + """ Indicates some error(s) occured during indexing """ + + def __init__(self, message, error_list): + super(SearchIndexingError, self).__init__(message) + self.error_list = error_list + + +class CoursewareSearchIndexer(object): + """ + Class to perform indexing for courseware search from different modulestores + """ + + @classmethod + def index_course(cls, modulestore, course_key, triggered_at=None, reindex_age=REINDEX_AGE): + """ + Process course for indexing + + Arguments: + course_key (CourseKey) - course identifier + + triggered_at (datetime) - provides time at which indexing was triggered; + useful for index updates - only things changed recently from that date + (within REINDEX_AGE above ^^) will have their index updated, others skip + updating their index but are still walked through in order to identify + which items may need to be removed from the index + If None, then a full reindex takes place + + Returns: + Number of items that have been added to the index + """ + error_list = [] + searcher = SearchEngine.get_search_engine(INDEX_NAME) + if not searcher: + return + + location_info = { + "course": unicode(course_key), + } + + # Wrap counter in dictionary - otherwise we seem to lose scope inside the embedded function `index_item` + indexed_count = { + "count": 0 + } + + # indexed_items is a list of all the items that we wish to remain in the + # index, whether or not we are planning to actually update their index. + # This is used in order to build a query to remove those items not in this + # list - those are ready to be destroyed + indexed_items = set() + + def index_item(item, skip_index=False): + """ + Add this item to the search index and indexed_items list + + Arguments: + item - item to add to index, its children will be processed recursively + + skip_index - simply walk the children in the tree, the content change is + older than the REINDEX_AGE window and would have been already indexed. + This should really only be passed from the recursive child calls when + this method has determined that it is safe to do so + """ + is_indexable = hasattr(item, "index_dictionary") + item_index_dictionary = item.index_dictionary() if is_indexable else None + # if it's not indexable and it does not have children, then ignore + if not item_index_dictionary and not item.has_children: + return + + item_id = unicode(item.scope_ids.usage_id) + indexed_items.add(item_id) + if item.has_children: + # determine if it's okay to skip adding the children herein based upon how recently any may have changed + skip_child_index = skip_index or \ + (triggered_at is not None and (triggered_at - item.subtree_edited_on) > reindex_age) + for child_item in item.get_children(): + index_item(child_item, skip_index=skip_child_index) + + if skip_index or not item_index_dictionary: + return + + item_index = {} + # if it has something to add to the index, then add it + try: + item_index.update(location_info) + item_index.update(item_index_dictionary) + item_index['id'] = item_id + if item.start: + item_index['start_date'] = item.start + + searcher.index(DOCUMENT_TYPE, item_index) + indexed_count["count"] += 1 + except Exception as err: # pylint: disable=broad-except + # broad exception so that index operation does not fail on one item of many + log.warning('Could not index item: %s - %r', item.location, err) + error_list.append(_('Could not index item: {}').format(item.location)) + + def remove_deleted_items(): + """ + remove any item that is present in the search index that is not present in updated list of indexed items + as we find items we can shorten the set of items to keep + """ + response = searcher.search( + doc_type=DOCUMENT_TYPE, + field_dictionary={"course": unicode(course_key)}, + exclude_ids=indexed_items + ) + result_ids = [result["data"]["id"] for result in response["results"]] + for result_id in result_ids: + searcher.remove(DOCUMENT_TYPE, result_id) + + try: + with modulestore.branch_setting(ModuleStoreEnum.RevisionOption.published_only): + course = modulestore.get_course(course_key, depth=None) + for item in course.get_children(): + index_item(item) + remove_deleted_items() + except Exception as err: # pylint: disable=broad-except + # broad exception so that index operation does not prevent the rest of the application from working + log.exception( + "Indexing error encountered, courseware index may be out of date %s - %r", + course_key, + err + ) + error_list.append(_('General indexing error occurred')) + + if error_list: + raise SearchIndexingError(_('Error(s) present during indexing'), error_list) + + return indexed_count["count"] + + @classmethod + def do_course_reindex(cls, modulestore, course_key): + """ + (Re)index all content within the given course, tracking the fact that a full reindex has taking place + """ + indexed_count = cls.index_course(modulestore, course_key) + if indexed_count: + cls._track_index_request('edx.course.index.reindexed', indexed_count) + return indexed_count + + @classmethod + def _track_index_request(cls, event_name, indexed_count): + """Track content index requests. + + Arguments: + event_name (str): Name of the event to be logged. + Returns: + None + + """ + data = { + "indexed_count": indexed_count, + 'category': 'courseware_index', + } + + tracker.emit( + event_name, + data + ) diff --git a/cms/djangoapps/contentstore/signals.py b/cms/djangoapps/contentstore/signals.py new file mode 100644 index 0000000000..df21a57ccd --- /dev/null +++ b/cms/djangoapps/contentstore/signals.py @@ -0,0 +1,19 @@ +""" receiver of course_published events in order to trigger indexing task """ +from datetime import datetime +from pytz import UTC + +from django.dispatch import receiver + +from xmodule.modulestore.django import SignalHandler +from contentstore.courseware_index import indexing_is_enabled + + +@receiver(SignalHandler.course_published) +def listen_for_course_publish(sender, course_key, **kwargs): # pylint: disable=unused-argument + """ + Receives signal and kicks off celery task to update search index + """ + # import here, because signal is registered at startup, but items in tasks are not yet able to be loaded + from .tasks import update_search_index + if indexing_is_enabled(): + update_search_index.delay(unicode(course_key), datetime.now(UTC).isoformat()) diff --git a/cms/djangoapps/contentstore/tasks.py b/cms/djangoapps/contentstore/tasks.py index 222ad7a44d..cebf5aee73 100644 --- a/cms/djangoapps/contentstore/tasks.py +++ b/cms/djangoapps/contentstore/tasks.py @@ -1,20 +1,25 @@ """ This file contains celery tasks for contentstore views """ - -from celery.task import task -from django.contrib.auth.models import User import json import logging -from xmodule.modulestore.django import modulestore -from xmodule.course_module import CourseFields +from celery.task import task +from celery.utils.log import get_task_logger +from datetime import datetime +from pytz import UTC -from xmodule.modulestore.exceptions import DuplicateCourseError, ItemNotFoundError -from course_action_state.models import CourseRerunState +from django.contrib.auth.models import User + +from contentstore.courseware_index import CoursewareSearchIndexer, SearchIndexingError from contentstore.utils import initialize_permissions +from course_action_state.models import CourseRerunState from opaque_keys.edx.keys import CourseKey +from xmodule.course_module import CourseFields +from xmodule.modulestore.django import modulestore +from xmodule.modulestore.exceptions import DuplicateCourseError, ItemNotFoundError -from edxval.api import copy_course_videos +LOGGER = get_task_logger(__name__) +FULL_COURSE_REINDEX_THRESHOLD = 1 @task() @@ -22,6 +27,9 @@ def rerun_course(source_course_key_string, destination_course_key_string, user_i """ Reruns a course in a new celery task. """ + # import here, at top level this import prevents the celery workers from starting up correctly + from edxval.api import copy_course_videos + try: # deserialize the payload source_course_key = CourseKey.from_string(source_course_key_string) @@ -72,3 +80,21 @@ def deserialize_fields(json_fields): for field_name, value in fields.iteritems(): fields[field_name] = getattr(CourseFields, field_name).from_json(value) return fields + + +@task() +def update_search_index(course_id, triggered_time_isoformat): + """ Updates course search index. """ + try: + course_key = CourseKey.from_string(course_id) + triggered_time = datetime.strptime( + # remove the +00:00 from the end of the formats generated within the system + triggered_time_isoformat.split('+')[0], + "%Y-%m-%dT%H:%M:%S.%f" + ).replace(tzinfo=UTC) + CoursewareSearchIndexer.index_course(modulestore(), course_key, triggered_at=triggered_time) + + except SearchIndexingError as exc: + LOGGER.error('Search indexing error for complete course %s - %s', course_id, unicode(exc)) + else: + LOGGER.debug('Search indexing successful for complete course %s', course_id) diff --git a/cms/djangoapps/contentstore/tests/test_courseware_index.py b/cms/djangoapps/contentstore/tests/test_courseware_index.py new file mode 100644 index 0000000000..8f0621d60e --- /dev/null +++ b/cms/djangoapps/contentstore/tests/test_courseware_index.py @@ -0,0 +1,565 @@ +""" +Testing indexing of the courseware as it is changed +""" +import ddt +import time +from datetime import datetime +from mock import patch +from pytz import UTC +from uuid import uuid4 +from unittest import skip + +from xmodule.modulestore import ModuleStoreEnum +from xmodule.modulestore.django import SignalHandler +from xmodule.modulestore.edit_info import EditInfoMixin +from xmodule.modulestore.inheritance import InheritanceMixin +from xmodule.modulestore.mixed import MixedModuleStore +from xmodule.modulestore.tests.django_utils import ModuleStoreTestCase +from xmodule.modulestore.tests.factories import CourseFactory, ItemFactory +from xmodule.modulestore.tests.mongo_connection import MONGO_PORT_NUM, MONGO_HOST +from xmodule.modulestore.tests.test_cross_modulestore_import_export import MongoContentstoreBuilder +from xmodule.modulestore.tests.utils import create_modulestore_instance, LocationMixin, MixedSplitTestCase +from xmodule.tests import DATA_DIR +from xmodule.x_module import XModuleMixin +from search.search_engine_base import SearchEngine + +from contentstore.courseware_index import CoursewareSearchIndexer, INDEX_NAME, DOCUMENT_TYPE, SearchIndexingError +from contentstore.signals import listen_for_course_publish + + +COURSE_CHILD_STRUCTURE = { + "course": "chapter", + "chapter": "sequential", + "sequential": "vertical", + "vertical": "html", +} + + +def create_children(store, parent, category, load_factor): + """ create load_factor children within the given parent; recursively call to insert children when appropriate """ + created_count = 0 + for child_index in range(0, load_factor): + child_object = ItemFactory.create( + parent_location=parent.location, + category=category, + display_name=u"{} {} {}".format(category, child_index, time.clock()), + modulestore=store, + publish_item=True, + start=datetime(2015, 3, 1, tzinfo=UTC), + ) + created_count += 1 + + if category in COURSE_CHILD_STRUCTURE: + created_count += create_children(store, child_object, COURSE_CHILD_STRUCTURE[category], load_factor) + + return created_count + + +def create_large_course(store, load_factor): + """ + Create a large course, note that the number of blocks created will be + load_factor ^ 4 - e.g. load_factor of 10 => 10 chapters, 100 + sequentials, 1000 verticals, 10000 html blocks + """ + course = CourseFactory.create(modulestore=store, start=datetime(2015, 3, 1, tzinfo=UTC)) + with store.bulk_operations(course.id): + child_count = create_children(store, course, COURSE_CHILD_STRUCTURE["course"], load_factor) + return course, child_count + + +class MixedWithOptionsTestCase(MixedSplitTestCase): + """ Base class for test cases within this file """ + HOST = MONGO_HOST + PORT = MONGO_PORT_NUM + DATABASE = 'test_mongo_%s' % uuid4().hex[:5] + COLLECTION = 'modulestore' + ASSET_COLLECTION = 'assetstore' + DEFAULT_CLASS = 'xmodule.raw_module.RawDescriptor' + RENDER_TEMPLATE = lambda t_n, d, ctx=None, nsp='main': '' + modulestore_options = { + 'default_class': DEFAULT_CLASS, + 'fs_root': DATA_DIR, + 'render_template': RENDER_TEMPLATE, + 'xblock_mixins': (EditInfoMixin, InheritanceMixin, LocationMixin, XModuleMixin), + } + DOC_STORE_CONFIG = { + 'host': HOST, + 'port': PORT, + 'db': DATABASE, + 'collection': COLLECTION, + 'asset_collection': ASSET_COLLECTION, + } + OPTIONS = { + 'stores': [ + { + 'NAME': 'draft', + 'ENGINE': 'xmodule.modulestore.mongo.draft.DraftModuleStore', + 'DOC_STORE_CONFIG': DOC_STORE_CONFIG, + 'OPTIONS': modulestore_options + }, + { + 'NAME': 'split', + 'ENGINE': 'xmodule.modulestore.split_mongo.split_draft.DraftVersioningModuleStore', + 'DOC_STORE_CONFIG': DOC_STORE_CONFIG, + 'OPTIONS': modulestore_options + }, + { + 'NAME': 'xml', + 'ENGINE': 'xmodule.modulestore.xml.XMLModuleStore', + 'OPTIONS': { + 'data_dir': DATA_DIR, + 'default_class': 'xmodule.hidden_module.HiddenDescriptor', + 'xblock_mixins': modulestore_options['xblock_mixins'], + } + }, + ], + 'xblock_mixins': modulestore_options['xblock_mixins'], + } + + def setUp(self): + super(MixedWithOptionsTestCase, self).setUp() + + def setup_course_base(self, store): + """ base version of setup_course_base is a no-op """ + pass + + def get_search_engine(self): + """ Centralized call to getting the search engine for the test """ + return SearchEngine.get_search_engine(INDEX_NAME) + + def _perform_test_using_store(self, store_type, test_to_perform): + """ Helper method to run a test function that uses a specific store """ + with MongoContentstoreBuilder().build() as contentstore: + store = MixedModuleStore( + contentstore=contentstore, + create_modulestore_instance=create_modulestore_instance, + mappings={}, + **self.OPTIONS + ) + self.addCleanup(store.close_all_connections) + + with store.default_store(store_type): + self.setup_course_base(store) + test_to_perform(store) + + def publish_item(self, store, item_location): + """ publish the item at the given location """ + with store.branch_setting(ModuleStoreEnum.Branch.draft_preferred): + store.publish(item_location, ModuleStoreEnum.UserID.test) + + def delete_item(self, store, item_location): + """ delete the item at the given location """ + with store.branch_setting(ModuleStoreEnum.Branch.draft_preferred): + store.delete_item(item_location, ModuleStoreEnum.UserID.test) + + def update_item(self, store, item): + """ update the item at the given location """ + with store.branch_setting(ModuleStoreEnum.Branch.draft_preferred): + store.update_item(item, ModuleStoreEnum.UserID.test) + + +@ddt.ddt +class TestCoursewareSearchIndexer(MixedWithOptionsTestCase): + """ Tests the operation of the CoursewareSearchIndexer """ + + def setUp(self): + super(TestCoursewareSearchIndexer, self).setUp() + + self.course = None + self.chapter = None + self.sequential = None + self.vertical = None + self.html_unit = None + + def setup_course_base(self, store): + """ + Set up the for the course outline tests. + """ + self.course = CourseFactory.create(modulestore=store, start=datetime(2015, 3, 1, tzinfo=UTC)) + + self.chapter = ItemFactory.create( + parent_location=self.course.location, + category='chapter', + display_name="Week 1", + modulestore=store, + publish_item=True, + start=datetime(2015, 3, 1, tzinfo=UTC), + ) + self.sequential = ItemFactory.create( + parent_location=self.chapter.location, + category='sequential', + display_name="Lesson 1", + modulestore=store, + publish_item=True, + start=datetime(2015, 3, 1, tzinfo=UTC), + ) + self.vertical = ItemFactory.create( + parent_location=self.sequential.location, + category='vertical', + display_name='Subsection 1', + modulestore=store, + publish_item=True, + start=datetime(2015, 4, 1, tzinfo=UTC), + ) + # unspecified start - should inherit from container + self.html_unit = ItemFactory.create( + parent_location=self.vertical.location, + category="html", + display_name="Html Content", + modulestore=store, + publish_item=False, + ) + + def reindex_course(self, store): + """ kick off complete reindex of the course """ + return CoursewareSearchIndexer.do_course_reindex(store, self.course.id) + + def index_recent_changes(self, store, since_time): + """ index course using recent changes """ + trigger_time = datetime.now(UTC) + return CoursewareSearchIndexer.index_course( + store, + self.course.id, + triggered_at=trigger_time, + reindex_age=(trigger_time - since_time) + ) + + def _test_indexing_course(self, store): + """ indexing course tests """ + searcher = self.get_search_engine() + response = searcher.search(field_dictionary={"course": unicode(self.course.id)}) + self.assertEqual(response["total"], 0) + + # Only published modules should be in the index + added_to_index = self.reindex_course(store) + self.assertEqual(added_to_index, 3) + response = searcher.search(field_dictionary={"course": unicode(self.course.id)}) + self.assertEqual(response["total"], 3) + + # Publish the vertical as is, and any unpublished children should now be available + self.publish_item(store, self.vertical.location) + self.reindex_course(store) + response = searcher.search(field_dictionary={"course": unicode(self.course.id)}) + self.assertEqual(response["total"], 4) + + def _test_not_indexing_unpublished_content(self, store): + """ add a new one, only appers in index once added """ + searcher = self.get_search_engine() + # Publish the vertical to start with + self.publish_item(store, self.vertical.location) + self.reindex_course(store) + response = searcher.search(field_dictionary={"course": unicode(self.course.id)}) + self.assertEqual(response["total"], 4) + + # Now add a new unit to the existing vertical + ItemFactory.create( + parent_location=self.vertical.location, + category="html", + display_name="Some other content", + publish_item=False, + modulestore=store, + ) + self.reindex_course(store) + response = searcher.search(field_dictionary={"course": unicode(self.course.id)}) + self.assertEqual(response["total"], 4) + + # Now publish it and we should find it + # Publish the vertical as is, and everything should be available + self.publish_item(store, self.vertical.location) + self.reindex_course(store) + response = searcher.search(field_dictionary={"course": unicode(self.course.id)}) + self.assertEqual(response["total"], 5) + + def _test_deleting_item(self, store): + """ test deleting an item """ + searcher = self.get_search_engine() + # Publish the vertical to start with + self.publish_item(store, self.vertical.location) + self.reindex_course(store) + response = searcher.search(field_dictionary={"course": unicode(self.course.id)}) + self.assertEqual(response["total"], 4) + + # just a delete should not change anything + self.delete_item(store, self.html_unit.location) + self.reindex_course(store) + response = searcher.search(field_dictionary={"course": unicode(self.course.id)}) + self.assertEqual(response["total"], 4) + + # but after publishing, we should no longer find the html_unit + self.publish_item(store, self.vertical.location) + self.reindex_course(store) + response = searcher.search(field_dictionary={"course": unicode(self.course.id)}) + self.assertEqual(response["total"], 3) + + def _test_not_indexable(self, store): + """ test not indexable items """ + searcher = self.get_search_engine() + # Publish the vertical to start with + self.publish_item(store, self.vertical.location) + self.reindex_course(store) + response = searcher.search(field_dictionary={"course": unicode(self.course.id)}) + self.assertEqual(response["total"], 4) + + # Add a non-indexable item + ItemFactory.create( + parent_location=self.vertical.location, + category="problem", + display_name="Some other content", + publish_item=False, + modulestore=store, + ) + self.reindex_course(store) + response = searcher.search(field_dictionary={"course": unicode(self.course.id)}) + self.assertEqual(response["total"], 4) + + # even after publishing, we should not find the non-indexable item + self.publish_item(store, self.vertical.location) + self.reindex_course(store) + response = searcher.search(field_dictionary={"course": unicode(self.course.id)}) + self.assertEqual(response["total"], 4) + + def _test_start_date_propagation(self, store): + """ make sure that the start date is applied at the right level """ + searcher = self.get_search_engine() + early_date = self.course.start + later_date = self.vertical.start + + # Publish the vertical + self.publish_item(store, self.vertical.location) + self.reindex_course(store) + response = searcher.search(field_dictionary={"course": unicode(self.course.id)}) + self.assertEqual(response["total"], 4) + + results = response["results"] + date_map = { + unicode(self.chapter.location): early_date, + unicode(self.sequential.location): early_date, + unicode(self.vertical.location): later_date, + unicode(self.html_unit.location): later_date, + } + for result in results: + self.assertEqual(result["data"]["start_date"], date_map[result["data"]["id"]]) + + @patch('django.conf.settings.SEARCH_ENGINE', None) + def _test_search_disabled(self, store): + """ if search setting has it as off, confirm that nothing is indexed """ + indexed_count = self.reindex_course(store) + self.assertFalse(indexed_count) + + def _test_time_based_index(self, store): + """ Make sure that a time based request to index does not index anything too old """ + self.publish_item(store, self.vertical.location) + indexed_count = self.reindex_course(store) + self.assertEqual(indexed_count, 4) + + # Add a new sequential + sequential2 = ItemFactory.create( + parent_location=self.chapter.location, + category='sequential', + display_name='Section 2', + modulestore=store, + publish_item=True, + start=datetime(2015, 3, 1, tzinfo=UTC), + ) + + # add a new vertical + vertical2 = ItemFactory.create( + parent_location=sequential2.location, + category='vertical', + display_name='Subsection 2', + modulestore=store, + publish_item=True, + ) + ItemFactory.create( + parent_location=vertical2.location, + category="html", + display_name="Some other content", + publish_item=False, + modulestore=store, + ) + + before_time = datetime.now(UTC) + self.publish_item(store, vertical2.location) + # index based on time, will include an index of the origin sequential + # because it is in a common subtree but not of the original vertical + # because the original sequential's subtree is too old + new_indexed_count = self.index_recent_changes(store, before_time) + self.assertEqual(new_indexed_count, 5) + + # full index again + indexed_count = self.reindex_course(store) + self.assertEqual(indexed_count, 7) + + @patch('django.conf.settings.SEARCH_ENGINE', 'search.tests.utils.ErroringIndexEngine') + def _test_exception(self, store): + """ Test that exception within indexing yields a SearchIndexingError """ + self.publish_item(store, self.vertical.location) + with self.assertRaises(SearchIndexingError): + self.reindex_course(store) + + @ddt.data(ModuleStoreEnum.Type.mongo, ModuleStoreEnum.Type.split) + def test_indexing_course(self, store_type): + self._perform_test_using_store(store_type, self._test_indexing_course) + + @ddt.data(ModuleStoreEnum.Type.mongo, ModuleStoreEnum.Type.split) + def test_not_indexing_unpublished_content(self, store_type): + self._perform_test_using_store(store_type, self._test_not_indexing_unpublished_content) + + @ddt.data(ModuleStoreEnum.Type.mongo, ModuleStoreEnum.Type.split) + def test_deleting_item(self, store_type): + self._perform_test_using_store(store_type, self._test_deleting_item) + + @ddt.data(ModuleStoreEnum.Type.mongo, ModuleStoreEnum.Type.split) + def test_not_indexable(self, store_type): + self._perform_test_using_store(store_type, self._test_not_indexable) + + @ddt.data(ModuleStoreEnum.Type.mongo, ModuleStoreEnum.Type.split) + def test_start_date_propagation(self, store_type): + self._perform_test_using_store(store_type, self._test_start_date_propagation) + + @ddt.data(ModuleStoreEnum.Type.mongo, ModuleStoreEnum.Type.split) + def test_search_disabled(self, store_type): + self._perform_test_using_store(store_type, self._test_search_disabled) + + @ddt.data(ModuleStoreEnum.Type.mongo, ModuleStoreEnum.Type.split) + def test_time_based_index(self, store_type): + self._perform_test_using_store(store_type, self._test_time_based_index) + + @ddt.data(ModuleStoreEnum.Type.mongo, ModuleStoreEnum.Type.split) + def test_exception(self, store_type): + self._perform_test_using_store(store_type, self._test_exception) + + +@patch('django.conf.settings.SEARCH_ENGINE', 'search.tests.utils.ForceRefreshElasticSearchEngine') +@ddt.ddt +class TestLargeCourseDeletions(MixedWithOptionsTestCase): + """ Tests to excerise deleting items from a course """ + + def _clean_course_id(self): + """ Clean all documents from the index that have a specific course provided """ + if self.course_id: + searcher = self.get_search_engine() + response = searcher.search(field_dictionary={"course": self.course_id}) + while response["total"] > 0: + for item in response["results"]: + searcher.remove(DOCUMENT_TYPE, item["data"]["id"]) + searcher.remove(DOCUMENT_TYPE, item["data"]["id"]) + response = searcher.search(field_dictionary={"course": self.course_id}) + self.course_id = None + + def setUp(self): + super(TestLargeCourseDeletions, self).setUp() + self.course_id = None + + def tearDown(self): + super(TestLargeCourseDeletions, self).tearDown() + self._clean_course_id() + + def assert_search_count(self, expected_count): + """ Check that the search within this course will yield the expected number of results """ + searcher = self.get_search_engine() + response = searcher.search(field_dictionary={"course": self.course_id}) + self.assertEqual(response["total"], expected_count) + + def _do_test_large_course_deletion(self, store, load_factor): + """ Test that deleting items from a course works even when present within a very large course """ + def id_list(top_parent_object): + """ private function to get ids from object down the tree """ + list_of_ids = [unicode(top_parent_object.location)] + for child in top_parent_object.get_children(): + list_of_ids.extend(id_list(child)) + return list_of_ids + + course, course_size = create_large_course(store, load_factor) + self.course_id = unicode(course.id) + + # index full course + CoursewareSearchIndexer.do_course_reindex(store, course.id) + + self.assert_search_count(course_size) + + # reload course to allow us to delete one single unit + course = store.get_course(course.id, depth=1) + + # delete the first chapter + chapter_to_delete = course.get_children()[0] + self.delete_item(store, chapter_to_delete.location) + + # index and check correctness + CoursewareSearchIndexer.do_course_reindex(store, course.id) + deleted_count = 1 + load_factor + (load_factor ** 2) + (load_factor ** 3) + self.assert_search_count(course_size - deleted_count) + + def _test_large_course_deletion(self, store): + """ exception catch-ing wrapper around large test course test with deletions """ + # load_factor of 6 (1296 items) takes about 5 minutes to run on devstack on a laptop + # load_factor of 7 (2401 items) takes about 70 minutes to run on devstack on a laptop + # load_factor of 8 (4096 items) takes just under 3 hours to run on devstack on a laptop + load_factor = 6 + try: + self._do_test_large_course_deletion(store, load_factor) + except: # pylint: disable=bare-except + # Catch any exception here to see when we fail + print "Failed with load_factor of {}".format(load_factor) + + @skip(("This test is to see how we handle very large courses, to ensure that the delete" + "procedure works smoothly - too long to run during the normal course of things")) + @ddt.data(ModuleStoreEnum.Type.mongo, ModuleStoreEnum.Type.split) + def test_large_course_deletion(self, store_type): + self._perform_test_using_store(store_type, self._test_large_course_deletion) + + +class TestTaskExecution(ModuleStoreTestCase): + """ + Set of tests to ensure that the task code will do the right thing when + executed directly. The test course gets created without the listener + being present, which allows us to ensure that when the listener is + executed, it is done as expected. + """ + + def setUp(self): + super(TestTaskExecution, self).setUp() + SignalHandler.course_published.disconnect(listen_for_course_publish) + self.course = CourseFactory.create(start=datetime(2015, 3, 1, tzinfo=UTC)) + + self.chapter = ItemFactory.create( + parent_location=self.course.location, + category='chapter', + display_name="Week 1", + publish_item=True, + start=datetime(2015, 3, 1, tzinfo=UTC), + ) + self.sequential = ItemFactory.create( + parent_location=self.chapter.location, + category='sequential', + display_name="Lesson 1", + publish_item=True, + start=datetime(2015, 3, 1, tzinfo=UTC), + ) + self.vertical = ItemFactory.create( + parent_location=self.sequential.location, + category='vertical', + display_name='Subsection 1', + publish_item=True, + start=datetime(2015, 4, 1, tzinfo=UTC), + ) + # unspecified start - should inherit from container + self.html_unit = ItemFactory.create( + parent_location=self.vertical.location, + category="html", + display_name="Html Content", + publish_item=False, + ) + + def test_task_indexing_course(self): + """ Making sure that the receiver correctly fires off the task when invoked by signal """ + searcher = SearchEngine.get_search_engine(INDEX_NAME) + response = searcher.search(field_dictionary={"course": unicode(self.course.id)}) + self.assertEqual(response["total"], 0) + + #update_search_index(unicode(self.course.id), datetime.now(UTC).isoformat()) + listen_for_course_publish(self, self.course.id) + + # Note that this test will only succeed if celery is working in inline mode + response = searcher.search(field_dictionary={"course": unicode(self.course.id)}) + self.assertEqual(response["total"], 3) diff --git a/cms/djangoapps/contentstore/tests/test_import.py b/cms/djangoapps/contentstore/tests/test_import.py index 4efb1cb8ec..f69f450493 100644 --- a/cms/djangoapps/contentstore/tests/test_import.py +++ b/cms/djangoapps/contentstore/tests/test_import.py @@ -28,7 +28,7 @@ TEST_DATA_DIR = settings.COMMON_TEST_DATA_ROOT @ddt.ddt -@override_settings(CONTENTSTORE=TEST_DATA_CONTENTSTORE) +@override_settings(CONTENTSTORE=TEST_DATA_CONTENTSTORE, SEARCH_ENGINE=None) class ContentStoreImportTest(SignalDisconnectTestMixin, ModuleStoreTestCase): """ Tests that rely on the toy and test_import_course courses. diff --git a/cms/djangoapps/contentstore/tests/test_libraries.py b/cms/djangoapps/contentstore/tests/test_libraries.py index 8a83dfd7cc..a76efbb3e9 100644 --- a/cms/djangoapps/contentstore/tests/test_libraries.py +++ b/cms/djangoapps/contentstore/tests/test_libraries.py @@ -459,6 +459,7 @@ class TestLibraries(LibraryTestCase): @ddt.ddt +@patch('django.conf.settings.SEARCH_ENGINE', None) class TestLibraryAccess(SignalDisconnectTestMixin, LibraryTestCase): """ Test Roles and Permissions related to Content Libraries diff --git a/cms/djangoapps/contentstore/views/course.py b/cms/djangoapps/contentstore/views/course.py index 7db80dc44f..d205457faa 100644 --- a/cms/djangoapps/contentstore/views/course.py +++ b/cms/djangoapps/contentstore/views/course.py @@ -22,7 +22,6 @@ from edxmako.shortcuts import render_to_response from xmodule.course_module import DEFAULT_START_DATE from xmodule.error_module import ErrorDescriptor from xmodule.modulestore.django import modulestore -from xmodule.modulestore.courseware_index import CoursewareSearchIndexer, SearchIndexingError from xmodule.contentstore.content import StaticContent from xmodule.tabs import PDFTextbookTabs from xmodule.partitions.partitions import UserPartition @@ -35,6 +34,7 @@ from openedx.core.djangoapps.course_groups.partition_scheme import get_cohorted_ from django_future.csrf import ensure_csrf_cookie from contentstore.course_info_model import get_course_updates, update_course_updates, delete_course_update +from contentstore.courseware_index import CoursewareSearchIndexer, SearchIndexingError from contentstore.utils import ( add_instructor, initialize_permissions, diff --git a/cms/djangoapps/contentstore/views/tests/test_course_index.py b/cms/djangoapps/contentstore/views/tests/test_course_index.py index 6c11083966..2c7ee14748 100644 --- a/cms/djangoapps/contentstore/views/tests/test_course_index.py +++ b/cms/djangoapps/contentstore/views/tests/test_course_index.py @@ -6,28 +6,28 @@ import lxml import datetime import os import mock +import pytz -from contentstore.tests.utils import CourseTestCase -from contentstore.utils import reverse_course_url, reverse_library_url, add_instructor -from student.auth import has_course_author_access -from contentstore.views.course import course_outline_initial_state, reindex_course_and_check_access -from contentstore.views.item import create_xblock_info, VisibilityState -from course_action_state.models import CourseRerunState -from util.date_utils import get_default_time_display -from xmodule.modulestore import ModuleStoreEnum -from xmodule.modulestore.courseware_index import CoursewareSearchIndexer -from xmodule.modulestore.exceptions import ItemNotFoundError -from xmodule.modulestore.django import modulestore -from xmodule.modulestore.tests.factories import CourseFactory, ItemFactory, LibraryFactory -from xmodule.modulestore.courseware_index import SearchIndexingError -from opaque_keys.edx.locator import CourseLocator -from student.tests.factories import UserFactory -from course_action_state.managers import CourseRerunUIStateManager from django.conf import settings from django.core.exceptions import PermissionDenied from django.utils.translation import ugettext as _ + +from contentstore.courseware_index import CoursewareSearchIndexer, SearchIndexingError +from contentstore.tests.utils import CourseTestCase +from contentstore.utils import reverse_course_url, reverse_library_url, add_instructor +from contentstore.views.course import course_outline_initial_state, reindex_course_and_check_access +from contentstore.views.item import create_xblock_info, VisibilityState +from course_action_state.managers import CourseRerunUIStateManager +from course_action_state.models import CourseRerunState +from opaque_keys.edx.locator import CourseLocator from search.api import perform_search -import pytz +from student.auth import has_course_author_access +from student.tests.factories import UserFactory +from util.date_utils import get_default_time_display +from xmodule.modulestore import ModuleStoreEnum +from xmodule.modulestore.exceptions import ItemNotFoundError +from xmodule.modulestore.django import modulestore +from xmodule.modulestore.tests.factories import CourseFactory, ItemFactory, LibraryFactory class TestCourseIndex(CourseTestCase): @@ -346,8 +346,6 @@ class TestCourseReIndex(CourseTestCase): Unit tests for the course outline. """ - TEST_INDEX_FILENAME = "test_root/index_file.dat" - SUCCESSFUL_RESPONSE = _("Course has been successfully reindexed.") def setUp(self): @@ -379,12 +377,6 @@ class TestCourseReIndex(CourseTestCase): ) - # create test file in which index for this test will live - with open(self.TEST_INDEX_FILENAME, "w+") as index_file: - json.dump({}, index_file) - - self.addCleanup(os.remove, self.TEST_INDEX_FILENAME) - def test_reindex_course(self): """ Verify that course gets reindexed. @@ -445,25 +437,19 @@ class TestCourseReIndex(CourseTestCase): """ Test json response with real data """ - # Check results not indexed + # results are indexed because they are published from ItemFactory response = perform_search( "unique", user=self.user, size=10, from_=0, course_id=unicode(self.course.id)) - self.assertEqual(response['results'], []) + self.assertEqual(response['total'], 1) # Start manual reindex reindex_course_and_check_access(self.course.id, self.user) - self.html.display_name = "My expanded HTML" - modulestore().update_item(self.html, ModuleStoreEnum.UserID.test) - - # Start manual reindex - reindex_course_and_check_access(self.course.id, self.user) - - # Check results indexed now + # Check results remain the same response = perform_search( "unique", user=self.user, @@ -477,14 +463,14 @@ class TestCourseReIndex(CourseTestCase): """ Test json response with mocked error data for video """ - # Check results not indexed + # results are indexed because they are published from ItemFactory response = perform_search( "unique", user=self.user, size=10, from_=0, course_id=unicode(self.course.id)) - self.assertEqual(response['results'], []) + self.assertEqual(response['total'], 1) # set mocked exception response err = SearchIndexingError @@ -499,14 +485,14 @@ class TestCourseReIndex(CourseTestCase): """ Test json response with mocked error data for html """ - # Check results not indexed + # results are indexed because they are published from ItemFactory response = perform_search( "unique", user=self.user, size=10, from_=0, course_id=unicode(self.course.id)) - self.assertEqual(response['results'], []) + self.assertEqual(response['total'], 1) # set mocked exception response err = SearchIndexingError @@ -521,14 +507,14 @@ class TestCourseReIndex(CourseTestCase): """ Test json response with mocked error data for sequence """ - # Check results not indexed + # results are indexed because they are published from ItemFactory response = perform_search( "unique", user=self.user, size=10, from_=0, course_id=unicode(self.course.id)) - self.assertEqual(response['results'], []) + self.assertEqual(response['total'], 1) # set mocked exception response err = Exception @@ -559,27 +545,21 @@ class TestCourseReIndex(CourseTestCase): def test_indexing_responses(self): """ - Test add_to_search_index response with real data + Test do_course_reindex response with real data """ - # Check results not indexed + # results are indexed because they are published from ItemFactory response = perform_search( "unique", user=self.user, size=10, from_=0, course_id=unicode(self.course.id)) - self.assertEqual(response['results'], []) + self.assertEqual(response['total'], 1) # Start manual reindex CoursewareSearchIndexer.do_course_reindex(modulestore(), self.course.id) - self.html.display_name = "My expanded HTML" - modulestore().update_item(self.html, ModuleStoreEnum.UserID.test) - - # Start manual reindex - CoursewareSearchIndexer.do_course_reindex(modulestore(), self.course.id) - - # Check results indexed now + # Check results are the same following reindex response = perform_search( "unique", user=self.user, @@ -591,16 +571,16 @@ class TestCourseReIndex(CourseTestCase): @mock.patch('xmodule.video_module.VideoDescriptor.index_dictionary') def test_indexing_video_error_responses(self, mock_index_dictionary): """ - Test add_to_search_index response with mocked error data for video + Test do_course_reindex response with mocked error data for video """ - # Check results not indexed + # results are indexed because they are published from ItemFactory response = perform_search( "unique", user=self.user, size=10, from_=0, course_id=unicode(self.course.id)) - self.assertEqual(response['results'], []) + self.assertEqual(response['total'], 1) # set mocked exception response err = Exception @@ -613,16 +593,16 @@ class TestCourseReIndex(CourseTestCase): @mock.patch('xmodule.html_module.HtmlDescriptor.index_dictionary') def test_indexing_html_error_responses(self, mock_index_dictionary): """ - Test add_to_search_index response with mocked error data for html + Test do_course_reindex response with mocked error data for html """ - # Check results not indexed + # results are indexed because they are published from ItemFactory response = perform_search( "unique", user=self.user, size=10, from_=0, course_id=unicode(self.course.id)) - self.assertEqual(response['results'], []) + self.assertEqual(response['total'], 1) # set mocked exception response err = Exception @@ -635,16 +615,16 @@ class TestCourseReIndex(CourseTestCase): @mock.patch('xmodule.seq_module.SequenceDescriptor.index_dictionary') def test_indexing_seq_error_responses(self, mock_index_dictionary): """ - Test add_to_search_index response with mocked error data for sequence + Test do_course_reindex response with mocked error data for sequence """ - # Check results not indexed + # results are indexed because they are published from ItemFactory response = perform_search( "unique", user=self.user, size=10, from_=0, course_id=unicode(self.course.id)) - self.assertEqual(response['results'], []) + self.assertEqual(response['total'], 1) # set mocked exception response err = Exception diff --git a/cms/envs/test.py b/cms/envs/test.py index a5e330953d..87b03469b4 100644 --- a/cms/envs/test.py +++ b/cms/envs/test.py @@ -268,10 +268,6 @@ VIDEO_CDN_URL = { # Courseware Search Index FEATURES['ENABLE_COURSEWARE_INDEX'] = True SEARCH_ENGINE = "search.tests.mock_search_engine.MockSearchEngine" -# Path at which to store the mock index -MOCK_SEARCH_BACKING_FILE = ( - TEST_ROOT / "index_file.dat" # pylint: disable=no-value-for-parameter -).abspath() # Dummy secret key for dev/test SECRET_KEY = '85920908f28904ed733fe576320db18cabd7b6cd' diff --git a/common/lib/xmodule/xmodule/modulestore/courseware_index.py b/common/lib/xmodule/xmodule/modulestore/courseware_index.py deleted file mode 100644 index ff55b1a721..0000000000 --- a/common/lib/xmodule/xmodule/modulestore/courseware_index.py +++ /dev/null @@ -1,176 +0,0 @@ -""" Code to allow module store to interface with courseware index """ -from __future__ import absolute_import - -import logging - -from django.utils.translation import ugettext as _ -from opaque_keys.edx.locator import CourseLocator -from search.search_engine_base import SearchEngine -from eventtracking import tracker - -from . import ModuleStoreEnum -from .exceptions import ItemNotFoundError - - -# Use default index and document names for now -INDEX_NAME = "courseware_index" -DOCUMENT_TYPE = "courseware_content" - -log = logging.getLogger('edx.modulestore') - - -class SearchIndexingError(Exception): - """ Indicates some error(s) occured during indexing """ - - def __init__(self, message, error_list): - super(SearchIndexingError, self).__init__(message) - self.error_list = error_list - - -class CoursewareSearchIndexer(object): - """ - Class to perform indexing for courseware search from different modulestores - """ - - @staticmethod - def add_to_search_index(modulestore, location, delete=False, raise_on_error=False): - """ - Add to courseware search index from given location and its children - """ - error_list = [] - indexed_count = 0 - # TODO - inline for now, need to move this out to a celery task - searcher = SearchEngine.get_search_engine(INDEX_NAME) - if not searcher: - return - - if isinstance(location, CourseLocator): - course_key = location - else: - course_key = location.course_key - - location_info = { - "course": unicode(course_key), - } - - def _fetch_item(item_location): - """ Fetch the item from the modulestore location, log if not found, but continue """ - try: - if isinstance(item_location, CourseLocator): - item = modulestore.get_course(item_location) - else: - item = modulestore.get_item(item_location, revision=ModuleStoreEnum.RevisionOption.published_only) - except ItemNotFoundError: - log.warning('Cannot find: %s', item_location) - return None - - return item - - def index_item_location(item_location, current_start_date): - """ add this item to the search index """ - item = _fetch_item(item_location) - if not item: - return - - is_indexable = hasattr(item, "index_dictionary") - # if it's not indexable and it does not have children, then ignore - if not is_indexable and not item.has_children: - return - - # if it has a defined start, then apply it and to it's children - if item.start and (not current_start_date or item.start > current_start_date): - current_start_date = item.start - - if item.has_children: - for child_loc in item.children: - index_item_location(child_loc, current_start_date) - - item_index = {} - item_index_dictionary = item.index_dictionary() if is_indexable else None - - # if it has something to add to the index, then add it - if item_index_dictionary: - try: - item_index.update(location_info) - item_index.update(item_index_dictionary) - item_index['id'] = unicode(item.scope_ids.usage_id) - if current_start_date: - item_index['start_date'] = current_start_date - - searcher.index(DOCUMENT_TYPE, item_index) - except Exception as err: # pylint: disable=broad-except - # broad exception so that index operation does not fail on one item of many - log.warning('Could not index item: %s - %s', item_location, unicode(err)) - error_list.append(_('Could not index item: {}').format(item_location)) - - def remove_index_item_location(item_location): - """ remove this item from the search index """ - item = _fetch_item(item_location) - if item: - if item.has_children: - for child_loc in item.children: - remove_index_item_location(child_loc) - - searcher.remove(DOCUMENT_TYPE, unicode(item.scope_ids.usage_id)) - - try: - if delete: - remove_index_item_location(location) - else: - index_item_location(location, None) - indexed_count += 1 - except Exception as err: # pylint: disable=broad-except - # broad exception so that index operation does not prevent the rest of the application from working - log.exception( - "Indexing error encountered, courseware index may be out of date %s - %s", - course_key, - unicode(err) - ) - error_list.append(_('General indexing error occurred')) - - if raise_on_error and error_list: - raise SearchIndexingError(_('Error(s) present during indexing'), error_list) - - return indexed_count - - @classmethod - def do_publish_index(cls, modulestore, location, delete=False, raise_on_error=False): - """ - Add to courseware search index published section and children - """ - indexed_count = cls.add_to_search_index(modulestore, location, delete, raise_on_error) - cls._track_index_request('edx.course.index.published', indexed_count, str(location)) - return indexed_count - - @classmethod - def do_course_reindex(cls, modulestore, course_key): - """ - (Re)index all content within the given course - """ - indexed_count = cls.add_to_search_index(modulestore, course_key, delete=False, raise_on_error=True) - cls._track_index_request('edx.course.index.reindexed', indexed_count) - return indexed_count - - @staticmethod - def _track_index_request(event_name, indexed_count, location=None): - """Track content index requests. - - Arguments: - location (str): The ID of content to be indexed. - event_name (str): Name of the event to be logged. - Returns: - None - - """ - data = { - "indexed_count": indexed_count, - 'category': 'courseware_index', - } - - if location: - data['location_id'] = location - - tracker.emit( - event_name, - data - ) diff --git a/common/lib/xmodule/xmodule/modulestore/mongo/draft.py b/common/lib/xmodule/xmodule/modulestore/mongo/draft.py index 4516165370..9c8f327ad9 100644 --- a/common/lib/xmodule/xmodule/modulestore/mongo/draft.py +++ b/common/lib/xmodule/xmodule/modulestore/mongo/draft.py @@ -12,7 +12,6 @@ import logging from opaque_keys.edx.locations import Location from xmodule.exceptions import InvalidVersionError from xmodule.modulestore import ModuleStoreEnum -from xmodule.modulestore.courseware_index import CoursewareSearchIndexer from xmodule.modulestore.exceptions import ( ItemNotFoundError, DuplicateItemError, DuplicateCourseError, InvalidBranchSetting ) @@ -565,10 +564,6 @@ class DraftModuleStore(MongoModuleStore): ) self._delete_subtree(location, as_functions) - # Remove this location from the courseware search index so that searches - # will refrain from showing it as a result - CoursewareSearchIndexer.add_to_search_index(self, location, delete=True) - def _delete_subtree(self, location, as_functions, draft_only=False): """ Internal method for deleting all of the subtree whose revisions match the as_functions @@ -745,9 +740,6 @@ class DraftModuleStore(MongoModuleStore): self._flag_publish_event(course_key) - # Now it's been published, add the object to the courseware search index so that it appears in search results - CoursewareSearchIndexer.do_publish_index(self, location) - return self.get_item(as_published(location)) def unpublish(self, location, user_id, **kwargs): diff --git a/common/lib/xmodule/xmodule/modulestore/split_mongo/split_draft.py b/common/lib/xmodule/xmodule/modulestore/split_mongo/split_draft.py index 06ce24b1b9..0d975fcf55 100644 --- a/common/lib/xmodule/xmodule/modulestore/split_mongo/split_draft.py +++ b/common/lib/xmodule/xmodule/modulestore/split_mongo/split_draft.py @@ -5,7 +5,6 @@ Module for the dual-branch fall-back Draft->Published Versioning ModuleStore from xmodule.modulestore.split_mongo.split import SplitMongoModuleStore, EXCLUDE_ALL from xmodule.exceptions import InvalidVersionError from xmodule.modulestore import ModuleStoreEnum -from xmodule.modulestore.courseware_index import CoursewareSearchIndexer from xmodule.modulestore.exceptions import InsufficientSpecificationError, ItemNotFoundError from xmodule.modulestore.draft_and_published import ( ModuleStoreDraftAndPublished, DIRECT_ONLY_CATEGORIES, UnsupportedRevisionError @@ -217,10 +216,6 @@ class DraftVersioningModuleStore(SplitMongoModuleStore, ModuleStoreDraftAndPubli if branch == ModuleStoreEnum.BranchName.draft and branched_location.block_type in DIRECT_ONLY_CATEGORIES: self.publish(parent_loc.version_agnostic(), user_id, blacklist=EXCLUDE_ALL, **kwargs) - # Remove this location from the courseware search index so that searches - # will refrain from showing it as a result - CoursewareSearchIndexer.add_to_search_index(self, location, delete=True) - def _map_revision_to_branch(self, key, revision=None): """ Maps RevisionOptions to BranchNames, inserting them into the key @@ -366,9 +361,6 @@ class DraftVersioningModuleStore(SplitMongoModuleStore, ModuleStoreDraftAndPubli self._flag_publish_event(location.course_key) - # Now it's been published, add the object to the courseware search index so that it appears in search results - CoursewareSearchIndexer.do_publish_index(self, location) - return self.get_item(location.for_branch(ModuleStoreEnum.BranchName.published), **kwargs) def unpublish(self, location, user_id, **kwargs): diff --git a/common/lib/xmodule/xmodule/vertical_block.py b/common/lib/xmodule/xmodule/vertical_block.py index 293c865790..1f2b4b874f 100644 --- a/common/lib/xmodule/xmodule/vertical_block.py +++ b/common/lib/xmodule/xmodule/vertical_block.py @@ -128,3 +128,23 @@ class VerticalBlock(SequenceFields, XModuleFields, StudioEditableBlock, XmlParse # TODO: Remove this when studio better supports editing of pure XBlocks. fragment.add_javascript('VerticalBlock = XModule.Descriptor;') return fragment + + def index_dictionary(self): + """ + Return dictionary prepared with module content and type for indexing. + """ + # return key/value fields in a Python dict object + # values may be numeric / string or dict + # default implementation is an empty dict + xblock_body = super(VerticalBlock, self).index_dictionary() + index_body = { + "display_name": self.display_name, + } + if "content" in xblock_body: + xblock_body["content"].update(index_body) + else: + xblock_body["content"] = index_body + # We use "Sequence" for sequentials and verticals + xblock_body["content_type"] = "Sequence" + + return xblock_body diff --git a/common/test/acceptance/tests/lms/test_lms_courseware_search.py b/common/test/acceptance/tests/lms/test_lms_courseware_search.py index c648c9e5dc..9a31f44b62 100644 --- a/common/test/acceptance/tests/lms/test_lms_courseware_search.py +++ b/common/test/acceptance/tests/lms/test_lms_courseware_search.py @@ -84,17 +84,11 @@ class CoursewareSearchTest(UniqueCourseTest): AutoAuthPage(self.browser, username=username, email=email, course_id=self.course_id, staff=staff).visit() - def test_page_existence(self): - """ - Make sure that the page is accessible. - """ - self._auto_auth(self.USERNAME, self.EMAIL, False) - self.courseware_search_page.visit() - def _studio_publish_content(self, section_index): """ Publish content on studio course page under specified section """ + self._auto_auth(self.STAFF_USERNAME, self.STAFF_EMAIL, True) self.course_outline.visit() subsection = self.course_outline.section_at(section_index).subsection_at(0) subsection.expand_subsection() @@ -105,6 +99,7 @@ class CoursewareSearchTest(UniqueCourseTest): """ Edit chapter name on studio course page under specified section """ + self._auto_auth(self.STAFF_USERNAME, self.STAFF_EMAIL, True) self.course_outline.visit() section = self.course_outline.section_at(section_index) section.change_name(self.EDITED_CHAPTER_NAME) @@ -114,6 +109,7 @@ class CoursewareSearchTest(UniqueCourseTest): Add content on studio course page under specified section """ + self._auto_auth(self.STAFF_USERNAME, self.STAFF_EMAIL, True) # create a unit in course outline self.course_outline.visit() subsection = self.course_outline.section_at(section_index).subsection_at(0) @@ -140,30 +136,44 @@ class CoursewareSearchTest(UniqueCourseTest): self.course_outline.start_reindex() self.course_outline.wait_for_ajax() + def _search_for_content(self, search_term): + """ + Login and search for specific content + + Arguments: + search_term - term to be searched for + + Returns: + (bool) True if search term is found in resulting content; False if not found + """ + self._auto_auth(self.USERNAME, self.EMAIL, False) + self.courseware_search_page.visit() + self.courseware_search_page.search_for_term(search_term) + return search_term in self.courseware_search_page.search_results.html[0] + + def test_page_existence(self): + """ + Make sure that the page is accessible. + """ + self._auto_auth(self.USERNAME, self.EMAIL, False) + self.courseware_search_page.visit() + def test_search(self): """ Make sure that you can search for something. """ # Create content in studio without publishing. - self._auto_auth(self.STAFF_USERNAME, self.STAFF_EMAIL, True) self._studio_add_content(0) # Do a search, there should be no results shown. - self._auto_auth(self.USERNAME, self.EMAIL, False) - self.courseware_search_page.visit() - self.courseware_search_page.search_for_term(self.SEARCH_STRING) - assert self.SEARCH_STRING not in self.courseware_search_page.search_results.html[0] + self.assertFalse(self._search_for_content(self.SEARCH_STRING)) # Publish in studio to trigger indexing. - self._auto_auth(self.STAFF_USERNAME, self.STAFF_EMAIL, True) self._studio_publish_content(0) # Do the search again, this time we expect results. - self._auto_auth(self.USERNAME, self.EMAIL, False) - self.courseware_search_page.visit() - self.courseware_search_page.search_for_term(self.SEARCH_STRING) - assert self.SEARCH_STRING in self.courseware_search_page.search_results.html[0] + self.assertTrue(self._search_for_content(self.SEARCH_STRING)) def test_reindex(self): """ @@ -171,24 +181,24 @@ class CoursewareSearchTest(UniqueCourseTest): """ # Create content in studio without publishing. - self._auto_auth(self.STAFF_USERNAME, self.STAFF_EMAIL, True) self._studio_add_content(1) + # Do a search, there should be no results shown. + self.assertFalse(self._search_for_content(self.EDITED_SEARCH_STRING)) + # Publish in studio to trigger indexing, and edit chapter name afterwards. self._studio_publish_content(1) + + # Do a ReIndex from studio to ensure that our stuff is updated before the next stage of the test + self._studio_reindex() + + # Search after publish, there should still be no results shown. + self.assertFalse(self._search_for_content(self.EDITED_SEARCH_STRING)) + self._studio_edit_chapter_name(1) - # Do a search, there should be no results shown. - self._auto_auth(self.USERNAME, self.EMAIL, False) - self.courseware_search_page.visit() - self.courseware_search_page.search_for_term(self.EDITED_SEARCH_STRING) - assert self.EDITED_SEARCH_STRING not in self.courseware_search_page.search_results.html[0] - - # Do a ReIndex from studio, to add edited chapter name + # Do a ReIndex from studio to ensure that our stuff is updated before the next stage of the test self._studio_reindex() # Do the search again, this time we expect results. - self._auto_auth(self.USERNAME, self.EMAIL, False) - self.courseware_search_page.visit() - self.courseware_search_page.search_for_term(self.EDITED_SEARCH_STRING) - assert self.EDITED_SEARCH_STRING in self.courseware_search_page.search_results.html[0] + self.assertTrue(self._search_for_content(self.EDITED_SEARCH_STRING)) diff --git a/requirements/edx/github.txt b/requirements/edx/github.txt index 8f9d1693d6..91a478a2b4 100644 --- a/requirements/edx/github.txt +++ b/requirements/edx/github.txt @@ -38,7 +38,7 @@ git+https://github.com/mitocw/django-cas.git@60a5b8e5a62e63e0d5d224a87f0b489201a -e git+https://github.com/edx/edx-val.git@d6087908aa3dd05ceaa7f56a21284f86c53cb3f0#egg=edx-val -e git+https://github.com/pmitros/RecommenderXBlock.git@9b07e807c89ba5761827d0387177f71aa57ef056#egg=recommender-xblock -e git+https://github.com/edx/edx-milestones.git@547f2250ee49e73ce8d7ff4e78ecf1b049892510#egg=edx-milestones --e git+https://github.com/edx/edx-search.git@5eb3853fe74c8fdd9d82d36b8ab0058309390f86#egg=edx-search +-e git+https://github.com/edx/edx-search.git@6ce8b25a2539370de32dc7cb643ae27c9b8f798d#egg=edx-search git+https://github.com/edx/edx-lint.git@8bf82a32ecb8598c415413df66f5232ab8d974e9#egg=edx_lint==0.2.1 -e git+https://github.com/edx/xblock-utils.git@581ed636c862b286002bb9a3724cc883570eb54c#egg=xblock-utils -e git+https://github.com/edx-solutions/xblock-google-drive.git@138e6fa0bf3a2013e904a085b9fed77dab7f3f21#egg=xblock-google-drive