diff --git a/cms/djangoapps/contentstore/__init__.py b/cms/djangoapps/contentstore/__init__.py index e69de29bb2..d3060371e5 100644 --- a/cms/djangoapps/contentstore/__init__.py +++ b/cms/djangoapps/contentstore/__init__.py @@ -0,0 +1,2 @@ +""" module init will register signal handlers """ +import contentstore.signals diff --git a/cms/djangoapps/contentstore/courseware_index.py b/cms/djangoapps/contentstore/courseware_index.py new file mode 100644 index 0000000000..9d8742f302 --- /dev/null +++ b/cms/djangoapps/contentstore/courseware_index.py @@ -0,0 +1,194 @@ +""" Code to allow module store to interface with courseware index """ +from __future__ import absolute_import + +from datetime import timedelta +import logging + +from django.conf import settings +from django.utils.translation import ugettext as _ +from eventtracking import tracker +from xmodule.modulestore import ModuleStoreEnum +from search.search_engine_base import SearchEngine + + +# Use default index and document names for now +INDEX_NAME = "courseware_index" +DOCUMENT_TYPE = "courseware_content" + +# REINDEX_AGE is the default amount of time that we look back for changes +# that might have happened. If we are provided with a time at which the +# indexing is triggered, then we know it is safe to only index items +# recently changed at that time. This is the time period that represents +# how far back from the trigger point to look back in order to index +REINDEX_AGE = timedelta(0, 60) # 60 seconds + +log = logging.getLogger('edx.modulestore') + + +def indexing_is_enabled(): + """ + Checks to see if the indexing feature is enabled + """ + return settings.FEATURES.get('ENABLE_COURSEWARE_INDEX', False) + + +class SearchIndexingError(Exception): + """ Indicates some error(s) occured during indexing """ + + def __init__(self, message, error_list): + super(SearchIndexingError, self).__init__(message) + self.error_list = error_list + + +class CoursewareSearchIndexer(object): + """ + Class to perform indexing for courseware search from different modulestores + """ + + @classmethod + def index_course(cls, modulestore, course_key, triggered_at=None, reindex_age=REINDEX_AGE): + """ + Process course for indexing + + Arguments: + course_key (CourseKey) - course identifier + + triggered_at (datetime) - provides time at which indexing was triggered; + useful for index updates - only things changed recently from that date + (within REINDEX_AGE above ^^) will have their index updated, others skip + updating their index but are still walked through in order to identify + which items may need to be removed from the index + If None, then a full reindex takes place + + Returns: + Number of items that have been added to the index + """ + error_list = [] + searcher = SearchEngine.get_search_engine(INDEX_NAME) + if not searcher: + return + + location_info = { + "course": unicode(course_key), + } + + # Wrap counter in dictionary - otherwise we seem to lose scope inside the embedded function `index_item` + indexed_count = { + "count": 0 + } + + # indexed_items is a list of all the items that we wish to remain in the + # index, whether or not we are planning to actually update their index. + # This is used in order to build a query to remove those items not in this + # list - those are ready to be destroyed + indexed_items = set() + + def index_item(item, skip_index=False): + """ + Add this item to the search index and indexed_items list + + Arguments: + item - item to add to index, its children will be processed recursively + + skip_index - simply walk the children in the tree, the content change is + older than the REINDEX_AGE window and would have been already indexed. + This should really only be passed from the recursive child calls when + this method has determined that it is safe to do so + """ + is_indexable = hasattr(item, "index_dictionary") + item_index_dictionary = item.index_dictionary() if is_indexable else None + # if it's not indexable and it does not have children, then ignore + if not item_index_dictionary and not item.has_children: + return + + item_id = unicode(item.scope_ids.usage_id) + indexed_items.add(item_id) + if item.has_children: + # determine if it's okay to skip adding the children herein based upon how recently any may have changed + skip_child_index = skip_index or \ + (triggered_at is not None and (triggered_at - item.subtree_edited_on) > reindex_age) + for child_item in item.get_children(): + index_item(child_item, skip_index=skip_child_index) + + if skip_index or not item_index_dictionary: + return + + item_index = {} + # if it has something to add to the index, then add it + try: + item_index.update(location_info) + item_index.update(item_index_dictionary) + item_index['id'] = item_id + if item.start: + item_index['start_date'] = item.start + + searcher.index(DOCUMENT_TYPE, item_index) + indexed_count["count"] += 1 + except Exception as err: # pylint: disable=broad-except + # broad exception so that index operation does not fail on one item of many + log.warning('Could not index item: %s - %r', item.location, err) + error_list.append(_('Could not index item: {}').format(item.location)) + + def remove_deleted_items(): + """ + remove any item that is present in the search index that is not present in updated list of indexed items + as we find items we can shorten the set of items to keep + """ + response = searcher.search( + doc_type=DOCUMENT_TYPE, + field_dictionary={"course": unicode(course_key)}, + exclude_ids=indexed_items + ) + result_ids = [result["data"]["id"] for result in response["results"]] + for result_id in result_ids: + searcher.remove(DOCUMENT_TYPE, result_id) + + try: + with modulestore.branch_setting(ModuleStoreEnum.RevisionOption.published_only): + course = modulestore.get_course(course_key, depth=None) + for item in course.get_children(): + index_item(item) + remove_deleted_items() + except Exception as err: # pylint: disable=broad-except + # broad exception so that index operation does not prevent the rest of the application from working + log.exception( + "Indexing error encountered, courseware index may be out of date %s - %r", + course_key, + err + ) + error_list.append(_('General indexing error occurred')) + + if error_list: + raise SearchIndexingError(_('Error(s) present during indexing'), error_list) + + return indexed_count["count"] + + @classmethod + def do_course_reindex(cls, modulestore, course_key): + """ + (Re)index all content within the given course, tracking the fact that a full reindex has taking place + """ + indexed_count = cls.index_course(modulestore, course_key) + if indexed_count: + cls._track_index_request('edx.course.index.reindexed', indexed_count) + return indexed_count + + @classmethod + def _track_index_request(cls, event_name, indexed_count): + """Track content index requests. + + Arguments: + event_name (str): Name of the event to be logged. + Returns: + None + + """ + data = { + "indexed_count": indexed_count, + 'category': 'courseware_index', + } + + tracker.emit( + event_name, + data + ) diff --git a/cms/djangoapps/contentstore/signals.py b/cms/djangoapps/contentstore/signals.py new file mode 100644 index 0000000000..df21a57ccd --- /dev/null +++ b/cms/djangoapps/contentstore/signals.py @@ -0,0 +1,19 @@ +""" receiver of course_published events in order to trigger indexing task """ +from datetime import datetime +from pytz import UTC + +from django.dispatch import receiver + +from xmodule.modulestore.django import SignalHandler +from contentstore.courseware_index import indexing_is_enabled + + +@receiver(SignalHandler.course_published) +def listen_for_course_publish(sender, course_key, **kwargs): # pylint: disable=unused-argument + """ + Receives signal and kicks off celery task to update search index + """ + # import here, because signal is registered at startup, but items in tasks are not yet able to be loaded + from .tasks import update_search_index + if indexing_is_enabled(): + update_search_index.delay(unicode(course_key), datetime.now(UTC).isoformat()) diff --git a/cms/djangoapps/contentstore/tasks.py b/cms/djangoapps/contentstore/tasks.py index 222ad7a44d..cebf5aee73 100644 --- a/cms/djangoapps/contentstore/tasks.py +++ b/cms/djangoapps/contentstore/tasks.py @@ -1,20 +1,25 @@ """ This file contains celery tasks for contentstore views """ - -from celery.task import task -from django.contrib.auth.models import User import json import logging -from xmodule.modulestore.django import modulestore -from xmodule.course_module import CourseFields +from celery.task import task +from celery.utils.log import get_task_logger +from datetime import datetime +from pytz import UTC -from xmodule.modulestore.exceptions import DuplicateCourseError, ItemNotFoundError -from course_action_state.models import CourseRerunState +from django.contrib.auth.models import User + +from contentstore.courseware_index import CoursewareSearchIndexer, SearchIndexingError from contentstore.utils import initialize_permissions +from course_action_state.models import CourseRerunState from opaque_keys.edx.keys import CourseKey +from xmodule.course_module import CourseFields +from xmodule.modulestore.django import modulestore +from xmodule.modulestore.exceptions import DuplicateCourseError, ItemNotFoundError -from edxval.api import copy_course_videos +LOGGER = get_task_logger(__name__) +FULL_COURSE_REINDEX_THRESHOLD = 1 @task() @@ -22,6 +27,9 @@ def rerun_course(source_course_key_string, destination_course_key_string, user_i """ Reruns a course in a new celery task. """ + # import here, at top level this import prevents the celery workers from starting up correctly + from edxval.api import copy_course_videos + try: # deserialize the payload source_course_key = CourseKey.from_string(source_course_key_string) @@ -72,3 +80,21 @@ def deserialize_fields(json_fields): for field_name, value in fields.iteritems(): fields[field_name] = getattr(CourseFields, field_name).from_json(value) return fields + + +@task() +def update_search_index(course_id, triggered_time_isoformat): + """ Updates course search index. """ + try: + course_key = CourseKey.from_string(course_id) + triggered_time = datetime.strptime( + # remove the +00:00 from the end of the formats generated within the system + triggered_time_isoformat.split('+')[0], + "%Y-%m-%dT%H:%M:%S.%f" + ).replace(tzinfo=UTC) + CoursewareSearchIndexer.index_course(modulestore(), course_key, triggered_at=triggered_time) + + except SearchIndexingError as exc: + LOGGER.error('Search indexing error for complete course %s - %s', course_id, unicode(exc)) + else: + LOGGER.debug('Search indexing successful for complete course %s', course_id) diff --git a/cms/djangoapps/contentstore/tests/test_courseware_index.py b/cms/djangoapps/contentstore/tests/test_courseware_index.py new file mode 100644 index 0000000000..8f0621d60e --- /dev/null +++ b/cms/djangoapps/contentstore/tests/test_courseware_index.py @@ -0,0 +1,565 @@ +""" +Testing indexing of the courseware as it is changed +""" +import ddt +import time +from datetime import datetime +from mock import patch +from pytz import UTC +from uuid import uuid4 +from unittest import skip + +from xmodule.modulestore import ModuleStoreEnum +from xmodule.modulestore.django import SignalHandler +from xmodule.modulestore.edit_info import EditInfoMixin +from xmodule.modulestore.inheritance import InheritanceMixin +from xmodule.modulestore.mixed import MixedModuleStore +from xmodule.modulestore.tests.django_utils import ModuleStoreTestCase +from xmodule.modulestore.tests.factories import CourseFactory, ItemFactory +from xmodule.modulestore.tests.mongo_connection import MONGO_PORT_NUM, MONGO_HOST +from xmodule.modulestore.tests.test_cross_modulestore_import_export import MongoContentstoreBuilder +from xmodule.modulestore.tests.utils import create_modulestore_instance, LocationMixin, MixedSplitTestCase +from xmodule.tests import DATA_DIR +from xmodule.x_module import XModuleMixin +from search.search_engine_base import SearchEngine + +from contentstore.courseware_index import CoursewareSearchIndexer, INDEX_NAME, DOCUMENT_TYPE, SearchIndexingError +from contentstore.signals import listen_for_course_publish + + +COURSE_CHILD_STRUCTURE = { + "course": "chapter", + "chapter": "sequential", + "sequential": "vertical", + "vertical": "html", +} + + +def create_children(store, parent, category, load_factor): + """ create load_factor children within the given parent; recursively call to insert children when appropriate """ + created_count = 0 + for child_index in range(0, load_factor): + child_object = ItemFactory.create( + parent_location=parent.location, + category=category, + display_name=u"{} {} {}".format(category, child_index, time.clock()), + modulestore=store, + publish_item=True, + start=datetime(2015, 3, 1, tzinfo=UTC), + ) + created_count += 1 + + if category in COURSE_CHILD_STRUCTURE: + created_count += create_children(store, child_object, COURSE_CHILD_STRUCTURE[category], load_factor) + + return created_count + + +def create_large_course(store, load_factor): + """ + Create a large course, note that the number of blocks created will be + load_factor ^ 4 - e.g. load_factor of 10 => 10 chapters, 100 + sequentials, 1000 verticals, 10000 html blocks + """ + course = CourseFactory.create(modulestore=store, start=datetime(2015, 3, 1, tzinfo=UTC)) + with store.bulk_operations(course.id): + child_count = create_children(store, course, COURSE_CHILD_STRUCTURE["course"], load_factor) + return course, child_count + + +class MixedWithOptionsTestCase(MixedSplitTestCase): + """ Base class for test cases within this file """ + HOST = MONGO_HOST + PORT = MONGO_PORT_NUM + DATABASE = 'test_mongo_%s' % uuid4().hex[:5] + COLLECTION = 'modulestore' + ASSET_COLLECTION = 'assetstore' + DEFAULT_CLASS = 'xmodule.raw_module.RawDescriptor' + RENDER_TEMPLATE = lambda t_n, d, ctx=None, nsp='main': '' + modulestore_options = { + 'default_class': DEFAULT_CLASS, + 'fs_root': DATA_DIR, + 'render_template': RENDER_TEMPLATE, + 'xblock_mixins': (EditInfoMixin, InheritanceMixin, LocationMixin, XModuleMixin), + } + DOC_STORE_CONFIG = { + 'host': HOST, + 'port': PORT, + 'db': DATABASE, + 'collection': COLLECTION, + 'asset_collection': ASSET_COLLECTION, + } + OPTIONS = { + 'stores': [ + { + 'NAME': 'draft', + 'ENGINE': 'xmodule.modulestore.mongo.draft.DraftModuleStore', + 'DOC_STORE_CONFIG': DOC_STORE_CONFIG, + 'OPTIONS': modulestore_options + }, + { + 'NAME': 'split', + 'ENGINE': 'xmodule.modulestore.split_mongo.split_draft.DraftVersioningModuleStore', + 'DOC_STORE_CONFIG': DOC_STORE_CONFIG, + 'OPTIONS': modulestore_options + }, + { + 'NAME': 'xml', + 'ENGINE': 'xmodule.modulestore.xml.XMLModuleStore', + 'OPTIONS': { + 'data_dir': DATA_DIR, + 'default_class': 'xmodule.hidden_module.HiddenDescriptor', + 'xblock_mixins': modulestore_options['xblock_mixins'], + } + }, + ], + 'xblock_mixins': modulestore_options['xblock_mixins'], + } + + def setUp(self): + super(MixedWithOptionsTestCase, self).setUp() + + def setup_course_base(self, store): + """ base version of setup_course_base is a no-op """ + pass + + def get_search_engine(self): + """ Centralized call to getting the search engine for the test """ + return SearchEngine.get_search_engine(INDEX_NAME) + + def _perform_test_using_store(self, store_type, test_to_perform): + """ Helper method to run a test function that uses a specific store """ + with MongoContentstoreBuilder().build() as contentstore: + store = MixedModuleStore( + contentstore=contentstore, + create_modulestore_instance=create_modulestore_instance, + mappings={}, + **self.OPTIONS + ) + self.addCleanup(store.close_all_connections) + + with store.default_store(store_type): + self.setup_course_base(store) + test_to_perform(store) + + def publish_item(self, store, item_location): + """ publish the item at the given location """ + with store.branch_setting(ModuleStoreEnum.Branch.draft_preferred): + store.publish(item_location, ModuleStoreEnum.UserID.test) + + def delete_item(self, store, item_location): + """ delete the item at the given location """ + with store.branch_setting(ModuleStoreEnum.Branch.draft_preferred): + store.delete_item(item_location, ModuleStoreEnum.UserID.test) + + def update_item(self, store, item): + """ update the item at the given location """ + with store.branch_setting(ModuleStoreEnum.Branch.draft_preferred): + store.update_item(item, ModuleStoreEnum.UserID.test) + + +@ddt.ddt +class TestCoursewareSearchIndexer(MixedWithOptionsTestCase): + """ Tests the operation of the CoursewareSearchIndexer """ + + def setUp(self): + super(TestCoursewareSearchIndexer, self).setUp() + + self.course = None + self.chapter = None + self.sequential = None + self.vertical = None + self.html_unit = None + + def setup_course_base(self, store): + """ + Set up the for the course outline tests. + """ + self.course = CourseFactory.create(modulestore=store, start=datetime(2015, 3, 1, tzinfo=UTC)) + + self.chapter = ItemFactory.create( + parent_location=self.course.location, + category='chapter', + display_name="Week 1", + modulestore=store, + publish_item=True, + start=datetime(2015, 3, 1, tzinfo=UTC), + ) + self.sequential = ItemFactory.create( + parent_location=self.chapter.location, + category='sequential', + display_name="Lesson 1", + modulestore=store, + publish_item=True, + start=datetime(2015, 3, 1, tzinfo=UTC), + ) + self.vertical = ItemFactory.create( + parent_location=self.sequential.location, + category='vertical', + display_name='Subsection 1', + modulestore=store, + publish_item=True, + start=datetime(2015, 4, 1, tzinfo=UTC), + ) + # unspecified start - should inherit from container + self.html_unit = ItemFactory.create( + parent_location=self.vertical.location, + category="html", + display_name="Html Content", + modulestore=store, + publish_item=False, + ) + + def reindex_course(self, store): + """ kick off complete reindex of the course """ + return CoursewareSearchIndexer.do_course_reindex(store, self.course.id) + + def index_recent_changes(self, store, since_time): + """ index course using recent changes """ + trigger_time = datetime.now(UTC) + return CoursewareSearchIndexer.index_course( + store, + self.course.id, + triggered_at=trigger_time, + reindex_age=(trigger_time - since_time) + ) + + def _test_indexing_course(self, store): + """ indexing course tests """ + searcher = self.get_search_engine() + response = searcher.search(field_dictionary={"course": unicode(self.course.id)}) + self.assertEqual(response["total"], 0) + + # Only published modules should be in the index + added_to_index = self.reindex_course(store) + self.assertEqual(added_to_index, 3) + response = searcher.search(field_dictionary={"course": unicode(self.course.id)}) + self.assertEqual(response["total"], 3) + + # Publish the vertical as is, and any unpublished children should now be available + self.publish_item(store, self.vertical.location) + self.reindex_course(store) + response = searcher.search(field_dictionary={"course": unicode(self.course.id)}) + self.assertEqual(response["total"], 4) + + def _test_not_indexing_unpublished_content(self, store): + """ add a new one, only appers in index once added """ + searcher = self.get_search_engine() + # Publish the vertical to start with + self.publish_item(store, self.vertical.location) + self.reindex_course(store) + response = searcher.search(field_dictionary={"course": unicode(self.course.id)}) + self.assertEqual(response["total"], 4) + + # Now add a new unit to the existing vertical + ItemFactory.create( + parent_location=self.vertical.location, + category="html", + display_name="Some other content", + publish_item=False, + modulestore=store, + ) + self.reindex_course(store) + response = searcher.search(field_dictionary={"course": unicode(self.course.id)}) + self.assertEqual(response["total"], 4) + + # Now publish it and we should find it + # Publish the vertical as is, and everything should be available + self.publish_item(store, self.vertical.location) + self.reindex_course(store) + response = searcher.search(field_dictionary={"course": unicode(self.course.id)}) + self.assertEqual(response["total"], 5) + + def _test_deleting_item(self, store): + """ test deleting an item """ + searcher = self.get_search_engine() + # Publish the vertical to start with + self.publish_item(store, self.vertical.location) + self.reindex_course(store) + response = searcher.search(field_dictionary={"course": unicode(self.course.id)}) + self.assertEqual(response["total"], 4) + + # just a delete should not change anything + self.delete_item(store, self.html_unit.location) + self.reindex_course(store) + response = searcher.search(field_dictionary={"course": unicode(self.course.id)}) + self.assertEqual(response["total"], 4) + + # but after publishing, we should no longer find the html_unit + self.publish_item(store, self.vertical.location) + self.reindex_course(store) + response = searcher.search(field_dictionary={"course": unicode(self.course.id)}) + self.assertEqual(response["total"], 3) + + def _test_not_indexable(self, store): + """ test not indexable items """ + searcher = self.get_search_engine() + # Publish the vertical to start with + self.publish_item(store, self.vertical.location) + self.reindex_course(store) + response = searcher.search(field_dictionary={"course": unicode(self.course.id)}) + self.assertEqual(response["total"], 4) + + # Add a non-indexable item + ItemFactory.create( + parent_location=self.vertical.location, + category="problem", + display_name="Some other content", + publish_item=False, + modulestore=store, + ) + self.reindex_course(store) + response = searcher.search(field_dictionary={"course": unicode(self.course.id)}) + self.assertEqual(response["total"], 4) + + # even after publishing, we should not find the non-indexable item + self.publish_item(store, self.vertical.location) + self.reindex_course(store) + response = searcher.search(field_dictionary={"course": unicode(self.course.id)}) + self.assertEqual(response["total"], 4) + + def _test_start_date_propagation(self, store): + """ make sure that the start date is applied at the right level """ + searcher = self.get_search_engine() + early_date = self.course.start + later_date = self.vertical.start + + # Publish the vertical + self.publish_item(store, self.vertical.location) + self.reindex_course(store) + response = searcher.search(field_dictionary={"course": unicode(self.course.id)}) + self.assertEqual(response["total"], 4) + + results = response["results"] + date_map = { + unicode(self.chapter.location): early_date, + unicode(self.sequential.location): early_date, + unicode(self.vertical.location): later_date, + unicode(self.html_unit.location): later_date, + } + for result in results: + self.assertEqual(result["data"]["start_date"], date_map[result["data"]["id"]]) + + @patch('django.conf.settings.SEARCH_ENGINE', None) + def _test_search_disabled(self, store): + """ if search setting has it as off, confirm that nothing is indexed """ + indexed_count = self.reindex_course(store) + self.assertFalse(indexed_count) + + def _test_time_based_index(self, store): + """ Make sure that a time based request to index does not index anything too old """ + self.publish_item(store, self.vertical.location) + indexed_count = self.reindex_course(store) + self.assertEqual(indexed_count, 4) + + # Add a new sequential + sequential2 = ItemFactory.create( + parent_location=self.chapter.location, + category='sequential', + display_name='Section 2', + modulestore=store, + publish_item=True, + start=datetime(2015, 3, 1, tzinfo=UTC), + ) + + # add a new vertical + vertical2 = ItemFactory.create( + parent_location=sequential2.location, + category='vertical', + display_name='Subsection 2', + modulestore=store, + publish_item=True, + ) + ItemFactory.create( + parent_location=vertical2.location, + category="html", + display_name="Some other content", + publish_item=False, + modulestore=store, + ) + + before_time = datetime.now(UTC) + self.publish_item(store, vertical2.location) + # index based on time, will include an index of the origin sequential + # because it is in a common subtree but not of the original vertical + # because the original sequential's subtree is too old + new_indexed_count = self.index_recent_changes(store, before_time) + self.assertEqual(new_indexed_count, 5) + + # full index again + indexed_count = self.reindex_course(store) + self.assertEqual(indexed_count, 7) + + @patch('django.conf.settings.SEARCH_ENGINE', 'search.tests.utils.ErroringIndexEngine') + def _test_exception(self, store): + """ Test that exception within indexing yields a SearchIndexingError """ + self.publish_item(store, self.vertical.location) + with self.assertRaises(SearchIndexingError): + self.reindex_course(store) + + @ddt.data(ModuleStoreEnum.Type.mongo, ModuleStoreEnum.Type.split) + def test_indexing_course(self, store_type): + self._perform_test_using_store(store_type, self._test_indexing_course) + + @ddt.data(ModuleStoreEnum.Type.mongo, ModuleStoreEnum.Type.split) + def test_not_indexing_unpublished_content(self, store_type): + self._perform_test_using_store(store_type, self._test_not_indexing_unpublished_content) + + @ddt.data(ModuleStoreEnum.Type.mongo, ModuleStoreEnum.Type.split) + def test_deleting_item(self, store_type): + self._perform_test_using_store(store_type, self._test_deleting_item) + + @ddt.data(ModuleStoreEnum.Type.mongo, ModuleStoreEnum.Type.split) + def test_not_indexable(self, store_type): + self._perform_test_using_store(store_type, self._test_not_indexable) + + @ddt.data(ModuleStoreEnum.Type.mongo, ModuleStoreEnum.Type.split) + def test_start_date_propagation(self, store_type): + self._perform_test_using_store(store_type, self._test_start_date_propagation) + + @ddt.data(ModuleStoreEnum.Type.mongo, ModuleStoreEnum.Type.split) + def test_search_disabled(self, store_type): + self._perform_test_using_store(store_type, self._test_search_disabled) + + @ddt.data(ModuleStoreEnum.Type.mongo, ModuleStoreEnum.Type.split) + def test_time_based_index(self, store_type): + self._perform_test_using_store(store_type, self._test_time_based_index) + + @ddt.data(ModuleStoreEnum.Type.mongo, ModuleStoreEnum.Type.split) + def test_exception(self, store_type): + self._perform_test_using_store(store_type, self._test_exception) + + +@patch('django.conf.settings.SEARCH_ENGINE', 'search.tests.utils.ForceRefreshElasticSearchEngine') +@ddt.ddt +class TestLargeCourseDeletions(MixedWithOptionsTestCase): + """ Tests to excerise deleting items from a course """ + + def _clean_course_id(self): + """ Clean all documents from the index that have a specific course provided """ + if self.course_id: + searcher = self.get_search_engine() + response = searcher.search(field_dictionary={"course": self.course_id}) + while response["total"] > 0: + for item in response["results"]: + searcher.remove(DOCUMENT_TYPE, item["data"]["id"]) + searcher.remove(DOCUMENT_TYPE, item["data"]["id"]) + response = searcher.search(field_dictionary={"course": self.course_id}) + self.course_id = None + + def setUp(self): + super(TestLargeCourseDeletions, self).setUp() + self.course_id = None + + def tearDown(self): + super(TestLargeCourseDeletions, self).tearDown() + self._clean_course_id() + + def assert_search_count(self, expected_count): + """ Check that the search within this course will yield the expected number of results """ + searcher = self.get_search_engine() + response = searcher.search(field_dictionary={"course": self.course_id}) + self.assertEqual(response["total"], expected_count) + + def _do_test_large_course_deletion(self, store, load_factor): + """ Test that deleting items from a course works even when present within a very large course """ + def id_list(top_parent_object): + """ private function to get ids from object down the tree """ + list_of_ids = [unicode(top_parent_object.location)] + for child in top_parent_object.get_children(): + list_of_ids.extend(id_list(child)) + return list_of_ids + + course, course_size = create_large_course(store, load_factor) + self.course_id = unicode(course.id) + + # index full course + CoursewareSearchIndexer.do_course_reindex(store, course.id) + + self.assert_search_count(course_size) + + # reload course to allow us to delete one single unit + course = store.get_course(course.id, depth=1) + + # delete the first chapter + chapter_to_delete = course.get_children()[0] + self.delete_item(store, chapter_to_delete.location) + + # index and check correctness + CoursewareSearchIndexer.do_course_reindex(store, course.id) + deleted_count = 1 + load_factor + (load_factor ** 2) + (load_factor ** 3) + self.assert_search_count(course_size - deleted_count) + + def _test_large_course_deletion(self, store): + """ exception catch-ing wrapper around large test course test with deletions """ + # load_factor of 6 (1296 items) takes about 5 minutes to run on devstack on a laptop + # load_factor of 7 (2401 items) takes about 70 minutes to run on devstack on a laptop + # load_factor of 8 (4096 items) takes just under 3 hours to run on devstack on a laptop + load_factor = 6 + try: + self._do_test_large_course_deletion(store, load_factor) + except: # pylint: disable=bare-except + # Catch any exception here to see when we fail + print "Failed with load_factor of {}".format(load_factor) + + @skip(("This test is to see how we handle very large courses, to ensure that the delete" + "procedure works smoothly - too long to run during the normal course of things")) + @ddt.data(ModuleStoreEnum.Type.mongo, ModuleStoreEnum.Type.split) + def test_large_course_deletion(self, store_type): + self._perform_test_using_store(store_type, self._test_large_course_deletion) + + +class TestTaskExecution(ModuleStoreTestCase): + """ + Set of tests to ensure that the task code will do the right thing when + executed directly. The test course gets created without the listener + being present, which allows us to ensure that when the listener is + executed, it is done as expected. + """ + + def setUp(self): + super(TestTaskExecution, self).setUp() + SignalHandler.course_published.disconnect(listen_for_course_publish) + self.course = CourseFactory.create(start=datetime(2015, 3, 1, tzinfo=UTC)) + + self.chapter = ItemFactory.create( + parent_location=self.course.location, + category='chapter', + display_name="Week 1", + publish_item=True, + start=datetime(2015, 3, 1, tzinfo=UTC), + ) + self.sequential = ItemFactory.create( + parent_location=self.chapter.location, + category='sequential', + display_name="Lesson 1", + publish_item=True, + start=datetime(2015, 3, 1, tzinfo=UTC), + ) + self.vertical = ItemFactory.create( + parent_location=self.sequential.location, + category='vertical', + display_name='Subsection 1', + publish_item=True, + start=datetime(2015, 4, 1, tzinfo=UTC), + ) + # unspecified start - should inherit from container + self.html_unit = ItemFactory.create( + parent_location=self.vertical.location, + category="html", + display_name="Html Content", + publish_item=False, + ) + + def test_task_indexing_course(self): + """ Making sure that the receiver correctly fires off the task when invoked by signal """ + searcher = SearchEngine.get_search_engine(INDEX_NAME) + response = searcher.search(field_dictionary={"course": unicode(self.course.id)}) + self.assertEqual(response["total"], 0) + + #update_search_index(unicode(self.course.id), datetime.now(UTC).isoformat()) + listen_for_course_publish(self, self.course.id) + + # Note that this test will only succeed if celery is working in inline mode + response = searcher.search(field_dictionary={"course": unicode(self.course.id)}) + self.assertEqual(response["total"], 3) diff --git a/cms/djangoapps/contentstore/tests/test_import.py b/cms/djangoapps/contentstore/tests/test_import.py index 4efb1cb8ec..f69f450493 100644 --- a/cms/djangoapps/contentstore/tests/test_import.py +++ b/cms/djangoapps/contentstore/tests/test_import.py @@ -28,7 +28,7 @@ TEST_DATA_DIR = settings.COMMON_TEST_DATA_ROOT @ddt.ddt -@override_settings(CONTENTSTORE=TEST_DATA_CONTENTSTORE) +@override_settings(CONTENTSTORE=TEST_DATA_CONTENTSTORE, SEARCH_ENGINE=None) class ContentStoreImportTest(SignalDisconnectTestMixin, ModuleStoreTestCase): """ Tests that rely on the toy and test_import_course courses. diff --git a/cms/djangoapps/contentstore/tests/test_libraries.py b/cms/djangoapps/contentstore/tests/test_libraries.py index 8a83dfd7cc..a76efbb3e9 100644 --- a/cms/djangoapps/contentstore/tests/test_libraries.py +++ b/cms/djangoapps/contentstore/tests/test_libraries.py @@ -459,6 +459,7 @@ class TestLibraries(LibraryTestCase): @ddt.ddt +@patch('django.conf.settings.SEARCH_ENGINE', None) class TestLibraryAccess(SignalDisconnectTestMixin, LibraryTestCase): """ Test Roles and Permissions related to Content Libraries diff --git a/cms/djangoapps/contentstore/views/course.py b/cms/djangoapps/contentstore/views/course.py index 7db80dc44f..d205457faa 100644 --- a/cms/djangoapps/contentstore/views/course.py +++ b/cms/djangoapps/contentstore/views/course.py @@ -22,7 +22,6 @@ from edxmako.shortcuts import render_to_response from xmodule.course_module import DEFAULT_START_DATE from xmodule.error_module import ErrorDescriptor from xmodule.modulestore.django import modulestore -from xmodule.modulestore.courseware_index import CoursewareSearchIndexer, SearchIndexingError from xmodule.contentstore.content import StaticContent from xmodule.tabs import PDFTextbookTabs from xmodule.partitions.partitions import UserPartition @@ -35,6 +34,7 @@ from openedx.core.djangoapps.course_groups.partition_scheme import get_cohorted_ from django_future.csrf import ensure_csrf_cookie from contentstore.course_info_model import get_course_updates, update_course_updates, delete_course_update +from contentstore.courseware_index import CoursewareSearchIndexer, SearchIndexingError from contentstore.utils import ( add_instructor, initialize_permissions, diff --git a/cms/djangoapps/contentstore/views/tests/test_course_index.py b/cms/djangoapps/contentstore/views/tests/test_course_index.py index 6c11083966..2c7ee14748 100644 --- a/cms/djangoapps/contentstore/views/tests/test_course_index.py +++ b/cms/djangoapps/contentstore/views/tests/test_course_index.py @@ -6,28 +6,28 @@ import lxml import datetime import os import mock +import pytz -from contentstore.tests.utils import CourseTestCase -from contentstore.utils import reverse_course_url, reverse_library_url, add_instructor -from student.auth import has_course_author_access -from contentstore.views.course import course_outline_initial_state, reindex_course_and_check_access -from contentstore.views.item import create_xblock_info, VisibilityState -from course_action_state.models import CourseRerunState -from util.date_utils import get_default_time_display -from xmodule.modulestore import ModuleStoreEnum -from xmodule.modulestore.courseware_index import CoursewareSearchIndexer -from xmodule.modulestore.exceptions import ItemNotFoundError -from xmodule.modulestore.django import modulestore -from xmodule.modulestore.tests.factories import CourseFactory, ItemFactory, LibraryFactory -from xmodule.modulestore.courseware_index import SearchIndexingError -from opaque_keys.edx.locator import CourseLocator -from student.tests.factories import UserFactory -from course_action_state.managers import CourseRerunUIStateManager from django.conf import settings from django.core.exceptions import PermissionDenied from django.utils.translation import ugettext as _ + +from contentstore.courseware_index import CoursewareSearchIndexer, SearchIndexingError +from contentstore.tests.utils import CourseTestCase +from contentstore.utils import reverse_course_url, reverse_library_url, add_instructor +from contentstore.views.course import course_outline_initial_state, reindex_course_and_check_access +from contentstore.views.item import create_xblock_info, VisibilityState +from course_action_state.managers import CourseRerunUIStateManager +from course_action_state.models import CourseRerunState +from opaque_keys.edx.locator import CourseLocator from search.api import perform_search -import pytz +from student.auth import has_course_author_access +from student.tests.factories import UserFactory +from util.date_utils import get_default_time_display +from xmodule.modulestore import ModuleStoreEnum +from xmodule.modulestore.exceptions import ItemNotFoundError +from xmodule.modulestore.django import modulestore +from xmodule.modulestore.tests.factories import CourseFactory, ItemFactory, LibraryFactory class TestCourseIndex(CourseTestCase): @@ -346,8 +346,6 @@ class TestCourseReIndex(CourseTestCase): Unit tests for the course outline. """ - TEST_INDEX_FILENAME = "test_root/index_file.dat" - SUCCESSFUL_RESPONSE = _("Course has been successfully reindexed.") def setUp(self): @@ -379,12 +377,6 @@ class TestCourseReIndex(CourseTestCase): ) - # create test file in which index for this test will live - with open(self.TEST_INDEX_FILENAME, "w+") as index_file: - json.dump({}, index_file) - - self.addCleanup(os.remove, self.TEST_INDEX_FILENAME) - def test_reindex_course(self): """ Verify that course gets reindexed. @@ -445,25 +437,19 @@ class TestCourseReIndex(CourseTestCase): """ Test json response with real data """ - # Check results not indexed + # results are indexed because they are published from ItemFactory response = perform_search( "unique", user=self.user, size=10, from_=0, course_id=unicode(self.course.id)) - self.assertEqual(response['results'], []) + self.assertEqual(response['total'], 1) # Start manual reindex reindex_course_and_check_access(self.course.id, self.user) - self.html.display_name = "My expanded HTML" - modulestore().update_item(self.html, ModuleStoreEnum.UserID.test) - - # Start manual reindex - reindex_course_and_check_access(self.course.id, self.user) - - # Check results indexed now + # Check results remain the same response = perform_search( "unique", user=self.user, @@ -477,14 +463,14 @@ class TestCourseReIndex(CourseTestCase): """ Test json response with mocked error data for video """ - # Check results not indexed + # results are indexed because they are published from ItemFactory response = perform_search( "unique", user=self.user, size=10, from_=0, course_id=unicode(self.course.id)) - self.assertEqual(response['results'], []) + self.assertEqual(response['total'], 1) # set mocked exception response err = SearchIndexingError @@ -499,14 +485,14 @@ class TestCourseReIndex(CourseTestCase): """ Test json response with mocked error data for html """ - # Check results not indexed + # results are indexed because they are published from ItemFactory response = perform_search( "unique", user=self.user, size=10, from_=0, course_id=unicode(self.course.id)) - self.assertEqual(response['results'], []) + self.assertEqual(response['total'], 1) # set mocked exception response err = SearchIndexingError @@ -521,14 +507,14 @@ class TestCourseReIndex(CourseTestCase): """ Test json response with mocked error data for sequence """ - # Check results not indexed + # results are indexed because they are published from ItemFactory response = perform_search( "unique", user=self.user, size=10, from_=0, course_id=unicode(self.course.id)) - self.assertEqual(response['results'], []) + self.assertEqual(response['total'], 1) # set mocked exception response err = Exception @@ -559,27 +545,21 @@ class TestCourseReIndex(CourseTestCase): def test_indexing_responses(self): """ - Test add_to_search_index response with real data + Test do_course_reindex response with real data """ - # Check results not indexed + # results are indexed because they are published from ItemFactory response = perform_search( "unique", user=self.user, size=10, from_=0, course_id=unicode(self.course.id)) - self.assertEqual(response['results'], []) + self.assertEqual(response['total'], 1) # Start manual reindex CoursewareSearchIndexer.do_course_reindex(modulestore(), self.course.id) - self.html.display_name = "My expanded HTML" - modulestore().update_item(self.html, ModuleStoreEnum.UserID.test) - - # Start manual reindex - CoursewareSearchIndexer.do_course_reindex(modulestore(), self.course.id) - - # Check results indexed now + # Check results are the same following reindex response = perform_search( "unique", user=self.user, @@ -591,16 +571,16 @@ class TestCourseReIndex(CourseTestCase): @mock.patch('xmodule.video_module.VideoDescriptor.index_dictionary') def test_indexing_video_error_responses(self, mock_index_dictionary): """ - Test add_to_search_index response with mocked error data for video + Test do_course_reindex response with mocked error data for video """ - # Check results not indexed + # results are indexed because they are published from ItemFactory response = perform_search( "unique", user=self.user, size=10, from_=0, course_id=unicode(self.course.id)) - self.assertEqual(response['results'], []) + self.assertEqual(response['total'], 1) # set mocked exception response err = Exception @@ -613,16 +593,16 @@ class TestCourseReIndex(CourseTestCase): @mock.patch('xmodule.html_module.HtmlDescriptor.index_dictionary') def test_indexing_html_error_responses(self, mock_index_dictionary): """ - Test add_to_search_index response with mocked error data for html + Test do_course_reindex response with mocked error data for html """ - # Check results not indexed + # results are indexed because they are published from ItemFactory response = perform_search( "unique", user=self.user, size=10, from_=0, course_id=unicode(self.course.id)) - self.assertEqual(response['results'], []) + self.assertEqual(response['total'], 1) # set mocked exception response err = Exception @@ -635,16 +615,16 @@ class TestCourseReIndex(CourseTestCase): @mock.patch('xmodule.seq_module.SequenceDescriptor.index_dictionary') def test_indexing_seq_error_responses(self, mock_index_dictionary): """ - Test add_to_search_index response with mocked error data for sequence + Test do_course_reindex response with mocked error data for sequence """ - # Check results not indexed + # results are indexed because they are published from ItemFactory response = perform_search( "unique", user=self.user, size=10, from_=0, course_id=unicode(self.course.id)) - self.assertEqual(response['results'], []) + self.assertEqual(response['total'], 1) # set mocked exception response err = Exception diff --git a/cms/envs/test.py b/cms/envs/test.py index a5e330953d..87b03469b4 100644 --- a/cms/envs/test.py +++ b/cms/envs/test.py @@ -268,10 +268,6 @@ VIDEO_CDN_URL = { # Courseware Search Index FEATURES['ENABLE_COURSEWARE_INDEX'] = True SEARCH_ENGINE = "search.tests.mock_search_engine.MockSearchEngine" -# Path at which to store the mock index -MOCK_SEARCH_BACKING_FILE = ( - TEST_ROOT / "index_file.dat" # pylint: disable=no-value-for-parameter -).abspath() # Dummy secret key for dev/test SECRET_KEY = '85920908f28904ed733fe576320db18cabd7b6cd' diff --git a/common/lib/xmodule/xmodule/modulestore/courseware_index.py b/common/lib/xmodule/xmodule/modulestore/courseware_index.py deleted file mode 100644 index ff55b1a721..0000000000 --- a/common/lib/xmodule/xmodule/modulestore/courseware_index.py +++ /dev/null @@ -1,176 +0,0 @@ -""" Code to allow module store to interface with courseware index """ -from __future__ import absolute_import - -import logging - -from django.utils.translation import ugettext as _ -from opaque_keys.edx.locator import CourseLocator -from search.search_engine_base import SearchEngine -from eventtracking import tracker - -from . import ModuleStoreEnum -from .exceptions import ItemNotFoundError - - -# Use default index and document names for now -INDEX_NAME = "courseware_index" -DOCUMENT_TYPE = "courseware_content" - -log = logging.getLogger('edx.modulestore') - - -class SearchIndexingError(Exception): - """ Indicates some error(s) occured during indexing """ - - def __init__(self, message, error_list): - super(SearchIndexingError, self).__init__(message) - self.error_list = error_list - - -class CoursewareSearchIndexer(object): - """ - Class to perform indexing for courseware search from different modulestores - """ - - @staticmethod - def add_to_search_index(modulestore, location, delete=False, raise_on_error=False): - """ - Add to courseware search index from given location and its children - """ - error_list = [] - indexed_count = 0 - # TODO - inline for now, need to move this out to a celery task - searcher = SearchEngine.get_search_engine(INDEX_NAME) - if not searcher: - return - - if isinstance(location, CourseLocator): - course_key = location - else: - course_key = location.course_key - - location_info = { - "course": unicode(course_key), - } - - def _fetch_item(item_location): - """ Fetch the item from the modulestore location, log if not found, but continue """ - try: - if isinstance(item_location, CourseLocator): - item = modulestore.get_course(item_location) - else: - item = modulestore.get_item(item_location, revision=ModuleStoreEnum.RevisionOption.published_only) - except ItemNotFoundError: - log.warning('Cannot find: %s', item_location) - return None - - return item - - def index_item_location(item_location, current_start_date): - """ add this item to the search index """ - item = _fetch_item(item_location) - if not item: - return - - is_indexable = hasattr(item, "index_dictionary") - # if it's not indexable and it does not have children, then ignore - if not is_indexable and not item.has_children: - return - - # if it has a defined start, then apply it and to it's children - if item.start and (not current_start_date or item.start > current_start_date): - current_start_date = item.start - - if item.has_children: - for child_loc in item.children: - index_item_location(child_loc, current_start_date) - - item_index = {} - item_index_dictionary = item.index_dictionary() if is_indexable else None - - # if it has something to add to the index, then add it - if item_index_dictionary: - try: - item_index.update(location_info) - item_index.update(item_index_dictionary) - item_index['id'] = unicode(item.scope_ids.usage_id) - if current_start_date: - item_index['start_date'] = current_start_date - - searcher.index(DOCUMENT_TYPE, item_index) - except Exception as err: # pylint: disable=broad-except - # broad exception so that index operation does not fail on one item of many - log.warning('Could not index item: %s - %s', item_location, unicode(err)) - error_list.append(_('Could not index item: {}').format(item_location)) - - def remove_index_item_location(item_location): - """ remove this item from the search index """ - item = _fetch_item(item_location) - if item: - if item.has_children: - for child_loc in item.children: - remove_index_item_location(child_loc) - - searcher.remove(DOCUMENT_TYPE, unicode(item.scope_ids.usage_id)) - - try: - if delete: - remove_index_item_location(location) - else: - index_item_location(location, None) - indexed_count += 1 - except Exception as err: # pylint: disable=broad-except - # broad exception so that index operation does not prevent the rest of the application from working - log.exception( - "Indexing error encountered, courseware index may be out of date %s - %s", - course_key, - unicode(err) - ) - error_list.append(_('General indexing error occurred')) - - if raise_on_error and error_list: - raise SearchIndexingError(_('Error(s) present during indexing'), error_list) - - return indexed_count - - @classmethod - def do_publish_index(cls, modulestore, location, delete=False, raise_on_error=False): - """ - Add to courseware search index published section and children - """ - indexed_count = cls.add_to_search_index(modulestore, location, delete, raise_on_error) - cls._track_index_request('edx.course.index.published', indexed_count, str(location)) - return indexed_count - - @classmethod - def do_course_reindex(cls, modulestore, course_key): - """ - (Re)index all content within the given course - """ - indexed_count = cls.add_to_search_index(modulestore, course_key, delete=False, raise_on_error=True) - cls._track_index_request('edx.course.index.reindexed', indexed_count) - return indexed_count - - @staticmethod - def _track_index_request(event_name, indexed_count, location=None): - """Track content index requests. - - Arguments: - location (str): The ID of content to be indexed. - event_name (str): Name of the event to be logged. - Returns: - None - - """ - data = { - "indexed_count": indexed_count, - 'category': 'courseware_index', - } - - if location: - data['location_id'] = location - - tracker.emit( - event_name, - data - ) diff --git a/common/lib/xmodule/xmodule/modulestore/mongo/draft.py b/common/lib/xmodule/xmodule/modulestore/mongo/draft.py index 4516165370..9c8f327ad9 100644 --- a/common/lib/xmodule/xmodule/modulestore/mongo/draft.py +++ b/common/lib/xmodule/xmodule/modulestore/mongo/draft.py @@ -12,7 +12,6 @@ import logging from opaque_keys.edx.locations import Location from xmodule.exceptions import InvalidVersionError from xmodule.modulestore import ModuleStoreEnum -from xmodule.modulestore.courseware_index import CoursewareSearchIndexer from xmodule.modulestore.exceptions import ( ItemNotFoundError, DuplicateItemError, DuplicateCourseError, InvalidBranchSetting ) @@ -565,10 +564,6 @@ class DraftModuleStore(MongoModuleStore): ) self._delete_subtree(location, as_functions) - # Remove this location from the courseware search index so that searches - # will refrain from showing it as a result - CoursewareSearchIndexer.add_to_search_index(self, location, delete=True) - def _delete_subtree(self, location, as_functions, draft_only=False): """ Internal method for deleting all of the subtree whose revisions match the as_functions @@ -745,9 +740,6 @@ class DraftModuleStore(MongoModuleStore): self._flag_publish_event(course_key) - # Now it's been published, add the object to the courseware search index so that it appears in search results - CoursewareSearchIndexer.do_publish_index(self, location) - return self.get_item(as_published(location)) def unpublish(self, location, user_id, **kwargs): diff --git a/common/lib/xmodule/xmodule/modulestore/split_mongo/split_draft.py b/common/lib/xmodule/xmodule/modulestore/split_mongo/split_draft.py index 06ce24b1b9..0d975fcf55 100644 --- a/common/lib/xmodule/xmodule/modulestore/split_mongo/split_draft.py +++ b/common/lib/xmodule/xmodule/modulestore/split_mongo/split_draft.py @@ -5,7 +5,6 @@ Module for the dual-branch fall-back Draft->Published Versioning ModuleStore from xmodule.modulestore.split_mongo.split import SplitMongoModuleStore, EXCLUDE_ALL from xmodule.exceptions import InvalidVersionError from xmodule.modulestore import ModuleStoreEnum -from xmodule.modulestore.courseware_index import CoursewareSearchIndexer from xmodule.modulestore.exceptions import InsufficientSpecificationError, ItemNotFoundError from xmodule.modulestore.draft_and_published import ( ModuleStoreDraftAndPublished, DIRECT_ONLY_CATEGORIES, UnsupportedRevisionError @@ -217,10 +216,6 @@ class DraftVersioningModuleStore(SplitMongoModuleStore, ModuleStoreDraftAndPubli if branch == ModuleStoreEnum.BranchName.draft and branched_location.block_type in DIRECT_ONLY_CATEGORIES: self.publish(parent_loc.version_agnostic(), user_id, blacklist=EXCLUDE_ALL, **kwargs) - # Remove this location from the courseware search index so that searches - # will refrain from showing it as a result - CoursewareSearchIndexer.add_to_search_index(self, location, delete=True) - def _map_revision_to_branch(self, key, revision=None): """ Maps RevisionOptions to BranchNames, inserting them into the key @@ -366,9 +361,6 @@ class DraftVersioningModuleStore(SplitMongoModuleStore, ModuleStoreDraftAndPubli self._flag_publish_event(location.course_key) - # Now it's been published, add the object to the courseware search index so that it appears in search results - CoursewareSearchIndexer.do_publish_index(self, location) - return self.get_item(location.for_branch(ModuleStoreEnum.BranchName.published), **kwargs) def unpublish(self, location, user_id, **kwargs): diff --git a/common/lib/xmodule/xmodule/vertical_block.py b/common/lib/xmodule/xmodule/vertical_block.py index 293c865790..1f2b4b874f 100644 --- a/common/lib/xmodule/xmodule/vertical_block.py +++ b/common/lib/xmodule/xmodule/vertical_block.py @@ -128,3 +128,23 @@ class VerticalBlock(SequenceFields, XModuleFields, StudioEditableBlock, XmlParse # TODO: Remove this when studio better supports editing of pure XBlocks. fragment.add_javascript('VerticalBlock = XModule.Descriptor;') return fragment + + def index_dictionary(self): + """ + Return dictionary prepared with module content and type for indexing. + """ + # return key/value fields in a Python dict object + # values may be numeric / string or dict + # default implementation is an empty dict + xblock_body = super(VerticalBlock, self).index_dictionary() + index_body = { + "display_name": self.display_name, + } + if "content" in xblock_body: + xblock_body["content"].update(index_body) + else: + xblock_body["content"] = index_body + # We use "Sequence" for sequentials and verticals + xblock_body["content_type"] = "Sequence" + + return xblock_body diff --git a/common/test/acceptance/tests/lms/test_lms_courseware_search.py b/common/test/acceptance/tests/lms/test_lms_courseware_search.py index c648c9e5dc..9a31f44b62 100644 --- a/common/test/acceptance/tests/lms/test_lms_courseware_search.py +++ b/common/test/acceptance/tests/lms/test_lms_courseware_search.py @@ -84,17 +84,11 @@ class CoursewareSearchTest(UniqueCourseTest): AutoAuthPage(self.browser, username=username, email=email, course_id=self.course_id, staff=staff).visit() - def test_page_existence(self): - """ - Make sure that the page is accessible. - """ - self._auto_auth(self.USERNAME, self.EMAIL, False) - self.courseware_search_page.visit() - def _studio_publish_content(self, section_index): """ Publish content on studio course page under specified section """ + self._auto_auth(self.STAFF_USERNAME, self.STAFF_EMAIL, True) self.course_outline.visit() subsection = self.course_outline.section_at(section_index).subsection_at(0) subsection.expand_subsection() @@ -105,6 +99,7 @@ class CoursewareSearchTest(UniqueCourseTest): """ Edit chapter name on studio course page under specified section """ + self._auto_auth(self.STAFF_USERNAME, self.STAFF_EMAIL, True) self.course_outline.visit() section = self.course_outline.section_at(section_index) section.change_name(self.EDITED_CHAPTER_NAME) @@ -114,6 +109,7 @@ class CoursewareSearchTest(UniqueCourseTest): Add content on studio course page under specified section """ + self._auto_auth(self.STAFF_USERNAME, self.STAFF_EMAIL, True) # create a unit in course outline self.course_outline.visit() subsection = self.course_outline.section_at(section_index).subsection_at(0) @@ -140,30 +136,44 @@ class CoursewareSearchTest(UniqueCourseTest): self.course_outline.start_reindex() self.course_outline.wait_for_ajax() + def _search_for_content(self, search_term): + """ + Login and search for specific content + + Arguments: + search_term - term to be searched for + + Returns: + (bool) True if search term is found in resulting content; False if not found + """ + self._auto_auth(self.USERNAME, self.EMAIL, False) + self.courseware_search_page.visit() + self.courseware_search_page.search_for_term(search_term) + return search_term in self.courseware_search_page.search_results.html[0] + + def test_page_existence(self): + """ + Make sure that the page is accessible. + """ + self._auto_auth(self.USERNAME, self.EMAIL, False) + self.courseware_search_page.visit() + def test_search(self): """ Make sure that you can search for something. """ # Create content in studio without publishing. - self._auto_auth(self.STAFF_USERNAME, self.STAFF_EMAIL, True) self._studio_add_content(0) # Do a search, there should be no results shown. - self._auto_auth(self.USERNAME, self.EMAIL, False) - self.courseware_search_page.visit() - self.courseware_search_page.search_for_term(self.SEARCH_STRING) - assert self.SEARCH_STRING not in self.courseware_search_page.search_results.html[0] + self.assertFalse(self._search_for_content(self.SEARCH_STRING)) # Publish in studio to trigger indexing. - self._auto_auth(self.STAFF_USERNAME, self.STAFF_EMAIL, True) self._studio_publish_content(0) # Do the search again, this time we expect results. - self._auto_auth(self.USERNAME, self.EMAIL, False) - self.courseware_search_page.visit() - self.courseware_search_page.search_for_term(self.SEARCH_STRING) - assert self.SEARCH_STRING in self.courseware_search_page.search_results.html[0] + self.assertTrue(self._search_for_content(self.SEARCH_STRING)) def test_reindex(self): """ @@ -171,24 +181,24 @@ class CoursewareSearchTest(UniqueCourseTest): """ # Create content in studio without publishing. - self._auto_auth(self.STAFF_USERNAME, self.STAFF_EMAIL, True) self._studio_add_content(1) + # Do a search, there should be no results shown. + self.assertFalse(self._search_for_content(self.EDITED_SEARCH_STRING)) + # Publish in studio to trigger indexing, and edit chapter name afterwards. self._studio_publish_content(1) + + # Do a ReIndex from studio to ensure that our stuff is updated before the next stage of the test + self._studio_reindex() + + # Search after publish, there should still be no results shown. + self.assertFalse(self._search_for_content(self.EDITED_SEARCH_STRING)) + self._studio_edit_chapter_name(1) - # Do a search, there should be no results shown. - self._auto_auth(self.USERNAME, self.EMAIL, False) - self.courseware_search_page.visit() - self.courseware_search_page.search_for_term(self.EDITED_SEARCH_STRING) - assert self.EDITED_SEARCH_STRING not in self.courseware_search_page.search_results.html[0] - - # Do a ReIndex from studio, to add edited chapter name + # Do a ReIndex from studio to ensure that our stuff is updated before the next stage of the test self._studio_reindex() # Do the search again, this time we expect results. - self._auto_auth(self.USERNAME, self.EMAIL, False) - self.courseware_search_page.visit() - self.courseware_search_page.search_for_term(self.EDITED_SEARCH_STRING) - assert self.EDITED_SEARCH_STRING in self.courseware_search_page.search_results.html[0] + self.assertTrue(self._search_for_content(self.EDITED_SEARCH_STRING)) diff --git a/requirements/edx/github.txt b/requirements/edx/github.txt index 8f9d1693d6..91a478a2b4 100644 --- a/requirements/edx/github.txt +++ b/requirements/edx/github.txt @@ -38,7 +38,7 @@ git+https://github.com/mitocw/django-cas.git@60a5b8e5a62e63e0d5d224a87f0b489201a -e git+https://github.com/edx/edx-val.git@d6087908aa3dd05ceaa7f56a21284f86c53cb3f0#egg=edx-val -e git+https://github.com/pmitros/RecommenderXBlock.git@9b07e807c89ba5761827d0387177f71aa57ef056#egg=recommender-xblock -e git+https://github.com/edx/edx-milestones.git@547f2250ee49e73ce8d7ff4e78ecf1b049892510#egg=edx-milestones --e git+https://github.com/edx/edx-search.git@5eb3853fe74c8fdd9d82d36b8ab0058309390f86#egg=edx-search +-e git+https://github.com/edx/edx-search.git@6ce8b25a2539370de32dc7cb643ae27c9b8f798d#egg=edx-search git+https://github.com/edx/edx-lint.git@8bf82a32ecb8598c415413df66f5232ab8d974e9#egg=edx_lint==0.2.1 -e git+https://github.com/edx/xblock-utils.git@581ed636c862b286002bb9a3724cc883570eb54c#egg=xblock-utils -e git+https://github.com/edx-solutions/xblock-google-drive.git@138e6fa0bf3a2013e904a085b9fed77dab7f3f21#egg=xblock-google-drive