Merge pull request #7313 from edx/mjames/SOL-174
SOL-174 - Indexing within celery task
This commit is contained in:
@@ -0,0 +1,2 @@
|
||||
""" module init will register signal handlers """
|
||||
import contentstore.signals
|
||||
|
||||
194
cms/djangoapps/contentstore/courseware_index.py
Normal file
194
cms/djangoapps/contentstore/courseware_index.py
Normal file
@@ -0,0 +1,194 @@
|
||||
""" Code to allow module store to interface with courseware index """
|
||||
from __future__ import absolute_import
|
||||
|
||||
from datetime import timedelta
|
||||
import logging
|
||||
|
||||
from django.conf import settings
|
||||
from django.utils.translation import ugettext as _
|
||||
from eventtracking import tracker
|
||||
from xmodule.modulestore import ModuleStoreEnum
|
||||
from search.search_engine_base import SearchEngine
|
||||
|
||||
|
||||
# Use default index and document names for now
|
||||
INDEX_NAME = "courseware_index"
|
||||
DOCUMENT_TYPE = "courseware_content"
|
||||
|
||||
# REINDEX_AGE is the default amount of time that we look back for changes
|
||||
# that might have happened. If we are provided with a time at which the
|
||||
# indexing is triggered, then we know it is safe to only index items
|
||||
# recently changed at that time. This is the time period that represents
|
||||
# how far back from the trigger point to look back in order to index
|
||||
REINDEX_AGE = timedelta(0, 60) # 60 seconds
|
||||
|
||||
log = logging.getLogger('edx.modulestore')
|
||||
|
||||
|
||||
def indexing_is_enabled():
|
||||
"""
|
||||
Checks to see if the indexing feature is enabled
|
||||
"""
|
||||
return settings.FEATURES.get('ENABLE_COURSEWARE_INDEX', False)
|
||||
|
||||
|
||||
class SearchIndexingError(Exception):
|
||||
""" Indicates some error(s) occured during indexing """
|
||||
|
||||
def __init__(self, message, error_list):
|
||||
super(SearchIndexingError, self).__init__(message)
|
||||
self.error_list = error_list
|
||||
|
||||
|
||||
class CoursewareSearchIndexer(object):
|
||||
"""
|
||||
Class to perform indexing for courseware search from different modulestores
|
||||
"""
|
||||
|
||||
@classmethod
|
||||
def index_course(cls, modulestore, course_key, triggered_at=None, reindex_age=REINDEX_AGE):
|
||||
"""
|
||||
Process course for indexing
|
||||
|
||||
Arguments:
|
||||
course_key (CourseKey) - course identifier
|
||||
|
||||
triggered_at (datetime) - provides time at which indexing was triggered;
|
||||
useful for index updates - only things changed recently from that date
|
||||
(within REINDEX_AGE above ^^) will have their index updated, others skip
|
||||
updating their index but are still walked through in order to identify
|
||||
which items may need to be removed from the index
|
||||
If None, then a full reindex takes place
|
||||
|
||||
Returns:
|
||||
Number of items that have been added to the index
|
||||
"""
|
||||
error_list = []
|
||||
searcher = SearchEngine.get_search_engine(INDEX_NAME)
|
||||
if not searcher:
|
||||
return
|
||||
|
||||
location_info = {
|
||||
"course": unicode(course_key),
|
||||
}
|
||||
|
||||
# Wrap counter in dictionary - otherwise we seem to lose scope inside the embedded function `index_item`
|
||||
indexed_count = {
|
||||
"count": 0
|
||||
}
|
||||
|
||||
# indexed_items is a list of all the items that we wish to remain in the
|
||||
# index, whether or not we are planning to actually update their index.
|
||||
# This is used in order to build a query to remove those items not in this
|
||||
# list - those are ready to be destroyed
|
||||
indexed_items = set()
|
||||
|
||||
def index_item(item, skip_index=False):
|
||||
"""
|
||||
Add this item to the search index and indexed_items list
|
||||
|
||||
Arguments:
|
||||
item - item to add to index, its children will be processed recursively
|
||||
|
||||
skip_index - simply walk the children in the tree, the content change is
|
||||
older than the REINDEX_AGE window and would have been already indexed.
|
||||
This should really only be passed from the recursive child calls when
|
||||
this method has determined that it is safe to do so
|
||||
"""
|
||||
is_indexable = hasattr(item, "index_dictionary")
|
||||
item_index_dictionary = item.index_dictionary() if is_indexable else None
|
||||
# if it's not indexable and it does not have children, then ignore
|
||||
if not item_index_dictionary and not item.has_children:
|
||||
return
|
||||
|
||||
item_id = unicode(item.scope_ids.usage_id)
|
||||
indexed_items.add(item_id)
|
||||
if item.has_children:
|
||||
# determine if it's okay to skip adding the children herein based upon how recently any may have changed
|
||||
skip_child_index = skip_index or \
|
||||
(triggered_at is not None and (triggered_at - item.subtree_edited_on) > reindex_age)
|
||||
for child_item in item.get_children():
|
||||
index_item(child_item, skip_index=skip_child_index)
|
||||
|
||||
if skip_index or not item_index_dictionary:
|
||||
return
|
||||
|
||||
item_index = {}
|
||||
# if it has something to add to the index, then add it
|
||||
try:
|
||||
item_index.update(location_info)
|
||||
item_index.update(item_index_dictionary)
|
||||
item_index['id'] = item_id
|
||||
if item.start:
|
||||
item_index['start_date'] = item.start
|
||||
|
||||
searcher.index(DOCUMENT_TYPE, item_index)
|
||||
indexed_count["count"] += 1
|
||||
except Exception as err: # pylint: disable=broad-except
|
||||
# broad exception so that index operation does not fail on one item of many
|
||||
log.warning('Could not index item: %s - %r', item.location, err)
|
||||
error_list.append(_('Could not index item: {}').format(item.location))
|
||||
|
||||
def remove_deleted_items():
|
||||
"""
|
||||
remove any item that is present in the search index that is not present in updated list of indexed items
|
||||
as we find items we can shorten the set of items to keep
|
||||
"""
|
||||
response = searcher.search(
|
||||
doc_type=DOCUMENT_TYPE,
|
||||
field_dictionary={"course": unicode(course_key)},
|
||||
exclude_ids=indexed_items
|
||||
)
|
||||
result_ids = [result["data"]["id"] for result in response["results"]]
|
||||
for result_id in result_ids:
|
||||
searcher.remove(DOCUMENT_TYPE, result_id)
|
||||
|
||||
try:
|
||||
with modulestore.branch_setting(ModuleStoreEnum.RevisionOption.published_only):
|
||||
course = modulestore.get_course(course_key, depth=None)
|
||||
for item in course.get_children():
|
||||
index_item(item)
|
||||
remove_deleted_items()
|
||||
except Exception as err: # pylint: disable=broad-except
|
||||
# broad exception so that index operation does not prevent the rest of the application from working
|
||||
log.exception(
|
||||
"Indexing error encountered, courseware index may be out of date %s - %r",
|
||||
course_key,
|
||||
err
|
||||
)
|
||||
error_list.append(_('General indexing error occurred'))
|
||||
|
||||
if error_list:
|
||||
raise SearchIndexingError(_('Error(s) present during indexing'), error_list)
|
||||
|
||||
return indexed_count["count"]
|
||||
|
||||
@classmethod
|
||||
def do_course_reindex(cls, modulestore, course_key):
|
||||
"""
|
||||
(Re)index all content within the given course, tracking the fact that a full reindex has taking place
|
||||
"""
|
||||
indexed_count = cls.index_course(modulestore, course_key)
|
||||
if indexed_count:
|
||||
cls._track_index_request('edx.course.index.reindexed', indexed_count)
|
||||
return indexed_count
|
||||
|
||||
@classmethod
|
||||
def _track_index_request(cls, event_name, indexed_count):
|
||||
"""Track content index requests.
|
||||
|
||||
Arguments:
|
||||
event_name (str): Name of the event to be logged.
|
||||
Returns:
|
||||
None
|
||||
|
||||
"""
|
||||
data = {
|
||||
"indexed_count": indexed_count,
|
||||
'category': 'courseware_index',
|
||||
}
|
||||
|
||||
tracker.emit(
|
||||
event_name,
|
||||
data
|
||||
)
|
||||
19
cms/djangoapps/contentstore/signals.py
Normal file
19
cms/djangoapps/contentstore/signals.py
Normal file
@@ -0,0 +1,19 @@
|
||||
""" receiver of course_published events in order to trigger indexing task """
|
||||
from datetime import datetime
|
||||
from pytz import UTC
|
||||
|
||||
from django.dispatch import receiver
|
||||
|
||||
from xmodule.modulestore.django import SignalHandler
|
||||
from contentstore.courseware_index import indexing_is_enabled
|
||||
|
||||
|
||||
@receiver(SignalHandler.course_published)
|
||||
def listen_for_course_publish(sender, course_key, **kwargs): # pylint: disable=unused-argument
|
||||
"""
|
||||
Receives signal and kicks off celery task to update search index
|
||||
"""
|
||||
# import here, because signal is registered at startup, but items in tasks are not yet able to be loaded
|
||||
from .tasks import update_search_index
|
||||
if indexing_is_enabled():
|
||||
update_search_index.delay(unicode(course_key), datetime.now(UTC).isoformat())
|
||||
@@ -1,20 +1,25 @@
|
||||
"""
|
||||
This file contains celery tasks for contentstore views
|
||||
"""
|
||||
|
||||
from celery.task import task
|
||||
from django.contrib.auth.models import User
|
||||
import json
|
||||
import logging
|
||||
from xmodule.modulestore.django import modulestore
|
||||
from xmodule.course_module import CourseFields
|
||||
from celery.task import task
|
||||
from celery.utils.log import get_task_logger
|
||||
from datetime import datetime
|
||||
from pytz import UTC
|
||||
|
||||
from xmodule.modulestore.exceptions import DuplicateCourseError, ItemNotFoundError
|
||||
from course_action_state.models import CourseRerunState
|
||||
from django.contrib.auth.models import User
|
||||
|
||||
from contentstore.courseware_index import CoursewareSearchIndexer, SearchIndexingError
|
||||
from contentstore.utils import initialize_permissions
|
||||
from course_action_state.models import CourseRerunState
|
||||
from opaque_keys.edx.keys import CourseKey
|
||||
from xmodule.course_module import CourseFields
|
||||
from xmodule.modulestore.django import modulestore
|
||||
from xmodule.modulestore.exceptions import DuplicateCourseError, ItemNotFoundError
|
||||
|
||||
from edxval.api import copy_course_videos
|
||||
LOGGER = get_task_logger(__name__)
|
||||
FULL_COURSE_REINDEX_THRESHOLD = 1
|
||||
|
||||
|
||||
@task()
|
||||
@@ -22,6 +27,9 @@ def rerun_course(source_course_key_string, destination_course_key_string, user_i
|
||||
"""
|
||||
Reruns a course in a new celery task.
|
||||
"""
|
||||
# import here, at top level this import prevents the celery workers from starting up correctly
|
||||
from edxval.api import copy_course_videos
|
||||
|
||||
try:
|
||||
# deserialize the payload
|
||||
source_course_key = CourseKey.from_string(source_course_key_string)
|
||||
@@ -72,3 +80,21 @@ def deserialize_fields(json_fields):
|
||||
for field_name, value in fields.iteritems():
|
||||
fields[field_name] = getattr(CourseFields, field_name).from_json(value)
|
||||
return fields
|
||||
|
||||
|
||||
@task()
|
||||
def update_search_index(course_id, triggered_time_isoformat):
|
||||
""" Updates course search index. """
|
||||
try:
|
||||
course_key = CourseKey.from_string(course_id)
|
||||
triggered_time = datetime.strptime(
|
||||
# remove the +00:00 from the end of the formats generated within the system
|
||||
triggered_time_isoformat.split('+')[0],
|
||||
"%Y-%m-%dT%H:%M:%S.%f"
|
||||
).replace(tzinfo=UTC)
|
||||
CoursewareSearchIndexer.index_course(modulestore(), course_key, triggered_at=triggered_time)
|
||||
|
||||
except SearchIndexingError as exc:
|
||||
LOGGER.error('Search indexing error for complete course %s - %s', course_id, unicode(exc))
|
||||
else:
|
||||
LOGGER.debug('Search indexing successful for complete course %s', course_id)
|
||||
|
||||
565
cms/djangoapps/contentstore/tests/test_courseware_index.py
Normal file
565
cms/djangoapps/contentstore/tests/test_courseware_index.py
Normal file
@@ -0,0 +1,565 @@
|
||||
"""
|
||||
Testing indexing of the courseware as it is changed
|
||||
"""
|
||||
import ddt
|
||||
import time
|
||||
from datetime import datetime
|
||||
from mock import patch
|
||||
from pytz import UTC
|
||||
from uuid import uuid4
|
||||
from unittest import skip
|
||||
|
||||
from xmodule.modulestore import ModuleStoreEnum
|
||||
from xmodule.modulestore.django import SignalHandler
|
||||
from xmodule.modulestore.edit_info import EditInfoMixin
|
||||
from xmodule.modulestore.inheritance import InheritanceMixin
|
||||
from xmodule.modulestore.mixed import MixedModuleStore
|
||||
from xmodule.modulestore.tests.django_utils import ModuleStoreTestCase
|
||||
from xmodule.modulestore.tests.factories import CourseFactory, ItemFactory
|
||||
from xmodule.modulestore.tests.mongo_connection import MONGO_PORT_NUM, MONGO_HOST
|
||||
from xmodule.modulestore.tests.test_cross_modulestore_import_export import MongoContentstoreBuilder
|
||||
from xmodule.modulestore.tests.utils import create_modulestore_instance, LocationMixin, MixedSplitTestCase
|
||||
from xmodule.tests import DATA_DIR
|
||||
from xmodule.x_module import XModuleMixin
|
||||
from search.search_engine_base import SearchEngine
|
||||
|
||||
from contentstore.courseware_index import CoursewareSearchIndexer, INDEX_NAME, DOCUMENT_TYPE, SearchIndexingError
|
||||
from contentstore.signals import listen_for_course_publish
|
||||
|
||||
|
||||
COURSE_CHILD_STRUCTURE = {
|
||||
"course": "chapter",
|
||||
"chapter": "sequential",
|
||||
"sequential": "vertical",
|
||||
"vertical": "html",
|
||||
}
|
||||
|
||||
|
||||
def create_children(store, parent, category, load_factor):
|
||||
""" create load_factor children within the given parent; recursively call to insert children when appropriate """
|
||||
created_count = 0
|
||||
for child_index in range(0, load_factor):
|
||||
child_object = ItemFactory.create(
|
||||
parent_location=parent.location,
|
||||
category=category,
|
||||
display_name=u"{} {} {}".format(category, child_index, time.clock()),
|
||||
modulestore=store,
|
||||
publish_item=True,
|
||||
start=datetime(2015, 3, 1, tzinfo=UTC),
|
||||
)
|
||||
created_count += 1
|
||||
|
||||
if category in COURSE_CHILD_STRUCTURE:
|
||||
created_count += create_children(store, child_object, COURSE_CHILD_STRUCTURE[category], load_factor)
|
||||
|
||||
return created_count
|
||||
|
||||
|
||||
def create_large_course(store, load_factor):
|
||||
"""
|
||||
Create a large course, note that the number of blocks created will be
|
||||
load_factor ^ 4 - e.g. load_factor of 10 => 10 chapters, 100
|
||||
sequentials, 1000 verticals, 10000 html blocks
|
||||
"""
|
||||
course = CourseFactory.create(modulestore=store, start=datetime(2015, 3, 1, tzinfo=UTC))
|
||||
with store.bulk_operations(course.id):
|
||||
child_count = create_children(store, course, COURSE_CHILD_STRUCTURE["course"], load_factor)
|
||||
return course, child_count
|
||||
|
||||
|
||||
class MixedWithOptionsTestCase(MixedSplitTestCase):
|
||||
""" Base class for test cases within this file """
|
||||
HOST = MONGO_HOST
|
||||
PORT = MONGO_PORT_NUM
|
||||
DATABASE = 'test_mongo_%s' % uuid4().hex[:5]
|
||||
COLLECTION = 'modulestore'
|
||||
ASSET_COLLECTION = 'assetstore'
|
||||
DEFAULT_CLASS = 'xmodule.raw_module.RawDescriptor'
|
||||
RENDER_TEMPLATE = lambda t_n, d, ctx=None, nsp='main': ''
|
||||
modulestore_options = {
|
||||
'default_class': DEFAULT_CLASS,
|
||||
'fs_root': DATA_DIR,
|
||||
'render_template': RENDER_TEMPLATE,
|
||||
'xblock_mixins': (EditInfoMixin, InheritanceMixin, LocationMixin, XModuleMixin),
|
||||
}
|
||||
DOC_STORE_CONFIG = {
|
||||
'host': HOST,
|
||||
'port': PORT,
|
||||
'db': DATABASE,
|
||||
'collection': COLLECTION,
|
||||
'asset_collection': ASSET_COLLECTION,
|
||||
}
|
||||
OPTIONS = {
|
||||
'stores': [
|
||||
{
|
||||
'NAME': 'draft',
|
||||
'ENGINE': 'xmodule.modulestore.mongo.draft.DraftModuleStore',
|
||||
'DOC_STORE_CONFIG': DOC_STORE_CONFIG,
|
||||
'OPTIONS': modulestore_options
|
||||
},
|
||||
{
|
||||
'NAME': 'split',
|
||||
'ENGINE': 'xmodule.modulestore.split_mongo.split_draft.DraftVersioningModuleStore',
|
||||
'DOC_STORE_CONFIG': DOC_STORE_CONFIG,
|
||||
'OPTIONS': modulestore_options
|
||||
},
|
||||
{
|
||||
'NAME': 'xml',
|
||||
'ENGINE': 'xmodule.modulestore.xml.XMLModuleStore',
|
||||
'OPTIONS': {
|
||||
'data_dir': DATA_DIR,
|
||||
'default_class': 'xmodule.hidden_module.HiddenDescriptor',
|
||||
'xblock_mixins': modulestore_options['xblock_mixins'],
|
||||
}
|
||||
},
|
||||
],
|
||||
'xblock_mixins': modulestore_options['xblock_mixins'],
|
||||
}
|
||||
|
||||
def setUp(self):
|
||||
super(MixedWithOptionsTestCase, self).setUp()
|
||||
|
||||
def setup_course_base(self, store):
|
||||
""" base version of setup_course_base is a no-op """
|
||||
pass
|
||||
|
||||
def get_search_engine(self):
|
||||
""" Centralized call to getting the search engine for the test """
|
||||
return SearchEngine.get_search_engine(INDEX_NAME)
|
||||
|
||||
def _perform_test_using_store(self, store_type, test_to_perform):
|
||||
""" Helper method to run a test function that uses a specific store """
|
||||
with MongoContentstoreBuilder().build() as contentstore:
|
||||
store = MixedModuleStore(
|
||||
contentstore=contentstore,
|
||||
create_modulestore_instance=create_modulestore_instance,
|
||||
mappings={},
|
||||
**self.OPTIONS
|
||||
)
|
||||
self.addCleanup(store.close_all_connections)
|
||||
|
||||
with store.default_store(store_type):
|
||||
self.setup_course_base(store)
|
||||
test_to_perform(store)
|
||||
|
||||
def publish_item(self, store, item_location):
|
||||
""" publish the item at the given location """
|
||||
with store.branch_setting(ModuleStoreEnum.Branch.draft_preferred):
|
||||
store.publish(item_location, ModuleStoreEnum.UserID.test)
|
||||
|
||||
def delete_item(self, store, item_location):
|
||||
""" delete the item at the given location """
|
||||
with store.branch_setting(ModuleStoreEnum.Branch.draft_preferred):
|
||||
store.delete_item(item_location, ModuleStoreEnum.UserID.test)
|
||||
|
||||
def update_item(self, store, item):
|
||||
""" update the item at the given location """
|
||||
with store.branch_setting(ModuleStoreEnum.Branch.draft_preferred):
|
||||
store.update_item(item, ModuleStoreEnum.UserID.test)
|
||||
|
||||
|
||||
@ddt.ddt
|
||||
class TestCoursewareSearchIndexer(MixedWithOptionsTestCase):
|
||||
""" Tests the operation of the CoursewareSearchIndexer """
|
||||
|
||||
def setUp(self):
|
||||
super(TestCoursewareSearchIndexer, self).setUp()
|
||||
|
||||
self.course = None
|
||||
self.chapter = None
|
||||
self.sequential = None
|
||||
self.vertical = None
|
||||
self.html_unit = None
|
||||
|
||||
def setup_course_base(self, store):
|
||||
"""
|
||||
Set up the for the course outline tests.
|
||||
"""
|
||||
self.course = CourseFactory.create(modulestore=store, start=datetime(2015, 3, 1, tzinfo=UTC))
|
||||
|
||||
self.chapter = ItemFactory.create(
|
||||
parent_location=self.course.location,
|
||||
category='chapter',
|
||||
display_name="Week 1",
|
||||
modulestore=store,
|
||||
publish_item=True,
|
||||
start=datetime(2015, 3, 1, tzinfo=UTC),
|
||||
)
|
||||
self.sequential = ItemFactory.create(
|
||||
parent_location=self.chapter.location,
|
||||
category='sequential',
|
||||
display_name="Lesson 1",
|
||||
modulestore=store,
|
||||
publish_item=True,
|
||||
start=datetime(2015, 3, 1, tzinfo=UTC),
|
||||
)
|
||||
self.vertical = ItemFactory.create(
|
||||
parent_location=self.sequential.location,
|
||||
category='vertical',
|
||||
display_name='Subsection 1',
|
||||
modulestore=store,
|
||||
publish_item=True,
|
||||
start=datetime(2015, 4, 1, tzinfo=UTC),
|
||||
)
|
||||
# unspecified start - should inherit from container
|
||||
self.html_unit = ItemFactory.create(
|
||||
parent_location=self.vertical.location,
|
||||
category="html",
|
||||
display_name="Html Content",
|
||||
modulestore=store,
|
||||
publish_item=False,
|
||||
)
|
||||
|
||||
def reindex_course(self, store):
|
||||
""" kick off complete reindex of the course """
|
||||
return CoursewareSearchIndexer.do_course_reindex(store, self.course.id)
|
||||
|
||||
def index_recent_changes(self, store, since_time):
|
||||
""" index course using recent changes """
|
||||
trigger_time = datetime.now(UTC)
|
||||
return CoursewareSearchIndexer.index_course(
|
||||
store,
|
||||
self.course.id,
|
||||
triggered_at=trigger_time,
|
||||
reindex_age=(trigger_time - since_time)
|
||||
)
|
||||
|
||||
def _test_indexing_course(self, store):
|
||||
""" indexing course tests """
|
||||
searcher = self.get_search_engine()
|
||||
response = searcher.search(field_dictionary={"course": unicode(self.course.id)})
|
||||
self.assertEqual(response["total"], 0)
|
||||
|
||||
# Only published modules should be in the index
|
||||
added_to_index = self.reindex_course(store)
|
||||
self.assertEqual(added_to_index, 3)
|
||||
response = searcher.search(field_dictionary={"course": unicode(self.course.id)})
|
||||
self.assertEqual(response["total"], 3)
|
||||
|
||||
# Publish the vertical as is, and any unpublished children should now be available
|
||||
self.publish_item(store, self.vertical.location)
|
||||
self.reindex_course(store)
|
||||
response = searcher.search(field_dictionary={"course": unicode(self.course.id)})
|
||||
self.assertEqual(response["total"], 4)
|
||||
|
||||
def _test_not_indexing_unpublished_content(self, store):
|
||||
""" add a new one, only appers in index once added """
|
||||
searcher = self.get_search_engine()
|
||||
# Publish the vertical to start with
|
||||
self.publish_item(store, self.vertical.location)
|
||||
self.reindex_course(store)
|
||||
response = searcher.search(field_dictionary={"course": unicode(self.course.id)})
|
||||
self.assertEqual(response["total"], 4)
|
||||
|
||||
# Now add a new unit to the existing vertical
|
||||
ItemFactory.create(
|
||||
parent_location=self.vertical.location,
|
||||
category="html",
|
||||
display_name="Some other content",
|
||||
publish_item=False,
|
||||
modulestore=store,
|
||||
)
|
||||
self.reindex_course(store)
|
||||
response = searcher.search(field_dictionary={"course": unicode(self.course.id)})
|
||||
self.assertEqual(response["total"], 4)
|
||||
|
||||
# Now publish it and we should find it
|
||||
# Publish the vertical as is, and everything should be available
|
||||
self.publish_item(store, self.vertical.location)
|
||||
self.reindex_course(store)
|
||||
response = searcher.search(field_dictionary={"course": unicode(self.course.id)})
|
||||
self.assertEqual(response["total"], 5)
|
||||
|
||||
def _test_deleting_item(self, store):
|
||||
""" test deleting an item """
|
||||
searcher = self.get_search_engine()
|
||||
# Publish the vertical to start with
|
||||
self.publish_item(store, self.vertical.location)
|
||||
self.reindex_course(store)
|
||||
response = searcher.search(field_dictionary={"course": unicode(self.course.id)})
|
||||
self.assertEqual(response["total"], 4)
|
||||
|
||||
# just a delete should not change anything
|
||||
self.delete_item(store, self.html_unit.location)
|
||||
self.reindex_course(store)
|
||||
response = searcher.search(field_dictionary={"course": unicode(self.course.id)})
|
||||
self.assertEqual(response["total"], 4)
|
||||
|
||||
# but after publishing, we should no longer find the html_unit
|
||||
self.publish_item(store, self.vertical.location)
|
||||
self.reindex_course(store)
|
||||
response = searcher.search(field_dictionary={"course": unicode(self.course.id)})
|
||||
self.assertEqual(response["total"], 3)
|
||||
|
||||
def _test_not_indexable(self, store):
|
||||
""" test not indexable items """
|
||||
searcher = self.get_search_engine()
|
||||
# Publish the vertical to start with
|
||||
self.publish_item(store, self.vertical.location)
|
||||
self.reindex_course(store)
|
||||
response = searcher.search(field_dictionary={"course": unicode(self.course.id)})
|
||||
self.assertEqual(response["total"], 4)
|
||||
|
||||
# Add a non-indexable item
|
||||
ItemFactory.create(
|
||||
parent_location=self.vertical.location,
|
||||
category="problem",
|
||||
display_name="Some other content",
|
||||
publish_item=False,
|
||||
modulestore=store,
|
||||
)
|
||||
self.reindex_course(store)
|
||||
response = searcher.search(field_dictionary={"course": unicode(self.course.id)})
|
||||
self.assertEqual(response["total"], 4)
|
||||
|
||||
# even after publishing, we should not find the non-indexable item
|
||||
self.publish_item(store, self.vertical.location)
|
||||
self.reindex_course(store)
|
||||
response = searcher.search(field_dictionary={"course": unicode(self.course.id)})
|
||||
self.assertEqual(response["total"], 4)
|
||||
|
||||
def _test_start_date_propagation(self, store):
|
||||
""" make sure that the start date is applied at the right level """
|
||||
searcher = self.get_search_engine()
|
||||
early_date = self.course.start
|
||||
later_date = self.vertical.start
|
||||
|
||||
# Publish the vertical
|
||||
self.publish_item(store, self.vertical.location)
|
||||
self.reindex_course(store)
|
||||
response = searcher.search(field_dictionary={"course": unicode(self.course.id)})
|
||||
self.assertEqual(response["total"], 4)
|
||||
|
||||
results = response["results"]
|
||||
date_map = {
|
||||
unicode(self.chapter.location): early_date,
|
||||
unicode(self.sequential.location): early_date,
|
||||
unicode(self.vertical.location): later_date,
|
||||
unicode(self.html_unit.location): later_date,
|
||||
}
|
||||
for result in results:
|
||||
self.assertEqual(result["data"]["start_date"], date_map[result["data"]["id"]])
|
||||
|
||||
@patch('django.conf.settings.SEARCH_ENGINE', None)
|
||||
def _test_search_disabled(self, store):
|
||||
""" if search setting has it as off, confirm that nothing is indexed """
|
||||
indexed_count = self.reindex_course(store)
|
||||
self.assertFalse(indexed_count)
|
||||
|
||||
def _test_time_based_index(self, store):
|
||||
""" Make sure that a time based request to index does not index anything too old """
|
||||
self.publish_item(store, self.vertical.location)
|
||||
indexed_count = self.reindex_course(store)
|
||||
self.assertEqual(indexed_count, 4)
|
||||
|
||||
# Add a new sequential
|
||||
sequential2 = ItemFactory.create(
|
||||
parent_location=self.chapter.location,
|
||||
category='sequential',
|
||||
display_name='Section 2',
|
||||
modulestore=store,
|
||||
publish_item=True,
|
||||
start=datetime(2015, 3, 1, tzinfo=UTC),
|
||||
)
|
||||
|
||||
# add a new vertical
|
||||
vertical2 = ItemFactory.create(
|
||||
parent_location=sequential2.location,
|
||||
category='vertical',
|
||||
display_name='Subsection 2',
|
||||
modulestore=store,
|
||||
publish_item=True,
|
||||
)
|
||||
ItemFactory.create(
|
||||
parent_location=vertical2.location,
|
||||
category="html",
|
||||
display_name="Some other content",
|
||||
publish_item=False,
|
||||
modulestore=store,
|
||||
)
|
||||
|
||||
before_time = datetime.now(UTC)
|
||||
self.publish_item(store, vertical2.location)
|
||||
# index based on time, will include an index of the origin sequential
|
||||
# because it is in a common subtree but not of the original vertical
|
||||
# because the original sequential's subtree is too old
|
||||
new_indexed_count = self.index_recent_changes(store, before_time)
|
||||
self.assertEqual(new_indexed_count, 5)
|
||||
|
||||
# full index again
|
||||
indexed_count = self.reindex_course(store)
|
||||
self.assertEqual(indexed_count, 7)
|
||||
|
||||
@patch('django.conf.settings.SEARCH_ENGINE', 'search.tests.utils.ErroringIndexEngine')
|
||||
def _test_exception(self, store):
|
||||
""" Test that exception within indexing yields a SearchIndexingError """
|
||||
self.publish_item(store, self.vertical.location)
|
||||
with self.assertRaises(SearchIndexingError):
|
||||
self.reindex_course(store)
|
||||
|
||||
@ddt.data(ModuleStoreEnum.Type.mongo, ModuleStoreEnum.Type.split)
|
||||
def test_indexing_course(self, store_type):
|
||||
self._perform_test_using_store(store_type, self._test_indexing_course)
|
||||
|
||||
@ddt.data(ModuleStoreEnum.Type.mongo, ModuleStoreEnum.Type.split)
|
||||
def test_not_indexing_unpublished_content(self, store_type):
|
||||
self._perform_test_using_store(store_type, self._test_not_indexing_unpublished_content)
|
||||
|
||||
@ddt.data(ModuleStoreEnum.Type.mongo, ModuleStoreEnum.Type.split)
|
||||
def test_deleting_item(self, store_type):
|
||||
self._perform_test_using_store(store_type, self._test_deleting_item)
|
||||
|
||||
@ddt.data(ModuleStoreEnum.Type.mongo, ModuleStoreEnum.Type.split)
|
||||
def test_not_indexable(self, store_type):
|
||||
self._perform_test_using_store(store_type, self._test_not_indexable)
|
||||
|
||||
@ddt.data(ModuleStoreEnum.Type.mongo, ModuleStoreEnum.Type.split)
|
||||
def test_start_date_propagation(self, store_type):
|
||||
self._perform_test_using_store(store_type, self._test_start_date_propagation)
|
||||
|
||||
@ddt.data(ModuleStoreEnum.Type.mongo, ModuleStoreEnum.Type.split)
|
||||
def test_search_disabled(self, store_type):
|
||||
self._perform_test_using_store(store_type, self._test_search_disabled)
|
||||
|
||||
@ddt.data(ModuleStoreEnum.Type.mongo, ModuleStoreEnum.Type.split)
|
||||
def test_time_based_index(self, store_type):
|
||||
self._perform_test_using_store(store_type, self._test_time_based_index)
|
||||
|
||||
@ddt.data(ModuleStoreEnum.Type.mongo, ModuleStoreEnum.Type.split)
|
||||
def test_exception(self, store_type):
|
||||
self._perform_test_using_store(store_type, self._test_exception)
|
||||
|
||||
|
||||
@patch('django.conf.settings.SEARCH_ENGINE', 'search.tests.utils.ForceRefreshElasticSearchEngine')
|
||||
@ddt.ddt
|
||||
class TestLargeCourseDeletions(MixedWithOptionsTestCase):
|
||||
""" Tests to excerise deleting items from a course """
|
||||
|
||||
def _clean_course_id(self):
|
||||
""" Clean all documents from the index that have a specific course provided """
|
||||
if self.course_id:
|
||||
searcher = self.get_search_engine()
|
||||
response = searcher.search(field_dictionary={"course": self.course_id})
|
||||
while response["total"] > 0:
|
||||
for item in response["results"]:
|
||||
searcher.remove(DOCUMENT_TYPE, item["data"]["id"])
|
||||
searcher.remove(DOCUMENT_TYPE, item["data"]["id"])
|
||||
response = searcher.search(field_dictionary={"course": self.course_id})
|
||||
self.course_id = None
|
||||
|
||||
def setUp(self):
|
||||
super(TestLargeCourseDeletions, self).setUp()
|
||||
self.course_id = None
|
||||
|
||||
def tearDown(self):
|
||||
super(TestLargeCourseDeletions, self).tearDown()
|
||||
self._clean_course_id()
|
||||
|
||||
def assert_search_count(self, expected_count):
|
||||
""" Check that the search within this course will yield the expected number of results """
|
||||
searcher = self.get_search_engine()
|
||||
response = searcher.search(field_dictionary={"course": self.course_id})
|
||||
self.assertEqual(response["total"], expected_count)
|
||||
|
||||
def _do_test_large_course_deletion(self, store, load_factor):
|
||||
""" Test that deleting items from a course works even when present within a very large course """
|
||||
def id_list(top_parent_object):
|
||||
""" private function to get ids from object down the tree """
|
||||
list_of_ids = [unicode(top_parent_object.location)]
|
||||
for child in top_parent_object.get_children():
|
||||
list_of_ids.extend(id_list(child))
|
||||
return list_of_ids
|
||||
|
||||
course, course_size = create_large_course(store, load_factor)
|
||||
self.course_id = unicode(course.id)
|
||||
|
||||
# index full course
|
||||
CoursewareSearchIndexer.do_course_reindex(store, course.id)
|
||||
|
||||
self.assert_search_count(course_size)
|
||||
|
||||
# reload course to allow us to delete one single unit
|
||||
course = store.get_course(course.id, depth=1)
|
||||
|
||||
# delete the first chapter
|
||||
chapter_to_delete = course.get_children()[0]
|
||||
self.delete_item(store, chapter_to_delete.location)
|
||||
|
||||
# index and check correctness
|
||||
CoursewareSearchIndexer.do_course_reindex(store, course.id)
|
||||
deleted_count = 1 + load_factor + (load_factor ** 2) + (load_factor ** 3)
|
||||
self.assert_search_count(course_size - deleted_count)
|
||||
|
||||
def _test_large_course_deletion(self, store):
|
||||
""" exception catch-ing wrapper around large test course test with deletions """
|
||||
# load_factor of 6 (1296 items) takes about 5 minutes to run on devstack on a laptop
|
||||
# load_factor of 7 (2401 items) takes about 70 minutes to run on devstack on a laptop
|
||||
# load_factor of 8 (4096 items) takes just under 3 hours to run on devstack on a laptop
|
||||
load_factor = 6
|
||||
try:
|
||||
self._do_test_large_course_deletion(store, load_factor)
|
||||
except: # pylint: disable=bare-except
|
||||
# Catch any exception here to see when we fail
|
||||
print "Failed with load_factor of {}".format(load_factor)
|
||||
|
||||
@skip(("This test is to see how we handle very large courses, to ensure that the delete"
|
||||
"procedure works smoothly - too long to run during the normal course of things"))
|
||||
@ddt.data(ModuleStoreEnum.Type.mongo, ModuleStoreEnum.Type.split)
|
||||
def test_large_course_deletion(self, store_type):
|
||||
self._perform_test_using_store(store_type, self._test_large_course_deletion)
|
||||
|
||||
|
||||
class TestTaskExecution(ModuleStoreTestCase):
|
||||
"""
|
||||
Set of tests to ensure that the task code will do the right thing when
|
||||
executed directly. The test course gets created without the listener
|
||||
being present, which allows us to ensure that when the listener is
|
||||
executed, it is done as expected.
|
||||
"""
|
||||
|
||||
def setUp(self):
|
||||
super(TestTaskExecution, self).setUp()
|
||||
SignalHandler.course_published.disconnect(listen_for_course_publish)
|
||||
self.course = CourseFactory.create(start=datetime(2015, 3, 1, tzinfo=UTC))
|
||||
|
||||
self.chapter = ItemFactory.create(
|
||||
parent_location=self.course.location,
|
||||
category='chapter',
|
||||
display_name="Week 1",
|
||||
publish_item=True,
|
||||
start=datetime(2015, 3, 1, tzinfo=UTC),
|
||||
)
|
||||
self.sequential = ItemFactory.create(
|
||||
parent_location=self.chapter.location,
|
||||
category='sequential',
|
||||
display_name="Lesson 1",
|
||||
publish_item=True,
|
||||
start=datetime(2015, 3, 1, tzinfo=UTC),
|
||||
)
|
||||
self.vertical = ItemFactory.create(
|
||||
parent_location=self.sequential.location,
|
||||
category='vertical',
|
||||
display_name='Subsection 1',
|
||||
publish_item=True,
|
||||
start=datetime(2015, 4, 1, tzinfo=UTC),
|
||||
)
|
||||
# unspecified start - should inherit from container
|
||||
self.html_unit = ItemFactory.create(
|
||||
parent_location=self.vertical.location,
|
||||
category="html",
|
||||
display_name="Html Content",
|
||||
publish_item=False,
|
||||
)
|
||||
|
||||
def test_task_indexing_course(self):
|
||||
""" Making sure that the receiver correctly fires off the task when invoked by signal """
|
||||
searcher = SearchEngine.get_search_engine(INDEX_NAME)
|
||||
response = searcher.search(field_dictionary={"course": unicode(self.course.id)})
|
||||
self.assertEqual(response["total"], 0)
|
||||
|
||||
#update_search_index(unicode(self.course.id), datetime.now(UTC).isoformat())
|
||||
listen_for_course_publish(self, self.course.id)
|
||||
|
||||
# Note that this test will only succeed if celery is working in inline mode
|
||||
response = searcher.search(field_dictionary={"course": unicode(self.course.id)})
|
||||
self.assertEqual(response["total"], 3)
|
||||
@@ -28,7 +28,7 @@ TEST_DATA_DIR = settings.COMMON_TEST_DATA_ROOT
|
||||
|
||||
|
||||
@ddt.ddt
|
||||
@override_settings(CONTENTSTORE=TEST_DATA_CONTENTSTORE)
|
||||
@override_settings(CONTENTSTORE=TEST_DATA_CONTENTSTORE, SEARCH_ENGINE=None)
|
||||
class ContentStoreImportTest(SignalDisconnectTestMixin, ModuleStoreTestCase):
|
||||
"""
|
||||
Tests that rely on the toy and test_import_course courses.
|
||||
|
||||
@@ -459,6 +459,7 @@ class TestLibraries(LibraryTestCase):
|
||||
|
||||
|
||||
@ddt.ddt
|
||||
@patch('django.conf.settings.SEARCH_ENGINE', None)
|
||||
class TestLibraryAccess(SignalDisconnectTestMixin, LibraryTestCase):
|
||||
"""
|
||||
Test Roles and Permissions related to Content Libraries
|
||||
|
||||
@@ -22,7 +22,6 @@ from edxmako.shortcuts import render_to_response
|
||||
from xmodule.course_module import DEFAULT_START_DATE
|
||||
from xmodule.error_module import ErrorDescriptor
|
||||
from xmodule.modulestore.django import modulestore
|
||||
from xmodule.modulestore.courseware_index import CoursewareSearchIndexer, SearchIndexingError
|
||||
from xmodule.contentstore.content import StaticContent
|
||||
from xmodule.tabs import PDFTextbookTabs
|
||||
from xmodule.partitions.partitions import UserPartition
|
||||
@@ -35,6 +34,7 @@ from openedx.core.djangoapps.course_groups.partition_scheme import get_cohorted_
|
||||
|
||||
from django_future.csrf import ensure_csrf_cookie
|
||||
from contentstore.course_info_model import get_course_updates, update_course_updates, delete_course_update
|
||||
from contentstore.courseware_index import CoursewareSearchIndexer, SearchIndexingError
|
||||
from contentstore.utils import (
|
||||
add_instructor,
|
||||
initialize_permissions,
|
||||
|
||||
@@ -6,28 +6,28 @@ import lxml
|
||||
import datetime
|
||||
import os
|
||||
import mock
|
||||
import pytz
|
||||
|
||||
from contentstore.tests.utils import CourseTestCase
|
||||
from contentstore.utils import reverse_course_url, reverse_library_url, add_instructor
|
||||
from student.auth import has_course_author_access
|
||||
from contentstore.views.course import course_outline_initial_state, reindex_course_and_check_access
|
||||
from contentstore.views.item import create_xblock_info, VisibilityState
|
||||
from course_action_state.models import CourseRerunState
|
||||
from util.date_utils import get_default_time_display
|
||||
from xmodule.modulestore import ModuleStoreEnum
|
||||
from xmodule.modulestore.courseware_index import CoursewareSearchIndexer
|
||||
from xmodule.modulestore.exceptions import ItemNotFoundError
|
||||
from xmodule.modulestore.django import modulestore
|
||||
from xmodule.modulestore.tests.factories import CourseFactory, ItemFactory, LibraryFactory
|
||||
from xmodule.modulestore.courseware_index import SearchIndexingError
|
||||
from opaque_keys.edx.locator import CourseLocator
|
||||
from student.tests.factories import UserFactory
|
||||
from course_action_state.managers import CourseRerunUIStateManager
|
||||
from django.conf import settings
|
||||
from django.core.exceptions import PermissionDenied
|
||||
from django.utils.translation import ugettext as _
|
||||
|
||||
from contentstore.courseware_index import CoursewareSearchIndexer, SearchIndexingError
|
||||
from contentstore.tests.utils import CourseTestCase
|
||||
from contentstore.utils import reverse_course_url, reverse_library_url, add_instructor
|
||||
from contentstore.views.course import course_outline_initial_state, reindex_course_and_check_access
|
||||
from contentstore.views.item import create_xblock_info, VisibilityState
|
||||
from course_action_state.managers import CourseRerunUIStateManager
|
||||
from course_action_state.models import CourseRerunState
|
||||
from opaque_keys.edx.locator import CourseLocator
|
||||
from search.api import perform_search
|
||||
import pytz
|
||||
from student.auth import has_course_author_access
|
||||
from student.tests.factories import UserFactory
|
||||
from util.date_utils import get_default_time_display
|
||||
from xmodule.modulestore import ModuleStoreEnum
|
||||
from xmodule.modulestore.exceptions import ItemNotFoundError
|
||||
from xmodule.modulestore.django import modulestore
|
||||
from xmodule.modulestore.tests.factories import CourseFactory, ItemFactory, LibraryFactory
|
||||
|
||||
|
||||
class TestCourseIndex(CourseTestCase):
|
||||
@@ -346,8 +346,6 @@ class TestCourseReIndex(CourseTestCase):
|
||||
Unit tests for the course outline.
|
||||
"""
|
||||
|
||||
TEST_INDEX_FILENAME = "test_root/index_file.dat"
|
||||
|
||||
SUCCESSFUL_RESPONSE = _("Course has been successfully reindexed.")
|
||||
|
||||
def setUp(self):
|
||||
@@ -379,12 +377,6 @@ class TestCourseReIndex(CourseTestCase):
|
||||
|
||||
)
|
||||
|
||||
# create test file in which index for this test will live
|
||||
with open(self.TEST_INDEX_FILENAME, "w+") as index_file:
|
||||
json.dump({}, index_file)
|
||||
|
||||
self.addCleanup(os.remove, self.TEST_INDEX_FILENAME)
|
||||
|
||||
def test_reindex_course(self):
|
||||
"""
|
||||
Verify that course gets reindexed.
|
||||
@@ -445,25 +437,19 @@ class TestCourseReIndex(CourseTestCase):
|
||||
"""
|
||||
Test json response with real data
|
||||
"""
|
||||
# Check results not indexed
|
||||
# results are indexed because they are published from ItemFactory
|
||||
response = perform_search(
|
||||
"unique",
|
||||
user=self.user,
|
||||
size=10,
|
||||
from_=0,
|
||||
course_id=unicode(self.course.id))
|
||||
self.assertEqual(response['results'], [])
|
||||
self.assertEqual(response['total'], 1)
|
||||
|
||||
# Start manual reindex
|
||||
reindex_course_and_check_access(self.course.id, self.user)
|
||||
|
||||
self.html.display_name = "My expanded HTML"
|
||||
modulestore().update_item(self.html, ModuleStoreEnum.UserID.test)
|
||||
|
||||
# Start manual reindex
|
||||
reindex_course_and_check_access(self.course.id, self.user)
|
||||
|
||||
# Check results indexed now
|
||||
# Check results remain the same
|
||||
response = perform_search(
|
||||
"unique",
|
||||
user=self.user,
|
||||
@@ -477,14 +463,14 @@ class TestCourseReIndex(CourseTestCase):
|
||||
"""
|
||||
Test json response with mocked error data for video
|
||||
"""
|
||||
# Check results not indexed
|
||||
# results are indexed because they are published from ItemFactory
|
||||
response = perform_search(
|
||||
"unique",
|
||||
user=self.user,
|
||||
size=10,
|
||||
from_=0,
|
||||
course_id=unicode(self.course.id))
|
||||
self.assertEqual(response['results'], [])
|
||||
self.assertEqual(response['total'], 1)
|
||||
|
||||
# set mocked exception response
|
||||
err = SearchIndexingError
|
||||
@@ -499,14 +485,14 @@ class TestCourseReIndex(CourseTestCase):
|
||||
"""
|
||||
Test json response with mocked error data for html
|
||||
"""
|
||||
# Check results not indexed
|
||||
# results are indexed because they are published from ItemFactory
|
||||
response = perform_search(
|
||||
"unique",
|
||||
user=self.user,
|
||||
size=10,
|
||||
from_=0,
|
||||
course_id=unicode(self.course.id))
|
||||
self.assertEqual(response['results'], [])
|
||||
self.assertEqual(response['total'], 1)
|
||||
|
||||
# set mocked exception response
|
||||
err = SearchIndexingError
|
||||
@@ -521,14 +507,14 @@ class TestCourseReIndex(CourseTestCase):
|
||||
"""
|
||||
Test json response with mocked error data for sequence
|
||||
"""
|
||||
# Check results not indexed
|
||||
# results are indexed because they are published from ItemFactory
|
||||
response = perform_search(
|
||||
"unique",
|
||||
user=self.user,
|
||||
size=10,
|
||||
from_=0,
|
||||
course_id=unicode(self.course.id))
|
||||
self.assertEqual(response['results'], [])
|
||||
self.assertEqual(response['total'], 1)
|
||||
|
||||
# set mocked exception response
|
||||
err = Exception
|
||||
@@ -559,27 +545,21 @@ class TestCourseReIndex(CourseTestCase):
|
||||
|
||||
def test_indexing_responses(self):
|
||||
"""
|
||||
Test add_to_search_index response with real data
|
||||
Test do_course_reindex response with real data
|
||||
"""
|
||||
# Check results not indexed
|
||||
# results are indexed because they are published from ItemFactory
|
||||
response = perform_search(
|
||||
"unique",
|
||||
user=self.user,
|
||||
size=10,
|
||||
from_=0,
|
||||
course_id=unicode(self.course.id))
|
||||
self.assertEqual(response['results'], [])
|
||||
self.assertEqual(response['total'], 1)
|
||||
|
||||
# Start manual reindex
|
||||
CoursewareSearchIndexer.do_course_reindex(modulestore(), self.course.id)
|
||||
|
||||
self.html.display_name = "My expanded HTML"
|
||||
modulestore().update_item(self.html, ModuleStoreEnum.UserID.test)
|
||||
|
||||
# Start manual reindex
|
||||
CoursewareSearchIndexer.do_course_reindex(modulestore(), self.course.id)
|
||||
|
||||
# Check results indexed now
|
||||
# Check results are the same following reindex
|
||||
response = perform_search(
|
||||
"unique",
|
||||
user=self.user,
|
||||
@@ -591,16 +571,16 @@ class TestCourseReIndex(CourseTestCase):
|
||||
@mock.patch('xmodule.video_module.VideoDescriptor.index_dictionary')
|
||||
def test_indexing_video_error_responses(self, mock_index_dictionary):
|
||||
"""
|
||||
Test add_to_search_index response with mocked error data for video
|
||||
Test do_course_reindex response with mocked error data for video
|
||||
"""
|
||||
# Check results not indexed
|
||||
# results are indexed because they are published from ItemFactory
|
||||
response = perform_search(
|
||||
"unique",
|
||||
user=self.user,
|
||||
size=10,
|
||||
from_=0,
|
||||
course_id=unicode(self.course.id))
|
||||
self.assertEqual(response['results'], [])
|
||||
self.assertEqual(response['total'], 1)
|
||||
|
||||
# set mocked exception response
|
||||
err = Exception
|
||||
@@ -613,16 +593,16 @@ class TestCourseReIndex(CourseTestCase):
|
||||
@mock.patch('xmodule.html_module.HtmlDescriptor.index_dictionary')
|
||||
def test_indexing_html_error_responses(self, mock_index_dictionary):
|
||||
"""
|
||||
Test add_to_search_index response with mocked error data for html
|
||||
Test do_course_reindex response with mocked error data for html
|
||||
"""
|
||||
# Check results not indexed
|
||||
# results are indexed because they are published from ItemFactory
|
||||
response = perform_search(
|
||||
"unique",
|
||||
user=self.user,
|
||||
size=10,
|
||||
from_=0,
|
||||
course_id=unicode(self.course.id))
|
||||
self.assertEqual(response['results'], [])
|
||||
self.assertEqual(response['total'], 1)
|
||||
|
||||
# set mocked exception response
|
||||
err = Exception
|
||||
@@ -635,16 +615,16 @@ class TestCourseReIndex(CourseTestCase):
|
||||
@mock.patch('xmodule.seq_module.SequenceDescriptor.index_dictionary')
|
||||
def test_indexing_seq_error_responses(self, mock_index_dictionary):
|
||||
"""
|
||||
Test add_to_search_index response with mocked error data for sequence
|
||||
Test do_course_reindex response with mocked error data for sequence
|
||||
"""
|
||||
# Check results not indexed
|
||||
# results are indexed because they are published from ItemFactory
|
||||
response = perform_search(
|
||||
"unique",
|
||||
user=self.user,
|
||||
size=10,
|
||||
from_=0,
|
||||
course_id=unicode(self.course.id))
|
||||
self.assertEqual(response['results'], [])
|
||||
self.assertEqual(response['total'], 1)
|
||||
|
||||
# set mocked exception response
|
||||
err = Exception
|
||||
|
||||
@@ -268,10 +268,6 @@ VIDEO_CDN_URL = {
|
||||
# Courseware Search Index
|
||||
FEATURES['ENABLE_COURSEWARE_INDEX'] = True
|
||||
SEARCH_ENGINE = "search.tests.mock_search_engine.MockSearchEngine"
|
||||
# Path at which to store the mock index
|
||||
MOCK_SEARCH_BACKING_FILE = (
|
||||
TEST_ROOT / "index_file.dat" # pylint: disable=no-value-for-parameter
|
||||
).abspath()
|
||||
|
||||
# Dummy secret key for dev/test
|
||||
SECRET_KEY = '85920908f28904ed733fe576320db18cabd7b6cd'
|
||||
|
||||
@@ -1,176 +0,0 @@
|
||||
""" Code to allow module store to interface with courseware index """
|
||||
from __future__ import absolute_import
|
||||
|
||||
import logging
|
||||
|
||||
from django.utils.translation import ugettext as _
|
||||
from opaque_keys.edx.locator import CourseLocator
|
||||
from search.search_engine_base import SearchEngine
|
||||
from eventtracking import tracker
|
||||
|
||||
from . import ModuleStoreEnum
|
||||
from .exceptions import ItemNotFoundError
|
||||
|
||||
|
||||
# Use default index and document names for now
|
||||
INDEX_NAME = "courseware_index"
|
||||
DOCUMENT_TYPE = "courseware_content"
|
||||
|
||||
log = logging.getLogger('edx.modulestore')
|
||||
|
||||
|
||||
class SearchIndexingError(Exception):
|
||||
""" Indicates some error(s) occured during indexing """
|
||||
|
||||
def __init__(self, message, error_list):
|
||||
super(SearchIndexingError, self).__init__(message)
|
||||
self.error_list = error_list
|
||||
|
||||
|
||||
class CoursewareSearchIndexer(object):
|
||||
"""
|
||||
Class to perform indexing for courseware search from different modulestores
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def add_to_search_index(modulestore, location, delete=False, raise_on_error=False):
|
||||
"""
|
||||
Add to courseware search index from given location and its children
|
||||
"""
|
||||
error_list = []
|
||||
indexed_count = 0
|
||||
# TODO - inline for now, need to move this out to a celery task
|
||||
searcher = SearchEngine.get_search_engine(INDEX_NAME)
|
||||
if not searcher:
|
||||
return
|
||||
|
||||
if isinstance(location, CourseLocator):
|
||||
course_key = location
|
||||
else:
|
||||
course_key = location.course_key
|
||||
|
||||
location_info = {
|
||||
"course": unicode(course_key),
|
||||
}
|
||||
|
||||
def _fetch_item(item_location):
|
||||
""" Fetch the item from the modulestore location, log if not found, but continue """
|
||||
try:
|
||||
if isinstance(item_location, CourseLocator):
|
||||
item = modulestore.get_course(item_location)
|
||||
else:
|
||||
item = modulestore.get_item(item_location, revision=ModuleStoreEnum.RevisionOption.published_only)
|
||||
except ItemNotFoundError:
|
||||
log.warning('Cannot find: %s', item_location)
|
||||
return None
|
||||
|
||||
return item
|
||||
|
||||
def index_item_location(item_location, current_start_date):
|
||||
""" add this item to the search index """
|
||||
item = _fetch_item(item_location)
|
||||
if not item:
|
||||
return
|
||||
|
||||
is_indexable = hasattr(item, "index_dictionary")
|
||||
# if it's not indexable and it does not have children, then ignore
|
||||
if not is_indexable and not item.has_children:
|
||||
return
|
||||
|
||||
# if it has a defined start, then apply it and to it's children
|
||||
if item.start and (not current_start_date or item.start > current_start_date):
|
||||
current_start_date = item.start
|
||||
|
||||
if item.has_children:
|
||||
for child_loc in item.children:
|
||||
index_item_location(child_loc, current_start_date)
|
||||
|
||||
item_index = {}
|
||||
item_index_dictionary = item.index_dictionary() if is_indexable else None
|
||||
|
||||
# if it has something to add to the index, then add it
|
||||
if item_index_dictionary:
|
||||
try:
|
||||
item_index.update(location_info)
|
||||
item_index.update(item_index_dictionary)
|
||||
item_index['id'] = unicode(item.scope_ids.usage_id)
|
||||
if current_start_date:
|
||||
item_index['start_date'] = current_start_date
|
||||
|
||||
searcher.index(DOCUMENT_TYPE, item_index)
|
||||
except Exception as err: # pylint: disable=broad-except
|
||||
# broad exception so that index operation does not fail on one item of many
|
||||
log.warning('Could not index item: %s - %s', item_location, unicode(err))
|
||||
error_list.append(_('Could not index item: {}').format(item_location))
|
||||
|
||||
def remove_index_item_location(item_location):
|
||||
""" remove this item from the search index """
|
||||
item = _fetch_item(item_location)
|
||||
if item:
|
||||
if item.has_children:
|
||||
for child_loc in item.children:
|
||||
remove_index_item_location(child_loc)
|
||||
|
||||
searcher.remove(DOCUMENT_TYPE, unicode(item.scope_ids.usage_id))
|
||||
|
||||
try:
|
||||
if delete:
|
||||
remove_index_item_location(location)
|
||||
else:
|
||||
index_item_location(location, None)
|
||||
indexed_count += 1
|
||||
except Exception as err: # pylint: disable=broad-except
|
||||
# broad exception so that index operation does not prevent the rest of the application from working
|
||||
log.exception(
|
||||
"Indexing error encountered, courseware index may be out of date %s - %s",
|
||||
course_key,
|
||||
unicode(err)
|
||||
)
|
||||
error_list.append(_('General indexing error occurred'))
|
||||
|
||||
if raise_on_error and error_list:
|
||||
raise SearchIndexingError(_('Error(s) present during indexing'), error_list)
|
||||
|
||||
return indexed_count
|
||||
|
||||
@classmethod
|
||||
def do_publish_index(cls, modulestore, location, delete=False, raise_on_error=False):
|
||||
"""
|
||||
Add to courseware search index published section and children
|
||||
"""
|
||||
indexed_count = cls.add_to_search_index(modulestore, location, delete, raise_on_error)
|
||||
cls._track_index_request('edx.course.index.published', indexed_count, str(location))
|
||||
return indexed_count
|
||||
|
||||
@classmethod
|
||||
def do_course_reindex(cls, modulestore, course_key):
|
||||
"""
|
||||
(Re)index all content within the given course
|
||||
"""
|
||||
indexed_count = cls.add_to_search_index(modulestore, course_key, delete=False, raise_on_error=True)
|
||||
cls._track_index_request('edx.course.index.reindexed', indexed_count)
|
||||
return indexed_count
|
||||
|
||||
@staticmethod
|
||||
def _track_index_request(event_name, indexed_count, location=None):
|
||||
"""Track content index requests.
|
||||
|
||||
Arguments:
|
||||
location (str): The ID of content to be indexed.
|
||||
event_name (str): Name of the event to be logged.
|
||||
Returns:
|
||||
None
|
||||
|
||||
"""
|
||||
data = {
|
||||
"indexed_count": indexed_count,
|
||||
'category': 'courseware_index',
|
||||
}
|
||||
|
||||
if location:
|
||||
data['location_id'] = location
|
||||
|
||||
tracker.emit(
|
||||
event_name,
|
||||
data
|
||||
)
|
||||
@@ -12,7 +12,6 @@ import logging
|
||||
from opaque_keys.edx.locations import Location
|
||||
from xmodule.exceptions import InvalidVersionError
|
||||
from xmodule.modulestore import ModuleStoreEnum
|
||||
from xmodule.modulestore.courseware_index import CoursewareSearchIndexer
|
||||
from xmodule.modulestore.exceptions import (
|
||||
ItemNotFoundError, DuplicateItemError, DuplicateCourseError, InvalidBranchSetting
|
||||
)
|
||||
@@ -565,10 +564,6 @@ class DraftModuleStore(MongoModuleStore):
|
||||
)
|
||||
self._delete_subtree(location, as_functions)
|
||||
|
||||
# Remove this location from the courseware search index so that searches
|
||||
# will refrain from showing it as a result
|
||||
CoursewareSearchIndexer.add_to_search_index(self, location, delete=True)
|
||||
|
||||
def _delete_subtree(self, location, as_functions, draft_only=False):
|
||||
"""
|
||||
Internal method for deleting all of the subtree whose revisions match the as_functions
|
||||
@@ -745,9 +740,6 @@ class DraftModuleStore(MongoModuleStore):
|
||||
|
||||
self._flag_publish_event(course_key)
|
||||
|
||||
# Now it's been published, add the object to the courseware search index so that it appears in search results
|
||||
CoursewareSearchIndexer.do_publish_index(self, location)
|
||||
|
||||
return self.get_item(as_published(location))
|
||||
|
||||
def unpublish(self, location, user_id, **kwargs):
|
||||
|
||||
@@ -5,7 +5,6 @@ Module for the dual-branch fall-back Draft->Published Versioning ModuleStore
|
||||
from xmodule.modulestore.split_mongo.split import SplitMongoModuleStore, EXCLUDE_ALL
|
||||
from xmodule.exceptions import InvalidVersionError
|
||||
from xmodule.modulestore import ModuleStoreEnum
|
||||
from xmodule.modulestore.courseware_index import CoursewareSearchIndexer
|
||||
from xmodule.modulestore.exceptions import InsufficientSpecificationError, ItemNotFoundError
|
||||
from xmodule.modulestore.draft_and_published import (
|
||||
ModuleStoreDraftAndPublished, DIRECT_ONLY_CATEGORIES, UnsupportedRevisionError
|
||||
@@ -217,10 +216,6 @@ class DraftVersioningModuleStore(SplitMongoModuleStore, ModuleStoreDraftAndPubli
|
||||
if branch == ModuleStoreEnum.BranchName.draft and branched_location.block_type in DIRECT_ONLY_CATEGORIES:
|
||||
self.publish(parent_loc.version_agnostic(), user_id, blacklist=EXCLUDE_ALL, **kwargs)
|
||||
|
||||
# Remove this location from the courseware search index so that searches
|
||||
# will refrain from showing it as a result
|
||||
CoursewareSearchIndexer.add_to_search_index(self, location, delete=True)
|
||||
|
||||
def _map_revision_to_branch(self, key, revision=None):
|
||||
"""
|
||||
Maps RevisionOptions to BranchNames, inserting them into the key
|
||||
@@ -366,9 +361,6 @@ class DraftVersioningModuleStore(SplitMongoModuleStore, ModuleStoreDraftAndPubli
|
||||
|
||||
self._flag_publish_event(location.course_key)
|
||||
|
||||
# Now it's been published, add the object to the courseware search index so that it appears in search results
|
||||
CoursewareSearchIndexer.do_publish_index(self, location)
|
||||
|
||||
return self.get_item(location.for_branch(ModuleStoreEnum.BranchName.published), **kwargs)
|
||||
|
||||
def unpublish(self, location, user_id, **kwargs):
|
||||
|
||||
@@ -128,3 +128,23 @@ class VerticalBlock(SequenceFields, XModuleFields, StudioEditableBlock, XmlParse
|
||||
# TODO: Remove this when studio better supports editing of pure XBlocks.
|
||||
fragment.add_javascript('VerticalBlock = XModule.Descriptor;')
|
||||
return fragment
|
||||
|
||||
def index_dictionary(self):
|
||||
"""
|
||||
Return dictionary prepared with module content and type for indexing.
|
||||
"""
|
||||
# return key/value fields in a Python dict object
|
||||
# values may be numeric / string or dict
|
||||
# default implementation is an empty dict
|
||||
xblock_body = super(VerticalBlock, self).index_dictionary()
|
||||
index_body = {
|
||||
"display_name": self.display_name,
|
||||
}
|
||||
if "content" in xblock_body:
|
||||
xblock_body["content"].update(index_body)
|
||||
else:
|
||||
xblock_body["content"] = index_body
|
||||
# We use "Sequence" for sequentials and verticals
|
||||
xblock_body["content_type"] = "Sequence"
|
||||
|
||||
return xblock_body
|
||||
|
||||
@@ -84,17 +84,11 @@ class CoursewareSearchTest(UniqueCourseTest):
|
||||
AutoAuthPage(self.browser, username=username, email=email,
|
||||
course_id=self.course_id, staff=staff).visit()
|
||||
|
||||
def test_page_existence(self):
|
||||
"""
|
||||
Make sure that the page is accessible.
|
||||
"""
|
||||
self._auto_auth(self.USERNAME, self.EMAIL, False)
|
||||
self.courseware_search_page.visit()
|
||||
|
||||
def _studio_publish_content(self, section_index):
|
||||
"""
|
||||
Publish content on studio course page under specified section
|
||||
"""
|
||||
self._auto_auth(self.STAFF_USERNAME, self.STAFF_EMAIL, True)
|
||||
self.course_outline.visit()
|
||||
subsection = self.course_outline.section_at(section_index).subsection_at(0)
|
||||
subsection.expand_subsection()
|
||||
@@ -105,6 +99,7 @@ class CoursewareSearchTest(UniqueCourseTest):
|
||||
"""
|
||||
Edit chapter name on studio course page under specified section
|
||||
"""
|
||||
self._auto_auth(self.STAFF_USERNAME, self.STAFF_EMAIL, True)
|
||||
self.course_outline.visit()
|
||||
section = self.course_outline.section_at(section_index)
|
||||
section.change_name(self.EDITED_CHAPTER_NAME)
|
||||
@@ -114,6 +109,7 @@ class CoursewareSearchTest(UniqueCourseTest):
|
||||
Add content on studio course page under specified section
|
||||
"""
|
||||
|
||||
self._auto_auth(self.STAFF_USERNAME, self.STAFF_EMAIL, True)
|
||||
# create a unit in course outline
|
||||
self.course_outline.visit()
|
||||
subsection = self.course_outline.section_at(section_index).subsection_at(0)
|
||||
@@ -140,30 +136,44 @@ class CoursewareSearchTest(UniqueCourseTest):
|
||||
self.course_outline.start_reindex()
|
||||
self.course_outline.wait_for_ajax()
|
||||
|
||||
def _search_for_content(self, search_term):
|
||||
"""
|
||||
Login and search for specific content
|
||||
|
||||
Arguments:
|
||||
search_term - term to be searched for
|
||||
|
||||
Returns:
|
||||
(bool) True if search term is found in resulting content; False if not found
|
||||
"""
|
||||
self._auto_auth(self.USERNAME, self.EMAIL, False)
|
||||
self.courseware_search_page.visit()
|
||||
self.courseware_search_page.search_for_term(search_term)
|
||||
return search_term in self.courseware_search_page.search_results.html[0]
|
||||
|
||||
def test_page_existence(self):
|
||||
"""
|
||||
Make sure that the page is accessible.
|
||||
"""
|
||||
self._auto_auth(self.USERNAME, self.EMAIL, False)
|
||||
self.courseware_search_page.visit()
|
||||
|
||||
def test_search(self):
|
||||
"""
|
||||
Make sure that you can search for something.
|
||||
"""
|
||||
|
||||
# Create content in studio without publishing.
|
||||
self._auto_auth(self.STAFF_USERNAME, self.STAFF_EMAIL, True)
|
||||
self._studio_add_content(0)
|
||||
|
||||
# Do a search, there should be no results shown.
|
||||
self._auto_auth(self.USERNAME, self.EMAIL, False)
|
||||
self.courseware_search_page.visit()
|
||||
self.courseware_search_page.search_for_term(self.SEARCH_STRING)
|
||||
assert self.SEARCH_STRING not in self.courseware_search_page.search_results.html[0]
|
||||
self.assertFalse(self._search_for_content(self.SEARCH_STRING))
|
||||
|
||||
# Publish in studio to trigger indexing.
|
||||
self._auto_auth(self.STAFF_USERNAME, self.STAFF_EMAIL, True)
|
||||
self._studio_publish_content(0)
|
||||
|
||||
# Do the search again, this time we expect results.
|
||||
self._auto_auth(self.USERNAME, self.EMAIL, False)
|
||||
self.courseware_search_page.visit()
|
||||
self.courseware_search_page.search_for_term(self.SEARCH_STRING)
|
||||
assert self.SEARCH_STRING in self.courseware_search_page.search_results.html[0]
|
||||
self.assertTrue(self._search_for_content(self.SEARCH_STRING))
|
||||
|
||||
def test_reindex(self):
|
||||
"""
|
||||
@@ -171,24 +181,24 @@ class CoursewareSearchTest(UniqueCourseTest):
|
||||
"""
|
||||
|
||||
# Create content in studio without publishing.
|
||||
self._auto_auth(self.STAFF_USERNAME, self.STAFF_EMAIL, True)
|
||||
self._studio_add_content(1)
|
||||
|
||||
# Do a search, there should be no results shown.
|
||||
self.assertFalse(self._search_for_content(self.EDITED_SEARCH_STRING))
|
||||
|
||||
# Publish in studio to trigger indexing, and edit chapter name afterwards.
|
||||
self._studio_publish_content(1)
|
||||
|
||||
# Do a ReIndex from studio to ensure that our stuff is updated before the next stage of the test
|
||||
self._studio_reindex()
|
||||
|
||||
# Search after publish, there should still be no results shown.
|
||||
self.assertFalse(self._search_for_content(self.EDITED_SEARCH_STRING))
|
||||
|
||||
self._studio_edit_chapter_name(1)
|
||||
|
||||
# Do a search, there should be no results shown.
|
||||
self._auto_auth(self.USERNAME, self.EMAIL, False)
|
||||
self.courseware_search_page.visit()
|
||||
self.courseware_search_page.search_for_term(self.EDITED_SEARCH_STRING)
|
||||
assert self.EDITED_SEARCH_STRING not in self.courseware_search_page.search_results.html[0]
|
||||
|
||||
# Do a ReIndex from studio, to add edited chapter name
|
||||
# Do a ReIndex from studio to ensure that our stuff is updated before the next stage of the test
|
||||
self._studio_reindex()
|
||||
|
||||
# Do the search again, this time we expect results.
|
||||
self._auto_auth(self.USERNAME, self.EMAIL, False)
|
||||
self.courseware_search_page.visit()
|
||||
self.courseware_search_page.search_for_term(self.EDITED_SEARCH_STRING)
|
||||
assert self.EDITED_SEARCH_STRING in self.courseware_search_page.search_results.html[0]
|
||||
self.assertTrue(self._search_for_content(self.EDITED_SEARCH_STRING))
|
||||
|
||||
@@ -38,7 +38,7 @@ git+https://github.com/mitocw/django-cas.git@60a5b8e5a62e63e0d5d224a87f0b489201a
|
||||
-e git+https://github.com/edx/edx-val.git@d6087908aa3dd05ceaa7f56a21284f86c53cb3f0#egg=edx-val
|
||||
-e git+https://github.com/pmitros/RecommenderXBlock.git@9b07e807c89ba5761827d0387177f71aa57ef056#egg=recommender-xblock
|
||||
-e git+https://github.com/edx/edx-milestones.git@547f2250ee49e73ce8d7ff4e78ecf1b049892510#egg=edx-milestones
|
||||
-e git+https://github.com/edx/edx-search.git@5eb3853fe74c8fdd9d82d36b8ab0058309390f86#egg=edx-search
|
||||
-e git+https://github.com/edx/edx-search.git@6ce8b25a2539370de32dc7cb643ae27c9b8f798d#egg=edx-search
|
||||
git+https://github.com/edx/edx-lint.git@8bf82a32ecb8598c415413df66f5232ab8d974e9#egg=edx_lint==0.2.1
|
||||
-e git+https://github.com/edx/xblock-utils.git@581ed636c862b286002bb9a3724cc883570eb54c#egg=xblock-utils
|
||||
-e git+https://github.com/edx-solutions/xblock-google-drive.git@138e6fa0bf3a2013e904a085b9fed77dab7f3f21#egg=xblock-google-drive
|
||||
|
||||
Reference in New Issue
Block a user