Merge pull request #5162 from cpennington/split/import-export-performance
Performance improvements to split mongo and old-mongo, based on import/export tests.
This commit is contained in:
@@ -287,3 +287,10 @@ class ContentStore(object):
|
||||
logging.exception(u"Failed to generate thumbnail for {0}. Exception: {1}".format(content.location, str(e)))
|
||||
|
||||
return thumbnail_content, thumbnail_file_location
|
||||
|
||||
def ensure_indexes(self):
|
||||
"""
|
||||
Ensure that all appropriate indexes are created that are needed by this modulestore, or raise
|
||||
an exception if unable to.
|
||||
"""
|
||||
pass
|
||||
|
||||
@@ -375,6 +375,36 @@ class MongoContentStore(ContentStore):
|
||||
fs_entry['_id'] = dbkey
|
||||
return dbkey
|
||||
|
||||
def ensure_indexes(self):
|
||||
|
||||
# Index needed thru 'category' by `_get_all_content_for_course` and others. That query also takes a sort
|
||||
# which can be `uploadDate`, `display_name`,
|
||||
|
||||
self.fs_files.create_index(
|
||||
[('_id.org', pymongo.ASCENDING), ('_id.course', pymongo.ASCENDING), ('_id.name', pymongo.ASCENDING)],
|
||||
sparse=True
|
||||
)
|
||||
self.fs_files.create_index(
|
||||
[('content_son.org', pymongo.ASCENDING), ('content_son.course', pymongo.ASCENDING), ('content_son.name', pymongo.ASCENDING)],
|
||||
sparse=True
|
||||
)
|
||||
self.fs_files.create_index(
|
||||
[('_id.org', pymongo.ASCENDING), ('_id.course', pymongo.ASCENDING), ('uploadDate', pymongo.ASCENDING)],
|
||||
sparse=True
|
||||
)
|
||||
self.fs_files.create_index(
|
||||
[('_id.org', pymongo.ASCENDING), ('_id.course', pymongo.ASCENDING), ('display_name', pymongo.ASCENDING)],
|
||||
sparse=True
|
||||
)
|
||||
self.fs_files.create_index(
|
||||
[('content_son.org', pymongo.ASCENDING), ('content_son.course', pymongo.ASCENDING), ('uploadDate', pymongo.ASCENDING)],
|
||||
sparse=True
|
||||
)
|
||||
self.fs_files.create_index(
|
||||
[('content_son.org', pymongo.ASCENDING), ('content_son.course', pymongo.ASCENDING), ('display_name', pymongo.ASCENDING)],
|
||||
sparse=True
|
||||
)
|
||||
|
||||
|
||||
def query_for_course(course_key, category=None):
|
||||
"""
|
||||
|
||||
@@ -486,6 +486,16 @@ class ModuleStoreRead(object):
|
||||
"""
|
||||
yield
|
||||
|
||||
def ensure_indexes(self):
|
||||
"""
|
||||
Ensure that all appropriate indexes are created that are needed by this modulestore, or raise
|
||||
an exception if unable to.
|
||||
|
||||
This method is intended for use by tests and administrative commands, and not
|
||||
to be run during server startup.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
class ModuleStoreWrite(ModuleStoreRead):
|
||||
"""
|
||||
|
||||
@@ -653,3 +653,14 @@ class MixedModuleStore(ModuleStoreDraftAndPublished, ModuleStoreWriteBase):
|
||||
store = self._get_modulestore_for_courseid(course_id)
|
||||
with store.bulk_operations(course_id):
|
||||
yield
|
||||
|
||||
def ensure_indexes(self):
|
||||
"""
|
||||
Ensure that all appropriate indexes are created that are needed by this modulestore, or raise
|
||||
an exception if unable to.
|
||||
|
||||
This method is intended for use by tests and administrative commands, and not
|
||||
to be run during server startup.
|
||||
"""
|
||||
for store in self.modulestores:
|
||||
store.ensure_indexes()
|
||||
|
||||
@@ -1440,3 +1440,29 @@ class MongoModuleStore(ModuleStoreDraftAndPublished, ModuleStoreWriteBase, Mongo
|
||||
return {ModuleStoreEnum.Type.mongo: True}
|
||||
else:
|
||||
raise HeartbeatFailure("Can't connect to {}".format(self.database.name), 'mongo')
|
||||
|
||||
def ensure_indexes(self):
|
||||
"""
|
||||
Ensure that all appropriate indexes are created that are needed by this modulestore, or raise
|
||||
an exception if unable to.
|
||||
|
||||
This method is intended for use by tests and administrative commands, and not
|
||||
to be run during server startup.
|
||||
"""
|
||||
|
||||
# Because we often query for some subset of the id, we define this index:
|
||||
self.collection.create_index([
|
||||
('_id.org', pymongo.ASCENDING),
|
||||
('_id.course', pymongo.ASCENDING),
|
||||
('_id.category', pymongo.ASCENDING),
|
||||
('_id.name', pymongo.ASCENDING),
|
||||
])
|
||||
|
||||
# Because we often scan for all category='course' regardless of the value of the other fields:
|
||||
self.collection.create_index('_id.category')
|
||||
|
||||
# Because lms calls get_parent_locations frequently (for path generation):
|
||||
self.collection.create_index('definition.children', sparse=True)
|
||||
|
||||
# To allow prioritizing draft vs published material
|
||||
self.collection.create_index('_id.revision')
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
from opaque_keys.edx.locator import DefinitionLocator
|
||||
from bson import SON
|
||||
|
||||
|
||||
class DefinitionLazyLoader(object):
|
||||
@@ -25,9 +24,3 @@ class DefinitionLazyLoader(object):
|
||||
loader pointer with the result so as not to fetch more than once
|
||||
"""
|
||||
return self.modulestore.db_connection.get_definition(self.definition_locator.definition_id)
|
||||
|
||||
def as_son(self):
|
||||
return SON((
|
||||
('block_type', self.definition_locator.block_type),
|
||||
('definition', self.definition_locator.definition_id)
|
||||
))
|
||||
|
||||
@@ -82,7 +82,6 @@ class MongoConnection(object):
|
||||
host=host,
|
||||
port=port,
|
||||
tz_aware=tz_aware,
|
||||
document_class=son.SON,
|
||||
**kwargs
|
||||
),
|
||||
db
|
||||
@@ -167,10 +166,10 @@ class MongoConnection(object):
|
||||
"""
|
||||
case_regex = ur"(?i)^{}$" if ignore_case else ur"{}"
|
||||
return self.course_index.find_one(
|
||||
son.SON([
|
||||
(key_attr, re.compile(case_regex.format(getattr(key, key_attr))))
|
||||
{
|
||||
key_attr: re.compile(case_regex.format(getattr(key, key_attr)))
|
||||
for key_attr in ('org', 'course', 'run')
|
||||
])
|
||||
}
|
||||
)
|
||||
|
||||
def find_matching_course_indexes(self, branch=None, search_targets=None):
|
||||
@@ -182,7 +181,7 @@ class MongoConnection(object):
|
||||
search_targets: If specified, this must be a dictionary specifying field values
|
||||
that must exist in the search_targets of the returned courses
|
||||
"""
|
||||
query = son.SON()
|
||||
query = {}
|
||||
if branch is not None:
|
||||
query['versions.{}'.format(branch)] = {'$exists': True}
|
||||
|
||||
@@ -206,11 +205,11 @@ class MongoConnection(object):
|
||||
from_index: If set, only update an index if it matches the one specified in `from_index`.
|
||||
"""
|
||||
self.course_index.update(
|
||||
from_index or son.SON([
|
||||
('org', course_index['org']),
|
||||
('course', course_index['course']),
|
||||
('run', course_index['run'])
|
||||
]),
|
||||
from_index or {
|
||||
'org': course_index['org'],
|
||||
'course': course_index['course'],
|
||||
'run': course_index['run'],
|
||||
},
|
||||
course_index,
|
||||
upsert=False,
|
||||
)
|
||||
@@ -219,11 +218,11 @@ class MongoConnection(object):
|
||||
"""
|
||||
Delete the course_index from the persistence mechanism whose id is the given course_index
|
||||
"""
|
||||
return self.course_index.remove(son.SON([
|
||||
('org', course_index['org']),
|
||||
('course', course_index['course']),
|
||||
('run', course_index['run'])
|
||||
]))
|
||||
return self.course_index.remove({
|
||||
'org': course_index['org'],
|
||||
'course': course_index['course'],
|
||||
'run': course_index['run'],
|
||||
})
|
||||
|
||||
def get_definition(self, key):
|
||||
"""
|
||||
@@ -244,4 +243,20 @@ class MongoConnection(object):
|
||||
"""
|
||||
self.definitions.insert(definition)
|
||||
|
||||
def ensure_indexes(self):
|
||||
"""
|
||||
Ensure that all appropriate indexes are created that are needed by this modulestore, or raise
|
||||
an exception if unable to.
|
||||
|
||||
This method is intended for use by tests and administrative commands, and not
|
||||
to be run during server startup.
|
||||
"""
|
||||
self.course_index.create_index(
|
||||
[
|
||||
('org', pymongo.ASCENDING),
|
||||
('course', pymongo.ASCENDING),
|
||||
('run', pymongo.ASCENDING)
|
||||
],
|
||||
unique=True
|
||||
)
|
||||
|
||||
|
||||
@@ -2360,6 +2360,16 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
|
||||
services=self.services,
|
||||
)
|
||||
|
||||
def ensure_indexes(self):
|
||||
"""
|
||||
Ensure that all appropriate indexes are created that are needed by this modulestore, or raise
|
||||
an exception if unable to.
|
||||
|
||||
This method is intended for use by tests and administrative commands, and not
|
||||
to be run during server startup.
|
||||
"""
|
||||
self.db_connection.ensure_indexes()
|
||||
|
||||
class SparseList(list):
|
||||
"""
|
||||
Enable inserting items into a list in arbitrary order and then retrieving them.
|
||||
|
||||
@@ -96,6 +96,7 @@ class MongoModulestoreBuilder(object):
|
||||
branch_setting_func=lambda: ModuleStoreEnum.Branch.draft_preferred,
|
||||
metadata_inheritance_cache_subsystem=MemoryCache(),
|
||||
)
|
||||
modulestore.ensure_indexes()
|
||||
|
||||
try:
|
||||
yield modulestore
|
||||
@@ -139,6 +140,7 @@ class VersioningModulestoreBuilder(object):
|
||||
fs_root,
|
||||
render_template=repr,
|
||||
)
|
||||
modulestore.ensure_indexes()
|
||||
|
||||
try:
|
||||
yield modulestore
|
||||
@@ -210,6 +212,7 @@ class MongoContentstoreBuilder(object):
|
||||
collection='content',
|
||||
**COMMON_DOCSTORE_CONFIG
|
||||
)
|
||||
contentstore.ensure_indexes()
|
||||
|
||||
try:
|
||||
yield contentstore
|
||||
|
||||
@@ -310,7 +310,13 @@ class CourseComparisonTest(BulkAssertionTest):
|
||||
if actual_item is None and expected_item.location.category == 'course':
|
||||
actual_item_location = actual_item_location.replace(name='course')
|
||||
actual_item = actual_item_map.get(map_key(actual_item_location))
|
||||
self.assertIsNotNone(actual_item, u'cannot find {} in {}'.format(map_key(actual_item_location), actual_item_map))
|
||||
|
||||
# Formatting the message slows down tests of large courses significantly, so only do it if it would be used
|
||||
if actual_item is None:
|
||||
msg = u'cannot find {} in {}'.format(map_key(actual_item_location), actual_item_map)
|
||||
else:
|
||||
msg = None
|
||||
self.assertIsNotNone(actual_item, msg)
|
||||
|
||||
# compare fields
|
||||
self.assertEqual(expected_item.fields, actual_item.fields)
|
||||
@@ -328,17 +334,18 @@ class CourseComparisonTest(BulkAssertionTest):
|
||||
|
||||
exp_value = map_references(field.read_from(expected_item), field, actual_course_key)
|
||||
actual_value = field.read_from(actual_item)
|
||||
self.assertEqual(
|
||||
exp_value,
|
||||
actual_value,
|
||||
"Field {!r} doesn't match between usages {} and {}: {!r} != {!r}".format(
|
||||
# Formatting the message slows down tests of large courses significantly, so only do it if it would be used
|
||||
if exp_value != actual_value:
|
||||
msg = "Field {!r} doesn't match between usages {} and {}: {!r} != {!r}".format(
|
||||
field_name,
|
||||
expected_item.scope_ids.usage_id,
|
||||
actual_item.scope_ids.usage_id,
|
||||
exp_value,
|
||||
actual_value,
|
||||
)
|
||||
)
|
||||
else:
|
||||
msg = None
|
||||
self.assertEqual(exp_value, actual_value, msg)
|
||||
|
||||
# compare children
|
||||
self.assertEqual(expected_item.has_children, actual_item.has_children)
|
||||
|
||||
Reference in New Issue
Block a user