Merge pull request #5162 from cpennington/split/import-export-performance

Performance improvements to split mongo and old-mongo, based on import/export tests.
This commit is contained in:
Calen Pennington
2014-09-11 11:54:22 -04:00
10 changed files with 140 additions and 28 deletions

View File

@@ -287,3 +287,10 @@ class ContentStore(object):
logging.exception(u"Failed to generate thumbnail for {0}. Exception: {1}".format(content.location, str(e)))
return thumbnail_content, thumbnail_file_location
def ensure_indexes(self):
    """
    Create every database index this contentstore needs, raising an
    exception if that proves impossible.

    The base class has nothing to index, so this is a no-op hook; concrete
    contentstores override it with their actual index definitions.
    """
    pass

View File

@@ -375,6 +375,36 @@ class MongoContentStore(ContentStore):
fs_entry['_id'] = dbkey
return dbkey
def ensure_indexes(self):
    """
    Create the indexes needed by `_get_all_content_for_course` and similar
    queries, which filter on the org/course (and sometimes name) components
    of either `_id` or `content_son`, and may sort on `uploadDate` or
    `display_name`.

    Raises if the indexes cannot be created. Intended for tests and
    administrative commands, not for server startup.
    """
    # Assets are addressed either through `_id` or through `content_son`,
    # so every index exists in both flavors. Build the six indexes with
    # loops instead of six copy-pasted calls; creation order matches the
    # original hand-written sequence.
    key_prefixes = ('_id', 'content_son')

    # Direct lookup by the full asset key (org/course/name).
    for prefix in key_prefixes:
        self.fs_files.create_index(
            [
                (prefix + '.org', pymongo.ASCENDING),
                (prefix + '.course', pymongo.ASCENDING),
                (prefix + '.name', pymongo.ASCENDING),
            ],
            sparse=True,
        )

    # Course-scoped queries with a sort, which can be on `uploadDate` or
    # `display_name` (both are top-level document fields, not key parts).
    for prefix in key_prefixes:
        for sort_field in ('uploadDate', 'display_name'):
            self.fs_files.create_index(
                [
                    (prefix + '.org', pymongo.ASCENDING),
                    (prefix + '.course', pymongo.ASCENDING),
                    (sort_field, pymongo.ASCENDING),
                ],
                sparse=True,
            )
def query_for_course(course_key, category=None):
"""

View File

@@ -486,6 +486,16 @@ class ModuleStoreRead(object):
"""
yield
def ensure_indexes(self):
    """
    Create all database indexes this modulestore requires, raising an
    exception when unable to do so.

    Meant to be invoked from tests and administrative commands rather than
    during server startup. The base implementation indexes nothing.
    """
    pass
class ModuleStoreWrite(ModuleStoreRead):
"""

View File

@@ -653,3 +653,14 @@ class MixedModuleStore(ModuleStoreDraftAndPublished, ModuleStoreWriteBase):
store = self._get_modulestore_for_courseid(course_id)
with store.bulk_operations(course_id):
yield
def ensure_indexes(self):
    """
    Ask every underlying modulestore to create the indexes it needs,
    raising an exception if any of them cannot.

    Intended for tests and administrative commands, not for server startup.
    """
    for component_store in self.modulestores:
        component_store.ensure_indexes()

View File

@@ -1440,3 +1440,29 @@ class MongoModuleStore(ModuleStoreDraftAndPublished, ModuleStoreWriteBase, Mongo
return {ModuleStoreEnum.Type.mongo: True}
else:
raise HeartbeatFailure("Can't connect to {}".format(self.database.name), 'mongo')
def ensure_indexes(self):
    """
    Create the indexes this modulestore relies on, raising an exception if
    index creation fails.

    Intended for tests and administrative commands, not for server startup.
    """
    # Each entry is (key spec, extra kwargs); created in order:
    # 1. compound _id prefix — queries often filter on a subset of the id
    # 2. _id.category alone — scans for all category='course' documents
    # 3. definition.children — lms calls get_parent_locations frequently
    #    (for path generation)
    # 4. _id.revision — to allow prioritizing draft vs published material
    index_specs = [
        (
            [
                ('_id.org', pymongo.ASCENDING),
                ('_id.course', pymongo.ASCENDING),
                ('_id.category', pymongo.ASCENDING),
                ('_id.name', pymongo.ASCENDING),
            ],
            {},
        ),
        ('_id.category', {}),
        ('definition.children', {'sparse': True}),
        ('_id.revision', {}),
    ]
    for keys, kwargs in index_specs:
        self.collection.create_index(keys, **kwargs)

View File

@@ -1,5 +1,4 @@
from opaque_keys.edx.locator import DefinitionLocator
from bson import SON
class DefinitionLazyLoader(object):
@@ -25,9 +24,3 @@ class DefinitionLazyLoader(object):
loader pointer with the result so as not to fetch more than once
"""
return self.modulestore.db_connection.get_definition(self.definition_locator.definition_id)
def as_son(self):
    """
    Return this loader's definition locator rendered as a SON document,
    with the `block_type` field first and the `definition` id second.
    """
    locator = self.definition_locator
    return SON([
        ('block_type', locator.block_type),
        ('definition', locator.definition_id),
    ])

View File

@@ -82,7 +82,6 @@ class MongoConnection(object):
host=host,
port=port,
tz_aware=tz_aware,
document_class=son.SON,
**kwargs
),
db
@@ -167,10 +166,10 @@ class MongoConnection(object):
"""
case_regex = ur"(?i)^{}$" if ignore_case else ur"{}"
return self.course_index.find_one(
son.SON([
(key_attr, re.compile(case_regex.format(getattr(key, key_attr))))
{
key_attr: re.compile(case_regex.format(getattr(key, key_attr)))
for key_attr in ('org', 'course', 'run')
])
}
)
def find_matching_course_indexes(self, branch=None, search_targets=None):
@@ -182,7 +181,7 @@ class MongoConnection(object):
search_targets: If specified, this must be a dictionary specifying field values
that must exist in the search_targets of the returned courses
"""
query = son.SON()
query = {}
if branch is not None:
query['versions.{}'.format(branch)] = {'$exists': True}
@@ -206,11 +205,11 @@ class MongoConnection(object):
from_index: If set, only update an index if it matches the one specified in `from_index`.
"""
self.course_index.update(
from_index or son.SON([
('org', course_index['org']),
('course', course_index['course']),
('run', course_index['run'])
]),
from_index or {
'org': course_index['org'],
'course': course_index['course'],
'run': course_index['run'],
},
course_index,
upsert=False,
)
@@ -219,11 +218,11 @@ class MongoConnection(object):
"""
Delete the course_index from the persistence mechanism whose id is the given course_index
"""
return self.course_index.remove(son.SON([
('org', course_index['org']),
('course', course_index['course']),
('run', course_index['run'])
]))
return self.course_index.remove({
'org': course_index['org'],
'course': course_index['course'],
'run': course_index['run'],
})
def get_definition(self, key):
"""
@@ -244,4 +243,20 @@ class MongoConnection(object):
"""
self.definitions.insert(definition)
def ensure_indexes(self):
    """
    Create the indexes this connection's collections require, raising an
    exception if unable to.

    Intended for tests and administrative commands, not for server startup.
    """
    # A course index document is uniquely identified by its (org, course,
    # run) triple, so enforce uniqueness with a compound index over them.
    key_fields = ('org', 'course', 'run')
    self.course_index.create_index(
        [(field, pymongo.ASCENDING) for field in key_fields],
        unique=True,
    )

View File

@@ -2360,6 +2360,16 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
services=self.services,
)
def ensure_indexes(self):
    """
    Delegate index creation to the underlying db connection, which raises
    if it cannot create the indexes it needs.

    Intended for tests and administrative commands, not for server startup.
    """
    self.db_connection.ensure_indexes()
class SparseList(list):
"""
Enable inserting items into a list in arbitrary order and then retrieving them.

View File

@@ -96,6 +96,7 @@ class MongoModulestoreBuilder(object):
branch_setting_func=lambda: ModuleStoreEnum.Branch.draft_preferred,
metadata_inheritance_cache_subsystem=MemoryCache(),
)
modulestore.ensure_indexes()
try:
yield modulestore
@@ -139,6 +140,7 @@ class VersioningModulestoreBuilder(object):
fs_root,
render_template=repr,
)
modulestore.ensure_indexes()
try:
yield modulestore
@@ -210,6 +212,7 @@ class MongoContentstoreBuilder(object):
collection='content',
**COMMON_DOCSTORE_CONFIG
)
contentstore.ensure_indexes()
try:
yield contentstore

View File

@@ -310,7 +310,13 @@ class CourseComparisonTest(BulkAssertionTest):
if actual_item is None and expected_item.location.category == 'course':
actual_item_location = actual_item_location.replace(name='course')
actual_item = actual_item_map.get(map_key(actual_item_location))
self.assertIsNotNone(actual_item, u'cannot find {} in {}'.format(map_key(actual_item_location), actual_item_map))
# Formatting the message slows down tests of large courses significantly, so only do it if it would be used
if actual_item is None:
msg = u'cannot find {} in {}'.format(map_key(actual_item_location), actual_item_map)
else:
msg = None
self.assertIsNotNone(actual_item, msg)
# compare fields
self.assertEqual(expected_item.fields, actual_item.fields)
@@ -328,17 +334,18 @@ class CourseComparisonTest(BulkAssertionTest):
exp_value = map_references(field.read_from(expected_item), field, actual_course_key)
actual_value = field.read_from(actual_item)
self.assertEqual(
exp_value,
actual_value,
"Field {!r} doesn't match between usages {} and {}: {!r} != {!r}".format(
# Formatting the message slows down tests of large courses significantly, so only do it if it would be used
if exp_value != actual_value:
msg = "Field {!r} doesn't match between usages {} and {}: {!r} != {!r}".format(
field_name,
expected_item.scope_ids.usage_id,
actual_item.scope_ids.usage_id,
exp_value,
actual_value,
)
)
else:
msg = None
self.assertEqual(exp_value, actual_value, msg)
# compare children
self.assertEqual(expected_item.has_children, actual_item.has_children)