From 90553a1b1d842600588fe0ecab9402b15c11de3c Mon Sep 17 00:00:00 2001 From: Calen Pennington Date: Tue, 26 Mar 2013 16:19:34 -0400 Subject: [PATCH 1/4] Use get_many and set_many to cut down on the number of metadata trees to retrieve, and only retrieve them once per call to _load_items --- .../lib/xmodule/xmodule/modulestore/mongo.py | 73 +++++++++++-------- .../xmodule/modulestore/tests/test_mongo.py | 58 ++++++++++++++- 2 files changed, 100 insertions(+), 31 deletions(-) diff --git a/common/lib/xmodule/xmodule/modulestore/mongo.py b/common/lib/xmodule/xmodule/modulestore/mongo.py index b76251bb99..27a5ffbc26 100644 --- a/common/lib/xmodule/xmodule/modulestore/mongo.py +++ b/common/lib/xmodule/xmodule/modulestore/mongo.py @@ -9,6 +9,7 @@ from fs.osfs import OSFS from itertools import repeat from path import path from datetime import datetime +from operator import attrgetter from importlib import import_module from xmodule.errortracker import null_error_tracker, exc_info_to_str @@ -107,7 +108,7 @@ class CachingDescriptorSystem(MakoDescriptorSystem): references to metadata_inheritance_tree """ def __init__(self, modulestore, module_data, default_class, resources_fs, - error_tracker, render_template, metadata_inheritance_tree = None): + error_tracker, render_template, metadata_cache = None): """ modulestore: the module store that can be used to retrieve additional modules @@ -132,7 +133,7 @@ class CachingDescriptorSystem(MakoDescriptorSystem): # cdodge: other Systems have a course_id attribute defined. To keep things consistent, let's # define an attribute here as well, even though it's None self.course_id = None - self.metadata_inheritance_tree = metadata_inheritance_tree + self.metadata_cache = metadata_cache def load_item(self, location): location = Location(location) @@ -165,8 +166,8 @@ class CachingDescriptorSystem(MakoDescriptorSystem): model_data = DbModel(kvs, class_, None, MongoUsage(self.course_id, location)) module = class_(self, location, model_data) - if self.metadata_inheritance_tree is not None: - metadata_to_inherit = self.metadata_inheritance_tree.get('parent_metadata', {}).get(location.url(), {}) + if self.metadata_cache is not None: + metadata_to_inherit = self.metadata_cache.get(metadata_cache_key(location), {}).get('parent_metadata', {}).get(location.url(), {}) inherit_metadata(module, metadata_to_inherit) return module except: @@ -206,6 +207,9 @@ def namedtuple_to_son(namedtuple, prefix=''): return son +metadata_cache_key = attrgetter('org', 'course') + + class MongoModuleStore(ModuleStoreBase): """ A Mongodb backed ModuleStore @@ -278,6 +282,7 @@ class MongoModuleStore(ModuleStoreBase): # now traverse the tree and compute down the inherited metadata metadata_to_inherit = {} + def _compute_inherited_metadata(url): my_metadata = {} # check for presence of metadata key. Note that a given module may not yet be fully formed. @@ -293,7 +298,7 @@ class MongoModuleStore(ModuleStoreBase): # go through all the children and recurse, but only if we have # in the result set. Remember results will not contain leaf nodes - for child in results_by_url[url].get('definition',{}).get('children',[]): + for child in results_by_url[url].get('definition', {}).get('children', []): if child in results_by_url: new_child_metadata = copy.deepcopy(my_metadata) new_child_metadata.update(results_by_url[child].get('metadata', {})) @@ -304,42 +309,42 @@ class MongoModuleStore(ModuleStoreBase): # this is likely a leaf node, so let's record what metadata we need to inherit metadata_to_inherit[child] = my_metadata - if root is not None: _compute_inherited_metadata(root) return {'parent_metadata': metadata_to_inherit, - 'timestamp' : datetime.now()} + 'timestamp': datetime.now()} - def get_cached_metadata_inheritance_tree(self, location, force_refresh=False): + def get_cached_metadata_inheritance_trees(self, locations, force_refresh=False): ''' TODO (cdodge) This method can be deleted when the 'split module store' work has been completed ''' - key_name = '{0}/{1}'.format(location.org, location.course) - tree = None - if self.metadata_inheritance_cache is not None: - tree = self.metadata_inheritance_cache.get(key_name) + trees = {} + if locations and self.metadata_inheritance_cache is not None and not force_refresh: + trees = self.metadata_inheritance_cache.get_many(list(set([metadata_cache_key(loc) for loc in locations]))) else: # This is to help guard against an accident prod runtime without a cache logging.warning('Running MongoModuleStore without metadata_inheritance_cache. This should not happen in production!') - if tree is None or force_refresh: - tree = self.get_metadata_inheritance_tree(location) - if self.metadata_inheritance_cache is not None: - self.metadata_inheritance_cache.set(key_name, tree) + to_cache = {} + for loc in locations: + if metadata_cache_key(loc) not in trees: + to_cache[metadata_cache_key(loc)] = trees[metadata_cache_key(loc)] = self.get_metadata_inheritance_tree(loc) - return tree + if to_cache and self.metadata_inheritance_cache is not None: + self.metadata_inheritance_cache.set_many(to_cache) + + return trees def refresh_cached_metadata_inheritance_tree(self, location): pseudo_course_id = '/'.join([location.org, location.course]) if pseudo_course_id not in self.ignore_write_events_on_courses: - self.get_cached_metadata_inheritance_tree(location, force_refresh = True) + self.get_cached_metadata_inheritance_trees([location], force_refresh=True) def clear_cached_metadata_inheritance_tree(self, location): - key_name = '{0}/{1}'.format(location.org, location.course) if self.metadata_inheritance_cache is not None: - self.metadata_inheritance_cache.delete(key_name) + self.metadata_inheritance_cache.delete(metadata_cache_key(location)) def _clean_item_data(self, item): """ @@ -385,7 +390,18 @@ class MongoModuleStore(ModuleStoreBase): return data - def _load_item(self, item, data_cache, should_apply_metadata_inheritence=True): + def _cache_metadata_inheritance(self, items, depth, force_refresh=False): + """ + Retrieves all course metadata inheritance trees needed to load items + """ + + locations = [ + Location(item['location']) for item in items + if not (item['location']['category'] == 'course' and depth == 0) + ] + return self.get_cached_metadata_inheritance_trees(locations, force_refresh=force_refresh) + + def _load_item(self, item, data_cache, metadata_cache): """ Load an XModuleDescriptor from item, using the children stored in data_cache """ @@ -399,9 +415,6 @@ class MongoModuleStore(ModuleStoreBase): metadata_inheritance_tree = None - if should_apply_metadata_inheritence: - metadata_inheritance_tree = self.get_cached_metadata_inheritance_tree(Location(item['location'])) - # TODO (cdodge): When the 'split module store' work has been completed, we should remove # the 'metadata_inheritance_tree' parameter system = CachingDescriptorSystem( @@ -411,7 +424,7 @@ class MongoModuleStore(ModuleStoreBase): resource_fs, self.error_tracker, self.render_template, - metadata_inheritance_tree = metadata_inheritance_tree + metadata_cache, ) return system.load_item(item['location']) @@ -421,11 +434,11 @@ class MongoModuleStore(ModuleStoreBase): to specified depth """ data_cache = self._cache_children(items, depth) + inheritance_cache = self._cache_metadata_inheritance(items, depth) # if we are loading a course object, if we're not prefetching children (depth != 0) then don't - # bother with the metadata inheritence - return [self._load_item(item, data_cache, - should_apply_metadata_inheritence=(item['location']['category'] != 'course' or depth != 0)) for item in items] + # bother with the metadata inheritence + return [self._load_item(item, data_cache, inheritance_cache) for item in items] def get_courses(self): ''' @@ -631,7 +644,7 @@ class MongoModuleStore(ModuleStoreBase): self._update_single_item(location, {'metadata': metadata}) # recompute (and update) the metadata inheritance tree which is cached - self.refresh_cached_metadata_inheritance_tree(loc) + self.refresh_cached_metadata_inheritance_tree(loc) def delete_item(self, location): """ @@ -654,7 +667,7 @@ class MongoModuleStore(ModuleStoreBase): # from overriding our default value set in the init method. safe=self.collection.safe) # recompute (and update) the metadata inheritance tree which is cached - self.refresh_cached_metadata_inheritance_tree(Location(location)) + self.refresh_cached_metadata_inheritance_tree(Location(location)) def get_parent_locations(self, location, course_id): '''Find all locations that are the parents of this location in this diff --git a/common/lib/xmodule/xmodule/modulestore/tests/test_mongo.py b/common/lib/xmodule/xmodule/modulestore/tests/test_mongo.py index 6f6f47ba85..3e29c07ea4 100644 --- a/common/lib/xmodule/xmodule/modulestore/tests/test_mongo.py +++ b/common/lib/xmodule/xmodule/modulestore/tests/test_mongo.py @@ -1,6 +1,7 @@ import pymongo -from nose.tools import assert_equals, assert_raises, assert_not_equals, with_setup +from mock import Mock +from nose.tools import assert_equals, assert_raises, assert_not_equals, with_setup, assert_false from pprint import pprint from xmodule.modulestore import Location @@ -102,3 +103,58 @@ class TestMongoModuleStore(object): def test_path_to_location(self): '''Make sure that path_to_location works''' check_path_to_location(self.store) + + def test_metadata_inheritance_query_count(self): + ''' + When retrieving items from mongo, we should only query the cache a number of times + equal to the number of courses being retrieved from. + + We should also not query + ''' + self.store.metadata_inheritance_cache = Mock() + get_many = self.store.metadata_inheritance_cache.get_many + set_many = self.store.metadata_inheritance_cache.set_many + get_many.return_value = {('edX', 'toy'): {}} + + self.store.get_item(Location("i4x://edX/toy/course/2012_Fall"), depth=0) + assert_false(get_many.called) + assert_false(set_many.called) + get_many.reset_mock() + + self.store.get_item(Location("i4x://edX/toy/course/2012_Fall"), depth=3) + get_many.assert_called_with([('edX', 'toy')]) + assert_equals(0, set_many.call_count) + get_many.reset_mock() + + self.store.get_items(Location('i4x', 'edX', None, 'course', None), depth=0) + assert_false(get_many.called) + assert_false(set_many.called) + get_many.reset_mock() + + self.store.get_items(Location('i4x', 'edX', None, 'course', None), depth=3) + assert_equals(1, get_many.call_count) + assert_equals([('edX', 'simple'), ('edX', 'toy')], sorted(get_many.call_args[0][0])) + assert_equals(1, set_many.call_count) + assert_equals([('edX', 'simple')], sorted(set_many.call_args[0][0].keys())) + get_many.reset_mock() + + self.store.get_items(Location('i4x', 'edX', None, None, None), depth=0) + assert_equals(1, get_many.call_count) + assert_equals([('edX', 'simple'), ('edX', 'toy')], sorted(get_many.call_args[0][0])) + assert_equals(1, set_many.call_count) + assert_equals([('edX', 'simple')], sorted(set_many.call_args[0][0].keys())) + get_many.reset_mock() + + def test_metadata_inheritance_query_count_forced_refresh(self): + self.store.metadata_inheritance_cache = Mock() + get_many = self.store.metadata_inheritance_cache.get_many + set_many = self.store.metadata_inheritance_cache.set_many + get_many.return_value = {('edX', 'toy'): {}} + + self.store.get_cached_metadata_inheritance_trees( + [Location("i4x://edX/toy/course/2012_Fall"), Location("i4x://edX/simple/course/2012_Fall")], + True + ) + assert_false(get_many.called) + assert_equals(1, set_many.call_count) + assert_equals([('edX', 'simple'), ('edX', 'toy')], sorted(set_many.call_args[0][0].keys())) From 1f11508ac6b4057a137d35a11835db4f6d231588 Mon Sep 17 00:00:00 2001 From: Calen Pennington Date: Tue, 26 Mar 2013 16:30:55 -0400 Subject: [PATCH 2/4] Pylint cleanup --- .../lib/xmodule/xmodule/modulestore/mongo.py | 44 +++++++++++++------ 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/common/lib/xmodule/xmodule/modulestore/mongo.py b/common/lib/xmodule/xmodule/modulestore/mongo.py index 27a5ffbc26..47049b9fd6 100644 --- a/common/lib/xmodule/xmodule/modulestore/mongo.py +++ b/common/lib/xmodule/xmodule/modulestore/mongo.py @@ -97,6 +97,7 @@ class MongoKeyValueStore(KeyValueStore): else: return False + MongoUsage = namedtuple('MongoUsage', 'id, def_id') @@ -108,7 +109,7 @@ class CachingDescriptorSystem(MakoDescriptorSystem): references to metadata_inheritance_tree """ def __init__(self, modulestore, module_data, default_class, resources_fs, - error_tracker, render_template, metadata_cache = None): + error_tracker, render_template, metadata_cache=None): """ modulestore: the module store that can be used to retrieve additional modules @@ -136,6 +137,9 @@ class CachingDescriptorSystem(MakoDescriptorSystem): self.metadata_cache = metadata_cache def load_item(self, location): + """ + Return an XModule instance for the specified location + """ location = Location(location) json_data = self.module_data.get(location) if json_data is None: @@ -197,12 +201,12 @@ def location_to_query(location, wildcard=True): return query -def namedtuple_to_son(namedtuple, prefix=''): +def namedtuple_to_son(ntuple, prefix=''): """ Converts a namedtuple into a SON object with the same key order """ son = SON() - for idx, field_name in enumerate(namedtuple._fields): + for idx, field_name in enumerate(ntuple._fields): son[prefix + field_name] = namedtuple[idx] return son @@ -232,7 +236,6 @@ class MongoModuleStore(ModuleStoreBase): if user is not None and password is not None: self.collection.database.authenticate(user, password) - # Force mongo to report errors, at the expense of performance self.collection.safe = True @@ -262,7 +265,7 @@ class MongoModuleStore(ModuleStoreBase): query = { '_id.org': location.org, '_id.course': location.course, - '_id.category': {'$in': [ 'course', 'chapter', 'sequential', 'vertical']} + '_id.category': {'$in': ['course', 'chapter', 'sequential', 'vertical']} } # we just want the Location, children, and metadata record_filter = {'_id': 1, 'definition.children': 1, 'metadata': 1} @@ -284,6 +287,9 @@ class MongoModuleStore(ModuleStoreBase): metadata_to_inherit = {} def _compute_inherited_metadata(url): + """ + Helper method for computing inherited metadata for a specific location url + """ my_metadata = {} # check for presence of metadata key. Note that a given module may not yet be fully formed. # example: update_item -> update_children -> update_metadata sequence on new item create @@ -325,12 +331,14 @@ class MongoModuleStore(ModuleStoreBase): trees = self.metadata_inheritance_cache.get_many(list(set([metadata_cache_key(loc) for loc in locations]))) else: # This is to help guard against an accident prod runtime without a cache - logging.warning('Running MongoModuleStore without metadata_inheritance_cache. This should not happen in production!') + logging.warning('Running MongoModuleStore without metadata_inheritance_cache. ' + 'This should not happen in production!') to_cache = {} for loc in locations: - if metadata_cache_key(loc) not in trees: - to_cache[metadata_cache_key(loc)] = trees[metadata_cache_key(loc)] = self.get_metadata_inheritance_tree(loc) + cache_key = metadata_cache_key(loc) + if cache_key not in trees: + to_cache[cache_key] = trees[cache_key] = self.get_metadata_inheritance_tree(loc) if to_cache and self.metadata_inheritance_cache is not None: self.metadata_inheritance_cache.set_many(to_cache) @@ -338,11 +346,19 @@ class MongoModuleStore(ModuleStoreBase): return trees def refresh_cached_metadata_inheritance_tree(self, location): + """ + Refresh the cached metadata inheritance tree for the org/course combination + for location + """ pseudo_course_id = '/'.join([location.org, location.course]) if pseudo_course_id not in self.ignore_write_events_on_courses: - self.get_cached_metadata_inheritance_trees([location], force_refresh=True) + self.get_cached_metadata_inheritance_trees([location], force_refresh=True) def clear_cached_metadata_inheritance_tree(self, location): + """ + Delete the cached metadata inheritance tree for the org/course combination + for location + """ if self.metadata_inheritance_cache is not None: self.metadata_inheritance_cache.delete(metadata_cache_key(location)) @@ -372,7 +388,7 @@ class MongoModuleStore(ModuleStoreBase): data[Location(item['location'])] = item if depth == 0: - break; + break # Load all children by id. See # http://www.mongodb.org/display/DOCS/Advanced+Queries#AdvancedQueries-%24or @@ -413,8 +429,6 @@ class MongoModuleStore(ModuleStoreBase): resource_fs = OSFS(root) - metadata_inheritance_tree = None - # TODO (cdodge): When the 'split module store' work has been completed, we should remove # the 'metadata_inheritance_tree' parameter system = CachingDescriptorSystem( @@ -572,7 +586,8 @@ class MongoModuleStore(ModuleStoreBase): raise Exception('Could not find course at {0}'.format(course_search_location)) if found_cnt > 1: - raise Exception('Found more than one course at {0}. There should only be one!!! Dump = {1}'.format(course_search_location, courses)) + raise Exception('Found more than one course at {0}. There should only be one!!! ' + 'Dump = {1}'.format(course_search_location, courses)) return courses[0] @@ -688,4 +703,7 @@ class MongoModuleStore(ModuleStoreBase): # DraftModuleStore is first, because it needs to intercept calls to MongoModuleStore class DraftMongoModuleStore(DraftModuleStore, MongoModuleStore): + """ + Version of MongoModuleStore with draft capability mixed in + """ pass From e0343342b0d20f87818ec74535a0f96e8e91c70c Mon Sep 17 00:00:00 2001 From: Calen Pennington Date: Tue, 26 Mar 2013 16:36:58 -0400 Subject: [PATCH 3/4] Fix typo during pylint fixes --- common/lib/xmodule/xmodule/modulestore/mongo.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/common/lib/xmodule/xmodule/modulestore/mongo.py b/common/lib/xmodule/xmodule/modulestore/mongo.py index 47049b9fd6..fdc34913ee 100644 --- a/common/lib/xmodule/xmodule/modulestore/mongo.py +++ b/common/lib/xmodule/xmodule/modulestore/mongo.py @@ -207,7 +207,7 @@ def namedtuple_to_son(ntuple, prefix=''): """ son = SON() for idx, field_name in enumerate(ntuple._fields): - son[prefix + field_name] = namedtuple[idx] + son[prefix + field_name] = ntuple[idx] return son @@ -703,6 +703,9 @@ class MongoModuleStore(ModuleStoreBase): # DraftModuleStore is first, because it needs to intercept calls to MongoModuleStore class DraftMongoModuleStore(DraftModuleStore, MongoModuleStore): + """ + Version of MongoModuleStore with draft capability mixed in + """ """ Version of MongoModuleStore with draft capability mixed in """ From b975d4d90ce1a5e3fc79ab18c6eec96b2052bea3 Mon Sep 17 00:00:00 2001 From: Calen Pennington Date: Tue, 26 Mar 2013 16:43:58 -0400 Subject: [PATCH 4/4] Fix tests --- cms/djangoapps/contentstore/tests/test_contentstore.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cms/djangoapps/contentstore/tests/test_contentstore.py b/cms/djangoapps/contentstore/tests/test_contentstore.py index edb20561bc..e6b5933d66 100644 --- a/cms/djangoapps/contentstore/tests/test_contentstore.py +++ b/cms/djangoapps/contentstore/tests/test_contentstore.py @@ -211,7 +211,7 @@ class ContentStoreToyCourseTest(ModuleStoreTestCase): new_loc = descriptor.location._replace(org='MITx', course='999') print "Checking {0} should now also be at {1}".format(descriptor.location.url(), new_loc.url()) resp = self.client.get(reverse('edit_unit', kwargs={'location': new_loc.url()})) - self.assertEqual(resp.status_code, 200) + self.assertEqual(resp.status_code, 200) def test_delete_course(self): import_from_xml(modulestore(), 'common/test/data/', ['full']) @@ -328,11 +328,11 @@ class ContentStoreToyCourseTest(ModuleStoreTestCase): self.assertEqual(wrapper.counter, 4) # make sure we pre-fetched a known sequential which should be at depth=2 - self.assertTrue(Location(['i4x', 'edX', 'full', 'sequential', + self.assertTrue(Location(['i4x', 'edX', 'full', 'sequential', 'Administrivia_and_Circuit_Elements', None]) in course.system.module_data) # make sure we don't have a specific vertical which should be at depth=3 - self.assertFalse(Location(['i4x', 'edX', 'full', 'vertical', 'vertical_58', + self.assertFalse(Location(['i4x', 'edX', 'full', 'vertical', 'vertical_58', None]) in course.system.module_data) def test_export_course_with_unknown_metadata(self): @@ -556,7 +556,7 @@ class ContentStoreTest(ModuleStoreTestCase): module_store.update_children(parent.location, parent.children + [new_component_location.url()]) # flush the cache - module_store.get_cached_metadata_inheritance_tree(new_component_location, -1) + module_store.refresh_cached_metadata_inheritance_tree(new_component_location) new_module = module_store.get_item(new_component_location) # check for grace period definition which should be defined at the course level @@ -571,7 +571,7 @@ class ContentStoreTest(ModuleStoreTestCase): module_store.update_metadata(new_module.location, own_metadata(new_module)) # flush the cache and refetch - module_store.get_cached_metadata_inheritance_tree(new_component_location, -1) + module_store.refresh_cached_metadata_inheritance_tree(new_component_location) new_module = module_store.get_item(new_component_location) self.assertEqual(timedelta(1), new_module.lms.graceperiod)