From f8e9313dadd469840d758ee91b0a2aa3c8f0ac19 Mon Sep 17 00:00:00 2001 From: Calen Pennington Date: Wed, 11 Jul 2012 16:22:24 -0400 Subject: [PATCH] Minimize the number of roundtrips to mongo during cms save by prefetching children --- cms/djangoapps/contentstore/views.py | 2 +- .../xmodule/xmodule/modulestore/__init__.py | 16 ++-- .../lib/xmodule/xmodule/modulestore/mongo.py | 93 ++++++++++++++++--- common/lib/xmodule/xmodule/modulestore/xml.py | 4 +- 4 files changed, 92 insertions(+), 23 deletions(-) diff --git a/cms/djangoapps/contentstore/views.py b/cms/djangoapps/contentstore/views.py index 185ea3868d..d1839a0673 100644 --- a/cms/djangoapps/contentstore/views.py +++ b/cms/djangoapps/contentstore/views.py @@ -54,7 +54,7 @@ def save_item(request): # This uses wildcarding to find the course, which requires handling # multiple courses returned, but there should only ever be one course_location = Location(item_id)._replace(category='course', name=None) - courses = modulestore().get_items(course_location) + courses = modulestore().get_items(course_location, depth=None) for course in courses: export_to_github(course, "CMS Edit") diff --git a/common/lib/xmodule/xmodule/modulestore/__init__.py b/common/lib/xmodule/xmodule/modulestore/__init__.py index 1e8ee838d7..816b512ddd 100644 --- a/common/lib/xmodule/xmodule/modulestore/__init__.py +++ b/common/lib/xmodule/xmodule/modulestore/__init__.py @@ -148,7 +148,7 @@ class ModuleStore(object): """ An abstract interface for a database backend that stores XModuleDescriptor instances """ - def get_item(self, location, default_class=None): + def get_item(self, location, depth=0): """ Returns an XModuleDescriptor instance for the item at location. If location.revision is None, returns the item with the most @@ -159,20 +159,24 @@ class ModuleStore(object): If no object is found at that location, raises xmodule.modulestore.exceptions.ItemNotFoundError location: Something that can be passed to Location - default_class: An XModuleDescriptor subclass to use if no plugin matching the - location is found + + depth (int): An argument that some module stores may use to prefetch descendents of the queried modules + for more efficient results later in the request. The depth is counted in the number of + calls to get_children() to cache. None indicates to cache all descendents """ raise NotImplementedError - def get_items(self, location, default_class=None): + def get_items(self, location, depth=0): """ Returns a list of XModuleDescriptor instances for the items that match location. Any element of location that is None is treated as a wildcard that matches any value location: Something that can be passed to Location - default_class: An XModuleDescriptor subclass to use if no plugin matching the - location is found + + depth: An argument that some module stores may use to prefetch descendents of the queried modules + for more efficient results later in the request. The depth is counted in the number of calls + to get_children() to cache. None indicates to cache all descendents """ raise NotImplementedError diff --git a/common/lib/xmodule/xmodule/modulestore/mongo.py b/common/lib/xmodule/xmodule/modulestore/mongo.py index 3484f00d51..6a8444718e 100644 --- a/common/lib/xmodule/xmodule/modulestore/mongo.py +++ b/common/lib/xmodule/xmodule/modulestore/mongo.py @@ -15,7 +15,38 @@ from .exceptions import ItemNotFoundError, InsufficientSpecificationError # that assumption will have to change +class CachingDescriptorSystem(MakoDescriptorSystem): + """ + A system that has a cache of module json that it will use to load modules + from, with a backup of calling to the underlying modulestore for more data + """ + def __init__(self, modulestore, module_data, default_class, resources_fs, render_template): + """ + modulestore: the module store that can be used to retrieve additional modules + module_data: a mapping of Location -> json that was cached from the underlying modulestore + default_class: The default_class to use when loading an XModuleDescriptor from the module_data + resources_fs: a filesystem, as per MakoDescriptorSystem + render_template: a function for rendering templates, as per MakoDescriptorSystem + """ + super(CachingDescriptorSystem, self).__init__(render_template, self.load_item, resources_fs) + self.modulestore = modulestore + self.module_data = module_data + self.default_class = default_class + + def load_item(self, location): + location = Location(location) + json_data = self.module_data.get(location) + if json_data is None: + return self.modulestore.get_item(location) + else: + return XModuleDescriptor.load_from_json(json_data, self, self.default_class) + + def location_to_query(loc): + """ + Takes a Location and returns a SON object that will query for that location. + Fields in loc that are None are ignored in the query + """ query = SON() # Location dict is ordered by specificity, and SON # will preserve that order for queries @@ -46,19 +77,49 @@ class MongoModuleStore(ModuleStore): class_ = getattr(import_module(module_path), class_name) self.default_class = class_ - # TODO (cpennington): Pass a proper resources_fs to the system - self.system = MakoDescriptorSystem( - load_item=self.get_item, - resources_fs=None, - render_template=render_to_string - ) - - def _load_item(self, item): + def _clean_item_data(self, item): + """ + Renames the '_id' field in item to 'location' + """ item['location'] = item['_id'] del item['_id'] - return XModuleDescriptor.load_from_json(item, self.system, self.default_class) - def get_item(self, location): + def _cache_children(self, items, depth=0): + """ + Returns a dictionary mapping Location -> item data, populated with json data + for all descendents of items up to the specified depth. + This will make a number of queries that is linear in the depth + """ + data = {} + to_process = list(items) + while to_process and depth is None or depth >= 0: + children = [] + for item in to_process: + self._clean_item_data(item) + children.extend(item.get('definition', {}).get('children', [])) + data[Location(item['location'])] = item + + # Load all children by id. See + # http://www.mongodb.org/display/DOCS/Advanced+Queries#AdvancedQueries-%24or + # for or-query syntax + if children: + to_process = list(self.collection.find({'_id': {'$in': [Location(child).dict() for child in children]}})) + else: + to_process = [] + if depth is not None: + depth -= 1 + + return data + + def _load_items(self, items, depth=0): + """ + Load a list of xmodules from the data in items, with children cached up to specified depth + """ + data_cache = self._cache_children(items, depth) + system = CachingDescriptorSystem(self, data_cache, self.default_class, None, render_to_string) + return [system.load_item(item['location']) for item in items] + + def get_item(self, location, depth=0): """ Returns an XModuleDescriptor instance for the item at location. If location.revision is None, returns the most item with the most @@ -68,7 +129,11 @@ class MongoModuleStore(ModuleStore): xmodule.modulestore.exceptions.InsufficientSpecificationError If no object is found at that location, raises xmodule.modulestore.exceptions.ItemNotFoundError - location: Something that can be passed to Location + location: a Location object + depth (int): An argument that some module stores may use to prefetch descendents of the queried modules + for more efficient results later in the request. The depth is counted in the number of + calls to get_children() to cache. None indicates to cache all descendents + """ for key, val in Location(location).dict().iteritems(): @@ -81,15 +146,15 @@ class MongoModuleStore(ModuleStore): ) if item is None: raise ItemNotFoundError(location) - return self._load_item(item) + return self._load_items([item], depth)[0] - def get_items(self, location, default_class=None): + def get_items(self, location, depth=0): items = self.collection.find( location_to_query(location), sort=[('revision', pymongo.ASCENDING)], ) - return [self._load_item(item) for item in items] + return self._load_items(list(items), depth) def create_item(self, location): """ diff --git a/common/lib/xmodule/xmodule/modulestore/xml.py b/common/lib/xmodule/xmodule/modulestore/xml.py index 7474666e21..17866be4af 100644 --- a/common/lib/xmodule/xmodule/modulestore/xml.py +++ b/common/lib/xmodule/xmodule/modulestore/xml.py @@ -132,7 +132,7 @@ class XMLModuleStore(ModuleStore): log.debug('========> Done with course import') return course_descriptor - def get_item(self, location): + def get_item(self, location, depth=0): """ Returns an XModuleDescriptor instance for the item at location. If location.revision is None, returns the most item with the most @@ -150,7 +150,7 @@ class XMLModuleStore(ModuleStore): except KeyError: raise ItemNotFoundError(location) - def get_courses(self): + def get_courses(self, depth=0): """ Returns a list of course descriptors """