From fa94360d6933f64137bf566683dd2d6a23161c91 Mon Sep 17 00:00:00 2001 From: Don Mitchell Date: Tue, 10 Jun 2014 10:39:39 -0400 Subject: [PATCH] Review all fs.files and modulestore indices and fix some gridfs calls to reduce traffic --- .../lib/xmodule/xmodule/contentstore/mongo.py | 15 +++----- .../xmodule/modulestore/tests/test_mongo.py | 1 - mongo_indexes.md | 38 ++++++++++++++----- 3 files changed, 34 insertions(+), 20 deletions(-) diff --git a/common/lib/xmodule/xmodule/contentstore/mongo.py b/common/lib/xmodule/xmodule/contentstore/mongo.py index 92c8e96896..68261e1188 100644 --- a/common/lib/xmodule/xmodule/contentstore/mongo.py +++ b/common/lib/xmodule/xmodule/contentstore/mongo.py @@ -67,8 +67,8 @@ class MongoContentStore(ContentStore): def delete(self, location_or_id): if isinstance(location_or_id, AssetLocation): location_or_id = self.asset_db_key(location_or_id) - if self.fs.exists({"_id": location_or_id}): - self.fs.delete(location_or_id) + # Deletes of non-existent files are considered successful + self.fs.delete(location_or_id) def find(self, location, throw_on_not_found=True, as_stream=False): content_id = self.asset_db_key(location) @@ -104,10 +104,8 @@ class MongoContentStore(ContentStore): def get_stream(self, location): content_id = self.asset_db_key(location) - fs_pointer = self.fs_files.find_one({'_id': content_id}, fields={'_id': 1}) - try: - handle = self.fs.get(fs_pointer['_id']) + handle = self.fs.get(content_id) except NoFile: raise NotFoundError() @@ -239,11 +237,10 @@ class MongoContentStore(ContentStore): if attr in ['_id', 'md5', 'uploadDate', 'length']: raise AttributeError("{} is a protected attribute.".format(attr)) asset_db_key = self.asset_db_key(location) - # FIXME remove fetch and use a form of update which fails if doesn't exist - item = self.fs_files.find_one({'_id': asset_db_key}) - if item is None: + # catch upsert error and raise NotFoundError if asset doesn't exist + result = self.fs_files.update({'_id': asset_db_key}, {"$set": attr_dict}, upsert=False) + if not result.get('updatedExisting', True): raise NotFoundError(asset_db_key) - self.fs_files.update({'_id': asset_db_key}, {"$set": attr_dict}) def get_attrs(self, location): """ diff --git a/common/lib/xmodule/xmodule/modulestore/tests/test_mongo.py b/common/lib/xmodule/xmodule/modulestore/tests/test_mongo.py index 1a32ece1e4..063f05f973 100644 --- a/common/lib/xmodule/xmodule/modulestore/tests/test_mongo.py +++ b/common/lib/xmodule/xmodule/modulestore/tests/test_mongo.py @@ -9,7 +9,6 @@ import shutil from tempfile import mkdtemp from uuid import uuid4 import unittest -import bson.son from xblock.core import XBlock from xblock.fields import Scope, Reference, ReferenceList, ReferenceValueDict diff --git a/mongo_indexes.md b/mongo_indexes.md index ecfd985c37..544b4186e7 100644 --- a/mongo_indexes.md +++ b/mongo_indexes.md @@ -23,11 +23,15 @@ ensureIndex({'schema': 1}) fs.files: ========= +Index needed thru 'category' by `_get_all_content_for_course` and others. That query also takes a sort +which can be `uploadDate`, `display_name`, +# again, uploadDate may also be a freq sort. ``` -ensureIndex({'displayname': 1}) -ensureIndex({'_id.tag': 1, '_id.org': 1, '_id.course': 1, '_id.category': 1, '_id.name': 1}) +ensureIndex({'_id.tag': 1, '_id.org': 1, '_id.course': 1, '_id.category': 1}) ``` +Remove index on `displayname` + modulestore: ============ @@ -47,6 +51,17 @@ Because we often scan for all category='course' regardless of the value of the o ensureIndex({'_id.category': 1}) ``` +Because lms calls get_parent_locations frequently (for path generation): +``` +ensureIndex({'_id.tag': 1, '_id.org': 1, '_id.course': 1, 'definition.children': 1}) +``` + +Remove these indices if they exist as I can find no use for them: +``` + { "_id.course": 1, "_id.org": 1, "_id.revision": 1, "definition.children": 1 } + { "definition.children": 1 } +``` + NOTE, that index will only aid queries which provide the keys in exactly that form and order. The query can omit later fields of the query but not earlier. Thus ```modulestore.find({'_id.org': 'myu'})``` will not use the index as it omits the tag. As soon as mongo comes across an index field omitted from the query, it stops @@ -56,14 +71,17 @@ for matches to the category. To find out if any records have the wrong id structure, run ``` -db.modulestore.find({$where: function() { - var keys = Object.keys(this['_id']); - var ref = ['tag', 'org', 'course', 'category', 'name', 'revision']; - for (var i=0; i < ref.length; i++) { - if (keys[i] != ref[i]) return true; - } - return false; }}, - {_id: 1}) +db.fs.files.find({uploadDate: {$gt: startDate, $lt: endDate}, + $where: function() { + var keys = ['category', 'name', 'course', 'tag', 'org', 'revision']; + for (var key in this._id) { + if (key != keys.shift()) { + return true; + } + } + return false; + }}, + {_id: 1}) ``` modulestore.active_versions