From 4e7ed93b3ea14b428e48c8e9f1aaa9e432ff996b Mon Sep 17 00:00:00 2001
From: John Eskew <jeskew@edx.org>
Date: Wed, 18 Feb 2015 15:29:52 -0500
Subject: [PATCH] Rollback eager definition loading. Pass 'lazy' parameter down
 properly into low-level definition caching. Adjust mongo call numbers in
 tests and add missing bulk_op wrapper. Avoids the loading of all definitions
 in a large course when only specific, filtered definitions are needed to be
 loaded for an endpoint.

---
 .../xmodule/modulestore/split_mongo/split.py  | 78 +++++++++----------
 .../tests/test_mongo_call_count.py            | 59 +++++++++++---
 .../test_split_modulestore_bulk_operations.py |  7 +-
 .../xmodule/modulestore/xml_exporter.py       |  7 +-
 4 files changed, 97 insertions(+), 54 deletions(-)

diff --git a/common/lib/xmodule/xmodule/modulestore/split_mongo/split.py b/common/lib/xmodule/xmodule/modulestore/split_mongo/split.py
index 798dd9bf94..e7cb99df06 100644
--- a/common/lib/xmodule/xmodule/modulestore/split_mongo/split.py
+++ b/common/lib/xmodule/xmodule/modulestore/split_mongo/split.py
@@ -423,11 +423,12 @@ class SplitBulkWriteMixin(BulkOperationsMixin):
                     ids.remove(definition_id)
                     definitions.append(definition)
 
-        # Query the db for the definitions.
-        defs_from_db = self.db_connection.get_definitions(list(ids))
-        # Add the retrieved definitions to the cache.
-        bulk_write_record.definitions.update({d.get('_id'): d for d in defs_from_db})
-        definitions.extend(defs_from_db)
+        if len(ids):
+            # Query the db for the definitions.
+            defs_from_db = self.db_connection.get_definitions(list(ids))
+            # Add the retrieved definitions to the cache.
+            bulk_write_record.definitions.update({d.get('_id'): d for d in defs_from_db})
+            definitions.extend(defs_from_db)
         return definitions
 
     def update_definition(self, course_key, definition):
@@ -683,7 +684,7 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
         connection.close()
 
     def cache_items(self, system, base_block_ids, course_key, depth=0, lazy=True):
-        '''
+        """
         Handles caching of items once inheritance and any other one time
         per course per fetch operations are done.
 
@@ -692,8 +693,8 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
             base_block_ids: list of BlockIds to fetch
             course_key: the destination course providing the context
             depth: how deep below these to prefetch
-            lazy: whether to fetch definitions or use placeholders
-        '''
+            lazy: whether to load definitions now or later
+        """
         with self.bulk_operations(course_key, emit_signals=False):
             new_module_data = {}
             for block_id in base_block_ids:
@@ -704,13 +705,9 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
                     new_module_data
                 )
 
-            # This code supports lazy loading, where the descendent definitions aren't loaded
+            # This method supports lazy loading, where the descendent definitions aren't loaded
             # until they're actually needed.
-            # However, assume that depth == 0 means no depth is specified and depth != 0 means
-            # a depth *is* specified. If a non-zero depth is specified, force non-lazy definition
-            # loading in order to populate the definition cache for later access.
-            load_definitions_now = depth != 0 or not lazy
-            if load_definitions_now:
+            if not lazy:
                 # Non-lazy loading: Load all descendants by id.
                 descendent_definitions = self.get_definitions(
                     course_key,
@@ -734,15 +731,16 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
             return system.module_data
 
     @contract(course_entry=CourseEnvelope, block_keys="list(BlockKey)", depth="int | None")
-    def _load_items(self, course_entry, block_keys, depth=0, lazy=True, **kwargs):
-        '''
+    def _load_items(self, course_entry, block_keys, depth=0, **kwargs):
+        """
         Load & cache the given blocks from the course. May return the blocks in any order.
 
-        Load the definitions into each block if lazy is False;
-        otherwise, use the lazy definition placeholder.
-        '''
+        Load the definitions into each block if lazy is in kwargs and is False;
+        otherwise, do not load the definitions - they'll be loaded later when needed.
+        """
         runtime = self._get_cache(course_entry.structure['_id'])
         if runtime is None:
+            lazy = kwargs.pop('lazy', True)
             runtime = self.create_runtime(course_entry, lazy)
             self._add_cache(course_entry.structure['_id'], runtime)
             self.cache_items(runtime, block_keys, course_entry.course_key, depth, lazy)
@@ -786,7 +784,7 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
             self.request_cache.data['course_cache'] = {}
 
     def _lookup_course(self, course_key):
-        '''
+        """
         Decode the locator into the right series of db access. Does not
         return the CourseDescriptor! It returns the actual db json from
         structures.
@@ -797,7 +795,7 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
         reference)
 
         :param course_key: any subclass of CourseLocator
-        '''
+        """
         if course_key.org and course_key.course and course_key.run:
             if course_key.branch is None:
                 raise InsufficientSpecificationError(course_key)
@@ -864,7 +862,7 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
             locator = locator_factory(structure_info, branch)
             envelope = CourseEnvelope(locator, entry)
             root = entry['root']
-            structures_list = self._load_items(envelope, [root], 0, lazy=True, **kwargs)
+            structures_list = self._load_items(envelope, [root], depth=0, **kwargs)
             if not isinstance(structures_list[0], ErrorDescriptor):
                 result.append(structures_list[0])
         return result
@@ -929,13 +927,13 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
         """
         structure_entry = self._lookup_course(structure_id)
         root = structure_entry.structure['root']
-        result = self._load_items(structure_entry, [root], depth, lazy=True, **kwargs)
+        result = self._load_items(structure_entry, [root], depth, **kwargs)
         return result[0]
 
     def get_course(self, course_id, depth=0, **kwargs):
-        '''
+        """
         Gets the course descriptor for the course identified by the locator
-        '''
+        """
         if not isinstance(course_id, CourseLocator) or course_id.deprecated:
             # The supplied CourseKey is of the wrong type, so it can't possibly be stored in this modulestore.
             raise ItemNotFoundError(course_id)
@@ -951,14 +949,14 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
         return self._get_structure(library_id, depth, **kwargs)
 
     def has_course(self, course_id, ignore_case=False, **kwargs):
-        '''
+        """
         Does this course exist in this modulestore. This method does not verify that the branch &/or
         version in the course_id exists. Use get_course_index_info to check that.
 
         Returns the course_id of the course if it was found, else None
         Note: we return the course_id instead of a boolean here since the found course may have
            a different id than the given course_id when ignore_case is True.
-        '''
+        """
         if not isinstance(course_id, CourseLocator) or course_id.deprecated:
             # The supplied CourseKey is of the wrong type, so it can't possibly be stored in this modulestore.
             return False
@@ -967,12 +965,12 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
         return CourseLocator(course_index['org'], course_index['course'], course_index['run'], course_id.branch) if course_index else None
 
     def has_library(self, library_id, ignore_case=False, **kwargs):
-        '''
+        """
         Does this library exist in this modulestore. This method does not verify that the branch &/or
         version in the library_id exists.
 
         Returns the library_id of the course if it was found, else None.
-        '''
+        """
         if not isinstance(library_id, LibraryLocator):
             return None
 
@@ -1017,7 +1015,7 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
 
         with self.bulk_operations(usage_key.course_key):
             course = self._lookup_course(usage_key.course_key)
-            items = self._load_items(course, [BlockKey.from_usage_key(usage_key)], depth, lazy=True, **kwargs)
+            items = self._load_items(course, [BlockKey.from_usage_key(usage_key)], depth, **kwargs)
             if len(items) == 0:
                 raise ItemNotFoundError(usage_key)
             elif len(items) > 1:
@@ -1078,7 +1076,7 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
                 if block_name == block_id.id and _block_matches_all(block):
                     block_ids.append(block_id)
 
-            return self._load_items(course, block_ids, lazy=True, **kwargs)
+            return self._load_items(course, block_ids, **kwargs)
 
         if 'category' in qualifiers:
             qualifiers['block_type'] = qualifiers.pop('category')
@@ -1091,18 +1089,18 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
                 items.append(block_id)
 
         if len(items) > 0:
-            return self._load_items(course, items, 0, lazy=True, **kwargs)
+            return self._load_items(course, items, depth=0, **kwargs)
         else:
             return []
 
     def get_parent_location(self, locator, **kwargs):
-        '''
+        """
         Return the location (Locators w/ block_ids) for the parent of this location in this
         course. Could use get_items(location, {'children': block_id}) but this is slightly faster.
         NOTE: the locator must contain the block_id, and this code does not actually ensure block_id exists
 
         :param locator: BlockUsageLocator restricting search scope
-        '''
+        """
         if not isinstance(locator, BlockUsageLocator) or locator.deprecated:
             # The supplied locator is of the wrong type, so it can't possibly be stored in this modulestore.
             raise ItemNotFoundError(locator)
@@ -1208,12 +1206,12 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
         return definition['edit_info']
 
     def get_course_successors(self, course_locator, version_history_depth=1):
-        '''
+        """
         Find the version_history_depth next versions of this course. Return as a VersionTree
         Mostly makes sense when course_locator uses a version_guid, but because it finds all relevant
         next versions, these do include those created for other courses.
         :param course_locator:
-        '''
+        """
         if not isinstance(course_locator, CourseLocator) or course_locator.deprecated:
             # The supplied CourseKey is of the wrong type, so it can't possibly be stored in this modulestore.
             raise ItemNotFoundError(course_locator)
@@ -1244,14 +1242,14 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
         return VersionTree(course_locator, result)
 
     def get_block_generations(self, block_locator):
-        '''
+        """
         Find the history of this block. Return as a VersionTree of each place the block changed (except
         deletion).
 
         The block's history tracks its explicit changes but not the changes in its children starting
         from when the block was created.
 
-        '''
+        """
         # course_agnostic means we don't care if the head and version don't align, trust the version
         course_struct = self._lookup_course(block_locator.course_key.course_agnostic()).structure
         block_key = BlockKey.from_usage_key(block_locator)
@@ -1296,9 +1294,9 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
         )
 
     def get_definition_successors(self, definition_locator, version_history_depth=1):
-        '''
+        """
         Find the version_history_depth next versions of this definition. Return as a VersionTree
-        '''
+        """
         # TODO implement
         pass
 
diff --git a/common/lib/xmodule/xmodule/modulestore/tests/test_mongo_call_count.py b/common/lib/xmodule/xmodule/modulestore/tests/test_mongo_call_count.py
index 0046fdd7ed..72e6ec4ef6 100644
--- a/common/lib/xmodule/xmodule/modulestore/tests/test_mongo_call_count.py
+++ b/common/lib/xmodule/xmodule/modulestore/tests/test_mongo_call_count.py
@@ -87,14 +87,39 @@ class CountMongoCallsCourseTraversal(TestCase):
     to the leaf nodes.
     """
 
+    # Suppose you want to traverse a course - maybe accessing the fields of each XBlock in the course,
+    # maybe not. What parameters should one use for get_course() in order to minimize the number of
+    # mongo calls? The tests below both ensure that code changes don't increase the number of mongo calls
+    # during traversal -and- demonstrate how to minimize the number of calls.
     @ddt.data(
-        (MIXED_OLD_MONGO_MODULESTORE_BUILDER, None, 189),  # The way this traversal *should* be done.
-        (MIXED_OLD_MONGO_MODULESTORE_BUILDER, 0, 387),     # The pathological case - do *not* query a course this way!
-        (MIXED_SPLIT_MODULESTORE_BUILDER, None, 7),  # The way this traversal *should* be done.
-        (MIXED_SPLIT_MODULESTORE_BUILDER, 0, 145)    # The pathological case - do *not* query a course this way!
+        # These two lines show the way this traversal *should* be done
+        # (if you'll eventually access all the fields and load all the definitions anyway).
+        # 'lazy' does not matter in old Mongo.
+        (MIXED_OLD_MONGO_MODULESTORE_BUILDER, None, False, True, 189),
+        (MIXED_OLD_MONGO_MODULESTORE_BUILDER, None, True, True, 189),
+        (MIXED_OLD_MONGO_MODULESTORE_BUILDER, 0, False, True, 387),
+        (MIXED_OLD_MONGO_MODULESTORE_BUILDER, 0, True, True, 387),
+        # As shown in these two lines: whether or not the XBlock fields are accessed,
+        # the same number of mongo calls are made in old Mongo for depth=None.
+        (MIXED_OLD_MONGO_MODULESTORE_BUILDER, None, False, False, 189),
+        (MIXED_OLD_MONGO_MODULESTORE_BUILDER, None, True, False, 189),
+        (MIXED_OLD_MONGO_MODULESTORE_BUILDER, 0, False, False, 387),
+        (MIXED_OLD_MONGO_MODULESTORE_BUILDER, 0, True, False, 387),
+        # The line below shows the way this traversal *should* be done
+        # (if you'll eventually access all the fields and load all the definitions anyway).
+        (MIXED_SPLIT_MODULESTORE_BUILDER, None, False, True, 4),
+        (MIXED_SPLIT_MODULESTORE_BUILDER, None, True, True, 143),
+        (MIXED_SPLIT_MODULESTORE_BUILDER, 0, False, True, 143),
+        (MIXED_SPLIT_MODULESTORE_BUILDER, 0, True, True, 143),
+        (MIXED_SPLIT_MODULESTORE_BUILDER, None, False, False, 4),
+        (MIXED_SPLIT_MODULESTORE_BUILDER, None, True, False, 4),
+        # TODO: The call count below seems like a bug - should be 4?
+        # Seems to be related to using self.lazy in CachingDescriptorSystem.get_module_data().
+        (MIXED_SPLIT_MODULESTORE_BUILDER, 0, False, False, 143),
+        (MIXED_SPLIT_MODULESTORE_BUILDER, 0, True, False, 4)
     )
     @ddt.unpack
-    def test_number_mongo_calls(self, store, depth, num_mongo_calls):
+    def test_number_mongo_calls(self, store, depth, lazy, access_all_block_fields, num_mongo_calls):
         with store.build() as (source_content, source_store):
 
             source_course_key = source_store.make_course_key('a', 'course', 'course')
@@ -116,10 +141,20 @@ class CountMongoCallsCourseTraversal(TestCase):
             # Starting at the root course block, do a breadth-first traversal using
             # get_children() to retrieve each block's children.
             with check_mongo_calls(num_mongo_calls):
-                start_block = source_store.get_course(source_course_key, depth=depth)
-                stack = [start_block]
-                while stack:
-                    curr_block = stack.pop()
-                    if curr_block.has_children:
-                        for block in reversed(curr_block.get_children()):
-                            stack.append(block)
+                with source_store.bulk_operations(source_course_key):
+                    start_block = source_store.get_course(source_course_key, depth=depth, lazy=lazy)
+                    all_blocks = []
+                    stack = [start_block]
+                    while stack:
+                        curr_block = stack.pop()
+                        all_blocks.append(curr_block)
+                        if curr_block.has_children:
+                            for block in reversed(curr_block.get_children()):
+                                stack.append(block)
+
+                    if access_all_block_fields:
+                        # Read the fields on each block in order to ensure each block and its definition is loaded.
+                        for xblock in all_blocks:
+                            for __, field in xblock.fields.iteritems():
+                                if field.is_set_on(xblock):
+                                    __ = field.read_from(xblock)
diff --git a/common/lib/xmodule/xmodule/modulestore/tests/test_split_modulestore_bulk_operations.py b/common/lib/xmodule/xmodule/modulestore/tests/test_split_modulestore_bulk_operations.py
index cfb9e5aa83..0e7ee14b7b 100644
--- a/common/lib/xmodule/xmodule/modulestore/tests/test_split_modulestore_bulk_operations.py
+++ b/common/lib/xmodule/xmodule/modulestore/tests/test_split_modulestore_bulk_operations.py
@@ -405,7 +405,12 @@ class TestBulkWriteMixinFindMethods(TestBulkWriteMixin):
 
         self.conn.get_definitions.return_value = db_definitions
         results = self.bulk.get_definitions(self.course_key, search_ids)
-        self.conn.get_definitions.assert_called_once_with(list(set(search_ids) - set(active_ids)))
+        definitions_gotten = list(set(search_ids) - set(active_ids))
+        if len(definitions_gotten) > 0:
+            self.conn.get_definitions.assert_called_once_with(definitions_gotten)
+        else:
+            # If no definitions to get, then get_definitions() should *not* have been called.
+            self.assertEquals(self.conn.get_definitions.call_count, 0)
         for _id in active_ids:
             if _id in search_ids:
                 self.assertIn(active_definition(_id), results)
diff --git a/common/lib/xmodule/xmodule/modulestore/xml_exporter.py b/common/lib/xmodule/xmodule/modulestore/xml_exporter.py
index 3c62f44c32..084f5537c9 100644
--- a/common/lib/xmodule/xmodule/modulestore/xml_exporter.py
+++ b/common/lib/xmodule/xmodule/modulestore/xml_exporter.py
@@ -41,7 +41,12 @@ def export_to_xml(modulestore, contentstore, course_key, root_dir, course_dir):
 
     with modulestore.bulk_operations(course_key):
 
-        course = modulestore.get_course(course_key, depth=None)  # None means infinite
+        # depth = None: Traverses down the entire course structure.
+        # lazy = False: Loads and caches all block definitions during traversal for fast access later
+        #               -and- to eliminate many round-trips to read individual definitions.
+        # Why these parameters? Because a course export needs to access all the course block information
+        # eventually. Accessing it all now at the beginning increases performance of the export.
+        course = modulestore.get_course(course_key, depth=None, lazy=False)
         fsm = OSFS(root_dir)
         export_fs = course.runtime.export_fs = fsm.makeopendir(course_dir)
         root_course_dir = root_dir + '/' + course_dir