From 4e7ed93b3ea14b428e48c8e9f1aaa9e432ff996b Mon Sep 17 00:00:00 2001 From: John Eskew Date: Wed, 18 Feb 2015 15:29:52 -0500 Subject: [PATCH] Rollback eager definition loading. Pass 'lazy' parameter down properly into low-level definition caching. Adjust mongo call numbers in tests and add missing bulk_op wrapper. Avoids the loading of all definitions in a large course when only specific, filtered definitions are needed to be loaded for an endpoint. --- .../xmodule/modulestore/split_mongo/split.py | 78 +++++++++---------- .../tests/test_mongo_call_count.py | 59 +++++++++++--- .../test_split_modulestore_bulk_operations.py | 7 +- .../xmodule/modulestore/xml_exporter.py | 7 +- 4 files changed, 97 insertions(+), 54 deletions(-) diff --git a/common/lib/xmodule/xmodule/modulestore/split_mongo/split.py b/common/lib/xmodule/xmodule/modulestore/split_mongo/split.py index 798dd9bf94..e7cb99df06 100644 --- a/common/lib/xmodule/xmodule/modulestore/split_mongo/split.py +++ b/common/lib/xmodule/xmodule/modulestore/split_mongo/split.py @@ -423,11 +423,12 @@ class SplitBulkWriteMixin(BulkOperationsMixin): ids.remove(definition_id) definitions.append(definition) - # Query the db for the definitions. - defs_from_db = self.db_connection.get_definitions(list(ids)) - # Add the retrieved definitions to the cache. - bulk_write_record.definitions.update({d.get('_id'): d for d in defs_from_db}) - definitions.extend(defs_from_db) + if len(ids): + # Query the db for the definitions. + defs_from_db = self.db_connection.get_definitions(list(ids)) + # Add the retrieved definitions to the cache. + bulk_write_record.definitions.update({d.get('_id'): d for d in defs_from_db}) + definitions.extend(defs_from_db) return definitions def update_definition(self, course_key, definition): @@ -683,7 +684,7 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase): connection.close() def cache_items(self, system, base_block_ids, course_key, depth=0, lazy=True): - ''' + """ Handles caching of items once inheritance and any other one time per course per fetch operations are done. @@ -692,8 +693,8 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase): base_block_ids: list of BlockIds to fetch course_key: the destination course providing the context depth: how deep below these to prefetch - lazy: whether to fetch definitions or use placeholders - ''' + lazy: whether to load definitions now or later + """ with self.bulk_operations(course_key, emit_signals=False): new_module_data = {} for block_id in base_block_ids: @@ -704,13 +705,9 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase): new_module_data ) - # This code supports lazy loading, where the descendent definitions aren't loaded + # This method supports lazy loading, where the descendent definitions aren't loaded # until they're actually needed. - # However, assume that depth == 0 means no depth is specified and depth != 0 means - # a depth *is* specified. If a non-zero depth is specified, force non-lazy definition - # loading in order to populate the definition cache for later access. - load_definitions_now = depth != 0 or not lazy - if load_definitions_now: + if not lazy: # Non-lazy loading: Load all descendants by id. descendent_definitions = self.get_definitions( course_key, @@ -734,15 +731,16 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase): return system.module_data @contract(course_entry=CourseEnvelope, block_keys="list(BlockKey)", depth="int | None") - def _load_items(self, course_entry, block_keys, depth=0, lazy=True, **kwargs): - ''' + def _load_items(self, course_entry, block_keys, depth=0, **kwargs): + """ Load & cache the given blocks from the course. May return the blocks in any order. - Load the definitions into each block if lazy is False; - otherwise, use the lazy definition placeholder. - ''' + Load the definitions into each block if lazy is in kwargs and is False; + otherwise, do not load the definitions - they'll be loaded later when needed. + """ runtime = self._get_cache(course_entry.structure['_id']) if runtime is None: + lazy = kwargs.pop('lazy', True) runtime = self.create_runtime(course_entry, lazy) self._add_cache(course_entry.structure['_id'], runtime) self.cache_items(runtime, block_keys, course_entry.course_key, depth, lazy) @@ -786,7 +784,7 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase): self.request_cache.data['course_cache'] = {} def _lookup_course(self, course_key): - ''' + """ Decode the locator into the right series of db access. Does not return the CourseDescriptor! It returns the actual db json from structures. @@ -797,7 +795,7 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase): reference) :param course_key: any subclass of CourseLocator - ''' + """ if course_key.org and course_key.course and course_key.run: if course_key.branch is None: raise InsufficientSpecificationError(course_key) @@ -864,7 +862,7 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase): locator = locator_factory(structure_info, branch) envelope = CourseEnvelope(locator, entry) root = entry['root'] - structures_list = self._load_items(envelope, [root], 0, lazy=True, **kwargs) + structures_list = self._load_items(envelope, [root], depth=0, **kwargs) if not isinstance(structures_list[0], ErrorDescriptor): result.append(structures_list[0]) return result @@ -929,13 +927,13 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase): """ structure_entry = self._lookup_course(structure_id) root = structure_entry.structure['root'] - result = self._load_items(structure_entry, [root], depth, lazy=True, **kwargs) + result = self._load_items(structure_entry, [root], depth, **kwargs) return result[0] def get_course(self, course_id, depth=0, **kwargs): - ''' + """ Gets the course descriptor for the course identified by the locator - ''' + """ if not isinstance(course_id, CourseLocator) or course_id.deprecated: # The supplied CourseKey is of the wrong type, so it can't possibly be stored in this modulestore. raise ItemNotFoundError(course_id) @@ -951,14 +949,14 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase): return self._get_structure(library_id, depth, **kwargs) def has_course(self, course_id, ignore_case=False, **kwargs): - ''' + """ Does this course exist in this modulestore. This method does not verify that the branch &/or version in the course_id exists. Use get_course_index_info to check that. Returns the course_id of the course if it was found, else None Note: we return the course_id instead of a boolean here since the found course may have a different id than the given course_id when ignore_case is True. - ''' + """ if not isinstance(course_id, CourseLocator) or course_id.deprecated: # The supplied CourseKey is of the wrong type, so it can't possibly be stored in this modulestore. return False @@ -967,12 +965,12 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase): return CourseLocator(course_index['org'], course_index['course'], course_index['run'], course_id.branch) if course_index else None def has_library(self, library_id, ignore_case=False, **kwargs): - ''' + """ Does this library exist in this modulestore. This method does not verify that the branch &/or version in the library_id exists. Returns the library_id of the course if it was found, else None. - ''' + """ if not isinstance(library_id, LibraryLocator): return None @@ -1017,7 +1015,7 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase): with self.bulk_operations(usage_key.course_key): course = self._lookup_course(usage_key.course_key) - items = self._load_items(course, [BlockKey.from_usage_key(usage_key)], depth, lazy=True, **kwargs) + items = self._load_items(course, [BlockKey.from_usage_key(usage_key)], depth, **kwargs) if len(items) == 0: raise ItemNotFoundError(usage_key) elif len(items) > 1: @@ -1078,7 +1076,7 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase): if block_name == block_id.id and _block_matches_all(block): block_ids.append(block_id) - return self._load_items(course, block_ids, lazy=True, **kwargs) + return self._load_items(course, block_ids, **kwargs) if 'category' in qualifiers: qualifiers['block_type'] = qualifiers.pop('category') @@ -1091,18 +1089,18 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase): items.append(block_id) if len(items) > 0: - return self._load_items(course, items, 0, lazy=True, **kwargs) + return self._load_items(course, items, depth=0, **kwargs) else: return [] def get_parent_location(self, locator, **kwargs): - ''' + """ Return the location (Locators w/ block_ids) for the parent of this location in this course. Could use get_items(location, {'children': block_id}) but this is slightly faster. NOTE: the locator must contain the block_id, and this code does not actually ensure block_id exists :param locator: BlockUsageLocator restricting search scope - ''' + """ if not isinstance(locator, BlockUsageLocator) or locator.deprecated: # The supplied locator is of the wrong type, so it can't possibly be stored in this modulestore. raise ItemNotFoundError(locator) @@ -1208,12 +1206,12 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase): return definition['edit_info'] def get_course_successors(self, course_locator, version_history_depth=1): - ''' + """ Find the version_history_depth next versions of this course. Return as a VersionTree Mostly makes sense when course_locator uses a version_guid, but because it finds all relevant next versions, these do include those created for other courses. :param course_locator: - ''' + """ if not isinstance(course_locator, CourseLocator) or course_locator.deprecated: # The supplied CourseKey is of the wrong type, so it can't possibly be stored in this modulestore. raise ItemNotFoundError(course_locator) @@ -1244,14 +1242,14 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase): return VersionTree(course_locator, result) def get_block_generations(self, block_locator): - ''' + """ Find the history of this block. Return as a VersionTree of each place the block changed (except deletion). The block's history tracks its explicit changes but not the changes in its children starting from when the block was created. - ''' + """ # course_agnostic means we don't care if the head and version don't align, trust the version course_struct = self._lookup_course(block_locator.course_key.course_agnostic()).structure block_key = BlockKey.from_usage_key(block_locator) @@ -1296,9 +1294,9 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase): ) def get_definition_successors(self, definition_locator, version_history_depth=1): - ''' + """ Find the version_history_depth next versions of this definition. Return as a VersionTree - ''' + """ # TODO implement pass diff --git a/common/lib/xmodule/xmodule/modulestore/tests/test_mongo_call_count.py b/common/lib/xmodule/xmodule/modulestore/tests/test_mongo_call_count.py index 0046fdd7ed..72e6ec4ef6 100644 --- a/common/lib/xmodule/xmodule/modulestore/tests/test_mongo_call_count.py +++ b/common/lib/xmodule/xmodule/modulestore/tests/test_mongo_call_count.py @@ -87,14 +87,39 @@ class CountMongoCallsCourseTraversal(TestCase): to the leaf nodes. """ + # Suppose you want to traverse a course - maybe accessing the fields of each XBlock in the course, + # maybe not. What parameters should one use for get_course() in order to minimize the number of + # mongo calls? The tests below both ensure that code changes don't increase the number of mongo calls + # during traversal -and- demonstrate how to minimize the number of calls. @ddt.data( - (MIXED_OLD_MONGO_MODULESTORE_BUILDER, None, 189), # The way this traversal *should* be done. - (MIXED_OLD_MONGO_MODULESTORE_BUILDER, 0, 387), # The pathological case - do *not* query a course this way! - (MIXED_SPLIT_MODULESTORE_BUILDER, None, 7), # The way this traversal *should* be done. - (MIXED_SPLIT_MODULESTORE_BUILDER, 0, 145) # The pathological case - do *not* query a course this way! + # These two lines show the way this traversal *should* be done + # (if you'll eventually access all the fields and load all the definitions anyway). + # 'lazy' does not matter in old Mongo. + (MIXED_OLD_MONGO_MODULESTORE_BUILDER, None, False, True, 189), + (MIXED_OLD_MONGO_MODULESTORE_BUILDER, None, True, True, 189), + (MIXED_OLD_MONGO_MODULESTORE_BUILDER, 0, False, True, 387), + (MIXED_OLD_MONGO_MODULESTORE_BUILDER, 0, True, True, 387), + # As shown in these two lines: whether or not the XBlock fields are accessed, + # the same number of mongo calls are made in old Mongo for depth=None. + (MIXED_OLD_MONGO_MODULESTORE_BUILDER, None, False, False, 189), + (MIXED_OLD_MONGO_MODULESTORE_BUILDER, None, True, False, 189), + (MIXED_OLD_MONGO_MODULESTORE_BUILDER, 0, False, False, 387), + (MIXED_OLD_MONGO_MODULESTORE_BUILDER, 0, True, False, 387), + # The line below shows the way this traversal *should* be done + # (if you'll eventually access all the fields and load all the definitions anyway). + (MIXED_SPLIT_MODULESTORE_BUILDER, None, False, True, 4), + (MIXED_SPLIT_MODULESTORE_BUILDER, None, True, True, 143), + (MIXED_SPLIT_MODULESTORE_BUILDER, 0, False, True, 143), + (MIXED_SPLIT_MODULESTORE_BUILDER, 0, True, True, 143), + (MIXED_SPLIT_MODULESTORE_BUILDER, None, False, False, 4), + (MIXED_SPLIT_MODULESTORE_BUILDER, None, True, False, 4), + # TODO: The call count below seems like a bug - should be 4? + # Seems to be related to using self.lazy in CachingDescriptorSystem.get_module_data(). + (MIXED_SPLIT_MODULESTORE_BUILDER, 0, False, False, 143), + (MIXED_SPLIT_MODULESTORE_BUILDER, 0, True, False, 4) ) @ddt.unpack - def test_number_mongo_calls(self, store, depth, num_mongo_calls): + def test_number_mongo_calls(self, store, depth, lazy, access_all_block_fields, num_mongo_calls): with store.build() as (source_content, source_store): source_course_key = source_store.make_course_key('a', 'course', 'course') @@ -116,10 +141,20 @@ class CountMongoCallsCourseTraversal(TestCase): # Starting at the root course block, do a breadth-first traversal using # get_children() to retrieve each block's children. with check_mongo_calls(num_mongo_calls): - start_block = source_store.get_course(source_course_key, depth=depth) - stack = [start_block] - while stack: - curr_block = stack.pop() - if curr_block.has_children: - for block in reversed(curr_block.get_children()): - stack.append(block) + with source_store.bulk_operations(source_course_key): + start_block = source_store.get_course(source_course_key, depth=depth, lazy=lazy) + all_blocks = [] + stack = [start_block] + while stack: + curr_block = stack.pop() + all_blocks.append(curr_block) + if curr_block.has_children: + for block in reversed(curr_block.get_children()): + stack.append(block) + + if access_all_block_fields: + # Read the fields on each block in order to ensure each block and its definition is loaded. + for xblock in all_blocks: + for __, field in xblock.fields.iteritems(): + if field.is_set_on(xblock): + __ = field.read_from(xblock) diff --git a/common/lib/xmodule/xmodule/modulestore/tests/test_split_modulestore_bulk_operations.py b/common/lib/xmodule/xmodule/modulestore/tests/test_split_modulestore_bulk_operations.py index cfb9e5aa83..0e7ee14b7b 100644 --- a/common/lib/xmodule/xmodule/modulestore/tests/test_split_modulestore_bulk_operations.py +++ b/common/lib/xmodule/xmodule/modulestore/tests/test_split_modulestore_bulk_operations.py @@ -405,7 +405,12 @@ class TestBulkWriteMixinFindMethods(TestBulkWriteMixin): self.conn.get_definitions.return_value = db_definitions results = self.bulk.get_definitions(self.course_key, search_ids) - self.conn.get_definitions.assert_called_once_with(list(set(search_ids) - set(active_ids))) + definitions_gotten = list(set(search_ids) - set(active_ids)) + if len(definitions_gotten) > 0: + self.conn.get_definitions.assert_called_once_with(definitions_gotten) + else: + # If no definitions to get, then get_definitions() should *not* have been called. + self.assertEquals(self.conn.get_definitions.call_count, 0) for _id in active_ids: if _id in search_ids: self.assertIn(active_definition(_id), results) diff --git a/common/lib/xmodule/xmodule/modulestore/xml_exporter.py b/common/lib/xmodule/xmodule/modulestore/xml_exporter.py index 3c62f44c32..084f5537c9 100644 --- a/common/lib/xmodule/xmodule/modulestore/xml_exporter.py +++ b/common/lib/xmodule/xmodule/modulestore/xml_exporter.py @@ -41,7 +41,12 @@ def export_to_xml(modulestore, contentstore, course_key, root_dir, course_dir): with modulestore.bulk_operations(course_key): - course = modulestore.get_course(course_key, depth=None) # None means infinite + # depth = None: Traverses down the entire course structure. + # lazy = False: Loads and caches all block definitions during traversal for fast access later + # -and- to eliminate many round-trips to read individual definitions. + # Why these parameters? Because a course export needs to access all the course block information + # eventually. Accessing it all now at the beginning increases performance of the export. + course = modulestore.get_course(course_key, depth=None, lazy=False) fsm = OSFS(root_dir) export_fs = course.runtime.export_fs = fsm.makeopendir(course_dir) root_course_dir = root_dir + '/' + course_dir