Merge pull request #7027 from edx/jeskew/defn_loading_perf_problems
Rollback override of lazy definition loading for large courses.
This commit is contained in:
@@ -423,11 +423,12 @@ class SplitBulkWriteMixin(BulkOperationsMixin):
|
||||
ids.remove(definition_id)
|
||||
definitions.append(definition)
|
||||
|
||||
# Query the db for the definitions.
|
||||
defs_from_db = self.db_connection.get_definitions(list(ids))
|
||||
# Add the retrieved definitions to the cache.
|
||||
bulk_write_record.definitions.update({d.get('_id'): d for d in defs_from_db})
|
||||
definitions.extend(defs_from_db)
|
||||
if len(ids):
|
||||
# Query the db for the definitions.
|
||||
defs_from_db = self.db_connection.get_definitions(list(ids))
|
||||
# Add the retrieved definitions to the cache.
|
||||
bulk_write_record.definitions.update({d.get('_id'): d for d in defs_from_db})
|
||||
definitions.extend(defs_from_db)
|
||||
return definitions
|
||||
|
||||
def update_definition(self, course_key, definition):
|
||||
@@ -683,7 +684,7 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
|
||||
connection.close()
|
||||
|
||||
def cache_items(self, system, base_block_ids, course_key, depth=0, lazy=True):
|
||||
'''
|
||||
"""
|
||||
Handles caching of items once inheritance and any other one time
|
||||
per course per fetch operations are done.
|
||||
|
||||
@@ -692,8 +693,8 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
|
||||
base_block_ids: list of BlockIds to fetch
|
||||
course_key: the destination course providing the context
|
||||
depth: how deep below these to prefetch
|
||||
lazy: whether to fetch definitions or use placeholders
|
||||
'''
|
||||
lazy: whether to load definitions now or later
|
||||
"""
|
||||
with self.bulk_operations(course_key, emit_signals=False):
|
||||
new_module_data = {}
|
||||
for block_id in base_block_ids:
|
||||
@@ -704,13 +705,9 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
|
||||
new_module_data
|
||||
)
|
||||
|
||||
# This code supports lazy loading, where the descendent definitions aren't loaded
|
||||
# This method supports lazy loading, where the descendent definitions aren't loaded
|
||||
# until they're actually needed.
|
||||
# However, assume that depth == 0 means no depth is specified and depth != 0 means
|
||||
# a depth *is* specified. If a non-zero depth is specified, force non-lazy definition
|
||||
# loading in order to populate the definition cache for later access.
|
||||
load_definitions_now = depth != 0 or not lazy
|
||||
if load_definitions_now:
|
||||
if not lazy:
|
||||
# Non-lazy loading: Load all descendants by id.
|
||||
descendent_definitions = self.get_definitions(
|
||||
course_key,
|
||||
@@ -734,15 +731,16 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
|
||||
return system.module_data
|
||||
|
||||
@contract(course_entry=CourseEnvelope, block_keys="list(BlockKey)", depth="int | None")
|
||||
def _load_items(self, course_entry, block_keys, depth=0, lazy=True, **kwargs):
|
||||
'''
|
||||
def _load_items(self, course_entry, block_keys, depth=0, **kwargs):
|
||||
"""
|
||||
Load & cache the given blocks from the course. May return the blocks in any order.
|
||||
|
||||
Load the definitions into each block if lazy is False;
|
||||
otherwise, use the lazy definition placeholder.
|
||||
'''
|
||||
Load the definitions into each block if lazy is in kwargs and is False;
|
||||
otherwise, do not load the definitions - they'll be loaded later when needed.
|
||||
"""
|
||||
runtime = self._get_cache(course_entry.structure['_id'])
|
||||
if runtime is None:
|
||||
lazy = kwargs.pop('lazy', True)
|
||||
runtime = self.create_runtime(course_entry, lazy)
|
||||
self._add_cache(course_entry.structure['_id'], runtime)
|
||||
self.cache_items(runtime, block_keys, course_entry.course_key, depth, lazy)
|
||||
@@ -786,7 +784,7 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
|
||||
self.request_cache.data['course_cache'] = {}
|
||||
|
||||
def _lookup_course(self, course_key):
|
||||
'''
|
||||
"""
|
||||
Decode the locator into the right series of db access. Does not
|
||||
return the CourseDescriptor! It returns the actual db json from
|
||||
structures.
|
||||
@@ -797,7 +795,7 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
|
||||
reference)
|
||||
|
||||
:param course_key: any subclass of CourseLocator
|
||||
'''
|
||||
"""
|
||||
if course_key.org and course_key.course and course_key.run:
|
||||
if course_key.branch is None:
|
||||
raise InsufficientSpecificationError(course_key)
|
||||
@@ -864,7 +862,7 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
|
||||
locator = locator_factory(structure_info, branch)
|
||||
envelope = CourseEnvelope(locator, entry)
|
||||
root = entry['root']
|
||||
structures_list = self._load_items(envelope, [root], 0, lazy=True, **kwargs)
|
||||
structures_list = self._load_items(envelope, [root], depth=0, **kwargs)
|
||||
if not isinstance(structures_list[0], ErrorDescriptor):
|
||||
result.append(structures_list[0])
|
||||
return result
|
||||
@@ -929,13 +927,13 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
|
||||
"""
|
||||
structure_entry = self._lookup_course(structure_id)
|
||||
root = structure_entry.structure['root']
|
||||
result = self._load_items(structure_entry, [root], depth, lazy=True, **kwargs)
|
||||
result = self._load_items(structure_entry, [root], depth, **kwargs)
|
||||
return result[0]
|
||||
|
||||
def get_course(self, course_id, depth=0, **kwargs):
|
||||
'''
|
||||
"""
|
||||
Gets the course descriptor for the course identified by the locator
|
||||
'''
|
||||
"""
|
||||
if not isinstance(course_id, CourseLocator) or course_id.deprecated:
|
||||
# The supplied CourseKey is of the wrong type, so it can't possibly be stored in this modulestore.
|
||||
raise ItemNotFoundError(course_id)
|
||||
@@ -951,14 +949,14 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
|
||||
return self._get_structure(library_id, depth, **kwargs)
|
||||
|
||||
def has_course(self, course_id, ignore_case=False, **kwargs):
|
||||
'''
|
||||
"""
|
||||
Does this course exist in this modulestore. This method does not verify that the branch &/or
|
||||
version in the course_id exists. Use get_course_index_info to check that.
|
||||
|
||||
Returns the course_id of the course if it was found, else None
|
||||
Note: we return the course_id instead of a boolean here since the found course may have
|
||||
a different id than the given course_id when ignore_case is True.
|
||||
'''
|
||||
"""
|
||||
if not isinstance(course_id, CourseLocator) or course_id.deprecated:
|
||||
# The supplied CourseKey is of the wrong type, so it can't possibly be stored in this modulestore.
|
||||
return False
|
||||
@@ -967,12 +965,12 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
|
||||
return CourseLocator(course_index['org'], course_index['course'], course_index['run'], course_id.branch) if course_index else None
|
||||
|
||||
def has_library(self, library_id, ignore_case=False, **kwargs):
|
||||
'''
|
||||
"""
|
||||
Does this library exist in this modulestore. This method does not verify that the branch &/or
|
||||
version in the library_id exists.
|
||||
|
||||
Returns the library_id of the course if it was found, else None.
|
||||
'''
|
||||
"""
|
||||
if not isinstance(library_id, LibraryLocator):
|
||||
return None
|
||||
|
||||
@@ -1017,7 +1015,7 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
|
||||
|
||||
with self.bulk_operations(usage_key.course_key):
|
||||
course = self._lookup_course(usage_key.course_key)
|
||||
items = self._load_items(course, [BlockKey.from_usage_key(usage_key)], depth, lazy=True, **kwargs)
|
||||
items = self._load_items(course, [BlockKey.from_usage_key(usage_key)], depth, **kwargs)
|
||||
if len(items) == 0:
|
||||
raise ItemNotFoundError(usage_key)
|
||||
elif len(items) > 1:
|
||||
@@ -1078,7 +1076,7 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
|
||||
if block_name == block_id.id and _block_matches_all(block):
|
||||
block_ids.append(block_id)
|
||||
|
||||
return self._load_items(course, block_ids, lazy=True, **kwargs)
|
||||
return self._load_items(course, block_ids, **kwargs)
|
||||
|
||||
if 'category' in qualifiers:
|
||||
qualifiers['block_type'] = qualifiers.pop('category')
|
||||
@@ -1091,18 +1089,18 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
|
||||
items.append(block_id)
|
||||
|
||||
if len(items) > 0:
|
||||
return self._load_items(course, items, 0, lazy=True, **kwargs)
|
||||
return self._load_items(course, items, depth=0, **kwargs)
|
||||
else:
|
||||
return []
|
||||
|
||||
def get_parent_location(self, locator, **kwargs):
|
||||
'''
|
||||
"""
|
||||
Return the location (Locators w/ block_ids) for the parent of this location in this
|
||||
course. Could use get_items(location, {'children': block_id}) but this is slightly faster.
|
||||
NOTE: the locator must contain the block_id, and this code does not actually ensure block_id exists
|
||||
|
||||
:param locator: BlockUsageLocator restricting search scope
|
||||
'''
|
||||
"""
|
||||
if not isinstance(locator, BlockUsageLocator) or locator.deprecated:
|
||||
# The supplied locator is of the wrong type, so it can't possibly be stored in this modulestore.
|
||||
raise ItemNotFoundError(locator)
|
||||
@@ -1208,12 +1206,12 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
|
||||
return definition['edit_info']
|
||||
|
||||
def get_course_successors(self, course_locator, version_history_depth=1):
|
||||
'''
|
||||
"""
|
||||
Find the version_history_depth next versions of this course. Return as a VersionTree
|
||||
Mostly makes sense when course_locator uses a version_guid, but because it finds all relevant
|
||||
next versions, these do include those created for other courses.
|
||||
:param course_locator:
|
||||
'''
|
||||
"""
|
||||
if not isinstance(course_locator, CourseLocator) or course_locator.deprecated:
|
||||
# The supplied CourseKey is of the wrong type, so it can't possibly be stored in this modulestore.
|
||||
raise ItemNotFoundError(course_locator)
|
||||
@@ -1244,14 +1242,14 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
|
||||
return VersionTree(course_locator, result)
|
||||
|
||||
def get_block_generations(self, block_locator):
|
||||
'''
|
||||
"""
|
||||
Find the history of this block. Return as a VersionTree of each place the block changed (except
|
||||
deletion).
|
||||
|
||||
The block's history tracks its explicit changes but not the changes in its children starting
|
||||
from when the block was created.
|
||||
|
||||
'''
|
||||
"""
|
||||
# course_agnostic means we don't care if the head and version don't align, trust the version
|
||||
course_struct = self._lookup_course(block_locator.course_key.course_agnostic()).structure
|
||||
block_key = BlockKey.from_usage_key(block_locator)
|
||||
@@ -1296,9 +1294,9 @@ class SplitMongoModuleStore(SplitBulkWriteMixin, ModuleStoreWriteBase):
|
||||
)
|
||||
|
||||
def get_definition_successors(self, definition_locator, version_history_depth=1):
|
||||
'''
|
||||
"""
|
||||
Find the version_history_depth next versions of this definition. Return as a VersionTree
|
||||
'''
|
||||
"""
|
||||
# TODO implement
|
||||
pass
|
||||
|
||||
|
||||
@@ -87,14 +87,39 @@ class CountMongoCallsCourseTraversal(TestCase):
|
||||
to the leaf nodes.
|
||||
"""
|
||||
|
||||
# Suppose you want to traverse a course - maybe accessing the fields of each XBlock in the course,
|
||||
# maybe not. What parameters should one use for get_course() in order to minimize the number of
|
||||
# mongo calls? The tests below both ensure that code changes don't increase the number of mongo calls
|
||||
# during traversal -and- demonstrate how to minimize the number of calls.
|
||||
@ddt.data(
|
||||
(MIXED_OLD_MONGO_MODULESTORE_BUILDER, None, 189), # The way this traversal *should* be done.
|
||||
(MIXED_OLD_MONGO_MODULESTORE_BUILDER, 0, 387), # The pathological case - do *not* query a course this way!
|
||||
(MIXED_SPLIT_MODULESTORE_BUILDER, None, 7), # The way this traversal *should* be done.
|
||||
(MIXED_SPLIT_MODULESTORE_BUILDER, 0, 145) # The pathological case - do *not* query a course this way!
|
||||
# These two lines show the way this traversal *should* be done
|
||||
# (if you'll eventually access all the fields and load all the definitions anyway).
|
||||
# 'lazy' does not matter in old Mongo.
|
||||
(MIXED_OLD_MONGO_MODULESTORE_BUILDER, None, False, True, 189),
|
||||
(MIXED_OLD_MONGO_MODULESTORE_BUILDER, None, True, True, 189),
|
||||
(MIXED_OLD_MONGO_MODULESTORE_BUILDER, 0, False, True, 387),
|
||||
(MIXED_OLD_MONGO_MODULESTORE_BUILDER, 0, True, True, 387),
|
||||
# As shown in these two lines: whether or not the XBlock fields are accessed,
|
||||
# the same number of mongo calls are made in old Mongo for depth=None.
|
||||
(MIXED_OLD_MONGO_MODULESTORE_BUILDER, None, False, False, 189),
|
||||
(MIXED_OLD_MONGO_MODULESTORE_BUILDER, None, True, False, 189),
|
||||
(MIXED_OLD_MONGO_MODULESTORE_BUILDER, 0, False, False, 387),
|
||||
(MIXED_OLD_MONGO_MODULESTORE_BUILDER, 0, True, False, 387),
|
||||
# The line below shows the way this traversal *should* be done
|
||||
# (if you'll eventually access all the fields and load all the definitions anyway).
|
||||
(MIXED_SPLIT_MODULESTORE_BUILDER, None, False, True, 4),
|
||||
(MIXED_SPLIT_MODULESTORE_BUILDER, None, True, True, 143),
|
||||
(MIXED_SPLIT_MODULESTORE_BUILDER, 0, False, True, 143),
|
||||
(MIXED_SPLIT_MODULESTORE_BUILDER, 0, True, True, 143),
|
||||
(MIXED_SPLIT_MODULESTORE_BUILDER, None, False, False, 4),
|
||||
(MIXED_SPLIT_MODULESTORE_BUILDER, None, True, False, 4),
|
||||
# TODO: The call count below seems like a bug - should be 4?
|
||||
# Seems to be related to using self.lazy in CachingDescriptorSystem.get_module_data().
|
||||
(MIXED_SPLIT_MODULESTORE_BUILDER, 0, False, False, 143),
|
||||
(MIXED_SPLIT_MODULESTORE_BUILDER, 0, True, False, 4)
|
||||
)
|
||||
@ddt.unpack
|
||||
def test_number_mongo_calls(self, store, depth, num_mongo_calls):
|
||||
def test_number_mongo_calls(self, store, depth, lazy, access_all_block_fields, num_mongo_calls):
|
||||
with store.build() as (source_content, source_store):
|
||||
|
||||
source_course_key = source_store.make_course_key('a', 'course', 'course')
|
||||
@@ -116,10 +141,20 @@ class CountMongoCallsCourseTraversal(TestCase):
|
||||
# Starting at the root course block, do a breadth-first traversal using
|
||||
# get_children() to retrieve each block's children.
|
||||
with check_mongo_calls(num_mongo_calls):
|
||||
start_block = source_store.get_course(source_course_key, depth=depth)
|
||||
stack = [start_block]
|
||||
while stack:
|
||||
curr_block = stack.pop()
|
||||
if curr_block.has_children:
|
||||
for block in reversed(curr_block.get_children()):
|
||||
stack.append(block)
|
||||
with source_store.bulk_operations(source_course_key):
|
||||
start_block = source_store.get_course(source_course_key, depth=depth, lazy=lazy)
|
||||
all_blocks = []
|
||||
stack = [start_block]
|
||||
while stack:
|
||||
curr_block = stack.pop()
|
||||
all_blocks.append(curr_block)
|
||||
if curr_block.has_children:
|
||||
for block in reversed(curr_block.get_children()):
|
||||
stack.append(block)
|
||||
|
||||
if access_all_block_fields:
|
||||
# Read the fields on each block in order to ensure each block and its definition is loaded.
|
||||
for xblock in all_blocks:
|
||||
for __, field in xblock.fields.iteritems():
|
||||
if field.is_set_on(xblock):
|
||||
__ = field.read_from(xblock)
|
||||
|
||||
@@ -405,7 +405,12 @@ class TestBulkWriteMixinFindMethods(TestBulkWriteMixin):
|
||||
|
||||
self.conn.get_definitions.return_value = db_definitions
|
||||
results = self.bulk.get_definitions(self.course_key, search_ids)
|
||||
self.conn.get_definitions.assert_called_once_with(list(set(search_ids) - set(active_ids)))
|
||||
definitions_gotten = list(set(search_ids) - set(active_ids))
|
||||
if len(definitions_gotten) > 0:
|
||||
self.conn.get_definitions.assert_called_once_with(definitions_gotten)
|
||||
else:
|
||||
# If no definitions to get, then get_definitions() should *not* have been called.
|
||||
self.assertEquals(self.conn.get_definitions.call_count, 0)
|
||||
for _id in active_ids:
|
||||
if _id in search_ids:
|
||||
self.assertIn(active_definition(_id), results)
|
||||
|
||||
@@ -41,7 +41,12 @@ def export_to_xml(modulestore, contentstore, course_key, root_dir, course_dir):
|
||||
|
||||
with modulestore.bulk_operations(course_key):
|
||||
|
||||
course = modulestore.get_course(course_key, depth=None) # None means infinite
|
||||
# depth = None: Traverses down the entire course structure.
|
||||
# lazy = False: Loads and caches all block definitions during traversal for fast access later
|
||||
# -and- to eliminate many round-trips to read individual definitions.
|
||||
# Why these parameters? Because a course export needs to access all the course block information
|
||||
# eventually. Accessing it all now at the beginning increases performance of the export.
|
||||
course = modulestore.get_course(course_key, depth=None, lazy=False)
|
||||
fsm = OSFS(root_dir)
|
||||
export_fs = course.runtime.export_fs = fsm.makeopendir(course_dir)
|
||||
root_course_dir = root_dir + '/' + course_dir
|
||||
|
||||
Reference in New Issue
Block a user