From e59b650fab1b66dcff9360233eb008f6e0e108e8 Mon Sep 17 00:00:00 2001 From: Chris Dodge Date: Mon, 18 Mar 2013 15:02:49 -0400 Subject: [PATCH 1/3] add an array of courses to not refresh the metadata cache on writes. This is needed for imports where we are doing bulk updates otherwise, the DB gets thrashed and the import takes too long, timing out the browser --- common/lib/xmodule/xmodule/modulestore/mongo.py | 14 ++++++++++---- .../xmodule/xmodule/modulestore/xml_importer.py | 16 ++++++++++++++++ 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/common/lib/xmodule/xmodule/modulestore/mongo.py b/common/lib/xmodule/xmodule/modulestore/mongo.py index c5e5bbfdf8..4ec7a699e0 100644 --- a/common/lib/xmodule/xmodule/modulestore/mongo.py +++ b/common/lib/xmodule/xmodule/modulestore/mongo.py @@ -246,6 +246,7 @@ class MongoModuleStore(ModuleStoreBase): self.fs_root = path(fs_root) self.error_tracker = error_tracker self.render_template = render_template + self.ignore_write_events_on_courses = [] def get_metadata_inheritance_tree(self, location): ''' @@ -329,6 +330,11 @@ class MongoModuleStore(ModuleStoreBase): return tree + def refresh_cached_metadata_inheritance_tree(self, location): + pseudo_course_id = '/'.join([location.org, location.course]) + if pseudo_course_id not in self.ignore_write_events_on_courses: + self.get_cached_metadata_inheritance_tree(location, force_refresh = True) + def clear_cached_metadata_inheritance_tree(self, location): key_name = '{0}/{1}'.format(location.org, location.course) if self.metadata_inheritance_cache is not None: @@ -519,7 +525,7 @@ class MongoModuleStore(ModuleStoreBase): raise DuplicateItemError(location) # recompute (and update) the metadata inheritance tree which is cached - self.get_cached_metadata_inheritance_tree(Location(location), force_refresh = True) + self.refresh_cached_metadata_inheritance_tree(Location(location)) def get_course_for_item(self, location, depth=0): ''' @@ -586,7 +592,7 @@ class MongoModuleStore(ModuleStoreBase): self._update_single_item(location, {'definition.children': children}) # recompute (and update) the metadata inheritance tree which is cached - self.get_cached_metadata_inheritance_tree(Location(location), force_refresh = True) + self.refresh_cached_metadata_inheritance_tree(Location(location)) def update_metadata(self, location, metadata): """ @@ -612,7 +618,7 @@ class MongoModuleStore(ModuleStoreBase): self._update_single_item(location, {'metadata': metadata}) # recompute (and update) the metadata inheritance tree which is cached - self.get_cached_metadata_inheritance_tree(loc, force_refresh = True) + self.refresh_cached_metadata_inheritance_tree(loc) def delete_item(self, location): """ @@ -632,7 +638,7 @@ class MongoModuleStore(ModuleStoreBase): self.collection.remove({'_id': Location(location).dict()}) # recompute (and update) the metadata inheritance tree which is cached - self.get_cached_metadata_inheritance_tree(Location(location), force_refresh = True) + self.refresh_cached_metadata_inheritance_tree(Location(location)) def get_parent_locations(self, location, course_id): diff --git a/common/lib/xmodule/xmodule/modulestore/xml_importer.py b/common/lib/xmodule/xmodule/modulestore/xml_importer.py index fa232596f2..5955b0ebe7 100644 --- a/common/lib/xmodule/xmodule/modulestore/xml_importer.py +++ b/common/lib/xmodule/xmodule/modulestore/xml_importer.py @@ -201,6 +201,16 @@ def import_from_xml(store, data_dir, course_dirs=None, course_items = [] for course_id in module_store.modules.keys(): + if target_location_namespace is not None: + pseudo_course_id = '/'.join([target_location_namespace.org, target_location_namespace.course]) + else: + course_id_components = course_id.split('/') + pseudo_course_id = '/'.join([course_id_components[0], course_id_components[1]]) + + # turn off all write signalling while importing as this is a high volume operation + if pseudo_course_id not in store.ignore_write_events_on_courses: + store.ignore_write_events_on_courses.append(pseudo_course_id) + course_data_path = None course_location = None @@ -296,6 +306,12 @@ def import_from_xml(store, data_dir, course_dirs=None, # inherited metadata everywhere. store.update_metadata(module.location, dict(own_metadata(module))) + # turn back on all write signalling + if pseudo_course_id in store.ignore_write_events_on_courses: + store.ignore_write_events_on_courses.remove(pseudo_course_id) + store.refresh_cached_metadata_inheritance_tree(target_location_namespace if + target_location_namespace is not None else course_location) + return module_store, course_items def remap_namespace(module, target_location_namespace): From e0d1449e910e06ab59f0902be4bc10e4c7d60e92 Mon Sep 17 00:00:00 2001 From: Chris Dodge Date: Tue, 19 Mar 2013 17:42:21 -0400 Subject: [PATCH 2/3] add a finally statement to be sure to turn back on event firing on writes in case an import throws an exception --- .../xmodule/modulestore/xml_importer.py | 185 +++++++++--------- 1 file changed, 93 insertions(+), 92 deletions(-) diff --git a/common/lib/xmodule/xmodule/modulestore/xml_importer.py b/common/lib/xmodule/xmodule/modulestore/xml_importer.py index 5955b0ebe7..89ec1116ae 100644 --- a/common/lib/xmodule/xmodule/modulestore/xml_importer.py +++ b/common/lib/xmodule/xmodule/modulestore/xml_importer.py @@ -207,110 +207,111 @@ def import_from_xml(store, data_dir, course_dirs=None, course_id_components = course_id.split('/') pseudo_course_id = '/'.join([course_id_components[0], course_id_components[1]]) - # turn off all write signalling while importing as this is a high volume operation - if pseudo_course_id not in store.ignore_write_events_on_courses: - store.ignore_write_events_on_courses.append(pseudo_course_id) + try: + # turn off all write signalling while importing as this is a high volume operation + if pseudo_course_id not in store.ignore_write_events_on_courses: + store.ignore_write_events_on_courses.append(pseudo_course_id) - course_data_path = None - course_location = None - - if verbose: - log.debug("Scanning {0} for course module...".format(course_id)) - - # Quick scan to get course module as we need some info from there. Also we need to make sure that the - # course module is committed first into the store - for module in module_store.modules[course_id].itervalues(): - if module.category == 'course': - course_data_path = path(data_dir) / module.data_dir - course_location = module.location - - module = remap_namespace(module, target_location_namespace) - - # cdodge: more hacks (what else). Seems like we have a problem when importing a course (like 6.002) which - # does not have any tabs defined in the policy file. The import goes fine and then displays fine in LMS, - # but if someone tries to add a new tab in the CMS, then the LMS barfs because it expects that - - # if there is *any* tabs - then there at least needs to be some predefined ones - if module.tabs is None or len(module.tabs) == 0: - module.tabs = [{"type": "courseware"}, - {"type": "course_info", "name": "Course Info"}, - {"type": "discussion", "name": "Discussion"}, - {"type": "wiki", "name": "Wiki"}] # note, add 'progress' when we can support it on Edge - - - if hasattr(module, 'data'): - store.update_item(module.location, module.data) - store.update_children(module.location, module.children) - store.update_metadata(module.location, dict(own_metadata(module))) - - # a bit of a hack, but typically the "course image" which is shown on marketing pages is hard coded to /images/course_image.jpg - # so let's make sure we import in case there are no other references to it in the modules - verify_content_links(module, course_data_path, static_content_store, '/static/images/course_image.jpg') - - course_items.append(module) - - - # then import all the static content - if static_content_store is not None: - _namespace_rename = target_location_namespace if target_location_namespace is not None else course_location - - # first pass to find everything in /static/ - import_static_content(module_store.modules[course_id], course_location, course_data_path, static_content_store, - _namespace_rename, subpath='static', verbose=verbose) - - # finally loop through all the modules - for module in module_store.modules[course_id].itervalues(): - - if module.category == 'course': - # we've already saved the course module up at the top of the loop - # so just skip over it in the inner loop - continue - - # remap module to the new namespace - if target_location_namespace is not None: - module = remap_namespace(module, target_location_namespace) + course_data_path = None + course_location = None if verbose: - log.debug('importing module location {0}'.format(module.location)) + log.debug("Scanning {0} for course module...".format(course_id)) - if hasattr(module, 'data'): - module_data = module.data + # Quick scan to get course module as we need some info from there. Also we need to make sure that the + # course module is committed first into the store + for module in module_store.modules[course_id].itervalues(): + if module.category == 'course': + course_data_path = path(data_dir) / module.data_dir + course_location = module.location - # cdodge: now go through any link references to '/static/' and make sure we've imported - # it as a StaticContent asset - try: - remap_dict = {} + module = remap_namespace(module, target_location_namespace) - # use the rewrite_links as a utility means to enumerate through all links - # in the module data. We use that to load that reference into our asset store - # IMPORTANT: There appears to be a bug in lxml.rewrite_link which makes us not be able to - # do the rewrites natively in that code. - # For example, what I'm seeing is -> - # Note the dropped element closing tag. This causes the LMS to fail when rendering modules - that's - # no good, so we have to do this kludge - if isinstance(module_data, str) or isinstance(module_data, unicode): # some module 'data' fields are non strings which blows up the link traversal code - lxml_rewrite_links(module_data, lambda link: verify_content_links(module, course_data_path, - static_content_store, link, remap_dict)) + # cdodge: more hacks (what else). Seems like we have a problem when importing a course (like 6.002) which + # does not have any tabs defined in the policy file. The import goes fine and then displays fine in LMS, + # but if someone tries to add a new tab in the CMS, then the LMS barfs because it expects that - + # if there is *any* tabs - then there at least needs to be some predefined ones + if module.tabs is None or len(module.tabs) == 0: + module.tabs = [{"type": "courseware"}, + {"type": "course_info", "name": "Course Info"}, + {"type": "discussion", "name": "Discussion"}, + {"type": "wiki", "name": "Wiki"}] # note, add 'progress' when we can support it on Edge - for key in remap_dict.keys(): - module_data = module_data.replace(key, remap_dict[key]) - except Exception, e: - logging.exception("failed to rewrite links on {0}. Continuing...".format(module.location)) + if hasattr(module, 'data'): + store.update_item(module.location, module.data) + store.update_children(module.location, module.children) + store.update_metadata(module.location, dict(own_metadata(module))) - store.update_item(module.location, module_data) + # a bit of a hack, but typically the "course image" which is shown on marketing pages is hard coded to /images/course_image.jpg + # so let's make sure we import in case there are no other references to it in the modules + verify_content_links(module, course_data_path, static_content_store, '/static/images/course_image.jpg') - if hasattr(module, 'children') and module.children != []: - store.update_children(module.location, module.children) + course_items.append(module) - # NOTE: It's important to use own_metadata here to avoid writing - # inherited metadata everywhere. - store.update_metadata(module.location, dict(own_metadata(module))) - # turn back on all write signalling - if pseudo_course_id in store.ignore_write_events_on_courses: - store.ignore_write_events_on_courses.remove(pseudo_course_id) - store.refresh_cached_metadata_inheritance_tree(target_location_namespace if - target_location_namespace is not None else course_location) + # then import all the static content + if static_content_store is not None: + _namespace_rename = target_location_namespace if target_location_namespace is not None else course_location + + # first pass to find everything in /static/ + import_static_content(module_store.modules[course_id], course_location, course_data_path, static_content_store, + _namespace_rename, subpath='static', verbose=verbose) + + # finally loop through all the modules + for module in module_store.modules[course_id].itervalues(): + + if module.category == 'course': + # we've already saved the course module up at the top of the loop + # so just skip over it in the inner loop + continue + + # remap module to the new namespace + if target_location_namespace is not None: + module = remap_namespace(module, target_location_namespace) + + if verbose: + log.debug('importing module location {0}'.format(module.location)) + + if hasattr(module, 'data'): + module_data = module.data + + # cdodge: now go through any link references to '/static/' and make sure we've imported + # it as a StaticContent asset + try: + remap_dict = {} + + # use the rewrite_links as a utility means to enumerate through all links + # in the module data. We use that to load that reference into our asset store + # IMPORTANT: There appears to be a bug in lxml.rewrite_link which makes us not be able to + # do the rewrites natively in that code. + # For example, what I'm seeing is -> + # Note the dropped element closing tag. This causes the LMS to fail when rendering modules - that's + # no good, so we have to do this kludge + if isinstance(module_data, str) or isinstance(module_data, unicode): # some module 'data' fields are non strings which blows up the link traversal code + lxml_rewrite_links(module_data, lambda link: verify_content_links(module, course_data_path, + static_content_store, link, remap_dict)) + + for key in remap_dict.keys(): + module_data = module_data.replace(key, remap_dict[key]) + + except Exception, e: + logging.exception("failed to rewrite links on {0}. Continuing...".format(module.location)) + + store.update_item(module.location, module_data) + + if hasattr(module, 'children') and module.children != []: + store.update_children(module.location, module.children) + + # NOTE: It's important to use own_metadata here to avoid writing + # inherited metadata everywhere. + store.update_metadata(module.location, dict(own_metadata(module))) + finally: + # turn back on all write signalling + if pseudo_course_id in store.ignore_write_events_on_courses: + store.ignore_write_events_on_courses.remove(pseudo_course_id) + store.refresh_cached_metadata_inheritance_tree(target_location_namespace if + target_location_namespace is not None else course_location) return module_store, course_items From 5dbb153abe607df7f5d7376fe4af973c807956d3 Mon Sep 17 00:00:00 2001 From: Chris Dodge Date: Wed, 20 Mar 2013 14:32:18 -0400 Subject: [PATCH 3/3] ooops. didn't clean up the merge conflicts --- common/lib/xmodule/xmodule/modulestore/mongo.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/common/lib/xmodule/xmodule/modulestore/mongo.py b/common/lib/xmodule/xmodule/modulestore/mongo.py index cc4c06d23a..d2fe524a0e 100644 --- a/common/lib/xmodule/xmodule/modulestore/mongo.py +++ b/common/lib/xmodule/xmodule/modulestore/mongo.py @@ -622,11 +622,7 @@ class MongoModuleStore(ModuleStoreBase): self._update_single_item(location, {'metadata': metadata}) # recompute (and update) the metadata inheritance tree which is cached -<<<<<<< HEAD self.refresh_cached_metadata_inheritance_tree(loc) -======= - self.get_cached_metadata_inheritance_tree(loc, force_refresh = True) ->>>>>>> fbd409c914c2dc005fd6b46af6daaee262205e0e def delete_item(self, location): """ @@ -649,12 +645,7 @@ class MongoModuleStore(ModuleStoreBase): # from overriding our default value set in the init method. safe=self.collection.safe) # recompute (and update) the metadata inheritance tree which is cached -<<<<<<< HEAD self.refresh_cached_metadata_inheritance_tree(Location(location)) -======= - self.get_cached_metadata_inheritance_tree(Location(location), force_refresh = True) ->>>>>>> fbd409c914c2dc005fd6b46af6daaee262205e0e - def get_parent_locations(self, location, course_id): '''Find all locations that are the parents of this location in this