From 1789814ceb59143c85eb1e4c62e8bba78d656cd4 Mon Sep 17 00:00:00 2001 From: Chris Dodge Date: Tue, 2 Apr 2013 14:19:54 -0400 Subject: [PATCH] wip --- cms/djangoapps/contentstore/views.py | 3 +- common/lib/xmodule/xmodule/html_module.py | 7 +- .../lib/xmodule/xmodule/modulestore/draft.py | 12 +- .../xmodule/modulestore/xml_exporter.py | 68 +++---- .../xmodule/modulestore/xml_importer.py | 171 ++++++++++++------ 5 files changed, 163 insertions(+), 98 deletions(-) diff --git a/cms/djangoapps/contentstore/views.py b/cms/djangoapps/contentstore/views.py index f8960dd65d..4bc9bc512c 100644 --- a/cms/djangoapps/contentstore/views.py +++ b/cms/djangoapps/contentstore/views.py @@ -1580,7 +1580,8 @@ def import_course(request, org, course, name): shutil.move(r / fname, course_dir) module_store, course_items = import_from_xml(modulestore('direct'), settings.GITHUB_REPO_ROOT, - [course_subdir], load_error_modules=False, static_content_store=contentstore(), target_location_namespace=Location(location)) + [course_subdir], load_error_modules=False, static_content_store=contentstore(), + target_location_namespace=Location(location), draft_store=modulestore()) # we can blow this away when we're done importing. shutil.rmtree(course_dir) diff --git a/common/lib/xmodule/xmodule/html_module.py b/common/lib/xmodule/xmodule/html_module.py index e9cec32e3e..d901fc5fbe 100644 --- a/common/lib/xmodule/xmodule/html_module.py +++ b/common/lib/xmodule/xmodule/html_module.py @@ -118,8 +118,8 @@ class HtmlDescriptor(HtmlFields, XmlDescriptor, EditingDescriptor): with system.resources_fs.open(filepath) as file: html = file.read().decode('utf-8') # Log a warning if we can't parse the file, but don't error - if not check_html(html): - msg = "Couldn't parse html in {0}.".format(filepath) + if not check_html(html) and len(html) > 0: + msg = "Couldn't parse html in {0}, content = {1}".format(filepath, html) log.warning(msg) system.error_tracker("Warning: " + msg) @@ -156,7 +156,8 @@ class HtmlDescriptor(HtmlFields, XmlDescriptor, EditingDescriptor): resource_fs.makedir(os.path.dirname(filepath), recursive=True, allow_recreate=True) with resource_fs.open(filepath, 'w') as file: - file.write(self.data.encode('utf-8')) + html_data = self.data.encode('utf-8') + file.write(html_data) # write out the relative name relname = path(pathname).basename() diff --git a/common/lib/xmodule/xmodule/modulestore/draft.py b/common/lib/xmodule/xmodule/modulestore/draft.py index ced8e7d42e..9b85fc18aa 100644 --- a/common/lib/xmodule/xmodule/modulestore/draft.py +++ b/common/lib/xmodule/xmodule/modulestore/draft.py @@ -106,7 +106,7 @@ class DraftModuleStore(ModuleStoreBase): """ return wrap_draft(super(DraftModuleStore, self).clone_item(source, as_draft(location))) - def update_item(self, location, data): + def update_item(self, location, data, allow_not_found=False): """ Set the data in the item specified by the location to data @@ -115,9 +115,13 @@ class DraftModuleStore(ModuleStoreBase): data: A nested dictionary of problem data """ draft_loc = as_draft(location) - draft_item = self.get_item(location) - if not getattr(draft_item, 'is_draft', False): - self.clone_item(location, draft_loc) + try: + draft_item = self.get_item(location) + if not getattr(draft_item, 'is_draft', False): + self.clone_item(location, draft_loc) + except ItemNotFoundError, e: + if not allow_not_found: + raise e return super(DraftModuleStore, self).update_item(draft_loc, data) diff --git a/common/lib/xmodule/xmodule/modulestore/xml_exporter.py b/common/lib/xmodule/xmodule/modulestore/xml_exporter.py index a5a8ee3855..7927b2c68c 100644 --- a/common/lib/xmodule/xmodule/modulestore/xml_exporter.py +++ b/common/lib/xmodule/xmodule/modulestore/xml_exporter.py @@ -8,55 +8,59 @@ from json import dumps def export_to_xml(modulestore, contentstore, course_location, root_dir, course_dir, draft_modulestore = None): - course = modulestore.get_item(course_location) + course = modulestore.get_item(course_location) - fs = OSFS(root_dir) - export_fs = fs.makeopendir(course_dir) + fs = OSFS(root_dir) + export_fs = fs.makeopendir(course_dir) - xml = course.export_to_xml(export_fs) - with export_fs.open('course.xml', 'w') as course_xml: - course_xml.write(xml) + xml = course.export_to_xml(export_fs) + with export_fs.open('course.xml', 'w') as course_xml: + course_xml.write(xml) - # export the static assets - contentstore.export_all_for_course(course_location, root_dir + '/' + course_dir + '/static/') + # export the static assets + contentstore.export_all_for_course(course_location, root_dir + '/' + course_dir + '/static/') - # export the static tabs - export_extra_content(export_fs, modulestore, course_location, 'static_tab', 'tabs', '.html') + # export the static tabs + export_extra_content(export_fs, modulestore, course_location, 'static_tab', 'tabs', '.html') - # export the custom tags - export_extra_content(export_fs, modulestore, course_location, 'custom_tag_template', 'custom_tags') + # export the custom tags + export_extra_content(export_fs, modulestore, course_location, 'custom_tag_template', 'custom_tags') - # export the course updates - export_extra_content(export_fs, modulestore, course_location, 'course_info', 'info', '.html') + # export the course updates + export_extra_content(export_fs, modulestore, course_location, 'course_info', 'info', '.html') - # export the grading policy - policies_dir = export_fs.makeopendir('policies') - course_run_policy_dir = policies_dir.makeopendir(course.location.name) - with course_run_policy_dir.open('grading_policy.json', 'w') as grading_policy: - grading_policy.write(dumps(course.grading_policy)) + # export the grading policy + policies_dir = export_fs.makeopendir('policies') + course_run_policy_dir = policies_dir.makeopendir(course.location.name) + with course_run_policy_dir.open('grading_policy.json', 'w') as grading_policy: + grading_policy.write(dumps(course.grading_policy)) - # export all of the course metadata in policy.json - with course_run_policy_dir.open('policy.json', 'w') as course_policy: - policy = {'course/' + course.location.name: own_metadata(course)} - course_policy.write(dumps(policy)) + # export all of the course metadata in policy.json + with course_run_policy_dir.open('policy.json', 'w') as course_policy: + policy = {'course/' + course.location.name: own_metadata(course)} + course_policy.write(dumps(policy)) # export everything from the draft store, unfortunately this will create lots of duplicates + ''' if draft_modulestore is not None: draft_course = draft_modulestore.get_item(course_location) - draft_course_dir = export_fs.makeopendir('drafts') + xml = draft_course.export_to_xml(draft_course_dir) with draft_course_dir.open('course.xml', 'w') as course_xml: course_xml.write(xml) - ''' - draft_items = modulestore.get_items([None, None, None, 'vertical', None, 'draft']) - logging.debug('draft_items = {0}'.format(draft_items)) - if len(draft_items) > 0: - + + # export draft content + # NOTE: this code assumes that verticals are the top most draftable container + # should we change the application, then this assumption will no longer + # be valid + draft_items = draft_modulestore.get_items([None, course_location.org, course_location.course, + 'vertical', None, 'draft']) + + if len(draft_items)>0: + draft_course_dir = export_fs.makeopendir('drafts') for draft_item in draft_items: - draft_item.export_to_xml(draft_items_dir) - #with draft_items_dir.open(draft_item.location.name + '.xml', 'w'): - ''' + draft_item.export_to_xml(draft_course_dir) def export_extra_content(export_fs, modulestore, course_location, category_type, dirname, file_suffix=''): diff --git a/common/lib/xmodule/xmodule/modulestore/xml_importer.py b/common/lib/xmodule/xmodule/modulestore/xml_importer.py index 1d3de93b38..113389d77a 100644 --- a/common/lib/xmodule/xmodule/modulestore/xml_importer.py +++ b/common/lib/xmodule/xmodule/modulestore/xml_importer.py @@ -6,11 +6,13 @@ from path import path from xblock.core import Scope -from .xml import XMLModuleStore +from .xml import XMLModuleStore, ImportSystem, ParentTracker from .exceptions import DuplicateItemError from xmodule.modulestore import Location from xmodule.contentstore.content import StaticContent, XASSET_SRCREF_PREFIX from .inheritance import own_metadata +from xmodule.errortracker import make_error_tracker +from collections import defaultdict log = logging.getLogger(__name__) @@ -175,7 +177,8 @@ def import_course_from_xml(modulestore, static_content_store, course_data_path, def import_from_xml(store, data_dir, course_dirs=None, default_class='xmodule.raw_module.RawDescriptor', - load_error_modules=True, static_content_store=None, target_location_namespace=None, verbose=False): + load_error_modules=True, static_content_store=None, target_location_namespace=None, + verbose=False, draft_store=None): """ Import the specified xml data_dir into the "store" modulestore, using org and course as the location org and course. @@ -190,7 +193,7 @@ def import_from_xml(store, data_dir, course_dirs=None, """ - module_store = XMLModuleStore( + xml_module_store = XMLModuleStore( data_dir, default_class=default_class, course_dirs=course_dirs, @@ -201,7 +204,7 @@ def import_from_xml(store, data_dir, course_dirs=None, # to enumerate the entire collection of course modules. It will be left as a TBD to implement that # method on XmlModuleStore. course_items = [] - for course_id in module_store.modules.keys(): + for course_id in xml_module_store.modules.keys(): if target_location_namespace is not None: pseudo_course_id = '/'.join([target_location_namespace.org, target_location_namespace.course]) @@ -222,7 +225,7 @@ def import_from_xml(store, data_dir, course_dirs=None, # Quick scan to get course module as we need some info from there. Also we need to make sure that the # course module is committed first into the store - for module in module_store.modules[course_id].itervalues(): + for module in xml_module_store.modules[course_id].itervalues(): if module.category == 'course': course_data_path = path(data_dir) / module.data_dir course_location = module.location @@ -239,11 +242,7 @@ def import_from_xml(store, data_dir, course_dirs=None, {"type": "discussion", "name": "Discussion"}, {"type": "wiki", "name": "Wiki"}] # note, add 'progress' when we can support it on Edge - - if hasattr(module, 'data'): - store.update_item(module.location, module.data) - store.update_children(module.location, module.children) - store.update_metadata(module.location, dict(own_metadata(module))) + import_module(module, store, course_data_path, static_content_store) # a bit of a hack, but typically the "course image" which is shown on marketing pages is hard coded to /images/course_image.jpg # so let's make sure we import in case there are no other references to it in the modules @@ -257,11 +256,11 @@ def import_from_xml(store, data_dir, course_dirs=None, _namespace_rename = target_location_namespace if target_location_namespace is not None else course_location # first pass to find everything in /static/ - import_static_content(module_store.modules[course_id], course_location, course_data_path, static_content_store, + import_static_content(xml_module_store.modules[course_id], course_location, course_data_path, static_content_store, _namespace_rename, subpath='static', verbose=verbose) # finally loop through all the modules - for module in module_store.modules[course_id].itervalues(): + for module in xml_module_store.modules[course_id].itervalues(): if module.category == 'course': # we've already saved the course module up at the top of the loop @@ -275,49 +274,8 @@ def import_from_xml(store, data_dir, course_dirs=None, if verbose: log.debug('importing module location {0}'.format(module.location)) - content = {} - for field in module.fields: - if field.scope != Scope.content: - continue - try: - content[field.name] = module._model_data[field.name] - except KeyError: - # Ignore any missing keys in _model_data - pass - - if 'data' in content: - module_data = content['data'] + import_module(module, store, course_data_path, static_content_store) - # cdodge: now go through any link references to '/static/' and make sure we've imported - # it as a StaticContent asset - try: - remap_dict = {} - - # use the rewrite_links as a utility means to enumerate through all links - # in the module data. We use that to load that reference into our asset store - # IMPORTANT: There appears to be a bug in lxml.rewrite_link which makes us not be able to - # do the rewrites natively in that code. - # For example, what I'm seeing is -> - # Note the dropped element closing tag. This causes the LMS to fail when rendering modules - that's - # no good, so we have to do this kludge - if isinstance(module_data, str) or isinstance(module_data, unicode): # some module 'data' fields are non strings which blows up the link traversal code - lxml_rewrite_links(module_data, lambda link: verify_content_links(module, course_data_path, - static_content_store, link, remap_dict)) - - for key in remap_dict.keys(): - module_data = module_data.replace(key, remap_dict[key]) - - except Exception, e: - logging.exception("failed to rewrite links on {0}. Continuing...".format(module.location)) - - store.update_item(module.location, content) - - if hasattr(module, 'children') and module.children != []: - store.update_children(module.location, module.children) - - # NOTE: It's important to use own_metadata here to avoid writing - # inherited metadata everywhere. - store.update_metadata(module.location, dict(own_metadata(module))) finally: # turn back on all write signalling if pseudo_course_id in store.ignore_write_events_on_courses: @@ -326,13 +284,110 @@ def import_from_xml(store, data_dir, course_dirs=None, target_location_namespace is not None else course_location) # now import any 'draft' items - import_course_draft(store, course_data_path, target_location_namespace) + if draft_store is not None: + import_course_draft(xml_module_store, draft_store, course_data_path, static_content_store, target_location_namespace) - return module_store, course_items + return xml_module_store, course_items + +def import_module(module, store, course_data_path, static_content_store, allow_not_found=False): + content = {} + for field in module.fields: + if field.scope != Scope.content: + continue + try: + content[field.name] = module._model_data[field.name] + except KeyError: + # Ignore any missing keys in _model_data + pass + + if 'data' in content: + module_data = content['data'] + + # cdodge: now go through any link references to '/static/' and make sure we've imported + # it as a StaticContent asset + try: + remap_dict = {} + + # use the rewrite_links as a utility means to enumerate through all links + # in the module data. We use that to load that reference into our asset store + # IMPORTANT: There appears to be a bug in lxml.rewrite_link which makes us not be able to + # do the rewrites natively in that code. + # For example, what I'm seeing is -> + # Note the dropped element closing tag. This causes the LMS to fail when rendering modules - that's + # no good, so we have to do this kludge + if isinstance(module_data, str) or isinstance(module_data, unicode): # some module 'data' fields are non strings which blows up the link traversal code + lxml_rewrite_links(module_data, lambda link: verify_content_links(module, course_data_path, + static_content_store, link, remap_dict)) + + for key in remap_dict.keys(): + module_data = module_data.replace(key, remap_dict[key]) + + except Exception, e: + logging.exception("failed to rewrite links on {0}. Continuing...".format(module.location)) + + if allow_not_found: + store.update_item(module.location, content, allow_not_found=allow_not_found) + else: + store.update_item(module.location, content) + + if hasattr(module, 'children') and module.children != []: + store.update_children(module.location, module.children) + + # NOTE: It's important to use own_metadata here to avoid writing + # inherited metadata everywhere. + store.update_metadata(module.location, dict(own_metadata(module))) -def import_course_draft(store, course_data_path, target_location_namespace): - pass +def import_course_draft(xml_module_store, store, course_data_path, static_content_store, target_location_namespace): + ''' + This will import all the content inside of the 'drafts' folder, if it exists + NOTE: This is not a full course import, basically in our current application only verticals (and downwards) + can be in draft. Therefore, we need to use slightly different call points into the import process_xml + as we can't simply call XMLModuleStore() constructor (like we do for importing public content) + ''' + draft_dir = course_data_path + "/drafts" + if not os.path.exists(draft_dir): + return + + # create a new 'System' object which will manage the importing + errorlog = make_error_tracker() + system = ImportSystem( + xml_module_store, + target_location_namespace.course_id, + draft_dir, + {}, + errorlog.tracker, + ParentTracker(), + None, + ) + + # now walk the /vertical directory where each file in there will be a draft copy of the Vertical + for dirname, dirnames, filenames in os.walk(draft_dir + "/vertical"): + for filename in filenames: + module_path = os.path.join(dirname, filename) + with open(module_path) as f: + try: + xml = f.read().decode('utf-8') + descriptor = system.process_xml(xml) + + def _import_module(module): + module.location = module.location._replace(revision='draft') + import_module(module, store, course_data_path, static_content_store, allow_not_found=True) + for child in module.get_children(): + _import_module(child) + + # HACK: since we are doing partial imports of drafts + # the vertical doesn't have the 'url-name' set in the attributes (they are normally in the parent + # object, aka sequential), so we have to replace the location.name with the XML filename + # that is part of the pack + fn, fileExtension = os.path.splitext(filename) + descriptor.location = descriptor.location._replace(name=fn) + + _import_module(descriptor) + + except Exception, e: + logging.exception('There was an error. {0}'.format(unicode(e))) + pass def remap_namespace(module, target_location_namespace):