From 981f5cee45507bff72f85215696de74db5937359 Mon Sep 17 00:00:00 2001 From: Chris Dodge Date: Thu, 8 Nov 2012 16:07:17 -0500 Subject: [PATCH 1/4] initial buildout of a 'xlint' test to verify legacy coursewar --- .../contentstore/management/commands/xlint.py | 26 +++++++++++ .../xmodule/modulestore/xml_importer.py | 43 +++++++++++++++++-- rakefile | 12 ++++++ 3 files changed, 77 insertions(+), 4 deletions(-) create mode 100644 cms/djangoapps/contentstore/management/commands/xlint.py diff --git a/cms/djangoapps/contentstore/management/commands/xlint.py b/cms/djangoapps/contentstore/management/commands/xlint.py new file mode 100644 index 0000000000..355b639f2d --- /dev/null +++ b/cms/djangoapps/contentstore/management/commands/xlint.py @@ -0,0 +1,26 @@ +from django.core.management.base import BaseCommand, CommandError +from xmodule.modulestore.xml_importer import import_from_xml +from xmodule.modulestore.django import modulestore +from xmodule.contentstore.django import contentstore + + +unnamed_modules = 0 + + +class Command(BaseCommand): + help = \ +'''Verify the structure of courseware as to it's suitability for import''' + + def handle(self, *args, **options): + if len(args) == 0: + raise CommandError("import requires at least one argument: [...]") + + data_dir = args[0] + if len(args) > 1: + course_dirs = args[1:] + else: + course_dirs = None + print "Importing. Data_dir={data}, course_dirs={courses}".format( + data=data_dir, + courses=course_dirs) + import_from_xml(None, data_dir, course_dirs, load_error_modules=False, validate_only=True) diff --git a/common/lib/xmodule/xmodule/modulestore/xml_importer.py b/common/lib/xmodule/xmodule/modulestore/xml_importer.py index 00ddb6a948..4a3526b53e 100644 --- a/common/lib/xmodule/xmodule/modulestore/xml_importer.py +++ b/common/lib/xmodule/xmodule/modulestore/xml_importer.py @@ -88,7 +88,7 @@ def verify_content_links(module, base_dir, static_content_store, link, remap_dic def import_from_xml(store, data_dir, course_dirs=None, default_class='xmodule.raw_module.RawDescriptor', - load_error_modules=True, static_content_store=None, target_location_namespace = None): + load_error_modules=True, static_content_store=None, target_location_namespace=None, validate_only=False): """ Import the specified xml data_dir into the "store" modulestore, using org and course as the location org and course. @@ -110,6 +110,10 @@ def import_from_xml(store, data_dir, course_dirs=None, load_error_modules=load_error_modules ) + if validate_only: + validate_module_structure(module_store) + return module_store, [] + # NOTE: the XmlModuleStore does not implement get_items() which would be a preferable means # to enumerate the entire collection of course modules. It will be left as a TBD to implement that # method on XmlModuleStore. @@ -192,7 +196,6 @@ def import_from_xml(store, data_dir, course_dirs=None, store.update_item(module.location, module_data) - if 'children' in module.definition: store.update_children(module.location, module.definition['children']) @@ -200,6 +203,38 @@ def import_from_xml(store, data_dir, course_dirs=None, # inherited metadata everywhere. store.update_metadata(module.location, dict(module.own_metadata)) - - return module_store, course_items + + +def validate_category_hierarcy(module_store, course_id, parent_category, expected_child_category): + err_cnt = 0 + + parents = [] + # get all modules of parent_category + for module in module_store.modules[course_id].itervalues(): + if module.location.category == parent_category: + parents.append(module) + + for parent in parents: + for child_loc in [Location(child) for child in parent.definition.get('children', [])]: + if child_loc.category != expected_child_category: + err_cnt += 1 + print 'ERROR: child {0} of parent {1} was expected to be category of {2} but was {3}'.format( + child_loc, parent.location, expected_child_category, child_loc.category) + + return err_cnt + +def validate_module_structure(module_store): + err_cnt = 0 + warn_cnt = 0 + for course_id in module_store.modules.keys(): + # constrain that courses only have 'chapter' children + err_cnt += validate_category_hierarcy(module_store, course_id, "course", "chapter") + # constrain that chapters only have 'sequentials' + err_cnt += validate_category_hierarcy(module_store, course_id, "chapter", "sequential") + # constrain that sequentials only have 'verticals' + err_cnt += validate_category_hierarcy(module_store, course_id, "sequential", "vertical") + + print "SUMMARY: {0} Errors {1} Warnings".format(err_cnt, warn_cnt) + + diff --git a/rakefile b/rakefile index 4f1c15321f..beb787c8c3 100644 --- a/rakefile +++ b/rakefile @@ -364,6 +364,18 @@ namespace :cms do end end +namespace :cms do + desc "Import course data within the given DATA_DIR variable" + task :xlint do + if ENV['DATA_DIR'] + sh(django_admin(:cms, :dev, :xlint, ENV['DATA_DIR'])) + else + raise "Please specify a DATA_DIR variable that point to your data directory.\n" + + "Example: \`rake cms:import DATA_DIR=../data\`" + end + end +end + desc "Build a properties file used to trigger autodeploy builds" task :autodeploy_properties do File.open("autodeploy.properties", "w") do |file| From da3c3e5f20589ea8f8d16471f2cc358c5d6c3d78 Mon Sep 17 00:00:00 2001 From: Chris Dodge Date: Fri, 9 Nov 2012 13:54:12 -0500 Subject: [PATCH 2/4] initial xlint implementation. Accumulate all import errors during XmlModuleStore importing. Also do checks post XmlModuleStore import and assert that the structure (course->chapter->sequential->vertical) is present in the courses. --- .../contentstore/management/commands/xlint.py | 6 ++-- common/lib/xmodule/xmodule/modulestore/xml.py | 7 ++-- .../xmodule/modulestore/xml_importer.py | 32 ++++++++++++++++++- common/lib/xmodule/xmodule/seq_module.py | 4 ++- rakefile | 4 ++- 5 files changed, 45 insertions(+), 8 deletions(-) diff --git a/cms/djangoapps/contentstore/management/commands/xlint.py b/cms/djangoapps/contentstore/management/commands/xlint.py index 355b639f2d..e8f7b248e4 100644 --- a/cms/djangoapps/contentstore/management/commands/xlint.py +++ b/cms/djangoapps/contentstore/management/commands/xlint.py @@ -9,8 +9,10 @@ unnamed_modules = 0 class Command(BaseCommand): help = \ -'''Verify the structure of courseware as to it's suitability for import''' - + ''' + Verify the structure of courseware as to it's suitability for import + To run test: rake cms:xlint DATA_DIR=../data [COURSE_DIR=content-edx-101 (optional parameter)] + ''' def handle(self, *args, **options): if len(args) == 0: raise CommandError("import requires at least one argument: [...]") diff --git a/common/lib/xmodule/xmodule/modulestore/xml.py b/common/lib/xmodule/xmodule/modulestore/xml.py index 6794703998..64ccf73d5e 100644 --- a/common/lib/xmodule/xmodule/modulestore/xml.py +++ b/common/lib/xmodule/xmodule/modulestore/xml.py @@ -303,7 +303,7 @@ class XMLModuleStore(ModuleStoreBase): try: course_descriptor = self.load_course(course_dir, errorlog.tracker) except Exception as e: - msg = "Failed to load course '{0}': {1}".format(course_dir, str(e)) + msg = "ERROR: Failed to load course '{0}': {1}".format(course_dir, str(e)) log.exception(msg) errorlog.tracker(msg) @@ -337,7 +337,7 @@ class XMLModuleStore(ModuleStoreBase): with open(policy_path) as f: return json.load(f) except (IOError, ValueError) as err: - msg = "Error loading course policy from {0}".format(policy_path) + msg = "ERROR: loading course policy from {0}".format(policy_path) tracker(msg) log.warning(msg + " " + str(err)) return {} @@ -458,7 +458,8 @@ class XMLModuleStore(ModuleStoreBase): module.metadata['data_dir'] = course_dir self.modules[course_descriptor.id][module.location] = module except Exception, e: - logging.exception("Failed to load {0}. Skipping... Exception: {1}".format(filepath, str(e))) + logging.exception("Failed to load {0}. Skipping... Exception: {1}".format(filepath, str(e))) + system.error_tracker("ERROR: " + str(e)) def get_instance(self, course_id, location, depth=0): """ diff --git a/common/lib/xmodule/xmodule/modulestore/xml_importer.py b/common/lib/xmodule/xmodule/modulestore/xml_importer.py index a1739851ac..23eea58a97 100644 --- a/common/lib/xmodule/xmodule/modulestore/xml_importer.py +++ b/common/lib/xmodule/xmodule/modulestore/xml_importer.py @@ -227,6 +227,28 @@ def validate_category_hierarcy(module_store, course_id, parent_category, expecte def validate_module_structure(module_store): err_cnt = 0 warn_cnt = 0 + + print module_store.errored_courses + + # first count all errors and warnings as part of the XMLModuleStore import + for err_log in module_store._location_errors.itervalues(): + for err_log_entry in err_log.errors: + msg = err_log_entry[0] + if msg.startswith('ERROR:'): + err_cnt+=1 + else: + warn_cnt+=1 + + # then count outright all courses that failed to load at all + for err_log in module_store.errored_courses.itervalues(): + for err_log_entry in err_log.errors: + msg = err_log_entry[0] + print msg + if msg.startswith('ERROR:'): + err_cnt+=1 + else: + warn_cnt+=1 + for course_id in module_store.modules.keys(): # constrain that courses only have 'chapter' children err_cnt += validate_category_hierarcy(module_store, course_id, "course", "chapter") @@ -235,6 +257,14 @@ def validate_module_structure(module_store): # constrain that sequentials only have 'verticals' err_cnt += validate_category_hierarcy(module_store, course_id, "sequential", "vertical") - print "SUMMARY: {0} Errors {1} Warnings".format(err_cnt, warn_cnt) + print "\n\n------------------------------------------\nVALIDATION SUMMARY: {0} Errors {1} Warnings\n".format(err_cnt, warn_cnt) + + if err_cnt > 0: + print "This course is not suitable for importing. Please fix courseware according to specifications before importing." + elif warn_cnt > 0: + print "This course can be imported, but some errors may occur during the run of the course. It is recommend that you fix your courseware before importing" + else: + print "This course can be imported successfully." + diff --git a/common/lib/xmodule/xmodule/seq_module.py b/common/lib/xmodule/xmodule/seq_module.py index 0ade3e0e7d..155ad99480 100644 --- a/common/lib/xmodule/xmodule/seq_module.py +++ b/common/lib/xmodule/xmodule/seq_module.py @@ -127,8 +127,10 @@ class SequenceDescriptor(MakoModuleDescriptor, XmlDescriptor): for child in xml_object: try: children.append(system.process_xml(etree.tostring(child)).location.url()) - except: + except Exception, e: log.exception("Unable to load child when parsing Sequence. Continuing...") + if system.error_tracker is not None: + system.error_tracker("ERROR: " + str(e)) continue return {'children': children} diff --git a/rakefile b/rakefile index beb787c8c3..d8386fbda2 100644 --- a/rakefile +++ b/rakefile @@ -367,7 +367,9 @@ end namespace :cms do desc "Import course data within the given DATA_DIR variable" task :xlint do - if ENV['DATA_DIR'] + if ENV['DATA_DIR'] and ENV['COURSE_DIR'] + sh(django_admin(:cms, :dev, :xlint, ENV['DATA_DIR'], ENV['COURSE_DIR'])) + elsif ENV['DATA_DIR'] sh(django_admin(:cms, :dev, :xlint, ENV['DATA_DIR'])) else raise "Please specify a DATA_DIR variable that point to your data directory.\n" + From b779a421d719f20e49e3c10cdca4483174b9a412 Mon Sep 17 00:00:00 2001 From: Chris Dodge Date: Wed, 14 Nov 2012 12:37:17 -0500 Subject: [PATCH 3/4] check for the existence of static and static/subs directories --- .../xmodule/modulestore/xml_importer.py | 29 +++++++++++++++++-- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/common/lib/xmodule/xmodule/modulestore/xml_importer.py b/common/lib/xmodule/xmodule/modulestore/xml_importer.py index 23eea58a97..c6764b491c 100644 --- a/common/lib/xmodule/xmodule/modulestore/xml_importer.py +++ b/common/lib/xmodule/xmodule/modulestore/xml_importer.py @@ -111,7 +111,7 @@ def import_from_xml(store, data_dir, course_dirs=None, ) if validate_only: - validate_module_structure(module_store) + perform_xlint(data_dir, course_dirs, module_store) return module_store, [] # NOTE: the XmlModuleStore does not implement get_items() which would be a preferable means @@ -224,11 +224,34 @@ def validate_category_hierarcy(module_store, course_id, parent_category, expecte return err_cnt -def validate_module_structure(module_store): +def validate_data_source_path_existence(path, is_err = True, extra_msg = None): + _cnt = 0 + if not os.path.exists(path): + print ("{0}: Expected folder at {1}. {2}".format('ERROR' if is_err == True else 'WARNING', path, extra_msg if + extra_msg is not None else '')) + _cnt = 1 + return _cnt + +def validate_data_source_paths(data_dir, course_dir): + # check that there is a '/static/' directory + course_path = data_dir / course_dir + err_cnt = 0 + warn_cnt = 0 + err_cnt += validate_data_source_path_existence(course_path / 'static') + warn_cnt += validate_data_source_path_existence(course_path / 'static/subs', is_err = False, + extra_msg = 'Video captions (if they are used) will not work unless they are static/subs.') + return err_cnt, warn_cnt + + +def perform_xlint(data_dir, course_dirs,module_store): err_cnt = 0 warn_cnt = 0 - print module_store.errored_courses + # check all data source path information + for course_dir in course_dirs: + _err_cnt, _warn_cnt = validate_data_source_paths(path(data_dir), course_dir) + err_cnt += _err_cnt + warn_cnt += _warn_cnt # first count all errors and warnings as part of the XMLModuleStore import for err_log in module_store._location_errors.itervalues(): From fff378288489ccfee0d7f758661dfb9fb7a3f0e9 Mon Sep 17 00:00:00 2001 From: Chris Dodge Date: Mon, 26 Nov 2012 11:05:57 -0500 Subject: [PATCH 4/4] cleave xlint entry point in xml_importer.py to be separate from the import method --- .../contentstore/management/commands/xlint.py | 4 ++-- .../xmodule/xmodule/modulestore/xml_importer.py | 17 +++++++++++------ 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/cms/djangoapps/contentstore/management/commands/xlint.py b/cms/djangoapps/contentstore/management/commands/xlint.py index e8f7b248e4..6bc254a1ff 100644 --- a/cms/djangoapps/contentstore/management/commands/xlint.py +++ b/cms/djangoapps/contentstore/management/commands/xlint.py @@ -1,5 +1,5 @@ from django.core.management.base import BaseCommand, CommandError -from xmodule.modulestore.xml_importer import import_from_xml +from xmodule.modulestore.xml_importer import perform_xlint from xmodule.modulestore.django import modulestore from xmodule.contentstore.django import contentstore @@ -25,4 +25,4 @@ class Command(BaseCommand): print "Importing. Data_dir={data}, course_dirs={courses}".format( data=data_dir, courses=course_dirs) - import_from_xml(None, data_dir, course_dirs, load_error_modules=False, validate_only=True) + perform_xlint(data_dir, course_dirs, load_error_modules=False) diff --git a/common/lib/xmodule/xmodule/modulestore/xml_importer.py b/common/lib/xmodule/xmodule/modulestore/xml_importer.py index c6764b491c..3f44b5d2c6 100644 --- a/common/lib/xmodule/xmodule/modulestore/xml_importer.py +++ b/common/lib/xmodule/xmodule/modulestore/xml_importer.py @@ -88,7 +88,7 @@ def verify_content_links(module, base_dir, static_content_store, link, remap_dic def import_from_xml(store, data_dir, course_dirs=None, default_class='xmodule.raw_module.RawDescriptor', - load_error_modules=True, static_content_store=None, target_location_namespace=None, validate_only=False): + load_error_modules=True, static_content_store=None, target_location_namespace=None): """ Import the specified xml data_dir into the "store" modulestore, using org and course as the location org and course. @@ -110,10 +110,6 @@ def import_from_xml(store, data_dir, course_dirs=None, load_error_modules=load_error_modules ) - if validate_only: - perform_xlint(data_dir, course_dirs, module_store) - return module_store, [] - # NOTE: the XmlModuleStore does not implement get_items() which would be a preferable means # to enumerate the entire collection of course modules. It will be left as a TBD to implement that # method on XmlModuleStore. @@ -243,10 +239,19 @@ def validate_data_source_paths(data_dir, course_dir): return err_cnt, warn_cnt -def perform_xlint(data_dir, course_dirs,module_store): +def perform_xlint(data_dir, course_dirs, + default_class='xmodule.raw_module.RawDescriptor', + load_error_modules=True): err_cnt = 0 warn_cnt = 0 + module_store = XMLModuleStore( + data_dir, + default_class=default_class, + course_dirs=course_dirs, + load_error_modules=load_error_modules + ) + # check all data source path information for course_dir in course_dirs: _err_cnt, _warn_cnt = validate_data_source_paths(path(data_dir), course_dir)