From d09e2261f3ba92e5a248de466c7c4ef5aa230be7 Mon Sep 17 00:00:00 2001 From: Victor Shnayder Date: Fri, 3 Aug 2012 14:51:19 -0400 Subject: [PATCH 01/12] New file structure--everything in own file * needed for CMS performance (can now save just an item, not whole tree) * remove split_to_file methods * simplified AttrMap logic * write each descriptor to a separate file * detect format on import and adjust appropriately. * update tests --- common/lib/xmodule/xmodule/capa_module.py | 5 - common/lib/xmodule/xmodule/html_module.py | 8 +- common/lib/xmodule/xmodule/seq_module.py | 13 -- .../lib/xmodule/xmodule/tests/test_import.py | 9 +- common/lib/xmodule/xmodule/xml_module.py | 166 +++++++++--------- 5 files changed, 93 insertions(+), 108 deletions(-) diff --git a/common/lib/xmodule/xmodule/capa_module.py b/common/lib/xmodule/xmodule/capa_module.py index a384edf234..833713fcde 100644 --- a/common/lib/xmodule/xmodule/capa_module.py +++ b/common/lib/xmodule/xmodule/capa_module.py @@ -572,8 +572,3 @@ class CapaDescriptor(RawDescriptor): 'problems/' + path[8:], path[8:], ] - - @classmethod - def split_to_file(cls, xml_object): - '''Problems always written in their own files''' - return True diff --git a/common/lib/xmodule/xmodule/html_module.py b/common/lib/xmodule/xmodule/html_module.py index 260b84278b..de45af081a 100644 --- a/common/lib/xmodule/xmodule/html_module.py +++ b/common/lib/xmodule/xmodule/html_module.py @@ -109,17 +109,11 @@ class HtmlDescriptor(XmlDescriptor, EditingDescriptor): # add more info and re-raise raise Exception(msg), None, sys.exc_info()[2] - @classmethod - def split_to_file(cls, xml_object): - '''Never include inline html''' - return True - - # TODO (vshnayder): make export put things in the right places. def definition_to_xml(self, resource_fs): '''If the contents are valid xml, write them to filename.xml. Otherwise, - write just the tag to filename.xml, and the html + write just to filename.xml, and the html string to filename.html. ''' try: diff --git a/common/lib/xmodule/xmodule/seq_module.py b/common/lib/xmodule/xmodule/seq_module.py index 5f7f41bf8d..e742a58471 100644 --- a/common/lib/xmodule/xmodule/seq_module.py +++ b/common/lib/xmodule/xmodule/seq_module.py @@ -122,16 +122,3 @@ class SequenceDescriptor(MakoModuleDescriptor, XmlDescriptor): etree.fromstring(child.export_to_xml(resource_fs))) return xml_object - @classmethod - def split_to_file(cls, xml_object): - # Note: if we end up needing subclasses, can port this logic there. - yes = ('chapter',) - no = ('course',) - - if xml_object.tag in yes: - return True - elif xml_object.tag in no: - return False - - # otherwise maybe--delegate to superclass. - return XmlDescriptor.split_to_file(xml_object) diff --git a/common/lib/xmodule/xmodule/tests/test_import.py b/common/lib/xmodule/xmodule/tests/test_import.py index 0d3e2260fb..2599a74079 100644 --- a/common/lib/xmodule/xmodule/tests/test_import.py +++ b/common/lib/xmodule/xmodule/tests/test_import.py @@ -134,7 +134,8 @@ class ImportTestCase(unittest.TestCase): resource_fs = MemoryFS() exported_xml = descriptor.export_to_xml(resource_fs) print "Exported xml:", exported_xml - root = etree.fromstring(exported_xml) - chapter_tag = root[0] - self.assertEqual(chapter_tag.tag, 'chapter') - self.assertFalse('graceperiod' in chapter_tag.attrib) + # hardcode path to child + with resource_fs.open('chapter/ch.xml') as f: + chapter_xml = etree.fromstring(f.read()) + self.assertEqual(chapter_xml.tag, 'chapter') + self.assertFalse('graceperiod' in chapter_xml.attrib) diff --git a/common/lib/xmodule/xmodule/xml_module.py b/common/lib/xmodule/xmodule/xml_module.py index 7a12ed869d..fd48439e46 100644 --- a/common/lib/xmodule/xmodule/xml_module.py +++ b/common/lib/xmodule/xmodule/xml_module.py @@ -11,22 +11,21 @@ import sys log = logging.getLogger(__name__) -_AttrMapBase = namedtuple('_AttrMap', 'metadata_key to_metadata from_metadata') +_AttrMapBase = namedtuple('_AttrMap', 'from_xml to_xml') class AttrMap(_AttrMapBase): """ - A class that specifies a metadata_key, and two functions: + A class that specifies two functions: - to_metadata: convert value from the xml representation into + from_xml: convert value from the xml representation into an internal python representation - from_metadata: convert the internal python representation into + to_xml: convert the internal python representation into the value to store in the xml. """ - def __new__(_cls, metadata_key, - to_metadata=lambda x: x, - from_metadata=lambda x: x): - return _AttrMapBase.__new__(_cls, metadata_key, to_metadata, from_metadata) + def __new__(_cls, from_xml=lambda x: x, + to_xml=lambda x: x): + return _AttrMapBase.__new__(_cls, from_xml, to_xml) class XmlDescriptor(XModuleDescriptor): @@ -39,6 +38,9 @@ class XmlDescriptor(XModuleDescriptor): # The attributes will be removed from the definition xml passed # to definition_from_xml, and from the xml returned by definition_to_xml + + # Note -- url_name isn't in this list because it's handled specially on + # import and export. metadata_attributes = ('format', 'graceperiod', 'showanswer', 'rerandomize', 'start', 'due', 'graded', 'display_name', 'url_name', 'hide_from_toc', 'ispublic', # if True, then course is listed for all users; see @@ -50,8 +52,7 @@ class XmlDescriptor(XModuleDescriptor): # to import and export them xml_attribute_map = { # type conversion: want True/False in python, "true"/"false" in xml - 'graded': AttrMap('graded', - lambda val: val == 'true', + 'graded': AttrMap(lambda val: val == 'true', lambda val: str(val).lower()), } @@ -101,6 +102,24 @@ class XmlDescriptor(XModuleDescriptor): """ return etree.parse(file_object).getroot() + @classmethod + def load_file(cls, filepath, fs, location): + ''' + Open the specified file in fs, and call cls.file_to_xml on it, + returning the lxml object. + + Add details and reraise on error. + ''' + try: + with fs.open(filepath) as file: + return cls.file_to_xml(file) + except Exception as err: + # Add info about where we are, but keep the traceback + msg = 'Unable to load file contents at path %s for item %s: %s ' % ( + filepath, location.url(), str(err)) + raise Exception, msg, sys.exc_info()[2] + + @classmethod def load_definition(cls, xml_object, system, location): '''Load a descriptor definition from the specified xml_object. @@ -128,14 +147,7 @@ class XmlDescriptor(XModuleDescriptor): filepath = candidate break - try: - with system.resources_fs.open(filepath) as file: - definition_xml = cls.file_to_xml(file) - except Exception: - msg = 'Unable to load file contents at path %s for item %s' % ( - filepath, location.url()) - # Add info about where we are, but keep the traceback - raise Exception, msg, sys.exc_info()[2] + definition_xml = cls.load_file(filepath, system.resources_fs, location) cls.clean_metadata_from_xml(definition_xml) definition = cls.definition_from_xml(definition_xml, system) @@ -146,6 +158,24 @@ class XmlDescriptor(XModuleDescriptor): return definition + @classmethod + def load_metadata(cls, xml_object): + """ + Read the metadata attributes from this xml_object. + + Returns a dictionary {key: value}. + """ + metadata = {} + for attr in cls.metadata_attributes: + val = xml_object.get(attr) + if val is not None: + # VS[compat]. Remove after all key translations done + attr = cls._translate(attr) + + attr_map = cls.xml_attribute_map.get(attr, AttrMap()) + metadata[attr] = attr_map.from_xml(val) + return metadata + @classmethod def from_xml(cls, xml_data, system, org=None, course=None): @@ -161,25 +191,20 @@ class XmlDescriptor(XModuleDescriptor): """ xml_object = etree.fromstring(xml_data) # VS[compat] -- just have the url_name lookup once translation is done - slug = xml_object.get('url_name', xml_object.get('slug')) - location = Location('i4x', org, course, xml_object.tag, slug) + url_name = xml_object.get('url_name', xml_object.get('slug')) + location = Location('i4x', org, course, xml_object.tag, url_name) - def load_metadata(): - metadata = {} - for attr in cls.metadata_attributes: - val = xml_object.get(attr) - if val is not None: - # VS[compat]. Remove after all key translations done - attr = cls._translate(attr) + # VS[compat] -- detect new-style each-in-a-file mode + if len(xml_object.attrib.keys()) == 1 and len(xml_object) == 0: + # new style: this is just a pointer. + # read the actual defition file--named using url_name + filepath = cls._format_filepath(xml_object.tag, url_name) + definition_xml = cls.load_file(filepath, system.resources_fs, location) + else: + definition_xml = xml_object - attr_map = cls.xml_attribute_map.get(attr, AttrMap(attr)) - metadata[attr_map.metadata_key] = attr_map.to_metadata(val) - return metadata - - definition = cls.load_definition(xml_object, system, location) - metadata = load_metadata() - # VS[compat] -- just have the url_name lookup once translation is done - slug = xml_object.get('url_name', xml_object.get('slug')) + definition = cls.load_definition(definition_xml, system, location) + metadata = cls.load_metadata(definition_xml) return cls( system, definition, @@ -193,20 +218,6 @@ class XmlDescriptor(XModuleDescriptor): name=name, ext=cls.filename_extension) - @classmethod - def split_to_file(cls, xml_object): - ''' - Decide whether to write this object to a separate file or not. - - xml_object: an xml definition of an instance of cls. - - This default implementation will split if this has more than 7 - descendant tags. - - Can be overridden by subclasses. - ''' - return len(list(xml_object.iter())) > 7 - def export_to_xml(self, resource_fs): """ Returns an xml string representing this module, and all modules @@ -227,42 +238,39 @@ class XmlDescriptor(XModuleDescriptor): xml_object = self.definition_to_xml(resource_fs) self.__class__.clean_metadata_from_xml(xml_object) - # Set the tag first, so it's right if writing to a file + # Set the tag so we get the file path right xml_object.tag = self.category - # Write it to a file if necessary - if self.split_to_file(xml_object): - # Put this object in its own file - filepath = self.__class__._format_filepath(self.category, self.url_name) - resource_fs.makedir(os.path.dirname(filepath), allow_recreate=True) - with resource_fs.open(filepath, 'w') as file: - file.write(etree.tostring(xml_object, pretty_print=True)) - # ...and remove all of its children here - for child in xml_object: - xml_object.remove(child) - # also need to remove the text of this object. - xml_object.text = '' - # and the tail for good measure... - xml_object.tail = '' + def val_for_xml(attr): + """Get the value for this attribute that we want to store. + (Possible format conversion through an AttrMap). + """ + attr_map = self.xml_attribute_map.get(attr, AttrMap()) + return attr_map.to_xml(self.own_metadata[attr]) - - xml_object.set('filename', self.url_name) - - # Add the metadata - xml_object.set('url_name', self.url_name) - for attr in self.metadata_attributes: - attr_map = self.xml_attribute_map.get(attr, AttrMap(attr)) - metadata_key = attr_map.metadata_key - - if (metadata_key not in self.metadata or - metadata_key in self._inherited_metadata): + # Add the non-inherited metadata + for attr in self.own_metadata: + if attr not in self.metadata_attributes: + log.warning("Unexpected metadata '{attr}' on element '{url}'." + " Not exporting.".format(attr=attr, + url=self.location.url())) continue - val = attr_map.from_metadata(self.metadata[metadata_key]) - xml_object.set(attr, val) + xml_object.set(attr, val_for_xml(attr)) - # Now we just have to make it beautiful - return etree.tostring(xml_object, pretty_print=True) + + # Write the actual contents to a file + filepath = self.__class__._format_filepath(self.category, self.url_name) + resource_fs.makedir(os.path.dirname(filepath), allow_recreate=True) + + with resource_fs.open(filepath, 'w') as file: + file.write(etree.tostring(xml_object, pretty_print=True)) + + # And return just a pointer with the category and filename. + record_object = etree.Element(self.category) + record_object.set('url_name', self.url_name) + + return etree.tostring(record_object, pretty_print=True) def definition_to_xml(self, resource_fs): """ From b285f50d92f02b197f16c0ee448ae26436c73397 Mon Sep 17 00:00:00 2001 From: Victor Shnayder Date: Fri, 3 Aug 2012 15:28:48 -0400 Subject: [PATCH 02/12] Make unknown metadata persist accross import-export * +improve test. --- .../lib/xmodule/xmodule/tests/test_import.py | 40 ++++++++++++++----- common/lib/xmodule/xmodule/xml_module.py | 17 ++++---- 2 files changed, 39 insertions(+), 18 deletions(-) diff --git a/common/lib/xmodule/xmodule/tests/test_import.py b/common/lib/xmodule/xmodule/tests/test_import.py index 2599a74079..45a142b8fe 100644 --- a/common/lib/xmodule/xmodule/tests/test_import.py +++ b/common/lib/xmodule/xmodule/tests/test_import.py @@ -111,30 +111,50 @@ class ImportTestCase(unittest.TestCase): xml_out = etree.fromstring(xml_str_out) self.assertEqual(xml_out.tag, 'sequential') - def test_metadata_inherit(self): - """Make sure metadata inherits properly""" + def test_metadata_import_export(self): + """Two checks: + - unknown metadata is preserved across import-export + - inherited metadata doesn't leak to children. + """ system = self.get_system() v = "1 hour" - start_xml = ''' - - Two houses, ... - '''.format(grace=v) + start_xml = ''' + + + Two houses, ... + + '''.format(grace=v) descriptor = XModuleDescriptor.load_from_xml(start_xml, system, 'org', 'course') - print "Errors: {0}".format(system.errorlog.errors) print descriptor, descriptor.metadata self.assertEqual(descriptor.metadata['graceperiod'], v) + self.assertEqual(descriptor.metadata['unicorn'], 'purple') - # Check that the child inherits correctly + # Check that the child inherits graceperiod correctly child = descriptor.get_children()[0] self.assertEqual(child.metadata['graceperiod'], v) - # Now export and see if the chapter tag has a graceperiod attribute + # check that the child does _not_ inherit any unicorns + self.assertTrue('unicorn' not in child.metadata) + + # Now export and check things resource_fs = MemoryFS() exported_xml = descriptor.export_to_xml(resource_fs) print "Exported xml:", exported_xml - # hardcode path to child + + # Does the course still have unicorns? + # hardcoded path to course + with resource_fs.open('course/test1.xml') as f: + course_xml = etree.fromstring(f.read()) + + self.assertEqual(course_xml.attrib['unicorn'], 'purple') + + # did we successfully strip the url_name from the definition contents? + self.assertTrue('url_name' not in course_xml.attrib) + + # Does the chapter tag now have a graceperiod attribute? + # hardcoded path to child with resource_fs.open('chapter/ch.xml') as f: chapter_xml = etree.fromstring(f.read()) self.assertEqual(chapter_xml.tag, 'chapter') diff --git a/common/lib/xmodule/xmodule/xml_module.py b/common/lib/xmodule/xmodule/xml_module.py index fd48439e46..907fc302a3 100644 --- a/common/lib/xmodule/xmodule/xml_module.py +++ b/common/lib/xmodule/xmodule/xml_module.py @@ -47,6 +47,10 @@ class XmlDescriptor(XModuleDescriptor): # VS[compat] Remove once unused. 'name', 'slug') + # VS[compat] -- remove once everything is in the CMS + # We don't want url_name in the metadata--it's in the location, so avoid + # confusion and duplication. + metadata_to_strip = ('url_name', ) # A dictionary mapping xml attribute names AttrMaps that describe how # to import and export them @@ -166,12 +170,16 @@ class XmlDescriptor(XModuleDescriptor): Returns a dictionary {key: value}. """ metadata = {} - for attr in cls.metadata_attributes: + for attr in xml_object.attrib: val = xml_object.get(attr) if val is not None: # VS[compat]. Remove after all key translations done attr = cls._translate(attr) + if attr in cls.metadata_to_strip: + # don't load these + continue + attr_map = cls.xml_attribute_map.get(attr, AttrMap()) metadata[attr] = attr_map.from_xml(val) return metadata @@ -250,15 +258,8 @@ class XmlDescriptor(XModuleDescriptor): # Add the non-inherited metadata for attr in self.own_metadata: - if attr not in self.metadata_attributes: - log.warning("Unexpected metadata '{attr}' on element '{url}'." - " Not exporting.".format(attr=attr, - url=self.location.url())) - continue - xml_object.set(attr, val_for_xml(attr)) - # Write the actual contents to a file filepath = self.__class__._format_filepath(self.category, self.url_name) resource_fs.makedir(os.path.dirname(filepath), allow_recreate=True) From 6ed905275522272c2988211201913177d33bd9ad Mon Sep 17 00:00:00 2001 From: Victor Shnayder Date: Fri, 3 Aug 2012 18:16:54 -0400 Subject: [PATCH 03/12] minor cleanups --- common/lib/xmodule/xmodule/abtest_module.py | 8 ++++++-- common/lib/xmodule/xmodule/course_module.py | 2 +- common/lib/xmodule/xmodule/modulestore/xml.py | 9 +++++++-- common/lib/xmodule/xmodule/x_module.py | 7 ++++--- 4 files changed, 18 insertions(+), 8 deletions(-) diff --git a/common/lib/xmodule/xmodule/abtest_module.py b/common/lib/xmodule/xmodule/abtest_module.py index 8d6604b06b..9d4744e2c9 100644 --- a/common/lib/xmodule/xmodule/abtest_module.py +++ b/common/lib/xmodule/xmodule/abtest_module.py @@ -103,7 +103,9 @@ class ABTestDescriptor(RawDescriptor, XmlDescriptor): experiment = xml_object.get('experiment') if experiment is None: - raise InvalidDefinitionError("ABTests must specify an experiment. Not found in:\n{xml}".format(xml=etree.tostring(xml_object, pretty_print=True))) + raise InvalidDefinitionError( + "ABTests must specify an experiment. Not found in:\n{xml}" + .format(xml=etree.tostring(xml_object, pretty_print=True))) definition = { 'data': { @@ -127,7 +129,9 @@ class ABTestDescriptor(RawDescriptor, XmlDescriptor): definition['data']['group_content'][name] = child_content_urls definition['children'].extend(child_content_urls) - default_portion = 1 - sum(portion for (name, portion) in definition['data']['group_portions'].items()) + default_portion = 1 - sum( + portion for (name, portion) in definition['data']['group_portions'].items()) + if default_portion < 0: raise InvalidDefinitionError("ABTest portions must add up to less than or equal to 1") diff --git a/common/lib/xmodule/xmodule/course_module.py b/common/lib/xmodule/xmodule/course_module.py index 7f4c204f45..40294fa786 100644 --- a/common/lib/xmodule/xmodule/course_module.py +++ b/common/lib/xmodule/xmodule/course_module.py @@ -37,7 +37,7 @@ class CourseDescriptor(SequenceDescriptor): def has_started(self): return time.gmtime() > self.start - + @property def grader(self): return self.__grading_policy['GRADER'] diff --git a/common/lib/xmodule/xmodule/modulestore/xml.py b/common/lib/xmodule/xmodule/modulestore/xml.py index 46fcf19469..f9093f355a 100644 --- a/common/lib/xmodule/xmodule/modulestore/xml.py +++ b/common/lib/xmodule/xmodule/modulestore/xml.py @@ -188,21 +188,26 @@ class XMLModuleStore(ModuleStoreBase): course_file = StringIO(clean_out_mako_templating(course_file.read())) course_data = etree.parse(course_file).getroot() + org = course_data.get('org') if org is None: - log.error("No 'org' attribute set for course in {dir}. " + msg = ("No 'org' attribute set for course in {dir}. " "Using default 'edx'".format(dir=course_dir)) + log.error(msg) + tracker(msg) org = 'edx' course = course_data.get('course') if course is None: - log.error("No 'course' attribute set for course in {dir}." + msg = ("No 'course' attribute set for course in {dir}." " Using default '{default}'".format( dir=course_dir, default=course_dir )) + log.error(msg) + tracker(msg) course = course_dir system = ImportSystem(self, org, course, course_dir, tracker) diff --git a/common/lib/xmodule/xmodule/x_module.py b/common/lib/xmodule/xmodule/x_module.py index 9b005d1dae..818e0cd8fd 100644 --- a/common/lib/xmodule/xmodule/x_module.py +++ b/common/lib/xmodule/xmodule/x_module.py @@ -6,6 +6,7 @@ from fs.errors import ResourceNotFoundError from functools import partial from lxml import etree from lxml.etree import XMLSyntaxError +from pprint import pprint from xmodule.modulestore import Location from xmodule.errortracker import exc_info_to_str @@ -550,9 +551,9 @@ class XModuleDescriptor(Plugin, HTMLSnippet): if not eq: for attr in self.equality_attributes: - print(getattr(self, attr, None), - getattr(other, attr, None), - getattr(self, attr, None) == getattr(other, attr, None)) + pprint((getattr(self, attr, None), + getattr(other, attr, None), + getattr(self, attr, None) == getattr(other, attr, None))) return eq From b091dcabe0f682695b8cbcf6030051c4cee0c8c6 Mon Sep 17 00:00:00 2001 From: Victor Shnayder Date: Fri, 3 Aug 2012 18:20:06 -0400 Subject: [PATCH 04/12] metadata and file format cleanups * course.xml is special--has org and course attributes in addition to url_name * strip data_dir from metadata on export * more asserts * work on roundtrip import-export test --- common/lib/xmodule/xmodule/course_module.py | 1 - .../lib/xmodule/xmodule/tests/test_export.py | 68 ++++++++++++++++--- .../lib/xmodule/xmodule/tests/test_import.py | 27 ++++++-- common/lib/xmodule/xmodule/xml_module.py | 55 ++++++++++++--- 4 files changed, 124 insertions(+), 27 deletions(-) diff --git a/common/lib/xmodule/xmodule/course_module.py b/common/lib/xmodule/xmodule/course_module.py index 40294fa786..41ed5ab89a 100644 --- a/common/lib/xmodule/xmodule/course_module.py +++ b/common/lib/xmodule/xmodule/course_module.py @@ -13,7 +13,6 @@ log = logging.getLogger(__name__) class CourseDescriptor(SequenceDescriptor): module_class = SequenceModule - metadata_attributes = SequenceDescriptor.metadata_attributes + ('org', 'course') def __init__(self, system, definition=None, **kwargs): super(CourseDescriptor, self).__init__(system, definition, **kwargs) diff --git a/common/lib/xmodule/xmodule/tests/test_export.py b/common/lib/xmodule/xmodule/tests/test_export.py index eacf8352be..e1e48cc796 100644 --- a/common/lib/xmodule/xmodule/tests/test_export.py +++ b/common/lib/xmodule/xmodule/tests/test_export.py @@ -1,24 +1,72 @@ -from xmodule.modulestore.xml import XMLModuleStore -from nose.tools import assert_equals -from nose import SkipTest -from tempfile import mkdtemp from fs.osfs import OSFS +from nose.tools import assert_equals, assert_true +from nose import SkipTest +from path import path +from tempfile import mkdtemp + +from xmodule.modulestore.xml import XMLModuleStore + +# from ~/mitx_all/mitx/common/lib/xmodule/xmodule/tests/ +# to ~/mitx_all/mitx/common/test +TEST_DIR = path(__file__).abspath().dirname() +for i in range(4): + TEST_DIR = TEST_DIR.dirname() +TEST_DIR = TEST_DIR / 'test' + +DATA_DIR = TEST_DIR / 'data' -def check_export_roundtrip(data_dir): +def strip_metadata(descriptor, key): + """ + HACK: data_dir metadata tags break equality because they aren't real metadata + remove them. + + Recursively strips same tag from all children. + """ + print "strip {key} from {desc}".format(key=key, desc=descriptor.location.url()) + descriptor.metadata.pop(key, None) + for d in descriptor.get_children(): + strip_metadata(d, key) + +def check_gone(descriptor, key): + '''Make sure that the metadata of this descriptor or any + descendants does not include key''' + assert_true(key not in descriptor.metadata) + for d in descriptor.get_children(): + check_gone(d, key) + +def check_export_roundtrip(data_dir, course_dir): print "Starting import" - initial_import = XMLModuleStore('org', 'course', data_dir, eager=True) - initial_course = initial_import.course + initial_import = XMLModuleStore(data_dir, eager=True, course_dirs=[course_dir]) + + courses = initial_import.get_courses() + assert_equals(len(courses), 1) + initial_course = courses[0] print "Starting export" export_dir = mkdtemp() + print "export_dir: {0}".format(export_dir) fs = OSFS(export_dir) - xml = initial_course.export_to_xml(fs) - with fs.open('course.xml', 'w') as course_xml: + export_course_dir = 'export' + export_fs = fs.makeopendir(export_course_dir) + + xml = initial_course.export_to_xml(export_fs) + with export_fs.open('course.xml', 'w') as course_xml: course_xml.write(xml) print "Starting second import" - second_import = XMLModuleStore('org', 'course', export_dir, eager=True) + second_import = XMLModuleStore(export_dir, eager=True, + course_dirs=[export_course_dir]) + + courses2 = second_import.get_courses() + assert_equals(len(courses2), 1) + exported_course = courses2[0] + + print "Checking course equality" + strip_metadata(initial_course, 'data_dir') + strip_metadata(exported_course, 'data_dir') + + assert_equals(initial_course, exported_course) print "Checking key equality" assert_equals(initial_import.modules.keys(), second_import.modules.keys()) diff --git a/common/lib/xmodule/xmodule/tests/test_import.py b/common/lib/xmodule/xmodule/tests/test_import.py index 45a142b8fe..d2080e1bf7 100644 --- a/common/lib/xmodule/xmodule/tests/test_import.py +++ b/common/lib/xmodule/xmodule/tests/test_import.py @@ -5,6 +5,7 @@ from fs.memoryfs import MemoryFS from lxml import etree from xmodule.x_module import XMLParsingSystem, XModuleDescriptor +from xmodule.xml_module import is_pointer_tag from xmodule.errortracker import make_error_tracker from xmodule.modulestore import Location from xmodule.modulestore.exceptions import ItemNotFoundError @@ -117,15 +118,19 @@ class ImportTestCase(unittest.TestCase): - inherited metadata doesn't leak to children. """ system = self.get_system() - v = "1 hour" + v = '1 hour' + org = 'foo' + course = 'bbhh' + url_name = 'test1' start_xml = ''' - + Two houses, ... - '''.format(grace=v) + '''.format(grace=v, org=org, course=course, url_name=url_name) descriptor = XModuleDescriptor.load_from_xml(start_xml, system, - 'org', 'course') + org, course) print descriptor, descriptor.metadata self.assertEqual(descriptor.metadata['graceperiod'], v) @@ -141,15 +146,25 @@ class ImportTestCase(unittest.TestCase): # Now export and check things resource_fs = MemoryFS() exported_xml = descriptor.export_to_xml(resource_fs) + + # Check that the exported xml is just a pointer print "Exported xml:", exported_xml + pointer = etree.fromstring(exported_xml) + self.assertTrue(is_pointer_tag(pointer)) + # but it's a special case course pointer + self.assertEqual(pointer.attrib['course'], course) + self.assertEqual(pointer.attrib['org'], org) # Does the course still have unicorns? - # hardcoded path to course - with resource_fs.open('course/test1.xml') as f: + with resource_fs.open('course/{url_name}.xml'.format(url_name=url_name)) as f: course_xml = etree.fromstring(f.read()) self.assertEqual(course_xml.attrib['unicorn'], 'purple') + # the course and org tags should be _only_ in the pointer + self.assertTrue('course' not in course_xml.attrib) + self.assertTrue('org' not in course_xml.attrib) + # did we successfully strip the url_name from the definition contents? self.assertTrue('url_name' not in course_xml.attrib) diff --git a/common/lib/xmodule/xmodule/xml_module.py b/common/lib/xmodule/xmodule/xml_module.py index 907fc302a3..cb0d05a83e 100644 --- a/common/lib/xmodule/xmodule/xml_module.py +++ b/common/lib/xmodule/xmodule/xml_module.py @@ -11,6 +11,29 @@ import sys log = logging.getLogger(__name__) + +def is_pointer_tag(xml_obj): + """ + Check if xml_obj is a pointer tag: . + No children, one attribute named url_name. + + Special case for course roots: the pointer is + + + xml_obj: an etree Element + + Returns a bool. + """ + if xml_obj.tag != "course": + expected_attr = set(['url_name']) + else: + expected_attr = set(['url_name', 'course', 'org']) + + actual_attr = set(xml_obj.attrib.keys()) + return len(xml_obj) == 0 and actual_attr == expected_attr + + + _AttrMapBase = namedtuple('_AttrMap', 'from_xml to_xml') class AttrMap(_AttrMapBase): @@ -41,16 +64,19 @@ class XmlDescriptor(XModuleDescriptor): # Note -- url_name isn't in this list because it's handled specially on # import and export. + + # TODO (vshnayder): Do we need a list of metadata we actually + # understand? And if we do, is this the place? + # Related: What's the right behavior for clean_metadata? metadata_attributes = ('format', 'graceperiod', 'showanswer', 'rerandomize', 'start', 'due', 'graded', 'display_name', 'url_name', 'hide_from_toc', 'ispublic', # if True, then course is listed for all users; see # VS[compat] Remove once unused. 'name', 'slug') - # VS[compat] -- remove once everything is in the CMS - # We don't want url_name in the metadata--it's in the location, so avoid - # confusion and duplication. - metadata_to_strip = ('url_name', ) + metadata_to_strip = ('data_dir', + # VS[compat] -- remove the below attrs once everything is in the CMS + 'course', 'org', 'url_name') # A dictionary mapping xml attribute names AttrMaps that describe how # to import and export them @@ -130,6 +156,8 @@ class XmlDescriptor(XModuleDescriptor): Subclasses should not need to override this except in special cases (e.g. html module)''' + # VS[compat] -- the filename tag should go away once everything is + # converted. (note: make sure html files still work once this goes away) filename = xml_object.get('filename') if filename is None: definition_xml = copy.deepcopy(xml_object) @@ -198,13 +226,13 @@ class XmlDescriptor(XModuleDescriptor): url identifiers """ xml_object = etree.fromstring(xml_data) - # VS[compat] -- just have the url_name lookup once translation is done + # VS[compat] -- just have the url_name lookup, once translation is done url_name = xml_object.get('url_name', xml_object.get('slug')) location = Location('i4x', org, course, xml_object.tag, url_name) # VS[compat] -- detect new-style each-in-a-file mode - if len(xml_object.attrib.keys()) == 1 and len(xml_object) == 0: - # new style: this is just a pointer. + if is_pointer_tag(xml_object): + # new style: # read the actual defition file--named using url_name filepath = cls._format_filepath(xml_object.tag, url_name) definition_xml = cls.load_file(filepath, system.resources_fs, location) @@ -258,12 +286,13 @@ class XmlDescriptor(XModuleDescriptor): # Add the non-inherited metadata for attr in self.own_metadata: - xml_object.set(attr, val_for_xml(attr)) + # don't want e.g. data_dir + if attr not in self.metadata_to_strip: + xml_object.set(attr, val_for_xml(attr)) - # Write the actual contents to a file + # Write the definition to a file filepath = self.__class__._format_filepath(self.category, self.url_name) resource_fs.makedir(os.path.dirname(filepath), allow_recreate=True) - with resource_fs.open(filepath, 'w') as file: file.write(etree.tostring(xml_object, pretty_print=True)) @@ -271,6 +300,12 @@ class XmlDescriptor(XModuleDescriptor): record_object = etree.Element(self.category) record_object.set('url_name', self.url_name) + # Special case for course pointers: + if self.category == 'course': + # add org and course attributes on the pointer tag + record_object.set('org', self.location.org) + record_object.set('course', self.location.course) + return etree.tostring(record_object, pretty_print=True) def definition_to_xml(self, resource_fs): From e6e290f525123cadcffd3d89660571eba47f0f5f Mon Sep 17 00:00:00 2001 From: Victor Shnayder Date: Fri, 3 Aug 2012 19:03:19 -0400 Subject: [PATCH 05/12] Make initial import-export tests pass. TODO: * need unique slugs for errors so they don't overwrite each other on export. - try to preserve origin slug. If not possible, generate random one. * figure out what metadata to strip. e.g. ({'data': '

Finger Exercise 1...'}, {'data': '

Finger Exercise 1...'}, False) - where did points and type come from? Do we want them there? * separate broken and non-broken test courses --- common/lib/xmodule/xmodule/html_module.py | 11 +- .../lib/xmodule/xmodule/tests/test_export.py | 114 ++++++++++-------- .../lib/xmodule/xmodule/tests/test_import.py | 2 - common/lib/xmodule/xmodule/xml_module.py | 2 +- common/test/data/simple/course.xml | 2 +- .../{problems => problem}/L1_Problem_1.xml | 0 .../{problems => problem}/ps01-simple.xml | 0 7 files changed, 72 insertions(+), 59 deletions(-) rename common/test/data/simple/{problems => problem}/L1_Problem_1.xml (100%) rename common/test/data/simple/{problems => problem}/ps01-simple.xml (100%) diff --git a/common/lib/xmodule/xmodule/html_module.py b/common/lib/xmodule/xmodule/html_module.py index de45af081a..89dbfb5fc0 100644 --- a/common/lib/xmodule/xmodule/html_module.py +++ b/common/lib/xmodule/xmodule/html_module.py @@ -36,7 +36,6 @@ class HtmlDescriptor(XmlDescriptor, EditingDescriptor): # are being edited in the cms @classmethod def backcompat_paths(cls, path): - origpath = path if path.endswith('.html.xml'): path = path[:-9] + '.html' #backcompat--look for html instead of xml candidates = [] @@ -45,9 +44,11 @@ class HtmlDescriptor(XmlDescriptor, EditingDescriptor): _, _, path = path.partition(os.sep) # also look for .html versions instead of .xml - if origpath.endswith('.xml'): - candidates.append(origpath[:-4] + '.html') - return candidates + nc = [] + for candidate in candidates: + if candidate.endswith('.xml'): + nc.append(candidate[:-4] + '.html') + return candidates + nc # NOTE: html descriptors are special. We do not want to parse and # export them ourselves, because that can break things (e.g. lxml @@ -80,7 +81,7 @@ class HtmlDescriptor(XmlDescriptor, EditingDescriptor): # online and has imported all current (fall 2012) courses from xml if not system.resources_fs.exists(filepath): candidates = cls.backcompat_paths(filepath) - #log.debug("candidates = {0}".format(candidates)) + log.debug("candidates = {0}".format(candidates)) for candidate in candidates: if system.resources_fs.exists(candidate): filepath = candidate diff --git a/common/lib/xmodule/xmodule/tests/test_export.py b/common/lib/xmodule/xmodule/tests/test_export.py index e1e48cc796..7358d89dde 100644 --- a/common/lib/xmodule/xmodule/tests/test_export.py +++ b/common/lib/xmodule/xmodule/tests/test_export.py @@ -1,6 +1,7 @@ +import unittest + from fs.osfs import OSFS from nose.tools import assert_equals, assert_true -from nose import SkipTest from path import path from tempfile import mkdtemp @@ -18,10 +19,7 @@ DATA_DIR = TEST_DIR / 'data' def strip_metadata(descriptor, key): """ - HACK: data_dir metadata tags break equality because they aren't real metadata - remove them. - - Recursively strips same tag from all children. + Recursively strips tag from all children. """ print "strip {key} from {desc}".format(key=key, desc=descriptor.location.url()) descriptor.metadata.pop(key, None) @@ -35,50 +33,66 @@ def check_gone(descriptor, key): for d in descriptor.get_children(): check_gone(d, key) -def check_export_roundtrip(data_dir, course_dir): - print "Starting import" - initial_import = XMLModuleStore(data_dir, eager=True, course_dirs=[course_dir]) - - courses = initial_import.get_courses() - assert_equals(len(courses), 1) - initial_course = courses[0] - - print "Starting export" - export_dir = mkdtemp() - print "export_dir: {0}".format(export_dir) - fs = OSFS(export_dir) - export_course_dir = 'export' - export_fs = fs.makeopendir(export_course_dir) - - xml = initial_course.export_to_xml(export_fs) - with export_fs.open('course.xml', 'w') as course_xml: - course_xml.write(xml) - - print "Starting second import" - second_import = XMLModuleStore(export_dir, eager=True, - course_dirs=[export_course_dir]) - - courses2 = second_import.get_courses() - assert_equals(len(courses2), 1) - exported_course = courses2[0] - - print "Checking course equality" - strip_metadata(initial_course, 'data_dir') - strip_metadata(exported_course, 'data_dir') - - assert_equals(initial_course, exported_course) - - print "Checking key equality" - assert_equals(initial_import.modules.keys(), second_import.modules.keys()) - - print "Checking module equality" - for location in initial_import.modules.keys(): - print "Checking", location - assert_equals(initial_import.modules[location], second_import.modules[location]) -def test_toy_roundtrip(): - dir = "" - # TODO: add paths and make this run. - raise SkipTest() - check_export_roundtrip(dir) +class RoundTripTestCase(unittest.TestCase): + '''Check that our test courses roundtrip properly''' + def check_export_roundtrip(self, data_dir, course_dir): + print "Starting import" + initial_import = XMLModuleStore(data_dir, eager=True, course_dirs=[course_dir]) + + courses = initial_import.get_courses() + self.assertEquals(len(courses), 1) + initial_course = courses[0] + + print "Starting export" + export_dir = mkdtemp() + print "export_dir: {0}".format(export_dir) + fs = OSFS(export_dir) + export_course_dir = 'export' + export_fs = fs.makeopendir(export_course_dir) + + xml = initial_course.export_to_xml(export_fs) + with export_fs.open('course.xml', 'w') as course_xml: + course_xml.write(xml) + + print "Starting second import" + second_import = XMLModuleStore(export_dir, eager=True, + course_dirs=[export_course_dir]) + + courses2 = second_import.get_courses() + self.assertEquals(len(courses2), 1) + exported_course = courses2[0] + + print "Checking course equality" + # HACK: data_dir metadata tags break equality because they + # aren't real metadata, and depend on paths. Remove them. + strip_metadata(initial_course, 'data_dir') + strip_metadata(exported_course, 'data_dir') + + self.assertEquals(initial_course, exported_course) + + print "Checking key equality" + self.assertEquals(sorted(initial_import.modules.keys()), + sorted(second_import.modules.keys())) + + print "Checking module equality" + for location in initial_import.modules.keys(): + print "Checking", location + if location.category == 'html': + print ("Skipping html modules--they can't import in" + " final form without writing files...") + continue + self.assertEquals(initial_import.modules[location], + second_import.modules[location]) + + + def setUp(self): + self.maxDiff = None + + def test_toy_roundtrip(self): + self.check_export_roundtrip(DATA_DIR, "toy") + + + def test_full_roundtrip(self): + self.check_export_roundtrip(DATA_DIR, "full") diff --git a/common/lib/xmodule/xmodule/tests/test_import.py b/common/lib/xmodule/xmodule/tests/test_import.py index d2080e1bf7..af7464cfdc 100644 --- a/common/lib/xmodule/xmodule/tests/test_import.py +++ b/common/lib/xmodule/xmodule/tests/test_import.py @@ -51,8 +51,6 @@ class DummySystem(XMLParsingSystem): class ImportTestCase(unittest.TestCase): '''Make sure module imports work properly, including for malformed inputs''' - - @staticmethod def get_system(): '''Get a dummy system''' diff --git a/common/lib/xmodule/xmodule/xml_module.py b/common/lib/xmodule/xmodule/xml_module.py index cb0d05a83e..1318270def 100644 --- a/common/lib/xmodule/xmodule/xml_module.py +++ b/common/lib/xmodule/xmodule/xml_module.py @@ -76,7 +76,7 @@ class XmlDescriptor(XModuleDescriptor): metadata_to_strip = ('data_dir', # VS[compat] -- remove the below attrs once everything is in the CMS - 'course', 'org', 'url_name') + 'course', 'org', 'url_name', 'filename') # A dictionary mapping xml attribute names AttrMaps that describe how # to import and export them diff --git a/common/test/data/simple/course.xml b/common/test/data/simple/course.xml index b92158cdb7..0ecc153724 100644 --- a/common/test/data/simple/course.xml +++ b/common/test/data/simple/course.xml @@ -2,7 +2,7 @@