From 66ca31947aabf55720ca137fc9ad83fd82959047 Mon Sep 17 00:00:00 2001 From: Calen Pennington Date: Tue, 26 Jun 2012 14:10:15 -0400 Subject: [PATCH] Remove semantically meaningless sections from course.xml by moving their attributes onto the contained element. If there is more than one contained element, turn the section into a sequence. Also handles includes --- .../management/commands/import.py | 45 +++++++------ common/lib/xmodule/html_module.py | 22 ++++++ common/lib/xmodule/seq_module.py | 2 +- common/lib/xmodule/setup.py | 1 + common/lib/xmodule/translation_module.py | 67 +++++++++++++++++++ common/lib/xmodule/x_module.py | 16 ++++- 6 files changed, 128 insertions(+), 25 deletions(-) create mode 100644 common/lib/xmodule/translation_module.py diff --git a/cms/djangoapps/contentstore/management/commands/import.py b/cms/djangoapps/contentstore/management/commands/import.py index 0ce0ce03fe..d99f2bee8b 100644 --- a/cms/djangoapps/contentstore/management/commands/import.py +++ b/cms/djangoapps/contentstore/management/commands/import.py @@ -6,9 +6,10 @@ from django.core.management.base import BaseCommand from keystore.django import keystore from raw_module import RawDescriptor from lxml import etree +from fs.osfs import OSFS from path import path -from x_module import XModuleDescriptor, DescriptorSystem +from x_module import XModuleDescriptor, XMLParsingSystem unnamed_modules = 0 @@ -24,27 +25,29 @@ class Command(BaseCommand): data_dir = path(data_dir) with open(data_dir / "course.xml") as course_file: - system = DescriptorSystem(keystore().get_item) + class ImportSystem(XMLParsingSystem): + def __init__(self): + self.load_item = keystore().get_item + self.fs = OSFS(data_dir) - def process_xml(xml): - try: - xml_data = etree.fromstring(xml) - except: - print xml - raise - if not xml_data.get('name'): - global unnamed_modules - unnamed_modules += 1 - xml_data.set('name', 'Unnamed module %d' % unnamed_modules) + def process_xml(self, xml): + try: + xml_data = etree.fromstring(xml) + except: + print xml + raise + if not xml_data.get('name'): + global unnamed_modules + unnamed_modules += 1 + xml_data.set('name', 'Unnamed module %d' % unnamed_modules) - module = XModuleDescriptor.load_from_xml(etree.tostring(xml_data), system, org, course, RawDescriptor) - keystore().create_item(module.url) - if 'data' in module.definition: - keystore().update_item(module.url, module.definition['data']) - if 'children' in module.definition: - keystore().update_children(module.url, module.definition['children']) - return module.url + module = XModuleDescriptor.load_from_xml(etree.tostring(xml_data), self, org, course, RawDescriptor) + keystore().create_item(module.url) + if 'data' in module.definition: + keystore().update_item(module.url, module.definition['data']) + if 'children' in module.definition: + keystore().update_children(module.url, module.definition['children']) + return module - system.process_xml = process_xml - system.process_xml(course_file.read()) + ImportSystem().process_xml(course_file.read()) diff --git a/common/lib/xmodule/html_module.py b/common/lib/xmodule/html_module.py index 981109a9af..b35549d971 100644 --- a/common/lib/xmodule/html_module.py +++ b/common/lib/xmodule/html_module.py @@ -19,6 +19,28 @@ class HtmlModuleDescriptor(MakoModuleDescriptor): js = {'coffee': [resource_string(__name__, 'js/module/html.coffee')]} js_module = 'HTML' + @classmethod + def from_xml(cls, xml_data, system, org=None, course=None): + """ + Creates an instance of this descriptor from the supplied xml_data. + This may be overridden by subclasses + + xml_data: A string of xml that will be translated into data and children for + this module + system: An XModuleSystem for interacting with external resources + org and course are optional strings that will be used in the generated modules + url identifiers + """ + xml_object = etree.fromstring(xml_data) + return cls( + system, + definition={'data': {'text': xml_data}}, + location=['i4x', + org, + course, + xml_object.tag, + xml_object.get('name')] + ) class Module(XModule): id_attribute = 'filename' diff --git a/common/lib/xmodule/seq_module.py b/common/lib/xmodule/seq_module.py index 833ae8c599..e3a19c3d60 100644 --- a/common/lib/xmodule/seq_module.py +++ b/common/lib/xmodule/seq_module.py @@ -123,7 +123,7 @@ class SequenceDescriptor(MakoModuleDescriptor): xml_object = etree.fromstring(xml_data) children = [ - system.process_xml(etree.tostring(child_module)) + system.process_xml(etree.tostring(child_module)).url for child_module in xml_object ] diff --git a/common/lib/xmodule/setup.py b/common/lib/xmodule/setup.py index 044fcbe281..967a7e0b5b 100644 --- a/common/lib/xmodule/setup.py +++ b/common/lib/xmodule/setup.py @@ -13,6 +13,7 @@ setup( # for a description of entry_points entry_points={ 'xmodule.v1': [ + "section = translation_module:SemanticSectionDescriptor", "chapter = seq_module:SequenceDescriptor", "course = seq_module:SequenceDescriptor", "html = html_module:HtmlModuleDescriptor", diff --git a/common/lib/xmodule/translation_module.py b/common/lib/xmodule/translation_module.py new file mode 100644 index 0000000000..b56fed02cd --- /dev/null +++ b/common/lib/xmodule/translation_module.py @@ -0,0 +1,67 @@ +""" +These modules exist to translate old format XML into newer, semantic forms +""" +from x_module import XModuleDescriptor +from lxml import etree +from functools import wraps +import logging + +log = logging.getLogger(__name__) + + +def process_includes(fn): + """ + Wraps a XModuleDescriptor.from_xml method, and modifies xml_data to replace + any immediate child items with the contents of the file that they are + supposed to include + """ + @wraps(fn) + def from_xml(cls, xml_data, system, org=None, course=None): + xml_object = etree.fromstring(xml_data) + next_include = xml_object.find('include') + while next_include is not None: + file = next_include.get('file') + if file is not None: + try: + ifp = system.fs.open(file) + except Exception: + log.exception('Error in problem xml include: %s' % (etree.tostring(next_include, pretty_print=True))) + log.exception('Cannot find file %s in %s' % (file, dir)) + raise + try: + # read in and convert to XML + incxml = etree.XML(ifp.read()) + except Exception: + log.exception('Error in problem xml include: %s' % (etree.tostring(next_include, pretty_print=True))) + log.exception('Cannot parse XML in %s' % (file)) + raise + # insert new XML into tree in place of inlcude + parent = next_include.getparent() + parent.insert(parent.index(next_include), incxml) + parent.remove(next_include) + + next_include = xml_object.find('include') + return fn(cls, etree.tostring(xml_object), system, org, course) + return from_xml + + +class SemanticSectionDescriptor(XModuleDescriptor): + @classmethod + @process_includes + def from_xml(cls, xml_data, system, org=None, course=None): + """ + Removes sections single child elements in favor of just embedding the child element + + """ + xml_object = etree.fromstring(xml_data) + + if len(xml_object) == 1: + for (key, val) in xml_object.items(): + if key == 'format': + continue + xml_object[0].set(key, val) + + return system.process_xml(etree.tostring(xml_object[0])) + else: + xml_object.tag = 'sequence' + return system.process_xml(etree.tostring(xml_object)) diff --git a/common/lib/xmodule/x_module.py b/common/lib/xmodule/x_module.py index b1294f8c2c..336ccc6d0c 100644 --- a/common/lib/xmodule/x_module.py +++ b/common/lib/xmodule/x_module.py @@ -213,6 +213,7 @@ class XModuleDescriptor(Plugin): on the contents of xml_data. xml_data must be a string containing valid xml + system is an XMLParsingSystem org and course are optional strings that will be used in the generated modules url identifiers """ @@ -230,7 +231,7 @@ class XModuleDescriptor(Plugin): xml_data: A string of xml that will be translated into data and children for this module - system: An XModuleSystem for interacting with external resources + system is an XMLParsingSystem org and course are optional strings that will be used in the generated modules url identifiers """ @@ -330,11 +331,20 @@ class XModuleDescriptor(Plugin): class DescriptorSystem(object): - def __init__(self, load_item, process_xml=None): + def __init__(self, load_item): """ load_item: Takes a Location and returns an XModuleDescriptor - process_xml: Takes an xml string, and returns the url of the XModuleDescriptor created from that xml """ self.load_item = load_item + + +class XMLParsingSystem(DescriptorSystem): + def __init__(self, load_item, process_xml, fs): + """ + process_xml: Takes an xml string, and returns the the XModuleDescriptor created from that xml + fs: A Filesystem object that contains all of the xml resources needed to parse + the course + """ self.process_xml = process_xml + self.fs = fs