Remove semantically meaningless sections from course.xml by moving their attributes onto the contained element. If there is more than one contained element, turn the section into a sequence. Also handles includes

This commit is contained in:
Calen Pennington
2012-06-26 14:10:15 -04:00
parent f0cf323488
commit 66ca31947a
6 changed files with 128 additions and 25 deletions

View File

@@ -6,9 +6,10 @@ from django.core.management.base import BaseCommand
from keystore.django import keystore
from raw_module import RawDescriptor
from lxml import etree
from fs.osfs import OSFS
from path import path
from x_module import XModuleDescriptor, DescriptorSystem
from x_module import XModuleDescriptor, XMLParsingSystem
unnamed_modules = 0
@@ -24,27 +25,29 @@ class Command(BaseCommand):
data_dir = path(data_dir)
with open(data_dir / "course.xml") as course_file:
system = DescriptorSystem(keystore().get_item)
class ImportSystem(XMLParsingSystem):
def __init__(self):
self.load_item = keystore().get_item
self.fs = OSFS(data_dir)
def process_xml(xml):
try:
xml_data = etree.fromstring(xml)
except:
print xml
raise
if not xml_data.get('name'):
global unnamed_modules
unnamed_modules += 1
xml_data.set('name', 'Unnamed module %d' % unnamed_modules)
def process_xml(self, xml):
try:
xml_data = etree.fromstring(xml)
except:
print xml
raise
if not xml_data.get('name'):
global unnamed_modules
unnamed_modules += 1
xml_data.set('name', 'Unnamed module %d' % unnamed_modules)
module = XModuleDescriptor.load_from_xml(etree.tostring(xml_data), system, org, course, RawDescriptor)
keystore().create_item(module.url)
if 'data' in module.definition:
keystore().update_item(module.url, module.definition['data'])
if 'children' in module.definition:
keystore().update_children(module.url, module.definition['children'])
return module.url
module = XModuleDescriptor.load_from_xml(etree.tostring(xml_data), self, org, course, RawDescriptor)
keystore().create_item(module.url)
if 'data' in module.definition:
keystore().update_item(module.url, module.definition['data'])
if 'children' in module.definition:
keystore().update_children(module.url, module.definition['children'])
return module
system.process_xml = process_xml
system.process_xml(course_file.read())
ImportSystem().process_xml(course_file.read())

View File

@@ -19,6 +19,28 @@ class HtmlModuleDescriptor(MakoModuleDescriptor):
js = {'coffee': [resource_string(__name__, 'js/module/html.coffee')]}
js_module = 'HTML'
@classmethod
def from_xml(cls, xml_data, system, org=None, course=None):
"""
Creates an instance of this descriptor from the supplied xml_data.
This may be overridden by subclasses
xml_data: A string of xml that will be translated into data and children for
this module
system: An XModuleSystem for interacting with external resources
org and course are optional strings that will be used in the generated modules
url identifiers
"""
xml_object = etree.fromstring(xml_data)
return cls(
system,
definition={'data': {'text': xml_data}},
location=['i4x',
org,
course,
xml_object.tag,
xml_object.get('name')]
)
class Module(XModule):
id_attribute = 'filename'

View File

@@ -123,7 +123,7 @@ class SequenceDescriptor(MakoModuleDescriptor):
xml_object = etree.fromstring(xml_data)
children = [
system.process_xml(etree.tostring(child_module))
system.process_xml(etree.tostring(child_module)).url
for child_module in xml_object
]

View File

@@ -13,6 +13,7 @@ setup(
# for a description of entry_points
entry_points={
'xmodule.v1': [
"section = translation_module:SemanticSectionDescriptor",
"chapter = seq_module:SequenceDescriptor",
"course = seq_module:SequenceDescriptor",
"html = html_module:HtmlModuleDescriptor",

View File

@@ -0,0 +1,67 @@
"""
These modules exist to translate old format XML into newer, semantic forms
"""
from x_module import XModuleDescriptor
from lxml import etree
from functools import wraps
import logging
log = logging.getLogger(__name__)
def process_includes(fn):
"""
Wraps a XModuleDescriptor.from_xml method, and modifies xml_data to replace
any immediate child <include> items with the contents of the file that they are
supposed to include
"""
@wraps(fn)
def from_xml(cls, xml_data, system, org=None, course=None):
xml_object = etree.fromstring(xml_data)
next_include = xml_object.find('include')
while next_include is not None:
file = next_include.get('file')
if file is not None:
try:
ifp = system.fs.open(file)
except Exception:
log.exception('Error in problem xml include: %s' % (etree.tostring(next_include, pretty_print=True)))
log.exception('Cannot find file %s in %s' % (file, dir))
raise
try:
# read in and convert to XML
incxml = etree.XML(ifp.read())
except Exception:
log.exception('Error in problem xml include: %s' % (etree.tostring(next_include, pretty_print=True)))
log.exception('Cannot parse XML in %s' % (file))
raise
# insert new XML into tree in place of inlcude
parent = next_include.getparent()
parent.insert(parent.index(next_include), incxml)
parent.remove(next_include)
next_include = xml_object.find('include')
return fn(cls, etree.tostring(xml_object), system, org, course)
return from_xml
class SemanticSectionDescriptor(XModuleDescriptor):
@classmethod
@process_includes
def from_xml(cls, xml_data, system, org=None, course=None):
"""
Removes sections single child elements in favor of just embedding the child element
"""
xml_object = etree.fromstring(xml_data)
if len(xml_object) == 1:
for (key, val) in xml_object.items():
if key == 'format':
continue
xml_object[0].set(key, val)
return system.process_xml(etree.tostring(xml_object[0]))
else:
xml_object.tag = 'sequence'
return system.process_xml(etree.tostring(xml_object))

View File

@@ -213,6 +213,7 @@ class XModuleDescriptor(Plugin):
on the contents of xml_data.
xml_data must be a string containing valid xml
system is an XMLParsingSystem
org and course are optional strings that will be used in the generated modules
url identifiers
"""
@@ -230,7 +231,7 @@ class XModuleDescriptor(Plugin):
xml_data: A string of xml that will be translated into data and children for
this module
system: An XModuleSystem for interacting with external resources
system is an XMLParsingSystem
org and course are optional strings that will be used in the generated modules
url identifiers
"""
@@ -330,11 +331,20 @@ class XModuleDescriptor(Plugin):
class DescriptorSystem(object):
def __init__(self, load_item, process_xml=None):
def __init__(self, load_item):
"""
load_item: Takes a Location and returns an XModuleDescriptor
process_xml: Takes an xml string, and returns the url of the XModuleDescriptor created from that xml
"""
self.load_item = load_item
class XMLParsingSystem(DescriptorSystem):
def __init__(self, load_item, process_xml, fs):
"""
process_xml: Takes an xml string, and returns the the XModuleDescriptor created from that xml
fs: A Filesystem object that contains all of the xml resources needed to parse
the course
"""
self.process_xml = process_xml
self.fs = fs