Files
edx-platform/common/lib/xmodule/xmodule/xml_module.py

411 lines
15 KiB
Python

from xmodule.x_module import (XModuleDescriptor, policy_key)
from xmodule.modulestore import Location
from xmodule.modulestore.inheritance import own_metadata
from lxml import etree
import json
import copy
import logging
import traceback
from collections import namedtuple
from fs.errors import ResourceNotFoundError
import os
import sys
log = logging.getLogger(__name__)
edx_xml_parser = etree.XMLParser(dtd_validation=False, load_dtd=False,
remove_comments=True, remove_blank_text=True)
def name_to_pathname(name):
"""
Convert a location name for use in a path: replace ':' with '/'.
This allows users of the xml format to organize content into directories
"""
return name.replace(':', '/')
def is_pointer_tag(xml_obj):
"""
Check if xml_obj is a pointer tag: <blah url_name="something" />.
No children, one attribute named url_name, no text.
Special case for course roots: the pointer is
<course url_name="something" org="myorg" course="course">
xml_obj: an etree Element
Returns a bool.
"""
if xml_obj.tag != "course":
expected_attr = set(['url_name'])
else:
expected_attr = set(['url_name', 'course', 'org'])
actual_attr = set(xml_obj.attrib.keys())
has_text = xml_obj.text is not None and len(xml_obj.text.strip()) > 0
return len(xml_obj) == 0 and actual_attr == expected_attr and not has_text
def get_metadata_from_xml(xml_object, remove=True):
meta = xml_object.find('meta')
if meta is None:
return ''
dmdata = meta.text
#log.debug('meta for %s loaded: %s' % (xml_object,dmdata))
if remove:
xml_object.remove(meta)
return dmdata
_AttrMapBase = namedtuple('_AttrMap', 'from_xml to_xml')
class AttrMap(_AttrMapBase):
"""
A class that specifies two functions:
from_xml: convert value from the xml representation into
an internal python representation
to_xml: convert the internal python representation into
the value to store in the xml.
"""
def __new__(_cls, from_xml=lambda x: x,
to_xml=lambda x: x):
return _AttrMapBase.__new__(_cls, from_xml, to_xml)
class XmlDescriptor(XModuleDescriptor):
"""
Mixin class for standardized parsing of from xml
"""
# Extension to append to filename paths
filename_extension = 'xml'
# The attributes will be removed from the definition xml passed
# to definition_from_xml, and from the xml returned by definition_to_xml
# Note -- url_name isn't in this list because it's handled specially on
# import and export.
# TODO (vshnayder): Do we need a list of metadata we actually
# understand? And if we do, is this the place?
# Related: What's the right behavior for clean_metadata?
metadata_attributes = ('format', 'graceperiod', 'showanswer', 'rerandomize',
'start', 'due', 'graded', 'display_name', 'url_name', 'hide_from_toc',
'ispublic', # if True, then course is listed for all users; see
'xqa_key', # for xqaa server access
# VS[compat] Remove once unused.
'name', 'slug')
metadata_to_strip = ('data_dir',
# cdodge: @TODO: We need to figure out a way to export out 'tabs' and 'grading_policy' which is on the course
'tabs', 'grading_policy', 'is_draft', 'published_by', 'published_date',
'discussion_blackouts',
# VS[compat] -- remove the below attrs once everything is in the CMS
'course', 'org', 'url_name', 'filename')
metadata_to_export_to_policy = ('discussion_topics')
# A dictionary mapping xml attribute names AttrMaps that describe how
# to import and export them
# Allow json to specify either the string "true", or the bool True. The string is preferred.
to_bool = lambda val: val == 'true' or val == True
from_bool = lambda val: str(val).lower()
bool_map = AttrMap(to_bool, from_bool)
to_int = lambda val: int(val)
from_int = lambda val: str(val)
int_map = AttrMap(to_int, from_int)
xml_attribute_map = {
# type conversion: want True/False in python, "true"/"false" in xml
'graded': bool_map,
'hide_progress_tab': bool_map,
'allow_anonymous': bool_map,
'allow_anonymous_to_peers': bool_map,
'weight':int_map
}
# VS[compat]. Backwards compatibility code that can go away after
# importing 2012 courses.
# A set of metadata key conversions that we want to make
metadata_translations = {
'slug' : 'url_name',
'name' : 'display_name',
}
@classmethod
def _translate(cls, key):
'VS[compat]'
return cls.metadata_translations.get(key, key)
@classmethod
def definition_from_xml(cls, xml_object, system):
"""
Return the definition to be passed to the newly created descriptor
during from_xml
xml_object: An etree Element
"""
raise NotImplementedError(
"%s does not implement definition_from_xml" % cls.__name__)
@classmethod
def clean_metadata_from_xml(cls, xml_object):
"""
Remove any attribute named in cls.metadata_attributes from the supplied
xml_object
"""
for attr in cls.metadata_attributes:
if xml_object.get(attr) is not None:
del xml_object.attrib[attr]
@classmethod
def file_to_xml(cls, file_object):
"""
Used when this module wants to parse a file object to xml
that will be converted to the definition.
Returns an lxml Element
"""
return etree.parse(file_object, parser=edx_xml_parser).getroot()
@classmethod
def load_file(cls, filepath, fs, location):
'''
Open the specified file in fs, and call cls.file_to_xml on it,
returning the lxml object.
Add details and reraise on error.
'''
try:
with fs.open(filepath) as file:
return cls.file_to_xml(file)
except Exception as err:
# Add info about where we are, but keep the traceback
msg = 'Unable to load file contents at path %s for item %s: %s ' % (
filepath, location.url(), str(err))
raise Exception, msg, sys.exc_info()[2]
@classmethod
def load_definition(cls, xml_object, system, location):
'''Load a descriptor definition from the specified xml_object.
Subclasses should not need to override this except in special
cases (e.g. html module)'''
# VS[compat] -- the filename attr should go away once everything is
# converted. (note: make sure html files still work once this goes away)
filename = xml_object.get('filename')
if filename is None:
definition_xml = copy.deepcopy(xml_object)
filepath = ''
else:
filepath = cls._format_filepath(xml_object.tag, filename)
# VS[compat]
# TODO (cpennington): If the file doesn't exist at the right path,
# give the class a chance to fix it up. The file will be written out
# again in the correct format. This should go away once the CMS is
# online and has imported all current (fall 2012) courses from xml
if not system.resources_fs.exists(filepath) and hasattr(
cls, 'backcompat_paths'):
candidates = cls.backcompat_paths(filepath)
for candidate in candidates:
if system.resources_fs.exists(candidate):
filepath = candidate
break
definition_xml = cls.load_file(filepath, system.resources_fs, location)
definition_metadata = get_metadata_from_xml(definition_xml)
cls.clean_metadata_from_xml(definition_xml)
definition, children = cls.definition_from_xml(definition_xml, system)
if definition_metadata:
definition['definition_metadata'] = definition_metadata
# TODO (ichuang): remove this after migration
# for Fall 2012 LMS migration: keep filename (and unmangled filename)
definition['filename'] = [ filepath, filename ]
return definition, children
@classmethod
def load_metadata(cls, xml_object):
"""
Read the metadata attributes from this xml_object.
Returns a dictionary {key: value}.
"""
metadata = {}
for attr in xml_object.attrib:
val = xml_object.get(attr)
if val is not None:
# VS[compat]. Remove after all key translations done
attr = cls._translate(attr)
if attr in cls.metadata_to_strip:
# don't load these
continue
attr_map = cls.xml_attribute_map.get(attr, AttrMap())
metadata[attr] = attr_map.from_xml(val)
return metadata
@classmethod
def apply_policy(cls, metadata, policy):
"""
Add the keys in policy to metadata, after processing them
through the attrmap. Updates the metadata dict in place.
"""
for attr in policy:
attr_map = cls.xml_attribute_map.get(attr, AttrMap())
metadata[attr] = attr_map.from_xml(policy[attr])
@classmethod
def from_xml(cls, xml_data, system, org=None, course=None):
"""
Creates an instance of this descriptor from the supplied xml_data.
This may be overridden by subclasses
xml_data: A string of xml that will be translated into data and children for
this module
system: A DescriptorSystem for interacting with external resources
org and course are optional strings that will be used in the generated modules
url identifiers
"""
xml_object = etree.fromstring(xml_data)
# VS[compat] -- just have the url_name lookup, once translation is done
url_name = xml_object.get('url_name', xml_object.get('slug'))
location = Location('i4x', org, course, xml_object.tag, url_name)
# VS[compat] -- detect new-style each-in-a-file mode
if is_pointer_tag(xml_object):
# new style:
# read the actual definition file--named using url_name.replace(':','/')
filepath = cls._format_filepath(xml_object.tag, name_to_pathname(url_name))
definition_xml = cls.load_file(filepath, system.resources_fs, location)
else:
definition_xml = xml_object # this is just a pointer, not the real definition content
definition, children = cls.load_definition(definition_xml, system, location) # note this removes metadata
# VS[compat] -- make Ike's github preview links work in both old and
# new file layouts
if is_pointer_tag(xml_object):
# new style -- contents actually at filepath
definition['filename'] = [filepath, filepath]
metadata = cls.load_metadata(definition_xml)
# move definition metadata into dict
dmdata = definition.get('definition_metadata', '')
if dmdata:
metadata['definition_metadata_raw'] = dmdata
try:
metadata.update(json.loads(dmdata))
except Exception as err:
log.debug('Error %s in loading metadata %s' % (err, dmdata))
metadata['definition_metadata_err'] = str(err)
# Set/override any metadata specified by policy
k = policy_key(location)
if k in system.policy:
cls.apply_policy(metadata, system.policy[k])
model_data = {}
model_data.update(metadata)
model_data.update(definition)
model_data['children'] = children
return cls(
system,
location,
model_data,
)
@classmethod
def _format_filepath(cls, category, name):
return u'{category}/{name}.{ext}'.format(category=category,
name=name,
ext=cls.filename_extension)
def export_to_file(self):
"""If this returns True, write the definition of this descriptor to a separate
file.
NOTE: Do not override this without a good reason. It is here
specifically for customtag...
"""
return True
def export_to_xml(self, resource_fs):
"""
Returns an xml string representing this module, and all modules
underneath it. May also write required resources out to resource_fs
Assumes that modules have single parentage (that no module appears twice
in the same course), and that it is thus safe to nest modules as xml
children as appropriate.
The returned XML should be able to be parsed back into an identical
XModuleDescriptor using the from_xml method with the same system, org,
and course
resource_fs is a pyfilesystem object (from the fs package)
"""
# Get the definition
xml_object = self.definition_to_xml(resource_fs)
self.__class__.clean_metadata_from_xml(xml_object)
# Set the tag so we get the file path right
xml_object.tag = self.category
def val_for_xml(attr):
"""Get the value for this attribute that we want to store.
(Possible format conversion through an AttrMap).
"""
attr_map = self.xml_attribute_map.get(attr, AttrMap())
return attr_map.to_xml(self._model_data[attr])
# Add the non-inherited metadata
for attr in sorted(own_metadata(self)):
# don't want e.g. data_dir
if attr not in self.metadata_to_strip and attr not in self.metadata_to_export_to_policy:
val = val_for_xml(attr)
#logging.debug('location.category = {0}, attr = {1}'.format(self.location.category, attr))
xml_object.set(attr, val)
if self.export_to_file():
# Write the definition to a file
url_path = name_to_pathname(self.url_name)
filepath = self.__class__._format_filepath(self.category, url_path)
resource_fs.makedir(os.path.dirname(filepath), allow_recreate=True)
with resource_fs.open(filepath, 'w') as file:
file.write(etree.tostring(xml_object, pretty_print=True))
# And return just a pointer with the category and filename.
record_object = etree.Element(self.category)
else:
record_object = xml_object
record_object.set('url_name', self.url_name)
# Special case for course pointers:
if self.category == 'course':
# add org and course attributes on the pointer tag
record_object.set('org', self.location.org)
record_object.set('course', self.location.course)
return etree.tostring(record_object, pretty_print=True)
def definition_to_xml(self, resource_fs):
"""
Return a new etree Element object created from this modules definition.
"""
raise NotImplementedError(
"%s does not implement definition_to_xml" % self.__class__.__name__)