From 67a732a0ffceaae0c26a2db591fc0ad5e1107a12 Mon Sep 17 00:00:00 2001
From: Calen Pennington
Date: Tue, 3 Jul 2012 13:18:01 -0400
Subject: [PATCH] Allow the HTML module to use the lxml HTML parser when parsing html file includes

---
Reviewer notes (placed after the separator above, so they are not part of
the commit message):

* XmlDescriptor gains a file_to_xml(cls, file_object) classmethod that
  returns etree.parse(file_object).getroot(). from_xml now calls
  cls.file_to_xml(file) where it previously called
  etree.parse(file).getroot() directly.
* HtmlDescriptor defines its own file_to_xml that parses the file object
  with etree.HTMLParser(). Presumably HtmlDescriptor inherits from
  XmlDescriptor via RawDescriptor, which would make this an override;
  that inheritance is not visible in this patch, so confirm it.
* The new call stays inside the existing try/except in from_xml, so a
  failure raised by file_to_xml is still logged with
  "Failed to parse xml in file ..." and re-raised.
* NOTE(review): with the HTML parser the returned root may not be the
  file's own top-level element (lxml is expected to wrap fragments in
  html/body); confirm that the code consuming definition_xml copes.
* NOTE(review): this copy of the patch was re-flowed from a
  whitespace-collapsed version. Blank context lines were placed to match
  the hunk line counts, and the indentation of the Python lines
  (especially the nesting depth in the last hunk) is inferred; check it
  against the target files before applying.

 common/lib/xmodule/xmodule/html_module.py |  6 ++++++
 common/lib/xmodule/xmodule/xml_module.py  | 12 +++++++++++-
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/common/lib/xmodule/xmodule/html_module.py b/common/lib/xmodule/xmodule/html_module.py
index 08fe4bbecc..337a833dc3 100644
--- a/common/lib/xmodule/xmodule/html_module.py
+++ b/common/lib/xmodule/xmodule/html_module.py
@@ -1,4 +1,5 @@
 import logging
+from lxml import etree
 
 from xmodule.x_module import XModule
 from xmodule.raw_module import RawDescriptor
@@ -26,3 +27,8 @@ class HtmlDescriptor(RawDescriptor):
 
     js = {'coffee': [resource_string(__name__, 'js/module/html.coffee')]}
     js_module = 'HTML'
+
+    @classmethod
+    def file_to_xml(cls, file_object):
+        parser = etree.HTMLParser()
+        return etree.parse(file_object, parser).getroot()
diff --git a/common/lib/xmodule/xmodule/xml_module.py b/common/lib/xmodule/xmodule/xml_module.py
index aebb024a59..5699f962cf 100644
--- a/common/lib/xmodule/xmodule/xml_module.py
+++ b/common/lib/xmodule/xmodule/xml_module.py
@@ -90,6 +90,16 @@ class XmlDescriptor(XModuleDescriptor):
             if xml_object.get(attr) is not None:
                 del xml_object.attrib[attr]
 
+    @classmethod
+    def file_to_xml(cls, file_object):
+        """
+        Used when this module wants to parse a file object to xml
+        that will be converted to the definition.
+
+        Returns an lxml Element
+        """
+        return etree.parse(file_object).getroot()
+
     @classmethod
     def from_xml(cls, xml_data, system, org=None, course=None):
         """
@@ -127,7 +137,7 @@ class XmlDescriptor(XModuleDescriptor):
                 filepath = cls._format_filepath(xml_object.tag, filename)
                 with system.resources_fs.open(filepath) as file:
                     try:
-                        definition_xml = etree.parse(file).getroot()
+                        definition_xml = cls.file_to_xml(file)
                     except:
                         log.exception("Failed to parse xml in file %s" % filepath)
                         raise