diff --git a/common/lib/xmodule/xmodule/html_module.py b/common/lib/xmodule/xmodule/html_module.py
index 08fe4bbecc..337a833dc3 100644
--- a/common/lib/xmodule/xmodule/html_module.py
+++ b/common/lib/xmodule/xmodule/html_module.py
@@ -1,4 +1,5 @@
import logging
+from lxml import etree
from xmodule.x_module import XModule
from xmodule.raw_module import RawDescriptor
@@ -26,3 +27,8 @@ class HtmlDescriptor(RawDescriptor):
js = {'coffee': [resource_string(__name__, 'js/module/html.coffee')]}
js_module = 'HTML'
+
+ @classmethod
+ def file_to_xml(cls, file_object):
+     """Parse file_object with lxml's HTMLParser and return the root Element."""
+     return etree.parse(file_object, etree.HTMLParser()).getroot()
diff --git a/common/lib/xmodule/xmodule/xml_module.py b/common/lib/xmodule/xmodule/xml_module.py
index aebb024a59..5699f962cf 100644
--- a/common/lib/xmodule/xmodule/xml_module.py
+++ b/common/lib/xmodule/xmodule/xml_module.py
@@ -90,6 +90,16 @@ class XmlDescriptor(XModuleDescriptor):
if xml_object.get(attr) is not None:
del xml_object.attrib[attr]
+ @classmethod
+ def file_to_xml(cls, file_object):
+     """
+     Parse a file object into the xml that will be converted to the definition.
+
+     Returns the root lxml Element of the parsed document.
+     """
+     tree = etree.parse(file_object)
+     return tree.getroot()
+
@classmethod
def from_xml(cls, xml_data, system, org=None, course=None):
"""
@@ -127,7 +137,7 @@ class XmlDescriptor(XModuleDescriptor):
filepath = cls._format_filepath(xml_object.tag, filename)
with system.resources_fs.open(filepath) as file:
try:
- definition_xml = etree.parse(file).getroot()
+ definition_xml = cls.file_to_xml(file)
except:
log.exception("Failed to parse xml in file %s" % filepath)
raise