diff --git a/cms/startup.py b/cms/startup.py
index 87b8287369..86586e8f7c 100644
--- a/cms/startup.py
+++ b/cms/startup.py
@@ -1,6 +1,7 @@
 """
 Module with code executed during Studio startup
 """
+
 from django.conf import settings
 
 # Force settings to run so that the python path is modified
@@ -14,6 +15,10 @@ def run():
     """
     Executed during django startup
     """
+    # Patch the XML libs first: defuse the stdlib xml packages and rebind lxml.etree.
+    from safe_lxml import defuse_xml_libs
+    defuse_xml_libs()
+
     django_utils_translation.patch()
 
     autostartup()
diff --git a/common/lib/safe_lxml/safe_lxml/__init__.py b/common/lib/safe_lxml/safe_lxml/__init__.py
new file mode 100644
index 0000000000..b17efb15cc
--- /dev/null
+++ b/common/lib/safe_lxml/safe_lxml/__init__.py
@@ -0,0 +1,17 @@
+"""
+Defuse vulnerabilities in XML packages.
+"""
+
+
+def defuse_xml_libs():
+    """
+    Defuse the stdlib xml packages via defusedxml and rebind lxml.etree to safe_lxml.etree.
+    """
+    from defusedxml import defuse_stdlib
+    defuse_stdlib()
+
+    import lxml
+    import lxml.etree
+    from . import etree as safe_etree
+
+    lxml.etree = safe_etree
diff --git a/common/lib/safe_lxml/safe_lxml/etree.py b/common/lib/safe_lxml/safe_lxml/etree.py
new file mode 100644
index 0000000000..40b4665ff8
--- /dev/null
+++ b/common/lib/safe_lxml/safe_lxml/etree.py
@@ -0,0 +1,25 @@
+"""
+Safer version of lxml.etree.
+
+It overrides some unsafe functions from lxml.etree with safer versions from defusedxml.
+It also includes a safer XMLParser.
+
+For processing xml always prefer this over using lxml.etree directly.
+"""
+
+from lxml.etree import *  # pylint: disable=wildcard-import, unused-wildcard-import
+from lxml.etree import XMLParser as _XMLParser
+
+# This should be imported after lxml.etree so that it overrides the following attributes.
+from defusedxml.lxml import parse, fromstring, XML
+
+
+class XMLParser(_XMLParser):  # pylint: disable=function-redefined
+    """
+    A safer version of XMLParser which by default disables entity resolution.
+    """
+
+    def __init__(self, *args, **kwargs):
+        if "resolve_entities" not in kwargs:
+            kwargs["resolve_entities"] = False
+        super(XMLParser, self).__init__(*args, **kwargs)
diff --git a/common/lib/safe_lxml/setup.py b/common/lib/safe_lxml/setup.py
new file mode 100644
index 0000000000..8b01267b75
--- /dev/null
+++ b/common/lib/safe_lxml/setup.py
@@ -0,0 +1,15 @@
+"""
+Setup.py for safe_lxml.
+"""
+
+from setuptools import setup
+
+setup(
+    name="safe_lxml",
+    version="1.0",
+    packages=["safe_lxml"],
+    install_requires=[
+        "lxml",
+        "defusedxml"
+    ],
+)
diff --git a/lms/startup.py b/lms/startup.py
index bb6b312a50..5d789c5146 100644
--- a/lms/startup.py
+++ b/lms/startup.py
@@ -20,6 +20,11 @@ def run():
     """
     Executed during django startup
     """
+
+    # Patch the XML libs first: defuse the stdlib xml packages and rebind lxml.etree.
+    from safe_lxml import defuse_xml_libs
+    defuse_xml_libs()
+
     django_utils_translation.patch()
 
     autostartup()
diff --git a/requirements/edx/base.txt b/requirements/edx/base.txt
index 37ee134e28..a72d02f71b 100644
--- a/requirements/edx/base.txt
+++ b/requirements/edx/base.txt
@@ -12,6 +12,7 @@ boto==2.13.3
 celery==3.0.19
 cssselect==0.9.1
 dealer==0.2.3
+defusedxml==0.4.1
 distribute>=0.6.28, <0.7
 django-babel-underscore==0.1.0
 django-celery==3.0.17
diff --git a/requirements/edx/local.txt b/requirements/edx/local.txt
index 0e775d04e3..c677c8d087 100644
--- a/requirements/edx/local.txt
+++ b/requirements/edx/local.txt
@@ -3,6 +3,7 @@
 -e common/lib/calc
 -e common/lib/capa
 -e common/lib/chem
+-e common/lib/safe_lxml
 -e common/lib/sandbox-packages
 -e common/lib/symmath
 -e common/lib/xmodule