From 3d49d80b587098483f2b72c2efdb690336cf00bc Mon Sep 17 00:00:00 2001 From: Ned Batchelder Date: Wed, 22 Nov 2017 16:59:23 -0500 Subject: [PATCH] Fix safe_lxml. SEC-338 The imports were sorted in May, which broke the monkeypatching in safe_lxml. I added two tests that the XML parsers are properly patched, but they didn't pass until I added the monkeypatching to the start of the test runs. Once that was done, some tests failed because they relied on specific details of how empty elements are represented. Those tests are now fixed. --- cms/conftest.py | 5 +++++ .../lib/capa/capa/tests/test_html_render.py | 6 ++---- .../capa/capa/tests/test_targeted_feedback.py | 8 ++++---- common/lib/conftest.py | 5 +++++ common/lib/safe_lxml/safe_lxml/etree.py | 11 +++++++--- common/lib/safe_lxml/safe_lxml/tests.py | 20 +++++++++++++++++++ common/test/conftest.py | 5 +++++ 7 files changed, 49 insertions(+), 11 deletions(-) create mode 100644 common/lib/conftest.py create mode 100644 common/lib/safe_lxml/safe_lxml/tests.py create mode 100644 common/test/conftest.py diff --git a/cms/conftest.py b/cms/conftest.py index 3b07f67be0..99e340933e 100644 --- a/cms/conftest.py +++ b/cms/conftest.py @@ -14,6 +14,11 @@ import contracts import pytest +# Patch the xml libs before anything else. +from safe_lxml import defuse_xml_libs +defuse_xml_libs() + + def pytest_configure(config): """ Do core setup operations from manage.py before collecting tests. diff --git a/common/lib/capa/capa/tests/test_html_render.py b/common/lib/capa/capa/tests/test_html_render.py index b0c6d88ecf..fd9ccbfbe9 100644 --- a/common/lib/capa/capa/tests/test_html_render.py +++ b/common/lib/capa/capa/tests/test_html_render.py @@ -291,9 +291,7 @@ class CapaHtmlRenderTest(unittest.TestCase): # Comments and processing instructions should be skipped. xml_str = textwrap.dedent("""\ - - ]> + @@ -305,7 +303,7 @@ class CapaHtmlRenderTest(unittest.TestCase): # Render the HTML the_html = problem.get_html() - self.assertRegexpMatches(the_html, r"
") + self.assertRegexpMatches(the_html, r"
\s*
") def _create_test_file(self, path, content_str): test_fp = self.capa_system.filestore.open(path, "w") diff --git a/common/lib/capa/capa/tests/test_targeted_feedback.py b/common/lib/capa/capa/tests/test_targeted_feedback.py index 9406208df5..fbe85fc945 100644 --- a/common/lib/capa/capa/tests/test_targeted_feedback.py +++ b/common/lib/capa/capa/tests/test_targeted_feedback.py @@ -190,14 +190,14 @@ class CapaTargetedFeedbackTest(unittest.TestCase): problem.done = True problem.student_answers = {'1_2_1': 'choice_0'} the_html = problem.get_html() - self.assertRegexpMatches(the_html, r"") + self.assertRegexpMatches(the_html, r"\s*") # New problem with same XML -- try the correct choice. problem = new_loncapa_problem(xml_str) problem.done = True problem.student_answers = {'1_2_1': 'choice_2'} # correct the_html = problem.get_html() - self.assertRegexpMatches(the_html, r"") + self.assertRegexpMatches(the_html, r"\s*") def test_targeted_feedback_no_solution_element(self): xml_str = textwrap.dedent(""" @@ -581,7 +581,7 @@ class CapaTargetedFeedbackTest(unittest.TestCase): # Q1 and Q2 have no feedback self.assertRegexpMatches( without_new_lines, - r'.*' + r'\s*.*\s*' ) def test_targeted_feedback_multiple_answer_1(self): @@ -594,7 +594,7 @@ class CapaTargetedFeedbackTest(unittest.TestCase): self.assertRegexpMatches( without_new_lines, r'.*?explanation-id="feedback1".*?.*' + - r'' + r'\s*' ) def test_targeted_feedback_multiple_answer_2(self): diff --git a/common/lib/conftest.py b/common/lib/conftest.py new file mode 100644 index 0000000000..54d4a8ecdf --- /dev/null +++ b/common/lib/conftest.py @@ -0,0 +1,5 @@ +"""Code run by pylint before running any tests.""" + +# Patch the xml libs before anything else. +from safe_lxml import defuse_xml_libs +defuse_xml_libs() diff --git a/common/lib/safe_lxml/safe_lxml/etree.py b/common/lib/safe_lxml/safe_lxml/etree.py index 696e1b2103..4fbb6b9473 100644 --- a/common/lib/safe_lxml/safe_lxml/etree.py +++ b/common/lib/safe_lxml/safe_lxml/etree.py @@ -7,11 +7,16 @@ It also includes a safer XMLParser. For processing xml always prefer this over using lxml.etree directly. """ +# Names are imported into this module so that it can be a stand-in for +# lxml.etree. The names are not used here, so disable the pylint warning. +# pylint: disable=unused-import, wildcard-import, unused-wildcard-import + +from lxml.etree import XMLParser as _XMLParser +from lxml.etree import * +from lxml.etree import _Element, _ElementTree + # This should be imported after lxml.etree so that it overrides the following attributes. from defusedxml.lxml import XML, fromstring, parse -from lxml.etree import XMLParser as _XMLParser -from lxml.etree import * # pylint: disable=wildcard-import, unused-wildcard-import; pylint: disable=unused-import -from lxml.etree import _Element, _ElementTree class XMLParser(_XMLParser): # pylint: disable=function-redefined diff --git a/common/lib/safe_lxml/safe_lxml/tests.py b/common/lib/safe_lxml/safe_lxml/tests.py new file mode 100644 index 0000000000..1a7fbb6e42 --- /dev/null +++ b/common/lib/safe_lxml/safe_lxml/tests.py @@ -0,0 +1,20 @@ +"""Test that we have defused XML.""" + +import defusedxml +from lxml import etree + +import pytest + + +@pytest.mark.parametrize("attr", ["XML", "fromstring", "parse"]) +def test_etree_is_defused(attr): + func = getattr(etree, attr) + assert "defused" in func.__code__.co_filename + + +def test_entities_arent_resolved(): + # Make sure we have disabled entity resolution. + xml = ']> &hi;' + parser = etree.XMLParser() + with pytest.raises(defusedxml.EntitiesForbidden): + _ = etree.XML(xml, parser=parser) diff --git a/common/test/conftest.py b/common/test/conftest.py new file mode 100644 index 0000000000..54d4a8ecdf --- /dev/null +++ b/common/test/conftest.py @@ -0,0 +1,5 @@ +"""Code run by pylint before running any tests.""" + +# Patch the xml libs before anything else. +from safe_lxml import defuse_xml_libs +defuse_xml_libs()