diff --git a/common/lib/capa/capa/inputtypes.py b/common/lib/capa/capa/inputtypes.py
index 790e93679d..d638b41350 100644
--- a/common/lib/capa/capa/inputtypes.py
+++ b/common/lib/capa/capa/inputtypes.py
@@ -46,6 +46,7 @@ import re
import shlex # for splitting quoted strings
import sys
import pyparsing
+import html5lib
from .registry import TagRegistry
from chem import chemcalc
@@ -286,7 +287,18 @@ class InputTypeBase(object):
context = self._get_render_context()
html = self.capa_system.render_template(self.template, context)
- return etree.XML(html)
+
+ try:
+ output = etree.XML(html)
+ except etree.XMLSyntaxError as ex:
+ # If `html` contains attrs with no values, like `controls` in <audio controls src='smth'/>,
+ # the XML parser will raise an exception, so we fall back to html5parser, which will set empty "" values for such attrs.
+ try:
+ output = html5lib.parseFragment(html, treebuilder='lxml', namespaceHTMLElements=False)[0]
+ except IndexError:
+ raise ex
+
+ return output
def get_user_visible_answer(self, internal_answer):
"""
diff --git a/common/lib/capa/capa/responsetypes.py b/common/lib/capa/capa/responsetypes.py
index 4b06369599..01a1401091 100644
--- a/common/lib/capa/capa/responsetypes.py
+++ b/common/lib/capa/capa/responsetypes.py
@@ -14,6 +14,7 @@ import cgi
import inspect
import json
import logging
+import html5lib
import numbers
import numpy
import os
@@ -1761,17 +1762,22 @@ class CodeResponse(LoncapaResponse):
" tags: 'correct', 'score', 'msg'")
return fail
- # Next, we need to check that the contents of the external grader message
- # is safe for the LMS.
+ # Next, we need to check that the contents of the external grader message is safe for the LMS.
# 1) Make sure that the message is valid XML (proper opening/closing tags)
- # 2) TODO: Is the message actually HTML?
+ # 2) If it is not valid XML, make sure it is valid HTML. Note: html5lib parser will try to repair any broken HTML
+ # For example: <aaa></bbb> will become <aaa/>.
msg = score_result['msg']
+
try:
etree.fromstring(msg)
except etree.XMLSyntaxError as _err:
- log.error("Unable to parse external grader message as valid"
+ # If `html` contains attrs with no values, like `controls` in <audio controls src='smth'/>,
+ # the XML parser will raise an exception, so we fall back to html5parser, which will set empty "" values for such attrs.
+ parsed = html5lib.parseFragment(msg, treebuilder='lxml', namespaceHTMLElements=False)
+ if not parsed:
+ log.error("Unable to parse external grader message as valid"
" XML: score_msg['msg']=%s", msg)
- return fail
+ return fail
return (True, score_result['correct'], score_result['score'], msg)
diff --git a/common/lib/capa/capa/tests/test_inputtypes.py b/common/lib/capa/capa/tests/test_inputtypes.py
index 866fe75f6e..c52b9763f9 100644
--- a/common/lib/capa/capa/tests/test_inputtypes.py
+++ b/common/lib/capa/capa/tests/test_inputtypes.py
@@ -20,6 +20,7 @@ TODO:
import json
from lxml import etree
import unittest
+import textwrap
import xml.sax.saxutils as saxutils
from . import test_capa_system
@@ -583,6 +584,44 @@ class MatlabTest(unittest.TestCase):
self.assertEqual(input_state['queuestate'], 'queued')
self.assertFalse('queue_msg' in input_state)
+ def test_get_html(self):
+ # usual output
+ output = self.the_input.get_html()
+ self.assertEqual(
+ etree.tostring(output),
+ """
{\'status\': \'queued\', \'button_enabled\': True, \'rows\': \'10\', \'queue_len\': \'3\'\
+, \'mode\': \'\', \'cols\': \'80\', \'STATIC_URL\': \'/dummy-static/\', \'linenumbers\': \'true\'\
+, \'queue_msg\': \'\', \'value\': \'print "good evening"\', \'msg\': u\'Submitted\
+. As soon as a response is returned, this message will be replaced by that feedback.\', \'hidden\': \'\'\
+, \'id\': \'prob_1_2\', \'tabsize\': 4}
"""
+ )
+
+ # Test markup that is valid HTML5 but is not parsable by the XML parser.
+ old_render_template = self.the_input.capa_system.render_template
+ self.the_input.capa_system.render_template = lambda *args: textwrap.dedent("""
+
+
+
Right click here and click \"Save As\" to download the file
Right click here and click \"Save As\" to download the file
+
+ """).replace('\n', '').replace('\'', '\"')
+ )
+
+ # check that exception is raised during parsing for html.
+ self.the_input.capa_system.render_template = lambda *args: "MESSAGE', # Valid XML
+ textwrap.dedent("""
+
+
+
Right click here and click \"Save As\" to download the file
+
+ """).replace('\n', ''), # Valid HTML5 real case Matlab response, invalid XML
+ '' # Invalid XML, but will be parsed by html5lib to
+ ]
+
+ invalid_grader_msgs = [
+ '