From 97e8f94379323815c6947eca160a65bb8fa196a8 Mon Sep 17 00:00:00 2001 From: Alexander Kryklia Date: Tue, 3 Jun 2014 18:56:36 +0300 Subject: [PATCH] Improve escaping in code response. --- common/lib/capa/capa/inputtypes.py | 16 ++-------- common/lib/capa/capa/tests/test_inputtypes.py | 10 +++++++ common/lib/capa/capa/tests/test_util.py | 22 ++++++++++++-- common/lib/capa/capa/util.py | 30 ++++++++++++++++++- 4 files changed, 61 insertions(+), 17 deletions(-) diff --git a/common/lib/capa/capa/inputtypes.py b/common/lib/capa/capa/inputtypes.py index 3106e4fa96..0b6a27fd3f 100644 --- a/common/lib/capa/capa/inputtypes.py +++ b/common/lib/capa/capa/inputtypes.py @@ -50,6 +50,7 @@ import pyparsing import html5lib import bleach +from .util import sanitize_html from .registry import TagRegistry from chem import chemcalc from calc.preview import latex_preview @@ -821,19 +822,7 @@ class MatlabInput(CodeInput): # this is only set if we don't have a graded response # the graded response takes precedence if 'queue_msg' in self.input_state and self.status in ['queued', 'incomplete', 'unsubmitted']: - attributes = bleach.ALLOWED_ATTRIBUTES.copy() - # Yuck! but bleach does not offer the option of passing in allowed_protocols, - # and matlab uses data urls for images - if u'data' not in bleach.BleachSanitizer.allowed_protocols: - bleach.BleachSanitizer.allowed_protocols.append(u'data') - attributes.update({'*': ['class', 'style', 'id'], - 'audio': ['controls', 'autobuffer', 'autoplay', 'src'], - 'img': ['src', 'width', 'height', 'class']}) - self.queue_msg = bleach.clean(self.input_state['queue_msg'], - tags=bleach.ALLOWED_TAGS + ['div', 'p', 'audio', 'pre', 'img', 'span'], - styles=['white-space'], - attributes=attributes - ) + self.queue_msg = sanitize_html(self.input_state['queue_msg']) if 'queuestate' in self.input_state and self.input_state['queuestate'] == 'queued': self.status = 'queued' @@ -905,6 +894,7 @@ class MatlabInput(CodeInput): 'button_enabled': self.button_enabled(), 'matlab_editor_js': '{static_url}js/vendor/CodeMirror/octave.js'.format( static_url=self.capa_system.STATIC_URL), + 'msg': sanitize_html(self.msg) # sanitize msg before rendering into template } return extra_context diff --git a/common/lib/capa/capa/tests/test_inputtypes.py b/common/lib/capa/capa/tests/test_inputtypes.py index c173e1f990..1982a5f926 100644 --- a/common/lib/capa/capa/tests/test_inputtypes.py +++ b/common/lib/capa/capa/tests/test_inputtypes.py @@ -757,6 +757,16 @@ class MatlabTest(unittest.TestCase): expected = "<script>Test message</script>" self.assertEqual(the_input.queue_msg, expected) + def test_matlab_sanitize_msg(self): + """ + Check that the_input.msg is sanitized. + """ + not_allowed_tag = 'script' + self.the_input.msg = "<{0}>Test message".format(not_allowed_tag) + expected = "<script>Test message</script>" + self.assertEqual(self.the_input._get_render_context()['msg'], expected) + + def html_tree_equal(received, expected): """ Returns whether two etree Elements are the same, with insensitivity to attribute order. diff --git a/common/lib/capa/capa/tests/test_util.py b/common/lib/capa/capa/tests/test_util.py index 1b0fb11de7..42c8975cd1 100644 --- a/common/lib/capa/capa/tests/test_util.py +++ b/common/lib/capa/capa/tests/test_util.py @@ -1,9 +1,10 @@ -"""Tests capa util""" - +""" +Tests capa util +""" import unittest import textwrap from . import test_capa_system -from capa.util import compare_with_tolerance +from capa.util import compare_with_tolerance, sanitize_html class UtilTest(unittest.TestCase): @@ -80,3 +81,18 @@ class UtilTest(unittest.TestCase): self.assertFalse(result) result = compare_with_tolerance(infinity, infinity, '1.0', False) self.assertTrue(result) + + + def test_sanitize_html(self): + """ + Test for html sanitization with bleach. + """ + allowed_tags = ['div', 'p', 'audio', 'pre', 'span'] + for tag in allowed_tags: + queue_msg = "<{0}>Test message".format(tag) + self.assertEqual(sanitize_html(queue_msg), queue_msg) + + not_allowed_tag = 'script' + queue_msg = "<{0}>Test message".format(not_allowed_tag) + expected = "<script>Test message</script>" + self.assertEqual(sanitize_html(queue_msg), expected) diff --git a/common/lib/capa/capa/util.py b/common/lib/capa/capa/util.py index 5b54d7c666..2db5024b32 100644 --- a/common/lib/capa/capa/util.py +++ b/common/lib/capa/capa/util.py @@ -1,6 +1,10 @@ +""" +Utility functions for capa. +""" +import bleach + from calc import evaluator from cmath import isinf - #----------------------------------------------------------------------------- # # Utility functions used in CAPA responsetypes @@ -134,3 +138,27 @@ def find_with_default(node, path, default): return v.text else: return default + + +def sanitize_html(html_code): + """ + Sanitize html_code for safe embed on LMS pages. + + Used to sanitize XQueue responses from Matlab. + """ + attributes = bleach.ALLOWED_ATTRIBUTES.copy() + # Yuck! but bleach does not offer the option of passing in allowed_protocols, + # and matlab uses data urls for images + if u'data' not in bleach.BleachSanitizer.allowed_protocols: + bleach.BleachSanitizer.allowed_protocols.append(u'data') + attributes.update({ + '*': ['class', 'style', 'id'], + 'audio': ['controls', 'autobuffer', 'autoplay', 'src'], + 'img': ['src', 'width', 'height', 'class'] + }) + output = bleach.clean(html_code, + tags=bleach.ALLOWED_TAGS + ['div', 'p', 'audio', 'pre', 'img', 'span'], + styles=['white-space'], + attributes=attributes + ) + return output