From 63d7b7d6fa762ec13f976f28356ae614cb7336fe Mon Sep 17 00:00:00 2001 From: Hammad Ahmad Waqas Date: Tue, 18 Jun 2019 12:43:17 +0500 Subject: [PATCH] applying filter to remove dangerous html content before rendering in course_about page on course overview field --- lms/templates/courseware/course_about.html | 4 +- openedx/core/djangolib/markup.py | 18 ++++++ openedx/core/djangolib/tests/test_markup.py | 65 +++++++++++++++++++++ 3 files changed, 85 insertions(+), 2 deletions(-) diff --git a/lms/templates/courseware/course_about.html b/lms/templates/courseware/course_about.html index 7cd5e3f146..c1412399f0 100644 --- a/lms/templates/courseware/course_about.html +++ b/lms/templates/courseware/course_about.html @@ -9,7 +9,7 @@ from django.conf import settings from six import text_type from edxmako.shortcuts import marketing_link from openedx.core.djangolib.js_utils import js_escaped_string -from openedx.core.djangolib.markup import HTML, Text +from openedx.core.djangolib.markup import clean_dangerous_html, HTML, Text from openedx.core.lib.courses import course_image_url from six import string_types @@ -207,7 +207,7 @@ from six import string_types % endif
- ${HTML(get_course_about_section(request, course, "overview"))} + ${clean_dangerous_html(get_course_about_section(request, course, "overview"))}
diff --git a/openedx/core/djangolib/markup.py b/openedx/core/djangolib/markup.py index d06ac3a4d6..b99fdd3991 100644 --- a/openedx/core/djangolib/markup.py +++ b/openedx/core/djangolib/markup.py @@ -5,6 +5,7 @@ Utilities for use in Mako markup. from __future__ import absolute_import import markupsafe import bleach +from lxml.html.clean import Cleaner from mako.filters import decode # Text() can be used to declare a string as plain text, as HTML() is used @@ -55,3 +56,20 @@ def strip_all_tags_but_br(string_to_strip): string_to_strip = bleach.clean(string_to_strip, tags=['br'], strip=True) return HTML(string_to_strip) + + +def clean_dangerous_html(html): + """ + Remove unsafe tags from an HTML string and mark the result as HTML, so that it won't be escaped before output. + Usage: + <%page expression_filter="h"/> + <%! + from openedx.core.djangolib.markup import clean_dangerous_html + %> + ${course_details.overview | n, clean_dangerous_html} + """ + if not html: + return html + cleaner = Cleaner(style=True, inline_style=False, safe_attrs_only=False) + html = cleaner.clean_html(html) + return HTML(html) diff --git a/openedx/core/djangolib/tests/test_markup.py b/openedx/core/djangolib/tests/test_markup.py index 9d68a6f333..ad149f3d1d 100644 --- a/openedx/core/djangolib/tests/test_markup.py +++ b/openedx/core/djangolib/tests/test_markup.py @@ -7,6 +7,7 @@ from __future__ import absolute_import import unittest import ddt +from bs4 import BeautifulSoup from django.utils.translation import ugettext as _ from django.utils.translation import ungettext from mako.template import Template @@ -100,3 +101,67 @@ class FormatHtmlTest(unittest.TestCase): html = strip_all_tags_but_br('{name}
+ + + + + + a link + another link +

a paragraph

+
secret EVIL!
+ of EVIL! + +
+ Password: +
+ annoying EVIL! + spam spam SPAM! + + + + ''' + %> + ${html_content | n, clean_dangerous_html} + """ + ) + rendered_template = template.render() + html_soup = BeautifulSoup(rendered_template, 'html.parser') + + self.assertTrue(html_soup.find('a')) + self.assertTrue(html_soup.find('div')) + self.assertTrue(html_soup.find('div', attrs={'style': 'display: none'})) + self.assertTrue(html_soup.find('p')) + self.assertTrue(html_soup.find('img')) + + self.assertFalse(html_soup.find('a', attrs={'onclick': 'evil_function()'})) + self.assertFalse(html_soup.find('html')) + self.assertFalse(html_soup.find('head')) + self.assertFalse(html_soup.find('script')) + self.assertFalse(html_soup.find('style')) + self.assertFalse(html_soup.find('link')) + self.assertFalse(html_soup.find('iframe')) + self.assertFalse(html_soup.find('form')) + self.assertFalse(html_soup.find('blink')) + self.assertFalse(html_soup.find('object'))