Apply a filter to remove dangerous HTML content before rendering the

course overview field on the course_about page
This commit is contained in:
Hammad Ahmad Waqas
2019-06-18 12:43:17 +05:00
parent e311dc2e4c
commit 63d7b7d6fa
3 changed files with 85 additions and 2 deletions

View File

@@ -9,7 +9,7 @@ from django.conf import settings
from six import text_type
from edxmako.shortcuts import marketing_link
from openedx.core.djangolib.js_utils import js_escaped_string
from openedx.core.djangolib.markup import HTML, Text
from openedx.core.djangolib.markup import clean_dangerous_html, HTML, Text
from openedx.core.lib.courses import course_image_url
from six import string_types
@@ -207,7 +207,7 @@ from six import string_types
% endif
<div class="inner-wrapper">
${HTML(get_course_about_section(request, course, "overview"))}
${clean_dangerous_html(get_course_about_section(request, course, "overview"))}
</div>
</div>
</%block>

View File

@@ -5,6 +5,7 @@ Utilities for use in Mako markup.
from __future__ import absolute_import
import markupsafe
import bleach
from lxml.html.clean import Cleaner
from mako.filters import decode
# Text() can be used to declare a string as plain text, as HTML() is used
@@ -55,3 +56,20 @@ def strip_all_tags_but_br(string_to_strip):
string_to_strip = bleach.clean(string_to_strip, tags=['br'], strip=True)
return HTML(string_to_strip)
def clean_dangerous_html(html):
    """
    Sanitize an HTML string and wrap the result in HTML() so it is not escaped again on output.

    Falsy input (for example ``None`` or an empty string) is returned
    untouched. Any other value is passed through an lxml ``Cleaner``
    configured with ``style=True, inline_style=False, safe_attrs_only=False``
    and the cleaned markup is returned wrapped in ``HTML``.

    Usage:
        <%page expression_filter="h"/>
        <%!
        from openedx.core.djangolib.markup import clean_dangerous_html
        %>
        ${course_details.overview | n, clean_dangerous_html}
    """
    if html:
        sanitizer = Cleaner(style=True, inline_style=False, safe_attrs_only=False)
        return HTML(sanitizer.clean_html(html))
    # Nothing to sanitize: hand the falsy value back exactly as received.
    return html

View File

@@ -7,6 +7,7 @@ from __future__ import absolute_import
import unittest
import ddt
from bs4 import BeautifulSoup
from django.utils.translation import ugettext as _
from django.utils.translation import ungettext
from mako.template import Template
@@ -100,3 +101,67 @@ class FormatHtmlTest(unittest.TestCase):
html = strip_all_tags_but_br('{name}<br><script>')
html = html.format(name='Rock & Roll')
self.assertEqual(html.decode(), u'Rock &amp; Roll<br>')
def test_clean_dengers_html_filter(self):
"""
Verify that the clean_dangerous_html filter removes the expected tags.

Renders a Mako template that pipes a deliberately hostile HTML document
through ``n, clean_dangerous_html``, parses the rendered output with
BeautifulSoup and asserts which elements survive and which are gone.
"""
# NOTE(review): "dengers" in the method name looks like a typo for
# "dangerous"; left unchanged here so the test id stays stable.
# The template enables the page-wide "h" expression filter and then applies
# "n, clean_dangerous_html" to the hostile document defined inside it.
template = Template(
u"""
<%page expression_filter="h"/>
<%!
from openedx.core.djangolib.markup import clean_dangerous_html
%>
<%
html_content = '''
<html>
<head>
<script type="text/javascript" src="evil-site"></script>
<link rel="alternate" type="text/rss" src="evil-rss">
<style>
body {
background-image: url(javascript:do_evil)
};
div {
color: expression(evil)
};
</style>
</head>
<body onload="evil_function()">
<!-- I am interpreted for EVIL! -->
<a href="javascript:evil_function()">a link</a>
<a href="#" onclick="evil_function()">another link</a>
<p onclick="evil_function()">a paragraph</p>
<div style="display: none">secret EVIL!</div>
<object> of EVIL!</object>
<iframe src="evil-site"></iframe>
<form action="evil-site">
Password: <input type="password" name="password">
</form>
<blink>annoying EVIL!</blink>
<a href="evil-site">spam spam SPAM!</a>
<image src="evil!">
</body>
</html>
'''
%>
${html_content | n, clean_dangerous_html}
"""
)
# Render with no context and parse the output so tags can be queried by name.
rendered_template = template.render()
html_soup = BeautifulSoup(rendered_template, 'html.parser')
# Elements expected to survive cleaning, including the inline style attribute.
self.assertTrue(html_soup.find('a'))
self.assertTrue(html_soup.find('div'))
self.assertTrue(html_soup.find('div', attrs={'style': 'display: none'}))
self.assertTrue(html_soup.find('p'))
# NOTE(review): the input uses <image>, the assertion looks for <img>;
# presumably the cleaner's parser normalizes the tag name -- confirm.
self.assertTrue(html_soup.find('img'))
# Event-handler attributes and the listed tags must not appear in the output.
self.assertFalse(html_soup.find('a', attrs={'onclick': 'evil_function()'}))
self.assertFalse(html_soup.find('html'))
self.assertFalse(html_soup.find('head'))
self.assertFalse(html_soup.find('script'))
self.assertFalse(html_soup.find('style'))
self.assertFalse(html_soup.find('link'))
self.assertFalse(html_soup.find('iframe'))
self.assertFalse(html_soup.find('form'))
self.assertFalse(html_soup.find('blink'))
self.assertFalse(html_soup.find('object'))