Apply filter to remove dangerous HTML content before rendering the

course overview field on the course_about page
2019-06-18 12:43:17 +05:00
parent e311dc2e4c
commit 63d7b7d6fa
3 changed files with 85 additions and 2 deletions
--- a/lms/templates/courseware/course_about.html
+++ b/lms/templates/courseware/course_about.html
@@ -9,7 +9,7 @@ from django.conf import settings
 from six import text_type
 from edxmako.shortcuts import marketing_link
 from openedx.core.djangolib.js_utils import js_escaped_string
-from openedx.core.djangolib.markup import HTML, Text
+from openedx.core.djangolib.markup import clean_dangerous_html, HTML, Text
 from openedx.core.lib.courses import course_image_url

 from six import string_types
@@ -207,7 +207,7 @@ from six import string_types
      % endif

      <div class="inner-wrapper">
-        ${HTML(get_course_about_section(request, course, "overview"))}
+        ${clean_dangerous_html(get_course_about_section(request, course, "overview"))}
      </div>
    </div>
    </%block>
--- a/openedx/core/djangolib/markup.py
+++ b/openedx/core/djangolib/markup.py
@@ -5,6 +5,7 @@ Utilities for use in Mako markup.
 from __future__ import absolute_import
 import markupsafe
 import bleach
+from lxml.html.clean import Cleaner
 from mako.filters import decode

 # Text() can be used to declare a string as plain text, as HTML() is used
@@ -55,3 +56,20 @@ def strip_all_tags_but_br(string_to_strip):
    string_to_strip = bleach.clean(string_to_strip, tags=['br'], strip=True)

    return HTML(string_to_strip)
+
+
+def clean_dangerous_html(html):
+    """
+    Strip unsafe markup (e.g. script and style tags) from `html` and mark the result as HTML.
+
+    Falsy input is returned unchanged; otherwise the cleaned string is wrapped in HTML() so
+    that it won't be escaped before output. Inline style attributes are kept. Mako usage:
+        <%page expression_filter="h"/>
+        <%! from openedx.core.djangolib.markup import clean_dangerous_html %>
+        ${course_details.overview | n, clean_dangerous_html}
+    """
+    if not html:
+        return html
+    cleaner = Cleaner(style=True, inline_style=False, safe_attrs_only=False)
+    html = cleaner.clean_html(html)
+    return HTML(html)
--- a/openedx/core/djangolib/tests/test_markup.py
+++ b/openedx/core/djangolib/tests/test_markup.py
@@ -7,6 +7,7 @@ from __future__ import absolute_import
 import unittest

 import ddt
+from bs4 import BeautifulSoup
 from django.utils.translation import ugettext as _
 from django.utils.translation import ungettext
 from mako.template import Template
@@ -100,3 +101,67 @@ class FormatHtmlTest(unittest.TestCase):
        html = strip_all_tags_but_br('{name}<br><script>')
        html = html.format(name='Rock & Roll')
        self.assertEqual(html.decode(), u'Rock &amp; Roll<br>')
+
+    def test_clean_dengers_html_filter(self):
+        """ Verify that clean_dangerous_html keeps benign markup and strips unsafe tags and attributes. """
+        template = Template(
+            u"""
+                <%page expression_filter="h"/>
+                <%!
+                from openedx.core.djangolib.markup import clean_dangerous_html
+                %>
+                <%
+                    html_content = '''
+                        <html>
+                            <head>
+                                <script type="text/javascript" src="evil-site"></script>
+                                <link rel="alternate" type="text/rss" src="evil-rss">
+                                <style>
+                                    body {
+                                        background-image: url(javascript:do_evil)
+                                    };
+                                    div {
+                                        color: expression(evil)
+                                    };
+                                </style>
+                            </head>
+                            <body onload="evil_function()">
+                                <!-- I am interpreted for EVIL! -->
+                                <a href="javascript:evil_function()">a link</a>
+                                <a href="#" onclick="evil_function()">another link</a>
+                                <p onclick="evil_function()">a paragraph</p>
+                                <div style="display: none">secret EVIL!</div>
+                                <object> of EVIL!</object>
+                                <iframe src="evil-site"></iframe>
+                                <form action="evil-site">
+                                    Password: <input type="password" name="password">
+                                </form>
+                                <blink>annoying EVIL!</blink>
+                                <a href="evil-site">spam spam SPAM!</a>
+                                <image src="evil!">
+                            </body>
+                        </html>
+                    '''
+                %>
+                ${html_content | n, clean_dangerous_html}
+            """
+        )
+        rendered_template = template.render()
+        html_soup = BeautifulSoup(rendered_template, 'html.parser')
+        # Benign tags and the inline style attribute must survive cleaning.
+        self.assertTrue(html_soup.find('a'))
+        self.assertTrue(html_soup.find('div'))
+        self.assertTrue(html_soup.find('div', attrs={'style': 'display: none'}))
+        self.assertTrue(html_soup.find('p'))
+        self.assertTrue(html_soup.find('img'))
+        # The onclick handler and the document-level/dangerous tags must be removed.
+        self.assertFalse(html_soup.find('a', attrs={'onclick': 'evil_function()'}))
+        self.assertFalse(html_soup.find('html'))
+        self.assertFalse(html_soup.find('head'))
+        self.assertFalse(html_soup.find('script'))
+        self.assertFalse(html_soup.find('style'))
+        self.assertFalse(html_soup.find('link'))
+        self.assertFalse(html_soup.find('iframe'))
+        self.assertFalse(html_soup.find('form'))
+        self.assertFalse(html_soup.find('blink'))
+        self.assertFalse(html_soup.find('object'))