chore: replace bleach with nh3
This commit is contained in:
@@ -10,7 +10,7 @@ import logging
|
||||
import urllib
|
||||
from functools import wraps
|
||||
|
||||
import bleach
|
||||
import nh3
|
||||
from django.db import transaction
|
||||
from django.db.models import Q
|
||||
from django.http import HttpResponse, HttpResponseBadRequest, HttpResponseForbidden, HttpResponseServerError
|
||||
@@ -88,8 +88,8 @@ def search_certificates(request):
|
||||
]
|
||||
|
||||
"""
|
||||
unbleached_filter = urllib.parse.unquote(urllib.parse.quote_plus(request.GET.get("user", "")))
|
||||
user_filter = bleach.clean(unbleached_filter)
|
||||
uncleaned_filter = urllib.parse.unquote(urllib.parse.quote_plus(request.GET.get("user", "")))
|
||||
user_filter = nh3.clean(uncleaned_filter)
|
||||
if not user_filter:
|
||||
msg = _("user is not given.")
|
||||
return HttpResponseBadRequest(msg)
|
||||
|
||||
@@ -10,7 +10,7 @@ from collections import OrderedDict, namedtuple
|
||||
from datetime import datetime
|
||||
from urllib.parse import quote_plus, urlencode, urljoin, urlparse, urlunparse
|
||||
|
||||
import bleach
|
||||
import nh3
|
||||
import requests
|
||||
from django.conf import settings
|
||||
from django.contrib.auth.decorators import login_required
|
||||
@@ -1550,7 +1550,7 @@ def render_xblock(request, usage_key_string, check_if_enrolled=True, disable_sta
|
||||
requested_view = request.GET.get('view', 'student_view')
|
||||
if requested_view != 'student_view' and requested_view != 'public_view': # lint-amnesty, pylint: disable=consider-using-in
|
||||
return HttpResponseBadRequest(
|
||||
f"Rendering of the xblock view '{bleach.clean(requested_view, strip=True)}' is not supported."
|
||||
f"Rendering of the xblock view '{nh3.clean(requested_view)}' is not supported."
|
||||
)
|
||||
|
||||
staff_access = has_access(request.user, 'staff', course_key)
|
||||
|
||||
@@ -1038,7 +1038,7 @@ def sanitize_body(body):
|
||||
This is possibly overly broad, and might tamper with legitimate posts that
|
||||
contain this code in fenced code blocks. As far as we can tell, this is an
|
||||
extra layer of protection, and current handling in the front end and using
|
||||
bleach for HTML rendering on the server side should cover these cases.
|
||||
nh3 for HTML rendering on the server side should cover these cases.
|
||||
"""
|
||||
if not body:
|
||||
return body
|
||||
|
||||
@@ -4,17 +4,16 @@ Content rendering functionality
|
||||
Note that this module is designed to imitate the front end behavior as
|
||||
implemented in Markdown.Sanitizer.js.
|
||||
"""
|
||||
import bleach
|
||||
import nh3
|
||||
import markdown
|
||||
|
||||
ALLOWED_TAGS = bleach.ALLOWED_TAGS | {
|
||||
ALLOWED_TAGS = nh3.ALLOWED_TAGS | {
|
||||
'br', 'dd', 'del', 'dl', 'dt', 'h1', 'h2', 'h3', 'h4', 'hr', 'img', 'kbd', 'p', 'pre', 's',
|
||||
'strike', 'sub', 'sup', 'table', 'thead', 'th', 'tbody', 'tr', 'td', 'tfoot'
|
||||
}
|
||||
ALLOWED_PROTOCOLS = {"http", "https", "ftp", "mailto"}
|
||||
ALLOWED_ATTRIBUTES = {
|
||||
"a": ["href", "title", "target", "rel"],
|
||||
"img": ["src", "alt", "title", "width", "height"],
|
||||
"a": {"href", "title", "target", "rel"},
|
||||
"img": {"src", "alt", "title", "width", "height"},
|
||||
}
|
||||
|
||||
|
||||
@@ -25,17 +24,16 @@ def render_body(raw_body):
|
||||
This includes the following steps:
|
||||
|
||||
* Convert Markdown to HTML
|
||||
* Sanitise HTML using bleach
|
||||
* Sanitise HTML using nh3
|
||||
|
||||
Note that this does not prevent Markdown syntax inside a MathJax block from
|
||||
being processed, which the forums JavaScript code does.
|
||||
"""
|
||||
rendered_html = markdown.markdown(raw_body)
|
||||
sanitised_html = bleach.clean(
|
||||
sanitised_html = nh3.clean(
|
||||
rendered_html,
|
||||
tags=ALLOWED_TAGS,
|
||||
protocols=ALLOWED_PROTOCOLS,
|
||||
strip=True,
|
||||
attributes=ALLOWED_ATTRIBUTES
|
||||
attributes=ALLOWED_ATTRIBUTES,
|
||||
link_rel=None,
|
||||
)
|
||||
return sanitised_html
|
||||
|
||||
@@ -84,7 +84,7 @@ class RenderBodyTest(TestCase):
|
||||
|
||||
def test_script_tag(self):
|
||||
raw_body = '<script type="text/javascript">alert("evil script");</script>'
|
||||
assert render_body(raw_body) == 'alert("evil script");'
|
||||
assert render_body(raw_body) == ''
|
||||
|
||||
@ddt.data(
|
||||
("br", '<p>foo<br>bar</p>'), # br is allowed inside p
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
<%page args="grade_summary, grade_cutoffs, graph_div_id, show_grade_breakdown = True, show_grade_cutoffs = True, **kwargs"/>
|
||||
<%!
|
||||
import bleach
|
||||
import nh3
|
||||
import json
|
||||
import math
|
||||
import six
|
||||
@@ -74,7 +74,7 @@ $(function () {
|
||||
## allowing the display of such images, and remove any previously stored HTML
|
||||
## to prevent ugly HTML from being shown to learners.
|
||||
## xss-lint: disable=javascript-jquery-append
|
||||
ticks.append( [tickIndex, bleach.clean(section['label'], tags=set(), strip=True)] )
|
||||
ticks.append( [tickIndex, nh3.clean(section['label'], tags=set())] )
|
||||
|
||||
if section['category'] in detail_tooltips:
|
||||
## xss-lint: disable=javascript-jquery-append
|
||||
|
||||
@@ -61,7 +61,7 @@ from django.utils.translation import gettext as _
|
||||
% if has_score and comment:
|
||||
<h4 class="hd hd-4 problem-feedback-label">${_("Feedback on your work from the grader:")}</h4>
|
||||
<div class="problem-feedback">
|
||||
## sanitized with bleach in view
|
||||
## sanitized with nh3 in view
|
||||
${comment | n, decode.utf8}
|
||||
</div>
|
||||
% endif
|
||||
|
||||
@@ -5,7 +5,7 @@ in a 404 error.
|
||||
"""
|
||||
|
||||
|
||||
import bleach
|
||||
import nh3
|
||||
from django.http import HttpResponseNotFound
|
||||
from django.template import TemplateDoesNotExist
|
||||
from django.utils.translation import gettext as _
|
||||
@@ -54,4 +54,4 @@ def show_reference_template(request, template):
|
||||
|
||||
return render_to_response(template, context)
|
||||
except TemplateDoesNotExist:
|
||||
return HttpResponseNotFound(f'Missing template {bleach.clean(template, strip=True)}')
|
||||
return HttpResponseNotFound(f'Missing template {nh3.clean(template)}')
|
||||
|
||||
@@ -5,7 +5,7 @@ import re
|
||||
import urllib.parse as parse # pylint: disable=import-error
|
||||
from urllib.parse import parse_qs, urlsplit, urlunsplit # pylint: disable=import-error
|
||||
|
||||
import bleach
|
||||
import nh3
|
||||
from django.conf import settings
|
||||
from django.contrib.auth import logout
|
||||
from django.shortcuts import redirect
|
||||
@@ -60,7 +60,7 @@ class LogoutView(TemplateView):
|
||||
# >> /courses/course-v1:ARTS+D1+2018_T/course/
|
||||
# to handle this scenario we need to encode our URL using quote_plus and then unquote it again.
|
||||
if target_url:
|
||||
target_url = bleach.clean(parse.unquote(parse.quote_plus(target_url)))
|
||||
target_url = nh3.clean(parse.unquote(parse.quote_plus(target_url)))
|
||||
|
||||
use_target_url = target_url and is_safe_login_or_logout_redirect(
|
||||
redirect_to=target_url,
|
||||
|
||||
@@ -5,7 +5,7 @@ Tests for logout
|
||||
import urllib
|
||||
from unittest import mock
|
||||
import ddt
|
||||
import bleach
|
||||
import nh3
|
||||
from django.conf import settings
|
||||
from django.test import TestCase
|
||||
from django.test.utils import override_settings
|
||||
@@ -237,6 +237,6 @@ class LogoutTests(TestCase):
|
||||
)
|
||||
response = self.client.get(url, HTTP_HOST=host)
|
||||
expected = {
|
||||
'target': bleach.clean(urllib.parse.unquote(redirect_url)),
|
||||
'target': nh3.clean(urllib.parse.unquote(redirect_url)),
|
||||
}
|
||||
self.assertDictContainsSubset(expected, response.context_data)
|
||||
|
||||
@@ -4,7 +4,7 @@ Utilities for use in Mako markup.
|
||||
|
||||
|
||||
import markupsafe
|
||||
import bleach
|
||||
import nh3
|
||||
from lxml.html.clean import Cleaner
|
||||
from mako.filters import decode
|
||||
|
||||
@@ -53,7 +53,7 @@ def strip_all_tags_but_br(string_to_strip):
|
||||
string_to_strip = ""
|
||||
|
||||
string_to_strip = decode.utf8(string_to_strip)
|
||||
string_to_strip = bleach.clean(string_to_strip, tags={'br'}, strip=True)
|
||||
string_to_strip = nh3.clean(string_to_strip, tags={'br'})
|
||||
|
||||
return HTML(string_to_strip)
|
||||
|
||||
|
||||
@@ -68,7 +68,6 @@ billiard==4.2.0
|
||||
# via celery
|
||||
bleach[css]==6.1.0
|
||||
# via
|
||||
# -r requirements/edx/kernel.in
|
||||
# edx-enterprise
|
||||
# lti-consumer-xblock
|
||||
# openedx-django-wiki
|
||||
@@ -728,6 +727,8 @@ newrelic==9.9.1
|
||||
# via
|
||||
# -r requirements/edx/bundled.in
|
||||
# edx-django-utils
|
||||
nh3==0.2.17
|
||||
# via -r requirements/edx/kernel.in
|
||||
nltk==3.8.1
|
||||
# via chem
|
||||
nodeenv==1.8.0
|
||||
|
||||
@@ -1259,6 +1259,10 @@ newrelic==9.9.1
|
||||
# -r requirements/edx/doc.txt
|
||||
# -r requirements/edx/testing.txt
|
||||
# edx-django-utils
|
||||
nh3==0.2.17
|
||||
# via
|
||||
# -r requirements/edx/doc.txt
|
||||
# -r requirements/edx/testing.txt
|
||||
nltk==3.8.1
|
||||
# via
|
||||
# -r requirements/edx/doc.txt
|
||||
|
||||
@@ -854,6 +854,8 @@ newrelic==9.9.1
|
||||
# via
|
||||
# -r requirements/edx/base.txt
|
||||
# edx-django-utils
|
||||
nh3==0.2.17
|
||||
# via -r requirements/edx/base.txt
|
||||
nltk==3.8.1
|
||||
# via
|
||||
# -r requirements/edx/base.txt
|
||||
|
||||
@@ -24,7 +24,6 @@ acid-xblock # This XBlock is used for unit tests as well
|
||||
analytics-python # Used for Segment analytics
|
||||
attrs # Reduces boilerplate code involving class attributes
|
||||
Babel # Internationalization utilities, used for date formatting in a few places
|
||||
bleach[css] # Allowed-list-based HTML sanitizing library that escapes or strips markup and attributes; used for capa and LTI
|
||||
boto # Deprecated version of the AWS SDK; we should stop using this
|
||||
boto3 # Amazon Web Services SDK for Python
|
||||
botocore # via boto3, s3transfer
|
||||
@@ -110,6 +109,7 @@ Markdown # Convert text markup to HTML; used in capa
|
||||
meilisearch # Library to access Meilisearch search engine (will replace ElasticSearch)
|
||||
mongoengine # Object-document mapper for MongoDB, used in the LMS dashboard
|
||||
mysqlclient # Driver for the default production relational database
|
||||
nh3 # Python bindings to ammonia, an allowed-list-based HTML sanitizing library; used for capa and LTI
|
||||
nodeenv # Utility for managing Node.js environments; we use this for deployments and testing
|
||||
oauthlib # OAuth specification support for authenticating via LTI or other Open edX services
|
||||
olxcleaner
|
||||
|
||||
@@ -939,6 +939,8 @@ newrelic==9.9.1
|
||||
# via
|
||||
# -r requirements/edx/base.txt
|
||||
# edx-django-utils
|
||||
nh3==0.2.17
|
||||
# via -r requirements/edx/base.txt
|
||||
nltk==3.8.1
|
||||
# via
|
||||
# -r requirements/edx/base.txt
|
||||
|
||||
@@ -47,7 +47,7 @@ import sys
|
||||
import time
|
||||
from datetime import datetime
|
||||
|
||||
import bleach
|
||||
import nh3
|
||||
import html5lib
|
||||
import pyparsing
|
||||
import six
|
||||
@@ -800,7 +800,7 @@ class CodeInput(InputTypeBase):
|
||||
if self.status == 'incomplete':
|
||||
self.status = 'queued'
|
||||
self.queue_len = self.msg # lint-amnesty, pylint: disable=attribute-defined-outside-init
|
||||
self.msg = bleach.clean(self.submitted_msg)
|
||||
self.msg = nh3.clean(self.submitted_msg)
|
||||
|
||||
def setup(self):
|
||||
""" setup this input type """
|
||||
|
||||
@@ -916,7 +916,7 @@ class MatlabTest(unittest.TestCase):
|
||||
}
|
||||
elt = etree.fromstring(self.xml)
|
||||
the_input = self.input_class(test_capa_system(), elt, state)
|
||||
expected = "<script>Test message</script>"
|
||||
expected = ""
|
||||
assert the_input.queue_msg == expected
|
||||
|
||||
def test_matlab_sanitize_msg(self):
|
||||
@@ -925,7 +925,7 @@ class MatlabTest(unittest.TestCase):
|
||||
"""
|
||||
not_allowed_tag = 'script'
|
||||
self.the_input.msg = "<{0}>Test message</{0}>".format(not_allowed_tag)
|
||||
expected = "<script>Test message</script>"
|
||||
expected = ""
|
||||
assert self.the_input._get_render_context()['msg'] == expected # pylint: disable=protected-access
|
||||
|
||||
|
||||
|
||||
@@ -121,7 +121,7 @@ class UtilTest(unittest.TestCase):
|
||||
|
||||
def test_sanitize_html(self):
|
||||
"""
|
||||
Test for html sanitization with bleach.
|
||||
Test for html sanitization with nh3.
|
||||
"""
|
||||
allowed_tags = ['div', 'p', 'audio', 'pre', 'span']
|
||||
for tag in allowed_tags:
|
||||
@@ -130,7 +130,7 @@ class UtilTest(unittest.TestCase):
|
||||
|
||||
not_allowed_tag = 'script'
|
||||
queue_msg = "<{0}>Test message</{0}>".format(not_allowed_tag)
|
||||
expected = "<script>Test message</script>"
|
||||
expected = ""
|
||||
assert sanitize_html(queue_msg) == expected
|
||||
|
||||
def test_get_inner_html_from_xpath(self):
|
||||
@@ -142,7 +142,7 @@ class UtilTest(unittest.TestCase):
|
||||
|
||||
def test_remove_markup(self):
|
||||
"""
|
||||
Test for markup removal with bleach.
|
||||
Test for markup removal with nh3.
|
||||
"""
|
||||
assert remove_markup('The <mark>Truth</mark> is <em>Out There</em> & you need to <strong>find</strong> it') ==\
|
||||
'The Truth is Out There & you need to find it'
|
||||
|
||||
@@ -8,11 +8,10 @@ import re
|
||||
from cmath import isinf, isnan
|
||||
from decimal import Decimal
|
||||
|
||||
import bleach
|
||||
import nh3
|
||||
from calc import evaluator
|
||||
from lxml import etree
|
||||
|
||||
from bleach.css_sanitizer import CSSSanitizer
|
||||
from openedx.core.djangolib.markup import HTML
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
@@ -182,17 +181,15 @@ def sanitize_html(html_code):
|
||||
|
||||
Used to sanitize XQueue responses from Matlab.
|
||||
"""
|
||||
attributes = bleach.ALLOWED_ATTRIBUTES.copy()
|
||||
attributes = nh3.ALLOWED_ATTRIBUTES.copy()
|
||||
attributes.update({
|
||||
'*': ['class', 'style', 'id'],
|
||||
'audio': ['controls', 'autobuffer', 'autoplay', 'src'],
|
||||
'img': ['src', 'width', 'height', 'class']
|
||||
'*': {'class', 'style', 'id'},
|
||||
'audio': {'controls', 'autobuffer', 'autoplay', 'src'},
|
||||
'img': {'src', 'width', 'height', 'class'}
|
||||
})
|
||||
output = bleach.clean(
|
||||
output = nh3.clean(
|
||||
html_code,
|
||||
protocols=bleach.ALLOWED_PROTOCOLS | {'data'},
|
||||
tags=bleach.ALLOWED_TAGS | {'div', 'p', 'audio', 'pre', 'img', 'span'},
|
||||
css_sanitizer=CSSSanitizer(allowed_css_properties=["white-space"]),
|
||||
tags=nh3.ALLOWED_TAGS | {'div', 'p', 'audio', 'pre', 'img', 'span'},
|
||||
attributes=attributes
|
||||
)
|
||||
return output
|
||||
@@ -215,12 +212,12 @@ def remove_markup(html):
|
||||
"""
|
||||
Return html with markup stripped and text HTML-escaped.
|
||||
|
||||
>>> bleach.clean("<b>Rock & Roll</b>", tags=set(), strip=True)
|
||||
>>> nh3.clean("<b>Rock & Roll</b>", tags=set())
|
||||
'Rock &amp; Roll'
|
||||
>>> bleach.clean("<b>Rock & Roll</b>", tags=set(), strip=True)
|
||||
>>> nh3.clean("<b>Rock &amp; Roll</b>", tags=set())
|
||||
'Rock &amp; Roll'
|
||||
"""
|
||||
return HTML(bleach.clean(html, tags=set(), strip=True))
|
||||
return HTML(nh3.clean(html, tags=set()))
|
||||
|
||||
|
||||
def get_course_id_from_capa_block(capa_block):
|
||||
|
||||
@@ -14,7 +14,7 @@ import struct
|
||||
import sys
|
||||
import traceback
|
||||
|
||||
from bleach.sanitizer import Cleaner
|
||||
import nh3
|
||||
from django.conf import settings
|
||||
from django.core.exceptions import ImproperlyConfigured
|
||||
from django.utils.encoding import smart_str
|
||||
@@ -619,7 +619,7 @@ class ProblemBlock(
|
||||
capa_content = re.sub(
|
||||
r"(\s| |//)+",
|
||||
" ",
|
||||
Cleaner(tags=[], strip=True).clean(capa_content)
|
||||
nh3.clean(capa_content, tags=set())
|
||||
)
|
||||
|
||||
capa_body = {
|
||||
|
||||
@@ -9,7 +9,7 @@ import random
|
||||
from copy import copy
|
||||
from gettext import ngettext, gettext
|
||||
|
||||
import bleach
|
||||
import nh3
|
||||
from django.conf import settings
|
||||
from django.core.exceptions import ObjectDoesNotExist, PermissionDenied
|
||||
from django.utils.functional import classproperty
|
||||
@@ -731,7 +731,7 @@ class LibraryContentBlock(
|
||||
lib_tools = self.get_tools()
|
||||
user_perms = self.runtime.service(self, 'studio_user_permissions')
|
||||
all_libraries = [
|
||||
(key, bleach.clean(name)) for key, name in lib_tools.list_available_libraries()
|
||||
(key, nh3.clean(name)) for key, name in lib_tools.list_available_libraries()
|
||||
if user_perms.can_read(key) or self.source_library_id == str(key)
|
||||
]
|
||||
all_libraries.sort(key=lambda entry: entry[1]) # Sort by name
|
||||
|
||||
@@ -63,7 +63,7 @@ from xml.sax.saxutils import escape
|
||||
from unittest import mock
|
||||
from urllib import parse
|
||||
|
||||
import bleach
|
||||
import nh3
|
||||
import oauthlib.oauth1
|
||||
from django.conf import settings
|
||||
from lxml import etree
|
||||
@@ -458,17 +458,43 @@ class LTIBlock(
|
||||
"""
|
||||
Returns a context.
|
||||
"""
|
||||
# use bleach defaults. see https://github.com/jsocol/bleach/blob/master/bleach/__init__.py
|
||||
# nh3 defaults for
|
||||
# ALLOWED_TAGS are
|
||||
# ['a', 'abbr', 'acronym', 'b', 'blockquote', 'code', 'em', 'i', 'li', 'ol', 'strong', 'ul']
|
||||
# {
|
||||
# 'a', 'abbr', 'acronym', 'area', 'article', 'aside', 'b', 'bdi', 'bdo',
|
||||
# 'blockquote', 'br', 'caption', 'center', 'cite', 'code', 'col', 'colgroup',
|
||||
# 'data', 'dd', 'del', 'details', 'dfn', 'div', 'dl', 'dt', 'em', 'figcaption',
|
||||
# 'figure', 'footer', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup',
|
||||
# 'hr', 'i', 'img', 'ins', 'kbd', 'li', 'map', 'mark', 'nav', 'ol', 'p', 'pre',
|
||||
# 'q', 'rp', 'rt', 'rtc', 'ruby', 's', 'samp', 'small', 'span', 'strike',
|
||||
# 'strong', 'sub', 'summary', 'sup', 'table', 'tbody', 'td', 'th', 'thead',
|
||||
# 'time', 'tr', 'tt', 'u', 'ul', 'var', 'wbr'
|
||||
# }
|
||||
#
|
||||
# ALLOWED_ATTRIBUTES are
|
||||
# 'a': ['href', 'title'],
|
||||
# 'abbr': ['title'],
|
||||
# 'acronym': ['title'],
|
||||
# {
|
||||
# 'a': {'href', 'hreflang'},
|
||||
# 'bdo': {'dir'},
|
||||
# 'blockquote': {'cite'},
|
||||
# 'col': {'charoff', 'char', 'align', 'span'},
|
||||
# 'colgroup': {'align', 'char', 'charoff', 'span'},
|
||||
# 'del': {'datetime', 'cite'},
|
||||
# 'hr': {'width', 'align', 'size'},
|
||||
# 'img': {'height', 'src', 'width', 'alt', 'align'},
|
||||
# 'ins': {'datetime', 'cite'},
|
||||
# 'ol': {'start'},
|
||||
# 'q': {'cite'},
|
||||
# 'table': {'align', 'char', 'charoff', 'summary'},
|
||||
# 'tbody': {'align', 'char', 'charoff'},
|
||||
# 'td': {'rowspan', 'headers', 'charoff', 'colspan', 'char', 'align'},
|
||||
# 'tfoot': {'align', 'char', 'charoff'},
|
||||
# 'th': {'rowspan', 'headers', 'charoff', 'colspan', 'scope', 'char', 'align'},
|
||||
# 'thead': {'charoff', 'char', 'align'},
|
||||
# 'tr': {'align', 'char', 'charoff'}
|
||||
# }
|
||||
#
|
||||
# This lets all plaintext through.
|
||||
sanitized_comment = bleach.clean(self.score_comment)
|
||||
sanitized_comment = nh3.clean(self.score_comment)
|
||||
|
||||
return {
|
||||
'input_fields': self.get_input_fields(),
|
||||
|
||||
@@ -45,7 +45,7 @@ class LTI20RESTResultServiceTest(unittest.TestCase):
|
||||
|
||||
test_cases = ( # (before sanitize, after sanitize)
|
||||
("plaintext", "plaintext"),
|
||||
("a <script>alert(3)</script>", "a <script>alert(3)</script>"), # encodes scripts
|
||||
("a <script>alert(3)</script>", "a "), # drops scripts
|
||||
("<b>bold 包</b>", "<b>bold 包</b>"), # unicode, and <b> tags pass through
|
||||
)
|
||||
for case in test_cases:
|
||||
|
||||
Reference in New Issue
Block a user