feat: sanitize bulk course email message content before storing in database

[MICROBA-1666]

* Use bleach to sanitize user-provided content of bulk course emails before storing in the database.
* Add new `BULK_COURSE_EMAIL_ALLOWED_HTML_TAGS` setting to enable configuration of allowed HTML tags in bulk course emails.
This commit is contained in:
Justin Hynes
2022-02-17 16:14:55 -05:00
parent dd488a76d1
commit 53041a2d34
3 changed files with 51 additions and 1 deletions

View File

@@ -10,6 +10,7 @@ import shutil
import tempfile import tempfile
from unittest.mock import Mock, NonCallableMock, patch from unittest.mock import Mock, NonCallableMock, patch
import bleach
import ddt import ddt
import pytest import pytest
from boto.exception import BotoServerError from boto.exception import BotoServerError
@@ -3483,6 +3484,40 @@ class TestInstructorSendEmail(SiteMixin, SharedModuleStoreTestCase, LoginEnrollm
html_message=self.full_test_message['message'], html_message=self.full_test_message['message'],
template_name=org_template, from_addr=org_email).count() template_name=org_template, from_addr=org_email).count()
def test_send_email_and_sanitize_content(self):
    """
    Verify that the `send_email` endpoint sanitizes the subject and message before storing them.

    The posted message mixes markup that is on the `BULK_COURSE_EMAIL_ALLOWED_HTML_TAGS` allow-list (`h1`, `p`,
    `br`) with markup that is not (`script`, `form`, `label`, `input`). The stored `CourseEmail` must match what
    `bleach.clean` produces for the same input and allow-list: allowed tags are kept, and disallowed tags are
    HTML-escaped (not removed) so they can no longer be interpreted as markup.
    """
    test_subject = 'sanitization test subject'
    test_message = """
<h1>Welcome to course101!</h1>
<p>We are going to do all the learning together.</p>
<script>Content inside script tag</script>
<form action="/action_page.php">
<label for="fname">First name:</label><br>
<input type="text" id="fname" name="fname"><br><br>
<input type="submit" value="Submit">
</form>
"""
    message = {
        'send_to': '["myself", "staff"]',
        'subject': test_subject,
        'message': test_message,
    }
    # Expected values are computed with the same call and allow-list the view uses.
    allowed_tags = settings.BULK_COURSE_EMAIL_ALLOWED_HTML_TAGS
    sanitized_subject = bleach.clean(test_subject, tags=allowed_tags)
    sanitized_message = bleach.clean(test_message, tags=allowed_tags)

    url = reverse('send_email', kwargs={'course_id': str(self.course.id)})
    response = self.client.post(url, message)
    assert response.status_code == 200

    # Fetch the stored row once; indexing the QuerySet repeatedly would issue a new query per access.
    email = CourseEmail.objects.filter(course_id=self.course.id, sender=self.instructor)[0]
    assert email.subject == sanitized_subject
    assert email.html_message == sanitized_message

    # deeper verification, confirm `h1` element hasn't been stripped from message content
    assert "<h1>" in email.html_message
    # deeper verification, confirm the `script` element has been escaped (its text is kept, but the angle
    # brackets are converted to entities) rather than stored as live markup
    assert "&lt;script&gt;Content inside script tag&lt;/script&gt;" in email.html_message
    assert "<script>" not in email.html_message
    # the disallowed `form` element must not survive as raw markup either
    assert "<form" not in email.html_message
class MockCompletionInfo: class MockCompletionInfo:
"""Mock for get_task_completion_info""" """Mock for get_task_completion_info"""

View File

@@ -13,6 +13,7 @@ import string
import random import random
import re import re
import bleach
import edx_api_doc_tools as apidocs import edx_api_doc_tools as apidocs
from django.conf import settings from django.conf import settings
from django.contrib.auth.models import User # lint-amnesty, pylint: disable=imported-auth-user from django.contrib.auth.models import User # lint-amnesty, pylint: disable=imported-auth-user
@@ -2734,11 +2735,16 @@ def send_email(request, course_id):
# any transaction that has been pending up to this point will also be # any transaction that has been pending up to this point will also be
# committed. # committed.
try: try:
# sanitize the email content before storing in the database
sanitized_subject = bleach.clean(subject, tags=settings.BULK_COURSE_EMAIL_ALLOWED_HTML_TAGS)
sanitized_message = bleach.clean(message, tags=settings.BULK_COURSE_EMAIL_ALLOWED_HTML_TAGS)
email = CourseEmail.create( email = CourseEmail.create(
course_id, course_id,
request.user, request.user,
targets, targets,
subject, message, sanitized_subject,
sanitized_message,
template_name=template_name, template_name=template_name,
from_addr=from_addr from_addr=from_addr
) )

View File

@@ -4964,3 +4964,12 @@ CUSTOM_PAGES_HELP_URL = "https://edx.readthedocs.io/projects/open-edx-building-a
# The expected value is an Integer representing the cutoff point (in months) for inclusion to the message. Example: # The expected value is an Integer representing the cutoff point (in months) for inclusion to the message. Example:
# a value of `3` would include learners who have logged in within the past 3 months. # a value of `3` would include learners who have logged in within the past 3 months.
BULK_COURSE_EMAIL_LAST_LOGIN_ELIGIBILITY_PERIOD = None BULK_COURSE_EMAIL_LAST_LOGIN_ELIGIBILITY_PERIOD = None
# Allow-list of HTML elements permitted in content authored via the Bulk Course Email Tool. The instructor
# `send_email` view passes this list as the `tags` argument to `bleach.clean` for both the subject and the message
# body. Entries are grouped one leading letter per row; the element order itself is unchanged.
BULK_COURSE_EMAIL_ALLOWED_HTML_TAGS = [
    "a", "abbr", "address", "area", "article", "aside", "audio",
    "b", "bdi", "bdo", "blockquote", "br",
    "caption", "cite", "code", "col", "colgroup",
    "data", "dd", "del", "dfn", "div", "dl", "dt",
    "em", "embed",
    "figcaption", "figure", "footer",
    "h1", "h2", "h3", "h4", "h5", "h6", "header", "hr",
    "i", "img", "ins",
    "kbd",
    "li", "link",
    "main", "map", "mark", "meta", "menu",
    "nav",
    "object", "ol",
    "p", "param", "picture", "pre",
    "q",
    "rp", "rt", "ruby",
    "s", "samp", "section", "small", "source", "span", "strong", "style", "sub", "sup",
    "table", "tbody", "td", "tfoot", "th", "thead", "time", "tr", "track",
    "u", "ul",
    "var", "video",
    "wbr",
]