fix: disallow "data:" links in discussion posts

Sanitizes Markdown that goes back and forth between the server and
client side, to strip out data: links, so that they cannot be abused.
There is no present vulnerability to this issue–modern browsers disallow
data links in the first place, and we already filter this out in both
client-side code as well as the HTML generated in the REST API (it's run
through bleach). But we're adding this anyway, to further reduce the
odds that some client-side mistake could cause a vulnerability. This is
part of TNL-8589.
This commit is contained in:
David Ormsbee
2021-08-01 00:48:44 -04:00
parent 68e8a6a981
commit 149b014053
4 changed files with 65 additions and 9 deletions

View File

@@ -39,7 +39,8 @@ from lms.djangoapps.discussion.django_comment_client.utils import (
get_group_id_for_comments_service,
get_user_group_ids,
is_comment_too_deep,
prepare_content
prepare_content,
sanitize_body,
)
from openedx.core.djangoapps.django_comment_common.signals import (
comment_created,
@@ -263,7 +264,7 @@ def create_thread(request, course_id, commentable_id):
'course_id': str(course_key),
'user_id': user.id,
'thread_type': post["thread_type"],
'body': post["body"],
'body': sanitize_body(post["body"]),
'title': post["title"],
}
@@ -326,7 +327,7 @@ def update_thread(request, course_id, thread_id):
thread = cc.Thread.find(thread_id)
# Get thread context first in order to be safe from reseting the values of thread object later
thread_context = getattr(thread, "context", "course")
thread.body = request.POST["body"]
thread.body = sanitize_body(request.POST["body"])
thread.title = request.POST["title"]
user = request.user
# The following checks should avoid issues we've seen during deploys, where end users are hitting an updated server
@@ -381,7 +382,7 @@ def _create_comment(request, course_key, thread_id=None, parent_id=None):
course_id=str(course_key),
thread_id=thread_id,
parent_id=parent_id,
body=post["body"]
body=sanitize_body(post["body"]),
)
comment.save()
@@ -441,7 +442,7 @@ def update_comment(request, course_id, comment_id):
comment = cc.Comment.find(comment_id)
if 'body' not in request.POST or not request.POST['body'].strip():
return JsonError(_("Body can't be empty"))
comment.body = request.POST["body"]
comment.body = sanitize_body(request.POST["body"])
comment.save()
comment_edited.send(sender=None, user=request.user, post=comment)

View File

@@ -3,6 +3,7 @@
import datetime
import json
import unittest
from unittest import mock
from unittest.mock import Mock, patch
@@ -31,7 +32,7 @@ from openedx.core.djangoapps.course_groups.cohorts import set_course_cohorted
from openedx.core.djangoapps.course_groups.tests.helpers import CohortFactory, config_course_cohorts
from openedx.core.djangoapps.django_comment_common.comment_client.utils import (
CommentClientMaintenanceError,
perform_request
perform_request,
)
from openedx.core.djangoapps.django_comment_common.models import (
CourseDiscussionSettings,
@@ -1745,3 +1746,37 @@ class MiscUtilsTests(TestCase):
thread_data['course_id'] = CourseKey.from_string(course_id)
assert utils.permalink(thread_data) == expected_url
@ddt.ddt
class SanitizeTests(unittest.TestCase):
"""Pure functional tests around sanitizing Markdown"""
@ddt.data(
(None, None),
("", ""),
(
"No substitutions, even if there's data: ",
"No substitutions, even if there's data: ",
),
(
"""
[Click here](data:text/html;base64,PHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0Pg==) Some Text
[This link is fine](https://www.openedx.org)
More Text [Click here](data:text/html;base64,PHNjcmlwdD5hbGVydCgxKTwvc2NyaXB0P)
""",
"""
[Click here]() Some Text
[This link is fine](https://www.openedx.org)
More Text [Click here]()
""",
),
)
@ddt.unpack
def test_input_output(self, input_str, expected_output):
"""Test a range of inputs for cleanup."""
assert utils.sanitize_body(input_str) == expected_output

View File

@@ -1,8 +1,7 @@
# pylint: skip-file
import json
import logging
import regex
from collections import defaultdict
from datetime import datetime
@@ -773,6 +772,10 @@ def prepare_content(content, course_key, is_staff=False, discussion_division_ena
content = strip_none(extract(content, fields))
# Replace the content body with a sanitized version
if 'body' in content:
content['body'] = sanitize_body(content['body'])
if content.get("endorsement"):
endorsement = content["endorsement"]
endorser = None
@@ -1076,3 +1079,20 @@ def is_content_authored_by(content, user):
return int(content.get('user_id')) == user.id
except (ValueError, TypeError):
return False
def sanitize_body(body):
"""
Return a sanitized version of the body with dangerous Markdown removed.
This is possibly overly broad, and might tamper with legitimate posts that
contain this code in fenced code blocks. As far as we can tell, this is an
extra layer of protection, and current handling in the front end and using
bleach for HTML rendering on the server side should cover these cases.
"""
if not body:
return body
# This will remove the Markdown style links with data: URLs, and turn them
# into empty links.
return regex.sub(r'\]\(data:[^)]+\)', ']()', body)

View File

@@ -45,7 +45,7 @@ from lms.djangoapps.discussion.django_comment_client.utils import (
get_group_id_for_user,
get_group_names_by_id,
is_commentable_divided,
strip_none
strip_none,
)
from lms.djangoapps.discussion.exceptions import TeamDiscussionHiddenFromUserException
from lms.djangoapps.experiments.utils import get_experiment_user_metadata_context