Merge pull request #8541 from edx/gprice/discussion-api-rendered-body
Add rendered_body to discussion API endpoints
This commit is contained in:
105
lms/djangoapps/discussion_api/render.py
Normal file
105
lms/djangoapps/discussion_api/render.py
Normal file
@@ -0,0 +1,105 @@
|
||||
"""
|
||||
Content rendering functionality
|
||||
|
||||
Note that this module is designed to imitate the front end behavior as
|
||||
implemented in Markdown.Sanitizer.js.
|
||||
"""
|
||||
import re
|
||||
|
||||
import markdown
|
||||
|
||||
# These patterns could be more flexible about things like attributes and
|
||||
# whitespace, but this is imitating Markdown.Sanitizer.js, so it uses the
|
||||
# patterns defined therein.
|
||||
TAG_PATTERN = re.compile(r"<[^>]*>?")
|
||||
SANITIZED_TAG_PATTERN = re.compile(r"<(/?)(\w+)[^>]*>")
|
||||
ALLOWED_BASIC_TAG_PATTERN = re.compile(
|
||||
r"^(</?(b|blockquote|code|del|dd|dl|dt|em|h1|h2|h3|i|kbd|li|ol|p|pre|s|sup|sub|strong|strike|ul)>|<(br|hr)\s?/?>)$"
|
||||
)
|
||||
ALLOWED_A_PATTERN = re.compile(
|
||||
r'^(<a\shref="((https?|ftp)://|/)[-A-Za-z0-9+&@#/%?=~_|!:,.;\(\)]+"(\stitle="[^"<>]+")?\s?>|</a>)$'
|
||||
)
|
||||
ALLOWED_IMG_PATTERN = re.compile(
|
||||
r'^(<img\ssrc="(https?://|/)[-A-Za-z0-9+&@#/%?=~_|!:,.;\(\)]+"(\swidth="\d{1,3}")?'
|
||||
r'(\sheight="\d{1,3}")?(\salt="[^"<>]*")?(\stitle="[^"<>]*")?\s?/?>)$'
|
||||
)
|
||||
|
||||
|
||||
def _sanitize_tag(match):
|
||||
"""Return the tag if it is allowed or the empty string otherwise"""
|
||||
tag = match.group(0)
|
||||
if (
|
||||
ALLOWED_BASIC_TAG_PATTERN.match(tag) or
|
||||
ALLOWED_A_PATTERN.match(tag) or
|
||||
ALLOWED_IMG_PATTERN.match(tag)
|
||||
):
|
||||
return tag
|
||||
else:
|
||||
return ""
|
||||
|
||||
|
||||
def _sanitize_html(source):
|
||||
"""
|
||||
Return source with all non-allowed tags removed, preserving the text content
|
||||
"""
|
||||
return TAG_PATTERN.sub(_sanitize_tag, source)
|
||||
|
||||
|
||||
def _remove_unpaired_tags(source):
|
||||
"""
|
||||
Return source with all unpaired tags removed, preserving the text content
|
||||
|
||||
source should have already been sanitized
|
||||
"""
|
||||
tag_matches = list(SANITIZED_TAG_PATTERN.finditer(source))
|
||||
if not tag_matches:
|
||||
return source
|
||||
tag_stack = []
|
||||
tag_name_stack = []
|
||||
text_stack = [source[:tag_matches[0].start()]]
|
||||
for i, match in enumerate(tag_matches):
|
||||
tag_name = match.group(2)
|
||||
following_text = (
|
||||
source[match.end():tag_matches[i + 1].start()] if i + 1 < len(tag_matches) else
|
||||
source[match.end():]
|
||||
)
|
||||
if tag_name in ["p", "img", "br", "li", "hr"]: # tags that don't require closing
|
||||
text_stack[-1] += match.group(0) + following_text
|
||||
elif match.group(1): # end tag
|
||||
if tag_name in tag_name_stack: # paired with a start tag somewhere
|
||||
# pop tags until we find the matching one, keeping the non-tag text
|
||||
while True:
|
||||
popped_tag_name = tag_name_stack.pop()
|
||||
popped_tag = tag_stack.pop()
|
||||
popped_text = text_stack.pop()
|
||||
if popped_tag_name == tag_name:
|
||||
text_stack[-1] += popped_tag + popped_text + match.group(0)
|
||||
break
|
||||
else:
|
||||
text_stack[-1] += popped_text
|
||||
# else unpaired; drop the tag
|
||||
text_stack[-1] += following_text
|
||||
else: # start tag
|
||||
tag_stack.append(match.group(0))
|
||||
tag_name_stack.append(tag_name)
|
||||
text_stack.append(following_text)
|
||||
return "".join(text_stack)
|
||||
|
||||
|
||||
def render_body(raw_body):
|
||||
"""
|
||||
Render raw_body to HTML.
|
||||
|
||||
This includes the following steps:
|
||||
|
||||
* Convert Markdown to HTML
|
||||
* Strip non-whitelisted HTML
|
||||
* Remove unbalanced HTML tags
|
||||
|
||||
Note that this does not prevent Markdown syntax inside a MathJax block from
|
||||
being processed, which the forums JavaScript code does.
|
||||
"""
|
||||
rendered = markdown.markdown(raw_body)
|
||||
rendered = _sanitize_html(rendered)
|
||||
rendered = _remove_unpaired_tags(rendered)
|
||||
return rendered
|
||||
@@ -10,6 +10,7 @@ from django.core.urlresolvers import reverse
|
||||
|
||||
from rest_framework import serializers
|
||||
|
||||
from discussion_api.render import render_body
|
||||
from django_comment_common.models import (
|
||||
FORUM_ROLE_ADMINISTRATOR,
|
||||
FORUM_ROLE_COMMUNITY_TA,
|
||||
@@ -65,6 +66,7 @@ class _ContentSerializer(serializers.Serializer):
|
||||
created_at = serializers.CharField(read_only=True)
|
||||
updated_at = serializers.CharField(read_only=True)
|
||||
raw_body = NonEmptyCharField(source="body")
|
||||
rendered_body = serializers.SerializerMethodField("get_rendered_body")
|
||||
abuse_flagged = serializers.SerializerMethodField("get_abuse_flagged")
|
||||
voted = serializers.SerializerMethodField("get_voted")
|
||||
vote_count = serializers.SerializerMethodField("get_vote_count")
|
||||
@@ -122,6 +124,10 @@ class _ContentSerializer(serializers.Serializer):
|
||||
"""Returns the role label for the content author."""
|
||||
return None if self._is_anonymous(obj) else self._get_user_label(int(obj["user_id"]))
|
||||
|
||||
def get_rendered_body(self, obj):
|
||||
"""Returns the rendered body content."""
|
||||
return render_body(obj["body"])
|
||||
|
||||
def get_abuse_flagged(self, obj):
|
||||
"""
|
||||
Returns a boolean indicating whether the requester has flagged the
|
||||
|
||||
@@ -597,6 +597,7 @@ class GetThreadListTest(CommentsServiceMockMixin, UrlResetMixin, ModuleStoreTest
|
||||
"type": "discussion",
|
||||
"title": "Test Title",
|
||||
"raw_body": "Test body",
|
||||
"rendered_body": "<p>Test body</p>",
|
||||
"pinned": False,
|
||||
"closed": False,
|
||||
"following": False,
|
||||
@@ -622,6 +623,7 @@ class GetThreadListTest(CommentsServiceMockMixin, UrlResetMixin, ModuleStoreTest
|
||||
"type": "question",
|
||||
"title": "Another Test Title",
|
||||
"raw_body": "More content",
|
||||
"rendered_body": "<p>More content</p>",
|
||||
"pinned": False,
|
||||
"closed": True,
|
||||
"following": False,
|
||||
@@ -953,6 +955,7 @@ class GetCommentListTest(CommentsServiceMockMixin, ModuleStoreTestCase):
|
||||
"created_at": "2015-05-11T00:00:00Z",
|
||||
"updated_at": "2015-05-11T11:11:11Z",
|
||||
"raw_body": "Test body",
|
||||
"rendered_body": "<p>Test body</p>",
|
||||
"endorsed": False,
|
||||
"endorsed_by": None,
|
||||
"endorsed_by_label": None,
|
||||
@@ -971,6 +974,7 @@ class GetCommentListTest(CommentsServiceMockMixin, ModuleStoreTestCase):
|
||||
"created_at": "2015-05-11T22:22:22Z",
|
||||
"updated_at": "2015-05-11T33:33:33Z",
|
||||
"raw_body": "More content",
|
||||
"rendered_body": "<p>More content</p>",
|
||||
"endorsed": False,
|
||||
"endorsed_by": None,
|
||||
"endorsed_by_label": None,
|
||||
@@ -1186,6 +1190,7 @@ class CreateThreadTest(CommentsServiceMockMixin, UrlResetMixin, ModuleStoreTestC
|
||||
"type": "discussion",
|
||||
"title": "Test Title",
|
||||
"raw_body": "Test body",
|
||||
"rendered_body": "<p>Test body</p>",
|
||||
"pinned": False,
|
||||
"closed": False,
|
||||
"following": False,
|
||||
@@ -1353,6 +1358,7 @@ class CreateCommentTest(CommentsServiceMockMixin, UrlResetMixin, ModuleStoreTest
|
||||
"created_at": "2015-05-27T00:00:00Z",
|
||||
"updated_at": "2015-05-27T00:00:00Z",
|
||||
"raw_body": "Test body",
|
||||
"rendered_body": "<p>Test body</p>",
|
||||
"endorsed": False,
|
||||
"endorsed_by": None,
|
||||
"endorsed_by_label": None,
|
||||
@@ -1562,6 +1568,7 @@ class UpdateThreadTest(CommentsServiceMockMixin, UrlResetMixin, ModuleStoreTestC
|
||||
"type": "discussion",
|
||||
"title": "Original Title",
|
||||
"raw_body": "Edited body",
|
||||
"rendered_body": "<p>Edited body</p>",
|
||||
"pinned": False,
|
||||
"closed": False,
|
||||
"following": False,
|
||||
@@ -1825,6 +1832,7 @@ class UpdateCommentTest(CommentsServiceMockMixin, UrlResetMixin, ModuleStoreTest
|
||||
"created_at": "2015-06-03T00:00:00Z",
|
||||
"updated_at": "2015-06-03T00:00:00Z",
|
||||
"raw_body": "Edited body",
|
||||
"rendered_body": "<p>Edited body</p>",
|
||||
"endorsed": False,
|
||||
"endorsed_by": None,
|
||||
"endorsed_by_label": None,
|
||||
|
||||
90
lms/djangoapps/discussion_api/tests/test_render.py
Normal file
90
lms/djangoapps/discussion_api/tests/test_render.py
Normal file
@@ -0,0 +1,90 @@
|
||||
"""
|
||||
Tests for content rendering
|
||||
"""
|
||||
from unittest import TestCase
|
||||
|
||||
import ddt
|
||||
|
||||
from discussion_api.render import render_body
|
||||
|
||||
|
||||
def _add_p_tags(raw_body):
|
||||
"""Return raw_body surrounded by p tags"""
|
||||
return "<p>{raw_body}</p>".format(raw_body=raw_body)
|
||||
|
||||
|
||||
@ddt.ddt
|
||||
class RenderBodyTest(TestCase):
|
||||
"""Tests for render_body"""
|
||||
def test_empty(self):
|
||||
self.assertEqual(render_body(""), "")
|
||||
|
||||
@ddt.data(
|
||||
("*", "em"),
|
||||
("**", "strong"),
|
||||
("`", "code"),
|
||||
)
|
||||
@ddt.unpack
|
||||
def test_markdown_inline(self, delimiter, tag):
|
||||
self.assertEqual(
|
||||
render_body("{delimiter}some text{delimiter}".format(delimiter=delimiter)),
|
||||
"<p><{tag}>some text</{tag}></p>".format(tag=tag)
|
||||
)
|
||||
|
||||
@ddt.data(
|
||||
"b", "blockquote", "code", "del", "dd", "dl", "dt", "em", "h1", "h2", "h3", "i", "kbd",
|
||||
"li", "ol", "p", "pre", "s", "sup", "sub", "strong", "strike", "ul"
|
||||
)
|
||||
def test_openclose_tag(self, tag):
|
||||
raw_body = "<{tag}>some text</{tag}>".format(tag=tag)
|
||||
is_inline_tag = tag in ["b", "code", "del", "em", "i", "kbd", "s", "sup", "sub", "strong", "strike"]
|
||||
rendered_body = _add_p_tags(raw_body) if is_inline_tag else raw_body
|
||||
self.assertEqual(render_body(raw_body), rendered_body)
|
||||
|
||||
@ddt.data("br", "hr")
|
||||
def test_selfclosing_tag(self, tag):
|
||||
raw_body = "<{tag}>".format(tag=tag)
|
||||
is_inline_tag = tag == "br"
|
||||
rendered_body = _add_p_tags(raw_body) if is_inline_tag else raw_body
|
||||
self.assertEqual(render_body(raw_body), rendered_body)
|
||||
|
||||
@ddt.data("http", "https", "ftp")
|
||||
def test_allowed_a_tag(self, protocol):
|
||||
raw_body = '<a href="{protocol}://foo" title="bar">baz</a>'.format(protocol=protocol)
|
||||
self.assertEqual(render_body(raw_body), _add_p_tags(raw_body))
|
||||
|
||||
def test_disallowed_a_tag(self):
|
||||
raw_body = '<a href="gopher://foo">link content</a>'
|
||||
self.assertEqual(render_body(raw_body), "<p>link content</p>")
|
||||
|
||||
@ddt.data("http", "https")
|
||||
def test_allowed_img_tag(self, protocol):
|
||||
raw_body = '<img src="{protocol}://foo" width="111" height="222" alt="bar" title="baz">'.format(
|
||||
protocol=protocol
|
||||
)
|
||||
self.assertEqual(render_body(raw_body), _add_p_tags(raw_body))
|
||||
|
||||
def test_disallowed_img_tag(self):
|
||||
raw_body = '<img src="gopher://foo">'
|
||||
self.assertEqual(render_body(raw_body), "<p></p>")
|
||||
|
||||
def test_script_tag(self):
|
||||
raw_body = '<script type="text/javascript">alert("evil script");</script>'
|
||||
self.assertEqual(render_body(raw_body), 'alert("evil script");')
|
||||
|
||||
@ddt.data("p", "br", "li", "hr") # img is tested above
|
||||
def test_allowed_unpaired_tags(self, tag):
|
||||
raw_body = "foo<{tag}>bar".format(tag=tag)
|
||||
self.assertEqual(render_body(raw_body), _add_p_tags(raw_body))
|
||||
|
||||
def test_unpaired_start_tag(self):
|
||||
self.assertEqual(render_body("foo<i>bar"), "<p>foobar</p>")
|
||||
|
||||
def test_unpaired_end_tag(self):
|
||||
self.assertEqual(render_body("foo</i>bar"), "<p>foobar</p>")
|
||||
|
||||
def test_interleaved_tags(self):
|
||||
self.assertEqual(
|
||||
render_body("foo<i>bar<b>baz</i>quux</b>greg"),
|
||||
"<p>foo<i>barbaz</i>quuxgreg</p>"
|
||||
)
|
||||
@@ -184,6 +184,7 @@ class ThreadSerializerSerializationTest(SerializerTestMixin, ModuleStoreTestCase
|
||||
"type": "discussion",
|
||||
"title": "Test Title",
|
||||
"raw_body": "Test body",
|
||||
"rendered_body": "<p>Test body</p>",
|
||||
"pinned": True,
|
||||
"closed": False,
|
||||
"following": False,
|
||||
@@ -281,6 +282,7 @@ class CommentSerializerTest(SerializerTestMixin, ModuleStoreTestCase):
|
||||
"created_at": "2015-04-28T00:00:00Z",
|
||||
"updated_at": "2015-04-28T11:11:11Z",
|
||||
"raw_body": "Test body",
|
||||
"rendered_body": "<p>Test body</p>",
|
||||
"endorsed": False,
|
||||
"endorsed_by": None,
|
||||
"endorsed_by_label": None,
|
||||
|
||||
@@ -191,6 +191,7 @@ class ThreadViewSetListTest(DiscussionAPIViewTestMixin, ModuleStoreTestCase):
|
||||
"type": "discussion",
|
||||
"title": "Test Title",
|
||||
"raw_body": "Test body",
|
||||
"rendered_body": "<p>Test body</p>",
|
||||
"pinned": False,
|
||||
"closed": False,
|
||||
"following": False,
|
||||
@@ -303,6 +304,7 @@ class ThreadViewSetCreateTest(DiscussionAPIViewTestMixin, ModuleStoreTestCase):
|
||||
"type": "discussion",
|
||||
"title": "Test Title",
|
||||
"raw_body": "Test body",
|
||||
"rendered_body": "<p>Test body</p>",
|
||||
"pinned": False,
|
||||
"closed": False,
|
||||
"following": False,
|
||||
@@ -392,6 +394,7 @@ class ThreadViewSetPartialUpdateTest(DiscussionAPIViewTestMixin, ModuleStoreTest
|
||||
"type": "discussion",
|
||||
"title": "Original Title",
|
||||
"raw_body": "Edited body",
|
||||
"rendered_body": "<p>Edited body</p>",
|
||||
"pinned": False,
|
||||
"closed": False,
|
||||
"following": False,
|
||||
@@ -536,6 +539,7 @@ class CommentViewSetListTest(DiscussionAPIViewTestMixin, ModuleStoreTestCase):
|
||||
"created_at": "2015-05-11T00:00:00Z",
|
||||
"updated_at": "2015-05-11T11:11:11Z",
|
||||
"raw_body": "Test body",
|
||||
"rendered_body": "<p>Test body</p>",
|
||||
"endorsed": False,
|
||||
"endorsed_by": None,
|
||||
"endorsed_by_label": None,
|
||||
@@ -688,6 +692,7 @@ class CommentViewSetCreateTest(DiscussionAPIViewTestMixin, ModuleStoreTestCase):
|
||||
"created_at": "2015-05-27T00:00:00Z",
|
||||
"updated_at": "2015-05-27T00:00:00Z",
|
||||
"raw_body": "Test body",
|
||||
"rendered_body": "<p>Test body</p>",
|
||||
"endorsed": False,
|
||||
"endorsed_by": None,
|
||||
"endorsed_by_label": None,
|
||||
@@ -770,6 +775,7 @@ class CommentViewSetPartialUpdateTest(DiscussionAPIViewTestMixin, ModuleStoreTes
|
||||
"created_at": "2015-06-03T00:00:00Z",
|
||||
"updated_at": "2015-06-03T00:00:00Z",
|
||||
"raw_body": "Edited body",
|
||||
"rendered_body": "<p>Edited body</p>",
|
||||
"endorsed": False,
|
||||
"endorsed_by": None,
|
||||
"endorsed_by_label": None,
|
||||
|
||||
Reference in New Issue
Block a user