feat: added backward compatibility for urls and images in threads (#31807)

* feat: added backward compatibility for URLs and images in threads

* feat: added unit tests for markdown converter
This commit is contained in:
Ahtisham Shahid
2023-03-06 11:23:11 +05:00
committed by GitHub
parent cf27d344bd
commit acbb66a911
2 changed files with 160 additions and 20 deletions

View File

@@ -132,6 +132,7 @@ class CoursewareContextTestCase(ModuleStoreTestCase):
Base testcase class for courseware context for the
comment client service integration
"""
def setUp(self):
super().setUp()
self.course = CourseFactory.create(org="TestX", number="101", display_name="Test Course")
@@ -169,7 +170,8 @@ class CoursewareContextTestCase(ModuleStoreTestCase):
def assertThreadCorrect(thread, discussion, expected_title): # pylint: disable=invalid-name
"""Asserts that the given thread has the expected set of properties"""
assert set(thread.keys()) == {'commentable_id', 'courseware_url', 'courseware_title'}
assert thread.get('courseware_url') == reverse('jump_to', kwargs={'course_id': str(self.course.id), 'location': str(discussion.location)})
assert thread.get('courseware_url') == reverse('jump_to', kwargs={'course_id': str(self.course.id),
'location': str(discussion.location)})
assert thread.get('courseware_title') == expected_title
assertThreadCorrect(threads[0], self.discussion1, "Chapter / Discussion 1")
@@ -342,6 +344,7 @@ class CategoryMapTestMixin:
Provides functionality for classes that test
`get_discussion_category_map`.
"""
def assert_category_map_equals(self, expected, requesting_user=None):
"""
Call `get_discussion_category_map`, and verify that it returns
@@ -357,6 +360,7 @@ class CategoryMapTestCase(CategoryMapTestMixin, ModuleStoreTestCase):
Base testcase class for discussion categories for the
comment client service integration
"""
def setUp(self):
super().setUp()
@@ -385,11 +389,13 @@ class CategoryMapTestCase(CategoryMapTestMixin, ModuleStoreTestCase):
**kwargs
)
def assert_category_map_equals(self, expected, divided_only_if_explicit=False, exclude_unstarted=True): # pylint: disable=arguments-differ
def assert_category_map_equals(self, expected, divided_only_if_explicit=False,
exclude_unstarted=True): # pylint: disable=arguments-differ
"""
Asserts the expected map with the map returned by get_discussion_category_map method.
"""
assert utils.get_discussion_category_map(self.course, self.instructor, divided_only_if_explicit, exclude_unstarted) == expected
assert utils.get_discussion_category_map(self.course, self.instructor, divided_only_if_explicit,
exclude_unstarted) == expected
def test_empty(self):
self.assert_category_map_equals({"entries": {}, "subcategories": {}, "children": []})
@@ -976,6 +982,7 @@ class ContentGroupCategoryMapTestCase(CategoryMapTestMixin, ContentGroupTestCase
Tests `get_discussion_category_map` on discussion xblocks which are
only visible to some content groups.
"""
def test_staff_user(self):
"""
Verify that the staff user can access the alpha, beta, and
@@ -1198,7 +1205,8 @@ class IsCommentableDividedTestCase(ModuleStoreTestCase):
return topic_name_to_id(course, name)
# no topics
assert not utils.is_commentable_divided(course.id, to_id('General')), "Course doesn't even have a 'General' topic"
assert not utils.is_commentable_divided(course.id,
to_id('General')), "Course doesn't even have a 'General' topic"
# not cohorted
config_course_cohorts(course, is_cohorted=False)
@@ -1221,7 +1229,8 @@ class IsCommentableDividedTestCase(ModuleStoreTestCase):
assert cohorts.is_course_cohorted(course.id)
assert not utils.is_commentable_divided(course.id, to_id('General')), "Course is cohorted, but 'General' isn't."
assert utils.is_commentable_divided(course.id, to_id('Feedback')), 'Feedback was listed as cohorted. Should be.'
assert utils.is_commentable_divided(course.id,
to_id('Feedback')), 'Feedback was listed as cohorted. Should be.'
def test_is_commentable_divided_inline_discussion(self):
course = modulestore().get_course(self.toy_course_key)
@@ -1240,7 +1249,8 @@ class IsCommentableDividedTestCase(ModuleStoreTestCase):
divided_discussions=["Feedback", "random_inline"]
)
assert not utils.is_commentable_divided(course.id, to_id('random')), 'By default, Non-top-level discussions are not cohorted in a cohorted courses.'
assert not utils.is_commentable_divided(course.id, to_id(
'random')), 'By default, Non-top-level discussions are not cohorted in a cohorted courses.'
# if always_divide_inline_discussions is set to False, non-top-level discussion are always
# not divided unless they are explicitly set in divided_discussions
@@ -1255,9 +1265,12 @@ class IsCommentableDividedTestCase(ModuleStoreTestCase):
always_divide_inline_discussions=False
)
assert not utils.is_commentable_divided(course.id, to_id('random')), 'Non-top-level discussion is not cohorted if always_divide_inline_discussions is False.'
assert utils.is_commentable_divided(course.id, to_id('random_inline')), 'If always_divide_inline_discussions set to False, Non-top-level discussion is cohorted if explicitly set in cohorted_discussions.'
assert utils.is_commentable_divided(course.id, to_id('Feedback')), 'If always_divide_inline_discussions set to False, top-level discussion are not affected.'
assert not utils.is_commentable_divided(course.id, to_id(
'random')), 'Non-top-level discussion is not cohorted if always_divide_inline_discussions is False.'
assert utils.is_commentable_divided(course.id, to_id(
'random_inline')), 'If always_divide_inline_discussions set to False, Non-top-level discussion is cohorted if explicitly set in cohorted_discussions.'
assert utils.is_commentable_divided(course.id, to_id(
'Feedback')), 'If always_divide_inline_discussions set to False, top-level discussion are not affected.'
def test_is_commentable_divided_team(self):
course = modulestore().get_course(self.toy_course_key)
@@ -1430,7 +1443,8 @@ class GroupNameTestCase(ModuleStoreTestCase):
self.course.id, enable_cohorts=True, division_scheme=CourseDiscussionSettings.COHORT
)
course_discussion_settings = CourseDiscussionSettings.get(self.course.id)
assert {self.test_cohort_1.id: self.test_cohort_1.name, self.test_cohort_2.id: self.test_cohort_2.name} == get_group_names_by_id(course_discussion_settings)
assert {self.test_cohort_1.id: self.test_cohort_1.name,
self.test_cohort_2.id: self.test_cohort_2.name} == get_group_names_by_id(course_discussion_settings)
assert self.test_cohort_2.name == utils.get_group_name(self.test_cohort_2.id, course_discussion_settings)
# Test also with a group_id that doesn't exist.
assert utils.get_group_name((- 1000), course_discussion_settings) is None
@@ -1462,10 +1476,14 @@ class PermissionsTestCase(ModuleStoreTestCase):
'lms.djangoapps.discussion.django_comment_client.utils.check_permissions_by_view'
) as check_perm:
check_perm.return_value = True
assert utils.get_ability(None, content, user) == {'editable': True, 'can_reply': True, 'can_delete': True, 'can_openclose': True, 'can_vote': False, 'can_report': False}
assert utils.get_ability(None, content, user) == {'editable': True, 'can_reply': True, 'can_delete': True,
'can_openclose': True, 'can_vote': False,
'can_report': False}
content['user_id'] = '2'
assert utils.get_ability(None, content, user) == {'editable': True, 'can_reply': True, 'can_delete': True, 'can_openclose': True, 'can_vote': True, 'can_report': True}
assert utils.get_ability(None, content, user) == {'editable': True, 'can_reply': True, 'can_delete': True,
'can_openclose': True, 'can_vote': True,
'can_report': True}
def test_get_ability_with_global_staff(self):
"""
@@ -1480,7 +1498,9 @@ class PermissionsTestCase(ModuleStoreTestCase):
# check_permissions_by_view returns false because user is not enrolled in the course.
check_perm.return_value = False
global_staff = UserFactory(username='global_staff', email='global_staff@edx.org', is_staff=True)
assert utils.get_ability(None, content, global_staff) == {'editable': False, 'can_reply': False, 'can_delete': False, 'can_openclose': False, 'can_vote': False, 'can_report': True}
assert utils.get_ability(None, content, global_staff) == {'editable': False, 'can_reply': False,
'can_delete': False, 'can_openclose': False,
'can_vote': False, 'can_report': True}
def test_is_content_authored_by(self):
content = {}
@@ -1573,11 +1593,26 @@ class GroupModeratorPermissionsTestCase(ModuleStoreTestCase):
Group moderator should not have moderator permissions if the discussions are not divided.
"""
content = {'user_id': self.plain_user.id, 'type': 'thread', 'username': self.plain_user.username}
assert utils.get_ability(self.course.id, content, self.group_moderator) == {'editable': False, 'can_reply': True, 'can_delete': False, 'can_openclose': False, 'can_vote': True, 'can_report': True}
assert utils.get_ability(self.course.id, content, self.group_moderator) == {'editable': False,
'can_reply': True,
'can_delete': False,
'can_openclose': False,
'can_vote': True,
'can_report': True}
content = {'user_id': self.cohorted_user.id, 'type': 'thread'}
assert utils.get_ability(self.course.id, content, self.group_moderator) == {'editable': False, 'can_reply': True, 'can_delete': False, 'can_openclose': False, 'can_vote': True, 'can_report': True}
assert utils.get_ability(self.course.id, content, self.group_moderator) == {'editable': False,
'can_reply': True,
'can_delete': False,
'can_openclose': False,
'can_vote': True,
'can_report': True}
content = {'user_id': self.verified_user.id, 'type': 'thread'}
assert utils.get_ability(self.course.id, content, self.group_moderator) == {'editable': False, 'can_reply': True, 'can_delete': False, 'can_openclose': False, 'can_vote': True, 'can_report': True}
assert utils.get_ability(self.course.id, content, self.group_moderator) == {'editable': False,
'can_reply': True,
'can_delete': False,
'can_openclose': False,
'can_vote': True,
'can_report': True}
@mock.patch(
'lms.djangoapps.discussion.django_comment_client.permissions._check_condition',
@@ -1590,7 +1625,11 @@ class GroupModeratorPermissionsTestCase(ModuleStoreTestCase):
set_discussion_division_settings(self.course.id, enable_cohorts=True,
division_scheme=CourseDiscussionSettings.COHORT)
content = {'user_id': self.cohorted_user.id, 'type': 'thread', 'username': self.cohorted_user.username}
assert utils.get_ability(self.course.id, content, self.group_moderator) == {'editable': True, 'can_reply': True, 'can_delete': True, 'can_openclose': True, 'can_vote': True, 'can_report': True}
assert utils.get_ability(self.course.id, content, self.group_moderator) == {'editable': True, 'can_reply': True,
'can_delete': True,
'can_openclose': True,
'can_vote': True,
'can_report': True}
@mock.patch(
'lms.djangoapps.discussion.django_comment_client.permissions._check_condition',
@@ -1603,7 +1642,12 @@ class GroupModeratorPermissionsTestCase(ModuleStoreTestCase):
content = {'user_id': self.plain_user.id, 'type': 'thread', 'username': self.plain_user.username}
set_discussion_division_settings(self.course.id, division_scheme=CourseDiscussionSettings.NONE)
assert utils.get_ability(self.course.id, content, self.group_moderator) == {'editable': False, 'can_reply': True, 'can_delete': False, 'can_openclose': False, 'can_vote': True, 'can_report': True}
assert utils.get_ability(self.course.id, content, self.group_moderator) == {'editable': False,
'can_reply': True,
'can_delete': False,
'can_openclose': False,
'can_vote': True,
'can_report': True}
class ClientConfigurationTestCase(TestCase):
@@ -1636,8 +1680,8 @@ class ClientConfigurationTestCase(TestCase):
def set_discussion_division_settings(
course_key, enable_cohorts=False, always_divide_inline_discussions=False,
divided_discussions=[], division_scheme=CourseDiscussionSettings.COHORT
course_key, enable_cohorts=False, always_divide_inline_discussions=False,
divided_discussions=[], division_scheme=CourseDiscussionSettings.COHORT
):
"""
Convenience method for setting cohort enablement and discussion settings.
@@ -1730,3 +1774,42 @@ class SanitizeTests(unittest.TestCase):
def test_input_output(self, input_str, expected_output):
"""Test a range of inputs for cleanup."""
assert utils.sanitize_body(input_str) == expected_output
class TestConvertHtmlToMarkdown(unittest.TestCase):
"""
Tests for the convert_html_to_markdown function.
"""
def test_convert_a_to_markdown(self):
"""
Tests that the convert_a_to_markdown function converts HTML anchor tags to Markdown.
"""
input_text = '<a href="https://example.com">Example</a>'
expected_output = '[Example](https://example.com)'
self.assertEqual(utils.convert_a_to_markdown(input_text), expected_output)
def test_convert_img_to_markdown(self):
"""
Tests that the convert_img_to_markdown function converts HTML image tags to Markdown.
"""
input_text = '<img src="https://example.com/Full-form-of-URL-1-1024x824.jpg" width="1024" height="824" />'
expected_output = '![](https://example.com/Full-form-of-URL-1-1024x824.jpg "")'
self.assertEqual(utils.convert_img_to_markdown(input_text), expected_output)
def test_convert_p_to_markdown(self):
"""
Tests that the convert_p_to_markdown function converts HTML paragraph tags to Markdown.
"""
input_text = '<p>Paragraph text</p>'
expected_output = '\n\nParagraph text\n\n'
self.assertEqual(utils.convert_p_to_markdown(input_text), expected_output)
def test_convert_html_to_markdown(self):
"""
Tests that the convert_html_to_markdown function converts HTML to Markdown.
"""
input_text = '<a href="https://example.com">Example</a><p>Paragraph text</p> ' \
'<img src="https://example.com/Full-form-of-URL-1-1024x824.jpg" width="1024" height="824" />'
# pylint: disable=line-too-long
expected_output = '[Example](https://example.com)\n\nParagraph text\n\n ![](https://example.com/Full-form-of-URL-1-1024x824.jpg "")'
self.assertEqual(utils.convert_html_to_markdown(input_text), expected_output)

View File

@@ -1,6 +1,8 @@
# pylint: skip-file
import json
import logging
import re
from typing import Set
import regex
@@ -70,13 +72,16 @@ def strip_blank(dic):
"""
Returns a dictionary stripped of any 'blank' (empty) keys
"""
def _is_blank(v):
"""
Determines if the provided value contains no information
"""
return isinstance(v, str) and len(v.strip()) == 0
return {k: v for k, v in dic.items() if not _is_blank(v)}
# TODO should we be checking if d1 and d2 have the same keys with different values?
@@ -794,6 +799,7 @@ def prepare_content(
# Replace the content body with a sanitized version
if 'body' in content:
content['body'] = sanitize_body(content['body'])
content['body'] = convert_html_to_markdown(content['body'])
if content.get("endorsement"):
endorsement = content["endorsement"]
@@ -1040,3 +1046,54 @@ def sanitize_body(body):
# This will remove the Markdown style links with data: URLs, and turn them
# into empty links.
return regex.sub(r'\]\(data:[^)]+\)', ']()', body)
def convert_html_to_markdown(text):
"""
Convert HTML to Markdown
"""
text = convert_img_to_markdown(text)
text = convert_a_to_markdown(text)
text = convert_p_to_markdown(text)
return text
def convert_a_to_markdown(text):
"""
Convert <a> tags to Markdown format
"""
def replace_link(match):
url = match.group(1)
if not url.startswith('http://') and not url.startswith('https://'):
url = 'https://' + url
title = match.group(3)
return f"[{title}]({url})"
# Use a regular expression to match each <a> tag and call replace_link on each match
# return re.sub(r'<a.*?href="(.*?)".*?title="(.*?)".*?</a>|<a.*?href="(.*?)".*?</a>', replace_link, text)
regex_exp = r'<a(?: [^>]+)? href="([^"]+)"(?: [^>]+)?(?: title="([^"]+)")?(?: [^>]+)?>(.*?)</a>'
return re.sub(regex_exp, replace_link, text)
def convert_img_to_markdown(text):
"""
Convert <img> tags to Markdown format
"""
def replace_image(match):
url = match.group(1)
if not url.startswith('http://') and not url.startswith('https://'):
url = 'https://' + url
alt_text = match.group(2) or ''
title = match.group(3) or ''
return f"![{alt_text}]({url} \"{title}\")"
# Use a regular expression to match each <img> tag and call replace_image on each match
# pylint: disable=line-too-long
regex_exp = r'<img(?: [^>]+)? src="([^"]+)"(?: [^>]+)?(?: alt="([^"]+)")?(?: [^>]+)?(?: title="([^"]+)")?(?: [^>]+)?/>'
return re.sub(regex_exp, replace_image, text)
def convert_p_to_markdown(text):
# Use a regular expression to match each <p> tag and replace it with a Markdown paragraph
return re.sub(r'<p(?: [^>]+)?>(.*?)</p>', r'\n\n\1\n\n', text)