From acbb66a9115bb495f02e89e62125f741f1b724c7 Mon Sep 17 00:00:00 2001 From: Ahtisham Shahid Date: Mon, 6 Mar 2023 11:23:11 +0500 Subject: [PATCH] feat: added backward compatibility for urls and images in threads (#31807) * feat: added backward compatibility for URLs and images in threads * feat: added unit tests for markdown converter --- .../django_comment_client/tests/test_utils.py | 123 +++++++++++++++--- .../discussion/django_comment_client/utils.py | 57 ++++++++ 2 files changed, 160 insertions(+), 20 deletions(-) diff --git a/lms/djangoapps/discussion/django_comment_client/tests/test_utils.py b/lms/djangoapps/discussion/django_comment_client/tests/test_utils.py index c93c63ded1..8800504c86 100644 --- a/lms/djangoapps/discussion/django_comment_client/tests/test_utils.py +++ b/lms/djangoapps/discussion/django_comment_client/tests/test_utils.py @@ -132,6 +132,7 @@ class CoursewareContextTestCase(ModuleStoreTestCase): Base testcase class for courseware context for the comment client service integration """ + def setUp(self): super().setUp() self.course = CourseFactory.create(org="TestX", number="101", display_name="Test Course") @@ -169,7 +170,8 @@ class CoursewareContextTestCase(ModuleStoreTestCase): def assertThreadCorrect(thread, discussion, expected_title): # pylint: disable=invalid-name """Asserts that the given thread has the expected set of properties""" assert set(thread.keys()) == {'commentable_id', 'courseware_url', 'courseware_title'} - assert thread.get('courseware_url') == reverse('jump_to', kwargs={'course_id': str(self.course.id), 'location': str(discussion.location)}) + assert thread.get('courseware_url') == reverse('jump_to', kwargs={'course_id': str(self.course.id), + 'location': str(discussion.location)}) assert thread.get('courseware_title') == expected_title assertThreadCorrect(threads[0], self.discussion1, "Chapter / Discussion 1") @@ -342,6 +344,7 @@ class CategoryMapTestMixin: Provides functionality for classes that test `get_discussion_category_map`. """ + def assert_category_map_equals(self, expected, requesting_user=None): """ Call `get_discussion_category_map`, and verify that it returns @@ -357,6 +360,7 @@ class CategoryMapTestCase(CategoryMapTestMixin, ModuleStoreTestCase): Base testcase class for discussion categories for the comment client service integration """ + def setUp(self): super().setUp() @@ -385,11 +389,13 @@ class CategoryMapTestCase(CategoryMapTestMixin, ModuleStoreTestCase): **kwargs ) - def assert_category_map_equals(self, expected, divided_only_if_explicit=False, exclude_unstarted=True): # pylint: disable=arguments-differ + def assert_category_map_equals(self, expected, divided_only_if_explicit=False, + exclude_unstarted=True): # pylint: disable=arguments-differ """ Asserts the expected map with the map returned by get_discussion_category_map method. """ - assert utils.get_discussion_category_map(self.course, self.instructor, divided_only_if_explicit, exclude_unstarted) == expected + assert utils.get_discussion_category_map(self.course, self.instructor, divided_only_if_explicit, + exclude_unstarted) == expected def test_empty(self): self.assert_category_map_equals({"entries": {}, "subcategories": {}, "children": []}) @@ -976,6 +982,7 @@ class ContentGroupCategoryMapTestCase(CategoryMapTestMixin, ContentGroupTestCase Tests `get_discussion_category_map` on discussion xblocks which are only visible to some content groups. """ + def test_staff_user(self): """ Verify that the staff user can access the alpha, beta, and @@ -1198,7 +1205,8 @@ class IsCommentableDividedTestCase(ModuleStoreTestCase): return topic_name_to_id(course, name) # no topics - assert not utils.is_commentable_divided(course.id, to_id('General')), "Course doesn't even have a 'General' topic" + assert not utils.is_commentable_divided(course.id, + to_id('General')), "Course doesn't even have a 'General' topic" # not cohorted config_course_cohorts(course, is_cohorted=False) @@ -1221,7 +1229,8 @@ class IsCommentableDividedTestCase(ModuleStoreTestCase): assert cohorts.is_course_cohorted(course.id) assert not utils.is_commentable_divided(course.id, to_id('General')), "Course is cohorted, but 'General' isn't." - assert utils.is_commentable_divided(course.id, to_id('Feedback')), 'Feedback was listed as cohorted. Should be.' + assert utils.is_commentable_divided(course.id, + to_id('Feedback')), 'Feedback was listed as cohorted. Should be.' def test_is_commentable_divided_inline_discussion(self): course = modulestore().get_course(self.toy_course_key) @@ -1240,7 +1249,8 @@ class IsCommentableDividedTestCase(ModuleStoreTestCase): divided_discussions=["Feedback", "random_inline"] ) - assert not utils.is_commentable_divided(course.id, to_id('random')), 'By default, Non-top-level discussions are not cohorted in a cohorted courses.' + assert not utils.is_commentable_divided(course.id, to_id( + 'random')), 'By default, Non-top-level discussions are not cohorted in a cohorted courses.' # if always_divide_inline_discussions is set to False, non-top-level discussion are always # not divided unless they are explicitly set in divided_discussions @@ -1255,9 +1265,12 @@ class IsCommentableDividedTestCase(ModuleStoreTestCase): always_divide_inline_discussions=False ) - assert not utils.is_commentable_divided(course.id, to_id('random')), 'Non-top-level discussion is not cohorted if always_divide_inline_discussions is False.' - assert utils.is_commentable_divided(course.id, to_id('random_inline')), 'If always_divide_inline_discussions set to False, Non-top-level discussion is cohorted if explicitly set in cohorted_discussions.' - assert utils.is_commentable_divided(course.id, to_id('Feedback')), 'If always_divide_inline_discussions set to False, top-level discussion are not affected.' + assert not utils.is_commentable_divided(course.id, to_id( + 'random')), 'Non-top-level discussion is not cohorted if always_divide_inline_discussions is False.' + assert utils.is_commentable_divided(course.id, to_id( + 'random_inline')), 'If always_divide_inline_discussions set to False, Non-top-level discussion is cohorted if explicitly set in cohorted_discussions.' + assert utils.is_commentable_divided(course.id, to_id( + 'Feedback')), 'If always_divide_inline_discussions set to False, top-level discussion are not affected.' def test_is_commentable_divided_team(self): course = modulestore().get_course(self.toy_course_key) @@ -1430,7 +1443,8 @@ class GroupNameTestCase(ModuleStoreTestCase): self.course.id, enable_cohorts=True, division_scheme=CourseDiscussionSettings.COHORT ) course_discussion_settings = CourseDiscussionSettings.get(self.course.id) - assert {self.test_cohort_1.id: self.test_cohort_1.name, self.test_cohort_2.id: self.test_cohort_2.name} == get_group_names_by_id(course_discussion_settings) + assert {self.test_cohort_1.id: self.test_cohort_1.name, + self.test_cohort_2.id: self.test_cohort_2.name} == get_group_names_by_id(course_discussion_settings) assert self.test_cohort_2.name == utils.get_group_name(self.test_cohort_2.id, course_discussion_settings) # Test also with a group_id that doesn't exist. assert utils.get_group_name((- 1000), course_discussion_settings) is None @@ -1462,10 +1476,14 @@ class PermissionsTestCase(ModuleStoreTestCase): 'lms.djangoapps.discussion.django_comment_client.utils.check_permissions_by_view' ) as check_perm: check_perm.return_value = True - assert utils.get_ability(None, content, user) == {'editable': True, 'can_reply': True, 'can_delete': True, 'can_openclose': True, 'can_vote': False, 'can_report': False} + assert utils.get_ability(None, content, user) == {'editable': True, 'can_reply': True, 'can_delete': True, + 'can_openclose': True, 'can_vote': False, + 'can_report': False} content['user_id'] = '2' - assert utils.get_ability(None, content, user) == {'editable': True, 'can_reply': True, 'can_delete': True, 'can_openclose': True, 'can_vote': True, 'can_report': True} + assert utils.get_ability(None, content, user) == {'editable': True, 'can_reply': True, 'can_delete': True, + 'can_openclose': True, 'can_vote': True, + 'can_report': True} def test_get_ability_with_global_staff(self): """ @@ -1480,7 +1498,9 @@ class PermissionsTestCase(ModuleStoreTestCase): # check_permissions_by_view returns false because user is not enrolled in the course. check_perm.return_value = False global_staff = UserFactory(username='global_staff', email='global_staff@edx.org', is_staff=True) - assert utils.get_ability(None, content, global_staff) == {'editable': False, 'can_reply': False, 'can_delete': False, 'can_openclose': False, 'can_vote': False, 'can_report': True} + assert utils.get_ability(None, content, global_staff) == {'editable': False, 'can_reply': False, + 'can_delete': False, 'can_openclose': False, + 'can_vote': False, 'can_report': True} def test_is_content_authored_by(self): content = {} @@ -1573,11 +1593,26 @@ class GroupModeratorPermissionsTestCase(ModuleStoreTestCase): Group moderator should not have moderator permissions if the discussions are not divided. """ content = {'user_id': self.plain_user.id, 'type': 'thread', 'username': self.plain_user.username} - assert utils.get_ability(self.course.id, content, self.group_moderator) == {'editable': False, 'can_reply': True, 'can_delete': False, 'can_openclose': False, 'can_vote': True, 'can_report': True} + assert utils.get_ability(self.course.id, content, self.group_moderator) == {'editable': False, + 'can_reply': True, + 'can_delete': False, + 'can_openclose': False, + 'can_vote': True, + 'can_report': True} content = {'user_id': self.cohorted_user.id, 'type': 'thread'} - assert utils.get_ability(self.course.id, content, self.group_moderator) == {'editable': False, 'can_reply': True, 'can_delete': False, 'can_openclose': False, 'can_vote': True, 'can_report': True} + assert utils.get_ability(self.course.id, content, self.group_moderator) == {'editable': False, + 'can_reply': True, + 'can_delete': False, + 'can_openclose': False, + 'can_vote': True, + 'can_report': True} content = {'user_id': self.verified_user.id, 'type': 'thread'} - assert utils.get_ability(self.course.id, content, self.group_moderator) == {'editable': False, 'can_reply': True, 'can_delete': False, 'can_openclose': False, 'can_vote': True, 'can_report': True} + assert utils.get_ability(self.course.id, content, self.group_moderator) == {'editable': False, + 'can_reply': True, + 'can_delete': False, + 'can_openclose': False, + 'can_vote': True, + 'can_report': True} @mock.patch( 'lms.djangoapps.discussion.django_comment_client.permissions._check_condition', @@ -1590,7 +1625,11 @@ class GroupModeratorPermissionsTestCase(ModuleStoreTestCase): set_discussion_division_settings(self.course.id, enable_cohorts=True, division_scheme=CourseDiscussionSettings.COHORT) content = {'user_id': self.cohorted_user.id, 'type': 'thread', 'username': self.cohorted_user.username} - assert utils.get_ability(self.course.id, content, self.group_moderator) == {'editable': True, 'can_reply': True, 'can_delete': True, 'can_openclose': True, 'can_vote': True, 'can_report': True} + assert utils.get_ability(self.course.id, content, self.group_moderator) == {'editable': True, 'can_reply': True, + 'can_delete': True, + 'can_openclose': True, + 'can_vote': True, + 'can_report': True} @mock.patch( 'lms.djangoapps.discussion.django_comment_client.permissions._check_condition', @@ -1603,7 +1642,12 @@ class GroupModeratorPermissionsTestCase(ModuleStoreTestCase): content = {'user_id': self.plain_user.id, 'type': 'thread', 'username': self.plain_user.username} set_discussion_division_settings(self.course.id, division_scheme=CourseDiscussionSettings.NONE) - assert utils.get_ability(self.course.id, content, self.group_moderator) == {'editable': False, 'can_reply': True, 'can_delete': False, 'can_openclose': False, 'can_vote': True, 'can_report': True} + assert utils.get_ability(self.course.id, content, self.group_moderator) == {'editable': False, + 'can_reply': True, + 'can_delete': False, + 'can_openclose': False, + 'can_vote': True, + 'can_report': True} class ClientConfigurationTestCase(TestCase): @@ -1636,8 +1680,8 @@ class ClientConfigurationTestCase(TestCase): def set_discussion_division_settings( - course_key, enable_cohorts=False, always_divide_inline_discussions=False, - divided_discussions=[], division_scheme=CourseDiscussionSettings.COHORT + course_key, enable_cohorts=False, always_divide_inline_discussions=False, + divided_discussions=[], division_scheme=CourseDiscussionSettings.COHORT ): """ Convenience method for setting cohort enablement and discussion settings. @@ -1730,3 +1774,42 @@ class SanitizeTests(unittest.TestCase): def test_input_output(self, input_str, expected_output): """Test a range of inputs for cleanup.""" assert utils.sanitize_body(input_str) == expected_output + + +class TestConvertHtmlToMarkdown(unittest.TestCase): + """ + Tests for the convert_html_to_markdown function. + """ + def test_convert_a_to_markdown(self): + """ + Tests that the convert_a_to_markdown function converts HTML anchor tags to Markdown. + """ + input_text = 'Example' + expected_output = '[Example](https://example.com)' + self.assertEqual(utils.convert_a_to_markdown(input_text), expected_output) + + def test_convert_img_to_markdown(self): + """ + Tests that the convert_img_to_markdown function converts HTML image tags to Markdown. + """ + input_text = '' + expected_output = '![](https://example.com/Full-form-of-URL-1-1024x824.jpg "")' + self.assertEqual(utils.convert_img_to_markdown(input_text), expected_output) + + def test_convert_p_to_markdown(self): + """ + Tests that the convert_p_to_markdown function converts HTML paragraph tags to Markdown. + """ + input_text = '

Paragraph text

' + expected_output = '\n\nParagraph text\n\n' + self.assertEqual(utils.convert_p_to_markdown(input_text), expected_output) + + def test_convert_html_to_markdown(self): + """ + Tests that the convert_html_to_markdown function converts HTML to Markdown. + """ + input_text = 'Example

Paragraph text

' \ + '' + # pylint: disable=line-too-long + expected_output = '[Example](https://example.com)\n\nParagraph text\n\n ![](https://example.com/Full-form-of-URL-1-1024x824.jpg "")' + self.assertEqual(utils.convert_html_to_markdown(input_text), expected_output) diff --git a/lms/djangoapps/discussion/django_comment_client/utils.py b/lms/djangoapps/discussion/django_comment_client/utils.py index 1de0035c9d..3b5e1af4a0 100644 --- a/lms/djangoapps/discussion/django_comment_client/utils.py +++ b/lms/djangoapps/discussion/django_comment_client/utils.py @@ -1,6 +1,8 @@ # pylint: skip-file import json import logging + +import re from typing import Set import regex @@ -70,13 +72,16 @@ def strip_blank(dic): """ Returns a dictionary stripped of any 'blank' (empty) keys """ + def _is_blank(v): """ Determines if the provided value contains no information """ return isinstance(v, str) and len(v.strip()) == 0 + return {k: v for k, v in dic.items() if not _is_blank(v)} + # TODO should we be checking if d1 and d2 have the same keys with different values? @@ -794,6 +799,7 @@ def prepare_content( # Replace the content body with a sanitized version if 'body' in content: content['body'] = sanitize_body(content['body']) + content['body'] = convert_html_to_markdown(content['body']) if content.get("endorsement"): endorsement = content["endorsement"] @@ -1040,3 +1046,54 @@ def sanitize_body(body): # This will remove the Markdown style links with data: URLs, and turn them # into empty links. return regex.sub(r'\]\(data:[^)]+\)', ']()', body) + + +def convert_html_to_markdown(text): + """ + Convert HTML to Markdown + """ + text = convert_img_to_markdown(text) + text = convert_a_to_markdown(text) + text = convert_p_to_markdown(text) + return text + + +def convert_a_to_markdown(text): + """ + Convert tags to Markdown format + """ + def replace_link(match): + url = match.group(1) + if not url.startswith('http://') and not url.startswith('https://'): + url = 'https://' + url + title = match.group(3) + return f"[{title}]({url})" + + # Use a regular expression to match each tag and call replace_link on each match + # return re.sub(r'|', replace_link, text) + regex_exp = r']+)? href="([^"]+)"(?: [^>]+)?(?: title="([^"]+)")?(?: [^>]+)?>(.*?)' + return re.sub(regex_exp, replace_link, text) + + +def convert_img_to_markdown(text): + """ + Convert tags to Markdown format + """ + def replace_image(match): + url = match.group(1) + if not url.startswith('http://') and not url.startswith('https://'): + url = 'https://' + url + alt_text = match.group(2) or '' + title = match.group(3) or '' + return f"![{alt_text}]({url} \"{title}\")" + + # Use a regular expression to match each tag and call replace_image on each match + # pylint: disable=line-too-long + regex_exp = r']+)? src="([^"]+)"(?: [^>]+)?(?: alt="([^"]+)")?(?: [^>]+)?(?: title="([^"]+)")?(?: [^>]+)?/>' + + return re.sub(regex_exp, replace_image, text) + + +def convert_p_to_markdown(text): + # Use a regular expression to match each

tag and replace it with a Markdown paragraph + return re.sub(r']+)?>(.*?)

', r'\n\n\1\n\n', text)