From d72e87d3f33a2939c4e10825421069f3284029f0 Mon Sep 17 00:00:00 2001 From: Muhammad Adeel Tajamul <77053848+muhammadadeeltajamul@users.noreply.github.com> Date: Wed, 16 Oct 2024 13:58:40 +0500 Subject: [PATCH] feat: removed extra spaces from start and end of content (#35647) --- .../rest_api/discussions_notifications.py | 39 +++++++++++++++++-- .../tests/test_discussions_notifications.py | 22 +++++++++-- 2 files changed, 54 insertions(+), 7 deletions(-) diff --git a/lms/djangoapps/discussion/rest_api/discussions_notifications.py b/lms/djangoapps/discussion/rest_api/discussions_notifications.py index 498a05fdb9..b0eb7c89dc 100644 --- a/lms/djangoapps/discussion/rest_api/discussions_notifications.py +++ b/lms/djangoapps/discussion/rest_api/discussions_notifications.py @@ -3,7 +3,7 @@ Discussion notifications sender util. """ import re -from bs4 import BeautifulSoup +from bs4 import BeautifulSoup, Tag from django.conf import settings from django.utils.text import Truncator @@ -380,6 +380,30 @@ def remove_html_tags(text): return re.sub(clean, '', text) +def strip_empty_tags(soup): + """ + Strip starting and ending empty tags from the soup object + """ + def strip_tag(element, reverse=False): + """ + Checks if element is empty and removes it + """ + if not element.get_text(strip=True): + element.extract() + return True + if isinstance(element, Tag): + child_list = element.contents[::-1] if reverse else element.contents + for child in child_list: + if not strip_tag(child): + break + return False + + while soup.contents: + if not (strip_tag(soup.contents[0]) or strip_tag(soup.contents[-1], reverse=True)): + break + return soup + + def clean_thread_html_body(html_body): """ Get post body with tags removed and limited to 500 characters @@ -401,6 +425,9 @@ def clean_thread_html_body(html_body): for match in html_body.find_all(tag): match.unwrap() + if not html_body.find(): + return str(html_body) + # Replace tags that are not allowed in email tags_to_update = [ {"source": "button", "target": "span"}, @@ -412,11 +439,15 @@ def clean_thread_html_body(html_body): for tag_dict in tags_to_update: for source_tag in html_body.find_all(tag_dict['source']): target_tag = html_body.new_tag(tag_dict['target'], **source_tag.attrs) - if source_tag.string: - target_tag.string = source_tag.string - source_tag.replace_with(target_tag) + if source_tag.contents: + for content in list(source_tag.contents): + target_tag.append(content) + source_tag.insert_before(target_tag) + source_tag.extract() for tag in html_body.find_all(True): tag.attrs = {} tag['style'] = 'margin: 0' + + html_body = strip_empty_tags(html_body) return str(html_body) diff --git a/lms/djangoapps/discussion/rest_api/tests/test_discussions_notifications.py b/lms/djangoapps/discussion/rest_api/tests/test_discussions_notifications.py index 0a8d750416..9e4a76aa40 100644 --- a/lms/djangoapps/discussion/rest_api/tests/test_discussions_notifications.py +++ b/lms/djangoapps/discussion/rest_api/tests/test_discussions_notifications.py @@ -179,15 +179,23 @@ class TestCleanThreadHtmlBody(unittest.TestCase): """ Tests that the clean_thread_html_body function replaces the button tag with span tag """ - # Tests for button replacement tag with text html_body = '' expected_output = 'Button' result = clean_thread_html_body(html_body) self.assertEqual(result, expected_output) - # Tests button tag replacement without text + html_body = '
abc
abc
' + expected_output = 'abc
'\ + 'abc
' + result = clean_thread_html_body(html_body) + self.assertEqual(result, expected_output) + + def test_button_tag_removal(self): + """ + Tests button tag with no text is removed if at start or end + """ html_body = '' - expected_output = '' + expected_output = '' result = clean_thread_html_body(html_body) self.assertEqual(result, expected_output) @@ -196,3 +204,11 @@ class TestCleanThreadHtmlBody(unittest.TestCase): html_body = 'Paragraph
' result = clean_thread_html_body(html_body) self.assertEqual(result, 'Paragraph
') + + def test_strip_empty_tags(self): + """ + Tests if the clean_thread_html_body function removes starting and ending empty tags + """ + html_body = 'content
content
')