feat: removed extra spaces from start and end of content (#35647)
This commit is contained in:
committed by
GitHub
parent
e28a01e2bf
commit
d72e87d3f3
@@ -3,7 +3,7 @@ Discussion notifications sender util.
|
||||
"""
|
||||
import re
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from bs4 import BeautifulSoup, Tag
|
||||
from django.conf import settings
|
||||
from django.utils.text import Truncator
|
||||
|
||||
@@ -380,6 +380,30 @@ def remove_html_tags(text):
|
||||
return re.sub(clean, '', text)
|
||||
|
||||
|
||||
def strip_empty_tags(soup):
    """
    Strip starting and ending empty tags from the soup object

    An element is treated as empty when ``get_text(strip=True)`` returns an
    empty string. Empty elements are removed from the start and from the end
    of ``soup``; the first and last non-empty elements are descended into so
    that their own leading (respectively trailing) empty children are removed
    as well. Empty elements located between non-empty content are kept.

    The tree is modified in place and the same ``soup`` object is returned.
    """
    def strip_tag(element, reverse=False):
        """
        Checks if element is empty and removes it

        Returns True when ``element`` itself was extracted. Otherwise the
        children of a Tag are scanned (from the end when ``reverse`` is set,
        from the start otherwise), removing empty ones until the first
        non-empty child is reached, and False is returned.
        """
        if not element.get_text(strip=True):
            element.extract()
            return True
        if isinstance(element, Tag):
            # Iterate over a snapshot: extract() removes the child from
            # element.contents, and mutating the live list while looping over
            # it skips the element that follows every removed child.
            children = list(element.contents)
            if reverse:
                children.reverse()
            for child in children:
                # Keep scanning in the same direction inside nested elements,
                # so only the outer edge of the content is ever stripped.
                if not strip_tag(child, reverse=reverse):
                    break
        return False

    while soup.contents:
        # Try the leading edge first, then the trailing edge; stop once
        # neither end has an empty element left to remove.
        if not (strip_tag(soup.contents[0]) or strip_tag(soup.contents[-1], reverse=True)):
            break
    return soup
|
||||
|
||||
|
||||
def clean_thread_html_body(html_body):
|
||||
"""
|
||||
Get post body with tags removed and limited to 500 characters
|
||||
@@ -401,6 +425,9 @@ def clean_thread_html_body(html_body):
|
||||
for match in html_body.find_all(tag):
|
||||
match.unwrap()
|
||||
|
||||
if not html_body.find():
|
||||
return str(html_body)
|
||||
|
||||
# Replace tags that are not allowed in email
|
||||
tags_to_update = [
|
||||
{"source": "button", "target": "span"},
|
||||
@@ -412,11 +439,15 @@ def clean_thread_html_body(html_body):
|
||||
for tag_dict in tags_to_update:
|
||||
for source_tag in html_body.find_all(tag_dict['source']):
|
||||
target_tag = html_body.new_tag(tag_dict['target'], **source_tag.attrs)
|
||||
if source_tag.string:
|
||||
target_tag.string = source_tag.string
|
||||
source_tag.replace_with(target_tag)
|
||||
if source_tag.contents:
|
||||
for content in list(source_tag.contents):
|
||||
target_tag.append(content)
|
||||
source_tag.insert_before(target_tag)
|
||||
source_tag.extract()
|
||||
|
||||
for tag in html_body.find_all(True):
|
||||
tag.attrs = {}
|
||||
tag['style'] = 'margin: 0'
|
||||
|
||||
html_body = strip_empty_tags(html_body)
|
||||
return str(html_body)
|
||||
|
||||
@@ -179,15 +179,23 @@ class TestCleanThreadHtmlBody(unittest.TestCase):
|
||||
"""
|
||||
Tests that the clean_thread_html_body function replaces the button tag with span tag
|
||||
"""
|
||||
# Tests for button replacement tag with text
|
||||
html_body = '<button class="abc">Button</button>'
|
||||
expected_output = '<span style="margin: 0">Button</span>'
|
||||
result = clean_thread_html_body(html_body)
|
||||
self.assertEqual(result, expected_output)
|
||||
|
||||
# Tests button tag replacement without text
|
||||
html_body = '<p><p>abc</p><button class="abc"></button><p>abc</p></p>'
|
||||
expected_output = '<p style="margin: 0"><p style="margin: 0">abc</p>'\
|
||||
'<span style="margin: 0"></span><p style="margin: 0">abc</p></p>'
|
||||
result = clean_thread_html_body(html_body)
|
||||
self.assertEqual(result, expected_output)
|
||||
|
||||
def test_button_tag_removal(self):
    """
    Tests button tag with no text is removed if at start or end
    """
    html_body = '<button class="abc"></button>'
    # The button is the only element and has no text, so after cleaning
    # nothing is expected to remain in the output.
    expected_output = ''
    result = clean_thread_html_body(html_body)
    self.assertEqual(result, expected_output)
|
||||
|
||||
@@ -196,3 +204,11 @@ class TestCleanThreadHtmlBody(unittest.TestCase):
|
||||
html_body = '<p class="abc" style="color:red" aria-disabled=true>Paragraph</p>'
|
||||
result = clean_thread_html_body(html_body)
|
||||
self.assertEqual(result, '<p style="margin: 0">Paragraph</p>')
|
||||
|
||||
def test_strip_empty_tags(self):
    """
    Verifies that clean_thread_html_body drops the empty tags found at the
    start and at the end of the content
    """
    expected_output = '<p style="margin: 0"><p style="margin: 0">content</p></p>'
    cleaned = clean_thread_html_body('<div><p></p><p>content</p><p></p></div>')
    self.assertEqual(cleaned, expected_output)
|
||||
|
||||
Reference in New Issue
Block a user