From 0a8ec0caab57868dfd100b5ae2d6b97ab0f637ed Mon Sep 17 00:00:00 2001
From: Muhammad Adeel Tajamul
 <77053848+muhammadadeeltajamul@users.noreply.github.com>
Date: Fri, 11 Jul 2025 22:54:05 +0500
Subject: [PATCH] temp: added option to replace spam content with text (#37009)

---
 lms/djangoapps/discussion/rest_api/serializers.py     | 11 +++++++++--
 .../discussion/rest_api/tests/test_serializers.py     |  6 +++---
 lms/envs/common.py                                    |  5 +++++
 3 files changed, 17 insertions(+), 5 deletions(-)
diff --git a/lms/djangoapps/discussion/rest_api/serializers.py b/lms/djangoapps/discussion/rest_api/serializers.py
index b6587c0f36..75ef1f3d51 100644
--- a/lms/djangoapps/discussion/rest_api/serializers.py
+++ b/lms/djangoapps/discussion/rest_api/serializers.py
@@ -144,10 +144,13 @@ def _validate_privileged_access(context: Dict) -> bool:
 def filter_spam_urls_from_html(html_string):
     """
     Filters out spam posts from html
+    Returns:
+        tuple: (html with spam URLs removed, True if any spam URL was removed)
     """
     html_string = html.unescape(html_string)
     soup = BeautifulSoup(html_string, "html.parser")
     patterns = []
+    is_spam = False
     for domain in settings.DISCUSSION_SPAM_URLS:
         escaped = domain.replace(".", r"\.")
         domain_pattern = rf"(\w+\.)*{escaped}(?:/\S*)*"
@@ -165,6 +168,7 @@ def filter_spam_urls_from_html(html_string):
         if href:
             if any(p.search(href) for p in patterns):
                 a_tag.replace_with(a_tag.get_text(strip=True))
+                is_spam = True
 
     for text_node in soup.find_all(string=True):
         new_text = text_node
@@ -172,8 +176,9 @@ def filter_spam_urls_from_html(html_string):
             new_text = p.sub('', new_text)
         if new_text != text_node:
             text_node.replace_with(new_text.strip())
+            is_spam = True
 
-    return str(soup)
+    return str(soup), is_spam
 
 
 class _ContentSerializer(serializers.Serializer):
@@ -283,7 +288,9 @@ class _ContentSerializer(serializers.Serializer):
         """
         if self._rendered_body is None:
             self._rendered_body = render_body(obj["body"])
-        self._rendered_body = filter_spam_urls_from_html(self._rendered_body)
+            self._rendered_body, is_spam = filter_spam_urls_from_html(self._rendered_body)
+            if is_spam and settings.CONTENT_FOR_SPAM_POSTS:
+                self._rendered_body = settings.CONTENT_FOR_SPAM_POSTS
         return self._rendered_body
 
     def get_abuse_flagged(self, obj):
diff --git a/lms/djangoapps/discussion/rest_api/tests/test_serializers.py b/lms/djangoapps/discussion/rest_api/tests/test_serializers.py
index f7aa2d83ad..0333c62d73 100644
--- a/lms/djangoapps/discussion/rest_api/tests/test_serializers.py
+++ b/lms/djangoapps/discussion/rest_api/tests/test_serializers.py
@@ -1123,14 +1123,14 @@ class FilterSpamTest(SharedModuleStoreTestCase):
     @override_settings(DISCUSSION_SPAM_URLS=['example.com'])
     def test_filter(self):
         self.assertEqual(
-            filter_spam_urls_from_html('<div><a href="example.com/abc/def">abc</a></div>'),
+            filter_spam_urls_from_html('<div><a href="example.com/abc/def">abc</a></div>')[0],
             '<div>abc</div>'
         )
         self.assertEqual(
-            filter_spam_urls_from_html('<div>example.com/abc/def</div>'),
+            filter_spam_urls_from_html('<div>example.com/abc/def</div>')[0],
             '<div></div>'
         )
         self.assertEqual(
-            filter_spam_urls_from_html('<div>e x a m p l e . c o m / a b c / d e f</div>'),
+            filter_spam_urls_from_html('<div>e x a m p l e . c o m / a b c / d e f</div>')[0],
             '<div></div>'
         )
diff --git a/lms/envs/common.py b/lms/envs/common.py
index b4f6c8b43f..ba77c23afb 100644
--- a/lms/envs/common.py
+++ b/lms/envs/common.py
@@ -5089,6 +5089,11 @@ EXAMS_DASHBOARD_MICROFRONTEND_URL = None
 # .. setting_description: Urls to filter from discussion content to avoid spam
 DISCUSSION_SPAM_URLS = []
 
+# .. setting_name: CONTENT_FOR_SPAM_POSTS
+# .. setting_default: ""
+# .. setting_description: Text that replaces a post's rendered body when spam URLs are found; empty disables this.
+CONTENT_FOR_SPAM_POSTS = ""
+
 # .. toggle_name: ENABLE_AUTHN_RESET_PASSWORD_HIBP_POLICY
 # .. toggle_implementation: DjangoSetting
 # .. toggle_default: False