From dfedde57b002dfdf854efe75e3e40471ae79bd4d Mon Sep 17 00:00:00 2001 From: Ahmad Bilal Khalid Date: Tue, 4 Aug 2020 20:26:15 +0500 Subject: [PATCH 1/5] Handle edge case where dropdown values are not being indexed, fixed tests --- common/lib/xmodule/xmodule/capa_module.py | 32 +++++++++++++++++-- .../xmodule/xmodule/tests/test_capa_module.py | 32 ++++++++++--------- 2 files changed, 46 insertions(+), 18 deletions(-) diff --git a/common/lib/xmodule/xmodule/capa_module.py b/common/lib/xmodule/xmodule/capa_module.py index 1dbfa7429f..58f4bfc132 100644 --- a/common/lib/xmodule/xmodule/capa_module.py +++ b/common/lib/xmodule/xmodule/capa_module.py @@ -7,6 +7,7 @@ import re import sys import six +from bleach.sanitizer import Cleaner from lxml import etree from pkg_resources import resource_string from web_fragments.fragment import Fragment @@ -17,7 +18,6 @@ from xmodule.contentstore.django import contentstore from xmodule.editing_module import EditingMixin from xmodule.exceptions import NotFoundError, ProcessingError from xmodule.raw_module import RawMixin -from xmodule.util.misc import escape_html_characters from xmodule.util.sandboxing import get_python_lib_zip from xmodule.util.xmodule_django import add_webpack_to_fragment from xmodule.x_module import ( @@ -35,6 +35,13 @@ from .capa_base import CapaMixin, ComplexEncoder, _ log = logging.getLogger("edx.courseware") +def _remove_html_tags_and_extra_spaces(data): + """ Remove html tags and extra white spaces e.g newline, tabs etc from provided data """ + cleaner = Cleaner(tags=[], strip=True) + cleaned_text = " ".join(re.split(r"\s+", cleaner.clean(data), flags=re.UNICODE)).strip() + return cleaned_text + + @XBlock.wants('user') @XBlock.needs('i18n') class ProblemBlock( @@ -300,7 +307,26 @@ class ProblemBlock( """ Return dictionary prepared with module content and type for indexing. """ + def _make_optionresponse_indexable(xml_content): + """ + Extract options from dropdown and replace the actual xml for optionresponse with them. + """ + xml_content_copy = xml_content[:] + root = etree.fromstring(xml_content_copy) + for option_response in root.findall("optionresponse"): + option_response_xml = etree.tostring(option_response, method="html").decode("utf-8") + option_input = option_response.find("optioninput") + if option_input is not None: + option_input_attrib = option_input.get("options", "") + option_input_attrib = "[" + option_input_attrib.strip("()").replace("'", '"') + "]" + values_of_dropdown = ", ".join(json.loads(option_input_attrib)) + xml_content_copy = xml_content_copy.replace(option_response_xml, values_of_dropdown) + return xml_content_copy + xblock_body = super(ProblemBlock, self).index_dictionary() + + capa_content = _make_optionresponse_indexable(self.data) + # Removing solutions and hints, as well as script and style capa_content = re.sub( re.compile( @@ -313,9 +339,9 @@ class ProblemBlock( re.DOTALL | re.VERBOSE), "", - self.data + capa_content ) - capa_content = escape_html_characters(capa_content) + capa_content = _remove_html_tags_and_extra_spaces(capa_content) capa_body = { "capa_content": capa_content, "display_name": self.display_name, diff --git a/common/lib/xmodule/xmodule/tests/test_capa_module.py b/common/lib/xmodule/xmodule/tests/test_capa_module.py index aead4e8808..bd4b04c087 100644 --- a/common/lib/xmodule/xmodule/tests/test_capa_module.py +++ b/common/lib/xmodule/xmodule/tests/test_capa_module.py @@ -39,6 +39,8 @@ from xmodule.tests import DATA_DIR from ..capa_base import RANDOMIZATION, SHOWANSWER from . import get_test_system +unittest.TestCase.maxDiff = None + class CapaFactory(object): """ @@ -2509,7 +2511,7 @@ class ProblemBlockXMLTest(unittest.TestCase): 'problem_types': ["multiplechoiceresponse"], 'content': { 'display_name': name, - 'capa_content': ' Label Some comment Apple Banana Chocolate Donut ' + 'capa_content': 'Label Some comment Apple Banana Chocolate Donut' } }) @@ -2542,7 +2544,7 @@ class ProblemBlockXMLTest(unittest.TestCase): 'problem_types': ["optionresponse", "multiplechoiceresponse"], 'content': { 'display_name': name, - 'capa_content': ' Label Some comment Donut Buggy ' + 'capa_content': 'Label Some comment Donut Buggy' } } ) @@ -2579,7 +2581,7 @@ class ProblemBlockXMLTest(unittest.TestCase): 'problem_types': [], 'content': { 'display_name': name, - 'capa_content': ' ' + 'capa_content': '' } } ) @@ -2607,7 +2609,7 @@ class ProblemBlockXMLTest(unittest.TestCase): 'problem_types': ["choiceresponse"], 'content': { 'display_name': name, - 'capa_content': capa_content.replace("\n", " ") + 'capa_content': capa_content.replace("\n", " ").strip() } } ) @@ -2628,7 +2630,7 @@ class ProblemBlockXMLTest(unittest.TestCase): 'problem_types': ["optionresponse"], 'content': { 'display_name': name, - 'capa_content': capa_content.replace("\n", " ") + 'capa_content': capa_content.replace("\n", " ").strip() } } ) @@ -2653,7 +2655,7 @@ class ProblemBlockXMLTest(unittest.TestCase): 'problem_types': ["multiplechoiceresponse"], 'content': { 'display_name': name, - 'capa_content': capa_content.replace("\n", " ") + 'capa_content': capa_content.replace("\n", " ").strip() } } ) @@ -2681,7 +2683,7 @@ class ProblemBlockXMLTest(unittest.TestCase): 'problem_types': ["numericalresponse"], 'content': { 'display_name': name, - 'capa_content': capa_content.replace("\n", " ") + 'capa_content': capa_content.replace("\n", " ").strip() } } ) @@ -2706,7 +2708,7 @@ class ProblemBlockXMLTest(unittest.TestCase): 'problem_types': ["stringresponse"], 'content': { 'display_name': name, - 'capa_content': capa_content.replace("\n", " ") + 'capa_content': capa_content.replace("\n", " ").strip() } } ) @@ -2720,7 +2722,7 @@ class ProblemBlockXMLTest(unittest.TestCase): """) name = "Non latin Input" descriptor = self._create_descriptor(sample_text_input_problem_xml, name=name) - capa_content = " FX1_VAL='Καλημέρα' Δοκιμή με μεταβλητές με Ελληνικούς χαρακτήρες μέσα σε python: $FX1_VAL " + capa_content = "FX1_VAL='Καλημέρα' Δοκιμή με μεταβλητές με Ελληνικούς χαρακτήρες μέσα σε python: $FX1_VAL" descriptor_dict = descriptor.index_dictionary() self.assertEqual( @@ -2752,7 +2754,7 @@ class ProblemBlockXMLTest(unittest.TestCase): 'problem_types': ["choiceresponse"], 'content': { 'display_name': name, - 'capa_content': capa_content.replace("\n", " ") + 'capa_content': capa_content.replace("\n", " ").strip() } } ) @@ -2778,7 +2780,7 @@ class ProblemBlockXMLTest(unittest.TestCase): 'problem_types': ["optionresponse"], 'content': { 'display_name': name, - 'capa_content': capa_content.replace("\n", " ") + 'capa_content': capa_content.replace("\n", " ").strip() } } ) @@ -2804,7 +2806,7 @@ class ProblemBlockXMLTest(unittest.TestCase): 'problem_types': ["multiplechoiceresponse"], 'content': { 'display_name': name, - 'capa_content': capa_content.replace("\n", " ") + 'capa_content': capa_content.replace("\n", " ").strip() } } ) @@ -2828,7 +2830,7 @@ class ProblemBlockXMLTest(unittest.TestCase): 'problem_types': ["numericalresponse"], 'content': { 'display_name': name, - 'capa_content': capa_content.replace("\n", " ") + 'capa_content': capa_content.replace("\n", " ").strip() } } ) @@ -2852,7 +2854,7 @@ class ProblemBlockXMLTest(unittest.TestCase): 'problem_types': ["stringresponse"], 'content': { 'display_name': name, - 'capa_content': capa_content.replace("\n", " ") + 'capa_content': capa_content.replace("\n", " ").strip() } } ) @@ -2884,7 +2886,7 @@ class ProblemBlockXMLTest(unittest.TestCase): 'problem_types': [], 'content': { 'display_name': name, - 'capa_content': capa_content.replace("\n", " ") + 'capa_content': capa_content.replace("\n", " ").strip() } } ) From eb8998894433ab41215891fcd67d71bec5f729cb Mon Sep 17 00:00:00 2001 From: Ahmad Bilal Khalid Date: Tue, 4 Aug 2020 22:11:16 +0500 Subject: [PATCH 2/5] fixed issue --- common/lib/xmodule/xmodule/capa_module.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/lib/xmodule/xmodule/capa_module.py b/common/lib/xmodule/xmodule/capa_module.py index 58f4bfc132..52edca3843 100644 --- a/common/lib/xmodule/xmodule/capa_module.py +++ b/common/lib/xmodule/xmodule/capa_module.py @@ -316,7 +316,7 @@ class ProblemBlock( for option_response in root.findall("optionresponse"): option_response_xml = etree.tostring(option_response, method="html").decode("utf-8") option_input = option_response.find("optioninput") - if option_input is not None: + if option_input is not None and "options" in option_input: option_input_attrib = option_input.get("options", "") option_input_attrib = "[" + option_input_attrib.strip("()").replace("'", '"') + "]" values_of_dropdown = ", ".join(json.loads(option_input_attrib)) From 3fab3ad47f461040256376b230ff3a7f31f6dbcb Mon Sep 17 00:00:00 2001 From: Ahmad Bilal Khalid Date: Wed, 5 Aug 2020 16:26:12 +0500 Subject: [PATCH 3/5] issue fixed, use regex to replace --- common/lib/xmodule/xmodule/capa_module.py | 23 +++++-------------- .../xmodule/xmodule/tests/test_capa_module.py | 12 +++++----- 2 files changed, 12 insertions(+), 23 deletions(-) diff --git a/common/lib/xmodule/xmodule/capa_module.py b/common/lib/xmodule/xmodule/capa_module.py index 52edca3843..66cae5c0da 100644 --- a/common/lib/xmodule/xmodule/capa_module.py +++ b/common/lib/xmodule/xmodule/capa_module.py @@ -307,25 +307,14 @@ class ProblemBlock( """ Return dictionary prepared with module content and type for indexing. """ - def _make_optionresponse_indexable(xml_content): - """ - Extract options from dropdown and replace the actual xml for optionresponse with them. - """ - xml_content_copy = xml_content[:] - root = etree.fromstring(xml_content_copy) - for option_response in root.findall("optionresponse"): - option_response_xml = etree.tostring(option_response, method="html").decode("utf-8") - option_input = option_response.find("optioninput") - if option_input is not None and "options" in option_input: - option_input_attrib = option_input.get("options", "") - option_input_attrib = "[" + option_input_attrib.strip("()").replace("'", '"') + "]" - values_of_dropdown = ", ".join(json.loads(option_input_attrib)) - xml_content_copy = xml_content_copy.replace(option_response_xml, values_of_dropdown) - return xml_content_copy - xblock_body = super(ProblemBlock, self).index_dictionary() - capa_content = _make_optionresponse_indexable(self.data) + # Make optioninput's options index friendly by replacing the actual tag with the values + capa_content = re.sub( + r'\s*|\S*<\/optioninput>', + lambda matched: matched.group(1).replace("'", '').replace(",", ", ") if matched.group(1) else None, + self.data + ) # Removing solutions and hints, as well as script and style capa_content = re.sub( diff --git a/common/lib/xmodule/xmodule/tests/test_capa_module.py b/common/lib/xmodule/xmodule/tests/test_capa_module.py index bd4b04c087..0869fe4513 100644 --- a/common/lib/xmodule/xmodule/tests/test_capa_module.py +++ b/common/lib/xmodule/xmodule/tests/test_capa_module.py @@ -39,8 +39,6 @@ from xmodule.tests import DATA_DIR from ..capa_base import RANDOMIZATION, SHOWANSWER from . import get_test_system -unittest.TestCase.maxDiff = None - class CapaFactory(object): """ @@ -2539,12 +2537,14 @@ class ProblemBlockXMLTest(unittest.TestCase): descriptor = self._create_descriptor(xml, name=name) self.assertEqual(descriptor.problem_types, {"multiplechoiceresponse", "optionresponse"}) six.assertCountEqual( - self, descriptor.index_dictionary(), { + self, + descriptor.index_dictionary(), + { 'content_type': ProblemBlock.INDEX_CONTENT_TYPE, - 'problem_types': ["optionresponse", "multiplechoiceresponse"], + 'problem_types': ["multiplechoiceresponse", "optionresponse"], 'content': { 'display_name': name, - 'capa_content': 'Label Some comment Donut Buggy' + 'capa_content': 'Label Some comment Donut Buggy 1, 2' } } ) @@ -2621,7 +2621,7 @@ class ProblemBlockXMLTest(unittest.TestCase): Dropdown problems allow learners to select only one option from a list of options. Description You can use the following example problem as a model. - Which of the following countries celebrates its independence on August 15? + Which of the following countries celebrates its independence on August 15? India, Spain, China, Bermuda """) self.assertEqual(descriptor.problem_types, {"optionresponse"}) self.assertEqual( From 971bb1bebd5a0bd46ff0bfc4c3cb0f5157c9b10d Mon Sep 17 00:00:00 2001 From: Ahmad Bilal Khalid Date: Fri, 7 Aug 2020 00:26:07 +0500 Subject: [PATCH 4/5] fixed issues pointed by kshitij --- common/lib/xmodule/xmodule/capa_module.py | 20 +++----- .../xmodule/xmodule/tests/test_capa_module.py | 50 +++++++++++-------- 2 files changed, 35 insertions(+), 35 deletions(-) diff --git a/common/lib/xmodule/xmodule/capa_module.py b/common/lib/xmodule/xmodule/capa_module.py index 66cae5c0da..4212161d44 100644 --- a/common/lib/xmodule/xmodule/capa_module.py +++ b/common/lib/xmodule/xmodule/capa_module.py @@ -35,13 +35,6 @@ from .capa_base import CapaMixin, ComplexEncoder, _ log = logging.getLogger("edx.courseware") -def _remove_html_tags_and_extra_spaces(data): - """ Remove html tags and extra white spaces e.g newline, tabs etc from provided data """ - cleaner = Cleaner(tags=[], strip=True) - cleaned_text = " ".join(re.split(r"\s+", cleaner.clean(data), flags=re.UNICODE)).strip() - return cleaned_text - - @XBlock.wants('user') @XBlock.needs('i18n') class ProblemBlock( @@ -310,11 +303,7 @@ class ProblemBlock( xblock_body = super(ProblemBlock, self).index_dictionary() # Make optioninput's options index friendly by replacing the actual tag with the values - capa_content = re.sub( - r'\s*|\S*<\/optioninput>', - lambda matched: matched.group(1).replace("'", '').replace(",", ", ") if matched.group(1) else None, - self.data - ) + capa_content = re.sub(r'\s*|\S*<\/optioninput>', r'\1', self.data) # Removing solutions and hints, as well as script and style capa_content = re.sub( @@ -330,7 +319,12 @@ class ProblemBlock( "", capa_content ) - capa_content = _remove_html_tags_and_extra_spaces(capa_content) + capa_content = re.sub( + r"(\s| |//)+", + " ", + Cleaner(tags=[], strip=True).clean(capa_content) + ) + capa_body = { "capa_content": capa_content, "display_name": self.display_name, diff --git a/common/lib/xmodule/xmodule/tests/test_capa_module.py b/common/lib/xmodule/xmodule/tests/test_capa_module.py index 0869fe4513..ac6bf4b465 100644 --- a/common/lib/xmodule/xmodule/tests/test_capa_module.py +++ b/common/lib/xmodule/xmodule/tests/test_capa_module.py @@ -2109,6 +2109,7 @@ class ProblemBlockTest(unittest.TestCase): @ddt.ddt class ProblemBlockXMLTest(unittest.TestCase): + maxDiff = None sample_checkbox_problem_xml = textwrap.dedent("""

Title

@@ -2509,7 +2510,7 @@ class ProblemBlockXMLTest(unittest.TestCase): 'problem_types': ["multiplechoiceresponse"], 'content': { 'display_name': name, - 'capa_content': 'Label Some comment Apple Banana Chocolate Donut' + 'capa_content': ' Label Some comment Apple Banana Chocolate Donut ' } }) @@ -2536,16 +2537,21 @@ class ProblemBlockXMLTest(unittest.TestCase): name = "Other Test Capa Problem" descriptor = self._create_descriptor(xml, name=name) self.assertEqual(descriptor.problem_types, {"multiplechoiceresponse", "optionresponse"}) - six.assertCountEqual( - self, - descriptor.index_dictionary(), - { + + # We are converting problem_types to a set to compare it later without taking into account the order + # the reasoning behind is that the problem_types (property) is represented by dict and when it is converted + # to list its ordering is different everytime. + + indexing_result = descriptor.index_dictionary() + indexing_result['problem_types'] = set(indexing_result['problem_types']) + self.assertDictEqual( + indexing_result, { 'content_type': ProblemBlock.INDEX_CONTENT_TYPE, - 'problem_types': ["multiplechoiceresponse", "optionresponse"], + 'problem_types': set(["optionresponse", "multiplechoiceresponse"]), 'content': { 'display_name': name, - 'capa_content': 'Label Some comment Donut Buggy 1, 2' - } + 'capa_content': " Label Some comment Donut Buggy '1','2' " + }, } ) @@ -2581,7 +2587,7 @@ class ProblemBlockXMLTest(unittest.TestCase): 'problem_types': [], 'content': { 'display_name': name, - 'capa_content': '' + 'capa_content': ' ' } } ) @@ -2609,7 +2615,7 @@ class ProblemBlockXMLTest(unittest.TestCase): 'problem_types': ["choiceresponse"], 'content': { 'display_name': name, - 'capa_content': capa_content.replace("\n", " ").strip() + 'capa_content': capa_content.replace("\n", " ") } } ) @@ -2621,7 +2627,7 @@ class ProblemBlockXMLTest(unittest.TestCase): Dropdown problems allow learners to select only one option from a list of options. Description You can use the following example problem as a model. - Which of the following countries celebrates its independence on August 15? India, Spain, China, Bermuda + Which of the following countries celebrates its independence on August 15? 'India','Spain','China','Bermuda' """) self.assertEqual(descriptor.problem_types, {"optionresponse"}) self.assertEqual( @@ -2630,7 +2636,7 @@ class ProblemBlockXMLTest(unittest.TestCase): 'problem_types': ["optionresponse"], 'content': { 'display_name': name, - 'capa_content': capa_content.replace("\n", " ").strip() + 'capa_content': capa_content.replace("\n", " ") } } ) @@ -2655,7 +2661,7 @@ class ProblemBlockXMLTest(unittest.TestCase): 'problem_types': ["multiplechoiceresponse"], 'content': { 'display_name': name, - 'capa_content': capa_content.replace("\n", " ").strip() + 'capa_content': capa_content.replace("\n", " ") } } ) @@ -2683,7 +2689,7 @@ class ProblemBlockXMLTest(unittest.TestCase): 'problem_types': ["numericalresponse"], 'content': { 'display_name': name, - 'capa_content': capa_content.replace("\n", " ").strip() + 'capa_content': capa_content.replace("\n", " ") } } ) @@ -2708,7 +2714,7 @@ class ProblemBlockXMLTest(unittest.TestCase): 'problem_types': ["stringresponse"], 'content': { 'display_name': name, - 'capa_content': capa_content.replace("\n", " ").strip() + 'capa_content': capa_content.replace("\n", " ") } } ) @@ -2722,7 +2728,7 @@ class ProblemBlockXMLTest(unittest.TestCase): """) name = "Non latin Input" descriptor = self._create_descriptor(sample_text_input_problem_xml, name=name) - capa_content = "FX1_VAL='Καλημέρα' Δοκιμή με μεταβλητές με Ελληνικούς χαρακτήρες μέσα σε python: $FX1_VAL" + capa_content = " FX1_VAL='Καλημέρα' Δοκιμή με μεταβλητές με Ελληνικούς χαρακτήρες μέσα σε python: $FX1_VAL " descriptor_dict = descriptor.index_dictionary() self.assertEqual( @@ -2754,7 +2760,7 @@ class ProblemBlockXMLTest(unittest.TestCase): 'problem_types': ["choiceresponse"], 'content': { 'display_name': name, - 'capa_content': capa_content.replace("\n", " ").strip() + 'capa_content': capa_content.replace("\n", " ") } } ) @@ -2780,7 +2786,7 @@ class ProblemBlockXMLTest(unittest.TestCase): 'problem_types': ["optionresponse"], 'content': { 'display_name': name, - 'capa_content': capa_content.replace("\n", " ").strip() + 'capa_content': capa_content.replace("\n", " ") } } ) @@ -2806,7 +2812,7 @@ class ProblemBlockXMLTest(unittest.TestCase): 'problem_types': ["multiplechoiceresponse"], 'content': { 'display_name': name, - 'capa_content': capa_content.replace("\n", " ").strip() + 'capa_content': capa_content.replace("\n", " ") } } ) @@ -2830,7 +2836,7 @@ class ProblemBlockXMLTest(unittest.TestCase): 'problem_types': ["numericalresponse"], 'content': { 'display_name': name, - 'capa_content': capa_content.replace("\n", " ").strip() + 'capa_content': capa_content.replace("\n", " ") } } ) @@ -2854,7 +2860,7 @@ class ProblemBlockXMLTest(unittest.TestCase): 'problem_types': ["stringresponse"], 'content': { 'display_name': name, - 'capa_content': capa_content.replace("\n", " ").strip() + 'capa_content': capa_content.replace("\n", " ") } } ) @@ -2886,7 +2892,7 @@ class ProblemBlockXMLTest(unittest.TestCase): 'problem_types': [], 'content': { 'display_name': name, - 'capa_content': capa_content.replace("\n", " ").strip() + 'capa_content': capa_content.replace("\n", " ") } } ) From 30274b00e32d81fe2010f2462d3c1cf749773bd5 Mon Sep 17 00:00:00 2001 From: Ahmad Bilal Khalid Date: Fri, 7 Aug 2020 01:03:42 +0500 Subject: [PATCH 5/5] fixed nit --- common/lib/xmodule/xmodule/tests/test_capa_module.py | 1 - 1 file changed, 1 deletion(-) diff --git a/common/lib/xmodule/xmodule/tests/test_capa_module.py b/common/lib/xmodule/xmodule/tests/test_capa_module.py index ac6bf4b465..c0e52f2aaf 100644 --- a/common/lib/xmodule/xmodule/tests/test_capa_module.py +++ b/common/lib/xmodule/xmodule/tests/test_capa_module.py @@ -2109,7 +2109,6 @@ class ProblemBlockTest(unittest.TestCase): @ddt.ddt class ProblemBlockXMLTest(unittest.TestCase): - maxDiff = None sample_checkbox_problem_xml = textwrap.dedent("""

Title