From 4bf829dcbd7b02b825678a11eee216a84e4abc04 Mon Sep 17 00:00:00 2001 From: Piotr Surowiec Date: Tue, 16 Nov 2021 18:42:08 +0100 Subject: [PATCH] fix: do not remove comments from XML during course import (#28557) This is a follow-up to edx#1087, which reverted this change. According to the PR comments, parsing strings with XML comments inside them was causing errors. This does not seem to be the case anymore - these strings are just hidden when the block is rendered, but they are not breaking XBlocks. This also handles (ignores) the comments that could be added directly to the LibraryContentBlock in the XML export by users. --- .../views/tests/test_import_export.py | 20 +++- .../xmodule/xmodule/library_content_module.py | 19 +++- common/lib/xmodule/xmodule/modulestore/xml.py | 3 +- .../xmodule/tests/test_library_content.py | 92 ++++++++++++------- common/lib/xmodule/xmodule/xml_module.py | 4 +- 5 files changed, 97 insertions(+), 41 deletions(-) diff --git a/cms/djangoapps/contentstore/views/tests/test_import_export.py b/cms/djangoapps/contentstore/views/tests/test_import_export.py index 8cbd11c40c..51c58162e8 100644 --- a/cms/djangoapps/contentstore/views/tests/test_import_export.py +++ b/cms/djangoapps/contentstore/views/tests/test_import_export.py @@ -1256,7 +1256,25 @@ class TestCourseExportImportProblem(CourseTestCase): '\n
\n    x=10 print("hello \n")\n  
\n ' '\n
\n' - ] + ], + [ + '' + '' + '' + '

' + '""' + 'Text' + '

' + '
', + + '\n ' + '\n ' + '

' + '""' + 'Text' + '

\n' + '
\n' + ], ) @ddt.unpack def test_problem_content_on_course_export_import(self, problem_data, expected_problem_content): diff --git a/common/lib/xmodule/xmodule/library_content_module.py b/common/lib/xmodule/xmodule/library_content_module.py index d81cb98048..d3bce47bb8 100644 --- a/common/lib/xmodule/xmodule/library_content_module.py +++ b/common/lib/xmodule/xmodule/library_content_module.py @@ -12,6 +12,7 @@ from gettext import ngettext import bleach from lazy import lazy from lxml import etree +from lxml.etree import XMLSyntaxError from opaque_keys.edx.locator import LibraryLocator from pkg_resources import resource_string from web_fragments.fragment import Fragment @@ -666,10 +667,20 @@ class LibraryContentBlock( @classmethod def definition_from_xml(cls, xml_object, system): - children = [ - system.process_xml(etree.tostring(child)).scope_ids.usage_id - for child in xml_object.getchildren() - ] + children = [] + + for child in xml_object.getchildren(): + try: + children.append(system.process_xml(etree.tostring(child)).scope_ids.usage_id) + except (XMLSyntaxError, AttributeError): + msg = ( + "Unable to load child when parsing Library Content Block. " + "This can happen when a comment is manually added to the course export." + ) + logger.error(msg) + if system.error_tracker is not None: + system.error_tracker(msg) + definition = { attr_name: json.loads(attr_value) for attr_name, attr_value in xml_object.attrib.items() diff --git a/common/lib/xmodule/xmodule/modulestore/xml.py b/common/lib/xmodule/xmodule/modulestore/xml.py index d87bee1932..c84c7be722 100644 --- a/common/lib/xmodule/xmodule/modulestore/xml.py +++ b/common/lib/xmodule/xmodule/modulestore/xml.py @@ -39,8 +39,7 @@ from xmodule.x_module import ( # lint-amnesty, pylint: disable=unused-import from .exceptions import ItemNotFoundError from .inheritance import InheritanceKeyValueStore, compute_inherited_metadata, inheriting_field_data -edx_xml_parser = etree.XMLParser(dtd_validation=False, load_dtd=False, - remove_comments=True, remove_blank_text=True) +edx_xml_parser = etree.XMLParser(dtd_validation=False, load_dtd=False, remove_blank_text=True) etree.set_default_parser(edx_xml_parser) diff --git a/common/lib/xmodule/xmodule/tests/test_library_content.py b/common/lib/xmodule/xmodule/tests/test_library_content.py index 8a287acf88..628f471e88 100644 --- a/common/lib/xmodule/xmodule/tests/test_library_content.py +++ b/common/lib/xmodule/xmodule/tests/test_library_content.py @@ -74,18 +74,14 @@ class TestLibraryContentExportImport(LibraryContentTest): """ Export and import tests for LibraryContentBlock """ + def setUp(self): + super().setUp() - maxDiff = None - - def test_xml_export_import_cycle(self): - """ - Test the export-import cycle. - """ # Children will only set after calling this. self.lc_block.refresh_children() - lc_block = self.store.get_item(self.lc_block.location) + self.lc_block = self.store.get_item(self.lc_block.location) - expected_olx = ( + self.expected_olx = ( '\n' ' \n' @@ -94,44 +90,78 @@ class TestLibraryContentExportImport(LibraryContentTest): ' \n' '\n' ).format( - block=lc_block, + block=self.lc_block, ) - export_fs = MemoryFS() # Set the virtual FS to export the olx to. - lc_block.runtime._descriptor_system.export_fs = export_fs # pylint: disable=protected-access + self.export_fs = MemoryFS() + self.lc_block.runtime._descriptor_system.export_fs = self.export_fs # pylint: disable=protected-access + + # Prepare runtime for the import. + self.runtime = TestImportSystem(load_error_modules=True, course_id=self.lc_block.location.course_key) + self.runtime.resources_fs = self.export_fs + self.id_generator = Mock() # Export the olx. node = etree.Element("unknown_root") - lc_block.add_xml_to_node(node) + self.lc_block.add_xml_to_node(node) - # Read it back - with export_fs.open('{dir}/{file_name}.xml'.format( - dir=lc_block.scope_ids.usage_id.block_type, - file_name=lc_block.scope_ids.usage_id.block_id + def _verify_xblock_properties(self, imported_lc_block): + """ + Check the new XBlock has the same properties as the old one. + """ + assert imported_lc_block.display_name == self.lc_block.display_name + assert imported_lc_block.source_library_id == self.lc_block.source_library_id + assert imported_lc_block.source_library_version == self.lc_block.source_library_version + assert imported_lc_block.mode == self.lc_block.mode + assert imported_lc_block.max_count == self.lc_block.max_count + assert imported_lc_block.capa_type == self.lc_block.capa_type + assert len(imported_lc_block.children) == len(self.lc_block.children) + assert imported_lc_block.children == self.lc_block.children + + def test_xml_export_import_cycle(self): + """ + Test the export-import cycle. + """ + # Read back the olx. + with self.export_fs.open('{dir}/{file_name}.xml'.format( + dir=self.lc_block.scope_ids.usage_id.block_type, + file_name=self.lc_block.scope_ids.usage_id.block_id )) as f: exported_olx = f.read() # And compare. - assert exported_olx == expected_olx - - runtime = TestImportSystem(load_error_modules=True, course_id=lc_block.location.course_key) - runtime.resources_fs = export_fs + assert exported_olx == self.expected_olx # Now import it. olx_element = etree.fromstring(exported_olx) - id_generator = Mock() - imported_lc_block = LibraryContentBlock.parse_xml(olx_element, runtime, None, id_generator) + imported_lc_block = LibraryContentBlock.parse_xml(olx_element, self.runtime, None, self.id_generator) - # Check the new XBlock has the same properties as the old one. - assert imported_lc_block.display_name == lc_block.display_name - assert imported_lc_block.source_library_id == lc_block.source_library_id - assert imported_lc_block.source_library_version == lc_block.source_library_version - assert imported_lc_block.mode == lc_block.mode - assert imported_lc_block.max_count == lc_block.max_count - assert imported_lc_block.capa_type == lc_block.capa_type - assert len(imported_lc_block.children) == 4 - assert imported_lc_block.children == lc_block.children + self._verify_xblock_properties(imported_lc_block) + + def test_xml_import_with_comments(self): + """ + Test that XML comments within LibraryContentBlock are ignored during the import. + """ + olx_with_comments = ( + '\n' + '\n' + '\n' + ' \n' + ' \n' + ' \n' + ' \n' + '\n' + ).format( + block=self.lc_block, + ) + + # Import the olx. + olx_element = etree.fromstring(olx_with_comments) + imported_lc_block = LibraryContentBlock.parse_xml(olx_element, self.runtime, None, self.id_generator) + + self._verify_xblock_properties(imported_lc_block) class LibraryContentBlockTestMixin: diff --git a/common/lib/xmodule/xmodule/xml_module.py b/common/lib/xmodule/xmodule/xml_module.py index ef6f01b8fe..1bfcf03d03 100644 --- a/common/lib/xmodule/xmodule/xml_module.py +++ b/common/lib/xmodule/xmodule/xml_module.py @@ -18,9 +18,7 @@ from xmodule.x_module import XModuleDescriptor # lint-amnesty, pylint: disable= log = logging.getLogger(__name__) # assume all XML files are persisted as utf-8. -EDX_XML_PARSER = XMLParser(dtd_validation=False, load_dtd=False, - remove_comments=True, remove_blank_text=True, - encoding='utf-8') +EDX_XML_PARSER = XMLParser(dtd_validation=False, load_dtd=False, remove_blank_text=True, encoding='utf-8') def name_to_pathname(name):