fix: do not remove comments from XML during course import (#28557)

This is a follow-up to edx#1087, which reverted this change.
According to the PR comments, parsing strings with XML comments inside them was
causing errors. This does not seem to be the case anymore - these strings are
just hidden when the block is rendered, but they are not breaking XBlocks.
This also handles (ignores) the comments that could be added directly to the
LibraryContentBlock in the XML export by users.
This commit is contained in:
Piotr Surowiec
2021-11-16 18:42:08 +01:00
committed by GitHub
parent 814efed637
commit 4bf829dcbd
5 changed files with 97 additions and 41 deletions

View File

@@ -1256,7 +1256,25 @@ class TestCourseExportImportProblem(CourseTestCase):
'<problem>\n <pre>\n <code>x=10 print("hello \n")</code>\n </pre>\n '
'<multiplechoiceresponse/>\n</problem>\n'
]
],
[
'<!-- Comment outside of the root (will be deleted). -->'
'<problem>'
'<!-- Valid comment -->'
'<p>'
'"<!-- String with non-XML structure: >< -->"'
'Text'
'</p>'
'</problem>',
'<problem>\n '
'<!-- Valid comment -->\n '
'<p>'
'"<!-- String with non-XML structure: >< -->"'
'Text'
'</p>\n'
'</problem>\n'
],
)
@ddt.unpack
def test_problem_content_on_course_export_import(self, problem_data, expected_problem_content):

View File

@@ -12,6 +12,7 @@ from gettext import ngettext
import bleach
from lazy import lazy
from lxml import etree
from lxml.etree import XMLSyntaxError
from opaque_keys.edx.locator import LibraryLocator
from pkg_resources import resource_string
from web_fragments.fragment import Fragment
@@ -666,10 +667,20 @@ class LibraryContentBlock(
@classmethod
def definition_from_xml(cls, xml_object, system):
children = [
system.process_xml(etree.tostring(child)).scope_ids.usage_id
for child in xml_object.getchildren()
]
children = []
for child in xml_object.getchildren():
try:
children.append(system.process_xml(etree.tostring(child)).scope_ids.usage_id)
except (XMLSyntaxError, AttributeError):
msg = (
"Unable to load child when parsing Library Content Block. "
"This can happen when a comment is manually added to the course export."
)
logger.error(msg)
if system.error_tracker is not None:
system.error_tracker(msg)
definition = {
attr_name: json.loads(attr_value)
for attr_name, attr_value in xml_object.attrib.items()

View File

@@ -39,8 +39,7 @@ from xmodule.x_module import ( # lint-amnesty, pylint: disable=unused-import
from .exceptions import ItemNotFoundError
from .inheritance import InheritanceKeyValueStore, compute_inherited_metadata, inheriting_field_data
edx_xml_parser = etree.XMLParser(dtd_validation=False, load_dtd=False,
remove_comments=True, remove_blank_text=True)
edx_xml_parser = etree.XMLParser(dtd_validation=False, load_dtd=False, remove_blank_text=True)
etree.set_default_parser(edx_xml_parser)

View File

@@ -74,18 +74,14 @@ class TestLibraryContentExportImport(LibraryContentTest):
"""
Export and import tests for LibraryContentBlock
"""
def setUp(self):
super().setUp()
maxDiff = None
def test_xml_export_import_cycle(self):
"""
Test the export-import cycle.
"""
# Children will only set after calling this.
self.lc_block.refresh_children()
lc_block = self.store.get_item(self.lc_block.location)
self.lc_block = self.store.get_item(self.lc_block.location)
expected_olx = (
self.expected_olx = (
'<library_content display_name="{block.display_name}" max_count="{block.max_count}"'
' source_library_id="{block.source_library_id}" source_library_version="{block.source_library_version}">\n'
' <html url_name="{block.children[0].block_id}"/>\n'
@@ -94,44 +90,78 @@ class TestLibraryContentExportImport(LibraryContentTest):
' <html url_name="{block.children[3].block_id}"/>\n'
'</library_content>\n'
).format(
block=lc_block,
block=self.lc_block,
)
export_fs = MemoryFS()
# Set the virtual FS to export the olx to.
lc_block.runtime._descriptor_system.export_fs = export_fs # pylint: disable=protected-access
self.export_fs = MemoryFS()
self.lc_block.runtime._descriptor_system.export_fs = self.export_fs # pylint: disable=protected-access
# Prepare runtime for the import.
self.runtime = TestImportSystem(load_error_modules=True, course_id=self.lc_block.location.course_key)
self.runtime.resources_fs = self.export_fs
self.id_generator = Mock()
# Export the olx.
node = etree.Element("unknown_root")
lc_block.add_xml_to_node(node)
self.lc_block.add_xml_to_node(node)
# Read it back
with export_fs.open('{dir}/{file_name}.xml'.format(
dir=lc_block.scope_ids.usage_id.block_type,
file_name=lc_block.scope_ids.usage_id.block_id
def _verify_xblock_properties(self, imported_lc_block):
"""
Check the new XBlock has the same properties as the old one.
"""
assert imported_lc_block.display_name == self.lc_block.display_name
assert imported_lc_block.source_library_id == self.lc_block.source_library_id
assert imported_lc_block.source_library_version == self.lc_block.source_library_version
assert imported_lc_block.mode == self.lc_block.mode
assert imported_lc_block.max_count == self.lc_block.max_count
assert imported_lc_block.capa_type == self.lc_block.capa_type
assert len(imported_lc_block.children) == len(self.lc_block.children)
assert imported_lc_block.children == self.lc_block.children
def test_xml_export_import_cycle(self):
"""
Test the export-import cycle.
"""
# Read back the olx.
with self.export_fs.open('{dir}/{file_name}.xml'.format(
dir=self.lc_block.scope_ids.usage_id.block_type,
file_name=self.lc_block.scope_ids.usage_id.block_id
)) as f:
exported_olx = f.read()
# And compare.
assert exported_olx == expected_olx
runtime = TestImportSystem(load_error_modules=True, course_id=lc_block.location.course_key)
runtime.resources_fs = export_fs
assert exported_olx == self.expected_olx
# Now import it.
olx_element = etree.fromstring(exported_olx)
id_generator = Mock()
imported_lc_block = LibraryContentBlock.parse_xml(olx_element, runtime, None, id_generator)
imported_lc_block = LibraryContentBlock.parse_xml(olx_element, self.runtime, None, self.id_generator)
# Check the new XBlock has the same properties as the old one.
assert imported_lc_block.display_name == lc_block.display_name
assert imported_lc_block.source_library_id == lc_block.source_library_id
assert imported_lc_block.source_library_version == lc_block.source_library_version
assert imported_lc_block.mode == lc_block.mode
assert imported_lc_block.max_count == lc_block.max_count
assert imported_lc_block.capa_type == lc_block.capa_type
assert len(imported_lc_block.children) == 4
assert imported_lc_block.children == lc_block.children
self._verify_xblock_properties(imported_lc_block)
def test_xml_import_with_comments(self):
"""
Test that XML comments within LibraryContentBlock are ignored during the import.
"""
olx_with_comments = (
'<!-- Comment -->\n'
'<library_content display_name="{block.display_name}" max_count="{block.max_count}"'
' source_library_id="{block.source_library_id}" source_library_version="{block.source_library_version}">\n'
'<!-- Comment -->\n'
' <html url_name="{block.children[0].block_id}"/>\n'
' <html url_name="{block.children[1].block_id}"/>\n'
' <html url_name="{block.children[2].block_id}"/>\n'
' <html url_name="{block.children[3].block_id}"/>\n'
'</library_content>\n'
).format(
block=self.lc_block,
)
# Import the olx.
olx_element = etree.fromstring(olx_with_comments)
imported_lc_block = LibraryContentBlock.parse_xml(olx_element, self.runtime, None, self.id_generator)
self._verify_xblock_properties(imported_lc_block)
class LibraryContentBlockTestMixin:

View File

@@ -18,9 +18,7 @@ from xmodule.x_module import XModuleDescriptor # lint-amnesty, pylint: disable=
log = logging.getLogger(__name__)
# assume all XML files are persisted as utf-8.
EDX_XML_PARSER = XMLParser(dtd_validation=False, load_dtd=False,
remove_comments=True, remove_blank_text=True,
encoding='utf-8')
EDX_XML_PARSER = XMLParser(dtd_validation=False, load_dtd=False, remove_blank_text=True, encoding='utf-8')
def name_to_pathname(name):