Fix inline html bug.
* The problem was in trying to remove the outer <html> and </html> tags. * Cleaned up the stringify function to be less magical and more obviously correct. [Fix #35136297]
This commit is contained in:
@@ -12,9 +12,17 @@ def stringify_children(node):
|
||||
fixed from
|
||||
http://stackoverflow.com/questions/4624062/get-all-text-inside-a-tag-in-lxml
|
||||
'''
|
||||
parts = ([node.text] +
|
||||
list(chain(*([etree.tostring(c), c.tail]
|
||||
for c in node.getchildren())
|
||||
)))
|
||||
# Useful things to know:
|
||||
|
||||
# etree.tostring(node) -- generates xml for the node, including start
|
||||
# and end tags. We'll use this for the children.
|
||||
# node.text -- the text after the end of a start tag to the start
|
||||
# of the first child
|
||||
# node.tail -- the text after the end of this tag to the start of the
|
||||
# next element.
|
||||
parts = [node.text]
|
||||
for c in node.getchildren():
|
||||
parts.append(etree.tostring(c, with_tail=True))
|
||||
|
||||
# filter removes possible Nones in texts and tails
|
||||
return ''.join(filter(None, parts))
|
||||
|
||||
Reference in New Issue
Block a user