refactor: abstract XML parser output operations (#445)

This is a refactoring for a part of the ReactStateXMLParser. I wanted to use functions that are more generic and not just handle a list of edge cases. So I encapsulated the operation that was done in this part of the code to a function `findNodesAndRemoveTheirParentNodes` which is more generic and could be used for different operations.
This commit is contained in:
Jesper Hodge
2024-01-02 15:04:03 -05:00
committed by GitHub
parent 69452344d8
commit 70581c54ab
5 changed files with 317 additions and 19 deletions

4
package-lock.json generated
View File

@@ -25,6 +25,7 @@
"fast-xml-parser": "^4.0.10",
"frontend-components-tinymce-advanced-plugins": "^1.0.2",
"lodash-es": "^4.17.21",
"lodash.flatten": "^4.4.0",
"moment": "^2.29.4",
"moment-shortformat": "^2.1.0",
"react-dropzone": "^14.2.3",
@@ -17212,8 +17213,7 @@
"node_modules/lodash.flatten": {
"version": "4.4.0",
"resolved": "https://registry.npmjs.org/lodash.flatten/-/lodash.flatten-4.4.0.tgz",
"integrity": "sha512-C5N2Z3DgnnKr0LOpv/hKCgKdb7ZZwafIrsesve6lmzvZIRZRGaZ/l6Q8+2W7NaT+ZwO3fFlSCzCzrDCFdJfZ4g==",
"dev": true
"integrity": "sha512-C5N2Z3DgnnKr0LOpv/hKCgKdb7ZZwafIrsesve6lmzvZIRZRGaZ/l6Q8+2W7NaT+ZwO3fFlSCzCzrDCFdJfZ4g=="
},
"node_modules/lodash.flattendeep": {
"version": "4.4.0",

View File

@@ -79,6 +79,7 @@
"fast-xml-parser": "^4.0.10",
"frontend-components-tinymce-advanced-plugins": "^1.0.2",
"lodash-es": "^4.17.21",
"lodash.flatten": "^4.4.0",
"moment": "^2.29.4",
"moment-shortformat": "^2.1.0",
"react-dropzone": "^14.2.3",

View File

@@ -2,6 +2,9 @@ import _ from 'lodash-es';
import { XMLParser, XMLBuilder } from 'fast-xml-parser';
import { ProblemTypeKeys } from '../../../data/constants/problem';
import { ToleranceTypes } from '../components/EditProblemView/SettingsWidget/settingsComponents/Tolerance/constants';
import { findNodesAndRemoveTheirParentNodes } from './reactStateOLXHelpers';
const HtmlBlockTags = ['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div', 'pre', 'blockquote', 'ol', 'ul', 'li', 'dl', 'dt', 'dd', 'hr', 'table', 'thead', 'caption', 'tbody', 'tr', 'th', 'td', 'colgroup', 'col', 'address', 'fieldset', 'legend'];
class ReactStateOLXParser {
constructor(problemState) {
@@ -193,33 +196,29 @@ class ReactStateOLXParser {
/** addQuestion()
* The editorObject saved to the class constuctor is parsed for the attribute question. The question is parsed and
* checked for label and em tags. After the question is fully updated, the questionObject is returned.
* TODO: this is very brittle and unreliable. Needs improvement.
* checked for label tags. label tags are extracted from block-type tags like <p> or <h1>, and the block-type tag is
* deleted while label is kept. For example, <p><label>Question</label></p> becomes <label>Question</label>, while
* <p><span>Text</span></p> remains <p><span>Text</span></p>. The question is returned as an object representation.
* @return {object} object representaion of question
*/
addQuestion() {
const { question } = this.editorObject;
const questionObject = this.richTextParser.parse(question);
/* Removes block tags like <p> or <h1> that surround the <label> or <em> format.
const questionObjectArray = this.richTextParser.parse(question);
/* Removes block tags like <p> or <h1> that surround the <label> format.
Block tags are required by tinyMCE but have adverse effect on css in studio.
*/
const resultQuestion = [];
const relevantSubnodes = ['label', 'em'];
questionObject.forEach((tag) => {
const subNodes = Object.values(tag)[0];
const containsRelevantSubnodes = subNodes.some(subNode => relevantSubnodes.includes(Object.keys(subNode)[0]));
if (!containsRelevantSubnodes) {
resultQuestion.push(tag);
} else {
resultQuestion.push(...subNodes);
}
const result = findNodesAndRemoveTheirParentNodes({
arrayOfNodes: questionObjectArray,
nodesToFind: ['label'],
parentsToRemove: HtmlBlockTags,
});
return resultQuestion;
return result;
}
// findNodesWithChildTags(nodes, tagNames, recursive=false) {
// const result = [];
/** buildMultiSelectProblem()
* OLX builder for multiple choice, checkbox, and dropdown problems. The question
* builder has a different format than the other parts (demand hint, answers, and

View File

@@ -0,0 +1,116 @@
/* eslint-disable import/prefer-default-export */
import flatten from 'lodash.flatten';
/**
* flattenSubNodes - appends a nodes children to a given array 'subNodes' in a flattened format.
* Only appends direct children unless `recursive` is set to true.
* @param {object} node
* @param {object[]} subNodes
* @param {object} options - { recursive: false } Whether to search recursively for child tags.
* @returns {void}
*/
function flattenSubNodes(node, subNodes, options = { recursive: false }) {
const values = Array.isArray(node) ? (
flatten(node.map(n => Object.values(n)))
) : Object.values(node);
values.forEach(value => {
if (Array.isArray(value)) {
subNodes.push(...value);
if (options.recursive) {
value.forEach(nestedNode => {
flattenSubNodes(nestedNode, subNodes, options);
});
}
}
});
}
/**
* function nodeContainsChildTags - checks whether a node contains any subnodes with the specified tag names.
*
* @param {object} node - Example for node:
* {"div":
* [
* {"label":
* [
* {"#text":"def"}
* ]
* },
* {"div":
* [{
* label:
* [
* { '#text': 'def' },
* ],
* }],
* },
* {"#text":" "},
* {"em":
* [
* {"#text":"ghi"}
* ],
* ":@": {"@_class":"olx_description"}
* },
* {"em":
* [
* {"#text":"jkl"}
* ]
* }
* ]
* }
* @param {string[]} tagNames
* @param {boolean} [recursive=false] - Whether to search recursively for child tags.
*
* @returns {boolean} whether node contains the specified child tags
*/
export function nodeContainsChildTags(node, tagNames, options = { recursive: false }) {
const subNodes = [];
flattenSubNodes(node, subNodes, options);
const res = subNodes.some(subNode => tagNames.includes(Object.keys(subNode)[0]));
return res;
}
/**
* @param {object} node
* @returns {string} the tag name of the node
*/
export function tagName(node) {
if (Array.isArray(node)) {
throw new TypeError('function tagName does not accept arrays as input');
}
return Object.keys(node).find(key => key.match(/^[a-zA-Z].*/));
}
/**
* findNodesAndRemoveTheirParentNodes - given 'arrayOfNodes', an array that results from the
* XMLParser, this looks for 'nodesToFind': specific child tags (e.g. 'label'), and removes
* their parent tags (e.g. 'div') but keeps all their content one level higher.
* The 'parentsToRemove' array should contain a list of parent tags that will be removed.
*
* For example, if you have a div containing a p that contains a label and an em, and you specify
* 'label' as the nodeToFind and 'p' as the parentToRemove, then you will get a div containing a label and an em.
* @param {object} { arrayOfNodes, nodesToFind, parentsToRemove }
*
* @returns {object} - an array of nodes
*/
export function findNodesAndRemoveTheirParentNodes({
arrayOfNodes,
nodesToFind,
parentsToRemove,
}) {
const result = [];
arrayOfNodes.forEach((node) => {
const containsRelevantSubnodes = nodeContainsChildTags(node, nodesToFind);
if (!containsRelevantSubnodes || !parentsToRemove.includes(tagName(node))) {
result.push(node);
} else {
const subNodes = Object.values(node)[0];
result.push(...subNodes);
}
});
return result;
}

View File

@@ -0,0 +1,182 @@
import { nodeContainsChildTags, findNodesAndRemoveTheirParentNodes, tagName } from './reactStateOLXHelpers';
describe('reactStateOLXHelpers', () => {
describe('findNodesWithChildTags', () => {
const node = {
div:
[
{
label:
[
{ '#text': 'def' },
],
},
{ '#text': ' ' },
{
em:
[
{ '#text': 'ghi' },
],
':@': { '@_class': 'olx_description' },
},
{
em:
[
{ '#text': 'jkl' },
],
},
],
};
const nodeWithNestedLabel = {
div:
[
{
div:
[{
label:
[
{ '#text': 'def' },
],
}],
},
{ '#text': ' ' },
{
em:
[
{ '#text': 'ghi' },
],
':@': { '@_class': 'olx_description' },
},
{
em:
[
{ '#text': 'jkl' },
],
},
],
};
it('should return true if node contains specified child tags', () => {
const received = nodeContainsChildTags(node, ['p', 'label']);
expect(received).toEqual(true);
});
it('should return false if node does not contain child tags', () => {
const received = nodeContainsChildTags(node, ['p', 'span']);
expect(received).toEqual(false);
});
it('should return true if node contains specified nested child tags and recursive is true', () => {
const received = nodeContainsChildTags(nodeWithNestedLabel, ['p', 'label'], { recursive: true });
expect(received).toEqual(true);
});
it('should return false if node contains specified nested child tags and recursive is not set', () => {
const received = nodeContainsChildTags(nodeWithNestedLabel, ['p', 'label']);
expect(received).toEqual(false);
});
it('should handle arrays somehow, in case there is an edge case where it is passed', () => {
const received = nodeContainsChildTags([nodeWithNestedLabel, node], ['p', 'label'], { recursive: true });
expect(received).toEqual(true);
});
});
describe('findNodesAndRemoveTheirParentNodes', () => {
const exampleQuestionObject = [
{
p: [{
'#text': 'abc',
}],
},
{
div:
[
{
label:
[{ '#text': 'def' }],
},
{ '#text': ' ' },
{
em:
[{
'#text': 'ghi',
}],
':@': { '@_class': 'olx_description' },
},
{ em: [{ '#text': 'Just a generic em tag' }] },
],
},
];
const questionObjectWithoutDiv = [
{
p: [{
'#text': 'abc',
}],
},
{
label: [{ '#text': 'def' }],
},
{ '#text': ' ' },
{
em:
[{
'#text': 'ghi',
}],
':@': { '@_class': 'olx_description' },
},
{ em: [{ '#text': 'Just a generic em tag' }] },
];
it('should remove parent nodes of specified child tags', () => {
expect(findNodesAndRemoveTheirParentNodes({
arrayOfNodes: exampleQuestionObject,
nodesToFind: ['label'],
parentsToRemove: ['div'],
})).toEqual(questionObjectWithoutDiv);
});
it('should not remove anything unless specified in "parentsToRemove"', () => {
expect(findNodesAndRemoveTheirParentNodes({
arrayOfNodes: exampleQuestionObject,
nodesToFind: ['label'],
parentsToRemove: ['span'],
})).toEqual(exampleQuestionObject);
});
});
describe('tagName', () => {
it('should return the tag name of the node', () => {
expect(tagName({
p: [{
'#text': 'abc',
}],
})).toEqual('p');
});
it('should throw an error if the node is an array', () => {
expect(() => tagName([])).toThrow(TypeError);
});
it('should return undefined if the node is text-only', () => {
expect(tagName({ '#text': 'abc' })).toEqual(undefined);
});
it('should return undefined if the node is an empty object', () => {
expect(tagName({})).toEqual(undefined);
});
it('should return correct tagName if the node has text and class properties as well', () => {
expect(tagName({
':@': { '@_class': 'olx_description' },
'#text': 'abc',
em:
[{
'#text': 'ghi',
}],
})).toEqual('em');
});
});
});