115 lines
3.3 KiB
JavaScript
115 lines
3.3 KiB
JavaScript
import { flatten } from 'lodash';
|
|
|
|
/**
 * flattenSubNodes - appends a node's children to the given array `subNodes` in a flattened format.
 * Only direct children are appended unless `options.recursive` is true, in which case every
 * descendant is appended as well (each child list first, then the descendants of each child).
 *
 * The children of a node are the elements of all of its array-valued properties; non-array
 * properties (e.g. text values or attribute objects) are ignored.
 *
 * @param {object|object[]} node - a single node, or an array of nodes whose children are all collected
 * @param {object[]} subNodes - output array; the children found are pushed onto it (mutated in place)
 * @param {object} options - { recursive: false } Whether to search recursively for child tags.
 * @returns {void}
 */
function flattenSubNodes(node, subNodes, options = { recursive: false }) {
  const recursive = Boolean(options && options.recursive);
  const nodes = Array.isArray(node) ? node : [node];

  nodes.forEach((current) => {
    // null/undefined and primitives have no child arrays; Object.values(null) would throw.
    if (current === null || typeof current !== 'object') {
      return;
    }

    Object.values(current).forEach((value) => {
      if (!Array.isArray(value)) {
        return;
      }

      // Push one element at a time instead of spreading, so that very large child lists
      // cannot exceed the engine's maximum argument count.
      value.forEach((child) => {
        subNodes.push(child);
      });

      if (recursive) {
        value.forEach((child) => {
          flattenSubNodes(child, subNodes, options);
        });
      }
    });
  });
}
|
|
|
|
/**
 * nodeContainsChildTags - checks whether a node contains any subnodes with the specified tag names.
 *
 * @param {object} node - Example for node:
 *  {"div":
 *    [
 *      {"label":
 *        [
 *          {"#text":"def"}
 *        ]
 *      },
 *      {"div":
 *        [{
 *          label:
 *            [
 *              { '#text': 'def' },
 *            ],
 *        }],
 *      },
 *      {"#text":" "},
 *      {"em":
 *        [
 *          {"#text":"ghi"}
 *        ],
 *        ":@": {"@_class":"olx_description"}
 *      },
 *      {"em":
 *        [
 *          {"#text":"jkl"}
 *        ]
 *      }
 *    ]
 *  }
 * @param {string[]} tagNames - tag names to look for among the node's subnodes
 * @param {object} [options={ recursive: false }] - set `recursive` to true to search all
 *   descendants instead of only the direct children.
 *
 * @returns {boolean} whether node contains the specified child tags
 */
export function nodeContainsChildTags(node, tagNames, options = { recursive: false }) {
  const subNodes = [];
  flattenSubNodes(node, subNodes, options);

  // A subnode is identified by its first key, which is compared against the requested tag names.
  return subNodes.some(subNode => tagNames.includes(Object.keys(subNode)[0]));
}
|
|
|
|
/**
 * tagName - returns the tag name of a node, i.e. its first key that starts with an ASCII letter.
 * Keys that start with any other character (such as "#text" or ":@") are skipped.
 *
 * @param {object} node - a single node object (not an array of nodes)
 * @returns {string|undefined} the tag name of the node, or undefined if no key starts with a letter
 * @throws {TypeError} if node is an array, null or undefined
 */
export function tagName(node) {
  if (Array.isArray(node)) {
    throw new TypeError('function tagName does not accept arrays as input');
  }
  if (node === null || node === undefined) {
    throw new TypeError('function tagName requires a node object as input');
  }
  return Object.keys(node).find(key => /^[a-zA-Z]/.test(key));
}
|
|
|
|
/**
 * findNodesAndRemoveTheirParentNodes - given 'arrayOfNodes', an array that results from the
 * XMLParser, this looks for 'nodesToFind': specific child tags (e.g. 'label'), and removes
 * their parent tags (e.g. 'div') but keeps all their content one level higher.
 * The 'parentsToRemove' array should contain a list of parent tags that will be removed.
 *
 * For example, if you have a div containing a p that contains a label and an em, and you specify
 * 'label' as the nodeToFind and 'p' as the parentToRemove, then you will get a div containing a label and an em.
 *
 * Only the nodes directly inside 'arrayOfNodes' are examined, and only their direct children are
 * checked against 'nodesToFind'. The input array is not modified.
 *
 * @param {object} params
 * @param {object[]} params.arrayOfNodes - the nodes to process
 * @param {string[]} params.nodesToFind - child tag names that trigger removal of their parent
 * @param {string[]} params.parentsToRemove - parent tag names that are allowed to be removed
 *
 * @returns {object[]} - a new array of nodes
 */
export function findNodesAndRemoveTheirParentNodes({
  arrayOfNodes,
  nodesToFind,
  parentsToRemove,
}) {
  const result = [];
  arrayOfNodes.forEach((node) => {
    const containsRelevantSubnodes = nodeContainsChildTags(node, nodesToFind);
    // tagName is only evaluated for nodes that contain a relevant subnode.
    const parentTag = containsRelevantSubnodes ? tagName(node) : undefined;

    if (containsRelevantSubnodes && parentsToRemove.includes(parentTag)) {
      // Read the children through the tag name itself rather than the node's first key,
      // so a node whose first key is not its tag (e.g. attributes listed first) still works.
      result.push(...node[parentTag]);
    } else {
      result.push(node);
    }
  });
  return result;
}
|