12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898 |
- const sax = require('./sax');
// Node type codes. Every raw node is an array whose element 0 is one of these:
//   [NODE, name, attrs, children]   - element
//   [TEXT | CDATA | COMMENT, value] - character data
const NODE = 1;
const TEXT = 2;
const CDATA = 3;
const COMMENT = 4;

// Selector type name -> type code.
// Built on a null prototype so that inherited keys such as "toString" or
// "constructor" are never mistaken for a known type name; frozen because the
// table is a shared constant.
const name2type = Object.freeze(Object.assign(Object.create(null), {
    'NODE': NODE,
    'TEXT': TEXT,
    'CDATA': CDATA,
    'COMMENT': COMMENT,
}));

// Type code -> selector type name (inverse of name2type), same construction.
const type2name = Object.freeze(Object.assign(Object.create(null), {
    [NODE]: 'NODE',
    [TEXT]: 'TEXT',
    [CDATA]: 'CDATA',
    [COMMENT]: 'COMMENT',
}));
/**
 * Shared low-level helpers for working with arrays of raw nodes.
 *
 * A raw node is an array whose element 0 is the type code; for element nodes
 * (type NODE) element 1 is the tag name.
 */
class NodeBase {
    /**
     * Parses a "wide" selector string into a selector object.
     *
     *   ''       -> {before: true}  (position before all nodes)
     *   '*'      -> {all: true}     (every node)
     *   '*TYPE'  -> {type: code}    (TYPE is NODE, TEXT, CDATA or COMMENT)
     *   'name'   -> {name: 'name'}  (element nodes with that tag name)
     *
     * @param {string} selectorString
     * @returns {{all: boolean, before: boolean, type: number, name: string}}
     * @throws {Error} when a '*TYPE' selector names an unknown type
     */
    wideSelector(selectorString) {
        const result = {all: false, before: false, type: 0, name: ''};

        if (selectorString === '') {
            result.before = true;
        } else if (selectorString === '*') {
            result.all = true;
        } else if (selectorString[0] === '*') {
            const typeName = selectorString.substring(1);
            // Own-property check: an inherited key such as "toString" must not
            // be accepted as a type name.
            const known = Object.prototype.hasOwnProperty.call(name2type, typeName);
            result.type = (known ? name2type[typeName] : 0);
            if (!result.type) {
                throw new Error(`Unknown selector type: ${typeName}`);
            }
        } else {
            result.name = selectorString;
        }

        return result;
    }

    /**
     * Tests a raw node against a selector object produced by wideSelector().
     * "all" and "before" selectors match every node.
     *
     * @param {Array} rawNode
     * @param {object} selectorObj
     * @returns {boolean}
     */
    checkNode(rawNode, selectorObj) {
        return selectorObj.all || selectorObj.before
            || (selectorObj.type && rawNode[0] === selectorObj.type)
            || (rawNode[0] === NODE && rawNode[1] === selectorObj.name);
    }

    /**
     * Returns the index of the first node matching the selector.
     *
     * @param {Array<Array>} nodes
     * @param {object} selectorObj
     * @returns {number} index of the first match, or -1 when nothing matches
     */
    findNodeIndex(nodes, selectorObj) {
        return nodes.findIndex((rawNode) => this.checkNode(rawNode, selectorObj));
    }

    /**
     * Inserts rawNode into nodes (in place) at the position given by the selector:
     * "all" appends, "before" prepends, otherwise the node is inserted
     * immediately before the first matching node, or appended when there is
     * no match.
     *
     * @param {Array<Array>} nodes
     * @param {Array} rawNode
     * @param {object} selectorObj
     */
    rawAdd(nodes, rawNode, selectorObj) {
        if (selectorObj.all) {
            nodes.push(rawNode);
            return;
        }
        if (selectorObj.before) {
            nodes.unshift(rawNode);
            return;
        }

        const index = this.findNodeIndex(nodes, selectorObj);
        if (index >= 0) {
            nodes.splice(index, 0, rawNode);
        } else {
            nodes.push(rawNode);
        }
    }

    /**
     * Removes every node matching the selector from nodes (in place).
     * A "before" selector ('') removes nothing.
     *
     * @param {Array<Array>} nodes
     * @param {object} selectorObj
     */
    rawRemove(nodes, selectorObj) {
        if (selectorObj.before) {
            return;
        }

        // Walk backwards so that splicing does not shift unvisited indexes.
        for (let i = nodes.length - 1; i >= 0; i--) {
            if (this.checkNode(nodes[i], selectorObj)) {
                nodes.splice(i, 1);
            }
        }
    }
}
/**
 * Thin object wrapper around a single raw node array.
 *
 * Raw layout: [NODE, name, attrs, children] for elements, where attrs is an
 * array of [key, value] pairs (or null) and children is an array of raw nodes
 * (or null); [type, value] for TEXT, CDATA and COMMENT nodes.
 * The wrapper does not copy: all mutations are applied to the wrapped array.
 */
class NodeObject extends NodeBase {
    /**
     * @param {Array|null} raw - raw node to wrap; an empty array (no type) is
     *   used when omitted or falsy
     */
    constructor(raw = null) {
        super();
        this.raw = raw || [];
    }

    /** @returns {number|null} type code of the node, or null for an empty wrapper */
    get type() {
        return this.raw[0] || null;
    }

    /** @returns {string|null} tag name for element nodes, null for any other type */
    get name() {
        if (this.type === NODE) {
            return this.raw[1] || null;
        }
        return null;
    }

    /** Sets the tag name; ignored for non-element nodes. */
    set name(value) {
        if (this.type === NODE) {
            this.raw[1] = value;
        }
    }

    /**
     * Reads or updates the attributes of an element node.
     *
     *   attrs()            - returns the attributes as a Map, or null if none
     *   attrs(map)         - replaces all attributes with the entries of map
     *   attrs(key, value)  - sets a single attribute, creating the attribute
     *                        list if the element has none yet
     *
     * @param {Map|string} [key]
     * @param {*} [value]
     * @returns {Map|null} the resulting attribute map; null for non-element
     *   nodes or when reading an element without attributes
     */
    attrs(key, value) {
        if (this.type !== NODE) {
            return null;
        }

        if (key instanceof Map) {
            this.raw[2] = Array.from(key);
            return key;
        }

        const hasAttrs = Array.isArray(this.raw[2]);
        if (!hasAttrs && !key) {
            return null;
        }

        // Start from an empty map when the first attribute is being set, so the
        // assignment is not silently dropped.
        const map = new Map(hasAttrs ? this.raw[2] : []);
        if (key) {
            map.set(key, value);
            this.raw[2] = Array.from(map);
        }
        return map;
    }

    /**
     * @returns {Array|string|null} raw children array for element nodes, the
     *   text payload for TEXT/CDATA/COMMENT nodes, null when absent
     */
    get value() {
        switch (this.type) {
            case NODE:
                return this.raw[3] || null;
            case TEXT:
            case CDATA:
            case COMMENT:
                return this.raw[1] || null;
        }
        return null;
    }

    /** Sets the children array (element) or the text payload (other types). */
    set value(v) {
        switch (this.type) {
            case NODE:
                this.raw[3] = v;
                break;
            case TEXT:
            case CDATA:
            case COMMENT:
                this.raw[1] = v;
                break;
        }
    }

    /**
     * Adds one node or an array of nodes to the children of this element.
     *
     * @param {NodeObject|NodeObject[]} node
     * @param {string} [after='*'] - wide selector for the insert position:
     *   '*' appends, '' prepends, any other selector inserts immediately
     *   before the first matching child (appends when nothing matches)
     * @returns {NodeObject} this (no-op for non-element nodes)
     */
    add(node, after = '*') {
        if (this.type !== NODE) {
            return this;
        }

        const selectorObj = this.wideSelector(after);
        if (!Array.isArray(this.raw[3])) {
            this.raw[3] = [];
        }

        const list = (Array.isArray(node) ? node : [node]);
        for (const item of list) {
            this.rawAdd(this.raw[3], item.raw, selectorObj);
        }
        return this;
    }

    /**
     * Removes all children matching the wide selector. The children slot is
     * reset to null when it becomes empty.
     *
     * @param {string} [selector=''] - '' removes nothing, '*' removes all
     * @returns {NodeObject} this (no-op for non-element or childless nodes)
     */
    remove(selector = '') {
        if (this.type !== NODE || !this.raw[3]) {
            return this;
        }

        const selectorObj = this.wideSelector(selector);
        this.rawRemove(this.raw[3], selectorObj);
        if (!this.raw[3].length) {
            this.raw[3] = null;
        }
        return this;
    }

    /**
     * Calls callback(child) for each direct child; iteration stops when the
     * callback returns exactly false.
     *
     * @param {function(NodeObject): (boolean|void)} callback
     * @returns {NodeObject} this
     */
    each(callback) {
        if (this.type !== NODE || !this.raw[3]) {
            return this;
        }

        for (const n of this.raw[3]) {
            if (callback(new NodeObject(n)) === false) {
                break;
            }
        }
        return this;
    }

    /**
     * Depth-first traversal of all descendants. callback(node, route) receives
     * the '/'-joined tag names of the ancestors below this node ('' for direct
     * children). Returning exactly false from the callback aborts the whole
     * traversal.
     *
     * @param {function(NodeObject, string): (boolean|void)} callback
     * @returns {NodeObject} this
     */
    eachDeep(callback) {
        if (this.type !== NODE || !this.raw[3]) {
            return this;
        }

        const deep = (nodes, route = '') => {
            for (const n of nodes) {
                const node = new NodeObject(n);
                if (callback(node, route) === false) {
                    return false;
                }

                if (node.type === NODE && node.value) {
                    const childRoute = `${route}${route ? '/' : ''}${node.name}`;
                    if (deep(node.value, childRoute) === false) {
                        return false;
                    }
                }
            }
            return true;
        };

        deep(this.raw[3]);
        return this;
    }
}
/**
 * XML document model built on arrays of raw nodes.
 *
 * Holds a list of root raw nodes (rawNodes) and offers construction,
 * modification, iteration, selection, conversion (JSON, XML string, plain
 * object) and an "inspector" path API ($, $$, text, attrs, ...).
 *
 * Most operations work on the CHILDREN of the held root nodes; the *Self /
 * *Root variants work on the root nodes themselves.
 */
class XmlParser extends NodeBase {
    /**
     * @param {Array<Array>} [rawNodes=[]] - root raw nodes (used as is, not copied)
     */
    constructor(rawNodes = []) {
        super();

        // Type codes exposed on the instance for callers of createTypedNode().
        this.NODE = NODE;
        this.TEXT = TEXT;
        this.CDATA = CDATA;
        this.COMMENT = COMMENT;

        this.rawNodes = rawNodes;
    }

    /** @returns {number} number of root nodes */
    get count() {
        return this.rawNodes.length;
    }

    /** @returns {NodeObject[]} root nodes wrapped in NodeObject */
    get nodes() {
        const result = [];
        for (const n of this.rawNodes) {
            result.push(new NodeObject(n));
        }
        return result;
    }

    /** Wraps a raw node in a NodeObject. */
    nodeObject(node) {
        return new NodeObject(node);
    }

    /** Creates a new parser over the given raw nodes. */
    newParser(nodes) {
        return new XmlParser(nodes);
    }

    /**
     * Validates a node type code.
     *
     * @param {number} type
     * @throws {Error} when type is not one of NODE, TEXT, CDATA, COMMENT
     */
    checkType(type) {
        // Own-property check so inherited keys (e.g. "toString") are rejected.
        if (!Object.prototype.hasOwnProperty.call(type2name, type)) {
            throw new Error(`Invalid type: ${type}`);
        }
    }

    /**
     * Creates a node of the given type.
     *
     * @param {number} type - NODE, TEXT, CDATA or COMMENT
     * @param {string} nameOrValue - tag name for NODE, text payload otherwise
     * @param {Array|null} [attrs=null] - array of [key, value] pairs (NODE only)
     * @param {Array|null} [value=null] - array of raw child nodes (NODE only)
     * @returns {NodeObject}
     * @throws {Error} on invalid type, empty/non-string name or non-string value
     */
    createTypedNode(type, nameOrValue, attrs = null, value = null) {
        this.checkType(type);

        switch (type) {
            case NODE:
                if (!nameOrValue || typeof nameOrValue !== 'string') {
                    throw new Error('Node name must be non-empty string');
                }
                return new NodeObject([type, nameOrValue, attrs, value]);
            case TEXT:
            case CDATA:
            case COMMENT:
                if (typeof nameOrValue !== 'string') {
                    throw new Error('Node value must be of type string');
                }
                return new NodeObject([type, nameOrValue]);
        }
    }

    /** Creates an element node; see createTypedNode(). */
    createNode(name, attrs = null, value = null) {
        return this.createTypedNode(NODE, name, attrs, value);
    }

    /** Creates a text node; value must be a string (the null default throws). */
    createText(value = null) {
        return this.createTypedNode(TEXT, value);
    }

    /** Creates a CDATA node; value must be a string (the null default throws). */
    createCdata(value = null) {
        return this.createTypedNode(CDATA, value);
    }

    /** Creates a comment node; value must be a string (the null default throws). */
    createComment(value = null) {
        return this.createTypedNode(COMMENT, value);
    }

    /**
     * Adds node(s) to the children of EVERY root element node.
     * The same raw node array is inserted into each root (not cloned).
     *
     * @param {NodeObject|NodeObject[]} node
     * @param {string} [after='*'] - wide selector for the insert position:
     *   '*' appends, '' prepends, any other selector inserts immediately
     *   before the first matching child (appends when nothing matches)
     * @returns {XmlParser} this
     */
    add(node, after = '*') {
        const selectorObj = this.wideSelector(after);
        const list = (Array.isArray(node) ? node : [node]);

        for (const n of this.rawNodes) {
            if (n && n[0] === NODE) {
                if (!Array.isArray(n[3])) {
                    n[3] = [];
                }
                for (const item of list) {
                    this.rawAdd(n[3], item.raw, selectorObj);
                }
            }
        }
        return this;
    }

    /**
     * Adds node(s) to the list of root nodes; position as in add().
     *
     * @param {NodeObject|NodeObject[]} node
     * @param {string} [after='*']
     * @returns {XmlParser} this
     */
    addRoot(node, after = '*') {
        const selectorObj = this.wideSelector(after);
        const list = (Array.isArray(node) ? node : [node]);

        for (const item of list) {
            this.rawAdd(this.rawNodes, item.raw, selectorObj);
        }
        return this;
    }

    /**
     * Removes matching children from every root element node. A children slot
     * that becomes empty is reset to null.
     *
     * @param {string} [selector=''] - wide selector; '' removes nothing
     * @returns {XmlParser} this
     */
    remove(selector = '') {
        const selectorObj = this.wideSelector(selector);

        for (const n of this.rawNodes) {
            if (n && n[0] === NODE && Array.isArray(n[3])) {
                this.rawRemove(n[3], selectorObj);
                if (!n[3].length) {
                    n[3] = null;
                }
            }
        }
        return this;
    }

    /**
     * Removes matching nodes from the root list.
     *
     * @param {string} [selector=''] - wide selector; '' removes nothing
     * @returns {XmlParser} this
     */
    removeRoot(selector = '') {
        const selectorObj = this.wideSelector(selector);
        this.rawRemove(this.rawNodes, selectorObj);
        return this;
    }

    /**
     * Iterates the children of the root elements (or the roots themselves when
     * self is true). Iteration stops when the callback returns exactly false.
     *
     * @param {function(NodeObject): (boolean|void)} callback
     * @param {boolean} [self=false]
     * @returns {XmlParser} this
     */
    each(callback, self = false) {
        if (self) {
            for (const n of this.rawNodes) {
                if (callback(new NodeObject(n)) === false) {
                    return this;
                }
            }
            return this;
        }

        for (const n of this.rawNodes) {
            if (n[0] === NODE && n[3]) {
                for (const nn of n[3]) {
                    if (callback(new NodeObject(nn)) === false) {
                        return this;
                    }
                }
            }
        }
        return this;
    }

    /** Shortcut for each(callback, true). */
    eachSelf(callback) {
        return this.each(callback, true);
    }

    /**
     * Depth-first traversal. callback(node, route) receives the '/'-joined tag
     * names of the ancestors visited so far ('' at the starting level).
     * Returning exactly false from the callback aborts the traversal.
     *
     * @param {function(NodeObject, string): (boolean|void)} callback
     * @param {boolean} [self=false] - start from the root nodes instead of
     *   their children
     * @returns {XmlParser} this
     */
    eachDeep(callback, self = false) {
        const deep = (nodes, route = '') => {
            for (const n of nodes) {
                const node = new NodeObject(n);
                if (callback(node, route) === false) {
                    return false;
                }

                if (node.type === NODE && node.value) {
                    const childRoute = `${route}${route ? '/' : ''}${node.name}`;
                    if (deep(node.value, childRoute) === false) {
                        return false;
                    }
                }
            }
            return true;
        };

        if (self) {
            deep(this.rawNodes);
        } else {
            for (const n of this.rawNodes) {
                if (n[0] === NODE && n[3]) {
                    if (deep(n[3]) === false) {
                        break;
                    }
                }
            }
        }
        return this;
    }

    /** Shortcut for eachDeep(callback, true). */
    eachDeepSelf(callback) {
        return this.eachDeep(callback, true);
    }

    /**
     * Calls callback(rawNode) for every node in nodes matching selectorObj.
     *
     * @returns {XmlParser} this
     */
    rawSelect(nodes, selectorObj, callback) {
        for (const n of nodes) {
            if (this.checkNode(n, selectorObj)) {
                callback(n);
            }
        }
        return this;
    }

    /**
     * Selects nodes by a wide selector, or by a '/'-separated chain of wide
     * selectors applied level by level.
     *
     * @param {string} [selector='']
     * @param {boolean} [self=false] - match against the root nodes instead of
     *   their children (applies to the first level of a chain only)
     * @returns {XmlParser} new parser over the selected raw nodes (shared, not
     *   copied)
     */
    select(selector = '', self = false) {
        let newRawNodes = [];

        if (selector.includes('/')) {
            let res = this;
            for (const sel of selector.split('/')) {
                res = res.select(sel, self);
                // Only the first level may address the roots themselves.
                self = false;
            }
            newRawNodes = res.rawNodes;
        } else {
            const selectorObj = this.wideSelector(selector);
            const collect = (node) => {
                newRawNodes.push(node);
            };

            if (self) {
                this.rawSelect(this.rawNodes, selectorObj, collect);
            } else {
                for (const n of this.rawNodes) {
                    if (n && n[0] === NODE && Array.isArray(n[3])) {
                        this.rawSelect(n[3], selectorObj, collect);
                    }
                }
            }
        }

        return new XmlParser(newRawNodes);
    }

    /** Shortcut for select(selector, true). */
    selectSelf(selector) {
        return this.select(selector, true);
    }

    /**
     * Like select(), but returns only the first match.
     *
     * @returns {NodeObject} first selected node, or an empty NodeObject
     *   (type === null) when nothing matched
     */
    selectFirst(selector, self) {
        const result = this.select(selector, self);
        const node = (result.count ? result.rawNodes[0] : null);
        return new NodeObject(node);
    }

    /** Shortcut for selectFirst(selector, true). */
    selectFirstSelf(selector) {
        return this.selectFirst(selector, true);
    }

    /**
     * Serializes the raw nodes to JSON.
     *
     * @param {{format?: boolean}} [options] - format: pretty-print with 2 spaces
     * @returns {string}
     */
    toJson(options = {}) {
        const {format = false} = options;

        if (format) {
            return JSON.stringify(this.rawNodes, null, 2);
        }
        return JSON.stringify(this.rawNodes);
    }

    /**
     * Replaces the raw nodes with the result of parsing a JSON string.
     *
     * @param {string} jsonString
     * @returns {XmlParser} this
     * @throws {Error} when the JSON root is not an array (JSON.parse errors
     *   propagate unchanged)
     */
    fromJson(jsonString) {
        const parsed = JSON.parse(jsonString);
        if (!Array.isArray(parsed)) {
            throw new Error('JSON parse error: root element must be array');
        }

        this.rawNodes = parsed;
        return this;
    }

    /**
     * Serializes the document to an XML string.
     *
     * Attribute values and text are written as stored; no escaping is applied
     * here. Attributes with a non-string value are written as a bare name when
     * truthy and omitted otherwise. Element nodes without a name are skipped.
     *
     * @param {object} [options]
     * @param {string} [options.encoding='utf-8'] - value for the XML header
     * @param {boolean} [options.format=false] - add newlines and indentation
     * @param {boolean} [options.noHeader=false] - omit the <?xml ...?> header
     * @param {boolean} [options.expandEmpty=false] - write <a></a> instead of <a/>
     * @returns {string}
     */
    toString(options = {}) {
        const {
            encoding = 'utf-8',
            format = false,
            noHeader = false,
            expandEmpty = false
        } = options;

        // Type of the last node emitted by the most recently finished
        // nodesToString() call; used to decide whether a closing tag goes on
        // its own line.
        let deepType = 0;

        let out = '';
        if (!noHeader) {
            out += `<?xml version="1.0" encoding="${encoding}"?>`;
        }

        const nodesToString = (nodes, depth = 0) => {
            let result = '';
            const indent = '\n' + ' '.repeat(depth);
            // Type of the previous sibling; nothing is indented right after text.
            let lastType = 0;

            for (const n of nodes) {
                const node = new NodeObject(n);

                let open = '';
                let body = '';
                let close = '';

                if (node.type === NODE) {
                    if (!node.name) {
                        continue;
                    }

                    let attrs = '';
                    const nodeAttrs = node.attrs();
                    if (nodeAttrs) {
                        for (const [attrName, attrValue] of nodeAttrs) {
                            if (typeof attrValue === 'string') {
                                attrs += ` ${attrName}="${attrValue}"`;
                            } else if (attrValue) {
                                attrs += ` ${attrName}`;
                            }
                        }
                    }

                    if (node.value) {
                        body = nodesToString(node.value, depth + 2);
                    }

                    if (!body && !expandEmpty) {
                        open = (format && lastType !== TEXT ? indent : '');
                        open += `<${node.name}${attrs}/>`;
                    } else {
                        open = (format && lastType !== TEXT ? indent : '');
                        open += `<${node.name}${attrs}>`;

                        close = (format && deepType && deepType !== TEXT ? indent : '');
                        close += `</${node.name}>`;
                    }
                } else if (node.type === TEXT) {
                    body = node.value || '';
                } else if (node.type === CDATA) {
                    body = (format && lastType !== TEXT ? indent : '');
                    body += `<![CDATA[${node.value || ''}]]>`;
                } else if (node.type === COMMENT) {
                    body = (format && lastType !== TEXT ? indent : '');
                    body += `<!--${node.value || ''}-->`;
                }

                result += `${open}${body}${close}`;
                lastType = node.type;
            }

            deepType = lastType;
            return result;
        };

        out += nodesToString(this.rawNodes) + (format ? '\n' : '');
        return out;
    }

    /**
     * Parses an XML string with the project's sax module and replaces the raw
     * nodes with the result.
     *
     * @param {string} xmlString
     * @param {object} [options]
     * @param {boolean} [options.lowerCase=false] - passed through to the sax
     *   parser (presumably lower-cases tag and attribute names; confirm in ./sax)
     * @param {boolean} [options.whiteSpace=false] - keep whitespace-only text nodes
     * @param {function(string): boolean|false} [options.pickNode=false] -
     *   filter called with the route of each element ('a/b/c') and with
     *   '<route>/*TEXT', '<route>/*CDATA', '<route>/*COMMENT' for character
     *   data; a falsy result drops the node (and, for elements, its subtree)
     * @returns {XmlParser} this
     */
    fromString(xmlString, options = {}) {
        const {
            lowerCase = false,
            whiteSpace = false,
            pickNode = false,
        } = options;

        const parsed = [];
        const root = this.createNode('root', null, parsed);//fake node, only its children are kept

        let node = root;
        let route = '';
        const routeStack = [];
        let ignoreNode = false;

        const appendChild = (rawChild) => {
            if (!node.value) {
                node.value = [];
            }
            node.value.push(rawChild);
        };

        const onStartNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
            if (tag === '?xml') {
                return;
            }

            if (!ignoreNode && pickNode) {
                route += `${route ? '/' : ''}${tag}`;
                ignoreNode = !pickNode(route);
            }

            let newNode = node;
            if (!ignoreNode) {
                newNode = this.createNode(tag);
            }

            // Remember the state to restore when the matching end tag arrives.
            routeStack.push({tag, route, ignoreNode, node: newNode});

            if (ignoreNode) {
                return;
            }

            if (tail && tail.trim() !== '') {
                const parsedAttrs = sax.getAttrsSync(tail, lowerCase);
                const attrs = new Map();
                for (const attr of parsedAttrs.values()) {
                    attrs.set(attr.fn, attr.value);
                }

                if (attrs.size) {
                    newNode.attrs(attrs);
                }
            }

            appendChild(newNode.raw);
            node = newNode;
        };

        const onEndNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
            // End tags that do not match the innermost open tag are ignored.
            if (routeStack.length && routeStack[routeStack.length - 1].tag === tag) {
                routeStack.pop();

                if (routeStack.length) {
                    const last = routeStack[routeStack.length - 1];
                    route = last.route;
                    ignoreNode = last.ignoreNode;
                    node = last.node;
                } else {
                    route = '';
                    ignoreNode = false;
                    node = root;
                }
            }
        };

        const onTextNode = (text, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
            if (ignoreNode || (pickNode && !pickNode(`${route}/*TEXT`))) {
                return;
            }

            if (!whiteSpace && text.trim() === '') {
                return;
            }

            appendChild(this.createText(text).raw);
        };

        const onCdata = (tagData, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
            if (ignoreNode || (pickNode && !pickNode(`${route}/*CDATA`))) {
                return;
            }

            appendChild(this.createCdata(tagData).raw);
        };

        const onComment = (tagData, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
            if (ignoreNode || (pickNode && !pickNode(`${route}/*COMMENT`))) {
                return;
            }

            appendChild(this.createComment(tagData).raw);
        };

        sax.parseSync(xmlString, {
            onStartNode, onEndNode, onTextNode, onCdata, onComment, lowerCase
        });

        this.rawNodes = parsed;
        return this;
    }

    /**
     * Converts the document to a plain object.
     *
     * Elements become properties keyed by tag name (repeated names become
     * arrays); attributes go to '*ATTRS'; text, CDATA and comment payloads are
     * concatenated under '*TEXT', '*CDATA' and '*COMMENT'.
     *
     * @param {object} [options]
     * @param {boolean} [options.compactText=false] - collapse an element that
     *   contains only text (and no attributes) to a plain string
     * @returns {object}
     */
    toObject(options = {}) {
        const {
            compactText = false
        } = options;

        const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);

        const nodesToObject = (nodes) => {
            const result = {};

            for (const n of nodes) {
                const node = new NodeObject(n);

                if (node.type === NODE) {
                    if (!node.name) {
                        continue;
                    }

                    let newNode = {};

                    const nodeAttrs = node.attrs();
                    if (nodeAttrs) {
                        newNode['*ATTRS'] = Object.fromEntries(nodeAttrs);
                    }

                    if (node.value) {
                        Object.assign(newNode, nodesToObject(node.value));

                        // collapse a text-only node to a string
                        if (compactText
                            && !Array.isArray(newNode)
                            && hasOwn(newNode, '*TEXT')
                            && Object.keys(newNode).length === 1) {
                            newNode = newNode['*TEXT'];
                        }
                    }

                    if (!hasOwn(result, node.name)) {
                        result[node.name] = newNode;
                    } else {
                        if (!Array.isArray(result[node.name])) {
                            result[node.name] = [result[node.name]];
                        }
                        result[node.name].push(newNode);
                    }
                } else if (node.type === TEXT) {
                    if (!result['*TEXT']) {
                        result['*TEXT'] = '';
                    }
                    result['*TEXT'] += node.value || '';
                } else if (node.type === CDATA) {
                    if (!result['*CDATA']) {
                        result['*CDATA'] = '';
                    }
                    result['*CDATA'] += node.value || '';
                } else if (node.type === COMMENT) {
                    if (!result['*COMMENT']) {
                        result['*COMMENT'] = '';
                    }
                    result['*COMMENT'] += node.value || '';
                }
            }

            return result;
        };

        return nodesToObject(this.rawNodes);
    }

    /**
     * Builds the document from a plain object in the shape produced by
     * toObject(). String and number values (scalar or inside arrays) become
     * elements with a single text child.
     *
     * @param {object} xmlObject
     * @returns {XmlParser} this
     * @throws {Error} for scalar property values of an unsupported type
     */
    fromObject(xmlObject) {
        const isScalar = (v) => typeof v === 'string' || typeof v === 'number';
        const attrsOf = (obj) => (obj['*ATTRS'] ? Object.entries(obj['*ATTRS']) : null);

        const objectToNodes = (obj) => {
            const result = [];

            for (const [tag, objNode] of Object.entries(obj)) {
                if (tag === '*TEXT') {
                    result.push(this.createText(objNode).raw);
                } else if (tag === '*CDATA') {
                    result.push(this.createCdata(objNode).raw);
                } else if (tag === '*COMMENT') {
                    result.push(this.createComment(objNode).raw);
                } else if (tag === '*ATTRS') {
                    // skip: attributes are attached by the parent element below
                } else if (isScalar(objNode)) {
                    result.push(this.createNode(tag, null, [this.createText(objNode.toString()).raw]).raw);
                } else if (Array.isArray(objNode)) {
                    for (const n of objNode) {
                        if (isScalar(n)) {
                            // numbers are accepted here too, matching the scalar branch above
                            result.push(this.createNode(tag, null, [this.createText(n.toString()).raw]).raw);
                        } else if (typeof n === 'object') {
                            result.push(this.createNode(tag, attrsOf(n), objectToNodes(n)).raw);
                        }
                    }
                } else if (typeof objNode === 'object') {
                    result.push(this.createNode(tag, attrsOf(objNode), objectToNodes(objNode)).raw);
                } else {
                    throw new Error(`Unknown node type "${typeof(objNode)}" of node: ${objNode}`);
                }
            }

            return result;
        };

        this.rawNodes = objectToNodes(xmlObject);
        return this;
    }

    // XML Inspector start

    /**
     * Parses a "narrow" selector ('a/b[1]/*TEXT') into a list of levels.
     * Each level is {type, name, index}; the final level also has last: true.
     *
     * @param {string} selector
     * @returns {Array<{type: number, name: string, index: number, last?: boolean}>}
     * @throws {Error} when a '*TYPE' level names an unknown type
     */
    narrowSelector(selector) {
        const result = [];
        selector = selector.trim();

        // the index of the last level is not taken into account unless it is
        // given explicitly; an explicit one is kept by appending an empty level
        if (selector && selector[selector.length - 1] === ']') {
            selector += '/';
        }

        const levels = selector.split('/');

        for (const level of levels) {
            const [name, indexPart] = level.split('[');
            let index = 0;
            if (indexPart) {
                const i = indexPart.indexOf(']');
                index = parseInt(indexPart.substring(0, i), 10) || 0;
            }

            let type = NODE;
            if (name[0] === '*') {
                const typeName = name.substring(1);
                // Own-property check so inherited keys are not taken for a type.
                const known = Object.prototype.hasOwnProperty.call(name2type, typeName);
                type = (known ? name2type[typeName] : 0);
                if (!type) {
                    throw new Error(`Unknown selector type: ${typeName}`);
                }
            }

            result.push({type, name, index});
        }

        if (result.length) {
            result[result.length - 1].last = true;
        }

        return result;
    }

    /**
     * Resolves a narrow selector against the root nodes.
     *
     * For every level except the last, the index-th matching node is taken
     * and the search descends into its children; the last level returns all
     * matching nodes.
     *
     * @param {string} [selector='']
     * @returns {XmlParser} new parser over the matched raw nodes (empty when
     *   the path does not resolve)
     */
    inspect(selector = '') {
        selector = this.narrowSelector(selector);

        let raw = this.rawNodes;
        for (const s of selector) {
            if (s.name) {
                const found = [];
                for (const n of raw) {
                    if (n[0] === s.type && (n[0] !== NODE || s.name === '*NODE' || n[1] === s.name)) {
                        found.push(n);

                        // Intermediate levels need only the first index+1 matches.
                        if (found.length > s.index && !s.last) {
                            break;
                        }
                    }
                }

                raw = found;
            }

            if (raw.length && !s.last) {
                if (s.index < raw.length) {
                    raw = raw[s.index];
                    if (raw[0] === NODE && raw[3]) {
                        raw = raw[3];
                    } else {
                        raw = [];
                        break;
                    }
                } else {
                    raw = [];
                    break;
                }
            }
        }

        return new XmlParser(raw);
    }

    /** Alias of inspect(). */
    $$(selector) {
        return this.inspect(selector);
    }

    /**
     * @returns {XmlParser[]} one single-root parser per matched element node
     */
    $$array(selector) {
        const res = this.inspect(selector);
        const result = [];
        for (const n of res.rawNodes) {
            if (n[0] === NODE) {
                result.push(new XmlParser([n]));
            }
        }
        return result;
    }

    /**
     * @returns {NodeObject} first node matched by inspect(), or an empty
     *   NodeObject (type === null) when nothing matched
     */
    $(selector) {
        const res = this.inspect(selector);
        const node = (res.count ? res.rawNodes[0] : null);
        return new NodeObject(node);
    }

    /** @returns {*} value of the first matched node, or null */
    v(selector = '') {
        const res = this.$(selector);
        return (res.type ? res.value : null);
    }

    /** @returns {string|null} first text child under selector, or null */
    text(selector = '') {
        const res = this.$(`${selector}/*TEXT`);
        return (res.type === TEXT ? res.value : null);
    }

    /** @returns {string|null} first comment child under selector, or null */
    comment(selector = '') {
        const res = this.$(`${selector}/*COMMENT`);
        return (res.type === COMMENT ? res.value : null);
    }

    /** @returns {string|null} first CDATA child under selector, or null */
    cdata(selector = '') {
        const res = this.$(`${selector}/*CDATA`);
        return (res.type === CDATA ? res.value : null);
    }

    /**
     * Joins the values of all non-element nodes matched by the selector.
     *
     * @returns {string|null} concatenated values, or null when none matched
     */
    concat(selector = '') {
        const res = this.$$(selector);
        const out = [];
        for (const n of res.rawNodes) {
            const node = new NodeObject(n);
            if (node.type && node.type !== NODE) {
                out.push(node.value);
            }
        }
        return (out.length ? out.join('') : null);
    }

    /**
     * @returns {object|null} attributes of the first matched element as a
     *   plain object, or null when there is no element or no attributes
     */
    attrs(selector = '') {
        const res = this.$(selector);
        const attrs = res.attrs();
        return (res.type === NODE && attrs ? Object.fromEntries(attrs) : null);
    }
    // XML Inspector finish
}

module.exports = XmlParser;
|