XmlParser.js 21 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771
  1. const sax = require('./sax');
  2. const ObjectInspector = require('./ObjectInspector');
  // Node type codes. A raw node is an array whose element 0 holds one of these codes.
  const NODE = 1;
  const TEXT = 2;
  const CDATA = 3;
  const COMMENT = 4;

  // Type name (the part after '*' in a selector such as '*TEXT') -> type code.
  // Frozen: this is a shared, read-only lookup table.
  const name2type = Object.freeze({
    NODE,
    TEXT,
    CDATA,
    COMMENT,
  });

  // Type code -> type name; used to validate type codes.
  // Frozen: this is a shared, read-only lookup table.
  const type2name = Object.freeze({
    [NODE]: 'NODE',
    [TEXT]: 'TEXT',
    [CDATA]: 'CDATA',
    [COMMENT]: 'COMMENT',
  });
  /**
   * Selector parsing and raw node-list helpers shared by the classes that extend it.
   *
   * A "raw node" is an array whose element 0 is a type code; for element nodes
   * (type NODE) element 1 is the tag name.
   */
  class NodeBase {
    /**
     * Parses a selector string into a selector descriptor.
     *
     *   ''       -> before = true
     *   '*'      -> all = true
     *   '*NAME'  -> type = code of NAME (a key of name2type)
     *   other    -> name = selectorString (matches element nodes by tag name)
     *
     * @param {string} selectorString
     * @returns {{all: boolean, before: boolean, type: number, name: string}}
     * @throws {Error} when a '*NAME' selector names an unknown type
     */
    makeSelectorObj(selectorString) {
      const result = {all: false, before: false, type: 0, name: ''};

      if (selectorString === '') {
        result.before = true;
      } else if (selectorString === '*') {
        result.all = true;
      } else if (selectorString[0] === '*') {
        const typeName = selectorString.substring(1);
        // Own-property check: a plain lookup would also accept inherited keys
        // such as 'toString' or 'constructor' and yield a non-numeric "type".
        if (!Object.prototype.hasOwnProperty.call(name2type, typeName)) {
          throw new Error(`Unknown selector type: ${typeName}`);
        }
        result.type = name2type[typeName];
      } else {
        result.name = selectorString;
      }

      return result;
    }

    /**
     * Tests a raw node against a selector descriptor.
     *
     * The `all` and `before` selectors match everything. Otherwise a node matches
     * when its type equals the selector type, or when it is an element node whose
     * name equals the selector name.
     *
     * @param {Array} rawNode
     * @param {object} selectorObj - result of makeSelectorObj()
     * @returns {boolean}
     */
    checkNode(rawNode, selectorObj) {
      if (selectorObj.all || selectorObj.before) {
        return true;
      }
      // A missing entry cannot match a type or name selector
      if (!rawNode) {
        return false;
      }
      if (selectorObj.type && rawNode[0] === selectorObj.type) {
        return true;
      }
      return rawNode[0] === NODE && rawNode[1] === selectorObj.name;
    }

    /**
     * Finds the first node matching the selector.
     *
     * @param {Array[]} nodes
     * @param {object} selectorObj
     * @returns {number} index of the first match, or -1 when nothing matches
     */
    findNodeIndex(nodes, selectorObj) {
      return nodes.findIndex((rawNode) => this.checkNode(rawNode, selectorObj));
    }

    /**
     * Inserts rawNode into nodes (in place) at a position chosen by the selector:
     * `all` appends, `before` prepends, any other selector inserts immediately
     * before the first matching node, or appends when there is no match.
     *
     * @param {Array[]} nodes
     * @param {Array} rawNode
     * @param {object} selectorObj
     */
    rawAdd(nodes, rawNode, selectorObj) {
      if (selectorObj.all) {
        nodes.push(rawNode);
        return;
      }
      if (selectorObj.before) {
        nodes.unshift(rawNode);
        return;
      }

      const index = this.findNodeIndex(nodes, selectorObj);
      if (index >= 0) {
        nodes.splice(index, 0, rawNode);
      } else {
        nodes.push(rawNode);
      }
    }

    /**
     * Removes every node matching the selector from nodes (in place).
     * The `before` selector ('') removes nothing.
     *
     * @param {Array[]} nodes
     * @param {object} selectorObj
     */
    rawRemove(nodes, selectorObj) {
      if (selectorObj.before) {
        return;
      }
      // Walk backwards so splicing does not shift the indices still to be visited
      for (let i = nodes.length - 1; i >= 0; i--) {
        if (this.checkNode(nodes[i], selectorObj)) {
          nodes.splice(i, 1);
        }
      }
    }
  }
  /**
   * Wrapper around a single raw node array that exposes named accessors.
   *
   * Raw layout as read and written by this class:
   *   element node:            [NODE, name, attrs, children]
   *   text / CDATA / comment:  [type, value]
   * where attrs is an array of [key, value] pairs and children is an array of raw nodes
   * (either may be absent or null).
   *
   * The wrapper does not copy: every mutation is applied to the wrapped array.
   */
  class NodeObject extends NodeBase {
    /**
     * @param {?Array} raw - raw node array to wrap; a falsy value yields an empty node ([])
     */
    constructor(raw = null) {
      super();
      this.raw = raw || [];
    }

    /** @returns {?number} type code of the node, or null for an empty node */
    get type() {
      return this.raw[0] || null;
    }

    /** @returns {?string} tag name for element nodes, null for every other type */
    get name() {
      if (this.type === NODE) {
        return this.raw[1] || null;
      }
      return null;
    }

    /** Sets the tag name; ignored for non-element nodes. */
    set name(value) {
      if (this.type === NODE) {
        this.raw[1] = value;
      }
    }

    /**
     * Reads or updates the attributes of an element node.
     *
     *   attrs()            -> Map built from the stored attributes, or null when none are stored
     *   attrs(map)         -> replaces all attributes with the Map's entries, returns that Map
     *   attrs(key, value)  -> sets one attribute and returns a Map of all attributes
     *
     * Setting a single attribute on an element that has no attributes yet creates
     * the attribute list (previously the call was silently ignored).
     *
     * @param {Map|string} [key]
     * @param {*} [value]
     * @returns {?Map} null for non-element nodes, or when reading an element without attributes
     */
    attrs(key, value) {
      if (this.type !== NODE) {
        return null;
      }

      if (key instanceof Map) {
        this.raw[2] = Array.from(key);
        return key;
      }

      const hasStoredAttrs = Array.isArray(this.raw[2]);

      // Read-only call (no usable key)
      if (!key) {
        return hasStoredAttrs ? new Map(this.raw[2]) : null;
      }

      const map = new Map(hasStoredAttrs ? this.raw[2] : []);
      map.set(key, value);
      this.raw[2] = Array.from(map);
      return map;
    }

    /**
     * @returns {?(Array|string)} child raw nodes for an element node, the text for
     *   text / CDATA / comment nodes; null when empty or for an empty node
     */
    get value() {
      switch (this.type) {
        case NODE:
          return this.raw[3] || null;
        case TEXT:
        case CDATA:
        case COMMENT:
          return this.raw[1] || null;
      }
      return null;
    }

    /** Sets the children (element node) or the text (text / CDATA / comment node). */
    set value(v) {
      switch (this.type) {
        case NODE:
          this.raw[3] = v;
          break;
        case TEXT:
        case CDATA:
        case COMMENT:
          this.raw[1] = v;
          break;
      }
    }

    /**
     * Adds one node, or an array of nodes, to this element's children.
     *
     * Position is chosen by the `after` selector via rawAdd(): '*' appends,
     * '' prepends, any other selector inserts immediately before the first
     * matching child (or appends when none matches).
     * NOTE(review): despite the parameter name, insertion happens before the
     * matched child - confirm this is the intended contract.
     *
     * @param {NodeObject|NodeObject[]} node
     * @param {string} [after='*'] - position selector
     * @returns {NodeObject} this, for chaining (a no-op on non-element nodes)
     */
    add(node, after = '*') {
      if (this.type !== NODE) {
        return this;
      }

      const selectorObj = this.makeSelectorObj(after);
      if (!Array.isArray(this.raw[3])) {
        this.raw[3] = [];
      }

      const toAdd = Array.isArray(node) ? node : [node];
      for (const item of toAdd) {
        this.rawAdd(this.raw[3], item.raw, selectorObj);
      }

      return this;
    }

    /**
     * Removes every child matching the selector. The default selector ''
     * removes nothing. When no children remain, the child list is reset to null.
     *
     * @param {string} [selector='']
     * @returns {NodeObject} this, for chaining
     */
    remove(selector = '') {
      if (this.type !== NODE || !this.raw[3]) {
        return this;
      }

      const selectorObj = this.makeSelectorObj(selector);
      this.rawRemove(this.raw[3], selectorObj);
      if (!this.raw[3].length) {
        this.raw[3] = null;
      }

      return this;
    }

    /**
     * Calls callback(child) for each direct child, wrapped in a NodeObject.
     * Iteration stops as soon as the callback returns exactly false.
     *
     * @param {function(NodeObject): *} callback
     * @returns {NodeObject} this, for chaining
     */
    each(callback) {
      if (this.type !== NODE || !this.raw[3]) {
        return this;
      }

      for (const rawChild of this.raw[3]) {
        if (callback(new NodeObject(rawChild)) === false) {
          break;
        }
      }

      return this;
    }

    /**
     * Calls callback(node, route) for every descendant, depth-first, parent before
     * its children. `route` is the '/'-joined list of ancestor tag names below this
     * node ('' for direct children). Traversal stops entirely as soon as the
     * callback returns exactly false.
     *
     * @param {function(NodeObject, string): *} callback
     * @returns {NodeObject} this, for chaining
     */
    eachDeep(callback) {
      if (this.type !== NODE || !this.raw[3]) {
        return this;
      }

      const walk = (nodes, route = '') => {
        for (const rawChild of nodes) {
          const child = new NodeObject(rawChild);
          if (callback(child, route) === false) {
            return false;
          }

          if (child.type === NODE && child.value) {
            const childRoute = `${route}${route ? '/' : ''}${child.name}`;
            if (walk(child.value, childRoute) === false) {
              return false;
            }
          }
        }
        return undefined;
      };

      walk(this.raw[3]);
      return this;
    }
  }
  /**
   * Holds a list of raw XML nodes and provides creation, editing, selection and
   * conversion (XML string, JSON, plain object) on top of it.
   *
   * Raw node layout: [NODE, name, attrs, children] for elements and
   * [type, value] for text / CDATA / comment nodes.
   */
  class XmlParser extends NodeBase {
    /**
     * @param {Array[]} [rawNodes=[]] - raw nodes to operate on (used as-is, not copied)
     */
    constructor(rawNodes = []) {
      super();

      // Type codes exposed on the instance
      this.NODE = NODE;
      this.TEXT = TEXT;
      this.CDATA = CDATA;
      this.COMMENT = COMMENT;

      this.rawNodes = rawNodes;
    }

    /** @returns {number} number of nodes held by this instance */
    get count() {
      return this.rawNodes.length;
    }

    /**
     * Wraps a raw node array in a NodeObject.
     * @param {Array} node
     * @returns {NodeObject}
     */
    nodeObject(node) {
      return new NodeObject(node);
    }

    /**
     * Creates another XmlParser over the given raw nodes.
     * @param {Array[]} nodes
     * @returns {XmlParser}
     */
    newParser(nodes) {
      return new XmlParser(nodes);
    }

    /**
     * Validates a node type code.
     * @param {number} type
     * @throws {Error} when the type is not a key of type2name
     */
    checkType(type) {
      // Own-property check so inherited keys (e.g. 'toString') are rejected too
      if (!Object.prototype.hasOwnProperty.call(type2name, type) || !type2name[type]) {
        throw new Error(`Invalid type: ${type}`);
      }
    }

    /**
     * Creates a detached node of the given type.
     *
     * @param {number} type - NODE, TEXT, CDATA or COMMENT
     * @param {string} nameOrValue - tag name for NODE, text content otherwise
     * @param {?Array} [attrs=null] - NODE only: array of [key, value] pairs
     * @param {?Array} [value=null] - NODE only: array of child raw nodes
     * @returns {NodeObject}
     * @throws {Error} on an invalid type, an empty or non-string node name, or a non-string value
     */
    createTypedNode(type, nameOrValue, attrs = null, value = null) {
      this.checkType(type);

      switch (type) {
        case NODE:
          if (!nameOrValue || typeof nameOrValue !== 'string') {
            throw new Error('Node name must be non-empty string');
          }
          return new NodeObject([type, nameOrValue, attrs, value]);
        case TEXT:
        case CDATA:
        case COMMENT:
          if (typeof nameOrValue !== 'string') {
            throw new Error('Node value must be of type string');
          }
          return new NodeObject([type, nameOrValue]);
      }
      return undefined;
    }

    /** Creates an element node; see createTypedNode(). */
    createNode(name, attrs = null, value = null) {
      return this.createTypedNode(NODE, name, attrs, value);
    }

    /** Creates a text node; the value must be a string (the null default is rejected). */
    createText(value = null) {
      return this.createTypedNode(TEXT, value);
    }

    /** Creates a CDATA node; the value must be a string (the null default is rejected). */
    createCdata(value = null) {
      return this.createTypedNode(CDATA, value);
    }

    /** Creates a comment node; the value must be a string (the null default is rejected). */
    createComment(value = null) {
      return this.createTypedNode(COMMENT, value);
    }

    /**
     * Adds a node (or array of nodes) to the children of EVERY element node held
     * by this instance. The same raw array is inserted into each of them.
     *
     * Position is chosen by the `after` selector via rawAdd(): '*' appends,
     * '' prepends, any other selector inserts immediately before the first
     * matching child (or appends when none matches).
     *
     * @param {NodeObject|NodeObject[]} node
     * @param {string} [after='*'] - position selector
     * @returns {XmlParser} this
     */
    add(node, after = '*') {
      const selectorObj = this.makeSelectorObj(after);
      const toAdd = Array.isArray(node) ? node : [node];

      for (const n of this.rawNodes) {
        if (n && n[0] === NODE) {
          if (!Array.isArray(n[3])) {
            n[3] = [];
          }
          for (const item of toAdd) {
            this.rawAdd(n[3], item.raw, selectorObj);
          }
        }
      }

      return this;
    }

    /**
     * Adds a node (or array of nodes) to this instance's own node list.
     * Positioning follows the same selector rules as add().
     *
     * @param {NodeObject|NodeObject[]} node
     * @param {string} [after='*'] - position selector
     * @returns {XmlParser} this
     */
    addRoot(node, after = '*') {
      const selectorObj = this.makeSelectorObj(after);
      const toAdd = Array.isArray(node) ? node : [node];

      for (const item of toAdd) {
        this.rawAdd(this.rawNodes, item.raw, selectorObj);
      }

      return this;
    }

    /**
     * Removes matching children from every element node held by this instance.
     * The default selector '' removes nothing. Emptied child lists become null.
     *
     * @param {string} [selector='']
     * @returns {XmlParser} this
     */
    remove(selector = '') {
      const selectorObj = this.makeSelectorObj(selector);

      for (const n of this.rawNodes) {
        if (n && n[0] === NODE && Array.isArray(n[3])) {
          this.rawRemove(n[3], selectorObj);
          if (!n[3].length) {
            n[3] = null;
          }
        }
      }

      return this;
    }

    /**
     * Removes matching nodes from this instance's own node list.
     * The default selector '' removes nothing.
     *
     * @param {string} [selector='']
     * @returns {XmlParser} this
     */
    removeRoot(selector = '') {
      const selectorObj = this.makeSelectorObj(selector);
      this.rawRemove(this.rawNodes, selectorObj);
      return this;
    }

    /**
     * Calls callback(node) for each child of the held element nodes, or for the
     * held nodes themselves when `self` is true. Iteration stops as soon as the
     * callback returns exactly false.
     *
     * @param {function(NodeObject): *} callback
     * @param {boolean} [self=false]
     * @returns {XmlParser} this
     */
    each(callback, self = false) {
      if (self) {
        for (const n of this.rawNodes) {
          if (callback(new NodeObject(n)) === false) {
            return this;
          }
        }
        return this;
      }

      for (const n of this.rawNodes) {
        // Same guards as add()/remove()/select(): skip holes and non-array child lists
        if (n && n[0] === NODE && Array.isArray(n[3])) {
          for (const child of n[3]) {
            if (callback(new NodeObject(child)) === false) {
              return this;
            }
          }
        }
      }

      return this;
    }

    /** Shorthand for each(callback, true). */
    eachSelf(callback) {
      return this.each(callback, true);
    }

    /**
     * Depth-first traversal (parent before children) calling callback(node, route),
     * where `route` is the '/'-joined list of ancestor tag names relative to the
     * traversal start. Starts at the children of the held element nodes, or at
     * the held nodes themselves when `self` is true. Stops entirely as soon as
     * the callback returns exactly false.
     *
     * @param {function(NodeObject, string): *} callback
     * @param {boolean} [self=false]
     * @returns {XmlParser} this
     */
    eachDeep(callback, self = false) {
      const walk = (nodes, route = '') => {
        for (const n of nodes) {
          const node = new NodeObject(n);
          if (callback(node, route) === false) {
            return false;
          }

          if (node.type === NODE && node.value) {
            const childRoute = `${route}${route ? '/' : ''}${node.name}`;
            if (walk(node.value, childRoute) === false) {
              return false;
            }
          }
        }
        return undefined;
      };

      if (self) {
        walk(this.rawNodes);
        return this;
      }

      for (const n of this.rawNodes) {
        // Same guards as add()/remove()/select(): skip holes and non-array child lists
        if (n && n[0] === NODE && Array.isArray(n[3])) {
          if (walk(n[3]) === false) {
            break;
          }
        }
      }

      return this;
    }

    /** Shorthand for eachDeep(callback, true). */
    eachDeepSelf(callback) {
      return this.eachDeep(callback, true);
    }

    /**
     * Calls callback(rawNode) for every node in `nodes` that matches the selector.
     *
     * @param {Array[]} nodes
     * @param {object} selectorObj - result of makeSelectorObj()
     * @param {function(Array): void} callback
     * @returns {XmlParser} this
     */
    rawSelect(nodes, selectorObj, callback) {
      for (const n of nodes) {
        if (this.checkNode(n, selectorObj)) {
          callback(n);
        }
      }
      return this;
    }

    /**
     * Selects nodes and returns them in a new XmlParser (raw nodes are shared, not copied).
     *
     * A selector containing '/' is a path: each segment is applied to the result
     * of the previous one. Segments follow makeSelectorObj() syntax; both '' and
     * '*' match every node. By default the selector is matched against the
     * children of the held element nodes; with `self` true it is matched against
     * the held nodes themselves (for a path, only the first segment).
     *
     * @param {string} [selector='']
     * @param {boolean} [self=false]
     * @returns {XmlParser}
     */
    select(selector = '', self = false) {
      let newRawNodes = [];

      if (selector.includes('/')) {
        let res = this;
        let matchSelf = self;
        for (const segment of selector.split('/')) {
          res = res.select(segment, matchSelf);
          matchSelf = false;
        }
        newRawNodes = res.rawNodes;
      } else {
        const selectorObj = this.makeSelectorObj(selector);
        const collect = (rawNode) => {
          newRawNodes.push(rawNode);
        };

        if (self) {
          this.rawSelect(this.rawNodes, selectorObj, collect);
        } else {
          for (const n of this.rawNodes) {
            if (n && n[0] === NODE && Array.isArray(n[3])) {
              this.rawSelect(n[3], selectorObj, collect);
            }
          }
        }
      }

      return new XmlParser(newRawNodes);
    }

    /** Alias of select(). */
    $$(selector, self) {
      return this.select(selector, self);
    }

    /** Alias of select(selector, true). */
    $$self(selector) {
      return this.select(selector, true);
    }

    /**
     * Like select(), but returns only the first match wrapped in a NodeObject.
     * When nothing matches, an empty NodeObject (type === null) is returned.
     *
     * @param {string} [selector]
     * @param {boolean} [self]
     * @returns {NodeObject}
     */
    selectFirst(selector, self) {
      const result = this.select(selector, self);
      const node = (result.count ? result.rawNodes[0] : null);
      return new NodeObject(node);
    }

    /** Alias of selectFirst(). */
    $(selector, self) {
      return this.selectFirst(selector, self);
    }

    /** Alias of selectFirst(selector, true). */
    $self(selector) {
      return this.selectFirst(selector, true);
    }

    /**
     * Serializes the raw nodes to JSON.
     *
     * @param {{format?: boolean}} [options] - format: pretty-print with a 2-space indent
     * @returns {string}
     */
    toJson(options = {}) {
      const {format = false} = options;
      return format
        ? JSON.stringify(this.rawNodes, null, 2)
        : JSON.stringify(this.rawNodes);
    }

    /**
     * Replaces the raw nodes with the result of parsing a JSON string.
     *
     * @param {string} jsonString
     * @returns {XmlParser} this
     * @throws {SyntaxError} from JSON.parse on invalid JSON
     * @throws {Error} when the parsed root value is not an array
     */
    fromJson(jsonString) {
      const parsed = JSON.parse(jsonString);
      if (!Array.isArray(parsed)) {
        throw new Error('JSON parse error: root element must be array');
      }

      this.rawNodes = parsed;
      return this;
    }

    /**
     * Serializes the held nodes to an XML string.
     *
     * Attribute values and text are written as stored; no escaping is applied here.
     * String attribute values are written as name="value"; other truthy values
     * are written as a bare attribute name; falsy non-string values are omitted.
     *
     * @param {object} [options]
     * @param {string} [options.encoding='utf-8'] - encoding name written into the XML header
     * @param {boolean} [options.format=false] - insert newlines and indentation
     * @param {boolean} [options.noHeader=false] - omit the <?xml ...?> header
     * @param {boolean} [options.expandEmpty=false] - write <a></a> instead of <a/> for empty elements
     * @returns {string}
     */
    toString(options = {}) {
      const {
        encoding = 'utf-8',
        format = false,
        noHeader = false,
        expandEmpty = false,
      } = options;

      // Type of the last node written by the most recently completed nodesToString() call;
      // used to decide whether a closing tag goes on its own line.
      // NOTE(review): for an element without children this still holds the value left by an
      // earlier sibling's subtree - confirm whether that affects expandEmpty + format output.
      let deepType = 0;
      let out = '';
      if (!noHeader) {
        out += `<?xml version="1.0" encoding="${encoding}"?>`;
      }

      const nodesToString = (nodes, depth = 0) => {
        let result = '';
        const indent = '\n' + ' '.repeat(depth);

        // Type of the previous sibling; nothing is indented right after a text node
        let lastType = 0;

        for (const n of nodes) {
          const node = new NodeObject(n);

          let open = '';
          let body = '';
          let close = '';

          if (node.type === NODE) {
            if (!node.name) {
              continue;
            }

            let attrs = '';
            const nodeAttrs = node.attrs();
            if (nodeAttrs) {
              for (const [attrName, attrValue] of nodeAttrs) {
                if (typeof attrValue === 'string') {
                  attrs += ` ${attrName}="${attrValue}"`;
                } else if (attrValue) {
                  attrs += ` ${attrName}`;
                }
              }
            }

            if (node.value) {
              body = nodesToString(node.value, depth + 2);
            }

            if (!body && !expandEmpty) {
              open = (format && lastType !== TEXT ? indent : '');
              open += `<${node.name}${attrs}/>`;
            } else {
              open = (format && lastType !== TEXT ? indent : '');
              open += `<${node.name}${attrs}>`;

              close = (format && deepType && deepType !== TEXT ? indent : '');
              close += `</${node.name}>`;
            }
          } else if (node.type === TEXT) {
            body = node.value || '';
          } else if (node.type === CDATA) {
            body = (format && lastType !== TEXT ? indent : '');
            body += `<![CDATA[${node.value || ''}]]>`;
          } else if (node.type === COMMENT) {
            body = (format && lastType !== TEXT ? indent : '');
            body += `<!--${node.value || ''}-->`;
          }

          result += `${open}${body}${close}`;
          lastType = node.type;
        }

        deepType = lastType;
        return result;
      };

      out += nodesToString(this.rawNodes) + (format ? '\n' : '');
      return out;
    }

    /**
     * Parses an XML string with the project's sax module and replaces the held nodes.
     *
     * @param {string} xmlString
     * @param {object} [options]
     * @param {boolean} [options.lowerCase=false] - forwarded to sax.parseSync and sax.getAttrsSync
     * @param {boolean} [options.whiteSpace=false] - keep text nodes that contain only whitespace
     * @param {function(string): *|false} [options.pickNode=false] - filter called with the
     *   '/'-joined route of each element (and with `${route}/*TEXT`, `/*CDATA`, `/*COMMENT`
     *   for the other node kinds); a falsy result drops the node (and, for elements, its subtree)
     * @returns {XmlParser} this
     */
    fromString(xmlString, options = {}) {
      const {
        lowerCase = false,
        whiteSpace = false,
        pickNode = false,
      } = options;

      const parsed = [];
      const root = this.createNode('root', null, parsed);//fake node: its child list is `parsed`

      let node = root;        // element currently receiving children
      let route = '';         // route of the current element (maintained only when pickNode is set)
      const routeStack = [];  // one entry per open tag, so state can be restored on close
      let ignoreNode = false; // true while inside a subtree rejected by pickNode

      const onStartNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
        if (tag === '?xml') {
          return;
        }

        if (!ignoreNode && pickNode) {
          route += `${route ? '/' : ''}${tag}`;
          ignoreNode = !pickNode(route);
        }

        let newNode = node;
        if (!ignoreNode) {
          newNode = this.createNode(tag);
        }

        routeStack.push({tag, route, ignoreNode, node: newNode});

        if (ignoreNode) {
          return;
        }

        if (tail && tail.trim() !== '') {
          // presumably returns a collection whose values carry `fn` (attribute name) and
          // `value` - verify against the sax module
          const parsedAttrs = sax.getAttrsSync(tail, lowerCase);
          const attrs = new Map();
          for (const attr of parsedAttrs.values()) {
            attrs.set(attr.fn, attr.value);
          }

          if (attrs.size) {
            newNode.attrs(attrs);
          }
        }

        if (!node.value) {
          node.value = [];
        }
        node.value.push(newNode.raw);
        node = newNode;
      };

      const onEndNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
        // Only a close tag that matches the innermost open tag pops the stack
        if (routeStack.length && routeStack[routeStack.length - 1].tag === tag) {
          routeStack.pop();

          if (routeStack.length) {
            const last = routeStack[routeStack.length - 1];
            route = last.route;
            ignoreNode = last.ignoreNode;
            node = last.node;
          } else {
            route = '';
            ignoreNode = false;
            node = root;
          }
        }
      };

      const onTextNode = (text, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
        if (ignoreNode || (pickNode && !pickNode(`${route}/*TEXT`))) {
          return;
        }

        if (!whiteSpace && text.trim() === '') {
          return;
        }

        if (!node.value) {
          node.value = [];
        }
        node.value.push(this.createText(text).raw);
      };

      const onCdata = (tagData, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
        if (ignoreNode || (pickNode && !pickNode(`${route}/*CDATA`))) {
          return;
        }

        if (!node.value) {
          node.value = [];
        }
        node.value.push(this.createCdata(tagData).raw);
      };

      const onComment = (tagData, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
        if (ignoreNode || (pickNode && !pickNode(`${route}/*COMMENT`))) {
          return;
        }

        if (!node.value) {
          node.value = [];
        }
        node.value.push(this.createComment(tagData).raw);
      };

      sax.parseSync(xmlString, {
        onStartNode, onEndNode, onTextNode, onCdata, onComment, lowerCase
      });

      this.rawNodes = parsed;
      return this;
    }

    /**
     * Converts the held nodes to a plain object.
     *
     * Each element becomes a property named after its tag; repeated tags become
     * an array. Attributes are stored under '*ATTRS'; text, CDATA and comment
     * content is concatenated under '*TEXT', '*CDATA' and '*COMMENT'.
     *
     * @param {{compactText?: boolean}} [options] - compactText: an element whose only
     *   content is text is represented by that string instead of {'*TEXT': ...}
     * @returns {object}
     */
    toObject(options = {}) {
      const {
        compactText = false,
      } = options;

      const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);

      const nodesToObject = (nodes) => {
        const result = {};

        for (const n of nodes) {
          const node = new NodeObject(n);

          if (node.type === NODE) {
            if (!node.name) {
              continue;
            }

            let newNode = {};

            const nodeAttrs = node.attrs();
            if (nodeAttrs) {
              newNode['*ATTRS'] = Object.fromEntries(nodeAttrs);
            }

            if (node.value) {
              Object.assign(newNode, nodesToObject(node.value));

              // collapse a text-only node to a plain string
              if (compactText
                && hasOwn(newNode, '*TEXT')
                && Object.keys(newNode).length === 1) {
                newNode = newNode['*TEXT'];
              }
            }

            if (!hasOwn(result, node.name)) {
              result[node.name] = newNode;
            } else {
              // second occurrence of the tag: switch to an array of entries
              if (!Array.isArray(result[node.name])) {
                result[node.name] = [result[node.name]];
              }
              result[node.name].push(newNode);
            }
          } else if (node.type === TEXT) {
            if (!result['*TEXT']) {
              result['*TEXT'] = '';
            }
            result['*TEXT'] += node.value || '';
          } else if (node.type === CDATA) {
            if (!result['*CDATA']) {
              result['*CDATA'] = '';
            }
            result['*CDATA'] += node.value || '';
          } else if (node.type === COMMENT) {
            if (!result['*COMMENT']) {
              result['*COMMENT'] = '';
            }
            result['*COMMENT'] += node.value || '';
          }
        }

        return result;
      };

      return nodesToObject(this.rawNodes);
    }

    /**
     * Replaces the held nodes with nodes built from a plain object in the shape
     * produced by toObject().
     *
     * '*TEXT', '*CDATA' and '*COMMENT' keys become nodes of that kind; '*ATTRS'
     * is consumed as the attributes of the enclosing element; every other key
     * becomes an element (one per array item when the value is an array).
     * String values become an element with a single text child, objects are
     * converted recursively. Values of any other kind, including null, are skipped.
     *
     * @param {object} xmlObject
     * @returns {XmlParser} this
     * @throws {Error} from the create* methods, e.g. when a '*TEXT' value is not a string
     */
    fromObject(xmlObject) {
      const objectToNodes = (obj) => {
        const result = [];

        // Appends one element named `tag` built from a string or an object
        const pushElement = (tag, item) => {
          if (typeof item === 'string') {
            result.push(this.createNode(tag, null, [this.createText(item).raw]).raw);
          } else if (item !== null && typeof item === 'object') {
            // explicit null check: typeof null is 'object' and would crash on property access
            const attrs = (item['*ATTRS'] ? Object.entries(item['*ATTRS']) : null);
            result.push(this.createNode(tag, attrs, objectToNodes(item)).raw);
          }
        };

        for (const [tag, objNode] of Object.entries(obj)) {
          if (tag === '*TEXT') {
            result.push(this.createText(objNode).raw);
          } else if (tag === '*CDATA') {
            result.push(this.createCdata(objNode).raw);
          } else if (tag === '*COMMENT') {
            result.push(this.createComment(objNode).raw);
          } else if (tag === '*ATTRS') {
            // skip: attributes are attached by the caller that creates the enclosing element
          } else if (Array.isArray(objNode)) {
            for (const item of objNode) {
              pushElement(tag, item);
            }
          } else {
            pushElement(tag, objNode);
          }
        }

        return result;
      };

      this.rawNodes = objectToNodes(xmlObject);
      return this;
    }

    /**
     * Wraps an object in an ObjectInspector.
     *
     * @param {object} [obj] - object to inspect; defaults to this.toObject()
     * @returns {ObjectInspector}
     */
    inspector(obj) {
      if (!obj) {
        obj = this.toObject();
      }
      return new ObjectInspector(obj);
    }
  }
  615. module.exports = XmlParser;