XmlParser.js 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753
  1. const sax = require('./sax');
  2. //node types
  3. const NODE = 1;
  4. const TEXT = 2;
  5. const CDATA = 3;
  6. const COMMENT = 4;
  7. const name2type = {
  8. 'NODE': NODE,
  9. 'TEXT': TEXT,
  10. 'CDATA': CDATA,
  11. 'COMMENT': COMMENT,
  12. };
  13. const type2name = {
  14. [NODE]: 'NODE',
  15. [TEXT]: 'TEXT',
  16. [CDATA]: 'CDATA',
  17. [COMMENT]: 'COMMENT',
  18. };
  19. class NodeBase {
  20. makeSelectorObj(selectorString) {
  21. const result = {all: false, before: false, type: 0, name: ''};
  22. if (selectorString === '') {
  23. result.before = true;
  24. } else if (selectorString === '*') {
  25. result.all = true;
  26. } else if (selectorString[0] === '*') {
  27. const typeName = selectorString.substring(1);
  28. result.type = name2type[typeName];
  29. if (!result.type)
  30. throw new Error(`Unknown selector type: ${typeName}`);
  31. } else {
  32. result.name = selectorString;
  33. }
  34. return result;
  35. }
  36. checkNode(rawNode, selectorObj) {
  37. return selectorObj.all || selectorObj.before
  38. || (selectorObj.type && rawNode[0] === selectorObj.type)
  39. || (rawNode[0] === NODE && rawNode[1] === selectorObj.name);
  40. }
  41. findNodeIndex(nodes, selectorObj) {
  42. for (let i = 0; i < nodes.length; i++)
  43. if (this.checkNode(nodes[i], selectorObj))
  44. return i;
  45. }
  46. rawAdd(nodes, rawNode, selectorObj) {
  47. if (selectorObj.all) {
  48. nodes.push(rawNode);
  49. } else if (selectorObj.before) {
  50. nodes.unshift(rawNode);
  51. } else {
  52. const index = this.findNodeIndex(nodes, selectorObj);
  53. if (index >= 0)
  54. nodes.splice(index, 0, rawNode);
  55. else
  56. nodes.push(rawNode);
  57. }
  58. }
  59. rawRemove(nodes, selectorObj) {
  60. if (selectorObj.before)
  61. return;
  62. for (let i = nodes.length - 1; i >= 0; i--) {
  63. if (this.checkNode(nodes[i], selectorObj))
  64. nodes.splice(i, 1);
  65. }
  66. }
  67. }
  68. class NodeObject extends NodeBase {
  69. constructor(rawNode) {
  70. super();
  71. if (rawNode)
  72. this.raw = rawNode;
  73. else
  74. this.raw = [];
  75. }
  76. get type() {
  77. return this.raw[0] || null;
  78. }
  79. get name() {
  80. if (this.type === NODE)
  81. return this.raw[1] || null;
  82. return null;
  83. }
  84. set name(value) {
  85. if (this.type === NODE)
  86. this.raw[1] = value;
  87. }
  88. attrs(key, value) {
  89. if (this.type !== NODE)
  90. return null;
  91. let map = null;
  92. if (key instanceof Map) {
  93. map = key;
  94. this.raw[2] = Array.from(map);
  95. } else if (Array.isArray(this.raw[2])) {
  96. map = new Map(this.raw[2]);
  97. if (key) {
  98. map.set(key, value);
  99. this.raw[2] = Array.from(map);
  100. }
  101. }
  102. return map;
  103. }
  104. get value() {
  105. switch (this.type) {
  106. case NODE:
  107. return this.raw[3] || null;
  108. case TEXT:
  109. case CDATA:
  110. case COMMENT:
  111. return this.raw[1] || null;
  112. }
  113. return null;
  114. }
  115. set value(v) {
  116. switch (this.type) {
  117. case NODE:
  118. this.raw[3] = v;
  119. break;
  120. case TEXT:
  121. case CDATA:
  122. case COMMENT:
  123. this.raw[1] = v;
  124. }
  125. }
  126. add(node, after = '*') {
  127. if (this.type !== NODE)
  128. return;
  129. const selectorObj = this.makeSelectorObj(after);
  130. if (!Array.isArray(this.raw[3]))
  131. this.raw[3] = [];
  132. if (Array.isArray(node)) {
  133. for (const node_ of node)
  134. this.rawAdd(this.raw[3], node_.raw, selectorObj);
  135. } else {
  136. this.rawAdd(this.raw[3], node.raw, selectorObj);
  137. }
  138. return this;
  139. }
  140. remove(selector = '') {
  141. if (this.type !== NODE || !this.raw[3])
  142. return;
  143. const selectorObj = this.makeSelectorObj(selector);
  144. this.rawRemove(this.raw[3], selectorObj);
  145. if (!this.raw[3].length)
  146. this.raw[3] = null;
  147. return this;
  148. }
  149. each(callback) {
  150. if (this.type !== NODE || !this.raw[3])
  151. return;
  152. for (const n of this.raw[3]) {
  153. callback(new NodeObject(n));
  154. }
  155. return this;
  156. }
  157. eachDeep(callback) {
  158. if (this.type !== NODE || !this.raw[3])
  159. return;
  160. const deep = (nodes, route = '') => {
  161. for (const n of nodes) {
  162. const node = new NodeObject(n);
  163. callback(node, route);
  164. if (node.type === NODE && node.value) {
  165. deep(node.value, `${route}${route ? '/' : ''}${node.name}`);
  166. }
  167. }
  168. }
  169. deep(this.raw[3]);
  170. return this;
  171. }
  172. }
  173. class XmlParser extends NodeBase {
  174. constructor(rawNodes = []) {
  175. super();
  176. this.NODE = NODE;
  177. this.TEXT = TEXT;
  178. this.CDATA = CDATA;
  179. this.COMMENT = COMMENT;
  180. this.rawNodes = rawNodes;
  181. }
  182. get count() {
  183. return this.rawNodes.length;
  184. }
  185. nodeObject(node) {
  186. return new NodeObject(node);
  187. }
  188. newParser(nodes) {
  189. return new XmlParser(nodes);
  190. }
  191. checkType(type) {
  192. if (!type2name[type])
  193. throw new Error(`Invalid type: ${type}`);
  194. }
  195. createTypedNode(type, nameOrValue, attrs = null, value = null) {
  196. this.checkType(type);
  197. switch (type) {
  198. case NODE:
  199. if (!nameOrValue || typeof(nameOrValue) !== 'string')
  200. throw new Error('Node name must be non-empty string');
  201. return new NodeObject([type, nameOrValue, attrs, value]);
  202. case TEXT:
  203. case CDATA:
  204. case COMMENT:
  205. if (typeof(nameOrValue) !== 'string')
  206. throw new Error('Node value must be of type string');
  207. return new NodeObject([type, nameOrValue]);
  208. }
  209. }
  210. createNode(name, attrs = null, value = null) {
  211. return this.createTypedNode(NODE, name, attrs, value);
  212. }
  213. createText(value = null) {
  214. return this.createTypedNode(TEXT, value);
  215. }
  216. createCdata(value = null) {
  217. return this.createTypedNode(CDATA, value);
  218. }
  219. createComment(value = null) {
  220. return this.createTypedNode(COMMENT, value);
  221. }
  222. add(node, after = '*') {
  223. const selectorObj = this.makeSelectorObj(after);
  224. for (const n of this.rawNodes) {
  225. if (n && n[0] === NODE) {
  226. if (!Array.isArray(n[3]))
  227. n[3] = [];
  228. if (Array.isArray(node)) {
  229. for (const node_ of node)
  230. this.rawAdd(n[3], node_.raw, selectorObj);
  231. } else {
  232. this.rawAdd(n[3], node.raw, selectorObj);
  233. }
  234. }
  235. }
  236. return this;
  237. }
  238. addRoot(node, after = '*') {
  239. const selectorObj = this.makeSelectorObj(after);
  240. if (Array.isArray(node)) {
  241. for (const node_ of node)
  242. this.rawAdd(this.rawNodes, node_.raw, selectorObj);
  243. } else {
  244. this.rawAdd(this.rawNodes, node.raw, selectorObj);
  245. }
  246. return this;
  247. }
  248. remove(selector = '') {
  249. const selectorObj = this.makeSelectorObj(selector);
  250. for (const n of this.rawNodes) {
  251. if (n && n[0] === NODE && Array.isArray(n[3])) {
  252. this.rawRemove(n[3], selectorObj);
  253. if (!n[3].length)
  254. n[3] = null;
  255. }
  256. }
  257. return this;
  258. }
  259. removeRoot(selector = '') {
  260. const selectorObj = this.makeSelectorObj(selector);
  261. this.rawRemove(this.rawNodes, selectorObj);
  262. return this;
  263. }
  264. each(callback, self = false) {
  265. if (self) {
  266. for (const n of this.rawNodes) {
  267. callback(new NodeObject(n));
  268. }
  269. } else {
  270. for (const n of this.rawNodes) {
  271. if (n[0] === NODE && n[3]) {
  272. for (const nn of n[3])
  273. callback(new NodeObject(nn));
  274. }
  275. }
  276. }
  277. return this;
  278. }
  279. eachSelf(callback) {
  280. return this.each(callback, true);
  281. }
  282. eachDeep(callback, self = false) {
  283. const deep = (nodes, route = '') => {
  284. for (const n of nodes) {
  285. const node = new NodeObject(n);
  286. callback(node, route);
  287. if (node.type === NODE && node.value) {
  288. deep(node.value, `${route}${route ? '/' : ''}${node.name}`);
  289. }
  290. }
  291. }
  292. if (self) {
  293. deep(this.rawNodes);
  294. } else {
  295. for (const n of this.rawNodes) {
  296. if (n[0] === NODE && n[3])
  297. deep(n[3]);
  298. }
  299. }
  300. return this;
  301. }
  302. eachDeepSelf(callback) {
  303. return this.eachDeep(callback, true);
  304. }
  305. rawSelect(nodes, selectorObj, callback) {
  306. for (const n of nodes)
  307. if (this.checkNode(n, selectorObj))
  308. callback(n);
  309. return this;
  310. }
  311. select(selector = '', self = false) {
  312. let newRawNodes = [];
  313. if (selector.indexOf('/') >= 0) {
  314. const selectors = selector.split('/');
  315. let res = this;
  316. for (const sel of selectors) {
  317. res = res.select(sel, self);
  318. self = false;
  319. }
  320. newRawNodes = res.rawNodes;
  321. } else {
  322. const selectorObj = this.makeSelectorObj(selector);
  323. if (self) {
  324. this.rawSelect(this.rawNodes, selectorObj, (node) => {
  325. newRawNodes.push(node);
  326. })
  327. } else {
  328. for (const n of this.rawNodes) {
  329. if (n && n[0] === NODE && Array.isArray(n[3])) {
  330. this.rawSelect(n[3], selectorObj, (node) => {
  331. newRawNodes.push(node);
  332. })
  333. }
  334. }
  335. }
  336. }
  337. return new XmlParser(newRawNodes);
  338. }
  339. $$(selector, self) {
  340. return this.select(selector, self);
  341. }
  342. $$self(selector) {
  343. return this.select(selector, true);
  344. }
  345. selectFirst(selector, self) {
  346. const result = this.select(selector, self);
  347. const node = (result.count ? result.rawNodes[0] : null);
  348. return new NodeObject(node);
  349. }
  350. $(selector, self) {
  351. return this.selectFirst(selector, self);
  352. }
  353. $self(selector) {
  354. return this.selectFirst(selector, true);
  355. }
  356. toJson(options = {}) {
  357. const {format = false} = options;
  358. if (format)
  359. return JSON.stringify(this.rawNodes, null, 2);
  360. else
  361. return JSON.stringify(this.rawNodes);
  362. }
  363. fromJson(jsonString) {
  364. const parsed = JSON.parse(jsonString);
  365. if (!Array.isArray(parsed))
  366. throw new Error('JSON parse error: root element must be array');
  367. this.rawNodes = parsed;
  368. return this;
  369. }
  370. toString(options = {}) {
  371. const {
  372. encoding = 'utf-8',
  373. format = false,
  374. noHeader = false,
  375. expandEmpty = false
  376. } = options;
  377. let deepType = 0;
  378. let out = '';
  379. if (!noHeader)
  380. out += `<?xml version="1.0" encoding="${encoding}"?>`;
  381. const nodesToString = (nodes, depth = 0) => {
  382. let result = '';
  383. const indent = '\n' + ' '.repeat(depth);
  384. let lastType = 0;
  385. for (const n of nodes) {
  386. const node = new NodeObject(n);
  387. let open = '';
  388. let body = '';
  389. let close = '';
  390. if (node.type === NODE) {
  391. if (!node.name)
  392. continue;
  393. let attrs = '';
  394. const nodeAttrs = node.attrs();
  395. if (nodeAttrs) {
  396. for (const [attrName, attrValue] of nodeAttrs) {
  397. if (typeof(attrValue) === 'string')
  398. attrs += ` ${attrName}="${attrValue}"`;
  399. else
  400. if (attrValue)
  401. attrs += ` ${attrName}`;
  402. }
  403. }
  404. if (node.value)
  405. body = nodesToString(node.value, depth + 2);
  406. if (!body && !expandEmpty) {
  407. open = (format && lastType !== TEXT ? indent : '');
  408. open += `<${node.name}${attrs}/>`;
  409. } else {
  410. open = (format && lastType !== TEXT ? indent : '');
  411. open += `<${node.name}${attrs}>`;
  412. close = (format && deepType && deepType !== TEXT ? indent : '');
  413. close += `</${node.name}>`;
  414. }
  415. } else if (node.type === TEXT) {
  416. body = node.value || '';
  417. } else if (node.type === CDATA) {
  418. body = (format && lastType !== TEXT ? indent : '');
  419. body += `<![CDATA[${node.value || ''}]]>`;
  420. } else if (node.type === COMMENT) {
  421. body = (format && lastType !== TEXT ? indent : '');
  422. body += `<!--${node.value || ''}-->`;
  423. }
  424. result += `${open}${body}${close}`;
  425. lastType = node.type;
  426. }
  427. deepType = lastType;
  428. return result;
  429. }
  430. out += nodesToString(this.rawNodes) + (format ? '\n' : '');
  431. return out;
  432. }
  433. fromString(xmlString, options = {}) {
  434. const {
  435. lowerCase = false,
  436. whiteSpace = false,
  437. pickNode = false,
  438. } = options;
  439. const parsed = [];
  440. const root = this.createNode('root', null, parsed);//fake node
  441. let node = root;
  442. let route = '';
  443. let routeStack = [];
  444. let ignoreNode = false;
  445. const onStartNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
  446. if (tag == '?xml')
  447. return;
  448. if (!ignoreNode && pickNode) {
  449. route += `${route ? '/' : ''}${tag}`;
  450. ignoreNode = !pickNode(route);
  451. }
  452. let newNode = node;
  453. if (!ignoreNode)
  454. newNode = this.createNode(tag);
  455. routeStack.push({tag, route, ignoreNode, node: newNode});
  456. if (ignoreNode)
  457. return;
  458. if (tail && tail.trim() !== '') {
  459. const parsedAttrs = sax.getAttrsSync(tail, lowerCase);
  460. const attrs = new Map();
  461. for (const attr of parsedAttrs.values()) {
  462. attrs.set(attr.fn, attr.value);
  463. }
  464. if (attrs.size)
  465. newNode.attrs(attrs);
  466. }
  467. if (!node.value)
  468. node.value = [];
  469. node.value.push(newNode.raw);
  470. node = newNode;
  471. };
  472. const onEndNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
  473. if (routeStack.length && routeStack[routeStack.length - 1].tag === tag) {
  474. routeStack.pop();
  475. if (routeStack.length) {
  476. const last = routeStack[routeStack.length - 1];
  477. route = last.route;
  478. ignoreNode = last.ignoreNode;
  479. node = last.node;
  480. } else {
  481. route = '';
  482. ignoreNode = false;
  483. node = root;
  484. }
  485. }
  486. }
  487. const onTextNode = (text, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
  488. if (ignoreNode || (pickNode && !pickNode(`${route}/*TEXT`)))
  489. return;
  490. if (!whiteSpace && text.trim() == '')
  491. return;
  492. if (!node.value)
  493. node.value = [];
  494. node.value.push(this.createText(text).raw);
  495. };
  496. const onCdata = (tagData, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
  497. if (ignoreNode || (pickNode && !pickNode(`${route}/*CDATA`)))
  498. return;
  499. if (!node.value)
  500. node.value = [];
  501. node.value.push(this.createCdata(tagData).raw);
  502. }
  503. const onComment = (tagData, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
  504. if (ignoreNode || (pickNode && !pickNode(`${route}/*COMMENT`)))
  505. return;
  506. if (!node.value)
  507. node.value = [];
  508. node.value.push(this.createComment(tagData).raw);
  509. }
  510. sax.parseSync(xmlString, {
  511. onStartNode, onEndNode, onTextNode, onCdata, onComment, lowerCase
  512. });
  513. this.rawNodes = parsed;
  514. return this;
  515. }
  516. toObject(options = {}) {
  517. const {
  518. compactText = false
  519. } = options;
  520. const nodesToObject = (nodes) => {
  521. const result = {};
  522. for (const n of nodes) {
  523. const node = new NodeObject(n);
  524. if (node.type === NODE) {
  525. if (!node.name)
  526. continue;
  527. let newNode = {};
  528. const nodeAttrs = node.attrs();
  529. if (nodeAttrs)
  530. newNode['*ATTRS'] = Object.fromEntries(nodeAttrs);
  531. if (node.value) {
  532. Object.assign(newNode, nodesToObject(node.value));
  533. //схлопывание текстового узла до string
  534. if (compactText
  535. && !Array.isArray(newNode)
  536. && Object.prototype.hasOwnProperty.call(newNode, '*TEXT')
  537. && Object.keys(newNode).length === 1) {
  538. newNode = newNode['*TEXT'];
  539. }
  540. }
  541. if (!Object.prototype.hasOwnProperty.call(result, node.name)) {
  542. result[node.name] = newNode;
  543. } else {
  544. if (!Array.isArray(result[node.name])) {
  545. result[node.name] = [result[node.name]];
  546. }
  547. result[node.name].push(newNode);
  548. }
  549. } else if (node.type === TEXT) {
  550. if (!result['*TEXT'])
  551. result['*TEXT'] = '';
  552. result['*TEXT'] += node.value || '';
  553. } else if (node.type === CDATA) {
  554. if (!result['*CDATA'])
  555. result['*CDATA'] = '';
  556. result['*CDATA'] += node.value || '';
  557. } else if (node.type === COMMENT) {
  558. if (!result['*COMMENT'])
  559. result['*COMMENT'] = '';
  560. result['*COMMENT'] += node.value || '';
  561. }
  562. }
  563. return result;
  564. }
  565. return nodesToObject(this.rawNodes);
  566. }
  567. fromObject(xmlObject) {
  568. const objectToNodes = (obj) => {
  569. const result = [];
  570. for (const [tag, objNode] of Object.entries(obj)) {
  571. if (tag === '*TEXT') {
  572. result.push(this.createText(objNode).raw);
  573. } else if (tag === '*CDATA') {
  574. result.push(this.createCdata(objNode).raw);
  575. } else if (tag === '*COMMENT') {
  576. result.push(this.createComment(objNode).raw);
  577. } else if (tag === '*ATTRS') {
  578. //пропускаем
  579. } else {
  580. if (typeof(objNode) === 'string') {
  581. result.push(this.createNode(tag, null, [this.createText(objNode).raw]).raw);
  582. } else if (Array.isArray(objNode)) {
  583. for (const n of objNode) {
  584. if (typeof(n) === 'string') {
  585. result.push(this.createNode(tag, null, [this.createText(n).raw]).raw);
  586. } else if (typeof(n) === 'object') {
  587. result.push(this.createNode(tag, (n['*ATTRS'] ? Object.entries(n['*ATTRS']) : null), objectToNodes(n)).raw);
  588. }
  589. }
  590. } else if (typeof(objNode) === 'object') {
  591. result.push(this.createNode(tag, (objNode['*ATTRS'] ? Object.entries(objNode['*ATTRS']) : null), objectToNodes(objNode)).raw);
  592. }
  593. }
  594. }
  595. return result;
  596. };
  597. this.rawNodes = objectToNodes(xmlObject);
  598. return this;
  599. }
  600. }
  601. module.exports = XmlParser;