// XmlParser.js
  1. const sax = require('./sax');
  2. const ObjectNavigator = require('./ObjectNavigator');
  // Numeric node-type tags. A raw node is an array whose first element is one of these.
  const NODE = 1;
  const TEXT = 2;
  const CDATA = 3;
  const COMMENT = 4;

  // Type name (as written in '*NAME' selectors) -> numeric tag.
  const name2type = {NODE, TEXT, CDATA, COMMENT};

  // Numeric tag -> type name; the inverse of name2type.
  const type2name = Object.fromEntries(
      Object.entries(name2type).map(([typeName, typeId]) => [typeId, typeName])
  );
  /**
   * Selector parsing and raw child-list manipulation shared by NodeObject and XmlParser.
   *
   * A "raw node" is an array whose element 0 is the numeric node type and, for
   * element nodes (type NODE), whose element 1 is the tag name.
   */
  class NodeBase {
      /**
       * Parses a selector string into a selector object.
       *
       *   ''       -> {before: true}  (matches every node; makes rawAdd prepend)
       *   '*'      -> {all: true}     (matches every node; makes rawAdd append)
       *   '*NAME'  -> {type: <tag>}   (NAME must be a key of name2type)
       *   other    -> {name: other}   (matches element nodes with that tag name)
       *
       * @param {string} selectorString
       * @returns {{all: boolean, before: boolean, type: number, name: string}}
       * @throws {Error} if a '*NAME' selector names an unknown node type
       */
      makeSelectorObj(selectorString) {
          const result = {all: false, before: false, type: 0, name: ''};
          if (selectorString === '') {
              result.before = true;
          } else if (selectorString === '*') {
              result.all = true;
          } else if (selectorString[0] === '*') {
              const typeName = selectorString.substring(1);
              // Own-property check: a plain lookup would accept inherited members
              // such as 'constructor' or 'toString' as if they were node types.
              if (!Object.prototype.hasOwnProperty.call(name2type, typeName))
                  throw new Error(`Unknown selector type: ${typeName}`);
              result.type = name2type[typeName];
          } else {
              result.name = selectorString;
          }
          return result;
      }

      /**
       * Tests whether a raw node matches a selector object.
       *
       * @param {Array} rawNode
       * @param {object} selectorObj - result of makeSelectorObj()
       * @returns {boolean}
       */
      checkNode(rawNode, selectorObj) {
          return Boolean(
              selectorObj.all
              || selectorObj.before
              || (selectorObj.type && rawNode[0] === selectorObj.type)
              || (rawNode[0] === NODE && rawNode[1] === selectorObj.name)
          );
      }

      /**
       * Returns the index of the first raw node matching the selector, or -1.
       *
       * @param {Array[]} nodes
       * @param {object} selectorObj
       * @returns {number}
       */
      findNodeIndex(nodes, selectorObj) {
          return nodes.findIndex((rawNode) => this.checkNode(rawNode, selectorObj));
      }

      /**
       * Inserts rawNode into nodes (in place).
       *
       * 'all' appends, 'before' prepends; otherwise the node is inserted in front
       * of the first match, or appended when nothing matches.
       *
       * @param {Array[]} nodes
       * @param {Array} rawNode
       * @param {object} selectorObj
       */
      rawAdd(nodes, rawNode, selectorObj) {
          if (selectorObj.all) {
              nodes.push(rawNode);
              return;
          }
          if (selectorObj.before) {
              nodes.unshift(rawNode);
              return;
          }
          const index = this.findNodeIndex(nodes, selectorObj);
          if (index >= 0)
              nodes.splice(index, 0, rawNode);
          else
              nodes.push(rawNode);
      }

      /**
       * Removes every raw node matching the selector from nodes (in place).
       * The empty selector ('before') removes nothing.
       *
       * @param {Array[]} nodes
       * @param {object} selectorObj
       */
      rawRemove(nodes, selectorObj) {
          if (selectorObj.before)
              return;
          // Walk backwards so splicing does not shift the indices still to be visited.
          for (let i = nodes.length - 1; i >= 0; i--) {
              if (this.checkNode(nodes[i], selectorObj))
                  nodes.splice(i, 1);
          }
      }
  }
  /**
   * Thin wrapper around one raw node array.
   *
   * Raw layouts used by this class:
   *   element: [NODE, name, attrs, children]  (attrs: array of [key, value] pairs or null;
   *                                            children: array of raw nodes or null)
   *   other:   [TEXT | CDATA | COMMENT, value]
   *
   * The wrapper mutates the array it was given; it does not copy it.
   */
  class NodeObject extends NodeBase {
      /**
       * @param {Array|null} raw - raw node to wrap; an empty array is used when omitted
       */
      constructor(raw = null) {
          super();
          this.raw = raw ? raw : [];
      }

      /** @returns {number|null} numeric node type, or null for an empty wrapper */
      get type() {
          return this.raw[0] || null;
      }

      /** @returns {string|null} tag name for element nodes, null otherwise */
      get name() {
          return this.type === NODE ? (this.raw[1] || null) : null;
      }

      /** Sets the tag name; ignored for non-element nodes. */
      set name(value) {
          if (this.type === NODE)
              this.raw[1] = value;
      }

      /**
       * Reads or updates the attributes of an element node.
       *
       *   attrs()            -> Map of current attributes, or null when there are none
       *   attrs(map)         -> replaces all attributes with the entries of `map`
       *   attrs(key, value)  -> sets one attribute, creating the attribute list if the
       *                         element has none yet
       *
       * @param {Map|string} [key]
       * @param {*} [value]
       * @returns {Map|null} the attribute map; null for non-element nodes or when
       *          reading an element without attributes
       */
      attrs(key, value) {
          if (this.type !== NODE)
              return null;

          if (key instanceof Map) {
              this.raw[2] = Array.from(key);
              return key;
          }

          const hasAttrs = Array.isArray(this.raw[2]);
          if (!key)
              return hasAttrs ? new Map(this.raw[2]) : null;

          // Start from an empty map when the element has no attribute list yet, so the
          // first attribute can be set by key instead of being silently dropped.
          const map = new Map(hasAttrs ? this.raw[2] : []);
          map.set(key, value);
          this.raw[2] = Array.from(map);
          return map;
      }

      /**
       * @returns {Array|string|null} child raw-node array for elements, text content
       *          for TEXT/CDATA/COMMENT; null when absent or empty
       */
      get value() {
          switch (this.type) {
              case NODE:
                  return this.raw[3] || null;
              case TEXT:
              case CDATA:
              case COMMENT:
                  return this.raw[1] || null;
          }
          return null;
      }

      /** Sets the child list (elements) or the text content (TEXT/CDATA/COMMENT). */
      set value(v) {
          switch (this.type) {
              case NODE:
                  this.raw[3] = v;
                  break;
              case TEXT:
              case CDATA:
              case COMMENT:
                  this.raw[1] = v;
          }
      }

      /**
       * Adds one NodeObject or an array of them as children of this element.
       *
       * @param {NodeObject|NodeObject[]} node
       * @param {string} [after='*'] - placement selector: '*' appends, '' prepends,
       *        anything else inserts in front of the first matching child
       * @returns {NodeObject} this (unchanged when this is not an element node)
       */
      add(node, after = '*') {
          if (this.type !== NODE)
              return this;

          const selectorObj = this.makeSelectorObj(after);
          if (!Array.isArray(this.raw[3]))
              this.raw[3] = [];

          const items = Array.isArray(node) ? node : [node];
          for (const item of items)
              this.rawAdd(this.raw[3], item.raw, selectorObj);

          return this;
      }

      /**
       * Removes the children matching the selector; the default '' removes nothing.
       * The child list is reset to null when it becomes empty.
       *
       * @param {string} [selector='']
       * @returns {NodeObject} this
       */
      remove(selector = '') {
          if (this.type !== NODE || !this.raw[3])
              return this;

          const selectorObj = this.makeSelectorObj(selector);
          this.rawRemove(this.raw[3], selectorObj);
          if (!this.raw[3].length)
              this.raw[3] = null;

          return this;
      }

      /**
       * Calls callback(child) for each direct child, wrapped in a NodeObject.
       *
       * @param {function(NodeObject): void} callback
       * @returns {NodeObject} this
       */
      each(callback) {
          if (this.type !== NODE || !this.raw[3])
              return this;

          for (const rawChild of this.raw[3])
              callback(new NodeObject(rawChild));

          return this;
      }

      /**
       * Calls callback(node, route) for every descendant, parents before children.
       * `route` is the '/'-joined chain of element names between this node and the
       * visited node's parent ('' for direct children).
       *
       * @param {function(NodeObject, string): void} callback
       * @returns {NodeObject} this
       */
      eachDeep(callback) {
          if (this.type !== NODE || !this.raw[3])
              return this;

          const deep = (nodes, route = '') => {
              for (const rawChild of nodes) {
                  const node = new NodeObject(rawChild);
                  callback(node, route);
                  if (node.type === NODE && node.value)
                      deep(node.value, `${route}${route ? '/' : ''}${node.name}`);
              }
          };
          deep(this.raw[3]);

          return this;
      }
  }
  /**
   * Holds a list of raw XML nodes (`rawNodes`) and offers selection, editing and
   * conversion to/from XML text, JSON and plain objects.
   *
   * Unless a method is called in its "self"/"root" form, it operates on the
   * children of the nodes in `rawNodes`, not on those nodes themselves.
   */
  class XmlParser extends NodeBase {
      /**
       * @param {Array[]} [rawNodes=[]] - raw nodes to operate on (used as-is, not copied)
       */
      constructor(rawNodes = []) {
          super();

          // Node-type tags exposed on the instance for callers.
          this.NODE = NODE;
          this.TEXT = TEXT;
          this.CDATA = CDATA;
          this.COMMENT = COMMENT;

          this.rawNodes = rawNodes;
      }

      /** @returns {number} number of nodes in rawNodes */
      get count() {
          return this.rawNodes.length;
      }

      /** Wraps a raw node in a NodeObject. */
      nodeObject(node) {
          return new NodeObject(node);
      }

      /** Creates a new XmlParser over the given raw nodes. */
      newParser(nodes) {
          return new XmlParser(nodes);
      }

      /**
       * @param {number} type
       * @throws {Error} if `type` is not one of the known node-type tags
       */
      checkType(type) {
          if (!type2name[type])
              throw new Error(`Invalid type: ${type}`);
      }

      /**
       * Creates a detached NodeObject of the given type.
       *
       * @param {number} type - NODE, TEXT, CDATA or COMMENT
       * @param {string} nameOrValue - tag name for NODE, text content otherwise
       * @param {Array|null} [attrs=null] - array of [key, value] pairs (NODE only)
       * @param {Array|null} [value=null] - array of raw child nodes (NODE only)
       * @returns {NodeObject}
       * @throws {Error} on an unknown type, an empty/non-string element name, or a
       *         non-string text value
       */
      createTypedNode(type, nameOrValue, attrs = null, value = null) {
          this.checkType(type);
          switch (type) {
              case NODE:
                  if (!nameOrValue || typeof nameOrValue !== 'string')
                      throw new Error('Node name must be non-empty string');
                  return new NodeObject([type, nameOrValue, attrs, value]);
              case TEXT:
              case CDATA:
              case COMMENT:
                  if (typeof nameOrValue !== 'string')
                      throw new Error('Node value must be of type string');
                  return new NodeObject([type, nameOrValue]);
          }
      }

      /** Creates an element node; see createTypedNode(). */
      createNode(name, attrs = null, value = null) {
          return this.createTypedNode(NODE, name, attrs, value);
      }

      /** Creates a text node; `value` must be a string. */
      createText(value = null) {
          return this.createTypedNode(TEXT, value);
      }

      /** Creates a CDATA node; `value` must be a string. */
      createCdata(value = null) {
          return this.createTypedNode(CDATA, value);
      }

      /** Creates a comment node; `value` must be a string. */
      createComment(value = null) {
          return this.createTypedNode(COMMENT, value);
      }

      /**
       * Adds node(s) as children of every element in rawNodes. The same raw array
       * is inserted into each of them (it is not cloned).
       *
       * @param {NodeObject|NodeObject[]} node
       * @param {string} [after='*'] - placement selector: '*' appends, '' prepends,
       *        anything else inserts in front of the first matching child
       * @returns {XmlParser} this
       */
      add(node, after = '*') {
          const selectorObj = this.makeSelectorObj(after);
          const items = Array.isArray(node) ? node : [node];

          for (const n of this.rawNodes) {
              if (!n || n[0] !== NODE)
                  continue;
              if (!Array.isArray(n[3]))
                  n[3] = [];
              for (const item of items)
                  this.rawAdd(n[3], item.raw, selectorObj);
          }
          return this;
      }

      /**
       * Adds node(s) directly to rawNodes; placement works as in add().
       *
       * @param {NodeObject|NodeObject[]} node
       * @param {string} [after='*']
       * @returns {XmlParser} this
       */
      addRoot(node, after = '*') {
          const selectorObj = this.makeSelectorObj(after);
          const items = Array.isArray(node) ? node : [node];

          for (const item of items)
              this.rawAdd(this.rawNodes, item.raw, selectorObj);
          return this;
      }

      /**
       * Removes matching children from every element in rawNodes. A child list that
       * becomes empty is reset to null. The default selector '' removes nothing.
       *
       * @param {string} [selector='']
       * @returns {XmlParser} this
       */
      remove(selector = '') {
          const selectorObj = this.makeSelectorObj(selector);
          for (const n of this.rawNodes) {
              if (n && n[0] === NODE && Array.isArray(n[3])) {
                  this.rawRemove(n[3], selectorObj);
                  if (!n[3].length)
                      n[3] = null;
              }
          }
          return this;
      }

      /**
       * Removes matching nodes from rawNodes itself.
       *
       * @param {string} [selector='']
       * @returns {XmlParser} this
       */
      removeRoot(selector = '') {
          const selectorObj = this.makeSelectorObj(selector);
          this.rawRemove(this.rawNodes, selectorObj);
          return this;
      }

      /**
       * Calls callback(NodeObject) for each child of the nodes in rawNodes, or for
       * the nodes in rawNodes themselves when `self` is true.
       *
       * @param {function(NodeObject): void} callback
       * @param {boolean} [self=false]
       * @returns {XmlParser} this
       */
      each(callback, self = false) {
          if (self) {
              for (const n of this.rawNodes)
                  callback(new NodeObject(n));
              return this;
          }

          for (const n of this.rawNodes) {
              // Same guard as add()/remove()/select(): skip holes and childless nodes.
              if (n && n[0] === NODE && Array.isArray(n[3])) {
                  for (const child of n[3])
                      callback(new NodeObject(child));
              }
          }
          return this;
      }

      /** Shorthand for each(callback, true). */
      eachSelf(callback) {
          return this.each(callback, true);
      }

      /**
       * Calls callback(node, route) for every descendant, parents before children.
       * `route` is the '/'-joined chain of element names from the traversal start
       * to the visited node's parent ('' at the top level of the traversal).
       *
       * @param {function(NodeObject, string): void} callback
       * @param {boolean} [self=false] - start at rawNodes instead of at their children
       * @returns {XmlParser} this
       */
      eachDeep(callback, self = false) {
          const deep = (nodes, route = '') => {
              for (const n of nodes) {
                  const node = new NodeObject(n);
                  callback(node, route);
                  if (node.type === NODE && node.value)
                      deep(node.value, `${route}${route ? '/' : ''}${node.name}`);
              }
          };

          if (self) {
              deep(this.rawNodes);
          } else {
              for (const n of this.rawNodes) {
                  if (n && n[0] === NODE && Array.isArray(n[3]))
                      deep(n[3]);
              }
          }
          return this;
      }

      /** Shorthand for eachDeep(callback, true). */
      eachDeepSelf(callback) {
          return this.eachDeep(callback, true);
      }

      /**
       * Calls callback(rawNode) for each node in `nodes` that matches the selector.
       *
       * @returns {XmlParser} this
       */
      rawSelect(nodes, selectorObj, callback) {
          for (const n of nodes) {
              if (this.checkNode(n, selectorObj))
                  callback(n);
          }
          return this;
      }

      /**
       * Returns a new XmlParser over the nodes matching `selector`.
       *
       * A selector containing '/' is applied step by step; `self` only affects the
       * first step. '' and '*' match every node, '*TEXT' / '*CDATA' / '*COMMENT' /
       * '*NODE' match by type, any other string matches elements by tag name.
       *
       * @param {string} [selector='']
       * @param {boolean} [self=false] - match against rawNodes instead of their children
       * @returns {XmlParser}
       */
      select(selector = '', self = false) {
          let newRawNodes = [];

          if (selector.indexOf('/') >= 0) {
              let res = this;
              let first = self;
              for (const step of selector.split('/')) {
                  res = res.select(step, first);
                  first = false;
              }
              newRawNodes = res.rawNodes;
          } else {
              const selectorObj = this.makeSelectorObj(selector);
              const collect = (rawNode) => {
                  newRawNodes.push(rawNode);
              };

              if (self) {
                  this.rawSelect(this.rawNodes, selectorObj, collect);
              } else {
                  for (const n of this.rawNodes) {
                      if (n && n[0] === NODE && Array.isArray(n[3]))
                          this.rawSelect(n[3], selectorObj, collect);
                  }
              }
          }

          return new XmlParser(newRawNodes);
      }

      /** Alias of select(). */
      $$(selector, self) {
          return this.select(selector, self);
      }

      /** Alias of select(selector, true). */
      $$self(selector) {
          return this.select(selector, true);
      }

      /**
       * Returns the first node matching `selector` as a NodeObject; when nothing
       * matches, the returned NodeObject is empty (its `type` is null).
       */
      selectFirst(selector, self) {
          const result = this.select(selector, self);
          const node = (result.count ? result.rawNodes[0] : null);
          return new NodeObject(node);
      }

      /** Alias of selectFirst(). */
      $(selector, self) {
          return this.selectFirst(selector, self);
      }

      /** Alias of selectFirst(selector, true). */
      $self(selector) {
          return this.selectFirst(selector, true);
      }

      /**
       * Serializes rawNodes to JSON.
       *
       * @param {{format?: boolean}} [options] - format: pretty-print with 2 spaces
       * @returns {string}
       */
      toJson(options = {}) {
          const {format = false} = options;
          return format
              ? JSON.stringify(this.rawNodes, null, 2)
              : JSON.stringify(this.rawNodes);
      }

      /**
       * Replaces rawNodes with the array parsed from `jsonString`. Only the top
       * level is validated; the shape of individual nodes is not checked.
       *
       * @param {string} jsonString
       * @returns {XmlParser} this
       * @throws {Error} if the JSON root is not an array (JSON.parse errors propagate)
       */
      fromJson(jsonString) {
          const parsed = JSON.parse(jsonString);
          if (!Array.isArray(parsed))
              throw new Error('JSON parse error: root element must be array');

          this.rawNodes = parsed;
          return this;
      }

      /**
       * Serializes rawNodes to an XML string. Attribute values and text are written
       * verbatim (no entity escaping is performed here).
       *
       * @param {object} [options]
       * @param {string} [options.encoding='utf-8'] - value written into the XML header
       * @param {boolean} [options.format=false] - put nodes on their own indented lines
       * @param {boolean} [options.noHeader=false] - omit the `<?xml ...?>` header
       * @param {boolean} [options.expandEmpty=false] - write `<a></a>` instead of `<a/>`
       * @returns {string}
       */
      toString(options = {}) {
          const {
              encoding = 'utf-8',
              format = false,
              noHeader = false,
              expandEmpty = false
          } = options;

          // Type of the last node emitted by the most recently finished nodesToString call.
          let deepType = 0;

          let out = '';
          if (!noHeader)
              out += `<?xml version="1.0" encoding="${encoding}"?>`;

          const nodesToString = (nodes, depth = 0) => {
              let result = '';
              const indent = '\n' + ' '.repeat(depth);

              // Nothing is indented right after a TEXT sibling, so no whitespace is
              // injected into mixed content.
              let lastType = 0;

              for (const n of nodes) {
                  const node = new NodeObject(n);

                  let open = '';
                  let body = '';
                  let close = '';

                  if (node.type === NODE) {
                      if (!node.name)
                          continue;

                      let attrs = '';
                      const nodeAttrs = node.attrs();
                      if (nodeAttrs) {
                          for (const [attrName, attrValue] of nodeAttrs) {
                              if (typeof attrValue === 'string')
                                  attrs += ` ${attrName}="${attrValue}"`;
                              else if (attrValue)
                                  attrs += ` ${attrName}`; // truthy non-string: bare attribute
                          }
                      }

                      // Type of this element's last child; stays 0 when there are no
                      // children, so a stale value left by an earlier sibling can never
                      // push the closing tag of an empty element onto a new line.
                      let lastChildType = 0;
                      if (node.value) {
                          body = nodesToString(node.value, depth + 2);
                          lastChildType = deepType;
                      }

                      open = (format && lastType !== TEXT ? indent : '');
                      if (!body && !expandEmpty) {
                          open += `<${node.name}${attrs}/>`;
                      } else {
                          open += `<${node.name}${attrs}>`;
                          close = (format && lastChildType && lastChildType !== TEXT ? indent : '');
                          close += `</${node.name}>`;
                      }
                  } else if (node.type === TEXT) {
                      body = node.value || '';
                  } else if (node.type === CDATA) {
                      body = (format && lastType !== TEXT ? indent : '');
                      body += `<![CDATA[${node.value || ''}]]>`;
                  } else if (node.type === COMMENT) {
                      body = (format && lastType !== TEXT ? indent : '');
                      body += `<!--${node.value || ''}-->`;
                  }

                  result += `${open}${body}${close}`;
                  lastType = node.type;
              }

              deepType = lastType;
              return result;
          };

          out += nodesToString(this.rawNodes) + (format ? '\n' : '');
          return out;
      }

      /**
       * Parses an XML string with the project's `sax` module and replaces rawNodes
       * with the result.
       *
       * @param {string} xmlString
       * @param {object} [options]
       * @param {boolean} [options.lowerCase=false] - forwarded to sax.parseSync and
       *        sax.getAttrsSync (presumably lower-cases names; see the sax module)
       * @param {boolean} [options.whiteSpace=false] - keep whitespace-only text nodes
       * @param {function(string): boolean|false} [options.pickNode=false] - filter
       *        called with the '/'-joined element route (or `${route}/*TEXT`,
       *        `/*CDATA`, `/*COMMENT`); a falsy result skips that node, and for
       *        elements everything inside it
       * @returns {XmlParser} this
       */
      fromString(xmlString, options = {}) {
          const {
              lowerCase = false,
              whiteSpace = false,
              pickNode = false,
          } = options;

          const parsed = [];
          // Fake container element so top-level nodes are appended like any children.
          const root = this.createNode('root', null, parsed);

          let node = root;          // element currently receiving children
          let route = '';           // route of `node`; maintained only when pickNode is set
          const routeStack = [];    // one entry per open tag, used to restore state on close
          let ignoreNode = false;   // true while inside an element rejected by pickNode

          const onStartNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
              if (tag === '?xml')
                  return;

              if (!ignoreNode && pickNode) {
                  route += `${route ? '/' : ''}${tag}`;
                  ignoreNode = !pickNode(route);
              }

              // An ignored tag keeps pointing at the current node, so restoring the
              // stack on close leaves `node` unchanged.
              let newNode = node;
              if (!ignoreNode)
                  newNode = this.createNode(tag);

              routeStack.push({tag, route, ignoreNode, node: newNode});

              if (ignoreNode)
                  return;

              if (tail && tail.trim() !== '') {
                  const parsedAttrs = sax.getAttrsSync(tail, lowerCase);
                  const attrs = new Map();
                  for (const attr of parsedAttrs.values())
                      attrs.set(attr.fn, attr.value);

                  if (attrs.size)
                      newNode.attrs(attrs);
              }

              if (!node.value)
                  node.value = [];
              node.value.push(newNode.raw);
              node = newNode;
          };

          const onEndNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
              // A closing tag that does not match the innermost open tag is ignored.
              if (!routeStack.length || routeStack[routeStack.length - 1].tag !== tag)
                  return;

              routeStack.pop();

              if (routeStack.length) {
                  const last = routeStack[routeStack.length - 1];
                  route = last.route;
                  ignoreNode = last.ignoreNode;
                  node = last.node;
              } else {
                  route = '';
                  ignoreNode = false;
                  node = root;
              }
          };

          const onTextNode = (text, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
              if (ignoreNode || (pickNode && !pickNode(`${route}/*TEXT`)))
                  return;

              if (!whiteSpace && text.trim() === '')
                  return;

              if (!node.value)
                  node.value = [];
              node.value.push(this.createText(text).raw);
          };

          const onCdata = (tagData, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
              if (ignoreNode || (pickNode && !pickNode(`${route}/*CDATA`)))
                  return;

              if (!node.value)
                  node.value = [];
              node.value.push(this.createCdata(tagData).raw);
          };

          const onComment = (tagData, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
              if (ignoreNode || (pickNode && !pickNode(`${route}/*COMMENT`)))
                  return;

              if (!node.value)
                  node.value = [];
              node.value.push(this.createComment(tagData).raw);
          };

          sax.parseSync(xmlString, {
              onStartNode, onEndNode, onTextNode, onCdata, onComment, lowerCase
          });

          this.rawNodes = parsed;
          return this;
      }

      /**
       * Converts rawNodes to a plain object.
       *
       * Elements become properties keyed by tag name (repeated names are collected
       * into an array); attributes go under '*ATTRS'; text, CDATA and comment content
       * is concatenated under '*TEXT', '*CDATA' and '*COMMENT'.
       *
       * @param {{compactText?: boolean}} [options] - compactText: an element whose
       *        only content is '*TEXT' is represented by that string
       * @returns {object}
       */
      toObject(options = {}) {
          const {
              compactText = false
          } = options;

          const nodesToObject = (nodes) => {
              const result = {};
              for (const n of nodes) {
                  const node = new NodeObject(n);

                  if (node.type === NODE) {
                      if (!node.name)
                          continue;

                      let newNode = {};

                      const nodeAttrs = node.attrs();
                      if (nodeAttrs)
                          newNode['*ATTRS'] = Object.fromEntries(nodeAttrs);

                      if (node.value) {
                          Object.assign(newNode, nodesToObject(node.value));

                          // Collapse a text-only node to a plain string.
                          if (compactText
                              && Object.prototype.hasOwnProperty.call(newNode, '*TEXT')
                              && Object.keys(newNode).length === 1) {
                              newNode = newNode['*TEXT'];
                          }
                      }

                      if (!Object.prototype.hasOwnProperty.call(result, node.name)) {
                          result[node.name] = newNode;
                      } else {
                          if (!Array.isArray(result[node.name]))
                              result[node.name] = [result[node.name]];
                          result[node.name].push(newNode);
                      }
                  } else if (node.type === TEXT) {
                      if (!result['*TEXT'])
                          result['*TEXT'] = '';
                      result['*TEXT'] += node.value || '';
                  } else if (node.type === CDATA) {
                      if (!result['*CDATA'])
                          result['*CDATA'] = '';
                      result['*CDATA'] += node.value || '';
                  } else if (node.type === COMMENT) {
                      if (!result['*COMMENT'])
                          result['*COMMENT'] = '';
                      result['*COMMENT'] += node.value || '';
                  }
              }
              return result;
          };

          return nodesToObject(this.rawNodes);
      }

      /**
       * Replaces rawNodes with nodes built from a plain object in the layout that
       * toObject() produces.
       *
       * String values become elements containing one text node, objects become
       * elements (attributes taken from '*ATTRS'), and arrays yield one element per
       * string/object item. Other values, including null, are ignored.
       *
       * @param {object} xmlObject
       * @returns {XmlParser} this
       * @throws {Error} if a '*TEXT', '*CDATA' or '*COMMENT' value is not a string
       */
      fromObject(xmlObject) {
          const objectToNodes = (obj) => {
              const result = [];

              // Appends the element for one string/object value; anything else is skipped.
              const pushElement = (tag, value) => {
                  if (typeof value === 'string') {
                      result.push(this.createNode(tag, null, [this.createText(value).raw]).raw);
                  } else if (value !== null && typeof value === 'object') {
                      // `typeof null` is 'object', hence the explicit null check.
                      const attrs = (value['*ATTRS'] ? Object.entries(value['*ATTRS']) : null);
                      result.push(this.createNode(tag, attrs, objectToNodes(value)).raw);
                  }
              };

              for (const [tag, objNode] of Object.entries(obj)) {
                  if (tag === '*TEXT') {
                      result.push(this.createText(objNode).raw);
                  } else if (tag === '*CDATA') {
                      result.push(this.createCdata(objNode).raw);
                  } else if (tag === '*COMMENT') {
                      result.push(this.createComment(objNode).raw);
                  } else if (tag === '*ATTRS') {
                      // Skip: attributes are read by the parent when it creates the element.
                  } else if (Array.isArray(objNode)) {
                      for (const item of objNode)
                          pushElement(tag, item);
                  } else {
                      pushElement(tag, objNode);
                  }
              }
              return result;
          };

          this.rawNodes = objectToNodes(xmlObject);
          return this;
      }

      /** Returns an ObjectNavigator over the result of toObject(). */
      navigator() {
          return new ObjectNavigator(this.toObject());
      }
  }
  605. module.exports = XmlParser;