sax.js 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367
  /**
   * Synchronous SAX-style scanner over an XML/HTML-like string.
   *
   * Walks `xstr` from left to right and reports what it finds through the
   * callbacks supplied in `options`. No tree is built and nothing is returned.
   *
   * Every callback additionally receives the current `cutCounter` and `cutTag`
   * as its last two arguments: while the scanner is inside a tag listed in
   * `innerCut`, `cutTag` is that tag's name and `cutCounter` is its nesting
   * depth; otherwise they are `0` and `''`.
   *
   * @param {string} xstr - Source text to scan.
   * @param {object} [options] - Callbacks and settings; all are optional.
   * @param {Function} [options.onStartNode] - (tag, tail, singleTag, cutCounter, cutTag);
   *   `tail` is the raw attribute text starting at the first whitespace after the tag name.
   * @param {Function} [options.onEndNode] - (tag, tail, singleTag, cutCounter, cutTag);
   *   also called right after onStartNode for self-closing tags.
   * @param {Function} [options.onTextNode] - (text, cutCounter, cutTag).
   * @param {Function} [options.onCdata] - (data, cutCounter, cutTag).
   * @param {Function} [options.onComment] - (data, cutCounter, cutTag).
   * @param {Function} [options.onProgress] - (percent), an integer up to 100.
   * @param {Set<string>} [options.innerCut] - Tag names tracked by cutCounter/cutTag.
   * @param {boolean} [options.lowerCase=true] - Lower-case tag names before reporting.
   * @returns {void}
   */
  function parseSync(xstr, options = {}) {
      const dummy = () => {};
      const {
          onStartNode: _onStartNode = dummy,
          onEndNode: _onEndNode = dummy,
          onTextNode: _onTextNode = dummy,
          onCdata: _onCdata = dummy,
          onComment: _onComment = dummy,
          onProgress: _onProgress = dummy,
          innerCut = new Set(),
          lowerCase = true,
      } = options;

      const len = xstr.length;
      const progStep = len/20;
      let nextProg = 0;
      let cutCounter = 0;
      let cutTag = '';
      let inCdata = false;
      let inComment = false;
      let leftData = 0;
      let i = 0;

      while (i < len) {
          inCdata = false;
          inComment = false;
          let singleTag = false;

          const left = xstr.indexOf('<', i);
          if (left < 0) {
              break;
          }

          // leftData is the index of the last character of the opening marker,
          // so the payload always starts at leftData + 1.
          leftData = left;
          if (left < len - 2 && xstr[left + 1] === '!') {
              if (xstr.startsWith('<!--', left)) {
                  inComment = true;
                  leftData = left + 3;
              } else if (xstr.startsWith('<![CDATA[', left)) {
                  inCdata = true;
                  leftData = left + 8;
              }
          }

          if (left !== i) {
              _onTextNode(xstr.slice(i, left), cutCounter, cutTag);
              // This text is already reported; advance so that an unterminated
              // tag below does not cause it to be emitted a second time.
              i = left;
          }

          let right;
          let rightData;
          if (inCdata) {
              rightData = xstr.indexOf(']]>', leftData + 1);
              if (rightData < 0) {
                  break;
              }
              right = rightData + 2;
          } else if (inComment) {
              rightData = xstr.indexOf('-->', leftData + 1);
              if (rightData < 0) {
                  break;
              }
              right = rightData + 2;
          } else {
              rightData = xstr.indexOf('>', leftData + 1);
              if (rightData < 0) {
                  break;
              }
              right = rightData;
              if (xstr[right - 1] === '/') {
                  // Self-closing tag: keep the trailing '/' out of the tag data.
                  singleTag = true;
                  rightData--;
              }
          }

          const tagData = xstr.slice(leftData + 1, rightData);
          if (inCdata) {
              _onCdata(tagData, cutCounter, cutTag);
          } else if (inComment) {
              _onComment(tagData, cutCounter, cutTag);
          } else {
              let tag = tagData;
              let tail = '';
              // The name ends at the first whitespace of any kind, not only at
              // a space: attributes may start on a new line or after a tab.
              const firstSpace = tagData.search(/[ \t\n\r]/);
              if (firstSpace >= 0) {
                  tail = tagData.slice(firstSpace);
                  tag = tagData.slice(0, firstSpace);
              }
              if (lowerCase) {
                  tag = tag.toLowerCase();
              }

              if (innerCut.has(tag) && (!cutCounter || cutTag === tag)) {
                  if (!cutCounter) {
                      cutTag = tag;
                  }
                  cutCounter++;
              }

              let endTag = (singleTag ? tag : '');
              if (tag === '' || tag[0] !== '/') {
                  _onStartNode(tag, tail, singleTag, cutCounter, cutTag);
              } else {
                  endTag = tag.slice(1);
              }

              if (endTag) {
                  _onEndNode(endTag, tail, singleTag, cutCounter, cutTag);
              }

              if (cutTag === endTag) {
                  cutCounter = (cutCounter > 0 ? cutCounter - 1 : 0);
                  if (!cutCounter) {
                      cutTag = '';
                  }
              }
          }

          // Report progress at most once per iteration, in steps of len/20.
          if (right >= nextProg) {
              _onProgress(Math.round(right/(len + 1)*100));
              nextProg += progStep;
          }
          i = right + 1;
      }

      // Whatever is left was not consumed by the loop: either plain trailing
      // text or an unterminated CDATA section / comment / tag.
      if (i < len) {
          if (inCdata) {
              _onCdata(xstr.slice(leftData + 1), cutCounter, cutTag);
          } else if (inComment) {
              _onComment(xstr.slice(leftData + 1), cutCounter, cutTag);
          } else {
              _onTextNode(xstr.slice(i), cutCounter, cutTag);
          }
      }

      _onProgress(100);
  }
  // Asynchronous copy of parseSync.
  // Produced by replacing "_on" with "await _on" from the while loop onward.
  /**
   * Asynchronous SAX-style scanner over an XML/HTML-like string.
   *
   * Walks `xstr` from left to right and reports what it finds through the
   * callbacks supplied in `options`. Each callback is awaited before scanning
   * continues, so callbacks may be async. No tree is built.
   *
   * Every callback additionally receives the current `cutCounter` and `cutTag`
   * as its last two arguments: while the scanner is inside a tag listed in
   * `innerCut`, `cutTag` is that tag's name and `cutCounter` is its nesting
   * depth; otherwise they are `0` and `''`.
   *
   * @param {string} xstr - Source text to scan.
   * @param {object} [options] - Callbacks and settings; all are optional.
   * @param {Function} [options.onStartNode] - (tag, tail, singleTag, cutCounter, cutTag);
   *   `tail` is the raw attribute text starting at the first whitespace after the tag name.
   * @param {Function} [options.onEndNode] - (tag, tail, singleTag, cutCounter, cutTag);
   *   also called right after onStartNode for self-closing tags.
   * @param {Function} [options.onTextNode] - (text, cutCounter, cutTag).
   * @param {Function} [options.onCdata] - (data, cutCounter, cutTag).
   * @param {Function} [options.onComment] - (data, cutCounter, cutTag).
   * @param {Function} [options.onProgress] - (percent), an integer up to 100.
   * @param {Set<string>} [options.innerCut] - Tag names tracked by cutCounter/cutTag.
   * @param {boolean} [options.lowerCase=true] - Lower-case tag names before reporting.
   * @returns {Promise<void>} Resolves once the whole input has been reported.
   */
  async function parse(xstr, options = {}) {
      const dummy = () => {};
      const {
          onStartNode: _onStartNode = dummy,
          onEndNode: _onEndNode = dummy,
          onTextNode: _onTextNode = dummy,
          onCdata: _onCdata = dummy,
          onComment: _onComment = dummy,
          onProgress: _onProgress = dummy,
          innerCut = new Set(),
          lowerCase = true,
      } = options;

      const len = xstr.length;
      const progStep = len/20;
      let nextProg = 0;
      let cutCounter = 0;
      let cutTag = '';
      let inCdata = false;
      let inComment = false;
      let leftData = 0;
      let i = 0;

      while (i < len) {
          inCdata = false;
          inComment = false;
          let singleTag = false;

          const left = xstr.indexOf('<', i);
          if (left < 0) {
              break;
          }

          // leftData is the index of the last character of the opening marker,
          // so the payload always starts at leftData + 1.
          leftData = left;
          if (left < len - 2 && xstr[left + 1] === '!') {
              if (xstr.startsWith('<!--', left)) {
                  inComment = true;
                  leftData = left + 3;
              } else if (xstr.startsWith('<![CDATA[', left)) {
                  inCdata = true;
                  leftData = left + 8;
              }
          }

          if (left !== i) {
              await _onTextNode(xstr.slice(i, left), cutCounter, cutTag);
              // This text is already reported; advance so that an unterminated
              // tag below does not cause it to be emitted a second time.
              i = left;
          }

          let right;
          let rightData;
          if (inCdata) {
              rightData = xstr.indexOf(']]>', leftData + 1);
              if (rightData < 0) {
                  break;
              }
              right = rightData + 2;
          } else if (inComment) {
              rightData = xstr.indexOf('-->', leftData + 1);
              if (rightData < 0) {
                  break;
              }
              right = rightData + 2;
          } else {
              rightData = xstr.indexOf('>', leftData + 1);
              if (rightData < 0) {
                  break;
              }
              right = rightData;
              if (xstr[right - 1] === '/') {
                  // Self-closing tag: keep the trailing '/' out of the tag data.
                  singleTag = true;
                  rightData--;
              }
          }

          const tagData = xstr.slice(leftData + 1, rightData);
          if (inCdata) {
              await _onCdata(tagData, cutCounter, cutTag);
          } else if (inComment) {
              await _onComment(tagData, cutCounter, cutTag);
          } else {
              let tag = tagData;
              let tail = '';
              // The name ends at the first whitespace of any kind, not only at
              // a space: attributes may start on a new line or after a tab.
              const firstSpace = tagData.search(/[ \t\n\r]/);
              if (firstSpace >= 0) {
                  tail = tagData.slice(firstSpace);
                  tag = tagData.slice(0, firstSpace);
              }
              if (lowerCase) {
                  tag = tag.toLowerCase();
              }

              if (innerCut.has(tag) && (!cutCounter || cutTag === tag)) {
                  if (!cutCounter) {
                      cutTag = tag;
                  }
                  cutCounter++;
              }

              let endTag = (singleTag ? tag : '');
              if (tag === '' || tag[0] !== '/') {
                  await _onStartNode(tag, tail, singleTag, cutCounter, cutTag);
              } else {
                  endTag = tag.slice(1);
              }

              if (endTag) {
                  await _onEndNode(endTag, tail, singleTag, cutCounter, cutTag);
              }

              if (cutTag === endTag) {
                  cutCounter = (cutCounter > 0 ? cutCounter - 1 : 0);
                  if (!cutCounter) {
                      cutTag = '';
                  }
              }
          }

          // Report progress at most once per iteration, in steps of len/20.
          if (right >= nextProg) {
              await _onProgress(Math.round(right/(len + 1)*100));
              nextProg += progStep;
          }
          i = right + 1;
      }

      // Whatever is left was not consumed by the loop: either plain trailing
      // text or an unterminated CDATA section / comment / tag.
      if (i < len) {
          if (inCdata) {
              await _onCdata(xstr.slice(leftData + 1), cutCounter, cutTag);
          } else if (inComment) {
              await _onComment(xstr.slice(leftData + 1), cutCounter, cutTag);
          } else {
              await _onTextNode(xstr.slice(i), cutCounter, cutTag);
          }
      }

      await _onProgress(100);
  }
  249. function getAttrsSync(tail, lowerCase = true) {
  250. let result = new Map();
  251. let name = '';
  252. let value = '';
  253. let vOpen = '';
  254. let inName = false;
  255. let inValue = false;
  256. let waitValue = false;
  257. let waitEq = true;
  258. const pushResult = () => {
  259. if (waitEq)
  260. value = true;
  261. if (lowerCase)
  262. name = name.toLowerCase();
  263. if (name != '') {
  264. const fn = name;
  265. let ns = '';
  266. if (fn.indexOf(':') >= 0) {
  267. [ns, name] = fn.split(':');
  268. }
  269. result.set(fn, {value, ns, name, fn});
  270. }
  271. name = '';
  272. value = '';
  273. vOpen = '';
  274. inName = false;
  275. inValue = false;
  276. waitValue = false;
  277. waitEq = true;
  278. };
  279. tail = tail.replace(/[\t\n\r]/g, ' ');
  280. for (let i = 0; i < tail.length; i++) {
  281. const c = tail.charAt(i);
  282. if (c == ' ') {
  283. if (inValue) {
  284. if (vOpen == '"')
  285. value += c;
  286. else
  287. pushResult();
  288. } else if (inName) {
  289. inName = false;
  290. }
  291. } else if (!inValue && c == '=') {
  292. waitEq = false;
  293. waitValue = true;
  294. inName = false;
  295. } else if (c == '"') {
  296. if (inValue) {
  297. pushResult();
  298. } else if (waitValue) {
  299. inValue = true;
  300. vOpen = '"';
  301. }
  302. } else if (inValue) {
  303. value += c;
  304. } else if (inName) {
  305. name += c;
  306. } else if (waitEq) {
  307. pushResult();
  308. inName = true;
  309. name = c;
  310. } else if (waitValue) {
  311. waitValue = false;
  312. inValue = true;
  313. vOpen = ' ';
  314. value = c;
  315. } else {
  316. inName = true;
  317. name = c;
  318. }
  319. }
  320. if (name != '')
  321. pushResult();
  322. return result;
  323. }
  // Public API of the module: the synchronous and asynchronous scanners and the
  // attribute-string parser.
  module.exports = {parseSync, getAttrsSync, parse};