123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367 |
/**
 * Synchronous, callback-driven tokenizer for XML-like markup.
 *
 * Scans `xstr` left to right and reports each token through the callbacks in
 * `options`. No tree is built and well-formedness is not validated; anything
 * between `<` and the next `>` that is not a comment or CDATA section is
 * treated as a tag.
 *
 * @param {string} xstr - Markup to scan.
 * @param {Object} [options] - Callbacks and flags; every member is optional.
 * @param {Function} [options.onStartNode] - Called as
 *   `(tag, tail, singleTag, cutCounter, cutTag)` for every tag whose name does
 *   not start with `/`. `tail` is the rest of the tag after the name (including
 *   the leading whitespace character), `singleTag` is true for `<x/>`.
 * @param {Function} [options.onEndNode] - Called with the same arguments for
 *   closing tags (name reported without the `/`) and, right after
 *   `onStartNode`, for self-closing tags.
 * @param {Function} [options.onTextNode] - Called as `(text, cutCounter, cutTag)`.
 * @param {Function} [options.onCdata] - Called as `(data, cutCounter, cutTag)`
 *   with the content of a `<![CDATA[ ... ]]>` section.
 * @param {Function} [options.onComment] - Called as `(data, cutCounter, cutTag)`
 *   with the content of a `<!-- ... -->` comment.
 * @param {Function} [options.onProgress] - Called as `(percent)`; at most once
 *   per token while scanning and always with `100` at the end.
 * @param {Set<string>} [options.innerCut] - Tag names that open a "cut"
 *   region. While inside one, `cutCounter` is the nesting depth of that tag
 *   and `cutTag` is its name; outside they are `0` and `''`.
 * @param {boolean} [options.lowerCase=true] - Lower-case tag names before
 *   reporting them.
 * @returns {void}
 */
function parseSync(xstr, options = {}) {
  const noop = () => {};
  const {
    onStartNode: _onStartNode = noop,
    onEndNode: _onEndNode = noop,
    onTextNode: _onTextNode = noop,
    onCdata: _onCdata = noop,
    onComment: _onComment = noop,
    onProgress: _onProgress = noop,
    innerCut = new Set(),
    lowerCase = true,
  } = options;

  // A tag name ends at the first whitespace character, not only at a space.
  const TAG_NAME_END = /[ \t\r\n]/;

  const len = xstr.length;
  const progStep = len / 20;
  let nextProg = 0;
  let cutCounter = 0;
  let cutTag = '';
  let inCdata = false;
  let inComment = false;
  let leftData = 0;
  // Where the unreported remainder starts if the loop stops before the end.
  let restStart = 0;
  let i = 0;

  while (i < len) {
    inCdata = false;
    inComment = false;
    let singleTag = false;

    const left = xstr.indexOf('<', i);
    if (left < 0) {
      restStart = i;
      break;
    }

    // leftData points at the last character of the opening delimiter, so the
    // payload always starts at leftData + 1.
    leftData = left;
    if (xstr.startsWith('<!--', left)) {
      inComment = true;
      leftData = left + 3;
    } else if (xstr.startsWith('<![CDATA[', left)) {
      inCdata = true;
      leftData = left + 8;
    }

    if (left !== i) {
      _onTextNode(xstr.slice(i, left), cutCounter, cutTag);
    }
    // Text before '<' has been reported; an unterminated tag must not repeat it.
    restStart = left;

    let right;
    let rightData;
    if (inCdata) {
      rightData = xstr.indexOf(']]>', leftData + 1);
      if (rightData < 0) {
        break;
      }
      right = rightData + 2;
    } else if (inComment) {
      rightData = xstr.indexOf('-->', leftData + 1);
      if (rightData < 0) {
        break;
      }
      right = rightData + 2;
    } else {
      rightData = xstr.indexOf('>', leftData + 1);
      if (rightData < 0) {
        break;
      }
      right = rightData;
      if (xstr[right - 1] === '/') {
        // Self-closing tag: drop the trailing '/' from the payload.
        singleTag = true;
        rightData--;
      }
    }

    const tagData = xstr.slice(leftData + 1, rightData);

    if (inCdata) {
      _onCdata(tagData, cutCounter, cutTag);
    } else if (inComment) {
      _onComment(tagData, cutCounter, cutTag);
    } else {
      const nameEnd = tagData.search(TAG_NAME_END);
      let tag = nameEnd >= 0 ? tagData.slice(0, nameEnd) : tagData;
      const tail = nameEnd >= 0 ? tagData.slice(nameEnd) : '';
      if (lowerCase) {
        tag = tag.toLowerCase();
      }

      // Enter (or nest deeper into) a cut region.
      if (innerCut.has(tag) && (!cutCounter || cutTag === tag)) {
        if (!cutCounter) {
          cutTag = tag;
        }
        cutCounter++;
      }

      let endTag = singleTag ? tag : '';
      if (tag === '' || tag[0] !== '/') {
        _onStartNode(tag, tail, singleTag, cutCounter, cutTag);
      } else {
        endTag = tag.slice(1);
      }

      if (endTag) {
        _onEndNode(endTag, tail, singleTag, cutCounter, cutTag);
      }

      // Leave one level of the cut region when its tag closes.
      if (cutTag === endTag) {
        cutCounter = cutCounter > 0 ? cutCounter - 1 : 0;
        if (!cutCounter) {
          cutTag = '';
        }
      }
    }

    if (right >= nextProg) {
      _onProgress(Math.round(right / (len + 1) * 100));
      nextProg += progStep;
    }
    i = right + 1;
  }

  // The loop stopped early: flush whatever is left as a single final token.
  if (i < len) {
    if (inCdata) {
      _onCdata(xstr.slice(leftData + 1), cutCounter, cutTag);
    } else if (inComment) {
      _onComment(xstr.slice(leftData + 1), cutCounter, cutTag);
    } else {
      _onTextNode(xstr.slice(restStart), cutCounter, cutTag);
    }
  }

  _onProgress(100);
}
// Asynchronous copy of parseSync.
// Produced by replacing "_on" with "await _on" from the while loop onward.
/**
 * Asynchronous, callback-driven tokenizer for XML-like markup.
 *
 * Scans `xstr` left to right and reports each token through the callbacks in
 * `options`, awaiting every callback before continuing, so callbacks may be
 * plain or async functions. No tree is built and well-formedness is not
 * validated; anything between `<` and the next `>` that is not a comment or
 * CDATA section is treated as a tag.
 *
 * @param {string} xstr - Markup to scan.
 * @param {Object} [options] - Callbacks and flags; every member is optional.
 * @param {Function} [options.onStartNode] - Called as
 *   `(tag, tail, singleTag, cutCounter, cutTag)` for every tag whose name does
 *   not start with `/`. `tail` is the rest of the tag after the name (including
 *   the leading whitespace character), `singleTag` is true for `<x/>`.
 * @param {Function} [options.onEndNode] - Called with the same arguments for
 *   closing tags (name reported without the `/`) and, right after
 *   `onStartNode`, for self-closing tags.
 * @param {Function} [options.onTextNode] - Called as `(text, cutCounter, cutTag)`.
 * @param {Function} [options.onCdata] - Called as `(data, cutCounter, cutTag)`
 *   with the content of a `<![CDATA[ ... ]]>` section.
 * @param {Function} [options.onComment] - Called as `(data, cutCounter, cutTag)`
 *   with the content of a `<!-- ... -->` comment.
 * @param {Function} [options.onProgress] - Called as `(percent)`; at most once
 *   per token while scanning and always with `100` at the end.
 * @param {Set<string>} [options.innerCut] - Tag names that open a "cut"
 *   region. While inside one, `cutCounter` is the nesting depth of that tag
 *   and `cutTag` is its name; outside they are `0` and `''`.
 * @param {boolean} [options.lowerCase=true] - Lower-case tag names before
 *   reporting them.
 * @returns {Promise<void>} Resolves after the final `onProgress(100)` call.
 */
async function parse(xstr, options = {}) {
  const noop = () => {};
  const {
    onStartNode: _onStartNode = noop,
    onEndNode: _onEndNode = noop,
    onTextNode: _onTextNode = noop,
    onCdata: _onCdata = noop,
    onComment: _onComment = noop,
    onProgress: _onProgress = noop,
    innerCut = new Set(),
    lowerCase = true,
  } = options;

  // A tag name ends at the first whitespace character, not only at a space.
  const TAG_NAME_END = /[ \t\r\n]/;

  const len = xstr.length;
  const progStep = len / 20;
  let nextProg = 0;
  let cutCounter = 0;
  let cutTag = '';
  let inCdata = false;
  let inComment = false;
  let leftData = 0;
  // Where the unreported remainder starts if the loop stops before the end.
  let restStart = 0;
  let i = 0;

  while (i < len) {
    inCdata = false;
    inComment = false;
    let singleTag = false;

    const left = xstr.indexOf('<', i);
    if (left < 0) {
      restStart = i;
      break;
    }

    // leftData points at the last character of the opening delimiter, so the
    // payload always starts at leftData + 1.
    leftData = left;
    if (xstr.startsWith('<!--', left)) {
      inComment = true;
      leftData = left + 3;
    } else if (xstr.startsWith('<![CDATA[', left)) {
      inCdata = true;
      leftData = left + 8;
    }

    if (left !== i) {
      await _onTextNode(xstr.slice(i, left), cutCounter, cutTag);
    }
    // Text before '<' has been reported; an unterminated tag must not repeat it.
    restStart = left;

    let right;
    let rightData;
    if (inCdata) {
      rightData = xstr.indexOf(']]>', leftData + 1);
      if (rightData < 0) {
        break;
      }
      right = rightData + 2;
    } else if (inComment) {
      rightData = xstr.indexOf('-->', leftData + 1);
      if (rightData < 0) {
        break;
      }
      right = rightData + 2;
    } else {
      rightData = xstr.indexOf('>', leftData + 1);
      if (rightData < 0) {
        break;
      }
      right = rightData;
      if (xstr[right - 1] === '/') {
        // Self-closing tag: drop the trailing '/' from the payload.
        singleTag = true;
        rightData--;
      }
    }

    const tagData = xstr.slice(leftData + 1, rightData);

    if (inCdata) {
      await _onCdata(tagData, cutCounter, cutTag);
    } else if (inComment) {
      await _onComment(tagData, cutCounter, cutTag);
    } else {
      const nameEnd = tagData.search(TAG_NAME_END);
      let tag = nameEnd >= 0 ? tagData.slice(0, nameEnd) : tagData;
      const tail = nameEnd >= 0 ? tagData.slice(nameEnd) : '';
      if (lowerCase) {
        tag = tag.toLowerCase();
      }

      // Enter (or nest deeper into) a cut region.
      if (innerCut.has(tag) && (!cutCounter || cutTag === tag)) {
        if (!cutCounter) {
          cutTag = tag;
        }
        cutCounter++;
      }

      let endTag = singleTag ? tag : '';
      if (tag === '' || tag[0] !== '/') {
        await _onStartNode(tag, tail, singleTag, cutCounter, cutTag);
      } else {
        endTag = tag.slice(1);
      }

      if (endTag) {
        await _onEndNode(endTag, tail, singleTag, cutCounter, cutTag);
      }

      // Leave one level of the cut region when its tag closes.
      if (cutTag === endTag) {
        cutCounter = cutCounter > 0 ? cutCounter - 1 : 0;
        if (!cutCounter) {
          cutTag = '';
        }
      }
    }

    if (right >= nextProg) {
      await _onProgress(Math.round(right / (len + 1) * 100));
      nextProg += progStep;
    }
    i = right + 1;
  }

  // The loop stopped early: flush whatever is left as a single final token.
  if (i < len) {
    if (inCdata) {
      await _onCdata(xstr.slice(leftData + 1), cutCounter, cutTag);
    } else if (inComment) {
      await _onComment(xstr.slice(leftData + 1), cutCounter, cutTag);
    } else {
      await _onTextNode(xstr.slice(restStart), cutCounter, cutTag);
    }
  }

  await _onProgress(100);
}
/**
 * Parses the attribute part of a tag (the text after the tag name) into a Map.
 *
 * Supported forms: `name="value"`, `name='value'`, unquoted `name=value`
 * (ends at the next space) and bare `name` (stored with the value `true`).
 * Tabs, carriage returns and line feeds are treated as spaces.
 *
 * @param {string} tail - Attribute text.
 * @param {boolean} [lowerCase=true] - Lower-case attribute names.
 * @returns {Map<string, {value: (string|boolean), ns: string, name: string, fn: string}>}
 *   Map keyed by the full attribute name `fn`. When `fn` contains `:`, `ns`
 *   and `name` are the first two parts of `fn` split on `:`; otherwise `ns`
 *   is `''` and `name` equals `fn`.
 */
function getAttrsSync(tail, lowerCase = true) {
  const result = new Map();

  let name = '';
  let value = '';
  // Delimiter of the value being read: '"', "'" or ' ' for an unquoted value.
  let vOpen = '';
  let inName = false;
  let inValue = false;
  let waitValue = false;
  let waitEq = true;

  // Stores the attribute collected so far (if any) and resets the state.
  const pushResult = () => {
    if (waitEq) {
      // No '=' was seen: a bare attribute.
      value = true;
    }
    if (lowerCase) {
      name = name.toLowerCase();
    }
    if (name !== '') {
      const fn = name;
      let ns = '';
      if (fn.includes(':')) {
        [ns, name] = fn.split(':');
      }
      result.set(fn, {value, ns, name, fn});
    }

    name = '';
    value = '';
    vOpen = '';
    inName = false;
    inValue = false;
    waitValue = false;
    waitEq = true;
  };

  const text = tail.replace(/[\t\n\r]/g, ' ');

  for (const c of text) {
    if (c === ' ') {
      if (inValue) {
        if (vOpen === ' ') {
          // A space ends an unquoted value.
          pushResult();
        } else {
          value += c;
        }
      } else if (inName) {
        inName = false;
      }
    } else if (!inValue && c === '=') {
      waitEq = false;
      waitValue = true;
      inName = false;
    } else if (c === '"' && !(inValue && vOpen === "'")) {
      // A double quote opens a value, or closes a double-quoted or unquoted one.
      if (inValue) {
        pushResult();
      } else if (waitValue) {
        inValue = true;
        vOpen = '"';
      }
    } else if (c === "'" && (inValue ? vOpen === "'" : waitValue)) {
      // A single quote only acts as a delimiter when it opens a value or
      // closes a single-quoted one; elsewhere it is an ordinary character.
      if (inValue) {
        pushResult();
      } else {
        inValue = true;
        vOpen = "'";
      }
    } else if (inValue) {
      value += c;
    } else if (inName) {
      name += c;
    } else if (waitEq) {
      // A new name starts: flush a preceding bare attribute first.
      pushResult();
      inName = true;
      name = c;
    } else if (waitValue) {
      // First character of an unquoted value.
      waitValue = false;
      inValue = true;
      vOpen = ' ';
      value = c;
    } else {
      inName = true;
      name = c;
    }
  }

  if (name !== '') {
    pushResult();
  }

  return result;
}
- module.exports = {
- parseSync,
- getAttrsSync,
- parse
- }
|