BookParser.js 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179
  1. import EasySAXParser from './easysax';
  2. import {sleep} from '../../../share/utils';
  /**
   * Converts FictionBook (FB2) XML text into book metadata plus a flat list of
   * paragraphs, driven by the SAX-style events emitted by EasySAXParser.
   */
  export default class BookParser {
      constructor() {
          this.parser = new EasySAXParser();
      }

      /**
       * Parses FB2 markup.
       *
       * Side effects on the instance: `this.data` keeps the raw input,
       * `this.meta` receives the collected metadata and `this.para` the
       * paragraph list. Each paragraph is an object of the form
       *   {
       *     index: Number,  // position in the list
       *     offset: Number, // sum of `length` of all preceding paragraphs
       *     length: Number, // length of the text without the wrapping tags
       *     text: String    // paragraph text (or title, epigraph, etc.) with nested tags
       *   }
       *
       * @param {string} data - FB2 document text; must contain `<FictionBook`.
       * @param {function(number): void} [callback] - progress listener; called
       *     with 0 at the start, with the parser's progress values in between,
       *     and with 100 once parsing is complete.
       * @returns {Promise<{fb2: Object}>} the collected metadata (`firstName`,
       *     `lastName`, `genre`, `date`, `bookTitle`, `id`, `annotation` — only
       *     the fields that were present in the document).
       * @throws {Error} when `data` does not contain `<FictionBook`.
       */
      async parse(data, callback) {
          if (!callback)
              callback = () => {};
          callback(0);

          this.data = data;

          if (!data.includes('<FictionBook')) {
              throw new Error('Неверный формат файла');
          }

          const ANNOTATION_PATH = '/FictionBook/description/title-info/annotation';
          const BODY_TITLE_PATH = '/FictionBook/body/title';
          const BODY_SECTION_PATH = '/FictionBook/body/section';

          // Element path -> metadata field filled from that element's text.
          const META_FIELDS = new Map([
              ['/FictionBook/description/title-info/author/first-name', 'firstName'],
              ['/FictionBook/description/title-info/author/last-name', 'lastName'],
              ['/FictionBook/description/title-info/genre', 'genre'],
              ['/FictionBook/description/title-info/date', 'date'],
              ['/FictionBook/description/title-info/book-title', 'bookTitle'],
              ['/FictionBook/description/title-info/id', 'id'],
          ]);

          let path = '';      // slash-joined names of the currently open elements
          let tag = '';       // last segment of `path`
          let nextPerc = 0;   // progress threshold for the next callback
          let paraIndex = -1; // index of the paragraph being built, -1 = none yet
          let paraOffset = 0; // running sum of paragraph lengths
          const para = [];
          const fb2 = {};

          // Appends a new paragraph and advances the running offset.
          const newParagraph = (text, len) => {
              paraIndex++;
              para[paraIndex] = {
                  index: paraIndex,
                  offset: paraOffset,
                  length: len,
                  text: text
              };
              paraOffset += len;
          };

          // Appends text to the paragraph currently being built.
          const growParagraph = (text, len) => {
              const current = para[paraIndex];
              if (!current) {
                  // No paragraph has been opened yet: start a real one instead of
                  // writing to para[-1], which would drop the text from the list
                  // and skew every later offset.
                  newParagraph(text, len);
                  return;
              }

              paraOffset -= current.length;
              // A paragraph opened by startNode holds a single-space placeholder;
              // discard it as soon as real text arrives.
              if (current.text === ' ') {
                  current.length = 0;
                  current.text = '';
              }
              current.length += len;
              current.text += text;
              paraOffset += current.length;
          };

          const parser = this.parser;

          // Parser errors are deliberately ignored (best-effort parsing).
          parser.on('error', (msgError) => {// eslint-disable-line no-unused-vars
          });

          parser.on('startNode', (elemName, getAttr, isTagEnd, getStrNode) => {// eslint-disable-line no-unused-vars
              tag = elemName;
              path += '/' + elemName;

              // Every <p> and <empty-line> inside a section opens a paragraph
              // that starts out as a single-space placeholder.
              if ((tag === 'p' || tag === 'empty-line') && path.startsWith(BODY_SECTION_PATH)) {
                  newParagraph(' ', 1);
              }
          });

          parser.on('endNode', (elemName, isTagStart, getStrNode) => {// eslint-disable-line no-unused-vars
              // Pop only when the closing tag matches the innermost open element.
              if (tag === elemName) {
                  path = path.slice(0, path.length - tag.length - 1);
                  const lastSlash = path.lastIndexOf('/');
                  tag = (lastSlash >= 0 ? path.slice(lastSlash + 1) : path);
              }
          });

          parser.on('textNode', (text) => {
              // NOTE(review): trimming every text node also removes the spaces
              // around inline tags ("a <b>c</b> d" is stored without them).
              // Kept as is because stored offsets/lengths may depend on it — confirm.
              text = text.trim();

              const metaField = META_FIELDS.get(path);
              if (metaField !== undefined) {
                  fb2[metaField] = text;
              }

              // The annotation is rebuilt as markup: text directly inside
              // <annotation> is appended as is, nested text is re-wrapped in its tag.
              if (path.startsWith(ANNOTATION_PATH)) {
                  if (!fb2.annotation)
                      fb2.annotation = '';
                  if (tag !== 'annotation')
                      fb2.annotation += `<${tag}>${text}</${tag}>`;
                  else
                      fb2.annotation += text;
              }

              // Whitespace-only nodes never contribute to paragraphs.
              if (text === '')
                  return;

              if (path.startsWith(BODY_TITLE_PATH)) {
                  newParagraph(text, text.length);
              }

              if (path.startsWith(BODY_SECTION_PATH)) {
                  switch (tag) {
                      case 'p':
                          growParagraph(text, text.length);
                          break;
                      case 'section':
                      case 'title':
                          newParagraph(text, text.length);
                          break;
                      default:
                          // Inline element: keep its tag in the text, but count
                          // only the visible characters in the length.
                          growParagraph(`<${tag}>${text}</${tag}>`, text.length);
                  }
              }
          });

          parser.on('cdata', (data) => {// eslint-disable-line no-unused-vars
          });

          parser.on('comment', (text) => {// eslint-disable-line no-unused-vars
          });

          parser.on('progress', async(progress) => {
              if (progress > nextPerc) {
                  // Raise the threshold before yielding, so that progress events
                  // arriving while we sleep do not each trigger the callback.
                  nextPerc += 10;
                  await sleep(1);
                  callback(progress);
              }
          });

          await parser.parse(data);

          this.meta = fb2;
          this.para = para;

          callback(100);
          await sleep(10);

          return {fb2};
      }
  }