BookParser.js 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188
  1. import EasySAXParser from './easysax';
  2. import {sleep} from '../../../share/utils';
  /**
   * Parses FictionBook (FB2) markup into book metadata and a flat list of
   * paragraphs, driven by the SAX-style events of the EasySAXParser instance
   * created in the constructor.
   */
  export default class BookParser {
    constructor() {
      this.parser = new EasySAXParser();
    }

    /**
     * Parses `data` and stores the results on the instance:
     * `this.data` (the raw input), `this.meta` (metadata) and `this.para`
     * (paragraph list).
     *
     * Each entry of `this.para` has the shape:
     *   {
     *     index: Number,  // position in the list
     *     offset: Number, // sum of `length` of all preceding paragraphs
     *     length: Number, // length of the text without tags
     *     text: String    // paragraph text (or title, epigraph, etc.) with nested tags
     *   }
     *
     * @param {string} data - FB2 markup; must contain `<FictionBook`.
     * @param {function(number): void} [callback] - progress listener; called
     *   with 0 at the start, with parser progress values while parsing
     *   (throttled in steps of 10), and with 100 at the end.
     * @returns {Promise<{fb2: Object}>} the collected metadata.
     * @throws {Error} if `data` does not contain `<FictionBook`.
     */
    async parse(data, callback) {
      // Any falsy callback (not only undefined) falls back to a no-op.
      const report = callback || (() => {});
      report(0);

      this.data = data;

      if (!data.includes('<FictionBook')) {
        throw new Error('Неверный формат файла');
      }

      // defaults
      const fb2 = {
        firstName: '',
        middleName: '',
        lastName: '',
        bookTitle: '',
      };

      const TITLE_INFO = '/FictionBook/description/title-info';
      const ANNOTATION_PATH = `${TITLE_INFO}/annotation`;
      const BODY_TITLE_PATH = '/FictionBook/body/title';
      const SECTION_PATH = '/FictionBook/body/section';

      // Exact element path -> metadata field that receives its text.
      const metaFields = new Map([
        [`${TITLE_INFO}/author/first-name`, 'firstName'],
        [`${TITLE_INFO}/author/middle-name`, 'middleName'],
        [`${TITLE_INFO}/author/last-name`, 'lastName'],
        [`${TITLE_INFO}/genre`, 'genre'],
        [`${TITLE_INFO}/date`, 'date'],
        [`${TITLE_INFO}/book-title`, 'bookTitle'],
        [`${TITLE_INFO}/id`, 'id'],
      ]);

      let path = ''; // '/'-joined names of the currently open elements
      let tag = ''; // name of the innermost open element
      let nextPerc = 0; // progress threshold for the next callback
      let paraIndex = -1; // index of the current paragraph, -1 = none yet
      let paraOffset = 0; // running sum of all paragraph lengths
      const para = [];

      // Appends a new paragraph and advances the running offset.
      const newParagraph = (text, len) => {
        paraIndex++;
        para[paraIndex] = {
          index: paraIndex,
          offset: paraOffset,
          length: len,
          text: text,
        };
        paraOffset += len;
      };

      // Appends text to the current paragraph.
      const growParagraph = (text, len) => {
        const current = para[paraIndex];
        if (!current) {
          // No paragraph has been started yet. Writing to para[-1] would
          // only create a stray "-1" property on the array and lose the
          // text, so start a real paragraph instead.
          newParagraph(text, len);
          return;
        }

        paraOffset -= current.length;
        // A single space is the placeholder inserted when <p>/<empty-line>
        // opens; the first real text replaces it instead of following it.
        if (current.text === ' ') {
          current.length = 0;
          current.text = '';
        }
        current.length += len;
        current.text += text;
        paraOffset += current.length;
      };

      const parser = this.parser;

      // Parser errors are deliberately ignored (best-effort parsing).
      parser.on('error', (msgError) => {// eslint-disable-line no-unused-vars
      });

      parser.on('startNode', (elemName, getAttr, isTagEnd, getStrNode) => {// eslint-disable-line no-unused-vars
        tag = elemName;
        path += '/' + elemName;

        // Every <p> and <empty-line> under a section opens a paragraph that
        // initially holds the one-space placeholder.
        if ((tag === 'p' || tag === 'empty-line') && path.startsWith(SECTION_PATH)) {
          newParagraph(' ', 1);
        }
      });

      parser.on('endNode', (elemName, isTagStart, getStrNode) => {// eslint-disable-line no-unused-vars
        // Only pop when the closing name matches the innermost open element.
        if (tag === elemName) {
          path = path.slice(0, Math.max(0, path.length - tag.length - 1));
          const slashPos = path.lastIndexOf('/');
          tag = slashPos >= 0 ? path.slice(slashPos + 1) : path;
        }
      });

      parser.on('textNode', (rawText) => {
        const text = rawText.trim();

        const metaField = metaFields.get(path);
        if (metaField !== undefined) {
          fb2[metaField] = text;
        }

        if (path.startsWith(ANNOTATION_PATH)) {
          if (!fb2.annotation) {
            fb2.annotation = '';
          }
          // Text of nested elements keeps its tag; direct text is appended as is.
          fb2.annotation += (tag !== 'annotation') ? `<${tag}>${text}</${tag}>` : text;
        }

        // Whitespace-only text contributes nothing to the paragraph list.
        if (text === '') {
          return;
        }

        if (path.startsWith(BODY_TITLE_PATH)) {
          newParagraph(text, text.length);
        }

        if (path.startsWith(SECTION_PATH)) {
          switch (tag) {
            case 'p':
              growParagraph(text, text.length);
              break;
            case 'section':
            case 'title':
              newParagraph(text, text.length);
              break;
            default:
              // Inline or other nested element: keep the tag in the stored
              // text, but count only the visible characters.
              growParagraph(`<${tag}>${text}</${tag}>`, text.length);
          }
        }
      });

      parser.on('cdata', (data) => {// eslint-disable-line no-unused-vars
      });

      parser.on('comment', (text) => {// eslint-disable-line no-unused-vars
      });

      parser.on('progress', async (progress) => {
        if (progress > nextPerc) {
          // Advance the threshold before awaiting: otherwise every progress
          // event emitted while the sleep is pending would also pass the
          // check and the throttle would not work.
          nextPerc += 10;
          await sleep(1); // presumably yields so the UI can update - confirm against sleep()
          report(progress);
        }
      });

      await parser.parse(data);

      this.meta = fb2;
      this.para = para;

      report(100);
      // NOTE(review): presumably lets the listener react to the final value
      // before the result is returned - confirm.
      await sleep(10);

      return {fb2};
    }
  }