// BookParser.js

  1. import EasySAXParser from './easysax';
  2. import {sleep} from '../../../share/utils';
  3. export default class BookParser {
  4. constructor() {
  5. this.parser = new EasySAXParser();
  6. // defaults
  7. this.p = 30;// px, отступ параграфа
  8. this.w = 300;// px, ширина страницы
  9. this.textAlignJustify = false;// выравнивание по ширине
  10. this.wordWrap = false;// перенос по слогам, если textAlignJustify = true
  11. // заглушка
  12. this.measureText = (text, style) => {// eslint-disable-line no-unused-vars
  13. return text.length*10;
  14. };
  15. // stuff
  16. }
  17. async parse(data, callback) {
  18. if (!callback)
  19. callback = () => {};
  20. callback(0);
  21. this.data = data;
  22. if (data.indexOf('<FictionBook') < 0) {
  23. throw new Error('Неверный формат файла');
  24. }
  25. //defaults
  26. let fb2 = {
  27. firstName: '',
  28. middleName: '',
  29. lastName: '',
  30. bookTitle: '',
  31. };
  32. let path = '';
  33. let tag = '';
  34. let nextPerc = 0;
  35. let paraIndex = -1;
  36. let paraOffset = 0;
  37. let para = []; /*array of
  38. {
  39. index: Number,
  40. offset: Number, //сумма всех length до этого параграфа
  41. length: Number, //длина text без тегов
  42. text: String //текст параграфа (или title или epigraph и т.д) с вложенными тегами
  43. }
  44. */
  45. const newParagraph = (text, len) => {
  46. paraIndex++;
  47. let p = {
  48. index: paraIndex,
  49. offset: paraOffset,
  50. length: len,
  51. text: text
  52. };
  53. para[paraIndex] = p;
  54. paraOffset += p.length;
  55. };
  56. const growParagraph = (text, len) => {
  57. let p = para[paraIndex];
  58. if (p) {
  59. paraOffset -= p.length;
  60. if (p.text == ' ') {
  61. p.length = 0;
  62. p.text = '';
  63. }
  64. p.length += len;
  65. p.text += text;
  66. } else {
  67. p = {
  68. index: paraIndex,
  69. offset: paraOffset,
  70. length: len,
  71. text: text
  72. };
  73. }
  74. para[paraIndex] = p;
  75. paraOffset += p.length;
  76. };
  77. const parser = this.parser;
  78. parser.on('error', (msgError) => {// eslint-disable-line no-unused-vars
  79. });
  80. parser.on('startNode', (elemName, getAttr, isTagEnd, getStrNode) => {// eslint-disable-line no-unused-vars
  81. tag = elemName;
  82. path += '/' + elemName;
  83. if ((tag == 'p' || tag == 'empty-line') && path.indexOf('/FictionBook/body/section') == 0) {
  84. newParagraph(' ', 1);
  85. }
  86. });
  87. parser.on('endNode', (elemName, isTagStart, getStrNode) => {// eslint-disable-line no-unused-vars
  88. if (tag == elemName) {
  89. path = path.substr(0, path.length - tag.length - 1);
  90. let i = path.lastIndexOf('/');
  91. if (i >= 0) {
  92. tag = path.substr(i + 1);
  93. } else {
  94. tag = path;
  95. }
  96. }
  97. });
  98. parser.on('textNode', (text) => {
  99. text = text.trim();
  100. switch (path) {
  101. case '/FictionBook/description/title-info/author/first-name':
  102. fb2.firstName = text;
  103. break;
  104. case '/FictionBook/description/title-info/author/middle-name':
  105. fb2.middleName = text;
  106. break;
  107. case '/FictionBook/description/title-info/author/last-name':
  108. fb2.lastName = text;
  109. break;
  110. case '/FictionBook/description/title-info/genre':
  111. fb2.genre = text;
  112. break;
  113. case '/FictionBook/description/title-info/date':
  114. fb2.date = text;
  115. break;
  116. case '/FictionBook/description/title-info/book-title':
  117. fb2.bookTitle = text;
  118. break;
  119. case '/FictionBook/description/title-info/id':
  120. fb2.id = text;
  121. break;
  122. }
  123. if (path.indexOf('/FictionBook/description/title-info/annotation') == 0) {
  124. if (!fb2.annotation)
  125. fb2.annotation = '';
  126. if (tag != 'annotation')
  127. fb2.annotation += `<${tag}>${text}</${tag}>`;
  128. else
  129. fb2.annotation += text;
  130. }
  131. if (text == '')
  132. return;
  133. if (path.indexOf('/FictionBook/body/title') == 0) {
  134. newParagraph(text, text.length);
  135. }
  136. if (text == '')
  137. return;
  138. if (path.indexOf('/FictionBook/body/section') == 0) {
  139. switch (tag) {
  140. case 'p':
  141. growParagraph(text, text.length);
  142. break;
  143. case 'section':
  144. case 'title':
  145. newParagraph(text, text.length);
  146. break;
  147. default:
  148. growParagraph(`<${tag}>${text}</${tag}>`, text.length);
  149. }
  150. }
  151. });
  152. parser.on('cdata', (data) => {// eslint-disable-line no-unused-vars
  153. });
  154. parser.on('comment', (text) => {// eslint-disable-line no-unused-vars
  155. });
  156. parser.on('progress', async(progress) => {
  157. if (progress > nextPerc) {
  158. await sleep(1);
  159. callback(progress);
  160. nextPerc += 10;
  161. }
  162. });
  163. await parser.parse(data);
  164. this.fb2 = fb2;
  165. this.para = para;
  166. callback(100);
  167. await sleep(10);
  168. return {fb2};
  169. }
  170. findParaIndex(bookPos) {
  171. let result = undefined;
  172. //дихотомия
  173. let first = 0;
  174. let last = this.para.length - 1;
  175. while (first < last) {
  176. let mid = first + Math.floor((last - first)/2);
  177. if (bookPos >= this.para[mid].offset)
  178. last = mid;
  179. else
  180. first = mid + 1;
  181. }
  182. if (last >= 0) {
  183. const ofs = this.para[last].offset;
  184. if (bookPos >= ofs && bookPos < ofs + this.para[last].length)
  185. result = last;
  186. }
  187. return result;
  188. }
  189. parsePara(paraIndex) {
  190. const para = this.para[paraIndex];
  191. if (para.parsed &&
  192. para.parsed.w === this.w &&
  193. para.parsed.p === this.p &&
  194. para.parsed.textAlignJustify === this.textAlignJustify &&
  195. para.parsed.wordWrap === this.wordWrap)
  196. return para.parsed;
  197. const parsed = {
  198. w: this.w,
  199. p: this.p,
  200. textAlignJustify: this.textAlignJustify,
  201. wordWrap: this.wordWrap
  202. };
  203. const lines = [];
  204. /* array of
  205. {
  206. begin: Number,
  207. end: Number,
  208. parts: array of {
  209. style: 'bold'|'italic',
  210. text: String,
  211. }
  212. }*/
  213. //
  214. parsed.lines = lines;
  215. para.parsed = parsed;
  216. return parsed;
  217. }
  218. findLineIndex(bookPos, lines) {
  219. let result = undefined;
  220. //дихотомия
  221. let first = 0;
  222. let last = lines.length - 1;
  223. while (first < last) {
  224. let mid = first + Math.floor((last - first)/2);
  225. if (bookPos >= lines[mid].begin)
  226. last = mid;
  227. else
  228. first = mid + 1;
  229. }
  230. if (last >= 0) {
  231. if (bookPos >= lines[last].begin && bookPos <= lines[last].end)
  232. result = last;
  233. }
  234. return result;
  235. }
  236. getLines(bookPos, n) {
  237. const result = [];
  238. let paraIndex = this.findParaIndex(bookPos);
  239. if (paraIndex === undefined)
  240. return result;
  241. if (n > 0) {
  242. let parsed = this.parsePara(paraIndex);
  243. let i = this.findLineIndex(bookPos, parsed.lines);
  244. if (i === undefined)
  245. return result;
  246. while (n > 0) {
  247. result.push(parsed.lines[i]);
  248. i++;
  249. if (i >= parsed.lines.length) {
  250. paraIndex++;
  251. if (paraIndex < this.para.length)
  252. parsed = this.parsePara(paraIndex);
  253. else
  254. return result;
  255. i = 0;
  256. }
  257. n--;
  258. }
  259. } else if (n < 0) {
  260. n = -n;
  261. let parsed = this.parsePara(paraIndex);
  262. let i = this.findLineIndex(bookPos, parsed.lines);
  263. if (i === undefined)
  264. return result;
  265. while (n > 0) {
  266. result.push(parsed.lines[i]);
  267. i--;
  268. if (i > 0) {
  269. paraIndex--;
  270. if (paraIndex >= this.para.length)
  271. parsed = this.parsePara(paraIndex);
  272. else
  273. return result;
  274. i = parsed.lines.length - 1;
  275. }
  276. n--;
  277. }
  278. }
  279. return result;
  280. }
  281. }