Fb2Parser.js 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297
  1. const XmlParser = require('../xml/XmlParser');
  2. class Fb2Parser extends XmlParser {
  3. get xlinkNS() {
  4. if (!this._xlinkNS) {
  5. const rootAttrs = this.$self().attrs();
  6. let ns = 'l';
  7. for (const [key, value] of rootAttrs) {
  8. if (value == 'http://www.w3.org/1999/xlink') {
  9. ns = key.split(':')[1] || ns;
  10. break;
  11. }
  12. }
  13. this._xlinkNS = ns;
  14. }
  15. return this._xlinkNS;
  16. }
  17. bookInfo(fb2Object) {
  18. const result = {};
  19. if (!fb2Object)
  20. fb2Object = this.toObject();
  21. const desc = this.inspector(fb2Object).$('fictionbook/description');
  22. if (!desc)
  23. return result;
  24. const parseAuthors = (node, tagName) => {
  25. const authors = [];
  26. for (const a of node.$$(tagName)) {
  27. let names = [];
  28. names.push(a.text('last-name'));
  29. names.push(a.text('first-name'));
  30. names.push(a.text('middle-name'));
  31. names = names.filter(n => n);
  32. if (!names.length)
  33. names.push(a.text('nickname'));
  34. authors.push(names.join(' '));
  35. }
  36. return authors;
  37. }
  38. const parseSequence = (node, tagName) => {
  39. const sequence = [];
  40. for (const s of node.$$(tagName)) {
  41. const seqAttrs = s.attrs() || {};
  42. const name = seqAttrs['name'] || null;
  43. const num = seqAttrs['number'] || null;
  44. const lang = seqAttrs['xml:lang'] || null;
  45. sequence.push({name, num, lang});
  46. }
  47. return sequence;
  48. }
  49. const parseTitleInfo = (titleInfo) => {
  50. const info = {};
  51. info.genre = [];
  52. for (const g of titleInfo.$$('genre'))
  53. info.genre.push(g.text());
  54. info.author = parseAuthors(titleInfo, 'author');
  55. info.bookTitle = titleInfo.text('book-title');
  56. //annotation как Object
  57. info.annotation = titleInfo.$('annotation') && titleInfo.$('annotation').value;
  58. info.annotationXml = null;
  59. info.annotationHtml = null;
  60. if (info.annotation) {
  61. //annotation как кусок xml
  62. info.annotationXml = (new XmlParser()).fromObject(info.annotation).toString({noHeader: true});
  63. //annotation как html
  64. info.annotationHtml = this.toHtml(info.annotationXml);
  65. }
  66. info.keywords = titleInfo.text('keywords');
  67. info.date = titleInfo.text('date');
  68. info.coverpage = titleInfo.$('coverpage') && titleInfo.$('coverpage').value;
  69. info.lang = titleInfo.text('lang');
  70. info.srcLang = titleInfo.text('src-lang');
  71. info.translator = parseAuthors(titleInfo, 'translator');
  72. info.sequence = parseSequence(titleInfo, 'sequence');
  73. return info;
  74. }
  75. //title-info
  76. const titleInfo = desc.$('title-info');
  77. if (titleInfo) {
  78. result.titleInfo = parseTitleInfo(titleInfo);
  79. }
  80. //src-title-info
  81. const srcTitleInfo = desc.$('src-title-info');
  82. if (srcTitleInfo) {
  83. result.srcTitleInfo = parseTitleInfo(srcTitleInfo);
  84. }
  85. //document-info
  86. const documentInfo = desc.$('document-info');
  87. if (documentInfo) {
  88. const info = {};
  89. info.author = parseAuthors(documentInfo, 'author');
  90. info.programUsed = documentInfo.text('program-used');
  91. info.date = documentInfo.text('date');
  92. info.srcUrl = [];
  93. for (const url of documentInfo.$$('src-url'))
  94. info.srcUrl.push(url.text());
  95. info.srcOcr = documentInfo.text('src-ocr');
  96. info.id = documentInfo.text('id');
  97. info.version = documentInfo.text('version');
  98. //аналогично annotation
  99. info.history = documentInfo.$('history') && documentInfo.$('history').value;
  100. info.historyXml = null;
  101. info.historyHtml = null;
  102. if (info.history) {
  103. //history как кусок xml
  104. info.historyXml = (new XmlParser()).fromObject(info.history).toString({noHeader: true});
  105. //history как html
  106. info.historyHtml = this.toHtml(info.historyXml);
  107. }
  108. info.publisher = parseAuthors(documentInfo, 'publisher');
  109. result.documentInfo = info;
  110. }
  111. //publish-info
  112. const publishInfo = desc.$('publish-info');
  113. if (publishInfo) {
  114. const info = {};
  115. info.bookName = publishInfo.text('book-name');
  116. info.publisher = publishInfo.text('publisher');
  117. info.city = publishInfo.text('city');
  118. info.year = publishInfo.text('year');
  119. info.isbn = publishInfo.text('isbn');
  120. info.sequence = parseSequence(publishInfo, 'sequence');
  121. result.publishInfo = info;
  122. }
  123. return result;
  124. }
  125. bookInfoList(fb2Object, options = {}) {
  126. let {
  127. correctMapping = false,
  128. valueToString = false,
  129. } = options;
  130. if (!correctMapping)
  131. correctMapping = mapping => mapping;
  132. const myValueToString = (value, nodePath, origVTS) => {//eslint-disable-line no-unused-vars
  133. if (nodePath == 'titleInfo/sequence'
  134. || nodePath == 'srcTitleInfo/sequence'
  135. || nodePath == 'publishInfo/sequence')
  136. return value.map(v => [v.name, v.num].filter(s => s).join(' #')).join(', ');
  137. if (typeof(value) === 'string') {
  138. return value;
  139. } else if (Array.isArray(value)) {
  140. return value.join(', ');
  141. } else if (typeof(value) === 'object') {
  142. return JSON.stringify(value);
  143. }
  144. return value;
  145. };
  146. if (!valueToString)
  147. valueToString = myValueToString;
  148. let mapping = [
  149. {name: 'titleInfo', label: 'Общая информация', value: [
  150. {name: 'author', label: 'Автор(ы)'},
  151. {name: 'bookTitle', label: 'Название'},
  152. {name: 'sequence', label: 'Серия'},
  153. {name: 'genre', label: 'Жанр'},
  154. {name: 'date', label: 'Дата'},
  155. {name: 'lang', label: 'Язык книги'},
  156. {name: 'srcLang', label: 'Язык оригинала'},
  157. {name: 'translator', label: 'Переводчик(и)'},
  158. {name: 'keywords', label: 'Ключевые слова'},
  159. ]},
  160. {name: 'srcTitleInfo', label: 'Информация о произведении на языке оригинала', value: [
  161. {name: 'author', label: 'Автор(ы)'},
  162. {name: 'bookTitle', label: 'Название'},
  163. {name: 'sequence', label: 'Серия'},
  164. {name: 'genre', label: 'Жанр'},
  165. {name: 'date', label: 'Дата'},
  166. {name: 'lang', label: 'Язык книги'},
  167. {name: 'srcLang', label: 'Язык оригинала'},
  168. {name: 'translator', label: 'Переводчик(и)'},
  169. {name: 'keywords', label: 'Ключевые слова'},
  170. ]},
  171. {name: 'publishInfo', label: 'Издательская информация', value: [
  172. {name: 'bookName', label: 'Название'},
  173. {name: 'publisher', label: 'Издательство'},
  174. {name: 'city', label: 'Город'},
  175. {name: 'year', label: 'Год'},
  176. {name: 'isbn', label: 'ISBN'},
  177. {name: 'sequence', label: 'Серия'},
  178. ]},
  179. {name: 'documentInfo', label: 'Информация о документе (OCR)', value: [
  180. {name: 'author', label: 'Автор(ы)'},
  181. {name: 'programUsed', label: 'Программа'},
  182. {name: 'date', label: 'Дата'},
  183. //srcUrl = []
  184. {name: 'id', label: 'ID'},
  185. {name: 'version', label: 'Версия'},
  186. {name: 'srcOcr', label: 'Автор источника'},
  187. {name: 'historyHtml', label: 'История'},
  188. {name: 'publisher', label: 'Правообладатели'},
  189. ]},
  190. ];
  191. mapping = correctMapping(mapping);
  192. const bookInfo = this.bookInfo(fb2Object);
  193. //заполняем mapping
  194. let result = [];
  195. for (const item of mapping) {
  196. const itemOut = {name: item.name, label: item.label, value: []};
  197. const info = bookInfo[item.name];
  198. if (!info)
  199. continue;
  200. for (const subItem of item.value) {
  201. if (info[subItem.name] !== null) {
  202. const subItemOut = {
  203. name: subItem.name,
  204. label: subItem.label,
  205. value: valueToString(info[subItem.name], `${item.name}/${subItem.name}`, myValueToString),
  206. };
  207. if (subItemOut.value)
  208. itemOut.value.push(subItemOut);
  209. }
  210. }
  211. if (itemOut.value.length)
  212. result.push(itemOut);
  213. }
  214. return result;
  215. }
  216. toHtml(xmlString) {
  217. const substs = {
  218. '<subtitle>': '<p><b>',
  219. '</subtitle>': '</b></p>',
  220. '<empty-line/>': '<br>',
  221. '<strong>': '<b>',
  222. '</strong>': '</b>',
  223. '<emphasis>': '<i>',
  224. '</emphasis>': '</i>',
  225. '<stanza>': '<br>',
  226. '</stanza>': '',
  227. '<poem>': '<br>',
  228. '</poem>': '',
  229. '<cite>': '<i>',
  230. '</cite>': '</i>',
  231. '<table>': '<br>',
  232. '</table>': '',
  233. };
  234. for (const [tag, s] of Object.entries(substs)) {
  235. const r = new RegExp(`${tag}`, 'g');
  236. xmlString = xmlString.replace(r, s);
  237. }
  238. return xmlString;
  239. }
  240. }
  241. module.exports = Fb2Parser;