// Fb2Parser.js
  1. const XmlParser = require('../xml/XmlParser');
  2. class Fb2Parser extends XmlParser {
  3. get xlinkNS() {
  4. if (!this._xlinkNS) {
  5. const rootAttrs = this.selectFirstSelf().attrs();
  6. let ns = 'l';
  7. for (const [key, value] of rootAttrs) {
  8. if (value == 'http://www.w3.org/1999/xlink') {
  9. ns = key.split(':')[1] || ns;
  10. break;
  11. }
  12. }
  13. this._xlinkNS = ns;
  14. }
  15. return this._xlinkNS;
  16. }
  17. bookInfo() {
  18. const result = {};
  19. const desc = this.$$('/description/');
  20. if (!desc)
  21. return result;
  22. const parseAuthors = (node, tagName) => {
  23. const authors = [];
  24. for (const a of node.$$array(tagName)) {
  25. let names = [];
  26. names.push(a.text('/last-name'));
  27. names.push(a.text('/first-name'));
  28. names.push(a.text('/middle-name'));
  29. names = names.filter(n => n);
  30. if (!names.length)
  31. names.push(a.text('/nickname'));
  32. authors.push(names.join(' '));
  33. }
  34. return authors;
  35. }
  36. const parseSequence = (node, tagName) => {
  37. const sequence = [];
  38. for (const s of node.$$array(tagName)) {
  39. const seqAttrs = s.attrs() || {};
  40. const name = seqAttrs['name'] || null;
  41. const num = seqAttrs['number'] || null;
  42. const lang = seqAttrs['xml:lang'] || null;
  43. sequence.push({name, num, lang});
  44. }
  45. return sequence;
  46. }
  47. const parseTitleInfo = (titleInfo) => {
  48. const info = {};
  49. info.genre = [];
  50. for (const g of titleInfo.$$array('genre'))
  51. info.genre.push(g.text());
  52. info.author = parseAuthors(titleInfo, 'author');
  53. info.bookTitle = titleInfo.text('book-title');
  54. //annotation как Object
  55. info.annotation = titleInfo.$('annotation') && titleInfo.$('annotation').value;
  56. info.annotationXml = null;
  57. info.annotationHtml = null;
  58. if (info.annotation) {
  59. //annotation как кусок xml
  60. info.annotationXml = titleInfo.$$('annotation/').toString({noHeader: true});
  61. //annotation как html
  62. info.annotationHtml = this.toHtml(info.annotationXml);
  63. }
  64. info.keywords = titleInfo.text('keywords');
  65. info.date = titleInfo.text('date');
  66. info.coverpage = titleInfo.$('coverpage') && titleInfo.$('coverpage').value;
  67. info.lang = titleInfo.text('lang');
  68. info.srcLang = titleInfo.text('src-lang');
  69. info.translator = parseAuthors(titleInfo, 'translator');
  70. info.sequence = parseSequence(titleInfo, 'sequence');
  71. return info;
  72. }
  73. //title-info
  74. const titleInfo = desc.$$('title-info/');
  75. if (titleInfo) {
  76. result.titleInfo = parseTitleInfo(titleInfo);
  77. }
  78. //src-title-info
  79. const srcTitleInfo = desc.$$('src-title-info/');
  80. if (srcTitleInfo) {
  81. result.srcTitleInfo = parseTitleInfo(srcTitleInfo);
  82. }
  83. //document-info
  84. const documentInfo = desc.$$('document-info/');
  85. if (documentInfo) {
  86. const info = {};
  87. info.author = parseAuthors(documentInfo, 'author');
  88. info.programUsed = documentInfo.text('program-used');
  89. info.date = documentInfo.text('date');
  90. info.srcUrl = [];
  91. for (const url of documentInfo.$$array('src-url'))
  92. info.srcUrl.push(url.text());
  93. info.srcOcr = documentInfo.text('src-ocr');
  94. info.id = documentInfo.text('id');
  95. info.version = documentInfo.text('version');
  96. //аналогично annotation
  97. info.history = documentInfo.$('history') && documentInfo.$('history').value;
  98. info.historyXml = null;
  99. info.historyHtml = null;
  100. if (info.history) {
  101. //history как кусок xml
  102. info.historyXml = documentInfo.$$('history/').toString({noHeader: true});
  103. //history как html
  104. info.historyHtml = this.toHtml(info.historyXml);
  105. }
  106. info.publisher = parseAuthors(documentInfo, 'publisher');
  107. result.documentInfo = info;
  108. }
  109. //publish-info
  110. const publishInfo = desc.$$('publish-info/');
  111. if (publishInfo) {
  112. const info = {};
  113. info.bookName = publishInfo.text('book-name');
  114. info.publisher = publishInfo.text('publisher');
  115. info.city = publishInfo.text('city');
  116. info.year = publishInfo.text('year');
  117. info.isbn = publishInfo.text('isbn');
  118. info.sequence = parseSequence(publishInfo, 'sequence');
  119. result.publishInfo = info;
  120. }
  121. return result;
  122. }
  123. bookInfoList(bookInfo, options = {}) {
  124. let {
  125. correctMapping = false,
  126. valueToString = false,
  127. } = options;
  128. if (!correctMapping)
  129. correctMapping = mapping => mapping;
  130. const myValueToString = (value, nodePath, origVTS) => {//eslint-disable-line no-unused-vars
  131. if (nodePath == 'titleInfo/sequence'
  132. || nodePath == 'srcTitleInfo/sequence'
  133. || nodePath == 'publishInfo/sequence')
  134. return value.map(v => [v.name, v.num].filter(s => s).join(' #')).join(', ');
  135. if (typeof(value) === 'string') {
  136. return value;
  137. } else if (Array.isArray(value)) {
  138. return value.join(', ');
  139. } else if (typeof(value) === 'object') {
  140. return JSON.stringify(value);
  141. }
  142. return value;
  143. };
  144. if (!valueToString)
  145. valueToString = myValueToString;
  146. let mapping = [
  147. {name: 'titleInfo', label: 'Общая информация', value: [
  148. {name: 'author', label: 'Автор(ы)'},
  149. {name: 'bookTitle', label: 'Название'},
  150. {name: 'sequence', label: 'Серия'},
  151. {name: 'genre', label: 'Жанр'},
  152. {name: 'date', label: 'Дата'},
  153. {name: 'lang', label: 'Язык книги'},
  154. {name: 'srcLang', label: 'Язык оригинала'},
  155. {name: 'translator', label: 'Переводчик(и)'},
  156. {name: 'keywords', label: 'Ключевые слова'},
  157. ]},
  158. {name: 'srcTitleInfo', label: 'Информация о произведении на языке оригинала', value: [
  159. {name: 'author', label: 'Автор(ы)'},
  160. {name: 'bookTitle', label: 'Название'},
  161. {name: 'sequence', label: 'Серия'},
  162. {name: 'genre', label: 'Жанр'},
  163. {name: 'date', label: 'Дата'},
  164. {name: 'lang', label: 'Язык книги'},
  165. {name: 'srcLang', label: 'Язык оригинала'},
  166. {name: 'translator', label: 'Переводчик(и)'},
  167. {name: 'keywords', label: 'Ключевые слова'},
  168. ]},
  169. {name: 'publishInfo', label: 'Издательская информация', value: [
  170. {name: 'bookName', label: 'Название'},
  171. {name: 'publisher', label: 'Издательство'},
  172. {name: 'city', label: 'Город'},
  173. {name: 'year', label: 'Год'},
  174. {name: 'isbn', label: 'ISBN'},
  175. {name: 'sequence', label: 'Серия'},
  176. ]},
  177. {name: 'documentInfo', label: 'Информация о документе (OCR)', value: [
  178. {name: 'author', label: 'Автор(ы)'},
  179. {name: 'programUsed', label: 'Программа'},
  180. {name: 'date', label: 'Дата'},
  181. //srcUrl = []
  182. {name: 'id', label: 'ID'},
  183. {name: 'version', label: 'Версия'},
  184. {name: 'srcOcr', label: 'Автор источника'},
  185. {name: 'historyHtml', label: 'История'},
  186. {name: 'publisher', label: 'Правообладатели'},
  187. ]},
  188. ];
  189. mapping = correctMapping(mapping);
  190. bookInfo = (bookInfo ? bookInfo : this.bookInfo());
  191. //заполняем mapping
  192. let result = [];
  193. for (const item of mapping) {
  194. const itemOut = {name: item.name, label: item.label, value: []};
  195. const info = bookInfo[item.name];
  196. if (!info)
  197. continue;
  198. for (const subItem of item.value) {
  199. if (info[subItem.name] !== null) {
  200. const subItemOut = {
  201. name: subItem.name,
  202. label: subItem.label,
  203. value: valueToString(info[subItem.name], `${item.name}/${subItem.name}`, myValueToString),
  204. };
  205. if (subItemOut.value)
  206. itemOut.value.push(subItemOut);
  207. }
  208. }
  209. if (itemOut.value.length)
  210. result.push(itemOut);
  211. }
  212. return result;
  213. }
  214. toHtml(xmlString) {
  215. const substs = {
  216. '<subtitle>': '<p><b>',
  217. '</subtitle>': '</b></p>',
  218. '<empty-line/>': '<br>',
  219. '<strong>': '<b>',
  220. '</strong>': '</b>',
  221. '<emphasis>': '<i>',
  222. '</emphasis>': '</i>',
  223. '<stanza>': '<br>',
  224. '</stanza>': '',
  225. '<poem>': '<br>',
  226. '</poem>': '',
  227. '<cite>': '<i>',
  228. '</cite>': '</i>',
  229. '<table>': '<br>',
  230. '</table>': '',
  231. };
  232. for (const [tag, s] of Object.entries(substs)) {
  233. const r = new RegExp(tag, 'g');
  234. xmlString = xmlString.replace(r, s);
  235. }
  236. return xmlString;
  237. }
  238. }
  239. module.exports = Fb2Parser;