ConvertBase.js 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596
  1. const iconv = require('iconv-lite');
  2. const chardet = require('chardet');
  3. const textUtils = require('./textUtils');
  4. class ConvertBase {
  5. constructor(config) {
  6. this.config = config;
  7. }
  8. run(data, opts) {// eslint-disable-line no-unused-vars
  9. //override
  10. }
  11. decode(data) {
  12. let selected = textUtils.getEncoding(data);
  13. if (selected == 'ISO-8859-5') {
  14. const charsetAll = chardet.detectAll(data.slice(0, 20000));
  15. for (const charset of charsetAll) {
  16. if (charset.name.indexOf('ISO-8859') < 0) {
  17. selected = charset.name;
  18. break;
  19. }
  20. }
  21. }
  22. if (selected.toLowerCase() != 'utf-8')
  23. return iconv.decode(data, selected);
  24. else
  25. return data;
  26. }
  27. repSpaces(text) {
  28. return text.replace(/&nbsp;|[\t\n\r]/g, ' ');
  29. }
  30. formatFb2(fb2) {
  31. let out = '<?xml version="1.0" encoding="utf-8"?>';
  32. out += '<FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:l="http://www.w3.org/1999/xlink">';
  33. out += this.formatFb2Node(fb2);
  34. out += '</FictionBook>';
  35. return out;
  36. }
  37. formatFb2Node(node, name) {
  38. let out = '';
  39. if (Array.isArray(node)) {
  40. for (const n of node) {
  41. out += this.formatFb2Node(n);
  42. }
  43. } else if (typeof node == 'string') {
  44. if (name)
  45. out += `<${name}>${this.repSpaces(node)}</${name}>`;
  46. else
  47. out += this.repSpaces(node);
  48. } else {
  49. if (node._n)
  50. name = node._n;
  51. let attrs = '';
  52. if (node._attrs) {
  53. for (let attrName in node._attrs) {
  54. attrs += ` ${attrName}="${node._attrs[attrName]}"`;
  55. }
  56. }
  57. let tOpen = '';
  58. let tBody = '';
  59. let tClose = '';
  60. if (name)
  61. tOpen += `<${name}${attrs}>`;
  62. if (node.hasOwnProperty('_t'))
  63. tBody += this.repSpaces(node._t);
  64. for (let nodeName in node) {
  65. if (nodeName && nodeName[0] == '_' && nodeName != '_a')
  66. continue;
  67. const n = node[nodeName];
  68. tBody += this.formatFb2Node(n, nodeName);
  69. }
  70. if (name)
  71. tClose += `</${name}>`;
  72. if (attrs == '' && name == 'p' && tBody.trim() == '')
  73. out += '<empty-line/>'
  74. else
  75. out += `${tOpen}${tBody}${tClose}`;
  76. }
  77. return out;
  78. }
  79. }
  80. module.exports = ConvertBase;