// ConvertSites.js
  const URL = require('url').URL;
  const ConvertHtml = require('./ConvertHtml');
  /**
   * Per-site extraction rules, keyed by the exact URL hostname.
   *
   * Each rule names the ConvertSites method to run (`converter`); rules for the
   * `cutter` converter also carry the literal `begin`/`end` markers that delimit
   * the story text inside the page.
   *
   * The table is a frozen, prototype-less dictionary so that a lookup with an
   * arbitrary hostname (e.g. `constructor`, `toString`) can never resolve to a
   * member inherited from Object.prototype.
   */
  const sitesFilter = Object.freeze(Object.assign(Object.create(null), {
    'www.fanfiction.net': {
      converter: 'cutter',
      begin: `<div class='storytext xcontrast_txt nocopy' id='storytext'>`,
      end: `<div style='height:5px'></div><div style='clear:both;text-align:right;'>`,
    },
    'archiveofourown.org': {
      converter: 'cutter',
      begin: `<!-- BEGIN section where work skin applies -->`,
      end: `<!-- END work skin -->`,
    },
    'flibusta.is': {
      converter: 'flibusta',
    },
  }));
  /**
   * Converter for HTML pages that come from one of the hosts listed in
   * `sitesFilter`. It cuts the story/book text out of the page with a
   * site-specific method and hands the result to the base HTML converter.
   */
  class ConvertSites extends ConvertHtml {
    /**
     * Decide whether this converter applies to the input.
     *
     * @param {*} data - Unused here; kept for the converter `check` signature.
     * @param {Object} opts
     * @param {string} opts.url - Address the data was fetched from.
     * @param {Object} [opts.dataType] - Detected type; only `ext === 'html'` qualifies.
     * @returns {{hostname: string}|false} The matched hostname, or `false`
     *   when the input is not HTML, the url cannot be parsed, or the host has
     *   no rule.
     */
    check(data, opts) {
      const {url, dataType} = opts;
      if (!dataType || dataType.ext !== 'html')
        return false;

      let hostname;
      try {
        ({hostname} = new URL(url));
      } catch (e) {
        // A missing or malformed url just means no site rule can apply.
        return false;
      }

      // Own-property test: a plain `sitesFilter[hostname]` would also accept
      // hosts named like Object.prototype members (`constructor`, `toString`).
      if (!Object.prototype.hasOwnProperty.call(sitesFilter, hostname))
        return false;

      return {hostname};
    }

    /**
     * Extract the site-specific text and convert it with the base class.
     *
     * @param {*} data - Raw page data; passed to `this.decode()`.
     * @param {Object} opts - Must have `enableSitesFilter` truthy for this
     *   converter to act; also carries the fields read by `check()`.
     * @returns {Promise<*>} `false` when the converter is disabled, does not
     *   apply, or the extraction fails; otherwise the result of `super.run()`.
     */
    async run(data, opts) {
      if (!opts.enableSitesFilter)
        return false;

      const checkResult = this.check(data, opts);
      if (!checkResult)
        return false;

      const site = sitesFilter[checkResult.hostname];

      let text = this.decode(data).toString();
      text = this[site.converter](text, site);
      if (text === false)
        return false;

      return await super.run(Buffer.from(text), {skipCheck: true, cutTitle: true});
    }

    /**
     * Return the trimmed content of the first `<title>…</title>` element,
     * or an empty string when there is none.
     *
     * @param {string} text - HTML source.
     * @returns {string}
     */
    getTitle(text) {
      const m = text.match(/<title>([\s\S]*?)<\/title>/);
      return (m ? m[1] : '').trim();
    }

    /**
     * Cut out the text between `opts.begin` and `opts.end` and append the
     * page title to it as a `<title>` element.
     *
     * @param {string} text - HTML source.
     * @param {{begin: string, end: string}} opts - Literal boundary markers.
     * @returns {string|false} The extracted fragment, or `false` when either
     *   marker is missing or the end marker does not come after the content start.
     */
    cutter(text, opts) {
      const title = `<title>${this.getTitle(text)}</title>`;

      // Test the raw index before adding the marker length; after the
      // addition a missing marker (-1) is no longer distinguishable.
      const beginPos = text.indexOf(opts.begin);
      if (beginPos < 0)
        return false;

      const l = beginPos + opts.begin.length;
      const r = text.indexOf(opts.end);
      if (r < 0 || r <= l)
        return false;

      return text.substring(l, r) + title;
    }

    /**
     * Extract a book from a flibusta page: take everything from the first
     * book heading up to the footer, rewrite the site markup into
     * `<subtitle>`/`<br>`/`<p>` form and append an "author - book" `<title>`.
     *
     * @param {string} text - HTML source.
     * @returns {string|false} The rewritten fragment, or `false` when the
     *   heading or the footer cannot be located.
     */
    flibusta(text) {
      let author = '';
      const m = text.match(/- <a href=".+">([\s\S]*?)<\/a><br\/?>/);
      if (m)
        author = m[1];

      let book = this.getTitle(text);
      book = book.replace(' (fb2) | Флибуста', '');

      const title = `<title>${author}${(author ? ' - ' : '')}${book}</title>`;

      // The heading appears with or without quotes around the class name.
      // Fall back only when the quoted form is absent (offset 0 is a hit).
      let begin = '<h3 class="book">';
      if (text.indexOf(begin) < 0)
        begin = '<h3 class=book>';

      const end = '<div id="footer">';

      const l = text.indexOf(begin);
      const r = text.indexOf(end);
      if (l < 0 || r < 0 || r <= l)
        return false;

      return text.substring(l, r)
        .replace(/blockquote class="?book"?/g, 'p')
        .replace(/<br\/?>\s*<\/h3>/g, '</h3>')
        .replace(/<h3 class="?book"?>/g, '<br><br><subtitle>')
        .replace(/<h5 class="?book"?>/g, '<br><br><subtitle>')
        .replace(/<h3>/g, '<br><br><subtitle>')
        .replace(/<h5>/g, '<br><br><subtitle>')
        .replace(/<\/h3>/g, '</subtitle><br>')
        .replace(/<\/h5>/g, '</subtitle><br>')
        .replace(/<div class="?stanza"?>/g, '<br>')
        .replace(/<div>/g, '<br>')
        + title;
    }
  }
  // Public entry point of this module: the site-aware HTML converter class.
  module.exports = ConvertSites;