// ConvertPdfImages.js
  1. const fs = require('fs-extra');
  2. const path = require('path');
  3. const utils = require('../../utils');
  4. const sax = require('../../sax');
  5. const ConvertJpegPng = require('./ConvertJpegPng');
  /**
   * Converter for PDF sources: renders the document to JPEG files with the
   * external `pdftoppm` tool, reads outline titles from the XML produced by
   * `pdftohtml`, and hands the resulting image list to the parent
   * ConvertJpegPng converter through `opts.imageFiles`.
   */
  class ConvertPdfImages extends ConvertJpegPng {
      /**
       * Tells whether this converter applies to the given input.
       *
       * @param {*} data - not used by this method
       * @param {object} opts - must contain `inputFiles`
       * @returns {*} truthy when `config.useExternalBookConverter` is set and
       *   `inputFiles.sourceFileType.ext` is 'pdf'; falsy otherwise
       */
      check(data, opts) {
          const {inputFiles} = opts;
          const {sourceFileType} = inputFiles;

          return this.config.useExternalBookConverter &&
              sourceFileType && sourceFileType.ext === 'pdf';
      }

      /**
       * Runs the PDF -> images conversion.
       *
       * @param {*} data - passed through unchanged to the parent `run`
       * @param {object} opts - reads `inputFiles` (`sourceFile`, `filesDir`,
       *   `sourceFileType`), `callback` (called with a progress number),
       *   `abort` (forwarded to `execConverter`) and the optional `pdfQuality`
       * @returns {Promise<*>} `false` when `check` rejects the input, otherwise
       *   whatever the parent `run` returns
       * @throws {Error} when pdftoppm or pdftohtml is missing, or when the
       *   total size of the produced JPEG files exceeds
       *   `2 * config.maxUploadFileSize`
       */
      async run(data, opts) {
          if (!this.check(data, opts))
              return false;

          const {inputFiles, callback, abort} = opts;

          // Only a quality inside [10, 100] is accepted; anything else becomes 20.
          const requestedQuality = opts.pdfQuality;
          const pdfQuality = (requestedQuality && requestedQuality <= 100 && requestedQuality >= 10
              ? requestedQuality
              : 20);

          const pdftoppmPath = '/usr/bin/pdftoppm';
          if (!await fs.pathExists(pdftoppmPath))
              throw new Error('Внешний конвертер pdftoppm не найден');

          const pdftohtmlPath = '/usr/bin/pdftohtml';
          if (!await fs.pathExists(pdftohtmlPath))
              throw new Error('Внешний конвертер pdftohtml не найден');

          const inpFile = inputFiles.sourceFile;
          const dir = `${inputFiles.filesDir}/`;
          const outBasename = `${dir}${utils.randomHexString(10)}`;
          const outFile = `${outBasename}.tmp`;

          // Convert the document to JPEG files.
          // The progress value climbs by one per notification and restarts
          // from 40 once it has reached 100.
          let perc = 0;
          const jpegArgs = ['-jpeg', '-jpegopt', `quality=${pdfQuality},progressive=y`, inpFile, outFile];
          await this.execConverter(pdftoppmPath, jpegArgs, () => {
              perc = (perc < 100 ? perc + 1 : 40);
              callback(perc);
          }, abort);

          // Collect the produced images while enforcing the total size limit.
          const limitSize = 2*this.config.maxUploadFileSize;
          let jpgFilesSize = 0;
          const files = [];
          await utils.findFiles(async(file) => {
              if (path.extname(file) !== '.jpg')
                  return;

              // Await first, then add: `x += await y` captures the old value of
              // x before the await and could lose updates if callbacks overlap.
              const {size} = await fs.stat(file);
              jpgFilesSize += size;
              if (jpgFilesSize > limitSize) {
                  throw new Error(`Файл для конвертирования слишком большой|FORLOG| jpgFilesSize: ${jpgFilesSize} > ${limitSize}`);
              }

              files.push({name: file, base: path.basename(file)});
          }, dir);

          // NOTE(review): assumes that sorting by file name yields page order,
          // i.e. pdftoppm zero-pads the page number in file names - confirm.
          files.sort((a, b) => a.base.localeCompare(b.base));

          // Document structure (outline).
          const outXml = `${outBasename}.xml`;
          await this.execConverter(pdftohtmlPath, ['-nodrm', '-i', '-c', '-s', '-xml', inpFile, outXml], null, abort);

          // Page number -> outline title. When several outline items point at
          // the same page, the last text seen wins.
          const titles = new Map();
          let inOutline = 0;// nesting depth of <outline> tags
          let inItem = false;
          let pageNum = 0;// page of the current <item>, 0 when unknown

          const onTextNode = (text) => {
              if (inOutline > 0 && inItem && pageNum) {
                  titles.set(pageNum, text);
              }
          };

          const onStartNode = (tag, tail) => {
              if (tag === 'outline')
                  inOutline++;

              if (inOutline > 0 && tag === 'item') {
                  const attrs = sax.getAttrsSync(tail);
                  // Parse to an integer so the key matches the numeric lookup below.
                  const parsed = (attrs.page && attrs.page.value ? Number.parseInt(attrs.page.value, 10) : 0);
                  pageNum = (parsed > 0 ? parsed : 0);
                  inItem = true;
              }
          };

          const onEndNode = (tag) => {
              if (tag === 'outline')
                  inOutline--;

              if (tag === 'item')
                  inItem = false;
          };

          const dataXml = await fs.readFile(outXml);
          const buf = this.decode(dataXml).toString();
          sax.parseSync(buf, {
              onStartNode, onEndNode, onTextNode
          });

          await utils.sleep(100);

          // Build the image list: the n-th file (1-based) takes the outline
          // title recorded for page n, or an empty alt text when there is none.
          const imageFiles = files.map((f, index) => ({
              src: f.name,
              alt: titles.get(index + 1) || '',
          }));

          return await super.run(data, Object.assign({}, opts, {imageFiles}));
      }
  }

  module.exports = ConvertPdfImages;