فهرست منبع

Работа над конвертером Pdf

Book Pauk 4 سال پیش
والد
کامیت
3137b86cee
2 فایلهای تغییر یافته به همراه 33 افزوده شده و 18 حذف شده
  1. 1 0
      server/core/Reader/BookConverter/ConvertBase.js
  2. 32 18
      server/core/Reader/BookConverter/ConvertPdf.js

+ 1 - 0
server/core/Reader/BookConverter/ConvertBase.js

@@ -70,6 +70,7 @@ class ConvertBase {
                 const error = `${result.code}|FORLOG|, exec: ${path}, args: ${args.join(' ')}, stdout: ${result.stdout}, stderr: ${result.stderr}`;
                 const error = `${result.code}|FORLOG|, exec: ${path}, args: ${args.join(' ')}, stdout: ${result.stdout}, stderr: ${result.stderr}`;
                 throw new Error(`Внешний конвертер завершился с ошибкой: ${error}`);
                 throw new Error(`Внешний конвертер завершился с ошибкой: ${error}`);
             }
             }
+            return result;
         } catch(e) {
         } catch(e) {
             if (e.status == 'killed') {
             if (e.status == 'killed') {
                 throw new Error('Слишком долгое ожидание конвертера');
                 throw new Error('Слишком долгое ожидание конвертера');

+ 32 - 18
server/core/Reader/BookConverter/ConvertPdf.js

@@ -5,7 +5,6 @@ const path = require('path');
 const sax = require('../../sax');
 const sax = require('../../sax');
 const utils = require('../../utils');
 const utils = require('../../utils');
 const ConvertHtml = require('./ConvertHtml');
 const ConvertHtml = require('./ConvertHtml');
-const xmlParser = require('../../xmlParser');
 
 
 class ConvertPdf extends ConvertHtml {
 class ConvertPdf extends ConvertHtml {
     check(data, opts) {
     check(data, opts) {
@@ -26,16 +25,15 @@ class ConvertPdf extends ConvertHtml {
         const inpFile = inputFiles.sourceFile;
         const inpFile = inputFiles.sourceFile;
         const outBasename = `${inputFiles.filesDir}/${utils.randomHexString(10)}`;
         const outBasename = `${inputFiles.filesDir}/${utils.randomHexString(10)}`;
         const outFile = `${outBasename}.xml`;
         const outFile = `${outBasename}.xml`;
-        const metaFile = `${outBasename}_metadata.xml`;
 
 
-        const pdfaltoPath = `${this.config.dataDir}/pdfalto/pdfalto`;
+        const pdftohtmlPath = '/usr/bin/pdftohtml';
 
 
-        if (!await fs.pathExists(pdfaltoPath))
-            throw new Error('Внешний конвертер pdfalto не найден');
+        if (!await fs.pathExists(pdftohtmlPath))
+            throw new Error('Внешний конвертер pdftohtml не найден');
 
 
         //конвертируем в xml
         //конвертируем в xml
         let perc = 0;
         let perc = 0;
-        await this.execConverter(pdfaltoPath, [inpFile, outFile], () => {
+        await this.execConverter(pdftohtmlPath, ['-nodrm', '-c', '-s', '-xml', inpFile, outFile], () => {
             perc = (perc < 80 ? perc + 10 : 40);
             perc = (perc < 80 ? perc + 10 : 40);
             callback(perc);
             callback(perc);
         }, abort);
         }, abort);
@@ -57,8 +55,6 @@ class ConvertPdf extends ConvertHtml {
         let images = [];
         let images = [];
         let loading = [];
         let loading = [];
 
 
-        let title = '';
-        let author = '';
         let i = -1;
         let i = -1;
 
 
         const loadImage = async(image) => {
         const loadImage = async(image) => {
@@ -277,16 +273,8 @@ class ConvertPdf extends ConvertHtml {
         }
         }
         indents[0] = 0;
         indents[0] = 0;
 
 
-        //title
-        if (fs.pathExists(metaFile)) {
-            const metaXmlString = (await fs.readFile(metaFile)).toString();
-            let metaXmlParsed = xmlParser.parseXml(metaXmlString);
-            metaXmlParsed = xmlParser.simplifyXmlParsed(metaXmlParsed);
-            if (metaXmlParsed.metadata) {
-                title = (metaXmlParsed.metadata.title ? metaXmlParsed.metadata.title._t : '');
-                author = (metaXmlParsed.metadata.author ? metaXmlParsed.metadata.author._t : '');
-            }
-        }
+        //author & title
+        let {author, title} = await this.getPdfTitleAndAuthor(inpFile);
 
 
         if (!title && uploadFileName)
         if (!title && uploadFileName)
             title = uploadFileName;
             title = uploadFileName;
@@ -343,6 +331,32 @@ class ConvertPdf extends ConvertHtml {
         await utils.sleep(100);
         await utils.sleep(100);
         return await super.run(Buffer.from(text), {skipCheck: true, isText: true});
         return await super.run(Buffer.from(text), {skipCheck: true, isText: true});
     }
     }
+
+    async getPdfTitleAndAuthor(pdfFile) {
+        const result = {author: '', title: ''};
+
+        const pdfinfoPath = '/usr/bin/pdfinfo';
+
+        if (!await fs.pathExists(pdfinfoPath))
+            throw new Error('Внешний конвертер pdfinfo не найден');
+
+        const execResult = await this.execConverter(pdfinfoPath, [pdfFile]);
+
+        const titlePrefix = 'Title:';
+        const authorPrefix = 'Author:';
+
+        const stdout = execResult.stdout.split("\n");
+        stdout.forEach(line => {
+            if (line.indexOf(titlePrefix) == 0) 
+                result.title = line.substring(titlePrefix.length).trim();
+
+            if (line.indexOf(authorPrefix) == 0)
+                result.author = line.substring(authorPrefix.length).trim();
+        });
+
+        return result;
+    }
 }
 }
 
 
+
 module.exports = ConvertPdf;
 module.exports = ConvertPdf;