瀏覽代碼

Работа над конвертером pdf

Book Pauk 6 年之前
父節點
當前提交
b4493b2e8d
共有 2 個文件被更改,包括 46 次插入和 10 次刪除
  1. +5 -2
      server/core/BookConverter/ConvertHtml.js
  2. +41 -8
      server/core/BookConverter/ConvertPdf.js

+ 5 - 2
server/core/BookConverter/ConvertHtml.js

@@ -28,6 +28,7 @@ class ConvertHtml extends ConvertBase {
         } else {
             isText = opts.isText;
         }
+        const {cutTitle} = opts;
 
         let titleInfo = {};
         let desc = {_n: 'description', 'title-info': titleInfo};
@@ -73,7 +74,7 @@ class ConvertHtml extends ConvertBase {
         const newPara = new Set(['tr', 'br', 'br/', 'dd', 'p', 'title', '/title', 'h1', 'h2', 'h3', '/h1', '/h2', '/h3']);
 
         const onTextNode = (text, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
-            if (!cutCounter) {
+            if (!cutCounter && !(cutTitle && inTitle)) {
                 growParagraph(text);
             }
 
@@ -111,7 +112,9 @@ class ConvertHtml extends ConvertBase {
             for (let i = 0; i < spaceCounter.length; i++) {
                 total += (spaceCounter[i] ? spaceCounter[i] : 0);
             }
-            total /= 10;
+
+            total /= 20;
+            
             let i = spaceCounter.length - 1;
             while (i > 0 && (!spaceCounter[i] || spaceCounter[i] < total)) i--;
 

+ 41 - 8
server/core/BookConverter/ConvertPdf.js

@@ -45,15 +45,20 @@ class ConvertPdf extends ConvertHtml {
         const onStartNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
             if (!cutCounter) {
                 if (tag == 'text' && !inText) {
-                    inText = true;
-                    i++;
-
                     let attrs = sax.getAttrsSync(tail);
-                    lines[i] = {
+                    const line = {
                         text: '',
                         top: (attrs.top && attrs.top.value ? attrs.top.value : null),
                         left: (attrs.left && attrs.left.value ? attrs.left.value : null),
+                        width: (attrs.width && attrs.width.value ? attrs.width.value : null),
+                        height: (attrs.height && attrs.height.value ? attrs.height.value : null),
                     };
+
+                    if (line.width !== '0' || line.height !== '0') {
+                        inText = true;
+                        i++;
+                        lines[i] = line;
+                    }
                 }
             }
         };
@@ -69,12 +74,40 @@ class ConvertPdf extends ConvertHtml {
         });
 
         //найдем параграфы и отступы
-console.log(lines.length);
+        const indents = [];
+        for (const line of lines) {
+            const top = parseInt(line.top);
+            const left = parseInt(line.left);
+
+            if (!isNaN(top)) {
+                line.top = top;
+            }
+
+            if (!isNaN(left)) {
+                indents[left] = 1;
+                line.left = left;
+            }
+        }
+
+        let j = 0;
+        for (let i = 0; i < indents.length; i++) {
+            if (indents[i]) {
+                j++;
+                indents[i] = j;
+            }
+        }
+        indents[0] = 0;
+
         //формируем текст
-        let text = ''
+        let text = `<title>${title}</title>`;
+        for (const line of lines) {
+            const left = line.left || 0;
+            const sp = ' '.repeat(indents[left]);
+
+            text += sp + line.text + "\n";
+        }
 
-        text = title + "\n" + text;
-        return await super.run(Buffer.from(text), {skipCheck: true, isText: true});
+        return await super.run(Buffer.from(text), {skipCheck: true, isText: true, cutTitle: true});
     }
 }