瀏覽代碼

Работа над конвертером pdf

Book Pauk 4 年之前
父節點
當前提交
75e01c899e
共有 2 個文件被更改，包括 60 次插入和 18 次删除
  1. + 33 - 1
      server/core/Reader/BookConverter/ConvertHtml.js
  2. + 27 - 17
      server/core/Reader/BookConverter/ConvertPdf.js

+ 33 - 1
server/core/Reader/BookConverter/ConvertHtml.js

@@ -52,6 +52,8 @@ class ConvertHtml extends ConvertBase {
         let image = {};
         let bold = false;
         let italic = false;
+        let superscript = false;
+        let subscript = false;
         let begining = true;
 
         let spaceCounter = [];
@@ -101,7 +103,11 @@ class ConvertHtml extends ConvertBase {
                 tOpen += (inSubTitle ? '<subtitle>' : '');
                 tOpen += (bold ? '<strong>' : '');
                 tOpen += (italic ? '<emphasis>' : '');
+                tOpen += (superscript ? '<sup>' : '');
+                tOpen += (subscript ? '<sub>' : '');
                 let tClose = ''
+                tClose += (subscript ? '</sub>' : '');
+                tClose += (superscript ? '</sup>' : '');
                 tClose +=  (italic ? '</emphasis>' : '');
                 tClose += (bold ? '</strong>' : '');
                 tClose += (inSubTitle ? '</subtitle>' : '');
@@ -152,6 +158,12 @@ class ConvertHtml extends ConvertBase {
                         bold = true;
                         break;
                 }
+
+                if (tag == 'sup')
+                    superscript = true;
+        
+                if (tag == 'sub')
+                    subscript = true;
             }
 
             if (tag == 'title' || tag == 'fb2-title') {
@@ -174,7 +186,7 @@ class ConvertHtml extends ConvertBase {
                 inImage = true;
                 const attrs = sax.getAttrsSync(tail);
                 image = {_n: 'binary', _attrs: {id: attrs.name.value, 'content-type': attrs.type.value}, _t: ''};
-            }
+            }            
         };
 
         const onEndNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
@@ -197,6 +209,12 @@ class ConvertHtml extends ConvertBase {
                         bold = false;
                         break;
                 }
+
+                if (tag == 'sup')
+                    superscript = false;
+        
+                if (tag == 'sub')
+                    subscript = false;
             }
 
             if (tag == 'title' || tag == 'fb2-title')
@@ -302,6 +320,8 @@ class ConvertHtml extends ConvertBase {
         //убираем лишнее, делаем валидный fb2, т.к. в рез-те разбиения на параграфы бьются теги
         bold = false;
         italic = false;
+        superscript = false;
+        subscript = false;
         inSubTitle = false;
         pars = body.section._a[0];
         for (let i = 0; i < pars.length; i++) {
@@ -321,7 +341,11 @@ class ConvertHtml extends ConvertBase {
                     tOpen += (inSubTitle ? '<subtitle>' : '');
                     tOpen += (bold ? '<strong>' : '');
                     tOpen += (italic ? '<emphasis>' : '');
+                    tOpen += (superscript ? '<sup>' : '');
+                    tOpen += (subscript ? '<sub>' : '');
                     let tClose = ''
+                    tClose += (subscript ? '</sub>' : '');
+                    tClose += (superscript ? '</sup>' : '');
                     tClose +=  (italic ? '</emphasis>' : '');
                     tClose += (bold ? '</strong>' : '');
                     tClose += (inSubTitle ? '</subtitle>' : '');
@@ -337,6 +361,10 @@ class ConvertHtml extends ConvertBase {
                         bold = true;
                     if (tag == 'emphasis')
                         italic = true;
+                    if (tag == 'sup')
+                        superscript = true;
+                    if (tag == 'sub')
+                        subscript = true;
                     if (tag == 'subtitle')
                         inSubTitle = true;
                 }
@@ -346,6 +374,10 @@ class ConvertHtml extends ConvertBase {
                         bold = false;
                     if (tag == 'emphasis')
                         italic = false;
+                    if (tag == 'sup')
+                        superscript = false;
+                    if (tag == 'sub')
+                        subscript = false;
                     if (tag == 'subtitle')
                         inSubTitle = false;
                 }

+ 27 - 17
server/core/Reader/BookConverter/ConvertPdf.js

@@ -91,15 +91,16 @@ class ConvertPdf extends ConvertHtml {
             
             //объединяем в одну строку равные по высоте
             const pl = [];
-            let pt = -100;
+            let pt = 0;
             let j = -1;
             pagelines.forEach(line => {
                 //добавим закрывающий тег стиля
                 line.text += line.tClose;
 
+                const f = (line.fonts.length ? fonts[line.fonts[0]] : null);
+
                 //проверим, возможно это заголовок
                 if (line.fonts.length == 1 && line.pageWidth) {
-                    const f = fonts[line.fonts[0]];
                     const centerLeft = (line.pageWidth - line.width)/2;
                     if (f && f.isBold && Math.abs(centerLeft - line.left) < 3) {
                         if (!sectionTitleFound) {
@@ -111,8 +112,14 @@ class ConvertPdf extends ConvertHtml {
                     }
                 }
 
-                //объедняем
-                if (Math.abs(pt - line.top) > 3) {
+                //добавим пустую строку, если надо
+                if (f && f.fontSize && Math.abs(pt - line.top) > f.fontSize*1.5) {
+                    j++;
+                    pl[j] = {text: '<br>'};
+                }
+
+                //объединяем
+                if (pt == 0 || Math.abs(pt - line.top) > 3) {
                     j++;
                     pl[j] = line;
                 } else {
@@ -136,21 +143,24 @@ class ConvertPdf extends ConvertHtml {
                 const attrs = sax.getAttrsSync(tail);
                 const fontId = (attrs.id && attrs.id.value ? attrs.id.value : '');
                 const fontStyle = (attrs.fontstyle && attrs.fontstyle.value ? attrs.fontstyle.value : '');
+                const fontSize = (attrs.fontsize && attrs.fontsize.value ? attrs.fontsize.value : '');
 
-                if (fontId && fontStyle) {
-                    const styles = fontStyle.split(' ');
+                if (fontId) {
                     const styleTags = {bold: 'b', italics: 'i', superscript: 'sup', subscript: 'sub'};
-                    const f = fonts[fontId] = {tOpen: '', tClose: '', isBold: false};
-
-                    styles.forEach(style => {
-                        const s = styleTags[style];
-                        if (s) {
-                            f.tOpen += `<${s}>`;
-                            f.tClose = `</${s}>${f.tClose}`;
-                            if (s == 'b')
-                                f.isBold = true;
-                        }
-                    });
+                    const f = fonts[fontId] = {tOpen: '', tClose: '', isBold: false, fontSize};
+
+                    if (fontStyle) {
+                        const styles = fontStyle.split(' ');
+                        styles.forEach(style => {
+                            const s = styleTags[style];
+                            if (s) {
+                                f.tOpen += `<${s}>`;
+                                f.tClose = `</${s}>${f.tClose}`;
+                                if (s == 'b')
+                                    f.isBold = true;
+                            }
+                        });
+                    }
                 }
             }