瀏覽代碼

Реструктуризация

Book Pauk 2 年之前
父節點
當前提交
1d99472ca1
共有 2 個文件被更改,包括 130 次插入23 次删除
  1. 102 0
      server/core/fb2/Fb2Helper.js
  2. 28 23
      server/core/fb2/Fb2Parser.js

+ 102 - 0
server/core/fb2/Fb2Helper.js

@@ -0,0 +1,102 @@
+const fs = require('fs-extra');
+const iconv = require('iconv-lite');
+const textUtils = require('./textUtils');
+
+const Fb2Parser = require('../fb2/Fb2Parser');
+const utils = require('../utils');
+
+class Fb2Helper {
+    checkEncoding(data) {
+        //Корректируем кодировку UTF-16
+        let encoding = textUtils.getEncoding(data);
+        if (encoding.indexOf('UTF-16') == 0) {
+            data = Buffer.from(iconv.decode(data, encoding));
+            encoding = 'utf-8';
+        }
+
+        //Корректируем пробелы, всякие файлы попадаются :(
+        if (data[0] == 32) {
+            data = Buffer.from(data.toString().trim());
+        }
+
+        //Окончательно корректируем кодировку
+        let result = data;
+
+        let left = data.indexOf('<?xml version="1.0"');
+        if (left < 0) {
+            left = data.indexOf('<?xml version=\'1.0\'');
+        }
+
+        if (left >= 0) {
+            const right = data.indexOf('?>', left);
+            if (right >= 0) {
+                const head = data.slice(left, right + 2).toString();
+                const m = head.match(/encoding=['"](.*?)['"]/);
+                if (m) {
+                    let enc = m[1].toLowerCase();
+                    if (enc != 'utf-8') {
+                        //enc может не соответсвовать реальной кодировке файла, поэтому:
+                        if (encoding.indexOf('ISO-8859') >= 0) {
+                            encoding = enc;
+                        }
+
+                        result = iconv.decode(data, encoding);
+                        result = Buffer.from(result.toString().replace(m[0], `encoding="utf-8"`));
+                    }
+                }
+            }
+        }
+
+        return result;
+    }
+
+    async getDescAndCover(bookFile) {
+        let data = await fs.readFile(bookFile);
+        data = await utils.gunzipBuffer(data);
+
+        data = this.checkEncoding(data);
+
+        const fb2 = new Fb2Parser();
+
+        fb2.fromString(data.toString(), {
+            lowerCase: true,
+            pickNode: route => route.indexOf('fictionbook/body') !== 0,
+        });
+
+        const desc = fb2.$$('description').toObject();
+        const coverImage = fb2.inspector(desc).$('description/title-info/coverpage/image');
+
+        let cover = null;
+        let coverExt = '';
+        if (coverImage) {
+            const coverAttrs = coverImage.attrs();
+            const href = coverAttrs['l:href'];
+            let coverType = coverAttrs['content-type'];
+            coverType = (coverType == 'image/jpg' || coverType == 'application/octet-stream' ? 'image/jpeg' : coverType);
+            coverExt = (coverType == 'image/png' ? '.png' : '.jpg');
+
+            if (href) {
+                const binaryId = (href[0] == '#' ? href.substring(1) : href);
+
+                //найдем нужный image
+                fb2.$$('binary').eachSelf(node => {
+                    let attrs = node.attrs();
+                    if (!attrs)
+                        return;
+                    attrs = Object.fromEntries(attrs);
+
+                    if (attrs.id === binaryId) {
+                        const textNode = new Fb2Parser(node.value);
+                        const base64 = textNode.$self('*TEXT').value;
+
+                        cover = (base64 ? Buffer.from(base64, 'base64') : null);
+                    }
+                });
+            }
+        }
+
+        return {desc, cover, coverExt};
+    }
+}
+
+module.exports = Fb2Helper;

+ 28 - 23
server/core/fb2/Fb2Parser.js

@@ -1,28 +1,6 @@
 const XmlParser = require('../xml/XmlParser');
 
-class Fb2Parser {
-    constructor() {
-        this.xml = new XmlParser();
-    }
-
-    toString(options) {
-        return this.xml.toString(options);
-    }
-
-    fromString(fb2String) {
-        this.xml.fromString(fb2String);
-        return this;
-    }
-
-    toObject(options) {
-        return this.xml.toObject(options);
-    }
-
-    fromObject(fb2Object) {
-        this.xml.fromObject(fb2Object);
-        return this;
-    }
-
+class Fb2Parser extends XmlParser {
     bookInfo(fb2Object) {
         if (!fb2Object)
             fb2Object = this.toObject();
@@ -33,6 +11,33 @@ class Fb2Parser {
 
     bookInfoList(fb2Object) {
     }
+
+    toHtml(xmlString) {
+        const substs = {
+            '<subtitle>': '<p><b>',
+            '</subtitle>': '</b></p>',
+            '<empty-line/>': '<br>',
+            '<strong>': '<b>',
+            '</strong>': '</b>',
+            '<emphasis>': '<i>',
+            '</emphasis>': '</i>',
+            '<stanza>': '<br>',
+            '</stanza>': '',
+            '<poem>': '<br>',
+            '</poem>': '',
+            '<cite>': '<i>',
+            '</cite>': '</i>',
+            '<table>': '<br>',
+            '</table>': '',
+        };
+
+        for (const [tag, s] of Object.entries(substs)) {
+            const r = new RegExp(`${tag}`, 'g');
+            xmlString = xmlString.replace(r, s);
+        }
+
+        return xmlString;
+    }    
 }
 
 module.exports = Fb2Parser;