Преглед изворни кода

Промежуточный коммит, загрузка и обработка файла книги

Book Pauk пре 6 година
родитељ
комит
42ae088df3
3 измењених фајлова са 120 додато и 2 уклоњено
  1. 35 0
      server/core/FileDecompressor.js
  2. 57 0
      server/core/FileDetector.js
  3. 28 2
      server/core/ReaderWorker.js

+ 35 - 0
server/core/FileDecompressor.js

@@ -0,0 +1,35 @@
+const fs = require('fs-extra');
+const decompress = require('decompress');
+const FileDetector = require('./FileDetector');
+
+class FileDecompressor {
+    constructor() {
+        this.detector = new FileDetector();
+    }
+
+    async decompressFile(filename, outputDir) {
+        const fileType = await this.detector.detectFile(filename);
+
+        if (!(fileType.ext == 'zip' || fileType.ext == 'bz2'))
+            return filename;
+
+        const files = await decompress(filename, outputDir);
+
+        let result = filename;
+        let max = 0;
+        if (!files.length) {
+            //ищем файл с максимальным размером
+            for (let file of files) {
+                const stats = await fs.stat(file);
+                if (stats.size > max) {
+                    result = file;
+                    max = stats.size;
+                }
+            }
+        }
+
+        return result;
+    }
+}
+
+module.exports = FileDecompressor;

+ 57 - 0
server/core/FileDetector.js

@@ -0,0 +1,57 @@
+const detect = require('detect-file-type');
+
+//html
+detect.addSignature(
+  {
+    "type": "html",
+    "ext": "html",
+    "mime": "text/html",
+    "rules": [
+      { "type": "or", "rules":
+      [
+        { "type": "contains", "bytes": "3c68746d6c" },
+        { "type": "contains", "bytes": "3c00680074006d006c00" },
+
+        { "type": "contains", "bytes": "3c21646f6374797065" },
+        { "type": "contains", "bytes": "3c626f6479" },
+        { "type": "contains", "bytes": "3c68656164" },
+        { "type": "contains", "bytes": "3c696672616d65" },
+        { "type": "contains", "bytes": "3c696d67" },
+        { "type": "contains", "bytes": "3c6f626a656374" },
+        { "type": "contains", "bytes": "3c736372697074" },
+        { "type": "contains", "bytes": "3c7461626c65" },
+        { "type": "contains", "bytes": "3c7469746c65" },
+      ]
+      }
+    ]
+  }
+);
+
+//xml 3c 3f 78 6d 6c 20 76 65 72 73 69 6f 6e 3d 22 31 2e 30 22
+detect.addSignature(
+  {
+    "type": "xml",
+    "ext": "xml",
+    "mime": "application/xml",
+    "rules": [
+      { "type": "or", "rules":
+      [
+        { "type": "contains", "bytes": "3c3f786d6c2076657273696f6e3d22312e3022" },
+      ]
+      }
+    ]
+  }
+);
+
+class FileDetector {
+    detectFile(filename) {
+        return new Promise((resolve, reject) => {
+            detect.fromFile(filename, (err, result) => {
+                if (err) reject(err);
+                resolve(result);
+            });
+        });
+    }
+}
+
+module.exports = FileDetector;

+ 28 - 2
server/core/ReaderWorker.js

@@ -1,4 +1,7 @@
 const workerState = require('./workerState');
+const FileDetector = require('./FileDetector');
+const FileDecompressor = require('./FileDecompressor');
+//const BookParser = require('./BookParser');
 const utils = require('./utils');
 
 const fs = require('fs-extra');
@@ -12,6 +15,8 @@ class ReaderWorker {
         this.config = Object.assign({}, config);
         this.config.tempDownloadDir = `${config.tempDir}/download`;
         fs.ensureDirSync(this.config.tempDownloadDir);
+        this.detector = new FileDetector();
+        this.decomp = new FileDecompressor();
     }
 
     async loadBook(url, wState) {
@@ -21,6 +26,10 @@ class ReaderWorker {
             wState.set({state: 'download', step: 1, totalSteps: 3, url});
 
             const tempFilename = utils.randomHexString(30);
+            const tempFilename2 = utils.randomHexString(30);
+            const decompDirname = utils.randomHexString(30);
+
+            //download
             const d = download(url);
             d.on('downloadProgress', progress => {
                 wState.set({progress:  Math.round(progress.percent*100)});
@@ -29,9 +38,26 @@ class ReaderWorker {
                     d.destroy();
                 }
             });
-            await pipeline(d, fs.createWriteStream(`${this.config.tempDownloadDir}/${tempFilename}`));
+            const downloadedFilename = `${this.config.tempDownloadDir}/${tempFilename}`;
+            await pipeline(d, fs.createWriteStream(downloadedFilename));
+
+            //decompress
+            wState.set({state: 'decompress', step: 2, progress: 0});
+            const decompDir = `${this.config.tempDownloadDir}/${decompDirname}`;
+            const decompFilename = await this.decomp.decompressFile(downloadedFilename, decompDir);
+            wState.set({progress: 100});
             
-            wState.finish({step: 3, file: tempFilename});
+            //parse book
+            const fileType = await this.detector.detectFile(decompFilename);
+            if (fileType.ext == 'html' || fileType.ext == 'xml') {
+                //parse
+            }
+
+            //clean
+            await fs.remove(decompDir);
+            await fs.remove(downloadedFilename);
+
+            wState.finish({step: 3, file: tempFilename, fileType: fileType});
         } catch (e) {
             wState.set({state: 'error', error: (errMes ? errMes : e.message)});
         }