Forráskód Böngészése

Работа над WebWorker и DbCreator

Book Pauk 2 éve
szülő
commit
801a4cdbb5
4 módosított fájl, 233 hozzáadás és 6 törlés
  1. 198 5
      server/core/DbCreator.js
  2. 22 1
      server/core/WebWorker.js
  3. 10 0
      server/core/utils.js
  4. 3 0
      server/index.js

+ 198 - 5
server/core/DbCreator.js

@@ -1,4 +1,5 @@
 const InpxParser = require('./InpxParser');
 const InpxParser = require('./InpxParser');
+const utils = require('./utils');
 
 
 class DbCreator {
 class DbCreator {
     constructor(config) {
     constructor(config) {
@@ -10,21 +11,112 @@ class DbCreator {
 
 
         //book
         //book
         await db.create({
         await db.create({
-            table: 'book'            
+            table: 'book'
         });
         });
 
 
-        //парсинг
-        const parser = new InpxParser();
-
+        callback({job: 'load inpx', jobMessage: 'Загрузка INPX'});
         const readFileCallback = async(readState) => {
         const readFileCallback = async(readState) => {
             callback(readState);
             callback(readState);
         };
         };
 
 
+        //поисковые таблицы, ниже сохраним в БД
+        let authorMap = new Map();//авторы
+        let authorArr = [];
+        let seriesMap = new Map();//серии
+        let seriesArr = [];
+        let titleMap = new Map();//названия
+        let titleArr = [];
+        let genreMap = new Map();//жанры
+        let genreArr = [];
+
         let recsLoaded = 0;
         let recsLoaded = 0;
         let id = 0;
         let id = 0;
         const parsedCallback = async(chunk) => {
         const parsedCallback = async(chunk) => {
             for (const rec of chunk) {
             for (const rec of chunk) {
                 rec.id = ++id;
                 rec.id = ++id;
+
+                if (!rec.author)
+                    continue;
+
+                //авторы
+                const author = rec.author.split(',');
+                if (author.length > 1)
+                    author.push(rec.author);
+
+                const authorIds = [];
+                for (const a of author) {
+                    let authorRec;
+                    if (authorMap.has(a)) {
+                         const authorId = authorMap.get(a);
+                         authorRec = authorArr[authorId];
+                    } else {
+                        authorRec = {id: authorArr.length, author: a, value: a.toLowerCase(), bookId: []};
+                        authorArr.push(authorRec);
+                        authorMap.set(a, authorRec.id);
+                    }
+
+                    authorRec.bookId.push(id);
+                    authorIds.push(authorRec.id);
+                }
+
+                //серии
+                if (rec.series) {
+                    const series = rec.series;
+
+                    let seriesRec;
+                    if (seriesMap.has(series)) {
+                        const seriesId = seriesMap.get(series);
+                        seriesRec = seriesArr[seriesId];
+                    } else {
+                        seriesRec = {id: seriesArr.length, value: series.toLowerCase(), authorId: new Set()};
+                        seriesArr.push(seriesRec);
+                        seriesMap.set(series, seriesRec.id);
+                    }
+
+                    for (const id of authorIds) {
+                        seriesRec.authorId.add(id);
+                    }
+                }
+
+                //названия
+                if (rec.title) {
+                    const title = rec.title;
+
+                    let titleRec;
+                    if (titleMap.has(title)) {
+                        const titileId = titleMap.get(title);
+                        titleRec = titleArr[titileId];
+                    } else {
+                        titleRec = {id: titleArr.length, value: title.toLowerCase(), authorId: new Set()};
+                        titleArr.push(titleRec);
+                        titleMap.set(title, titleRec.id);
+                    }
+
+                    for (const id of authorIds) {
+                        titleRec.authorId.add(id);
+                    }
+                }
+
+                //жанры
+                if (rec.genre) {
+                    const genre = rec.genre.split(',');
+
+                    for (const g of genre) {
+                        let genreRec;
+                        if (genreMap.has(g)) {
+                            const genreId = genreMap.get(g);
+                            genreRec = genreArr[genreId];
+                        } else {
+                            genreRec = {id: genreArr.length, value: g, authorId: new Set()};
+                            genreArr.push(genreRec);
+                            genreMap.set(g, genreRec.id);
+                        }
+
+                        for (const id of authorIds) {
+                            genreRec.authorId.add(id);
+                        }
+                    }
+                }
             }
             }
 
 
             await db.insert({table: 'book', rows: chunk});
             await db.insert({table: 'book', rows: chunk});
@@ -33,9 +125,110 @@ class DbCreator {
             callback({recsLoaded});
             callback({recsLoaded});
         };
         };
 
 
+        //парсинг
+        const parser = new InpxParser();
         await parser.parse(config.inpxFile, readFileCallback, parsedCallback);
         await parser.parse(config.inpxFile, readFileCallback, parsedCallback);
 
 
-        //поисковые таблицы
+        callback({job: 'config save', jobMessage: 'Сохранение конфигурации'});
+
+        //чистка памяти, ибо жрет как не в себя
+        authorMap = null;
+        seriesMap = null;
+        titleMap = null;
+        genreMap = null;
+
+        utils.freeMemory();
+
+        //конфиг
+
+        console.log('author:', authorArr.length);
+        console.log('series:', seriesArr.length);
+        console.log('title:', titleArr.length);
+        console.log('genre:', genreArr.length);
+
+
+        //сохраним поисковые таблицы
+        const chunkSize = 10000;
+
+        //author
+        callback({job: 'author save', jobMessage: 'Сохранение авторов книг'});
+        await db.create({
+            table: 'author',
+            index: {field: 'value', depth: config.indexDepth},
+        });
+
+        //вставка в БД по кусочкам, экономим память
+        for (let i = 0; i < authorArr.length; i += chunkSize) {
+            const chunk = authorArr.slice(i, i + chunkSize);
+
+            await db.insert({table: 'author', rows: chunk});
+        }
+
+        authorArr = null;
+        await db.close({table: 'author'});
+        utils.freeMemory();
+
+        //series
+        callback({job: 'series save', jobMessage: 'Сохранение серий книг'});
+        await db.create({
+            table: 'series',
+            index: {field: 'value', depth: config.indexDepth},
+        });
+
+        //вставка в БД по кусочкам, экономим память
+        for (let i = 0; i < seriesArr.length; i += chunkSize) {
+            const chunk = seriesArr.slice(i, i + chunkSize);
+            for (const rec of chunk)
+                rec.authorId = Array.from(rec.authorId);
+
+            await db.insert({table: 'series', rows: chunk});
+        }
+
+        seriesArr = null;
+        await db.close({table: 'series'});
+        utils.freeMemory();
+
+        //title
+        callback({job: 'title save', jobMessage: 'Сохранение названий книг'});
+        await db.create({
+            table: 'title',
+            index: {field: 'value', depth: config.indexDepth},
+        });
+
+        //вставка в БД по кусочкам, экономим память
+        for (let i = 0; i < titleArr.length; i += chunkSize) {
+            const chunk = titleArr.slice(i, i + chunkSize);
+            for (const rec of chunk)
+                rec.authorId = Array.from(rec.authorId);
+
+            await db.insert({table: 'title', rows: chunk});
+        }
+
+        titleArr = null;
+        await db.close({table: 'title'});
+        utils.freeMemory();
+
+        //genre
+        callback({job: 'genre save', jobMessage: 'Сохранение жанров'});
+        await db.create({
+            table: 'genre',
+            index: {field: 'value', depth: config.indexDepth},
+        });
+
+        //вставка в БД по кусочкам, экономим память
+        for (let i = 0; i < genreArr.length; i += chunkSize) {
+            const chunk = genreArr.slice(i, i + chunkSize);
+            for (const rec of chunk)
+                rec.authorId = Array.from(rec.authorId);
+
+            await db.insert({table: 'genre', rows: chunk});
+        }
+
+        genreArr = null;
+        await db.close({table: 'genre'});
+        utils.freeMemory();
+
+        callback({job: 'done', jobMessage: ''});
     }
     }
 }
 }
 
 

+ 22 - 1
server/core/WebWorker.js

@@ -1,3 +1,4 @@
+const os = require('os');
 const fs = require('fs-extra');
 const fs = require('fs-extra');
 
 
 const WorkerState = require('./WorkerState');
 const WorkerState = require('./WorkerState');
@@ -6,6 +7,7 @@ const DbCreator = require('./DbCreator');
 
 
 const ayncExit = new (require('./AsyncExit'))();
 const ayncExit = new (require('./AsyncExit'))();
 const log = new (require('./AppLogger'))().log;//singleton
 const log = new (require('./AppLogger'))().log;//singleton
+const utils = require('./utils');
 
 
 //server states
 //server states
 const ssNormal = 'normal';
 const ssNormal = 'normal';
@@ -34,6 +36,7 @@ class WebWorker {
             ayncExit.add(this.closeDb.bind(this));
             ayncExit.add(this.closeDb.bind(this));
 
 
             this.loadOrCreateDb();//no await
             this.loadOrCreateDb();//no await
+            this.logServerStats();//no await
 
 
             instance = this;
             instance = this;
         }
         }
@@ -70,7 +73,7 @@ class WebWorker {
         if (await fs.pathExists(dbPath))
         if (await fs.pathExists(dbPath))
             throw new Error(`createDb.pathExists: ${dbPath}`);
             throw new Error(`createDb.pathExists: ${dbPath}`);
 
 
-        const db = new JembaDbThread();
+        const db = new JembaDbThread();//создаем не в потоке, чтобы лучше работал GC
         await db.lock({
         await db.lock({
             dbPath,
             dbPath,
             create: true,
             create: true,
@@ -92,6 +95,8 @@ class WebWorker {
                     log(`  load ${state.fileName}`);
                     log(`  load ${state.fileName}`);
                 if (state.recsLoaded)
                 if (state.recsLoaded)
                     log(`  processed ${state.recsLoaded} records`);
                     log(`  processed ${state.recsLoaded} records`);
+                if (state.job)
+                    log(`  ${state.job}`);
             });
             });
 
 
             log('  finish INPX import');
             log('  finish INPX import');
@@ -132,6 +137,7 @@ class WebWorker {
 
 
             //открываем все таблицы
             //открываем все таблицы
             await this.db.openAll();
             await this.db.openAll();
+            await this.db.close({table: 'title'});
 
 
             log('Searcher DB is ready');
             log('Searcher DB is ready');
         } catch (e) {
         } catch (e) {
@@ -141,6 +147,21 @@ class WebWorker {
             this.setMyState(ssNormal);
             this.setMyState(ssNormal);
         }
         }
     }
     }
+
+    async logServerStats() {//periodically logs process memory usage and OS load averages; loops forever and never resolves
+        while (1) {// eslint-disable-line
+            try {
+                const memUsage = process.memoryUsage().rss/(1024*1024);//resident set size, converted from bytes to MB
+                let loadAvg = os.loadavg();
+                loadAvg = loadAvg.map(v => v.toFixed(2));//each load average formatted with two decimals
+
+                log(`Server info [ memUsage: ${memUsage.toFixed(2)}MB, loadAvg: (${loadAvg.join(', ')}) ]`);
+            } catch (e) {
+                log(LM_ERR, e.message);//a failed sample is only logged, the loop keeps running; NOTE(review): LM_ERR is not declared in the visible hunks - confirm it is in scope
+            }
+            await utils.sleep(5000);//pause between samples; presumably milliseconds - confirm against utils.sleep
+        }
+    }
 }
 }
 
 
 module.exports = WebWorker;
 module.exports = WebWorker;

+ 10 - 0
server/core/utils.js

@@ -34,10 +34,20 @@ function hasProp(obj, prop) {
     return Object.prototype.hasOwnProperty.call(obj, prop);
     return Object.prototype.hasOwnProperty.call(obj, prop);
 }
 }
 
 
+function freeMemory() {//requests garbage collection; does nothing when global.gc is not defined
+    if (global.gc) {//global.gc is exposed by Node only when started with --expose-gc
+        global.gc();
+        global.gc();//the call is repeated three times; presumably to reclaim more on successive passes - TODO confirm
+        global.gc();
+    }
+}
+
+
 module.exports = {
 module.exports = {
     sleep,
     sleep,
     versionText,
     versionText,
     findFiles,
     findFiles,
     touchFile,
     touchFile,
     hasProp,
     hasProp,
+    freeMemory,
 };
 };

+ 3 - 0
server/index.js

@@ -96,6 +96,9 @@ async function init() {
 
 
     config.recreateDb = argv.recreate || false;
     config.recreateDb = argv.recreate || false;
 
 
+    //TODO as cli param
+    config.indexDepth = 1000;
+
     //app
     //app
     const appDir = `${config.publicDir}/app`;
     const appDir = `${config.publicDir}/app`;
     const appNewDir = `${config.publicDir}/app_new`;
     const appNewDir = `${config.publicDir}/app_new`;