|
@@ -1,4 +1,5 @@
|
|
|
const InpxParser = require('./InpxParser');
|
|
|
+const utils = require('./utils');
|
|
|
|
|
|
class DbCreator {
|
|
|
constructor(config) {
|
|
@@ -10,21 +11,112 @@ class DbCreator {
|
|
|
|
|
|
//book
|
|
|
await db.create({
|
|
|
- table: 'book'
|
|
|
+ table: 'book'
|
|
|
});
|
|
|
|
|
|
- //парсинг
|
|
|
- const parser = new InpxParser();
|
|
|
-
|
|
|
+ callback({job: 'load inpx', jobMessage: 'Загрузка INPX'});
|
|
|
const readFileCallback = async(readState) => { //passed to parser.parse; hands the readState it receives to callback unchanged
|
|
|
callback(readState);
|
|
|
};
|
|
|
|
|
|
+ //search tables, filled while parsing and saved to the DB further below
|
|
|
+ let authorMap = new Map();//authors: author string -> index of its record in authorArr
|
|
|
+ let authorArr = []; //author records: {id, author, value, bookId}
|
|
|
+ let seriesMap = new Map();//series: series string -> index of its record in seriesArr
|
|
|
+ let seriesArr = []; //series records: {id, value, authorId}
|
|
|
+ let titleMap = new Map();//titles: title string -> index of its record in titleArr
|
|
|
+ let titleArr = []; //title records: {id, value, authorId}
|
|
|
+ let genreMap = new Map();//genres: genre string -> index of its record in genreArr
|
|
|
+ let genreArr = []; //genre records: {id, value, authorId}
|
|
|
+
|
|
|
let recsLoaded = 0;
|
|
|
let id = 0;
|
|
|
const parsedCallback = async(chunk) => {
|
|
|
for (const rec of chunk) {
|
|
|
rec.id = ++id;
|
|
|
+
|
|
|
+ if (!rec.author) //a record without an author is left out of every search table; it stays in chunk and is still inserted into 'book'
|
|
|
+ continue;
|
|
|
+
|
|
|
+ //authors: rec.author is split on ',' and every part is indexed separately
|
|
|
+ const author = rec.author.split(',');
|
|
|
+ if (author.length > 1) //when there are several parts, the full unsplit string is indexed as well
|
|
|
+ author.push(rec.author);
|
|
|
+
|
|
|
+ const authorIds = []; //ids of all author records this book was attached to; reused for series/title/genre below
|
|
|
+ for (const a of author) {
|
|
|
+ let authorRec;
|
|
|
+ if (authorMap.has(a)) { //already seen: reuse the existing record
|
|
|
+ const authorId = authorMap.get(a);
|
|
|
+ authorRec = authorArr[authorId];
|
|
|
+ } else { //first occurrence: the new record's id is its position in authorArr
|
|
|
+ authorRec = {id: authorArr.length, author: a, value: a.toLowerCase(), bookId: []};
|
|
|
+ authorArr.push(authorRec);
|
|
|
+ authorMap.set(a, authorRec.id);
|
|
|
+ }
|
|
|
+
|
|
|
+ authorRec.bookId.push(id); //id is the current book's id (same value as rec.id)
|
|
|
+ authorIds.push(authorRec.id);
|
|
|
+ }
|
|
|
+
|
|
|
+ //series: one record per distinct rec.series string, collecting the ids of its authors
|
|
|
+ if (rec.series) {
|
|
|
+ const series = rec.series;
|
|
|
+
|
|
|
+ let seriesRec;
|
|
|
+ if (seriesMap.has(series)) { //already seen: reuse the existing record
|
|
|
+ const seriesId = seriesMap.get(series);
|
|
|
+ seriesRec = seriesArr[seriesId];
|
|
|
+ } else { //first occurrence: id is the position in seriesArr, value is the lowercased series string
|
|
|
+ seriesRec = {id: seriesArr.length, value: series.toLowerCase(), authorId: new Set()};
|
|
|
+ seriesArr.push(seriesRec);
|
|
|
+ seriesMap.set(series, seriesRec.id);
|
|
|
+ }
|
|
|
+
|
|
|
+ for (const id of authorIds) { //NOTE(review): this loop-local id is an author id and shadows the outer book id counter
|
|
|
+ seriesRec.authorId.add(id); //authorId is a Set, so repeated author ids are stored once
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ //titles: one record per distinct rec.title string, collecting the ids of its authors
|
|
|
+ if (rec.title) {
|
|
|
+ const title = rec.title;
|
|
|
+
|
|
|
+ let titleRec;
|
|
|
+ if (titleMap.has(title)) { //already seen: reuse the existing record
|
|
|
+ const titileId = titleMap.get(title); //NOTE(review): "titileId" is a misspelling of titleId; the name is local to this branch
|
|
|
+ titleRec = titleArr[titileId];
|
|
|
+ } else { //first occurrence: id is the position in titleArr, value is the lowercased title
|
|
|
+ titleRec = {id: titleArr.length, value: title.toLowerCase(), authorId: new Set()};
|
|
|
+ titleArr.push(titleRec);
|
|
|
+ titleMap.set(title, titleRec.id);
|
|
|
+ }
|
|
|
+
|
|
|
+ for (const id of authorIds) { //NOTE(review): this loop-local id is an author id and shadows the outer book id counter
|
|
|
+ titleRec.authorId.add(id); //authorId is a Set, so repeated author ids are stored once
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ //genres: rec.genre is split on ',' and every part gets its own record, collecting the ids of its authors
|
|
|
+ if (rec.genre) {
|
|
|
+ const genre = rec.genre.split(',');
|
|
|
+
|
|
|
+ for (const g of genre) {
|
|
|
+ let genreRec;
|
|
|
+ if (genreMap.has(g)) { //already seen: reuse the existing record
|
|
|
+ const genreId = genreMap.get(g);
|
|
|
+ genreRec = genreArr[genreId];
|
|
|
+ } else { //first occurrence: id is the position in genreArr
|
|
|
+ genreRec = {id: genreArr.length, value: g, authorId: new Set()}; //unlike author/series/title, value is stored as-is, not lowercased
|
|
|
+ genreArr.push(genreRec);
|
|
|
+ genreMap.set(g, genreRec.id);
|
|
|
+ }
|
|
|
+
|
|
|
+ for (const id of authorIds) { //NOTE(review): this loop-local id is an author id and shadows the outer book id counter
|
|
|
+ genreRec.authorId.add(id); //authorId is a Set, so repeated author ids are stored once
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
await db.insert({table: 'book', rows: chunk});
|
|
@@ -33,9 +125,110 @@ class DbCreator {
|
|
|
callback({recsLoaded});
|
|
|
};
|
|
|
|
|
|
+ //parsing: the parser is created only after both callbacks have been defined
|
|
|
+ const parser = new InpxParser();
|
|
|
await parser.parse(config.inpxFile, readFileCallback, parsedCallback); //the code below runs only after parse() has resolved
|
|
|
|
|
|
- //поисковые таблицы
|
|
|
+ callback({job: 'config save', jobMessage: 'Сохранение конфигурации'});
|
|
|
+
|
|
|
+ //memory cleanup, because this consumes an enormous amount of it
|
|
|
+ authorMap = null; //the lookup maps are not read again below; only the *Arr arrays are
|
|
|
+ seriesMap = null;
|
|
|
+ titleMap = null;
|
|
|
+ genreMap = null;
|
|
|
+
|
|
|
+ utils.freeMemory(); //presumably asks the runtime to reclaim the dropped maps - confirm in ./utils
|
|
|
+
|
|
|
+ //config
|
|
|
+
|
|
|
+ console.log('author:', authorArr.length); //prints the number of records in each search table
|
|
|
+ console.log('series:', seriesArr.length);
|
|
|
+ console.log('title:', titleArr.length);
|
|
|
+ console.log('genre:', genreArr.length);
|
|
|
+
|
|
|
+
|
|
|
+ //save the search tables
|
|
|
+ const chunkSize = 10000; //maximum number of rows passed to a single db.insert call below
|
|
|
+
|
|
|
+ //author: report the job, create the table, insert authorArr, then release it
|
|
|
+ callback({job: 'author save', jobMessage: 'Сохранение авторов книг'});
|
|
|
+ await db.create({
|
|
|
+ table: 'author',
|
|
|
+ index: {field: 'value', depth: config.indexDepth}, //index on the lowercased author string; meaning of depth is defined by the DB layer - confirm there
|
|
|
+ });
|
|
|
+
|
|
|
+ //insert into the DB in chunks to save memory
|
|
|
+ for (let i = 0; i < authorArr.length; i += chunkSize) {
|
|
|
+ const chunk = authorArr.slice(i, i + chunkSize);
|
|
|
+
|
|
|
+ await db.insert({table: 'author', rows: chunk});
|
|
|
+ }
|
|
|
+
|
|
|
+ authorArr = null; //drop the only remaining reference to the author records
|
|
|
+ await db.close({table: 'author'});
|
|
|
+ utils.freeMemory(); //presumably asks the runtime to reclaim the released array - confirm in ./utils
|
|
|
+
|
|
|
+ //series: report the job, create the table, insert seriesArr, then release it
|
|
|
+ callback({job: 'series save', jobMessage: 'Сохранение серий книг'});
|
|
|
+ await db.create({
|
|
|
+ table: 'series',
|
|
|
+ index: {field: 'value', depth: config.indexDepth}, //index on the lowercased series string; meaning of depth is defined by the DB layer - confirm there
|
|
|
+ });
|
|
|
+
|
|
|
+ //insert into the DB in chunks to save memory
|
|
|
+ for (let i = 0; i < seriesArr.length; i += chunkSize) {
|
|
|
+ const chunk = seriesArr.slice(i, i + chunkSize);
|
|
|
+ for (const rec of chunk)
|
|
|
+ rec.authorId = Array.from(rec.authorId); //replace the Set of author ids with a plain array before the row is inserted
|
|
|
+
|
|
|
+ await db.insert({table: 'series', rows: chunk});
|
|
|
+ }
|
|
|
+
|
|
|
+ seriesArr = null; //drop the only remaining reference to the series records
|
|
|
+ await db.close({table: 'series'});
|
|
|
+ utils.freeMemory(); //presumably asks the runtime to reclaim the released array - confirm in ./utils
|
|
|
+
|
|
|
+ //title: report the job, create the table, insert titleArr, then release it
|
|
|
+ callback({job: 'title save', jobMessage: 'Сохранение названий книг'});
|
|
|
+ await db.create({
|
|
|
+ table: 'title',
|
|
|
+ index: {field: 'value', depth: config.indexDepth}, //index on the lowercased title string; meaning of depth is defined by the DB layer - confirm there
|
|
|
+ });
|
|
|
+
|
|
|
+ //insert into the DB in chunks to save memory
|
|
|
+ for (let i = 0; i < titleArr.length; i += chunkSize) {
|
|
|
+ const chunk = titleArr.slice(i, i + chunkSize);
|
|
|
+ for (const rec of chunk)
|
|
|
+ rec.authorId = Array.from(rec.authorId); //replace the Set of author ids with a plain array before the row is inserted
|
|
|
+
|
|
|
+ await db.insert({table: 'title', rows: chunk});
|
|
|
+ }
|
|
|
+
|
|
|
+ titleArr = null; //drop the only remaining reference to the title records
|
|
|
+ await db.close({table: 'title'});
|
|
|
+ utils.freeMemory(); //presumably asks the runtime to reclaim the released array - confirm in ./utils
|
|
|
+
|
|
|
+ //genre: report the job, create the table, insert genreArr, then release it
|
|
|
+ callback({job: 'genre save', jobMessage: 'Сохранение жанров'});
|
|
|
+ await db.create({
|
|
|
+ table: 'genre',
|
|
|
+ index: {field: 'value', depth: config.indexDepth}, //index on the genre string as stored (not lowercased); meaning of depth is defined by the DB layer - confirm there
|
|
|
+ });
|
|
|
+
|
|
|
+ //insert into the DB in chunks to save memory
|
|
|
+ for (let i = 0; i < genreArr.length; i += chunkSize) {
|
|
|
+ const chunk = genreArr.slice(i, i + chunkSize);
|
|
|
+ for (const rec of chunk)
|
|
|
+ rec.authorId = Array.from(rec.authorId); //replace the Set of author ids with a plain array before the row is inserted
|
|
|
+
|
|
|
+ await db.insert({table: 'genre', rows: chunk});
|
|
|
+ }
|
|
|
+
|
|
|
+ genreArr = null; //drop the only remaining reference to the genre records
|
|
|
+ await db.close({table: 'genre'});
|
|
|
+ utils.freeMemory(); //presumably asks the runtime to reclaim the released array - confirm in ./utils
|
|
|
+
|
|
|
+ callback({job: 'done', jobMessage: ''});
|
|
|
}
|
|
|
}
|
|
|
|