|
@@ -58,6 +58,12 @@ class DbCreator {
|
|
|
let genreArr = [];
|
|
|
let langMap = new Map();//языки
|
|
|
let langArr = [];
|
|
|
+ let delMap = new Map();//удаленные
|
|
|
+ let delArr = [];
|
|
|
+ let dateMap = new Map();//дата поступления
|
|
|
+ let dateArr = [];
|
|
|
+ let librateMap = new Map();//оценка
|
|
|
+ let librateArr = [];
|
|
|
|
|
|
//stats
|
|
|
let authorCount = 0;
|
|
@@ -132,6 +138,84 @@ class DbCreator {
|
|
|
callback({progress: (readState.current || 0)/totalFiles});
|
|
|
};
|
|
|
|
|
|
+ const parseField = (fieldValue, fieldMap, fieldArr, bookId, rec, fillBookIds = true) => {
|
|
|
+ let value = fieldValue;
|
|
|
+
|
|
|
+ if (typeof(fieldValue) == 'string') {
|
|
|
+ if (!fieldValue)
|
|
|
+ fieldValue = emptyFieldValue;
|
|
|
+
|
|
|
+ value = fieldValue.toLowerCase();
|
|
|
+ }
|
|
|
+
|
|
|
+ let fieldRec;
|
|
|
+ if (fieldMap.has(value)) {
|
|
|
+ const fieldId = fieldMap.get(value);
|
|
|
+ fieldRec = fieldArr[fieldId];
|
|
|
+ } else {
|
|
|
+ fieldRec = {id: fieldArr.length, value, bookIds: new Set()};
|
|
|
+ if (rec !== undefined) {
|
|
|
+ fieldRec.name = fieldValue;
|
|
|
+ fieldRec.bookCount = 0;
|
|
|
+ fieldRec.bookDelCount = 0;
|
|
|
+ }
|
|
|
+ fieldArr.push(fieldRec);
|
|
|
+ fieldMap.set(value, fieldRec.id);
|
|
|
+ }
|
|
|
+
|
|
|
+ if (fieldValue !== emptyFieldValue || fillBookIds)
|
|
|
+ fieldRec.bookIds.add(bookId);
|
|
|
+
|
|
|
+ if (rec !== undefined) {
|
|
|
+ if (!rec.del)
|
|
|
+ fieldRec.bookCount++;
|
|
|
+ else
|
|
|
+ fieldRec.bookDelCount++;
|
|
|
+ }
|
|
|
+ };
|
|
|
+
|
|
|
+ const parseBookRec = (rec) => {
|
|
|
+ //авторы
|
|
|
+ const author = splitAuthor(rec.author);
|
|
|
+
|
|
|
+ for (let i = 0; i < author.length; i++) {
|
|
|
+ const a = author[i];
|
|
|
+
|
|
|
+ //статистика
|
|
|
+ if (!authorMap.has(a.toLowerCase()) && (author.length == 1 || i < author.length - 1)) //без соавторов
|
|
|
+ authorCount++;
|
|
|
+
|
|
|
+ parseField(a, authorMap, authorArr, rec.id, rec);
|
|
|
+ }
|
|
|
+
|
|
|
+ //серии
|
|
|
+ parseField(rec.series, seriesMap, seriesArr, rec.id, rec, false);
|
|
|
+
|
|
|
+ //названия
|
|
|
+ parseField(rec.title, titleMap, titleArr, rec.id, rec);
|
|
|
+
|
|
|
+ //жанры
|
|
|
+ let genre = rec.genre || emptyFieldValue;
|
|
|
+            genre = genre.split(',');
|
|
|
+
|
|
|
+ for (let g of genre) {
|
|
|
+ parseField(g, genreMap, genreArr, rec.id);
|
|
|
+ }
|
|
|
+
|
|
|
+ //языки
|
|
|
+ parseField(rec.lang, langMap, langArr, rec.id);
|
|
|
+
|
|
|
+ //удаленные
|
|
|
+ parseField(rec.del, delMap, delArr, rec.id);
|
|
|
+
|
|
|
+ //дата поступления
|
|
|
+ parseField(rec.date, dateMap, dateArr, rec.id);
|
|
|
+
|
|
|
+ //оценка
|
|
|
+ parseField(rec.librate, librateMap, librateArr, rec.id);
|
|
|
+ };
|
|
|
+
|
|
|
+ //основная процедура парсинга
|
|
|
let id = 0;
|
|
|
const parsedCallback = async(chunk) => {
|
|
|
let filtered = false;
|
|
@@ -153,40 +237,7 @@ class DbCreator {
|
|
|
bookDelCount++;
|
|
|
}
|
|
|
|
|
|
- //авторы
|
|
|
- const author = splitAuthor(rec.author);
|
|
|
-
|
|
|
- for (let i = 0; i < author.length; i++) {
|
|
|
- const a = author[i];
|
|
|
- const value = a.toLowerCase();
|
|
|
-
|
|
|
- let authorRec;
|
|
|
- if (authorMap.has(value)) {
|
|
|
- const authorTmpId = authorMap.get(value);
|
|
|
- authorRec = authorArr[authorTmpId];
|
|
|
- } else {
|
|
|
- authorRec = {tmpId: authorArr.length, author: a, value, bookCount: 0, bookDelCount: 0, bookId: []};
|
|
|
- authorArr.push(authorRec);
|
|
|
- authorMap.set(value, authorRec.tmpId);
|
|
|
-
|
|
|
- if (author.length == 1 || i < author.length - 1) //без соавторов
|
|
|
- authorCount++;
|
|
|
- }
|
|
|
-
|
|
|
- //это нужно для того, чтобы имя автора начиналось с заглавной
|
|
|
- if (a[0].toUpperCase() === a[0])
|
|
|
- authorRec.author = a;
|
|
|
-
|
|
|
- //счетчики
|
|
|
- if (!rec.del) {
|
|
|
- authorRec.bookCount++;
|
|
|
- } else {
|
|
|
- authorRec.bookDelCount++;
|
|
|
- }
|
|
|
-
|
|
|
- //ссылки на книги
|
|
|
- authorRec.bookId.push(id);
|
|
|
- }
|
|
|
+ parseBookRec(rec);
|
|
|
}
|
|
|
|
|
|
let saveChunk = [];
|
|
@@ -205,248 +256,66 @@ class DbCreator {
|
|
|
utils.freeMemory();
|
|
|
};
|
|
|
|
|
|
- //парсинг 1
|
|
|
+ //парсинг
|
|
|
const parser = new InpxParser();
|
|
|
await parser.parse(config.inpxFile, readFileCallback, parsedCallback);
|
|
|
|
|
|
+ //чистка памяти, ибо жрет как не в себя
|
|
|
+ authorMap = null;
|
|
|
+ seriesMap = null;
|
|
|
+ titleMap = null;
|
|
|
+ genreMap = null;
|
|
|
+ langMap = null;
|
|
|
+ delMap = null;
|
|
|
+ dateMap = null;
|
|
|
+ librateMap = null;
|
|
|
+
|
|
|
+ await db.close({table: 'book'});
|
|
|
+ await db.freeMemory();
|
|
|
utils.freeMemory();
|
|
|
|
|
|
- //отсортируем авторов и выдадим им правильные id
|
|
|
- //порядок id соответствует ASC-сортировке по author.toLowerCase
|
|
|
- callback({job: 'author sort', jobMessage: 'Сортировка авторов', jobStep: 2, progress: 0});
|
|
|
+        //отсортируем таблицы и выдадим им правильные id
|
|
|
+ //порядок id соответствует ASC-сортировке по value
|
|
|
+ callback({job: 'sort', jobMessage: 'Сортировка', jobStep: 2, progress: 0});
|
|
|
await utils.sleep(100);
|
|
|
+ //сортировка авторов
|
|
|
authorArr.sort((a, b) => a.value.localeCompare(b.value));
|
|
|
+ callback({progress: 0.2});
|
|
|
+ await utils.sleep(100);
|
|
|
|
|
|
id = 0;
|
|
|
- authorMap = new Map();
|
|
|
for (const authorRec of authorArr) {
|
|
|
authorRec.id = ++id;
|
|
|
- authorMap.set(authorRec.author, id);
|
|
|
- delete authorRec.tmpId;
|
|
|
}
|
|
|
+ callback({progress: 0.3});
|
|
|
+ await utils.sleep(100);
|
|
|
|
|
|
- utils.freeMemory();
|
|
|
-
|
|
|
- //подготовка к сохранению author_book
|
|
|
- const saveBookChunk = async(authorChunk, callback) => {
|
|
|
- callback(0);
|
|
|
-
|
|
|
- const ids = [];
|
|
|
- for (const a of authorChunk) {
|
|
|
- for (const id of a.bookId) {
|
|
|
- ids.push(id);
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- ids.sort();// обязательно, иначе будет тормозить - особенности JembaDb
|
|
|
-
|
|
|
- callback(0.1);
|
|
|
- const rows = await db.select({table: 'book', where: `@@id(${db.esc(ids)})`});
|
|
|
- callback(0.6);
|
|
|
- await utils.sleep(100);
|
|
|
-
|
|
|
- const bookArr = new Map();
|
|
|
- for (const row of rows)
|
|
|
- bookArr.set(row.id, row);
|
|
|
-
|
|
|
- const abRows = [];
|
|
|
- for (const a of authorChunk) {
|
|
|
- const aBooks = [];
|
|
|
- for (const id of a.bookId) {
|
|
|
- const rec = bookArr.get(id);
|
|
|
- aBooks.push(rec);
|
|
|
- }
|
|
|
-
|
|
|
- abRows.push({id: a.id, author: a.author, books: JSON.stringify(aBooks)});
|
|
|
-
|
|
|
- delete a.bookId;//в дальнейшем не понадобится, authorArr сохраняем без него
|
|
|
- }
|
|
|
-
|
|
|
- callback(0.7);
|
|
|
- await db.insert({
|
|
|
- table: 'author_book',
|
|
|
- rows: abRows,
|
|
|
- });
|
|
|
- callback(1);
|
|
|
- };
|
|
|
-
|
|
|
- callback({job: 'book sort', jobMessage: 'Сортировка книг', jobStep: 3, progress: 0});
|
|
|
-
|
|
|
- //сохранение author_book
|
|
|
- await db.create({
|
|
|
- table: 'author_book',
|
|
|
- });
|
|
|
-
|
|
|
- let idsLen = 0;
|
|
|
- let aChunk = [];
|
|
|
- let prevI = 0;
|
|
|
- for (let i = 0; i < authorArr.length; i++) {// eslint-disable-line
|
|
|
- const author = authorArr[i];
|
|
|
-
|
|
|
- aChunk.push(author);
|
|
|
- idsLen += author.bookId.length;
|
|
|
-
|
|
|
- if (idsLen > 50000) {//константа выяснена эмпирическим путем "память/скорость"
|
|
|
- await saveBookChunk(aChunk, (p) => {
|
|
|
- callback({progress: (prevI + (i - prevI)*p)/authorArr.length});
|
|
|
- });
|
|
|
+ //сортировка серий
|
|
|
+ seriesArr.sort((a, b) => a.value.localeCompare(b.value));
|
|
|
+ callback({progress: 0.5});
|
|
|
+ await utils.sleep(100);
|
|
|
|
|
|
- prevI = i;
|
|
|
- idsLen = 0;
|
|
|
- aChunk = [];
|
|
|
- await utils.sleep(100);
|
|
|
- utils.freeMemory();
|
|
|
- await db.freeMemory();
|
|
|
- }
|
|
|
- }
|
|
|
- if (aChunk.length) {
|
|
|
- await saveBookChunk(aChunk, () => {});
|
|
|
- aChunk = null;
|
|
|
+ id = 0;
|
|
|
+ for (const seriesRec of seriesArr) {
|
|
|
+ seriesRec.id = ++id;
|
|
|
}
|
|
|
+ callback({progress: 0.6});
|
|
|
+ await utils.sleep(100);
|
|
|
|
|
|
- callback({progress: 1});
|
|
|
-
|
|
|
- //чистка памяти, ибо жрет как не в себя
|
|
|
- await db.close({table: 'book'});
|
|
|
- await db.freeMemory();
|
|
|
- utils.freeMemory();
|
|
|
-
|
|
|
- //парсинг 2, подготовка
|
|
|
- const parseField = (fieldValue, fieldMap, fieldArr, authorIds, bookId) => {
|
|
|
- let addBookId = bookId;
|
|
|
- if (!fieldValue) {
|
|
|
- fieldValue = emptyFieldValue;
|
|
|
- addBookId = 0;//!!!
|
|
|
- }
|
|
|
-
|
|
|
- const value = fieldValue.toLowerCase();
|
|
|
-
|
|
|
- let fieldRec;
|
|
|
- if (fieldMap.has(value)) {
|
|
|
- const fieldId = fieldMap.get(value);
|
|
|
- fieldRec = fieldArr[fieldId];
|
|
|
- } else {
|
|
|
- fieldRec = {id: fieldArr.length, value, authorId: new Set()};
|
|
|
- if (bookId)
|
|
|
- fieldRec.bookId = new Set();
|
|
|
- fieldArr.push(fieldRec);
|
|
|
- fieldMap.set(value, fieldRec.id);
|
|
|
- }
|
|
|
-
|
|
|
- for (const id of authorIds) {
|
|
|
- fieldRec.authorId.add(id);
|
|
|
- }
|
|
|
-
|
|
|
- if (addBookId)
|
|
|
- fieldRec.bookId.add(addBookId);
|
|
|
- };
|
|
|
-
|
|
|
- const parseBookRec = (rec) => {
|
|
|
- //авторы
|
|
|
- const author = splitAuthor(rec.author);
|
|
|
-
|
|
|
- const authorIds = [];
|
|
|
- for (const a of author) {
|
|
|
- const authorId = authorMap.get(a);
|
|
|
- if (!authorId) //подстраховка
|
|
|
- continue;
|
|
|
- authorIds.push(authorId);
|
|
|
- }
|
|
|
-
|
|
|
- //серии
|
|
|
- parseField(rec.series, seriesMap, seriesArr, authorIds, rec.id);
|
|
|
-
|
|
|
- //названия
|
|
|
- parseField(rec.title, titleMap, titleArr, authorIds);
|
|
|
-
|
|
|
- //жанры
|
|
|
- let genre = rec.genre || emptyFieldValue;
|
|
|
- genre = rec.genre.split(',');
|
|
|
-
|
|
|
- for (let g of genre) {
|
|
|
- if (!g)
|
|
|
- g = emptyFieldValue;
|
|
|
-
|
|
|
- let genreRec;
|
|
|
- if (genreMap.has(g)) {
|
|
|
- const genreId = genreMap.get(g);
|
|
|
- genreRec = genreArr[genreId];
|
|
|
- } else {
|
|
|
- genreRec = {id: genreArr.length, value: g, authorId: new Set()};
|
|
|
- genreArr.push(genreRec);
|
|
|
- genreMap.set(g, genreRec.id);
|
|
|
- }
|
|
|
-
|
|
|
- for (const id of authorIds) {
|
|
|
- genreRec.authorId.add(id);
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- //языки
|
|
|
- parseField(rec.lang, langMap, langArr, authorIds);
|
|
|
- };
|
|
|
-
|
|
|
- callback({job: 'search tables create', jobMessage: 'Создание поисковых таблиц', jobStep: 4, progress: 0});
|
|
|
-
|
|
|
- //парсинг 2, теперь можно создавать остальные поисковые таблицы
|
|
|
- let proc = 0;
|
|
|
- while (1) {// eslint-disable-line
|
|
|
- const rows = await db.select({
|
|
|
- table: 'author_book',
|
|
|
- where: `
|
|
|
- let iter = @getItem('parse_book');
|
|
|
- if (!iter) {
|
|
|
- iter = @all();
|
|
|
- @setItem('parse_book', iter);
|
|
|
- }
|
|
|
-
|
|
|
- const ids = new Set();
|
|
|
- let id = iter.next();
|
|
|
- while (!id.done) {
|
|
|
- ids.add(id.value);
|
|
|
- if (ids.size >= 10000)
|
|
|
- break;
|
|
|
- id = iter.next();
|
|
|
- }
|
|
|
-
|
|
|
- return ids;
|
|
|
- `
|
|
|
- });
|
|
|
-
|
|
|
- if (rows.length) {
|
|
|
- for (const row of rows) {
|
|
|
- const books = JSON.parse(row.books);
|
|
|
- for (const rec of books)
|
|
|
- parseBookRec(rec);
|
|
|
- }
|
|
|
-
|
|
|
- proc += rows.length;
|
|
|
- callback({progress: proc/authorArr.length});
|
|
|
- } else
|
|
|
- break;
|
|
|
-
|
|
|
- await utils.sleep(100);
|
|
|
- if (config.lowMemoryMode) {
|
|
|
- utils.freeMemory();
|
|
|
- await db.freeMemory();
|
|
|
- }
|
|
|
+ //сортировка названий
|
|
|
+ titleArr.sort((a, b) => a.value.localeCompare(b.value));
|
|
|
+ callback({progress: 0.8});
|
|
|
+ await utils.sleep(100);
|
|
|
+ id = 0;
|
|
|
+ for (const titleRec of titleArr) {
|
|
|
+ titleRec.id = ++id;
|
|
|
}
|
|
|
|
|
|
- //чистка памяти, ибо жрет как не в себя
|
|
|
- authorMap = null;
|
|
|
- seriesMap = null;
|
|
|
- titleMap = null;
|
|
|
- genreMap = null;
|
|
|
-
|
|
|
- utils.freeMemory();
|
|
|
-
|
|
|
- //config
|
|
|
- callback({job: 'config save', jobMessage: 'Сохранение конфигурации', jobStep: 5, progress: 0});
|
|
|
- await db.create({
|
|
|
- table: 'config'
|
|
|
- });
|
|
|
-
|
|
|
+ //stats
|
|
|
const stats = {
|
|
|
+ filesCount: 0,//вычислим позднее
|
|
|
+ filesCountAll: 0,//вычислим позднее
|
|
|
+ filesDelCount: 0,//вычислим позднее
|
|
|
recsLoaded,
|
|
|
authorCount,
|
|
|
authorCountAll: authorArr.length,
|
|
@@ -461,45 +330,33 @@ class DbCreator {
|
|
|
};
|
|
|
//console.log(stats);
|
|
|
|
|
|
- const inpxHashCreator = new InpxHashCreator(config);
|
|
|
-
|
|
|
- await db.insert({table: 'config', rows: [
|
|
|
- {id: 'inpxInfo', value: (inpxFilter && inpxFilter.info ? inpxFilter.info : parser.info)},
|
|
|
- {id: 'stats', value: stats},
|
|
|
- {id: 'inpxHash', value: await inpxHashCreator.getHash()},
|
|
|
- ]});
|
|
|
-
|
|
|
//сохраним поисковые таблицы
|
|
|
const chunkSize = 10000;
|
|
|
|
|
|
- const saveTable = async(table, arr, nullArr, authorIdToArray = false, bookIdToArray = false) => {
|
|
|
+ const saveTable = async(table, arr, nullArr, indexType = 'string') => {
|
|
|
|
|
|
- arr.sort((a, b) => a.value.localeCompare(b.value));
|
|
|
+ if (indexType == 'string')
|
|
|
+ arr.sort((a, b) => a.value.localeCompare(b.value));
|
|
|
+ else
|
|
|
+ arr.sort((a, b) => a.value - b.value);
|
|
|
|
|
|
await db.create({
|
|
|
table,
|
|
|
- index: {field: 'value', unique: true, depth: 1000000},
|
|
|
+ index: {field: 'value', unique: true, type: indexType, depth: 1000000},
|
|
|
});
|
|
|
|
|
|
//вставка в БД по кусочкам, экономим память
|
|
|
for (let i = 0; i < arr.length; i += chunkSize) {
|
|
|
const chunk = arr.slice(i, i + chunkSize);
|
|
|
|
|
|
- if (authorIdToArray) {
|
|
|
- for (const rec of chunk)
|
|
|
- rec.authorId = Array.from(rec.authorId);
|
|
|
- }
|
|
|
-
|
|
|
- if (bookIdToArray) {
|
|
|
- for (const rec of chunk)
|
|
|
- rec.bookId = Array.from(rec.bookId);
|
|
|
- }
|
|
|
+ for (const rec of chunk)
|
|
|
+ rec.bookIds = Array.from(rec.bookIds);
|
|
|
|
|
|
await db.insert({table, rows: chunk});
|
|
|
|
|
|
if (i % 5 == 0) {
|
|
|
await db.freeMemory();
|
|
|
- await utils.sleep(100);
|
|
|
+ await utils.sleep(10);
|
|
|
}
|
|
|
|
|
|
callback({progress: i/arr.length});
|
|
@@ -512,24 +369,33 @@ class DbCreator {
|
|
|
};
|
|
|
|
|
|
//author
|
|
|
- callback({job: 'author save', jobMessage: 'Сохранение индекса авторов', jobStep: 6, progress: 0});
|
|
|
+ callback({job: 'author save', jobMessage: 'Сохранение индекса авторов', jobStep: 3, progress: 0});
|
|
|
await saveTable('author', authorArr, () => {authorArr = null});
|
|
|
|
|
|
//series
|
|
|
- callback({job: 'series save', jobMessage: 'Сохранение индекса серий', jobStep: 7, progress: 0});
|
|
|
- await saveTable('series_temporary', seriesArr, () => {seriesArr = null}, true, true);
|
|
|
+ callback({job: 'series save', jobMessage: 'Сохранение индекса серий', jobStep: 4, progress: 0});
|
|
|
+ await saveTable('series', seriesArr, () => {seriesArr = null});
|
|
|
|
|
|
//title
|
|
|
- callback({job: 'title save', jobMessage: 'Сохранение индекса названий', jobStep: 8, progress: 0});
|
|
|
- await saveTable('title', titleArr, () => {titleArr = null}, true);
|
|
|
+ callback({job: 'title save', jobMessage: 'Сохранение индекса названий', jobStep: 5, progress: 0});
|
|
|
+ await saveTable('title', titleArr, () => {titleArr = null});
|
|
|
|
|
|
//genre
|
|
|
- callback({job: 'genre save', jobMessage: 'Сохранение индекса жанров', jobStep: 9, progress: 0});
|
|
|
- await saveTable('genre', genreArr, () => {genreArr = null}, true);
|
|
|
+ callback({job: 'genre save', jobMessage: 'Сохранение индекса жанров', jobStep: 6, progress: 0});
|
|
|
+ await saveTable('genre', genreArr, () => {genreArr = null});
|
|
|
|
|
|
+ callback({job: 'others save', jobMessage: 'Сохранение остальных индексов', jobStep: 7, progress: 0});
|
|
|
//lang
|
|
|
- callback({job: 'lang save', jobMessage: 'Сохранение индекса языков', jobStep: 10, progress: 0});
|
|
|
- await saveTable('lang', langArr, () => {langArr = null}, true);
|
|
|
+ await saveTable('lang', langArr, () => {langArr = null});
|
|
|
+
|
|
|
+ //del
|
|
|
+ await saveTable('del', delArr, () => {delArr = null}, 'number');
|
|
|
+
|
|
|
+ //date
|
|
|
+ await saveTable('date', dateArr, () => {dateArr = null});
|
|
|
+
|
|
|
+ //librate
|
|
|
+ await saveTable('librate', librateArr, () => {librateArr = null}, 'number');
|
|
|
|
|
|
//кэш-таблицы запросов
|
|
|
await db.create({table: 'query_cache'});
|
|
@@ -539,92 +405,226 @@ class DbCreator {
|
|
|
await db.create({table: 'file_hash'});
|
|
|
|
|
|
//-- завершающие шаги --------------------------------
|
|
|
- //оптимизация series, превращаем массив bookId в books
|
|
|
- callback({job: 'series optimization', jobMessage: 'Оптимизация', jobStep: 11, progress: 0});
|
|
|
-
|
|
|
await db.open({
|
|
|
table: 'book',
|
|
|
cacheSize: (config.lowMemoryMode ? 5 : 500),
|
|
|
});
|
|
|
- await db.open({table: 'series_temporary'});
|
|
|
+
|
|
|
+ callback({job: 'optimization', jobMessage: 'Оптимизация', jobStep: 8, progress: 0});
|
|
|
+ await this.optimizeTable('author', db, (p) => {
|
|
|
+ if (p.progress)
|
|
|
+ p.progress = 0.3*p.progress;
|
|
|
+ callback(p);
|
|
|
+ });
|
|
|
+ await this.optimizeTable('series', db, (p) => {
|
|
|
+ if (p.progress)
|
|
|
+ p.progress = 0.3 + 0.2*p.progress;
|
|
|
+ callback(p);
|
|
|
+ });
|
|
|
+ await this.optimizeTable('title', db, (p) => {
|
|
|
+ if (p.progress)
|
|
|
+ p.progress = 0.5 + 0.5*p.progress;
|
|
|
+ callback(p);
|
|
|
+ });
|
|
|
+
|
|
|
+ callback({job: 'stats count', jobMessage: 'Подсчет статистики', jobStep: 9, progress: 0});
|
|
|
+ await this.countStats(db, callback, stats);
|
|
|
+
|
|
|
+ //чистка памяти, ибо жрет как не в себя
|
|
|
+ await db.close({table: 'book'});
|
|
|
+ await db.freeMemory();
|
|
|
+ utils.freeMemory();
|
|
|
+
|
|
|
+ //config сохраняем в самом конце, нет конфига - с базой что-то не так
|
|
|
+ const inpxHashCreator = new InpxHashCreator(config);
|
|
|
+
|
|
|
await db.create({
|
|
|
- table: 'series',
|
|
|
- index: {field: 'value', unique: true, depth: 1000000},
|
|
|
+ table: 'config'
|
|
|
});
|
|
|
|
|
|
- const count = await db.select({table: 'series_temporary', count: true});
|
|
|
- const seriesCount = (count.length ? count[0].count : 0);
|
|
|
+ await db.insert({table: 'config', rows: [
|
|
|
+ {id: 'inpxInfo', value: (inpxFilter && inpxFilter.info ? inpxFilter.info : parser.info)},
|
|
|
+ {id: 'stats', value: stats},
|
|
|
+ {id: 'inpxHash', value: await inpxHashCreator.getHash()},
|
|
|
+ ]});
|
|
|
+
|
|
|
+ callback({job: 'done', jobMessage: ''});
|
|
|
+ }
|
|
|
+
|
|
|
+ async optimizeTable(from, db, callback) {
|
|
|
+ const config = this.config;
|
|
|
+
|
|
|
+ const to = `${from}_book`;
|
|
|
+ const toId = `${from}_id`;
|
|
|
|
|
|
- const saveSeriesChunk = async(seriesChunk) => {
|
|
|
+ await db.open({table: from});
|
|
|
+ await db.create({table: to});
|
|
|
+
|
|
|
+ let bookId2RecId = new Map();
|
|
|
+
|
|
|
+ const saveChunk = async(chunk) => {
|
|
|
const ids = [];
|
|
|
- for (const s of seriesChunk) {
|
|
|
- for (const id of s.bookId) {
|
|
|
+ for (const rec of chunk) {
|
|
|
+ for (const id of rec.bookIds) {
|
|
|
+ let b2r = bookId2RecId.get(id);
|
|
|
+ if (!b2r) {
|
|
|
+ b2r = [];
|
|
|
+ bookId2RecId.set(id, b2r);
|
|
|
+ }
|
|
|
+ b2r.push(rec.id);
|
|
|
+
|
|
|
ids.push(id);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- ids.sort();// обязательно, иначе будет тормозить - особенности JembaDb
|
|
|
+ if (config.fullOptimization) {
|
|
|
+ ids.sort((a, b) => a - b);// обязательно, иначе будет тормозить - особенности JembaDb
|
|
|
|
|
|
- const rows = await db.select({table: 'book', where: `@@id(${db.esc(ids)})`});
|
|
|
+ const rows = await db.select({table: 'book', where: `@@id(${db.esc(ids)})`});
|
|
|
|
|
|
- const bookArr = new Map();
|
|
|
- for (const row of rows)
|
|
|
- bookArr.set(row.id, row);
|
|
|
+ const bookArr = new Map();
|
|
|
+ for (const row of rows)
|
|
|
+ bookArr.set(row.id, row);
|
|
|
+
|
|
|
+ for (const rec of chunk) {
|
|
|
+ rec.books = [];
|
|
|
+
|
|
|
+ for (const id of rec.bookIds) {
|
|
|
+ const book = bookArr.get(id);
|
|
|
+ if (book) {//на всякий случай
|
|
|
+ rec.books.push(book);
|
|
|
+ }
|
|
|
+ }
|
|
|
|
|
|
- for (const s of seriesChunk) {
|
|
|
- const sBooks = [];
|
|
|
- for (const id of s.bookId) {
|
|
|
- const rec = bookArr.get(id);
|
|
|
- sBooks.push(rec);
|
|
|
+ delete rec.name;
|
|
|
+ delete rec.value;
|
|
|
+ delete rec.bookIds;
|
|
|
}
|
|
|
|
|
|
- s.books = JSON.stringify(sBooks);
|
|
|
- delete s.bookId;
|
|
|
+ await db.insert({
|
|
|
+ table: to,
|
|
|
+ rows: chunk,
|
|
|
+ });
|
|
|
}
|
|
|
-
|
|
|
- await db.insert({
|
|
|
- table: 'series',
|
|
|
- rows: seriesChunk,
|
|
|
- });
|
|
|
};
|
|
|
|
|
|
- const rows = await db.select({table: 'series_temporary'});
|
|
|
+ const rows = await db.select({table: from, count: true});
|
|
|
+ const fromLength = rows[0].count;
|
|
|
|
|
|
- idsLen = 0;
|
|
|
- aChunk = [];
|
|
|
- proc = 0;
|
|
|
- for (const row of rows) {// eslint-disable-line
|
|
|
- aChunk.push(row);
|
|
|
- idsLen += row.bookId.length;
|
|
|
- proc++;
|
|
|
+ let processed = 0;
|
|
|
+ while (1) {// eslint-disable-line
|
|
|
+ const chunk = await db.select({
|
|
|
+ table: from,
|
|
|
+ where: `
|
|
|
+ let iter = @getItem('optimize');
|
|
|
+ if (!iter) {
|
|
|
+ iter = @all();
|
|
|
+ @setItem('optimize', iter);
|
|
|
+ }
|
|
|
|
|
|
- if (idsLen > 20000) {//константа выяснена эмпирическим путем "память/скорость"
|
|
|
- await saveSeriesChunk(aChunk);
|
|
|
+ const ids = new Set();
|
|
|
+ let bookIdsLen = 0;
|
|
|
+ let id = iter.next();
|
|
|
+ while (!id.done) {
|
|
|
+ ids.add(id.value);
|
|
|
|
|
|
- idsLen = 0;
|
|
|
- aChunk = [];
|
|
|
+ const row = @row(id.value);
|
|
|
+ bookIdsLen += row.bookIds.length;
|
|
|
+ if (bookIdsLen >= 50000)
|
|
|
+ break;
|
|
|
|
|
|
- callback({progress: proc/seriesCount});
|
|
|
+ id = iter.next();
|
|
|
+ }
|
|
|
|
|
|
- await utils.sleep(100);
|
|
|
+ return ids;
|
|
|
+ `
|
|
|
+ });
|
|
|
+
|
|
|
+ if (chunk.length) {
|
|
|
+ await saveChunk(chunk);
|
|
|
+
|
|
|
+ processed += chunk.length;
|
|
|
+ callback({progress: 0.5*processed/fromLength});
|
|
|
+ } else
|
|
|
+ break;
|
|
|
+
|
|
|
+ if (this.config.lowMemoryMode) {
|
|
|
+ await utils.sleep(10);
|
|
|
utils.freeMemory();
|
|
|
await db.freeMemory();
|
|
|
}
|
|
|
}
|
|
|
- if (aChunk.length) {
|
|
|
- await saveSeriesChunk(aChunk);
|
|
|
- aChunk = null;
|
|
|
- }
|
|
|
|
|
|
- //чистка памяти, ибо жрет как не в себя
|
|
|
- await db.drop({table: 'book'});//таблица больше не понадобится
|
|
|
- await db.drop({table: 'series_temporary'});//таблица больше не понадобится
|
|
|
+ await db.close({table: to});
|
|
|
+ await db.close({table: from});
|
|
|
|
|
|
- await db.close({table: 'series'});
|
|
|
- await db.freeMemory();
|
|
|
+ await db.create({table: toId});
|
|
|
+
|
|
|
+ const chunkSize = 50000;
|
|
|
+ let idRows = [];
|
|
|
+ let proc = 0;
|
|
|
+ for (const [id, value] of bookId2RecId) {
|
|
|
+ idRows.push({id, value});
|
|
|
+ if (idRows.length >= chunkSize) {
|
|
|
+ await db.insert({table: toId, rows: idRows});
|
|
|
+ idRows = [];
|
|
|
+
|
|
|
+ proc += chunkSize;
|
|
|
+ callback({progress: 0.5 + 0.5*proc/bookId2RecId.size});
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if (idRows.length)
|
|
|
+ await db.insert({table: toId, rows: idRows});
|
|
|
+ await db.close({table: toId});
|
|
|
+
|
|
|
+ bookId2RecId = null;
|
|
|
utils.freeMemory();
|
|
|
+ }
|
|
|
|
|
|
- callback({job: 'done', jobMessage: ''});
|
|
|
+ async countStats(db, callback, stats) {
|
|
|
+ //статистика по количеству файлов
|
|
|
+
|
|
|
+ //эмуляция прогресса
|
|
|
+ let countDone = false;
|
|
|
+ (async() => {
|
|
|
+ let i = 0;
|
|
|
+ while (!countDone) {
|
|
|
+ callback({progress: i/100});
|
|
|
+ i = (i < 100 ? i + 5 : 100);
|
|
|
+ await utils.sleep(1000);
|
|
|
+ }
|
|
|
+ })();
|
|
|
+
|
|
|
+        //подсчет
|
|
|
+ const countRes = await db.select({table: 'book', rawResult: true, where: `
|
|
|
+ const files = new Set();
|
|
|
+ const filesDel = new Set();
|
|
|
+
|
|
|
+ for (const id of @all()) {
|
|
|
+ const r = @row(id);
|
|
|
+ const file = ${"`${r.folder}/${r.file}.${r.ext}`"};
|
|
|
+ if (!r.del) {
|
|
|
+ files.add(file);
|
|
|
+ } else {
|
|
|
+ filesDel.add(file);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ for (const file of filesDel)
|
|
|
+ if (files.has(file))
|
|
|
+ filesDel.delete(file);
|
|
|
+
|
|
|
+ return {filesCount: files.size, filesDelCount: filesDel.size};
|
|
|
+ `});
|
|
|
+
|
|
|
+ if (countRes.length) {
|
|
|
+ const res = countRes[0].rawResult;
|
|
|
+ stats.filesCount = res.filesCount;
|
|
|
+ stats.filesCountAll = res.filesCount + res.filesDelCount;
|
|
|
+ stats.filesDelCount = res.filesDelCount;
|
|
|
+ }
|
|
|
+
|
|
|
+ countDone = true;
|
|
|
}
|
|
|
}
|
|
|
|