|
@@ -61,118 +61,188 @@ class DbCreator {
|
|
if (author.length > 1)
|
|
if (author.length > 1)
|
|
author.push(rec.author);
|
|
author.push(rec.author);
|
|
|
|
|
|
- const authorIds = [];
|
|
|
|
for (let i = 0; i < author.length; i++) {
|
|
for (let i = 0; i < author.length; i++) {
|
|
const a = author[i];
|
|
const a = author[i];
|
|
|
|
|
|
let authorRec;
|
|
let authorRec;
|
|
if (authorMap.has(a)) {
|
|
if (authorMap.has(a)) {
|
|
- const authorId = authorMap.get(a);
|
|
|
|
- authorRec = authorArr[authorId];
|
|
|
|
|
|
+ const authorTmpId = authorMap.get(a);
|
|
|
|
+ authorRec = authorArr[authorTmpId];
|
|
} else {
|
|
} else {
|
|
- authorRec = {id: authorArr.length, author: a, value: a.toLowerCase(), bookId: []};
|
|
|
|
|
|
+ authorRec = {tmpId: authorArr.length, author: a, value: a.toLowerCase(), bookId: []};
|
|
authorArr.push(authorRec);
|
|
authorArr.push(authorRec);
|
|
- authorMap.set(a, authorRec.id);
|
|
|
|
|
|
+ authorMap.set(a, authorRec.tmpId);
|
|
|
|
|
|
if (author.length == 1 || i < author.length - 1) //без соавторов
|
|
if (author.length == 1 || i < author.length - 1) //без соавторов
|
|
authorCount++;
|
|
authorCount++;
|
|
}
|
|
}
|
|
|
|
|
|
authorRec.bookId.push(id);
|
|
authorRec.bookId.push(id);
|
|
- authorIds.push(authorRec.id);
|
|
|
|
}
|
|
}
|
|
|
|
+ }
|
|
|
|
|
|
- //серии
|
|
|
|
- if (rec.series) {
|
|
|
|
- const series = rec.series;
|
|
|
|
|
|
+ await db.insert({table: 'book', rows: chunk});
|
|
|
|
+
|
|
|
|
+ recsLoaded += chunk.length;
|
|
|
|
+ callback({recsLoaded});
|
|
|
|
|
|
- let seriesRec;
|
|
|
|
- if (seriesMap.has(series)) {
|
|
|
|
- const seriesId = seriesMap.get(series);
|
|
|
|
- seriesRec = seriesArr[seriesId];
|
|
|
|
- } else {
|
|
|
|
- seriesRec = {id: seriesArr.length, value: series.toLowerCase(), authorId: new Set()};
|
|
|
|
- seriesArr.push(seriesRec);
|
|
|
|
- seriesMap.set(series, seriesRec.id);
|
|
|
|
- }
|
|
|
|
|
|
+ if (chunkNum++ % 10 == 0)
|
|
|
|
+ utils.freeMemory();
|
|
|
|
+ };
|
|
|
|
|
|
- for (const id of authorIds) {
|
|
|
|
- seriesRec.authorId.add(id);
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
|
|
+ //парсинг 1
|
|
|
|
+ const parser = new InpxParser();
|
|
|
|
+ await parser.parse(config.inpxFile, readFileCallback, parsedCallback);
|
|
|
|
|
|
- //названия
|
|
|
|
- if (rec.title) {
|
|
|
|
- const title = rec.title;
|
|
|
|
|
|
+ utils.freeMemory();
|
|
|
|
|
|
- let titleRec;
|
|
|
|
- if (titleMap.has(title)) {
|
|
|
|
- const titleId = titleMap.get(title);
|
|
|
|
- titleRec = titleArr[titleId];
|
|
|
|
- } else {
|
|
|
|
- titleRec = {id: titleArr.length, value: title.toLowerCase(), authorId: new Set()};
|
|
|
|
- titleArr.push(titleRec);
|
|
|
|
- titleMap.set(title, titleRec.id);
|
|
|
|
- }
|
|
|
|
|
|
+ //отсортируем авторов и выдадим им правильные id
|
|
|
|
+ //порядок id соответствует ASC-сортировке по author.toLowerCase
|
|
|
|
+ callback({job: 'author sort', jobMessage: 'Сортировка'});
|
|
|
|
+ authorArr.sort((a, b) => a.value.localeCompare(b.value));
|
|
|
|
+
|
|
|
|
+ id = 0;
|
|
|
|
+ authorMap = new Map();
|
|
|
|
+ for (const authorRec of authorArr) {
|
|
|
|
+ authorRec.id = ++id;
|
|
|
|
+ authorMap.set(authorRec.author, id);
|
|
|
|
+ delete authorRec.tmpId;
|
|
|
|
+ }
|
|
|
|
|
|
- for (const id of authorIds) {
|
|
|
|
- titleRec.authorId.add(id);
|
|
|
|
- }
|
|
|
|
|
|
+ utils.freeMemory();
|
|
|
|
+
|
|
|
|
+ //теперь можно создавать остальные поисковые таблицы
|
|
|
|
+ const parseBookRec = (rec) => {
|
|
|
|
+ //авторы
|
|
|
|
+ if (!rec.author) {
|
|
|
|
+ if (!rec.del)
|
|
|
|
+ noAuthorBookCount++;
|
|
|
|
+ rec.author = 'Автор не указан';
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ const author = rec.author.split(',');
|
|
|
|
+ if (author.length > 1)
|
|
|
|
+ author.push(rec.author);
|
|
|
|
+
|
|
|
|
+ const authorIds = [];
|
|
|
|
+ for (const a of author) {
|
|
|
|
+ const authorId = authorMap.get(a);
|
|
|
|
+ if (!authorId) //подстраховка
|
|
|
|
+ continue;
|
|
|
|
+ authorIds.push(authorId);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ //серии
|
|
|
|
+ if (rec.series) {
|
|
|
|
+ const series = rec.series;
|
|
|
|
+
|
|
|
|
+ let seriesRec;
|
|
|
|
+ if (seriesMap.has(series)) {
|
|
|
|
+ const seriesId = seriesMap.get(series);
|
|
|
|
+ seriesRec = seriesArr[seriesId];
|
|
|
|
+ } else {
|
|
|
|
+ seriesRec = {id: seriesArr.length, value: series.toLowerCase(), authorId: new Set()};
|
|
|
|
+ seriesArr.push(seriesRec);
|
|
|
|
+ seriesMap.set(series, seriesRec.id);
|
|
}
|
|
}
|
|
|
|
|
|
- //жанры
|
|
|
|
- if (rec.genre) {
|
|
|
|
- const genre = rec.genre.split(',');
|
|
|
|
-
|
|
|
|
- for (const g of genre) {
|
|
|
|
- let genreRec;
|
|
|
|
- if (genreMap.has(g)) {
|
|
|
|
- const genreId = genreMap.get(g);
|
|
|
|
- genreRec = genreArr[genreId];
|
|
|
|
- } else {
|
|
|
|
- genreRec = {id: genreArr.length, value: g, authorId: new Set()};
|
|
|
|
- genreArr.push(genreRec);
|
|
|
|
- genreMap.set(g, genreRec.id);
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- for (const id of authorIds) {
|
|
|
|
- genreRec.authorId.add(id);
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
|
|
+ for (const id of authorIds) {
|
|
|
|
+ seriesRec.authorId.add(id);
|
|
}
|
|
}
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ //названия
|
|
|
|
+ if (rec.title) {
|
|
|
|
+ const title = rec.title;
|
|
|
|
+
|
|
|
|
+ let titleRec;
|
|
|
|
+ if (titleMap.has(title)) {
|
|
|
|
+ const titleId = titleMap.get(title);
|
|
|
|
+ titleRec = titleArr[titleId];
|
|
|
|
+ } else {
|
|
|
|
+ titleRec = {id: titleArr.length, value: title.toLowerCase(), authorId: new Set()};
|
|
|
|
+ titleArr.push(titleRec);
|
|
|
|
+ titleMap.set(title, titleRec.id);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ for (const id of authorIds) {
|
|
|
|
+ titleRec.authorId.add(id);
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
|
|
- //языки
|
|
|
|
- if (rec.lang) {
|
|
|
|
- const lang = rec.lang;
|
|
|
|
|
|
+ //жанры
|
|
|
|
+ if (rec.genre) {
|
|
|
|
+ const genre = rec.genre.split(',');
|
|
|
|
|
|
- let langRec;
|
|
|
|
- if (langMap.has(lang)) {
|
|
|
|
- const langId = langMap.get(lang);
|
|
|
|
- langRec = langArr[langId];
|
|
|
|
|
|
+ for (const g of genre) {
|
|
|
|
+ let genreRec;
|
|
|
|
+ if (genreMap.has(g)) {
|
|
|
|
+ const genreId = genreMap.get(g);
|
|
|
|
+ genreRec = genreArr[genreId];
|
|
} else {
|
|
} else {
|
|
- langRec = {id: langArr.length, value: lang, authorId: new Set()};
|
|
|
|
- langArr.push(langRec);
|
|
|
|
- langMap.set(lang, langRec.id);
|
|
|
|
|
|
+ genreRec = {id: genreArr.length, value: g, authorId: new Set()};
|
|
|
|
+ genreArr.push(genreRec);
|
|
|
|
+ genreMap.set(g, genreRec.id);
|
|
}
|
|
}
|
|
|
|
|
|
for (const id of authorIds) {
|
|
for (const id of authorIds) {
|
|
- langRec.authorId.add(id);
|
|
|
|
|
|
+ genreRec.authorId.add(id);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
- await db.insert({table: 'book', rows: chunk});
|
|
|
|
-
|
|
|
|
- recsLoaded += chunk.length;
|
|
|
|
- callback({recsLoaded});
|
|
|
|
|
|
+ //языки
|
|
|
|
+ if (rec.lang) {
|
|
|
|
+ const lang = rec.lang;
|
|
|
|
+
|
|
|
|
+ let langRec;
|
|
|
|
+ if (langMap.has(lang)) {
|
|
|
|
+ const langId = langMap.get(lang);
|
|
|
|
+ langRec = langArr[langId];
|
|
|
|
+ } else {
|
|
|
|
+ langRec = {id: langArr.length, value: lang, authorId: new Set()};
|
|
|
|
+ langArr.push(langRec);
|
|
|
|
+ langMap.set(lang, langRec.id);
|
|
|
|
+ }
|
|
|
|
|
|
- if (chunkNum++ % 10 == 0)
|
|
|
|
- utils.freeMemory();
|
|
|
|
- };
|
|
|
|
|
|
+ for (const id of authorIds) {
|
|
|
|
+ langRec.authorId.add(id);
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
|
|
- //парсинг
|
|
|
|
- const parser = new InpxParser();
|
|
|
|
- await parser.parse(config.inpxFile, readFileCallback, parsedCallback);
|
|
|
|
|
|
+ callback({job: 'search tables create', jobMessage: 'Создание поисковых таблиц'});
|
|
|
|
+
|
|
|
|
+ //парсинг 2
|
|
|
|
+ while (1) {// eslint-disable-line
|
|
|
|
+ //пробегаемся по сохраненным книгам
|
|
|
|
+ const rows = await db.select({
|
|
|
|
+ table: 'book',
|
|
|
|
+ where: `
|
|
|
|
+ let iter = @getItem('book_parsing');
|
|
|
|
+ if (!iter) {
|
|
|
|
+ iter = @all();
|
|
|
|
+ @setItem('book_parsing', iter);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ const ids = new Set();
|
|
|
|
+ let id = iter.next();
|
|
|
|
+ while (!id.done && ids.size < 10000) {
|
|
|
|
+ ids.add(id.value);
|
|
|
|
+ id = iter.next();
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ return ids;
|
|
|
|
+ `
|
|
|
|
+ });
|
|
|
|
+
|
|
|
|
+ if (rows.length) {
|
|
|
|
+ for (const rec of rows)
|
|
|
|
+ parseBookRec(rec);
|
|
|
|
+ } else {
|
|
|
|
+ break;
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
|
|
//чистка памяти, ибо жрет как не в себя
|
|
//чистка памяти, ибо жрет как не в себя
|
|
authorMap = null;
|
|
authorMap = null;
|
|
@@ -180,7 +250,10 @@ class DbCreator {
|
|
titleMap = null;
|
|
titleMap = null;
|
|
genreMap = null;
|
|
genreMap = null;
|
|
|
|
|
|
- utils.freeMemory();
|
|
|
|
|
|
+ for (let i = 0; i < 3; i++) {
|
|
|
|
+ utils.freeMemory();
|
|
|
|
+ await utils.sleep(1000);
|
|
|
|
+ }
|
|
|
|
|
|
//config
|
|
//config
|
|
callback({job: 'config save', jobMessage: 'Сохранение конфигурации'});
|
|
callback({job: 'config save', jobMessage: 'Сохранение конфигурации'});
|
|
@@ -303,7 +376,7 @@ class DbCreator {
|
|
utils.freeMemory();
|
|
utils.freeMemory();
|
|
|
|
|
|
//кэш-таблицы
|
|
//кэш-таблицы
|
|
-
|
|
|
|
|
|
+
|
|
|
|
|
|
callback({job: 'done', jobMessage: ''});
|
|
callback({job: 'done', jobMessage: ''});
|
|
}
|
|
}
|