123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184 |
- const path = require('path');
- const crypto = require('crypto');
- const ZipReader = require('./ZipReader');
- const utils = require('./utils');
// Names of the metadata entries looked up inside the archive next to the .inp files.
const collectionInfo = 'collection.info';
const structureInfo = 'structure.info';
const versionInfo = 'version.info';

// Field layout used when the archive provides no (or an empty) structure.info.
const defaultStructure = 'AUTHOR;GENRE;TITLE;SERIES;SERNO;FILE;SIZE;LIBID;DEL;EXT;DATE;LANG;LIBRATE;KEYWORDS';
// Extended layout kept for reference (additionally carries INSNO and FOLDER):
// 'AUTHOR;GENRE;TITLE;SERIES;SERNO;FILE;SIZE;LIBID;DEL;EXT;DATE;INSNO;FOLDER;LANG;LIBRATE;KEYWORDS;'

// Type tag of every known record field, keyed by the lower-cased field name.
// The fields tagged 'N' are the ones the parser converts to integers; the rest are tagged 'S'.
// Frozen because it is a shared, read-only lookup table.
const recStructType = Object.freeze({
    author: 'S',
    genre: 'S',
    title: 'S',
    series: 'S',
    serno: 'N',
    file: 'S',
    size: 'N',
    libid: 'S',
    del: 'N',
    ext: 'S',
    date: 'S',
    insno: 'N',
    folder: 'S',
    lang: 'S',
    librate: 'N',
    keywords: 'S',
});
/**
 * Parser for an INPX archive: a zip file containing `*.inp` record files and,
 * optionally, `collection.info`, `structure.info` and `version.info`.
 *
 * Records are accumulated in `this.chunk` and handed to the caller in batches.
 */
class InpxParser {
    constructor() {
        // Filled by parse(): collection, structure, version, recStruct.
        this.inpxInfo = {};
        // Records parsed so far but not yet passed to parsedCallback.
        this.chunk = [];
    }

    /**
     * Extracts an archive entry as a trimmed string.
     *
     * @param {object} zipReader - opened reader exposing `extractToBuf(fileName)`
     * @param {string} fileName - entry name inside the archive
     * @returns {Promise<string>} entry content, or '' when extraction fails
     */
    async safeExtractToString(zipReader, fileName) {
        let result = '';
        try {
            result = (await zipReader.extractToBuf(fileName)).toString().trim();
        } catch (e) {
            // Deliberately quiet: a failed extraction is reported to the caller as ''.
        }
        return result;
    }

    /**
     * Builds the typed record description for a list of field names.
     * Field names missing from `recStructType` are dropped.
     *
     * @param {string[]} structure - lower-cased field names
     * @returns {{field: string, type: string}[]}
     */
    getRecStruct(structure) {
        // "folder" is always part of a record, even when the layout omits it
        const fields = (structure.includes('folder') ? structure : structure.concat(['folder']));

        const result = [];
        for (const field of fields) {
            if (utils.hasProp(recStructType, field)) {
                result.push({field, type: recStructType[field]});
            }
        }
        return result;
    }

    /**
     * Parses the whole archive.
     *
     * @param {string} inpxFile - value passed to `ZipReader.open()`
     * @param {Function} [readFileCallback] - async progress hook; called once with
     *   `{totalFiles}` and then with `{fileName, current}` before each file is read
     * @param {Function} [parsedCallback] - async hook receiving arrays of parsed records
     * @returns {Promise<void>}
     */
    async parse(inpxFile, readFileCallback, parsedCallback) {
        if (!readFileCallback) {
            readFileCallback = async() => {};
        }
        if (!parsedCallback) {
            parsedCallback = async() => {};
        }

        const zipReader = new ZipReader();
        await zipReader.open(inpxFile);

        try {
            const info = this.inpxInfo;

            // collect the .inp entries
            const inpFiles = [];
            for (const entry of Object.values(zipReader.entries)) {
                if (!entry.isDirectory && path.extname(entry.name) === '.inp') {
                    inpFiles.push(entry.name);
                }
            }

            // plus the 3 .info files
            await readFileCallback({totalFiles: inpFiles.length + 3});

            let current = 0;

            // info
            await readFileCallback({fileName: collectionInfo, current: ++current});
            info.collection = await this.safeExtractToString(zipReader, collectionInfo);

            await readFileCallback({fileName: structureInfo, current: ++current});
            info.structure = await this.safeExtractToString(zipReader, structureInfo);

            await readFileCallback({fileName: versionInfo, current: ++current});
            info.version = await this.safeExtractToString(zipReader, versionInfo);

            // record layout: fall back to the default when structure.info is absent or empty
            if (!info.structure) {
                info.structure = defaultStructure;
            }
            const structure = info.structure.toLowerCase().split(';');
            info.recStruct = this.getRecStruct(structure);

            // parse the .inp files
            this.chunk = [];
            for (const inpFile of inpFiles) {
                await readFileCallback({fileName: inpFile, current: ++current});
                await this.parseInp(zipReader, inpFile, structure, parsedCallback);
            }

            // hand over whatever is left after the last full batch
            if (this.chunk.length) {
                await parsedCallback(this.chunk);
            }
        } finally {
            await zipReader.close();
        }
    }

    /**
     * Parses one `.inp` entry, appending records to `this.chunk` and passing the
     * chunk to `parsedCallback` every time it reaches 10000 records.
     *
     * @param {object} zipReader - opened reader exposing `extractToBuf(fileName)`
     * @param {string} inpFile - entry name; its base name gives the default folder
     * @param {string[]} structure - lower-cased field names, in column order
     * @param {Function} parsedCallback - async hook receiving an array of records
     * @returns {Promise<void>}
     */
    async parseInp(zipReader, inpFile, structure, parsedCallback) {
        const maxChunkSize = 10000;
        const numericFields = ['serno', 'size', 'del', 'insno', 'librate'];

        const inpBuf = await zipReader.extractToBuf(inpFile);
        const rows = inpBuf.toString().split('\n');

        const defaultFolder = `${path.basename(inpFile, '.inp')}.zip`;
        const structLen = structure.length;

        for (const row of rows) {
            // Drop the CR of a CRLF ending first, so that blank CRLF lines are
            // skipped instead of producing an empty record.
            const line = (row.endsWith('\r') ? row.slice(0, -1) : row);
            if (!line) {
                continue;
            }

            const rec = {};

            // unique record identifier
            rec._uid = crypto.createHash('sha256').update(line).digest('base64');

            // split the line into fields; extra columns or extra field names are ignored
            const parts = line.split('\x04');
            const len = Math.min(parts.length, structLen);
            for (let i = 0; i < len; i++) {
                if (structure[i]) {
                    rec[structure[i]] = parts[i];
                }
            }

            // special handling of some fields
            if (rec.author) {
                // "Last,First,Middle:Last,First,:" -> "Last First Middle,Last First"
                rec.author = rec.author
                    .split(':')
                    .map((s) => s.replace(/,/g, ' ').trim())
                    .filter((s) => s)
                    .join(',');
            }

            if (rec.genre) {
                rec.genre = rec.genre.split(':').filter((s) => s).join(',');
            }

            if (!rec.folder) {
                rec.folder = defaultFolder;
            }

            // numeric fields are always present; missing or unparsable values become 0
            for (const field of numericFields) {
                rec[field] = parseInt(rec[field], 10) || 0;
            }

            // push and flush a full batch
            this.chunk.push(rec);

            if (this.chunk.length >= maxChunkSize) {
                await parsedCallback(this.chunk);
                this.chunk = [];
            }
        }
    }

    /** Archive metadata collected by parse(). */
    get info() {
        return this.inpxInfo;
    }
}
- module.exports = InpxParser;
|