// InpxParser.js
  1. const path = require('path');
  2. const crypto = require('crypto');
  3. const ZipReader = require('./ZipReader');
  4. const utils = require('./utils');
  // Names of the three metadata entries read from the archive before any .inp file.
  const collectionInfo = 'collection.info';
  const structureInfo = 'structure.info';
  const versionInfo = 'version.info';
  // Semicolon-separated field list used when the structure.info entry is
  // missing or empty. Field names are lower-cased before use.
  const defaultStructure = 'AUTHOR;GENRE;TITLE;SERIES;SERNO;FILE;SIZE;LIBID;DEL;EXT;DATE;LANG;LIBRATE;KEYWORDS';
  // Alternative field list that additionally contains INSNO and FOLDER; kept for reference only:
  //'AUTHOR;GENRE;TITLE;SERIES;SERNO;FILE;SIZE;LIBID;DEL;EXT;DATE;INSNO;FOLDER;LANG;LIBRATE;KEYWORDS;'
  // Known record fields (lower-case) mapped to a one-letter type tag.
  // Fields tagged 'N' are exactly the ones InpxParser.parseInp converts with
  // parseInt; 'S' fields are kept as text. (Presumably 'S' = string and
  // 'N' = number - confirm against the consumers of recStruct.)
  // Structure fields that are not listed here are left out of recStruct.
  const recStructType = {
      author: 'S',
      genre: 'S',
      title: 'S',
      series: 'S',
      serno: 'N',
      file: 'S',
      size: 'N',
      libid: 'S',
      del: 'N',
      ext: 'S',
      date: 'S',
      insno: 'N',
      folder: 'S',
      lang: 'S',
      librate: 'N',
      keywords: 'S',
  }
  /**
   * Reads an .inpx archive (a zip of *.inp record files plus optional
   * collection.info / structure.info / version.info entries) and hands the
   * parsed records to a callback in chunks.
   */
  class InpxParser {
      constructor() {
          // Metadata collected by parse(): collection, structure, version, recStruct.
          this.inpxInfo = {};
          // Records accumulated by parseInp() and not yet handed to the callback.
          // Initialised here so parseInp() also works when called without parse().
          this.chunk = [];
      }

      /**
       * Best-effort read of a text entry from the archive.
       *
       * @param {object} zipReader - reader exposing `extractToBuf(fileName)`.
       * @param {string} fileName - entry name inside the archive.
       * @returns {Promise<string>} trimmed entry contents, or '' when the entry
       *   cannot be extracted (the error is intentionally ignored).
       */
      async safeExtractToString(zipReader, fileName) {
          let result = '';
          try {
              result = (await zipReader.extractToBuf(fileName)).toString().trim();
          } catch (e) {
              // Deliberately quiet: the .info entries are optional.
          }
          return result;
      }

      /**
       * Builds the typed record description for a list of field names.
       *
       * @param {string[]} structure - lower-case field names.
       * @returns {{field: string, type: string}[]} one item per field known to
       *   recStructType, in input order; unknown fields are dropped.
       */
      getRecStruct(structure) {
          const result = [];

          let struct = structure;
          // folder is always present in the result, even if the structure omits it
          if (!struct.includes('folder'))
              struct = struct.concat(['folder']);

          for (const field of struct) {
              if (utils.hasProp(recStructType, field))
                  result.push({field, type: recStructType[field]});
          }

          return result;
      }

      /**
       * Parses a whole .inpx file.
       *
       * @param {string} inpxFile - value passed to ZipReader.open().
       * @param {Function} [readFileCallback] - async progress callback; called once
       *   with `{totalFiles}` and then with `{fileName, current}` before each
       *   entry is read.
       * @param {Function} [parsedCallback] - async callback receiving arrays of
       *   parsed records (see parseInp for the chunking).
       * @returns {Promise<void>}
       */
      async parse(inpxFile, readFileCallback, parsedCallback) {
          if (!readFileCallback)
              readFileCallback = async() => {};

          if (!parsedCallback)
              parsedCallback = async() => {};

          const zipReader = new ZipReader();
          await zipReader.open(inpxFile);

          try {
              const info = this.inpxInfo;

              // count the inp files
              const entries = Object.values(zipReader.entries);
              const inpFiles = [];
              for (const entry of entries) {
                  if (!entry.isDirectory && path.extname(entry.name) === '.inp')
                      inpFiles.push(entry.name);
              }

              // plus the 3 .info files
              await readFileCallback({totalFiles: inpFiles.length + 3});

              let current = 0;
              // info
              await readFileCallback({fileName: collectionInfo, current: ++current});
              info.collection = await this.safeExtractToString(zipReader, collectionInfo);

              await readFileCallback({fileName: structureInfo, current: ++current});
              info.structure = await this.safeExtractToString(zipReader, structureInfo);

              await readFileCallback({fileName: versionInfo, current: ++current});
              info.version = await this.safeExtractToString(zipReader, versionInfo);

              // structure: fall back to the default when structure.info is absent or empty
              if (!info.structure)
                  info.structure = defaultStructure;
              const structure = info.structure.toLowerCase().split(';');

              info.recStruct = this.getRecStruct(structure);

              // parse the inp files
              this.chunk = [];
              for (const inpFile of inpFiles) {
                  await readFileCallback({fileName: inpFile, current: ++current});

                  await this.parseInp(zipReader, inpFile, structure, parsedCallback);
              }

              // flush whatever is left after the last file
              if (this.chunk.length) {
                  await parsedCallback(this.chunk);
              }
          } finally {
              await zipReader.close();
          }
      }

      /**
       * Parses one .inp entry and appends its records to `this.chunk`.
       * Whenever the chunk reaches 10000 records it is passed to
       * `parsedCallback` and a fresh chunk is started; a remainder is left in
       * `this.chunk` for the caller to flush.
       *
       * Each non-empty line is one record with fields separated by '\x04'.
       * Lines may end with CR; lines that are empty after removing it are skipped.
       *
       * @param {object} zipReader - reader exposing `extractToBuf(fileName)`.
       * @param {string} inpFile - entry name; its base name plus '.zip' is the
       *   default value of the record's `folder` field.
       * @param {string[]} structure - lower-case field names, positionally
       *   matching the fields of a line; empty names are skipped.
       * @param {Function} parsedCallback - async callback receiving full chunks.
       * @returns {Promise<void>}
       */
      async parseInp(zipReader, inpFile, structure, parsedCallback) {
          const maxChunkSize = 10000;

          const inpBuf = await zipReader.extractToBuf(inpFile);
          const rows = inpBuf.toString().split('\n');

          const defaultFolder = `${path.basename(inpFile, '.inp')}.zip`;
          const structLen = structure.length;

          for (const row of rows) {
              let line = row;
              // Strip the CR of a CRLF line ending BEFORE testing for emptiness,
              // otherwise a blank "\r" line would yield a bogus empty record.
              if (line.endsWith('\x0D'))
                  line = line.substring(0, line.length - 1);

              if (!line)
                  continue;

              const rec = {};
              // unique record identifier: SHA-256 of the raw line, base64-encoded
              const sha256 = crypto.createHash('sha256');
              rec._uid = sha256.update(line).digest('base64');

              // parse the record; extra parts beyond the structure are ignored
              const parts = line.split('\x04');

              const len = Math.min(parts.length, structLen);
              for (let i = 0; i < len; i++) {
                  if (structure[i])
                      rec[structure[i]] = parts[i];
              }

              // special handling of some fields
              if (rec.author) {
                  // "Last,First,Middle:..." -> "Last First Middle,..."
                  rec.author = rec.author.split(':').map(s => s.replace(/,/g, ' ').trim()).filter(s => s).join(',');
              }

              if (rec.genre) {
                  rec.genre = rec.genre.split(':').filter(s => s).join(',');
              }

              if (!rec.folder)
                  rec.folder = defaultFolder;

              // numeric fields: missing or non-numeric values become 0
              rec.serno = parseInt(rec.serno, 10) || 0;
              rec.size = parseInt(rec.size, 10) || 0;
              rec.del = parseInt(rec.del, 10) || 0;
              rec.insno = parseInt(rec.insno, 10) || 0;
              rec.librate = parseInt(rec.librate, 10) || 0;

              // push
              this.chunk.push(rec);

              if (this.chunk.length >= maxChunkSize) {
                  await parsedCallback(this.chunk);
                  this.chunk = [];
              }
          }
      }

      /** Metadata gathered by the last parse() call. */
      get info() {
          return this.inpxInfo;
      }
  }
  145. module.exports = InpxParser;