InpxParser.js 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149
  1. const path = require('path');
  2. const crypto = require('crypto');
  3. const ZipReader = require('./ZipReader');
  // Names of the metadata entries looked up inside an INPX archive.
  // Each one is read on a best-effort basis: a failed read yields an empty string.
  const collectionInfo = 'collection.info';
  const structureInfo = 'structure.info';
  const versionInfo = 'version.info';

  // Field order used for *.inp records when structure.info is missing or empty.
  // Names are lower-cased before they become record property names.
  const defaultStructure = 'AUTHOR;GENRE;TITLE;SERIES;SERNO;FILE;SIZE;LIBID;DEL;EXT;DATE;LANG;LIBRATE;KEYWORDS';
  /**
   * Reads an INPX archive through ZipReader: the three `*.info` metadata
   * entries plus every `*.inp` entry, whose rows are turned into plain record
   * objects and handed to the caller in batches.
   */
  class InpxParser {
      constructor() {
          // Filled by parse(): {collection, structure, version} as trimmed strings.
          this.inpxInfo = {};
      }

      /**
       * Extracts an archive entry as a trimmed string.
       *
       * Deliberately best-effort: any extraction error is swallowed and an
       * empty string is returned, so absent *.info entries do not abort parsing.
       *
       * @param {object} zipReader - object exposing `extractToBuf(fileName)`.
       * @param {string} fileName - entry name inside the archive.
       * @returns {Promise<string>} trimmed entry content, or '' on any failure.
       */
      async safeExtractToString(zipReader, fileName) {
          try {
              const buf = await zipReader.extractToBuf(fileName);
              return buf.toString().trim();
          } catch (e) {
              // quiet on purpose, see the method description
              return '';
          }
      }

      /**
       * Parses a whole INPX file.
       *
       * @param {string} inpxFile - value passed to `ZipReader#open`
       *   (presumably a filesystem path - confirm against ZipReader).
       * @param {Function} [readFileCallback] - awaited once with `{totalFiles}`
       *   and then with `{fileName, current}` before each entry is read.
       * @param {Function} [parsedCallback] - awaited with an array of records:
       *   every time 10000 records have accumulated, and once more at the end
       *   with the remainder (if any). A fresh array is started after each call.
       * @returns {Promise<void>}
       */
      async parse(inpxFile, readFileCallback, parsedCallback) {
          const onReadFile = readFileCallback || (async() => {});
          const onParsed = parsedCallback || (async() => {});

          const zipReader = new ZipReader();
          await zipReader.open(inpxFile);

          try {
              const info = this.inpxInfo;

              // collect the *.inp entries
              const inpFiles = [];
              for (const entry of Object.values(zipReader.entries)) {
                  if (!entry.isDirectory && path.extname(entry.name) === '.inp')
                      inpFiles.push(entry.name);
              }

              // the three *.info entries are counted as well
              await onReadFile({totalFiles: inpFiles.length + 3});

              let current = 0;

              // info
              await onReadFile({fileName: collectionInfo, current: ++current});
              info.collection = await this.safeExtractToString(zipReader, collectionInfo);

              await onReadFile({fileName: structureInfo, current: ++current});
              info.structure = await this.safeExtractToString(zipReader, structureInfo);

              await onReadFile({fileName: versionInfo, current: ++current});
              info.version = await this.safeExtractToString(zipReader, versionInfo);

              // record structure: lower-cased field names in row order
              const structure = (info.structure || defaultStructure).toLowerCase().split(';');

              // parse the *.inp entries; the batch buffer is shared across them
              this.chunk = [];
              for (const inpFile of inpFiles) {
                  await onReadFile({fileName: inpFile, current: ++current});
                  await this.parseInp(zipReader, inpFile, structure, onParsed);
              }

              // flush whatever did not fill a complete batch
              if (this.chunk.length) {
                  await onParsed(this.chunk);
              }
          } finally {
              await zipReader.close();
          }
      }

      /**
       * Parses one `*.inp` entry and appends its records to `this.chunk`,
       * flushing to `parsedCallback` whenever 10000 records have accumulated.
       *
       * Rows are separated by `\n` (an optional trailing `\r` is removed) and
       * fields inside a row by `\x04`. Rows that are empty after removing the
       * `\r` are skipped, so blank lines in CRLF files produce no records.
       *
       * @param {object} zipReader - object exposing `extractToBuf(fileName)`.
       * @param {string} inpFile - entry name; `<basename without .inp>.zip` is
       *   used as the record's `folder` when the row does not provide one.
       * @param {string[]} structure - field names in row order; empty names
       *   mark columns to ignore.
       * @param {Function} parsedCallback - awaited with each full batch.
       * @returns {Promise<void>}
       */
      async parseInp(zipReader, inpFile, structure, parsedCallback) {
          const maxChunkSize = 10000;

          const inpBuf = await zipReader.extractToBuf(inpFile);
          const rows = inpBuf.toString().split('\n');

          const defaultFolder = `${path.basename(inpFile, '.inp')}.zip`;
          const structLen = structure.length;

          for (const row of rows) {
              // Strip the CR *before* the emptiness check, otherwise a blank
              // CRLF line ("\r") would slip through and become an empty record.
              const line = (row.endsWith('\x0D') ? row.slice(0, -1) : row);
              if (!line)
                  continue;

              const rec = {};

              // unique record id: SHA-256 of the raw row (without CR), base64
              rec._uid = crypto.createHash('sha256').update(line).digest('base64');

              // map columns to field names; extra columns or names are ignored
              const parts = line.split('\x04');
              const len = Math.min(parts.length, structLen);
              for (let i = 0; i < len; i++) {
                  if (structure[i])
                      rec[structure[i]] = parts[i];
              }

              // special handling of some fields
              if (rec.author) {
                  // "Last,First,Middle:Last,First,:" -> "Last First Middle,Last First"
                  rec.author = rec.author
                      .split(':')
                      .map(s => s.replace(/,/g, ' ').trim())
                      .filter(s => s)
                      .join(',');
              }

              if (rec.genre) {
                  rec.genre = rec.genre.split(':').filter(s => s).join(',');
              }

              if (!rec.folder)
                  rec.folder = defaultFolder;

              // numeric fields: missing or non-numeric values become 0
              rec.serno = parseInt(rec.serno, 10) || 0;
              rec.size = parseInt(rec.size, 10) || 0;
              rec.del = parseInt(rec.del, 10) || 0;
              rec.insno = parseInt(rec.insno, 10) || 0;
              rec.librate = parseInt(rec.librate, 10) || 0;

              // push, and flush when the batch is full
              this.chunk.push(rec);

              if (this.chunk.length >= maxChunkSize) {
                  await parsedCallback(this.chunk);
                  this.chunk = [];
              }
          }
      }

      /** Metadata gathered by the last parse() call (same object as `inpxInfo`). */
      get info() {
          return this.inpxInfo;
      }
  }
  // The module's only export is the parser class itself.
  module.exports = InpxParser;