Browse Source

Улучшено определение кодировки

Book Pauk 2 years ago
parent
commit
4b4f7bd697
2 changed files with 3 additions and 12 deletions
  1. +1 -1
      server/core/fb2/Fb2Helper.js
  2. +2 -11
      server/core/fb2/textUtils.js

+ 1 - 1
server/core/fb2/Fb2Helper.js

@@ -35,7 +35,7 @@ class Fb2Helper {
                 if (m) {
                     let enc = m[1].toLowerCase();
                     if (enc != 'utf-8') {
-                        //enc может не соответсвовать реальной кодировке файла, поэтому:
+                        //если кодировка не определена в getEncoding, используем enc
                         if (encoding.indexOf('ISO-8859') >= 0) {
                             encoding = enc;
                         }

+ 2 - 11
server/core/fb2/textUtils.js

@@ -4,7 +4,7 @@ function getEncoding(buf) {
     let selected = getEncodingLite(buf);
 
     if (selected == 'ISO-8859-5' && buf.length > 10) {
-        const charsetAll = chardet.analyse(buf.slice(0, 20000));
+        const charsetAll = chardet.analyse(buf.slice(0, 100000));
         for (const charset of charsetAll) {
             if (charset.name.indexOf('ISO-8859') < 0) {
                 selected = charset.name;
@@ -39,9 +39,7 @@ function getEncodingLite(buf, returnAll) {
         'u': 0,
     };
 
-    const len = buf.length;
-    const blockSize = (len > 5*3000 ? 3000 : len);
-    let counter = 0;
+    const len = (buf.length > 100000 ? 100000 : buf.length);
     let i = 0;
     let totalChecked = 0;
     while (i < len) {
@@ -76,13 +74,6 @@ function getEncodingLite(buf, returnAll) {
             if (char > 207 && char < 240) charsets['i'] += lowerCase;
             if (char > 175 && char < 208) charsets['i'] += upperCase;
         }
-
-        counter++;
-
-        if (counter > blockSize) {
-            counter = 0;
-            i += Math.round(len/2 - 2*blockSize);
-        }
     }
 
     let sorted = Object.keys(charsets).map(function(key) {