|
@@ -55,6 +55,10 @@ class ConvertPdf extends ConvertHtml {
|
|
let images = [];
|
|
let images = [];
|
|
let loading = [];
|
|
let loading = [];
|
|
|
|
|
|
|
|
+ let inText = false;
|
|
|
|
+ let bold = false;
|
|
|
|
+ let italic = false;
|
|
|
|
+
|
|
let i = -1;
|
|
let i = -1;
|
|
|
|
|
|
const loadImage = async(image) => {
|
|
const loadImage = async(image) => {
|
|
@@ -81,22 +85,30 @@ class ConvertPdf extends ConvertHtml {
|
|
}
|
|
}
|
|
};
|
|
};
|
|
|
|
|
|
|
|
+ const isTextBold = (text) => {
|
|
|
|
+ const m = text.trim().match(/^<b>(.*)<\/b>$/);
|
|
|
|
+ return m && !m[1].match(/<b>|<\/b>|<i>|<\/i>/g);
|
|
|
|
+ };
|
|
|
|
+
|
|
|
|
+ const isTextEmpty = (text) => {
|
|
|
|
+ return text.replace(/<b>|<\/b>|<i>|<\/i>/g, '').trim() == '';
|
|
|
|
+ };
|
|
|
|
+
|
|
const putPageLines = () => {
|
|
const putPageLines = () => {
|
|
- pagelines.sort((a, b) => (a.top - b.top)*10000 + (a.left - b.left))
|
|
|
|
|
|
+ pagelines.sort((a, b) => (Math.abs(a.top - b.top) > 3 ? a.top - b.top : 0)*10000 + (a.left - b.left))
|
|
|
|
|
|
//объединяем в одну строку равные по высоте
|
|
//объединяем в одну строку равные по высоте
|
|
const pl = [];
|
|
const pl = [];
|
|
let pt = 0;
|
|
let pt = 0;
|
|
let j = -1;
|
|
let j = -1;
|
|
pagelines.forEach(line => {
|
|
pagelines.forEach(line => {
|
|
- //добавим закрывающий тег стиля
|
|
|
|
- line.text += line.tClose;
|
|
|
|
|
|
+ if (isTextEmpty(line.text))
|
|
|
|
+ return;
|
|
|
|
|
|
//проверим, возможно это заголовок
|
|
//проверим, возможно это заголовок
|
|
- if (line.fonts.length == 1 && line.pageWidth) {
|
|
|
|
- const f = (line.fonts.length ? fonts[line.fonts[0]] : null);
|
|
|
|
|
|
+ if (line.fontId && line.pageWidth) {
|
|
const centerLeft = (line.pageWidth - line.width)/2;
|
|
const centerLeft = (line.pageWidth - line.width)/2;
|
|
- if (f && f.isBold && Math.abs(centerLeft - line.left) < 3) {
|
|
|
|
|
|
+ if (isTextBold(line.text) && Math.abs(centerLeft - line.left) < 10) {
|
|
if (!sectionTitleFound) {
|
|
if (!sectionTitleFound) {
|
|
line.isSectionTitle = true;
|
|
line.isSectionTitle = true;
|
|
sectionTitleFound = true;
|
|
sectionTitleFound = true;
|
|
@@ -124,8 +136,8 @@ class ConvertPdf extends ConvertHtml {
|
|
//добавим пустую строку, если надо
|
|
//добавим пустую строку, если надо
|
|
const prevLine = (i > lastIndex ? lines[i] : {fonts: [], top: 0});
|
|
const prevLine = (i > lastIndex ? lines[i] : {fonts: [], top: 0});
|
|
if (prevLine && !prevLine.isImage) {
|
|
if (prevLine && !prevLine.isImage) {
|
|
- const f = (prevLine.fonts.length ? fonts[prevLine.fonts[0]] : (line.fonts.length ? fonts[line.fonts[0]] : null));
|
|
|
|
- if (f && f.fontSize && !line.isImage && line.top - prevLine.top > f.fontSize*1.8) {
|
|
|
|
|
|
+ const f = (prevLine.fontId ? fonts[prevLine.fontId] : (line.fontId ? fonts[line.fontId] : null));
|
|
|
|
+ if (f && f.fontSize && !line.isImage && line.top - prevLine.top > f.fontSize * 1.8) {
|
|
i++;
|
|
i++;
|
|
lines[i] = {text: '<br>'};
|
|
lines[i] = {text: '<br>'};
|
|
}
|
|
}
|
|
@@ -138,29 +150,26 @@ class ConvertPdf extends ConvertHtml {
|
|
putImage(100000);
|
|
putImage(100000);
|
|
};
|
|
};
|
|
|
|
|
|
- const onStartNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
|
|
|
|
- if (tag == 'textstyle') {
|
|
|
|
- const attrs = sax.getAttrsSync(tail);
|
|
|
|
- const fontId = (attrs.id && attrs.id.value ? attrs.id.value : '');
|
|
|
|
- const fontStyle = (attrs.fontstyle && attrs.fontstyle.value ? attrs.fontstyle.value : '');
|
|
|
|
- const fontSize = (attrs.fontsize && attrs.fontsize.value ? attrs.fontsize.value : '');
|
|
|
|
|
|
+ const onTextNode = (text, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
|
|
|
|
+ if (!cutCounter && inText) {
|
|
|
|
+ let tOpen = (bold ? '<b>' : '');
|
|
|
|
+ tOpen += (italic ? '<i>' : '');
|
|
|
|
+ let tClose = (italic ? '</i>' : '');
|
|
|
|
+ tClose += (bold ? '</b>' : '');
|
|
|
|
|
|
- if (fontId) {
|
|
|
|
- const styleTags = {bold: 'b', italics: 'i', superscript: 'sup', subscript: 'sub'};
|
|
|
|
- const f = fonts[fontId] = {tOpen: '', tClose: '', isBold: false, fontSize};
|
|
|
|
-
|
|
|
|
- if (fontStyle) {
|
|
|
|
- const styles = fontStyle.split(' ');
|
|
|
|
- styles.forEach(style => {
|
|
|
|
- const s = styleTags[style];
|
|
|
|
- if (s) {
|
|
|
|
- f.tOpen += `<${s}>`;
|
|
|
|
- f.tClose = `</${s}>${f.tClose}`;
|
|
|
|
- if (s == 'b')
|
|
|
|
- f.isBold = true;
|
|
|
|
- }
|
|
|
|
- });
|
|
|
|
- }
|
|
|
|
|
|
+ line.text += ` ${tOpen}${text}${tClose}`;
|
|
|
|
+ }
|
|
|
|
+ };
|
|
|
|
+
|
|
|
|
+ const onStartNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
|
|
|
|
+ if (inText) {
|
|
|
|
+ switch (tag) {
|
|
|
|
+ case 'i':
|
|
|
|
+ italic = true;
|
|
|
|
+ break;
|
|
|
|
+ case 'b':
|
|
|
|
+ bold = true;
|
|
|
|
+ break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
@@ -173,80 +182,78 @@ class ConvertPdf extends ConvertHtml {
|
|
putPageLines();
|
|
putPageLines();
|
|
}
|
|
}
|
|
|
|
|
|
- if (tag == 'textline') {
|
|
|
|
|
|
+ if (tag == 'fontspec') {
|
|
|
|
+ const attrs = sax.getAttrsSync(tail);
|
|
|
|
+ const fontId = (attrs.id && attrs.id.value ? attrs.id.value : '');
|
|
|
|
+ const fontSize = (attrs.size && attrs.size.value ? attrs.size.value : '');
|
|
|
|
+
|
|
|
|
+ if (fontId) {
|
|
|
|
+ fonts[fontId] = {fontSize};
|
|
|
|
+
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ if (tag == 'text' && !inText) {
|
|
const attrs = sax.getAttrsSync(tail);
|
|
const attrs = sax.getAttrsSync(tail);
|
|
line = {
|
|
line = {
|
|
text: '',
|
|
text: '',
|
|
- top: parseInt((attrs.vpos && attrs.vpos.value ? attrs.vpos.value : null), 10),
|
|
|
|
- left: parseInt((attrs.hpos && attrs.hpos.value ? attrs.hpos.value : null), 10),
|
|
|
|
|
|
+ top: parseInt((attrs.top && attrs.top.value ? attrs.top.value : null), 10),
|
|
|
|
+ left: parseInt((attrs.left && attrs.left.value ? attrs.left.value : null), 10),
|
|
width: parseInt((attrs.width && attrs.width.value ? attrs.width.value : null), 10),
|
|
width: parseInt((attrs.width && attrs.width.value ? attrs.width.value : null), 10),
|
|
height: parseInt((attrs.height && attrs.height.value ? attrs.height.value : null), 10),
|
|
height: parseInt((attrs.height && attrs.height.value ? attrs.height.value : null), 10),
|
|
- tOpen: '',
|
|
|
|
- tClose: '',
|
|
|
|
isSectionTitle: false,
|
|
isSectionTitle: false,
|
|
isSubtitle: false,
|
|
isSubtitle: false,
|
|
pageWidth: page.width,
|
|
pageWidth: page.width,
|
|
- fonts: [],
|
|
|
|
|
|
+ fontId: (attrs.font && attrs.font.value ? attrs.font.value : ''),
|
|
};
|
|
};
|
|
|
|
|
|
if (line.width != 0 || line.height != 0) {
|
|
if (line.width != 0 || line.height != 0) {
|
|
|
|
+ inText = true;
|
|
pagelines.push(line);
|
|
pagelines.push(line);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
- if (tag == 'string') {
|
|
|
|
|
|
+ if (tag == 'image') {
|
|
const attrs = sax.getAttrsSync(tail);
|
|
const attrs = sax.getAttrsSync(tail);
|
|
- if (attrs.content && attrs.content.value) {
|
|
|
|
-
|
|
|
|
- let tOpen = '';
|
|
|
|
- let tClose = '';
|
|
|
|
- const fontId = (attrs.stylerefs && attrs.stylerefs.value ? attrs.stylerefs.value : '');
|
|
|
|
- if (fontId && fonts[fontId]) {
|
|
|
|
- tOpen = fonts[fontId].tOpen;
|
|
|
|
- tClose = fonts[fontId].tClose;
|
|
|
|
- if (!line.fonts.length || line.fonts[0] != fontId)
|
|
|
|
- line.fonts.push(fontId);
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- if (line.tOpen != tOpen) {
|
|
|
|
- line.text += line.tClose + tOpen;
|
|
|
|
- line.tOpen = tOpen;
|
|
|
|
- line.tClose = tClose;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- line.text += `${line.text.length ? ' ' : ''}${attrs.content.value}`;
|
|
|
|
|
|
+ let src = (attrs.src && attrs.src.value ? attrs.src.value : '');
|
|
|
|
+ if (src) {
|
|
|
|
+ const image = {
|
|
|
|
+ isImage: true,
|
|
|
|
+ src,
|
|
|
|
+ data: '',
|
|
|
|
+ type: '',
|
|
|
|
+ top: parseInt((attrs.top && attrs.top.value ? attrs.top.value : null), 10) || 0,
|
|
|
|
+ left: parseInt((attrs.left && attrs.left.value ? attrs.left.value : null), 10) || 0,
|
|
|
|
+ width: parseInt((attrs.width && attrs.width.value ? attrs.width.value : null), 10) || 0,
|
|
|
|
+ height: parseInt((attrs.height && attrs.height.value ? attrs.height.value : null), 10) || 0,
|
|
|
|
+ };
|
|
|
|
+
|
|
|
|
+ loading.push(loadImage(image));
|
|
|
|
+ images.push(image);
|
|
|
|
+ images.sort((a, b) => (a.top - b.top)*10000 + (a.left - b.left));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
+ };
|
|
|
|
|
|
- if (tag == 'illustration') {
|
|
|
|
- const attrs = sax.getAttrsSync(tail);
|
|
|
|
- if (attrs.type && attrs.type.value == 'image') {
|
|
|
|
- let src = (attrs.fileid && attrs.fileid.value ? attrs.fileid.value : '');
|
|
|
|
- if (src) {
|
|
|
|
- const image = {
|
|
|
|
- isImage: true,
|
|
|
|
- src,
|
|
|
|
- data: '',
|
|
|
|
- type: '',
|
|
|
|
- top: parseInt((attrs.vpos && attrs.vpos.value ? attrs.vpos.value : null), 10) || 0,
|
|
|
|
- left: parseInt((attrs.hpos && attrs.hpos.value ? attrs.hpos.value : null), 10) || 0,
|
|
|
|
- width: parseInt((attrs.width && attrs.width.value ? attrs.width.value : null), 10) || 0,
|
|
|
|
- height: parseInt((attrs.height && attrs.height.value ? attrs.height.value : null), 10) || 0,
|
|
|
|
- };
|
|
|
|
- const exists = images.filter(img => (img.top == image.top && img.left == image.left && img.width == image.width && img.height == image.height));
|
|
|
|
- if (!exists.length) {
|
|
|
|
- loading.push(loadImage(image));
|
|
|
|
- images.push(image);
|
|
|
|
- images.sort((a, b) => (a.top - b.top)*10000 + (a.left - b.left));
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
|
|
+ const onEndNode = (tag, tail, singleTag, cutCounter, cutTag) => {// eslint-disable-line no-unused-vars
|
|
|
|
+ if (inText) {
|
|
|
|
+ switch (tag) {
|
|
|
|
+ case 'i':
|
|
|
|
+ italic = false;
|
|
|
|
+ break;
|
|
|
|
+ case 'b':
|
|
|
|
+ bold = false;
|
|
|
|
+ break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
+
|
|
|
|
+ if (tag == 'text')
|
|
|
|
+ inText = false;
|
|
};
|
|
};
|
|
|
|
|
|
let buf = this.decode(data).toString();
|
|
let buf = this.decode(data).toString();
|
|
sax.parseSync(buf, {
|
|
sax.parseSync(buf, {
|
|
- onStartNode
|
|
|
|
|
|
+ onStartNode, onEndNode, onTextNode
|
|
});
|
|
});
|
|
|
|
|
|
putPageLines();
|
|
putPageLines();
|
|
@@ -290,6 +297,7 @@ class ConvertPdf extends ConvertHtml {
|
|
|
|
|
|
let concat = '';
|
|
let concat = '';
|
|
let sp = '';
|
|
let sp = '';
|
|
|
|
+ let firstLine = true;
|
|
for (const line of lines) {
|
|
for (const line of lines) {
|
|
if (text.length > limitSize) {
|
|
if (text.length > limitSize) {
|
|
throw new Error(`Файл для конвертирования слишком большой|FORLOG| text.length: ${text.length} > ${limitSize}`);
|
|
throw new Error(`Файл для конвертирования слишком большой|FORLOG| text.length: ${text.length} > ${limitSize}`);
|
|
@@ -301,10 +309,15 @@ class ConvertPdf extends ConvertHtml {
|
|
}
|
|
}
|
|
|
|
|
|
if (line.isSectionTitle) {
|
|
if (line.isSectionTitle) {
|
|
- text += `<fb2-section-title>${line.text.trim()}</fb2-section-title>`;
|
|
|
|
|
|
+ if (firstLine)
|
|
|
|
+ text += `<fb2-section-title>${line.text.trim()}</fb2-section-title>`;
|
|
|
|
+ else
|
|
|
|
+ text += `<fb2-subtitle>${line.text.trim()}</fb2-subtitle>`;
|
|
continue;
|
|
continue;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+ firstLine = false;
|
|
|
|
+
|
|
if (line.isSubtitle) {
|
|
if (line.isSubtitle) {
|
|
text += `<br><fb2-subtitle>${line.text.trim()}</fb2-subtitle>`;
|
|
text += `<br><fb2-subtitle>${line.text.trim()}</fb2-subtitle>`;
|
|
continue;
|
|
continue;
|