瀏覽代碼

Add html parsing

painor 4 年之前
父節點
當前提交
1ddfcd12f4

+ 4 - 0
gramjs/Utils.ts

@@ -8,6 +8,7 @@ import type {ParseInterface} from "./client/messageParse";
 import {MarkdownParser} from "./extensions/markdown";
 import {CustomFile} from "./client/uploads";
 import TypeInputFile = Api.TypeInputFile;
+import {HTMLParser} from "./extensions/html";
 
 
 const USERNAME_RE = new RegExp('@|(?:https?:\\/\\/)?(?:www\\.)?' + '(?:telegram\\.(?:me|dog)|t\\.me)\\/(@|joinchat\\/)?', 'i');
@@ -928,6 +929,9 @@ export function sanitizeParseMode(mode: string | ParseInterface): ParseInterface
     if (mode === "md" || mode === "markdown") {
         return MarkdownParser;
     }
+    if (mode=="html"){
+        return HTMLParser;
+    }
     if (typeof mode == "object") {
         if ("parse" in mode && "unparse" in mode) {
             return mode;

+ 1 - 1
gramjs/Version.ts

@@ -1 +1 @@
-export const version = "1.5.27";
+export const version = "1.6.8";

+ 2 - 1
gramjs/client/TelegramClient.ts

@@ -20,6 +20,7 @@ import {MTProtoSender, UpdateConnectionState} from "../network";
 import {LAYER} from "../tl/AllTLObjects";
 import {IS_NODE} from "../Helpers";
 import {DownloadMediaInterface} from "./downloads";
+import type {Message} from "../tl/custom/message";
 
 export class TelegramClient extends TelegramBaseClient {
 
@@ -115,7 +116,7 @@ export class TelegramClient extends TelegramBaseClient {
         return downloadMethods._downloadWebDocument(this, webDocument, args);
     }
 
-    downloadMedia(messageOrMedia: Api.Message | Api.TypeMessageMedia, args: DownloadMediaInterface) {
+    downloadMedia(messageOrMedia: Api.Message | Api.TypeMessageMedia | Message, args: DownloadMediaInterface) {
         return downloadMethods.downloadMedia(this, messageOrMedia, args);
     }
 

+ 2 - 1
gramjs/client/downloads.ts

@@ -3,6 +3,7 @@ import type {TelegramClient} from './TelegramClient';
 import {getAppropriatedPartSize, strippedPhotoToJpg} from '../Utils';
 import {sleep} from '../Helpers';
 import {MTProtoSender} from "../network";
+import type {Message} from "../tl/custom/message";
 
 
 export interface progressCallback {
@@ -205,7 +206,7 @@ export interface DownloadMediaInterface {
 
 }
 
-export async function downloadMedia(client: TelegramClient, messageOrMedia: Api.Message | Api.TypeMessageMedia, args: DownloadMediaInterface): Promise<Buffer> {
+export async function downloadMedia(client: TelegramClient, messageOrMedia: Api.Message | Api.TypeMessageMedia | Message , args: DownloadMediaInterface): Promise<Buffer> {
     let date;
     let media;
     if (messageOrMedia instanceof Api.Message) {

+ 6 - 6
gramjs/client/messageParse.ts

@@ -18,8 +18,8 @@ export const DEFAULT_DELIMITERS: {
 // export class MessageParseMethods {
 
 export interface ParseInterface {
-    parse: (message: string, delimiters?: typeof DEFAULT_DELIMITERS) => [string, ValueOf<typeof DEFAULT_DELIMITERS>[]],
-    unparse: (text: string, entities: Api.TypeMessageEntity[] | undefined, delimiters?: typeof DEFAULT_DELIMITERS) => string
+    parse: (message: string) => [string, Api.TypeMessageEntity[]],
+    unparse: (text: string, entities: Api.TypeMessageEntity[] | undefined) => string
 }
 
 export async function _replaceWithMention(client: TelegramClient, entities: Api.TypeMessageEntity[], i: number, user: EntityLike) {
@@ -38,14 +38,14 @@ export async function _replaceWithMention(client: TelegramClient, entities: Api.
 }
 
 export function _parseMessageText(client: TelegramClient, message: string, parseMode: any) {
-    if (!parseMode) {
+    if (parseMode==false) {
+        return [message, []]
+    }
+    if (parseMode==undefined) {
         parseMode = client.parseMode;
     } else if (typeof parseMode === "string") {
         parseMode = sanitizeParseMode(parseMode);
     }
-    if (!parseMode) {
-        return [message, []]
-    }
     return parseMode.parse(message);
 }
 

+ 225 - 0
gramjs/extensions/html.ts

@@ -0,0 +1,225 @@
+import {Parser} from "htmlparser2";
+import {Handler} from "htmlparser2/lib/Parser";
+import {Api} from "../tl";
+
+/**
+ * htmlparser2 handler that gathers the plain text of a document and the
+ * Telegram entities describing its formatting.
+ */
+class HTMLToTelegramParser implements Handler {
+    /** Text seen so far, with all markup removed. */
+    text: string;
+    /** Finished entities, in the order their tags were closed. */
+    entities: Api.TypeMessageEntity[];
+    /** Entities whose tag is still open, keyed by tag name. */
+    private _buildingEntities: Map<string, Api.TypeMessageEntity>;
+    /** Names of the currently open tags, innermost first. */
+    private _openTags: string[];
+    /** Parallel to `_openTags`; holds the address of a `mailto:` link, otherwise `undefined`. */
+    private _openTagsMeta: (string | undefined)[];
+
+    constructor() {
+        this.text = '';
+        this.entities = [];
+        this._buildingEntities = new Map<string, Api.TypeMessageEntity>();
+        this._openTags = [];
+        this._openTagsMeta = [];
+    }
+
+    /**
+     * Called for every opening tag: records the tag and, for supported tags,
+     * starts an entity at the current end of `text`. Other tags are only
+     * tracked so that the matching close event pops the right entry.
+     * An `<a>` without `href` produces no entity.
+     */
+    onopentag(name: string, attributes: {
+        [s: string]: string;
+    }) {
+        this._openTags.unshift(name);
+        this._openTagsMeta.unshift(undefined);
+        let EntityType;
+        const args: any = {};
+        if (name === "strong" || name === "b") {
+            EntityType = Api.MessageEntityBold;
+        } else if (name === "em" || name === "i") {
+            EntityType = Api.MessageEntityItalic;
+        } else if (name === "u") {
+            EntityType = Api.MessageEntityUnderline;
+        } else if (name === "del" || name === "s") {
+            EntityType = Api.MessageEntityStrike;
+        } else if (name === "blockquote") {
+            EntityType = Api.MessageEntityBlockquote;
+        } else if (name === "code") {
+            const pre = this._buildingEntities.get('pre');
+            if (pre && pre instanceof Api.MessageEntityPre) {
+                // <code> inside <pre> only names the language; it adds no entity of its own.
+                const className: string | undefined = attributes.class;
+                if (className && className.startsWith('language-')) {
+                    pre.language = className.slice('language-'.length);
+                }
+            } else {
+                EntityType = Api.MessageEntityCode;
+            }
+        } else if (name === "pre") {
+            EntityType = Api.MessageEntityPre;
+            args["language"] = "";
+        } else if (name === "a") {
+            let url: string | undefined = attributes.href;
+            if (!url) {
+                return;
+            }
+            if (url.startsWith("mailto:")) {
+                url = url.slice("mailto:".length, url.length);
+                EntityType = Api.MessageEntityEmail;
+            } else {
+                EntityType = Api.MessageEntityTextUrl;
+                args["url"] = url;
+                url = undefined;
+            }
+            // Only a mailto address is kept as metadata; ontext() uses it as the link text.
+            this._openTagsMeta[0] = url;
+        }
+
+        if (EntityType && !this._buildingEntities.has(name)) {
+            this._buildingEntities.set(name, new EntityType({
+                offset: this.text.length,
+                length: 0,
+                ...args
+            }));
+        }
+    }
+
+    /** Appends `text` to the output and extends every entity that is still open. */
+    ontext(text: string) {
+        const previousTag = this._openTags.length > 0 ? this._openTags[0] : "";
+        if (previousTag === "a") {
+            const url = this._openTagsMeta[0];
+            if (url) {
+                // Inside a mailto link the address replaces the visible text.
+                text = url;
+            }
+        }
+        for (const entity of this._buildingEntities.values()) {
+            entity.length += text.length;
+        }
+        this.text += text;
+    }
+
+    /** Pops the tag and moves its entity, if one was being built, to `entities`. */
+    onclosetag(tagname: string) {
+        this._openTagsMeta.shift();
+        this._openTags.shift();
+        const entity = this._buildingEntities.get(tagname);
+        if (entity) {
+            this._buildingEntities.delete(tagname);
+            this.entities.push(entity);
+        }
+    }
+
+    // The remaining Handler callbacks are not needed here and are
+    // intentionally left as no-ops.
+    onattribute(name: string, value: string, quote?: string | undefined | null): void {}
+
+    oncdataend(): void {}
+
+    oncdatastart(): void {}
+
+    oncomment(data: string): void {}
+
+    oncommentend(): void {}
+
+    onend(): void {}
+
+    onerror(error: Error): void {}
+
+    onopentagname(name: string): void {}
+
+    onparserinit(parser: Parser): void {}
+
+    onprocessinginstruction(name: string, data: string): void {}
+
+    onreset(): void {}
+
+}
+
+/** Converts between a small subset of HTML and Telegram text plus message entities. */
+export class HTMLParser {
+    /** Escapes `&`, `<`, `>` and `"` so message text cannot be read as markup. */
+    private static _escape(text: string): string {
+        return text.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;");
+    }
+
+    /** Parses `html` into plain text and its entities; falsy input yields `[html, []]`. */
+    static parse(html: string): [string, Api.TypeMessageEntity[]] {
+        if (!html) {
+            return [html, []];
+        }
+        const handler = new HTMLToTelegramParser();
+        const parser = new Parser(handler);
+        parser.write(html);
+        parser.end();
+        return [handler.text, handler.entities];
+    }
+
+    /**
+     * Renders `text` and its `entities` as HTML, escaping the text itself.
+     * `_offset`/`_length` serve the recursive calls that render the inside of an
+     * entity: `text` is then that entity's slice, `_offset` its message position.
+     * Entities are handled in array order; nested ones must follow their parent.
+     */
+    static unparse(text: string, entities: Api.TypeMessageEntity[] | undefined, _offset: number = 0, _length?: number): string {
+        if (!text || !entities || !entities.length) {
+            return text ? HTMLParser._escape(text) : text;
+        }
+        const end = _offset + (_length == undefined ? text.length : _length);
+        const html: string[] = [];
+        let lastOffset = 0;
+        for (let i = 0; i < entities.length; i++) {
+            const entity = entities[i];
+            if (entity.offset >= end) {
+                break;
+            }
+            const relativeOffset = entity.offset - _offset;
+            if (relativeOffset > lastOffset) {
+                html.push(HTMLParser._escape(text.slice(lastOffset, relativeOffset)));
+            } else if (relativeOffset < lastOffset) {
+                continue; // starts inside a span that has already been emitted
+            }
+            let skipEntity = false;
+            const length = entity.length;
+            const entityText = this.unparse(text.slice(relativeOffset, relativeOffset + length), entities.slice(i + 1, entities.length), entity.offset, length);
+            if (entity instanceof Api.MessageEntityBold) {
+                html.push(`<strong>${entityText}</strong>`);
+            } else if (entity instanceof Api.MessageEntityItalic) {
+                html.push(`<em>${entityText}</em>`);
+            } else if (entity instanceof Api.MessageEntityCode) {
+                html.push(`<code>${entityText}</code>`);
+            } else if (entity instanceof Api.MessageEntityUnderline) {
+                html.push(`<u>${entityText}</u>`);
+            } else if (entity instanceof Api.MessageEntityStrike) {
+                html.push(`<del>${entityText}</del>`);
+            } else if (entity instanceof Api.MessageEntityBlockquote) {
+                html.push(`<blockquote>${entityText}</blockquote>`);
+            } else if (entity instanceof Api.MessageEntityPre) {
+                // No whitespace is added around the content: inside <pre> it would become text.
+                html.push(entity.language
+                    ? `<pre><code class="language-${HTMLParser._escape(entity.language)}">${entityText}</code></pre>`
+                    : `<pre>${entityText}</pre>`);
+            } else if (entity instanceof Api.MessageEntityEmail) {
+                html.push(`<a href="mailto:${entityText}">${entityText}</a>`);
+            } else if (entity instanceof Api.MessageEntityUrl) {
+                html.push(`<a href="${entityText}">${entityText}</a>`);
+            } else if (entity instanceof Api.MessageEntityTextUrl) {
+                html.push(`<a href="${HTMLParser._escape(entity.url)}">${entityText}</a>`);
+            } else if (entity instanceof Api.MessageEntityMentionName) {
+                html.push(`<a href="tg://user?id=${entity.userId}">${entityText}</a>`);
+            } else {
+                skipEntity = true;
+            }
+            lastOffset = relativeOffset + (skipEntity ? 0 : length);
+        }
+        html.push(HTMLParser._escape(text.slice(lastOffset, text.length)));
+        return html.join("");
+    }
+}
+

+ 3 - 2
gramjs/extensions/markdown.ts

@@ -5,7 +5,7 @@ import {DEFAULT_DELIMITERS, messageEntities} from "../client/messageParse";
 export class MarkdownParser {
 
     // TODO maybe there is a better way :shrug:
-    static parse(message: string, delimiters = DEFAULT_DELIMITERS): [string, ValueOf<typeof DEFAULT_DELIMITERS>[]] {
+    static parse(message: string): [string, Api.TypeMessageEntity[]] {
         let i = 0;
         const keys: { [key: string]: boolean } = {};
         for (const k in DEFAULT_DELIMITERS) {
@@ -47,7 +47,8 @@ export class MarkdownParser {
         return [message, entities];
     }
 
-    static unparse(text: string, entities: Api.TypeMessageEntity[] | undefined, delimiters = DEFAULT_DELIMITERS) {
+    static unparse(text: string, entities: Api.TypeMessageEntity[] | undefined) {
+        const delimiters = DEFAULT_DELIMITERS;
         if (!text || !entities) {
             return text;
         }

+ 1 - 1
gramjs/tl/custom/draft.ts

@@ -29,7 +29,7 @@ export class Draft {
         }
         if (!(draft instanceof Api.DraftMessageEmpty)) {
             this.linkPreview = !draft.noWebpage;
-            this._text = MarkdownParser.unparse(draft.message,draft.entities);
+            this._text = client.parseMode.unparse(draft.message,draft.entities);
             this._rawText = draft.message;
             this.date = draft.date;
             this.replyToMsgId = draft.replyToMsgId;

File diff suppressed because it is too large
+ 235 - 441
package-lock.json


+ 2 - 1
package.json

@@ -1,6 +1,6 @@
 {
   "name": "telegram",
-  "version": "1.6.6",
+  "version": "1.6.8",
   "description": "NodeJS MTProto API Telegram client library,",
   "main": "index.js",
   "types": "index.d.ts",
@@ -43,6 +43,7 @@
     "big-integer": "^1.6.48",
     "browser-or-node": "^1.3.0",
     "buffer": "^6.0.3",
+    "htmlparser2": "^6.1.0",
     "mime-types": "^2.1.30",
     "node-localstorage": "^2.1.6",
     "os-browserify": "^0.3.0",

Some files were not shown because too many files changed in this diff