瀏覽代碼

Merge pull request #15 from watzon/master

Add HTMLParser and more tests
painor 5 年之前
父節點
當前提交
bf96832cf6

+ 1 - 0
.gitignore

@@ -9,6 +9,7 @@
 /gramjs/tl/AllTLObjects.js
 /gramjs/errors/RPCErrorList.js
 /dist/
+/coverage/
 
 # User session
 *.session

+ 2 - 2
__tests__/AES.spec.js → __tests__/crypto/AES.spec.js

@@ -1,5 +1,5 @@
-const AES = require('../gramjs/crypto/AES')
-const AESModeCTR = require('../gramjs/crypto/AESCTR')
+const AES = require('../../gramjs/crypto/AES')
+const AESModeCTR = require('../../gramjs/crypto/AESCTR')
 describe('IGE encrypt function', () => {
     test('it should return 4a657a834edc2956ec95b2a42ec8c1f2d1f0a6028ac26fd830ed23855574b4e69dd1a2be2ba18a53a49b879b2' +
         '45e1065e14b6e8ac5ba9b24befaff3209b77b5f', () => {

+ 1 - 1
__tests__/calcKey.spec.js → __tests__/crypto/calcKey.spec.js

@@ -1,4 +1,4 @@
-const MTProtoState = require('../gramjs/network/MTProtoState')
+const MTProtoState = require('../../gramjs/network/MTProtoState')
 
 describe('calcKey function', () => {
     test('it should return 0x93355e3f1f50529b6fb93eaf97f29b69c16345f53621e9d45cd9a11ddfbebac9 and' +

+ 1 - 1
__tests__/factorizator.spec.js → __tests__/crypto/factorizator.spec.js

@@ -1,4 +1,4 @@
-const Factorizator = require('../gramjs/crypto/Factorizator')
+const Factorizator = require('../../gramjs/crypto/Factorizator')
 
 describe('calcKey function', () => {
     test('it should return 0x20a13b25e1726bfc', () => {

+ 1 - 1
__tests__/readBuffer.spec.js → __tests__/crypto/readBuffer.spec.js

@@ -1,4 +1,4 @@
-const Helpers = require('../gramjs/Helpers')
+const Helpers = require('../../gramjs/Helpers')
 
 describe('readBufferFromBigInt 8 bytes function', () => {
     test('it should return 0x20a13b25e1726bfc', () => {

+ 107 - 0
__tests__/extensions/HTML.spec.js

@@ -0,0 +1,107 @@
+const { HTMLParser } = require('../../gramjs/extensions/HTML')
+const types = require('../../gramjs/tl/types')
+
+describe('HTMLParser', () => {
+    test('it should construct a new HTMLParser', () => {
+        const parser = new HTMLParser('Hello world')
+        expect(parser.text).toEqual('')
+        expect(parser.entities).toEqual([])
+    })
+
+    describe('.parse', () => {
+        test('it should parse bold entities', () => {
+            const parser = new HTMLParser('Hello <strong>world</strong>')
+            const [text, entities] = parser.parse()
+            expect(text).toEqual('Hello world')
+            expect(entities.length).toEqual(1)
+            expect(entities[0]).toBeInstanceOf(types.MessageEntityBold)
+        })
+
+        test('it should parse italic entities', () => {
+            const parser = new HTMLParser('Hello <em>world</em>')
+            const [text, entities] = parser.parse()
+            expect(text).toEqual('Hello world')
+            expect(entities.length).toEqual(1)
+            expect(entities[0]).toBeInstanceOf(types.MessageEntityItalic)
+        })
+
+        test('it should parse code entities', () => {
+            const parser = new HTMLParser('Hello <code>world</code>')
+            const [text, entities] = parser.parse()
+            expect(text).toEqual('Hello world')
+            expect(entities.length).toEqual(1)
+            expect(entities[0]).toBeInstanceOf(types.MessageEntityCode)
+        })
+
+        test('it should parse pre entities', () => {
+            const parser = new HTMLParser('Hello <pre>world</pre>')
+            const [text, entities] = parser.parse()
+            expect(text).toEqual('Hello world')
+            expect(entities.length).toEqual(1)
+            expect(entities[0]).toBeInstanceOf(types.MessageEntityPre)
+        })
+
+        test('it should parse strike entities', () => {
+            const parser = new HTMLParser('Hello <del>world</del>')
+            const [text, entities] = parser.parse()
+            expect(text).toEqual('Hello world')
+            expect(entities.length).toEqual(1)
+            expect(entities[0]).toBeInstanceOf(types.MessageEntityStrike)
+        })
+
+        test('it should parse link entities', () => {
+            const parser = new HTMLParser('Hello <a href="https://hello.world">world</a>')
+            const [text, entities] = parser.parse()
+            expect(text).toEqual('Hello world')
+            expect(entities.length).toEqual(1)
+            expect(entities[0]).toBeInstanceOf(types.MessageEntityTextUrl)
+            expect(entities[0].url).toEqual('https://hello.world')
+        })
+
+        test('it should parse nested entities', () => {
+            const parser = new HTMLParser('Hello <strong><em>world</em></strong>')
+            const [text, entities] = parser.parse()
+            expect(text).toEqual('Hello world')
+            expect(entities.length).toEqual(2)
+            expect(entities[0]).toBeInstanceOf(types.MessageEntityItalic)
+            expect(entities[1]).toBeInstanceOf(types.MessageEntityBold)
+        })
+
+        test('it should parse multiple entities', () => {
+            const parser = new HTMLParser('<em>Hello</em> <strong>world</strong>')
+            const [text, entities] = parser.parse()
+            expect(text).toEqual('Hello world')
+            expect(entities.length).toEqual(2)
+            expect(entities[0]).toBeInstanceOf(types.MessageEntityItalic)
+            expect(entities[1]).toBeInstanceOf(types.MessageEntityBold)
+        })
+    })
+
+    describe('.unparse', () => {
+        // Round-trips one entity of every supported kind through
+        // HTMLParser.unparse and checks the generated markup.
+        test('it should create an HTML string from raw text and entities', () => {
+            const unparsed = '<strong>hello</strong> <em>hello</em> <del>hello</del> <code>hello</code> <pre>hello</pre> <a href="https://hello.world">hello</a>'
+            const strippedText = 'hello hello hello hello hello hello'
+            const rawEntities = [
+                new types.MessageEntityBold({ offset: 0, length: 5 }),
+                new types.MessageEntityItalic({ offset: 6, length: 5 }),
+                new types.MessageEntityStrike({ offset: 12, length: 5 }),
+                new types.MessageEntityCode({ offset: 18, length: 5 }),
+                new types.MessageEntityPre({ offset: 24, length: 5 }),
+                new types.MessageEntityTextUrl({ offset: 30, length: 5, url: 'https://hello.world' }),
+            ]
+            const text = HTMLParser.unparse(strippedText, rawEntities)
+            expect(text).toEqual(unparsed)
+        })
+
+        test('it should unparse nested entities', () => {
+            const unparsed = '<strong><em>Hello world</em></strong>'
+            const strippedText = 'Hello world'
+            const rawEntities = [
+                new types.MessageEntityBold({ offset: 0, length: 11 }),
+                new types.MessageEntityItalic({ offset: 0, length: 11 }),
+            ]
+            const text = HTMLParser.unparse(strippedText, rawEntities)
+            expect(text).toEqual(unparsed)
+        })
+    })
+})

+ 95 - 0
__tests__/extensions/Markdown.spec.js

@@ -0,0 +1,95 @@
+const { MarkdownParser } = require('../../gramjs/extensions/Markdown')
+const types = require('../../gramjs/tl/types')
+
+describe('MarkdownParser', () => {
+    test('it should construct a new MarkdownParser', () => {
+        const parser = new MarkdownParser('Hello world')
+        expect(parser.text).toEqual('')
+        expect(parser.entities).toEqual([])
+    })
+
+    describe('.parse', () => {
+        test('it should parse bold entities', () => {
+            const parser = new MarkdownParser('Hello **world**')
+            const [text, entities] = parser.parse()
+            expect(text).toEqual('Hello world')
+            expect(entities.length).toEqual(1)
+            expect(entities[0]).toBeInstanceOf(types.MessageEntityBold)
+        })
+
+        test('it should parse italic entities', () => {
+            const parser = new MarkdownParser('Hello __world__')
+            const [text, entities] = parser.parse()
+            expect(text).toEqual('Hello world')
+            expect(entities.length).toEqual(1)
+            expect(entities[0]).toBeInstanceOf(types.MessageEntityItalic)
+        })
+
+        test('it should parse code entities', () => {
+            const parser = new MarkdownParser('Hello `world`')
+            const [text, entities] = parser.parse()
+            expect(text).toEqual('Hello world')
+            expect(entities.length).toEqual(1)
+            expect(entities[0]).toBeInstanceOf(types.MessageEntityCode)
+        })
+
+        test('it should parse pre entities', () => {
+            const parser = new MarkdownParser('Hello ```world```')
+            const [text, entities] = parser.parse()
+            expect(text).toEqual('Hello world')
+            expect(entities.length).toEqual(1)
+            expect(entities[0]).toBeInstanceOf(types.MessageEntityPre)
+        })
+
+        test('it should parse strike entities', () => {
+            const parser = new MarkdownParser('Hello ~~world~~')
+            const [text, entities] = parser.parse()
+            expect(text).toEqual('Hello world')
+            expect(entities.length).toEqual(1)
+            expect(entities[0]).toBeInstanceOf(types.MessageEntityStrike)
+        })
+
+        test('it should parse link entities', () => {
+            const parser = new MarkdownParser('Hello [world](https://hello.world)')
+            const [text, entities] = parser.parse()
+            expect(text).toEqual('Hello world')
+            expect(entities.length).toEqual(1)
+            expect(entities[0]).toBeInstanceOf(types.MessageEntityTextUrl)
+            expect(entities[0].url).toEqual('https://hello.world')
+        })
+
+        test('it should not parse nested entities', () => {
+            const parser = new MarkdownParser('Hello **__world__**')
+            const [text, entities] = parser.parse()
+            expect(text).toEqual('Hello __world__')
+            expect(entities.length).toEqual(1)
+            expect(entities[0]).toBeInstanceOf(types.MessageEntityBold)
+        })
+
+        test('it should parse multiple entities', () => {
+            const parser = new MarkdownParser('__Hello__ **world**')
+            const [text, entities] = parser.parse()
+            expect(text).toEqual('Hello world')
+            expect(entities.length).toEqual(2)
+            expect(entities[0]).toBeInstanceOf(types.MessageEntityItalic)
+            expect(entities[1]).toBeInstanceOf(types.MessageEntityBold)
+        })
+    })
+
+    describe('.unparse', () => {
+        test('it should create a markdown string from raw text and entities', () => {
+            const unparsed = '**hello** __hello__ ~~hello~~ `hello` ```hello``` [hello](https://hello.world)'
+            const strippedText = 'hello hello hello hello hello hello'
+            const rawEntities = [
+                new types.MessageEntityBold({ offset: 0, length: 5 }),
+                new types.MessageEntityItalic({ offset: 6, length: 5 }),
+                new types.MessageEntityStrike({ offset: 12, length: 5 }),
+                new types.MessageEntityCode({ offset: 18, length: 5 }),
+                new types.MessageEntityPre({ offset: 24, length: 5 }),
+                new types.MessageEntityTextUrl({ offset: 30, length: 5, url: 'https://hello.world' }),
+            ]
+            const text = MarkdownParser.unparse(strippedText, rawEntities)
+            expect(text).toEqual(unparsed)
+        })
+    })
+})

+ 100 - 0
__tests__/extensions/Scanner.spec.js

@@ -0,0 +1,100 @@
+const Scanner = require('../../gramjs/extensions/Scanner')
+
+const helloScanner = new Scanner('Hello world')
+
+describe('Scanner', () => {
+    beforeEach(() => helloScanner.reset())
+
+    test('it should construct a new Scanner', () => {
+        expect(helloScanner.str).toEqual('Hello world')
+        expect(helloScanner.pos).toEqual(0)
+        expect(helloScanner.lastMatch).toBeNull()
+    })
+
+    describe('.chr', () => {
+        test('it should return the character at the current pos', () => {
+            expect(helloScanner.chr).toEqual('H')
+        })
+    })
+
+    describe('.peek', () => {
+        test('it should return the character at the current pos', () => {
+            expect(helloScanner.peek()).toEqual('H')
+        })
+
+        test('it should return the next n characters', () => {
+            expect(helloScanner.peek(3)).toEqual('Hel')
+            expect(helloScanner.peek(5)).toEqual('Hello')
+        })
+    })
+
+    describe('.consume', () => {
+        test('it should consume the current character', () => {
+            const char = helloScanner.consume()
+            expect(char).toEqual('H')
+            expect(helloScanner.pos).toEqual(1)
+        })
+
+        test('it should consume the next n characters', () => {
+            const chars = helloScanner.consume(5)
+            expect(chars).toEqual('Hello')
+            expect(helloScanner.pos).toEqual(5)
+        })
+    })
+
+    describe('.reverse', () => {
+        test('it should set pos back n characters', () => {
+            helloScanner.consume(5)
+            helloScanner.reverse(5)
+            expect(helloScanner.pos).toEqual(0)
+        })
+
+        test('it should not go back further than 0', () => {
+            helloScanner.reverse(10)
+            expect(helloScanner.pos).toEqual(0)
+        })
+    })
+
+    describe('.scanUntil', () => {
+        test('it should scan the string for a regular expression starting at the current pos', () => {
+            helloScanner.scanUntil(/w/)
+            expect(helloScanner.pos).toEqual(6)
+        })
+
+        test('it should do nothing if the pattern is not found', () => {
+            helloScanner.scanUntil(/G/)
+            expect(helloScanner.pos).toEqual(0)
+        })
+    })
+
+    describe('.rest', () => {
+        test('it should return the unconsumed input', () => {
+            helloScanner.consume(6)
+            expect(helloScanner.rest).toEqual('world')
+        })
+    })
+
+    describe('.reset', () => {
+        test('it should reset the pos to 0', () => {
+            helloScanner.consume(5)
+            helloScanner.reset()
+            expect(helloScanner.pos).toEqual(0)
+        })
+    })
+
+    describe('.eof', () => {
+        test('it should return true if the scanner has reached the end of the input', () => {
+            expect(helloScanner.eof()).toBe(false)
+            helloScanner.consume(11)
+            expect(helloScanner.eof()).toBe(true)
+        })
+    })
+
+    describe('.bof', () => {
+        test('it should return true if pos is 0', () => {
+            expect(helloScanner.bof()).toBe(true)
+            helloScanner.consume(11)
+            expect(helloScanner.bof()).toBe(false)
+        })
+    })
+})

+ 3 - 3
gramjs/Utils.js

@@ -1,7 +1,7 @@
 const path = require('path')
 const mime = require('mime-types')
 const struct = require('python-struct')
-const { MarkdownParser, HTMLParser } = require('./extensions')
+const { markdown, html } = require('./extensions')
 const { types } = require('./tl')
 
 const USERNAME_RE = new RegExp('@|(?:https?:\\/\\/)?(?:www\\.)?' +
@@ -892,10 +892,10 @@ function sanitizeParseMode(mode) {
         switch (mode.toLowerCase()) {
         case 'md':
         case 'markdown':
-            return MarkdownParser
+            return markdown
         case 'htm':
         case 'html':
-            return HTMLParser
+            return html
         default:
             throw new Error(`Unknown parse mode ${mode}`)
         }

+ 273 - 0
gramjs/extensions/HTML.js

@@ -0,0 +1,273 @@
+/* eslint-disable no-extend-native */
+/* eslint-disable no-case-declarations, no-fallthrough */
+const Scanner = require('./Scanner')
+const {
+    MessageEntityBold, MessageEntityItalic, MessageEntityCode,
+    MessageEntityPre, MessageEntityEmail, MessageEntityTextUrl,
+    MessageEntityUnderline, MessageEntityStrike, MessageEntityBlockquote,
+} = require('../tl/types')
+
+/**
+ * Scanner-based parser for a small subset of HTML.
+ *
+ * `parse()` strips the markup from the input and returns the remaining text
+ * together with the message entities described by the tags; the static
+ * `unparse()` performs the reverse transformation.
+ */
+class HTMLParser extends Scanner {
+    /**
+     * @param {string} str HTML source that `parse()` will walk through.
+     */
+    constructor(str) {
+        super(str)
+        // Text collected so far, with all markup removed.
+        this.text = ''
+        // Entities whose closing tag has already been seen.
+        this.entities = []
+        // Entities still waiting for their closing tag, keyed by tag name.
+        this._buildingEntities = {}
+        // Stack of the currently open tag names, most recent first.
+        this._openTags = []
+        // Metadata parallel to `_openTags` (the `mailto:` URL or null).
+        this._openTagsMeta = []
+    }
+
+    /**
+     * Walks the whole input, feeding text to `handleData` and tags to
+     * `handleStartTag` / `handleEndTag`.
+     *
+     * @return {Array} a `[text, entities]` pair.
+     */
+    parse() {
+        while (!this.eof()) {
+            if (this.peek(1) !== '<') {
+                this.handleData(this.chr)
+                this.pos += 1
+                continue
+            }
+
+            const tagStart = this.pos
+            this.consume(1)
+
+            const isClosing = this.peek(1) === '/'
+            if (isClosing) {
+                this.consume(1)
+            }
+
+            const raw = this.scanUntil('>')
+            if (typeof raw !== 'string') {
+                // There is no closing bracket, so this is not a tag at all.
+                // Rewind and keep the `<` as ordinary text instead of
+                // crashing on a null scan result.
+                this.pos = tagStart
+                this.handleData(this.chr)
+                this.pos += 1
+                continue
+            }
+
+            // Consume the closing bracket
+            this.consume(1)
+
+            if (isClosing) {
+                this.handleEndTag(raw.trim())
+            } else {
+                const [tag, attrs] = HTMLParser._parseTag(raw)
+                this.handleStartTag(tag, attrs)
+            }
+        }
+
+        return [this.text, this.entities]
+    }
+
+    /**
+     * Splits the inside of an opening tag into its name and attributes.
+     *
+     * Attribute values may be double quoted, single quoted or bare; an
+     * attribute without a value is stored as `true`. Only the first `=` of
+     * an attribute separates key and value, so values such as URLs with a
+     * query string survive intact.
+     *
+     * @param {string} raw the text between `<` and `>`.
+     * @return {Array} a `[tagName, attributes]` pair.
+     */
+    static _parseTag(raw) {
+        const trimmed = raw.trim()
+        const nameMatch = trimmed.match(/^\S+/)
+        const tag = nameMatch ? nameMatch[0] : ''
+        const attrs = {}
+
+        // A fresh literal per call, so `lastIndex` never leaks between calls.
+        const attrRe = /([^\s=]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+)))?/g
+        const attrSource = trimmed.slice(tag.length)
+
+        let match
+        while ((match = attrRe.exec(attrSource)) !== null) {
+            let value = true
+            if (match[2] !== undefined) {
+                value = match[2]
+            } else if (match[3] !== undefined) {
+                value = match[3]
+            } else if (match[4] !== undefined) {
+                // Bare value; drop a stray unbalanced quote if there is one.
+                value = match[4].replace(/^('|")|('|")$/g, '')
+            }
+            attrs[match[1]] = value
+        }
+
+        return [tag, attrs]
+    }
+
+    /**
+     * Tells whether `index` points at the low half of a UTF-16 surrogate
+     * pair, i.e. whether cutting `text` there would split one character.
+     *
+     * @param {string} text
+     * @param {number} index
+     * @return {boolean}
+     */
+    static _withinSurrogate(text, index) {
+        if (index <= 0 || index >= text.length) {
+            return false
+        }
+
+        const previous = text.charCodeAt(index - 1)
+        const current = text.charCodeAt(index)
+        return previous >= 0xd800 && previous <= 0xdbff &&
+               current >= 0xdc00 && current <= 0xdfff
+    }
+
+    /**
+     * Rebuilds an HTML string from plain text and its entities.
+     *
+     * Entities are expected in ascending offset order. Entities that start
+     * inside an already emitted entity are rendered by the recursive call
+     * for that entity, which is how nesting is produced.
+     *
+     * NOTE(review): the text is emitted without HTML escaping, exactly as
+     * before; confirm whether callers rely on that.
+     *
+     * @param {string} text plain text the entities refer to.
+     * @param {Array} entities message entities for `text`.
+     * @param {number} _offset internal: absolute offset of `text`.
+     * @param {?number} _length internal: length of the current slice.
+     * @return {string} the generated HTML.
+     */
+    static unparse(text, entities, _offset = 0, _length = null) {
+        if (!entities || entities.length === 0) {
+            return text
+        }
+
+        if (!_length) {
+            _length = text.length
+        }
+
+        const html = []
+        let lastOffset = 0
+
+        for (const [i, entity] of entities.entries()) {
+            // `>=`: an entity that starts exactly at the end of this slice
+            // belongs to the caller, not to the slice.
+            if (entity.offset >= _offset + _length) {
+                break
+            }
+
+            const relativeOffset = entity.offset - _offset
+            if (relativeOffset > lastOffset) {
+                html.push(text.substring(lastOffset, relativeOffset))
+            } else if (relativeOffset < lastOffset) {
+                // Already rendered as part of an enclosing entity.
+                continue
+            }
+
+            let skipEntity = false
+            let length = entity.length
+
+            // Never end an entity in the middle of a surrogate pair.
+            while (HTMLParser._withinSurrogate(text, relativeOffset + length)) {
+                length += 1
+            }
+
+            // Refer to the class rather than `this`, so the method keeps
+            // working when it is called detached from the class.
+            const entityText = HTMLParser.unparse(
+                text.substring(relativeOffset, relativeOffset + length),
+                entities.slice(i + 1, entities.length),
+                entity.offset,
+                length,
+            )
+
+            const entityType = entity.constructor.name
+
+            switch (entityType) {
+            case 'MessageEntityBold':
+                html.push(`<strong>${entityText}</strong>`)
+                break
+            case 'MessageEntityItalic':
+                html.push(`<em>${entityText}</em>`)
+                break
+            case 'MessageEntityCode':
+                html.push(`<code>${entityText}</code>`)
+                break
+            case 'MessageEntityUnderline':
+                html.push(`<u>${entityText}</u>`)
+                break
+            case 'MessageEntityStrike':
+                html.push(`<del>${entityText}</del>`)
+                break
+            case 'MessageEntityBlockquote':
+                html.push(`<blockquote>${entityText}</blockquote>`)
+                break
+            case 'MessageEntityPre':
+                if (entity.language) {
+                    html.push(`<pre>
+                      <code class="language-${entity.language}">
+                        ${entityText}
+                      </code>
+                    </pre>`)
+                } else {
+                    html.push(`<pre>${entityText}</pre>`)
+                }
+                break
+            case 'MessageEntityEmail':
+                html.push(`<a href="mailto:${entityText}">${entityText}</a>`)
+                break
+            case 'MessageEntityUrl':
+                html.push(`<a href="${entityText}">${entityText}</a>`)
+                break
+            case 'MessageEntityTextUrl':
+                html.push(`<a href="${entity.url}">${entityText}</a>`)
+                break
+            case 'MessageEntityMentionName':
+                html.push(`<a href="tg://user?id=${entity.userId}">${entityText}</a>`)
+                break
+            default:
+                // Unknown entity: emit nothing here and let the text flow
+                // through as plain text.
+                skipEntity = true
+            }
+
+            lastOffset = relativeOffset + (skipEntity ? 0 : length)
+        }
+
+        // Do not resume in the middle of a surrogate pair.
+        while (HTMLParser._withinSurrogate(text, lastOffset)) {
+            lastOffset += 1
+        }
+
+        html.push(text.substring(lastOffset, text.length))
+        return html.join('')
+    }
+
+    /**
+     * Registers an opening tag and, for supported tags, starts building the
+     * matching entity at the current end of `this.text`.
+     *
+     * @param {string} tag tag name.
+     * @param {Object} attrs attribute map; valueless attributes are `true`.
+     */
+    handleStartTag(tag, attrs = {}) {
+        this._openTags.unshift(tag)
+        this._openTagsMeta.unshift(null)
+
+        let EntityType
+        const args = {}
+
+        switch (tag) {
+        case 'b':
+        case 'strong':
+            EntityType = MessageEntityBold
+            break
+        case 'i':
+        case 'em':
+            EntityType = MessageEntityItalic
+            break
+        case 'u':
+            EntityType = MessageEntityUnderline
+            break
+        case 's':
+        case 'del':
+            EntityType = MessageEntityStrike
+            break
+        case 'blockquote':
+            EntityType = MessageEntityBlockquote
+            break
+        case 'code': {
+            // If we're in the middle of a <pre> tag, this <code> tag is
+            // probably intended for syntax highlighting.
+            //
+            // Syntax highlighting is set with
+            //     <code class='language-...'>codeblock</code>
+            // inside <pre> tags
+            const pre = this._buildingEntities['pre']
+            const className = attrs['class']
+            // The class may be missing, valueless or without a `language-`
+            // token; none of those may throw.
+            const languageMatch = typeof className === 'string' ?
+                className.match(/language-(\S+)/) : null
+            const language = languageMatch ? languageMatch[1] : null
+            if (pre && language) {
+                pre.language = language
+            } else {
+                EntityType = MessageEntityCode
+            }
+            break
+        }
+        case 'pre':
+            EntityType = MessageEntityPre
+            args['language'] = ''
+            break
+        case 'a': {
+            let url = attrs['href']
+            // Ignore anchors with a missing or valueless href.
+            if (typeof url !== 'string' || !url) return
+
+            if (url.indexOf('mailto:') === 0) {
+                EntityType = MessageEntityEmail
+            } else {
+                EntityType = MessageEntityTextUrl
+                args['url'] = url
+                url = null
+            }
+
+            // Replace the placeholder pushed at the top of this method.
+            this._openTagsMeta.shift()
+            this._openTagsMeta.unshift(url)
+            break
+        }
+        default:
+            // Do nothing
+        }
+
+        if (EntityType && !(tag in this._buildingEntities)) {
+            this._buildingEntities[tag] = new EntityType({
+                offset: this.text.length,
+                // The length will be determined when closing the tag.
+                length: 0,
+                ...args,
+            })
+        }
+    }
+
+    /**
+     * Appends text to the output and grows every entity that is still open.
+     *
+     * @param {string} text text found between tags.
+     */
+    handleData(text) {
+        for (const [, entity] of Object.entries(this._buildingEntities)) {
+            entity.length += text.length
+        }
+
+        this.text += text
+    }
+
+    /**
+     * Closes the most recently opened tag and, when an entity was being
+     * built for `tag`, moves it to the finished list.
+     *
+     * @param {string} tag tag name.
+     */
+    handleEndTag(tag) {
+        this._openTags.shift()
+        this._openTagsMeta.shift()
+
+        // Own properties only: a tag such as `</constructor>` must not pick
+        // up members inherited from Object.prototype.
+        const hasEntity = Object.prototype.hasOwnProperty.call(this._buildingEntities, tag)
+        if (hasEntity) {
+            const entity = this._buildingEntities[tag]
+            delete this._buildingEntities[tag]
+            this.entities.push(entity)
+        }
+    }
+}
+
+/**
+ * Convenience wrapper: parses `str` with a fresh HTMLParser.
+ *
+ * @param {string} str HTML source.
+ * @return {Array} the `[text, entities]` pair produced by the parser.
+ */
+const parse = (str) => {
+    const parser = new HTMLParser(str)
+    return parser.parse()
+}
+
+// Bound to the class because the static method refers to `this` when it
+// recurses; an unbound reference breaks as soon as it is destructured or
+// passed around as a plain function.
+const unparse = HTMLParser.unparse.bind(HTMLParser)
+
+module.exports = {
+    HTMLParser,
+    parse,
+    unparse,
+}

+ 27 - 20
gramjs/extensions/Markdown.js

@@ -11,7 +11,7 @@ const URL_RE = /\[([\S\s]+?)\]\((.+?)\)/
 const DELIMITERS = {
     'MessageEntityBold': '**',
     'MessageEntityItalic': '__',
-    'MessageEntityCode': '``',
+    'MessageEntityCode': '`',
     'MessageEntityPre': '```',
     'MessageEntityStrike': '~~',
 }
@@ -19,17 +19,13 @@ const DELIMITERS = {
 class MarkdownParser extends Scanner {
     constructor(str) {
         super(str)
-        this.stripped = ''
+        this.text = ''
         this.entities = []
     }
 
-    get strippedPos() {
-        return this.stripped.length - 1
-    }
-
     parse() {
         // Do a little reset
-        this.stripped = ''
+        this.text = ''
         this.entities = []
 
         while (!this.eof()) {
@@ -49,18 +45,18 @@ class MarkdownParser extends Scanner {
             case '`':
                 if (this.peek(3) == '```') {
                     if (this.parseEntity(MessageEntityPre, '```')) break
-                } else if (this.peek(2) == '``') {
-                    if (this.parseEntity(MessageEntityCode, '``')) break
+                } else if (this.peek(1) == '`') {
+                    if (this.parseEntity(MessageEntityCode, '`')) break
                 }
             case '[':
                 if (this.parseURL()) break
             default:
-                this.stripped += this.chr
+                this.text += this.chr
                 this.pos += 1
             }
         }
 
-        return [this.stripped, this.entities]
+        return [this.text, this.entities]
     }
 
     static unparse(text, entities) {
@@ -71,13 +67,12 @@ class MarkdownParser extends Scanner {
         for (const entity of entities) {
             const s = entity.offset
             const e = entity.offset + entity.length
-            const delimiter = DELIMITERS[typeof(entity)]
+            const delimiter = DELIMITERS[entity.constructor.name]
             if (delimiter) {
                 insertAt.push([s, delimiter])
                 insertAt.push([e, delimiter])
             } else {
                 let url = null
-
                 if (entity instanceof MessageEntityTextUrl) {
                     url = entity.url
                 } else if (entity instanceof MessageEntityMentionName) {
@@ -107,8 +102,8 @@ class MarkdownParser extends Scanner {
 
     parseEntity(EntityType, delimiter) {
         // The offset for this entity should be the end of the
-        // stripped string
-        const offset = this.strippedPos
+        // text string
+        const offset = this.text.length
 
         // Consume the delimiter
         this.consume(delimiter.length)
@@ -121,8 +116,8 @@ class MarkdownParser extends Scanner {
             // Consume the delimiter again
             this.consume(delimiter.length)
 
-            // Add the entire content to the stripped content
-            this.stripped += content
+            // Add the entire content to the text
+            this.text += content
 
             // Create and return a new Entity
             const entity = new EntityType({
@@ -140,11 +135,12 @@ class MarkdownParser extends Scanner {
 
         const [full, txt, url] = match
         const len = full.length
+        const offset = this.text.length
 
-        this.stripped += txt
+        this.text += txt
 
         const entity = new MessageEntityTextUrl({
-            offset: this.pos,
+            offset: offset,
             length: txt.length,
             url: url,
         })
@@ -156,4 +152,15 @@ class MarkdownParser extends Scanner {
     }
 }
 
-module.exports = MarkdownParser
+const parse = (str) => {
+    const parser = new MarkdownParser(str)
+    return parser.parse()
+}
+
+const unparse = MarkdownParser.unparse
+
+module.exports = {
+    MarkdownParser,
+    parse,
+    unparse,
+}

+ 11 - 5
gramjs/extensions/Scanner.js

@@ -15,7 +15,7 @@ class Scanner {
 
     reverse(n = 1) {
         const pos = this.pos - n
-        return pos < 0 ? 0 : pos
+        this.pos = pos < 0 ? 0 : pos
     }
 
     consume(n = 1) {
@@ -23,7 +23,13 @@ class Scanner {
     }
 
     scanUntil(re, consumeMatch = false) {
-        const match = this.lastMatch = this.rest.match(re)
+        let match
+        try {
+            match = this.lastMatch = this.rest.match(re)
+        } catch {
+            match = null
+        }
+
         if (!match) return null
 
         let len = match.index
@@ -33,7 +39,7 @@ class Scanner {
     }
 
     get rest() {
-        return this.str.slice(this.pos, this.str.length)
+        return this.str.slice(this.pos, this.str.length) || null
     }
 
     reset() {
@@ -41,11 +47,11 @@ class Scanner {
     }
 
     bof() {
-        return this.pos === 0
+        return this.pos <= 0
     }
 
     eof() {
-        return this.pos === this.str.length
+        return this.pos >= this.str.length
     }
 }
 

+ 10 - 4
gramjs/extensions/index.js

@@ -6,8 +6,8 @@ const MessagePacker = require('./MessagePacker')
 const AsyncQueue = require('./AsyncQueue')
 const PromisedNetSocket = require('./PromisedNetSockets')
 const Scanner = require('./Scanner')
-const MarkdownParser = require('./Markdown')
-const HTMLParser = null
+const markdown = require('./Markdown')
+const html = require('./HTML')
 
 module.exports = {
     BinaryWriter,
@@ -18,6 +18,12 @@ module.exports = {
     PromisedWebSockets,
     PromisedNetSocket,
     Scanner,
-    MarkdownParser,
-    HTMLParser,
+    markdown: {
+        parse: markdown.parse,
+        unparse: markdown.unparse,
+    },
+    html: {
+        parse: html.parse,
+        unparse: html.unparse,
+    }
 }

+ 132 - 132
jest.config.js

@@ -2,187 +2,187 @@
 // https://jestjs.io/docs/en/configuration.html
 
 module.exports = {
-  // All imported modules in your tests should be mocked automatically
-  // automock: false,
+    // All imported modules in your tests should be mocked automatically
+    // automock: false,
 
-  // Stop running tests after `n` failures
-  // bail: 0,
+    // Stop running tests after `n` failures
+    // bail: 0,
 
-  // Respect "browser" field in package.json when resolving modules
-  // browser: false,
+    // Respect "browser" field in package.json when resolving modules
+    // browser: false,
 
-  // The directory where Jest should store its cached dependency information
-  // cacheDirectory: "C:\\Users\\painor\\AppData\\Local\\Temp\\jest",
+    // The directory where Jest should store its cached dependency information
+    // cacheDirectory: "C:\\Users\\painor\\AppData\\Local\\Temp\\jest",
 
-  // Automatically clear mock calls and instances between every test
-  clearMocks: true,
+    // Automatically clear mock calls and instances between every test
+    clearMocks: true,
 
-  // Indicates whether the coverage information should be collected while executing the test
-  // collectCoverage: false,
+    // Indicates whether the coverage information should be collected while executing the test
+    // collectCoverage: false,
 
-  // An array of glob patterns indicating a set of files for which coverage information should be collected
-  // collectCoverageFrom: null,
+    // An array of glob patterns indicating a set of files for which coverage information should be collected
+    // collectCoverageFrom: null,
 
-  // The directory where Jest should output its coverage files
-  coverageDirectory: "coverage",
+    // The directory where Jest should output its coverage files
+    coverageDirectory: 'coverage',
 
-  // An array of regexp pattern strings used to skip coverage collection
-  // coveragePathIgnorePatterns: [
-  //   "\\\\node_modules\\\\"
-  // ],
+    // An array of regexp pattern strings used to skip coverage collection
+    // coveragePathIgnorePatterns: [
+    //   "\\\\node_modules\\\\"
+    // ],
 
-  // A list of reporter names that Jest uses when writing coverage reports
-  // coverageReporters: [
-  //   "json",
-  //   "text",
-  //   "lcov",
-  //   "clover"
-  // ],
+    // A list of reporter names that Jest uses when writing coverage reports
+    // coverageReporters: [
+    //   "json",
+    //   "text",
+    //   "lcov",
+    //   "clover"
+    // ],
 
-  // An object that configures minimum threshold enforcement for coverage results
-  // coverageThreshold: null,
+    // An object that configures minimum threshold enforcement for coverage results
+    // coverageThreshold: null,
 
-  // A path to a custom dependency extractor
-  // dependencyExtractor: null,
+    // A path to a custom dependency extractor
+    // dependencyExtractor: null,
 
-  // Make calling deprecated APIs throw helpful error messages
-  // errorOnDeprecated: false,
+    // Make calling deprecated APIs throw helpful error messages
+    // errorOnDeprecated: false,
 
-  // Force coverage collection from ignored files using an array of glob patterns
-  // forceCoverageMatch: [],
+    // Force coverage collection from ignored files using an array of glob patterns
+    // forceCoverageMatch: [],
 
-  // A path to a module which exports an async function that is triggered once before all test suites
-  // globalSetup: null,
+    // A path to a module which exports an async function that is triggered once before all test suites
+    // globalSetup: null,
 
-  // A path to a module which exports an async function that is triggered once after all test suites
-  // globalTeardown: null,
+    // A path to a module which exports an async function that is triggered once after all test suites
+    // globalTeardown: null,
 
-  // A set of global variables that need to be available in all test environments
-  // globals: {},
+    // A set of global variables that need to be available in all test environments
+    // globals: {},
 
-  // The maximum amount of workers used to run your tests. Can be specified as % or a number. E.g. maxWorkers: 10% will use 10% of your CPU amount + 1 as the maximum worker number. maxWorkers: 2 will use a maximum of 2 workers.
-  // maxWorkers: "50%",
+    // The maximum amount of workers used to run your tests. Can be specified as % or a number. E.g. maxWorkers: 10% will use 10% of your CPU amount + 1 as the maximum worker number. maxWorkers: 2 will use a maximum of 2 workers.
+    // maxWorkers: "50%",
 
-  // An array of directory names to be searched recursively up from the requiring module's location
-  // moduleDirectories: [
-  //   "node_modules"
-  // ],
+    // An array of directory names to be searched recursively up from the requiring module's location
+    // moduleDirectories: [
+    //   "node_modules"
+    // ],
 
-  // An array of file extensions your modules use
-  // moduleFileExtensions: [
-  //   "js",
-  //   "json",
-  //   "jsx",
-  //   "ts",
-  //   "tsx",
-  //   "node"
-  // ],
+    // An array of file extensions your modules use
+    // moduleFileExtensions: [
+    //   "js",
+    //   "json",
+    //   "jsx",
+    //   "ts",
+    //   "tsx",
+    //   "node"
+    // ],
 
-  // A map from regular expressions to module names that allow to stub out resources with a single module
-  // moduleNameMapper: {},
+    // A map from regular expressions to module names that allow to stub out resources with a single module
+    // moduleNameMapper: {},
 
-  // An array of regexp pattern strings, matched against all module paths before considered 'visible' to the module loader
-  // modulePathIgnorePatterns: [],
+    // An array of regexp pattern strings, matched against all module paths before considered 'visible' to the module loader
+    // modulePathIgnorePatterns: [],
 
-  // Activates notifications for test results
-  // notify: false,
+    // Activates notifications for test results
+    // notify: false,
 
-  // An enum that specifies notification mode. Requires { notify: true }
-  // notifyMode: "failure-change",
+    // An enum that specifies notification mode. Requires { notify: true }
+    // notifyMode: "failure-change",
 
-  // A preset that is used as a base for Jest's configuration
-  // preset: null,
+    // A preset that is used as a base for Jest's configuration
+    // preset: null,
 
-  // Run tests from one or more projects
-  // projects: null,
+    // Run tests from one or more projects
+    // projects: null,
 
-  // Use this configuration option to add custom reporters to Jest
-  // reporters: undefined,
+    // Use this configuration option to add custom reporters to Jest
+    // reporters: undefined,
 
-  // Automatically reset mock state between every test
-  // resetMocks: false,
+    // Automatically reset mock state between every test
+    // resetMocks: false,
 
-  // Reset the module registry before running each individual test
-  // resetModules: false,
+    // Reset the module registry before running each individual test
+    // resetModules: false,
 
-  // A path to a custom resolver
-  // resolver: null,
+    // A path to a custom resolver
+    // resolver: null,
 
-  // Automatically restore mock state between every test
-  // restoreMocks: false,
+    // Automatically restore mock state between every test
+    // restoreMocks: false,
 
-  // The root directory that Jest should scan for tests and modules within
-  // rootDir: null,
+    // The root directory that Jest should scan for tests and modules within
+    // rootDir: null,
 
-  // A list of paths to directories that Jest should use to search for files in
-  // roots: [
-  //   "<rootDir>"
-  // ],
+    // A list of paths to directories that Jest should use to search for files in
+    // roots: [
+    //   "<rootDir>"
+    // ],
 
-  // Allows you to use a custom runner instead of Jest's default test runner
-  // runner: "jest-runner",
+    // Allows you to use a custom runner instead of Jest's default test runner
+    // runner: "jest-runner",
 
-  // The paths to modules that run some code to configure or set up the testing environment before each test
-  // setupFiles: [],
+    // The paths to modules that run some code to configure or set up the testing environment before each test
+    // setupFiles: [],
 
-  // A list of paths to modules that run some code to configure or set up the testing framework before each test
-  // setupFilesAfterEnv: [],
+    // A list of paths to modules that run some code to configure or set up the testing framework before each test
+    // setupFilesAfterEnv: [],
 
-  // A list of paths to snapshot serializer modules Jest should use for snapshot testing
-  // snapshotSerializers: [],
+    // A list of paths to snapshot serializer modules Jest should use for snapshot testing
+    // snapshotSerializers: [],
 
-  // The test environment that will be used for testing
-  testEnvironment: "node",
+    // The test environment that will be used for testing
+    testEnvironment: 'node',
 
-  // Options that will be passed to the testEnvironment
-  // testEnvironmentOptions: {},
+    // Options that will be passed to the testEnvironment
+    // testEnvironmentOptions: {},
 
-  // Adds a location field to test results
-  // testLocationInResults: false,
+    // Adds a location field to test results
+    // testLocationInResults: false,
 
-  // The glob patterns Jest uses to detect test files
-  // testMatch: [
-  //   "**/__tests__/**/*.[jt]s?(x)",
-  //   "**/?(*.)+(spec|test).[tj]s?(x)"
-  // ],
+    // The glob patterns Jest uses to detect test files
+    // testMatch: [
+    //   "**/__tests__/**/*.[jt]s?(x)",
+    //   "**/?(*.)+(spec|test).[tj]s?(x)"
+    // ],
 
-  // An array of regexp pattern strings that are matched against all test paths, matched tests are skipped
-  // testPathIgnorePatterns: [
-  //   "\\\\node_modules\\\\"
-  // ],
+    // An array of regexp pattern strings that are matched against all test paths, matched tests are skipped
+    // testPathIgnorePatterns: [
+    //   "\\\\node_modules\\\\"
+    // ],
 
-  // The regexp pattern or array of patterns that Jest uses to detect test files
-  // testRegex: [],
+    // The regexp pattern or array of patterns that Jest uses to detect test files
+    // testRegex: [],
 
-  // This option allows the use of a custom results processor
-  // testResultsProcessor: null,
+    // This option allows the use of a custom results processor
+    // testResultsProcessor: null,
 
-  // This option allows use of a custom test runner
-  // testRunner: "jasmine2",
+    // This option allows use of a custom test runner
+    // testRunner: "jasmine2",
 
-  // This option sets the URL for the jsdom environment. It is reflected in properties such as location.href
-  // testURL: "http://localhost",
+    // This option sets the URL for the jsdom environment. It is reflected in properties such as location.href
+    // testURL: "http://localhost",
 
-  // Setting this value to "fake" allows the use of fake timers for functions such as "setTimeout"
-  // timers: "real",
+    // Setting this value to "fake" allows the use of fake timers for functions such as "setTimeout"
+    // timers: "real",
 
-  // A map from regular expressions to paths to transformers
-  // transform: null,
+    // A map from regular expressions to paths to transformers
+    // transform: null,
 
-  // An array of regexp pattern strings that are matched against all source file paths, matched files will skip transformation
-  // transformIgnorePatterns: [
-  //   "\\\\node_modules\\\\"
-  // ],
+    // An array of regexp pattern strings that are matched against all source file paths, matched files will skip transformation
+    // transformIgnorePatterns: [
+    //   "\\\\node_modules\\\\"
+    // ],
 
-  // An array of regexp pattern strings that are matched against all modules before the module loader will automatically return a mock for them
-  // unmockedModulePathPatterns: undefined,
+    // An array of regexp pattern strings that are matched against all modules before the module loader will automatically return a mock for them
+    // unmockedModulePathPatterns: undefined,
 
-  // Indicates whether each individual test should be reported during the run
-  // verbose: null,
+    // Indicates whether each individual test should be reported during the run
+    // verbose: null,
 
-  // An array of regexp patterns that are matched against all source file paths before re-running tests in watch mode
-  // watchPathIgnorePatterns: [],
+    // An array of regexp patterns that are matched against all source file paths before re-running tests in watch mode
+    // watchPathIgnorePatterns: [],
 
-  // Whether to use watchman for file crawling
-  // watchman: true,
-};
+    // Whether to use watchman for file crawling
+    // watchman: true,
+}

文件差異過大導致無法顯示
+ 1287 - 257
package-lock.json


部分文件因文件數量過多而無法顯示