Bläddra i källkod

Fixes Microsoft/monaco-editor#1009

Alex Dima 7 år sedan
förälder
incheckning
711d51d9fc

+ 41 - 0
src/javascript/javascript.test.ts

@@ -430,6 +430,47 @@ testTokenization('javascript', [
 		]
 	}],
 
+[{
+		line: 'x = /foo/.test(\'\')',
+		tokens: [
+			{ startIndex: 0, type: 'identifier.js' },
+			{ startIndex: 1, type: '' },
+			{ startIndex: 2, type: 'delimiter.js' },
+			{ startIndex: 3, type: '' },
+			{ startIndex: 4, type: 'regexp.js' },
+			{ startIndex: 9, type: 'delimiter.js' },
+			{ startIndex: 10, type: 'identifier.js' },
+			{ startIndex: 14, type: 'delimiter.parenthesis.js' },
+			{ startIndex: 15, type: 'string.js' },
+			{ startIndex: 17, type: 'delimiter.parenthesis.js' }
+		]
+	}],
+
+	[{
+		line: 'x = 1 + f(2 / 3, /foo/)',
+		tokens: [
+			{ startIndex: 0, type: 'identifier.js' },
+			{ startIndex: 1, type: '' },
+			{ startIndex: 2, type: 'delimiter.js' },
+			{ startIndex: 3, type: '' },
+			{ startIndex: 4, type: 'number.js' },
+			{ startIndex: 5, type: '' },
+			{ startIndex: 6, type: 'delimiter.js' },
+			{ startIndex: 7, type: '' },
+			{ startIndex: 8, type: 'identifier.js' },
+			{ startIndex: 9, type: 'delimiter.parenthesis.js' },
+			{ startIndex: 10, type: 'number.js' },
+			{ startIndex: 11, type: '' },
+			{ startIndex: 12, type: 'delimiter.js' },
+			{ startIndex: 13, type: '' },
+			{ startIndex: 14, type: 'number.js' },
+			{ startIndex: 15, type: 'delimiter.js' },
+			{ startIndex: 16, type: '' },
+			{ startIndex: 17, type: 'regexp.js' },
+			{ startIndex: 22, type: 'delimiter.parenthesis.js' }
+		]
+	}],
+
 	[{
 		line: 'a /ads/ b;',
 		tokens: [

+ 2 - 0
src/javascript/javascript.ts

@@ -37,5 +37,7 @@ export const language = <ILanguage>{
 	octaldigits: tsLanguage.octaldigits,
 	binarydigits: tsLanguage.binarydigits,
 	hexdigits: tsLanguage.hexdigits,
+	regexpctl: tsLanguage.regexpctl,
+	regexpesc: tsLanguage.regexpesc,
 	tokenizer: tsLanguage.tokenizer,
 };

+ 41 - 0
src/typescript/typescript.test.ts

@@ -430,6 +430,47 @@ testTokenization('typescript', [
 		]
 	}],
 
+	[{
+		line: 'x = /foo/.test(\'\')',
+		tokens: [
+			{ startIndex: 0, type: 'identifier.ts' },
+			{ startIndex: 1, type: '' },
+			{ startIndex: 2, type: 'delimiter.ts' },
+			{ startIndex: 3, type: '' },
+			{ startIndex: 4, type: 'regexp.ts' },
+			{ startIndex: 9, type: 'delimiter.ts' },
+			{ startIndex: 10, type: 'identifier.ts' },
+			{ startIndex: 14, type: 'delimiter.parenthesis.ts' },
+			{ startIndex: 15, type: 'string.ts' },
+			{ startIndex: 17, type: 'delimiter.parenthesis.ts' }
+		]
+	}],
+
+	[{
+		line: 'x = 1 + f(2 / 3, /foo/)',
+		tokens: [
+			{ startIndex: 0, type: 'identifier.ts' },
+			{ startIndex: 1, type: '' },
+			{ startIndex: 2, type: 'delimiter.ts' },
+			{ startIndex: 3, type: '' },
+			{ startIndex: 4, type: 'number.ts' },
+			{ startIndex: 5, type: '' },
+			{ startIndex: 6, type: 'delimiter.ts' },
+			{ startIndex: 7, type: '' },
+			{ startIndex: 8, type: 'identifier.ts' },
+			{ startIndex: 9, type: 'delimiter.parenthesis.ts' },
+			{ startIndex: 10, type: 'number.ts' },
+			{ startIndex: 11, type: '' },
+			{ startIndex: 12, type: 'delimiter.ts' },
+			{ startIndex: 13, type: '' },
+			{ startIndex: 14, type: 'number.ts' },
+			{ startIndex: 15, type: 'delimiter.ts' },
+			{ startIndex: 16, type: '' },
+			{ startIndex: 17, type: 'regexp.ts' },
+			{ startIndex: 22, type: 'delimiter.parenthesis.ts' }
+		]
+	}],
+
 	[{
 		line: 'a /ads/ b;',
 		tokens: [

+ 28 - 1
src/typescript/typescript.ts

@@ -104,6 +104,9 @@ export const language = {
 	binarydigits: /[0-1]+(_+[0-1]+)*/,
 	hexdigits: /[[0-9a-fA-F]+(_+[0-9a-fA-F]+)*/,
 
+	regexpctl: /[(){}\[\]\$\^|\-*+?\.]/,
+	regexpesc: /\\(?:[bBdDfnrstvwWn0\\\/]|@regexpctl|c[A-Z]|x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4})/,
+
 	// The main tokenizer for our languages
 	tokenizer: {
 		root: [
@@ -120,12 +123,15 @@ export const language = {
 					'@default': 'identifier'
 				}
 			}],
-			[/[A-Z][\w\$]*/, 'type.identifier' ],  // to show class names nicely
+			[/[A-Z][\w\$]*/, 'type.identifier'],  // to show class names nicely
 			// [/[A-Z][\w\$]*/, 'identifier'],
 
 			// whitespace
 			{ include: '@whitespace' },
 
+			// regular expression: ensure it is terminated before beginning (otherwise it is an operator)
+			[/\/(?=([^\\\/]|\\.)+\/([gimuy]*)(\s*)(\.|;|\/|,|\)|\]|\}|$))/, { token: 'regexp', bracket: '@open', next: '@regexp' }],
+
 			// delimiters and operators
 			[/[()\[\]]/, '@brackets'],
 			[/[<>](?!@symbols)/, '@brackets'],
@@ -174,6 +180,27 @@ export const language = {
 			[/[\/*]/, 'comment.doc']
 		],
 
+		// We match regular expressions quite precisely
+		regexp: [
+			[/(\{)(\d+(?:,\d*)?)(\})/, ['regexp.escape.control', 'regexp.escape.control', 'regexp.escape.control']],
+			[/(\[)(\^?)(?=(?:[^\]\\\/]|\\.)+)/, ['regexp.escape.control', { token: 'regexp.escape.control', next: '@regexrange' }]],
+			[/(\()(\?:|\?=|\?!)/, ['regexp.escape.control', 'regexp.escape.control']],
+			[/[()]/, 'regexp.escape.control'],
+			[/@regexpctl/, 'regexp.escape.control'],
+			[/[^\\\/]/, 'regexp'],
+			[/@regexpesc/, 'regexp.escape'],
+			[/\\\./, 'regexp.invalid'],
+			['/', { token: 'regexp', bracket: '@close' }, '@pop'],
+		],
+
+		regexrange: [
+			[/-/, 'regexp.escape.control'],
+			[/\^/, 'regexp.invalid'],
+			[/@regexpesc/, 'regexp.escape'],
+			[/[^\]]/, 'regexp'],
+			[/\]/, '@brackets.regexp.escape.control', '@pop'],
+		],
+
 		string_double: [
 			[/[^\\"]+/, 'string'],
 			[/@escapes/, 'string.escape'],