Selaa lähdekoodia

Merge pull request #56 from finalfantasia/master

[clojure] Improve the regular expressions for various symbols
Alexandru Dima 6 vuotta sitten
vanhempi
commit
f9595842a3
3 muutettua tiedostoa, joissa 129 lisäystä ja 38 poistoa
  1. 1 1
      src/clojure/clojure.contribution.ts
  2. 90 7
      src/clojure/clojure.test.ts
  3. 38 30
      src/clojure/clojure.ts

+ 1 - 1
src/clojure/clojure.contribution.ts

@@ -8,7 +8,7 @@ import { registerLanguage } from '../_.contribution';
 
 registerLanguage({
 	id: 'clojure',
-	extensions: ['.clj', '.clojure'],
+	extensions: ['.clj', '.cljs', '.cljc', '.edn'],
 	aliases: ['clojure', 'Clojure'],
 	loader: () => import('./clojure')
 });

+ 90 - 7
src/clojure/clojure.test.ts

@@ -754,13 +754,72 @@ testTokenization('clojure', [
 	], 'string'),
 
 	// strings
-	createTestCases([
-		'\"I\'m a little teapot.\"',
-		'\"I\'m a \\\"little\\\" teapot.\"',
-		'\"I\'m',      // this is
-		'a little',    // a multi-line
-		'teapot.\"'    // string
-	], 'string'),
+	[
+		{
+			line: '"I\'m a little teapot."',
+			tokens: [
+				{startIndex: 0, type: 'string.clj'},
+			]
+		},
+		{
+			line: '"I\'m a \\"little\\" teapot."',
+			tokens: [
+				{startIndex: 0, type: 'string.clj'},
+				{startIndex: 7, type: 'string.escape.clj'},
+				{startIndex: 9, type: 'string.clj'},
+				{startIndex: 15, type: 'string.escape.clj'},
+				{startIndex: 17, type: 'string.clj'},
+			]
+		}
+	],
+
+	// multi-line strings
+	[
+		{
+			line: '"I\'m',
+			tokens: [
+				{startIndex: 0, type: 'string.clj'},
+			]
+		},
+		{
+			line: '\\"a little\\"',
+			tokens: [
+				{startIndex: 0, type: 'string.escape.clj'},
+				{startIndex: 2, type: 'string.clj'},
+				{startIndex: 10, type: 'string.escape.clj'},
+			]
+		},
+		{
+			line: 'teapot."',
+			tokens: [
+				{startIndex: 0, type: 'string.clj'},
+			]
+		}
+	],
+
+	// strings with other escapes in them (\" \' \\ \b \f \n \r \t)
+	[{
+		line: '"the escape \\" \\\' \\\\ \\b \\f \\n \\r \\t characters"',
+		tokens: [
+			{startIndex: 0, type: 'string.clj'},
+			{startIndex: 12, type: 'string.escape.clj'},
+			{startIndex: 14, type: 'string.clj'},
+			{startIndex: 15, type: 'string.escape.clj'},
+			{startIndex: 17, type: 'string.clj'},
+			{startIndex: 18, type: 'string.escape.clj'},
+			{startIndex: 20, type: 'string.clj'},
+			{startIndex: 21, type: 'string.escape.clj'},
+			{startIndex: 23, type: 'string.clj'},
+			{startIndex: 24, type: 'string.escape.clj'},
+			{startIndex: 26, type: 'string.clj'},
+			{startIndex: 27, type: 'string.escape.clj'},
+			{startIndex: 29, type: 'string.clj'},
+			{startIndex: 30, type: 'string.escape.clj'},
+			{startIndex: 32, type: 'string.clj'},
+			{startIndex: 33, type: 'string.escape.clj'},
+			{startIndex: 35, type: 'string.clj'},
+		]
+	}],
 
 	// comments
 	createTestCases([
@@ -792,6 +851,30 @@ testTokenization('clojure', [
 				{startIndex: 0, type: 'comment.clj'},
 			],
 		},
+		{
+			line: '(comments foo bar)',
+			tokens: [
+				{startIndex: 0, type: 'delimiter.parenthesis.clj'},
+				{startIndex: 1, type: 'identifier.clj'},
+				{startIndex: 9, type: 'white.clj'},
+				{startIndex: 10, type: 'identifier.clj'},
+				{startIndex: 13, type: 'white.clj'},
+				{startIndex: 14, type: 'identifier.clj'},
+				{startIndex: 17, type: 'delimiter.parenthesis.clj'},
+			]
+		},
+		{
+			line: '(comment6 foo bar)',
+			tokens: [
+				{startIndex: 0, type: 'delimiter.parenthesis.clj'},
+				{startIndex: 1, type: 'identifier.clj'},
+				{startIndex: 9, type: 'white.clj'},
+				{startIndex: 10, type: 'identifier.clj'},
+				{startIndex: 13, type: 'white.clj'},
+				{startIndex: 14, type: 'identifier.clj'},
+				{startIndex: 17, type: 'delimiter.parenthesis.clj'},
+			]
+		},
 		{
 			line: '(comment foo',
 			tokens: [

+ 38 - 30
src/clojure/clojure.ts

@@ -13,20 +13,24 @@ export const conf: IRichLanguageConfiguration = {
 		lineComment: ';;',
 	},
 
-	brackets: [['(', ')'], ['[', ']'], ['{', '}']],
+	brackets: [
+		['[', ']'],
+		['(', ')'],
+		['{', '}']
+	],
 
 	autoClosingPairs: [
-		{open: '(', close: ')'},
 		{open: '[', close: ']'},
-		{open: '{', close: '}'},
 		{open: '"', close: '"'},
+		{open: '(', close: ')'},
+		{open: '{', close: '}'},
 	],
 
 	surroundingPairs: [
-		{open: '(', close: ')'},
 		{open: '[', close: ']'},
-		{open: '{', close: '}'},
 		{open: '"', close: '"'},
+		{open: '(', close: ')'},
+		{open: '{', close: '}'},
 	],
 };
 
@@ -36,11 +40,26 @@ export const language = <ILanguage>{
 	tokenPostfix: '.clj',
 
 	brackets: [
+		{open: '[', close: ']', token: 'delimiter.square'},
 		{open: '(', close: ')', token: 'delimiter.parenthesis'},
 		{open: '{', close: '}', token: 'delimiter.curly'},
-		{open: '[', close: ']', token: 'delimiter.square'},
 	],
 
+	constants: ['true', 'false', 'nil'],
+
+	// delimiters: /[\\\[\]\s"#'(),;@^`{}~]|$/,
+
+	numbers: /^(?:[+\-]?\d+(?:(?:N|(?:[eE][+\-]?\d+))|(?:\.?\d*(?:M|(?:[eE][+\-]?\d+))?)|\/\d+|[xX][0-9a-fA-F]+|r[0-9a-zA-Z]+)?(?=[\\\[\]\s"#'(),;@^`{}~]|$))/,
+
+	characters: /^(?:\\(?:backspace|formfeed|newline|return|space|tab|o[0-7]{3}|u[0-9A-Fa-f]{4}|x[0-9A-Fa-f]{4}|.)?(?=[\\\[\]\s"(),;@^`{}~]|$))/,
+
+	escapes: /^\\(?:["'\\bfnrt]|x[0-9A-Fa-f]{1,4}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})/,
+
+	// simple-namespace := /^[^\\\/\[\]\d\s"#'(),;@^`{}~][^\\\[\]\s"(),;@^`{}~]*/
+	// simple-symbol    := /^(?:\/|[^\\\/\[\]\d\s"#'(),;@^`{}~][^\\\[\]\s"(),;@^`{}~]*)/
+	// qualified-symbol := (<simple-namespace>(<.><simple-namespace>)*</>)?<simple-symbol>
+	qualifiedSymbols: /^(?:(?:[^\\\/\[\]\d\s"#'(),;@^`{}~][^\\\[\]\s"(),;@^`{}~]*(?:\.[^\\\/\[\]\d\s"#'(),;@^`{}~][^\\\[\]\s"(),;@^`{}~]*)*\/)?(?:\/|[^\\\/\[\]\d\s"#'(),;@^`{}~][^\\\[\]\s"(),;@^`{}~]*)*(?=[\\\[\]\s"(),;@^`{}~]|$))/,
+
 	specialForms: [
 		'.',
 		'catch',
@@ -712,14 +731,6 @@ export const language = <ILanguage>{
 		'zipmap',
 	],
 
-	constants: ['true', 'false', 'nil'],
-
-	symbolCharacter: /[!#'*+\-.\/:<=>?_\w\xa1-\uffff]/,
-
-	numbers: /[+\-]?\d+(?:(?:N|(?:[eE][+\-]?\d+))|(?:\.?\d*(?:M|(?:[eE][+\-]?\d+))?)|\/\d+|[xX][0-9a-fA-F]+|r[0-9a-zA-Z]+)?/,
-
-	characters: /\\(?:backspace|formfeed|newline|return|space|tab|x[0-9A-Fa-f]{4}|u[0-9A-Fa-f]{4}|o[0-7]{3}|@symbolCharacter|[\\"()\[\]{}])/,
-
 	tokenizer: {
 		root: [
 			// whitespaces and comments
@@ -743,32 +754,29 @@ export const language = <ILanguage>{
 			// reader macro characters
 			[/[#'@^`~]/, 'meta'],
 
-			// keywords
-			[/:@symbolCharacter+/, 'constant'],
-
 			// symbols
-			[/@symbolCharacter+/, {
-				cases: {
-					'@specialForms': 'keyword',
-					'@coreSymbols': 'keyword',
-					'@constants': 'constant',
-					'@default': 'identifier',
+			[/@qualifiedSymbols/, {
+					cases: {
+						'^:.+$': 'constant',  // Clojure keywords (e.g., `:foo/bar`)
+						'@specialForms': 'keyword',
+						'@coreSymbols': 'keyword',
+						'@constants': 'constant',
+						'@default': 'identifier',
+					},
 				},
-			},
 			],
-
 		],
 
 		whitespace: [
 			[/\s+/, 'white'],
 			[/;.*$/, 'comment'],
-			[/\(comment/, 'comment', '@comment'],
+			[/\(comment\b/, 'comment', '@comment'],
 		],
 
 		comment: [
 			[/\(/, 'comment', '@push'],
 			[/\)/, 'comment', '@pop'],
-			[/[^)]/, 'comment'],
+			[/[^()]/, 'comment'],
 		],
 
 		string: [
@@ -776,9 +784,9 @@ export const language = <ILanguage>{
 		],
 
 		multiLineString: [
-			[/[^\\"]+/, 'string'],
-			[/@characters/, 'string'],
-			[/"/, 'string', '@pop']
+			[/"/, 'string', '@popall'],
+			[/@escapes/, 'string.escape'],
+			[/./, 'string']
 		],
 	},
 };