Pārlūkot izejas kodu

[clojure] Fix tokenization of escapes in strings

The notation for escapes in strings follows that of the host platforms
which is different from the notation for character literals in Clojure
(e.g., "\n" vs `\newline`).
Abdussalam Abdurrahman 6 gadi atpakaļ
vecāks
revīzija
3f16600ec1
2 mainītis faili ar 77 papildinājumiem un 19 dzēšanām
  1. 66 7
      src/clojure/clojure.test.ts
  2. 11 12
      src/clojure/clojure.ts

+ 66 - 7
src/clojure/clojure.test.ts

@@ -754,13 +754,72 @@ testTokenization('clojure', [
 	], 'string'),
 
 	// strings
-	createTestCases([
-		'\"I\'m a little teapot.\"',
-		'\"I\'m a \\\"little\\\" teapot.\"',
-		'\"I\'m',      // this is
-		'a little',    // a multi-line
-		'teapot.\"'    // string
-	], 'string'),
+	[
+		{
+			line: '"I\'m a little teapot."',
+			tokens: [
+				{startIndex: 0, type: 'string.clj'},
+			]
+		},
+		{
+			line: '"I\'m a \\"little\\" teapot."',
+			tokens: [
+				{startIndex: 0, type: 'string.clj'},
+				{startIndex: 7, type: 'string.escape.clj'},
+				{startIndex: 9, type: 'string.clj'},
+				{startIndex: 15, type: 'string.escape.clj'},
+				{startIndex: 17, type: 'string.clj'},
+			]
+		}
+	],
+
+	// multi-line strings
+	[
+		{
+			line: '"I\'m',
+			tokens: [
+				{startIndex: 0, type: 'string.clj'},
+			]
+		},
+		{
+			line: '\\"a little\\"',
+			tokens: [
+				{startIndex: 0, type: 'string.escape.clj'},
+				{startIndex: 2, type: 'string.clj'},
+				{startIndex: 10, type: 'string.escape.clj'},
+			]
+		},
+		{
+			line: 'teapot."',
+			tokens: [
+				{startIndex: 0, type: 'string.clj'},
+			]
+		}
+	],
+
+	// strings with other escapes in them (\" \' \\ \b \f \n \r \t)
+	[{
+		line: '"the escape \\" \\\' \\\\ \\b \\f \\n \\r \\t characters"',
+		tokens: [
+			{startIndex: 0, type: 'string.clj'},
+			{startIndex: 12, type: 'string.escape.clj'},
+			{startIndex: 14, type: 'string.clj'},
+			{startIndex: 15, type: 'string.escape.clj'},
+			{startIndex: 17, type: 'string.clj'},
+			{startIndex: 18, type: 'string.escape.clj'},
+			{startIndex: 20, type: 'string.clj'},
+			{startIndex: 21, type: 'string.escape.clj'},
+			{startIndex: 23, type: 'string.clj'},
+			{startIndex: 24, type: 'string.escape.clj'},
+			{startIndex: 26, type: 'string.clj'},
+			{startIndex: 27, type: 'string.escape.clj'},
+			{startIndex: 29, type: 'string.clj'},
+			{startIndex: 30, type: 'string.escape.clj'},
+			{startIndex: 32, type: 'string.clj'},
+			{startIndex: 33, type: 'string.escape.clj'},
+			{startIndex: 35, type: 'string.clj'},
+		]
+	}],
 
 	// comments
 	createTestCases([

+ 11 - 12
src/clojure/clojure.ts

@@ -53,7 +53,7 @@ export const language = <ILanguage>{
 
 	characters: /^(?:\\(?:backspace|formfeed|newline|return|space|tab|o[0-7]{3}|u[0-9A-Fa-f]{4}|x[0-9A-Fa-f]{4}|.)?(?=[\\\[\]\s"(),;@^`{}~]|$))/,
 
-	escapes: /^\\(?:backspace|formfeed|newline|return|space|tab|o[0-7]{3}|u[0-9A-Fa-f]{4}|x[0-9A-Fa-f]{4}|.)?/,
+	escapes: /^\\(?:["'\\bfnrt]|x[0-9A-Fa-f]{1,4}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})/,
 
 	// simple-namespace := /^[^\\\/\[\]\d\s"#'(),;@^`{}~][^\\\[\]\s"(),;@^`{}~]*/
 	// simple-symbol    := /^(?:\/|[^\\\/\[\]\d\s"#'(),;@^`{}~][^\\\[\]\s"(),;@^`{}~]*)/
@@ -756,16 +756,15 @@ export const language = <ILanguage>{
 
 			// symbols
 			[/@qualifiedSymbols/, {
-				cases: {
-					'^:.+$': 'constant',  // Clojure keywords (e.g., `:foo/bar`)
-					'@specialForms': 'keyword',
-					'@coreSymbols': 'keyword',
-					'@constants': 'constant',
-					'@default': 'identifier',
+					cases: {
+						'^:.+$': 'constant',  // Clojure keywords (e.g., `:foo/bar`)
+						'@specialForms': 'keyword',
+						'@coreSymbols': 'keyword',
+						'@constants': 'constant',
+						'@default': 'identifier',
+					},
 				},
-			},
 			],
-
 		],
 
 		whitespace: [
@@ -785,9 +784,9 @@ export const language = <ILanguage>{
 		],
 
 		multiLineString: [
-			[/[^\\"]+/, 'string'],
-			[/@escapes/, 'string'],
-			[/"/, 'string', '@pop']
+			[/"/, 'string', '@popall'],
+			[/@escapes/, 'string.escape'],
+			[/./, 'string']
 		],
 	},
 };