Browse Source

Merge pull request #3211 from microsoft/pr-3102

Add config and tokenizer for query language Cypher
Henning Dieterichs 3 years ago
parent
commit
62aa423a2b

+ 24 - 0
src/basic-languages/cypher/cypher.contribution.ts

@@ -0,0 +1,24 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+import { registerLanguage } from '../_.contribution';
+
+declare var AMD: any;
+declare var require: any;
+
+// Registers the Cypher language. The tokenizer module is not pulled in here;
+// it is only requested when `loader` is called.
+registerLanguage({
+	id: 'cypher',
+	extensions: ['.cypher', '.cyp'],
+	aliases: ['Cypher', 'OpenCypher'],
+	loader: () => {
+		// Without AMD, a plain dynamic import of the sibling module is enough.
+		if (!AMD) {
+			return import('./cypher');
+		}
+		// With AMD, go through `require` and adapt its success/error callbacks to a promise.
+		return new Promise((onLoaded, onFailed) => {
+			require(['vs/basic-languages/cypher/cypher'], onLoaded, onFailed);
+		});
+	}
+});

+ 327 - 0
src/basic-languages/cypher/cypher.test.ts

@@ -0,0 +1,327 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+import { testTokenization } from '../test/testRunner';
+
+/** Describes one expected token: the column it starts at and its full token type. */
+function tok(startIndex: number, type: string) {
+	return { startIndex, type };
+}
+
+/** Pairs one input line with the tokens expected for it, in order. */
+function expectLine(line: string, ...tokens: { startIndex: number; type: string }[]) {
+	return { line, tokens };
+}
+
+/** Builds a one-line test case in which the entire line is a single token of `type`. */
+function whole(line: string, type: string) {
+	return [expectLine(line, tok(0, type))];
+}
+
+testTokenization('cypher', [
+	// Comments: line comment, inline block comment, block comment spanning lines
+	whole('// Single line comment', 'comment.cypher'),
+	[
+		expectLine(
+			'MATCH /* comment part */ xyz',
+			tok(0, 'keyword.cypher'),
+			tok(5, 'white.cypher'),
+			tok(6, 'comment.cypher'),
+			tok(24, 'white.cypher'),
+			tok(25, 'identifier.cypher')
+		)
+	],
+	[
+		expectLine('/* multi line comment', tok(0, 'comment.cypher')),
+		expectLine('comment continues MATCH // not done yet', tok(0, 'comment.cypher')),
+		expectLine(
+			'comment ends */ MATCH',
+			tok(0, 'comment.cypher'),
+			tok(15, 'white.cypher'),
+			tok(16, 'keyword.cypher')
+		)
+	],
+
+	// Numbers: decimal integer literals
+	...['13', '-40000'].map((src) => whole(src, 'number.cypher')),
+
+	// Numbers: decimal float literals, with and without exponent
+	...[
+		'3.14',
+		'.314',
+		'-.314',
+		'6.022E23',
+		'-6.022e23',
+		'12E10',
+		'12e10',
+		'12e-10',
+		'12E-10'
+	].map((src) => whole(src, 'number.float.cypher')),
+
+	// Numbers: hexadecimal integer literals (starting with 0x)
+	...['0x13af', '0xFC3A9', '-0x66eff'].map((src) => whole(src, 'number.hex.cypher')),
+
+	// Numbers: octal integer literals (starting with 0)
+	...['01372', '02127', '-05671'].map((src) => whole(src, 'number.octal.cypher')),
+
+	// Strings: single- and double-quoted, with nested and escaped quotes
+	...['"two \'words\'"', '"two \\"words\\""', '\'two "words"\'', "'two \\'words\\''"].map((src) =>
+		whole(src, 'string.cypher')
+	),
+
+	// Identifiers wrapped in backticks (``)
+	...[
+		'`variable`',
+		'`A variable with weird stuff in it[]!`',
+		'`Escaped \\`variable\\``'
+	].map((src) => whole(src, 'identifier.escape.cypher')),
+
+	// Operators: a known operator is a delimiter, an unknown run gets the default token
+	[
+		expectLine(
+			'1+2',
+			tok(0, 'number.cypher'),
+			tok(1, 'delimiter.cypher'),
+			tok(2, 'number.cypher')
+		)
+	],
+	[expectLine('1++2', tok(0, 'number.cypher'), tok(1, ''), tok(3, 'number.cypher'))],
+
+	// Builtin literals: booleans and null, in lower and upper case
+	...['true', 'false', 'TRUE', 'FALSE', 'null', 'NULL'].map((src) =>
+		whole(src, 'predefined.literal.cypher')
+	),
+
+	// Builtin functions
+	[
+		expectLine(
+			'properties(node)',
+			tok(0, 'predefined.function.cypher'),
+			tok(10, 'delimiter.parenthesis.cypher'),
+			tok(11, 'identifier.cypher'),
+			tok(15, 'delimiter.parenthesis.cypher')
+		)
+	],
+	[
+		expectLine(
+			'left(right("Hello Cypher"))',
+			tok(0, 'predefined.function.cypher'),
+			tok(4, 'delimiter.parenthesis.cypher'),
+			tok(5, 'predefined.function.cypher'),
+			tok(10, 'delimiter.parenthesis.cypher'),
+			tok(11, 'string.cypher'),
+			tok(25, 'delimiter.parenthesis.cypher')
+		)
+	],
+
+	// Keywords
+	[
+		expectLine(
+			'MATCH (n) RETURN n',
+			tok(0, 'keyword.cypher'),
+			tok(5, 'white.cypher'),
+			tok(6, 'delimiter.parenthesis.cypher'),
+			tok(7, 'identifier.cypher'),
+			tok(8, 'delimiter.parenthesis.cypher'),
+			tok(9, 'white.cypher'),
+			tok(10, 'keyword.cypher'),
+			tok(16, 'white.cypher'),
+			tok(17, 'identifier.cypher')
+		)
+	],
+
+	// Labels on nodes and relationships
+	[
+		expectLine(
+			'(n:NodeLabel1)-[:RelationshipType]->(:NodeLabel2:NodeLabel3)',
+			tok(0, 'delimiter.parenthesis.cypher'),
+			tok(1, 'identifier.cypher'),
+			tok(2, 'type.identifier.cypher'),
+			tok(13, 'delimiter.parenthesis.cypher'),
+			tok(14, 'delimiter.cypher'),
+			tok(15, 'delimiter.bracket.cypher'),
+			tok(16, 'type.identifier.cypher'),
+			tok(33, 'delimiter.bracket.cypher'),
+			tok(34, 'delimiter.cypher'),
+			tok(36, 'delimiter.parenthesis.cypher'),
+			tok(37, 'type.identifier.cypher'),
+			tok(59, 'delimiter.parenthesis.cypher')
+		)
+	]
+]);

+ 274 - 0
src/basic-languages/cypher/cypher.ts

@@ -0,0 +1,274 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+import { languages } from '../../fillers/monaco-editor-core';
+
+/**
+ * Returns a fresh list of the delimiter pairs used for Cypher: the three
+ * bracket kinds plus double-quoted, single-quoted and backtick-quoted text.
+ * A new array is built on every call so the two consumers below never share one.
+ */
+function cypherPairs() {
+	return [
+		{ open: '{', close: '}' },
+		{ open: '[', close: ']' },
+		{ open: '(', close: ')' },
+		{ open: '"', close: '"' },
+		{ open: "'", close: "'" },
+		{ open: '`', close: '`' }
+	];
+}
+
+/**
+ * Language configuration for Cypher: comment markers, bracket pairs, and the
+ * pairs used for auto-closing and for surrounding a selection.
+ */
+export const conf: languages.LanguageConfiguration = {
+	comments: { lineComment: '//', blockComment: ['/*', '*/'] },
+	brackets: [
+		['{', '}'],
+		['[', ']'],
+		['(', ')']
+	],
+	// Auto-closing and surrounding use the same six pairs.
+	autoClosingPairs: cypherPairs(),
+	surroundingPairs: cypherPairs()
+};
+
+// Ref: Cypher Query Language Reference, Version 9 (https://opencypher.org/resources/)
+/**
+ * Monarch grammar for Cypher.
+ *
+ * Matching is case-insensitive (`ignoreCase`), every token type gets the
+ * `.cypher` postfix, and input that no rule classifies gets the empty default
+ * token. The word lists (`keywords`, `builtinLiterals`, `builtinFunctions`,
+ * `operators`) are referenced from the tokenizer rules through `@name` guards.
+ */
+export const language = <languages.IMonarchLanguage>{
+	defaultToken: '',
+	tokenPostfix: `.cypher`,
+	ignoreCase: true,
+
+	brackets: [
+		{ open: '{', close: '}', token: 'delimiter.curly' },
+		{ open: '[', close: ']', token: 'delimiter.bracket' },
+		{ open: '(', close: ')', token: 'delimiter.parenthesis' }
+	],
+
+	keywords: [
+		'ALL',
+		'AND',
+		'AS',
+		'ASC',
+		'ASCENDING',
+		'BY',
+		'CALL',
+		'CASE',
+		'CONTAINS',
+		'CREATE',
+		'DELETE',
+		'DESC',
+		'DESCENDING',
+		'DETACH',
+		'DISTINCT',
+		'ELSE',
+		'END',
+		'ENDS',
+		'EXISTS',
+		'IN',
+		'IS',
+		'LIMIT',
+		'MANDATORY',
+		'MATCH',
+		'MERGE',
+		'NOT',
+		'ON',
+		'OPTIONAL',
+		'OR',
+		'ORDER',
+		'REMOVE',
+		'RETURN',
+		'SET',
+		'SKIP',
+		'STARTS',
+		'THEN',
+		'UNION',
+		'UNWIND',
+		'WHEN',
+		'WHERE',
+		'WITH',
+		'XOR',
+		'YIELD'
+	],
+	builtinLiterals: ['true', 'TRUE', 'false', 'FALSE', 'null', 'NULL'],
+	builtinFunctions: [
+		'abs',
+		'acos',
+		'asin',
+		'atan',
+		'atan2',
+		'avg',
+		'ceil',
+		'coalesce',
+		'collect',
+		'cos',
+		'cot',
+		'count',
+		'degrees',
+		'e',
+		'endNode',
+		'exists',
+		'exp',
+		'floor',
+		'head',
+		'id',
+		'keys',
+		'labels',
+		'last',
+		'left',
+		'length',
+		'log',
+		'log10',
+		'lTrim',
+		'max',
+		'min',
+		'nodes',
+		'percentileCont',
+		'percentileDisc',
+		'pi',
+		'properties',
+		'radians',
+		'rand',
+		'range',
+		'relationships',
+		'replace',
+		'reverse',
+		'right',
+		'round',
+		'rTrim',
+		'sign',
+		'sin',
+		'size',
+		'split',
+		'sqrt',
+		'startNode',
+		'stDev',
+		'stDevP',
+		'substring',
+		'sum',
+		'tail',
+		'tan',
+		'timestamp',
+		'toBoolean',
+		'toFloat',
+		'toInteger',
+		'toLower',
+		'toString',
+		'toUpper',
+		'trim',
+		'type'
+	],
+
+	operators: [
+		// Math operators
+		'+',
+		'-',
+		'*',
+		'/',
+		'%',
+		'^',
+		// Comparison operators
+		'=',
+		'<>',
+		'<',
+		'>',
+		'<=',
+		'>=',
+		// Pattern operators
+		'->',
+		'<-',
+		'-->',
+		'<--'
+	],
+
+	escapes: /\\(?:[tbnrf\\"'`]|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})/,
+	digits: /\d+/,
+	octaldigits: /[0-7]+/,
+	hexdigits: /[0-9a-fA-F]+/,
+
+	tokenizer: {
+		root: [[/[{}[\]()]/, '@brackets'], { include: 'common' }],
+		common: [
+			{ include: '@whitespace' },
+			{ include: '@numbers' },
+			{ include: '@strings' },
+
+			// Cypher labels on nodes/relationships, e.g. (n:NodeLabel)-[e:RelationshipLabel]
+			[/:[a-zA-Z_][\w]*/, 'type.identifier'],
+			// A name immediately followed by `(`. Built-in functions are checked first so
+			// that names present in both lists (e.g. `exists(`) stay functions. Keywords
+			// written without a space (`MATCH(n)`) and user-defined function/procedure
+			// names get a proper class instead of falling through to the default token.
+			[
+				/[a-zA-Z_][\w]*(?=\()/,
+				{
+					cases: {
+						'@builtinFunctions': 'predefined.function',
+						'@keywords': 'keyword',
+						'@default': 'identifier'
+					}
+				}
+			],
+			[
+				/[a-zA-Z_$][\w$]*/,
+				{
+					cases: {
+						'@keywords': 'keyword',
+						'@builtinLiterals': 'predefined.literal',
+						'@default': 'identifier'
+					}
+				}
+			],
+			[/`/, 'identifier.escape', '@identifierBacktick'],
+
+			// delimiter and operator after number because of `.\d` floats and `:` in labels
+			[/[;,.:|]/, 'delimiter'],
+			// Any run of operator characters; runs that are not a known operator
+			// (e.g. `++`) get the default token.
+			[
+				/[<>=%+\-*/^]+/,
+				{
+					cases: {
+						'@operators': 'delimiter',
+						'@default': ''
+					}
+				}
+			]
+		],
+		numbers: [
+			// Integer mantissa with exponent. The exponent digits are mandatory, so a
+			// dangling `12e` is not treated as a float.
+			[/-?(@digits)[eE]-?(@digits)/, 'number.float'],
+			[/-?(@digits)?\.(@digits)([eE]-?(@digits))?/, 'number.float'],
+			[/-?0x(@hexdigits)/, 'number.hex'],
+			[/-?0(@octaldigits)/, 'number.octal'],
+			[/-?(@digits)/, 'number']
+		],
+		strings: [
+			[/"([^"\\]|\\.)*$/, 'string.invalid'], // non-terminated string
+			[/'([^'\\]|\\.)*$/, 'string.invalid'], // non-terminated string
+			[/"/, 'string', '@stringDouble'],
+			[/'/, 'string', '@stringSingle']
+		],
+		whitespace: [
+			[/[ \t\r\n]+/, 'white'],
+			[/\/\*/, 'comment', '@comment'],
+			[/\/\/.*$/, 'comment']
+		],
+		// Inside a block comment. `//` has no special meaning here: consuming the
+		// rest of the line on `//` would swallow a `*/` that follows it (for example
+		// after a URL) and leave the comment open.
+		comment: [
+			[/[^/*]+/, 'comment'],
+			[/\*\//, 'comment', '@pop'],
+			[/[/*]/, 'comment']
+		],
+		stringDouble: [
+			[/[^\\"]+/, 'string'],
+			[/@escapes/, 'string'],
+			[/\\./, 'string.invalid'],
+			[/"/, 'string', '@pop']
+		],
+		stringSingle: [
+			[/[^\\']+/, 'string'],
+			[/@escapes/, 'string'],
+			[/\\./, 'string.invalid'],
+			[/'/, 'string', '@pop']
+		],
+		identifierBacktick: [
+			[/[^\\`]+/, 'identifier.escape'],
+			[/@escapes/, 'identifier.escape'],
+			[/\\./, 'identifier.escape.invalid'],
+			[/`/, 'identifier.escape', '@pop']
+		]
+	}
+};

+ 1 - 0
src/basic-languages/monaco.contribution.ts

@@ -15,6 +15,7 @@ import './cpp/cpp.contribution';
 import './csharp/csharp.contribution';
 import './csp/csp.contribution';
 import './css/css.contribution';
+import './cypher/cypher.contribution';
 import './dart/dart.contribution';
 import './dockerfile/dockerfile.contribution';
 import './ecl/ecl.contribution';

+ 3 - 0
website/index/samples/sample.cypher.txt

@@ -0,0 +1,3 @@
+// Movies that the actor named 'Nicole Kidman' ACTED_IN and whose `year` is below $yearParameter.
+MATCH (nicole:Actor {name: 'Nicole Kidman'})-[:ACTED_IN]->(movie:Movie)
+WHERE movie.year < $yearParameter
+RETURN movie