Răsfoiți Sursa

Merge pull request #130 from jonatanklosko/elixir

Add Elixir language definition and tokenizer
Alexandru Dima 4 ani în urmă
părinte
comite
3c896884e9

+ 13 - 0
src/elixir/elixir.contribution.ts

@@ -0,0 +1,13 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+import { registerLanguage } from '../_.contribution';
+
+// Register the Elixir language contribution: its id, the file extensions and
+// aliases it is known by, and a loader that defers importing './elixir' until
+// the loader function is called.
+registerLanguage({
+	id: 'elixir',
+	extensions: ['.ex', '.exs'],
+	aliases: ['Elixir', 'elixir', 'ex'],
+	loader: () => import('./elixir')
+});

+ 376 - 0
src/elixir/elixir.test.ts

@@ -0,0 +1,376 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+import { testTokenization } from '../test/testRunner';
+
+testTokenization('elixir', [
+	// Keywords - module definition
+	[
+		{
+			line: 'defmodule Foo do end',
+			tokens: [
+				{ startIndex: 0, type: 'keyword.declaration.elixir' },
+				{ startIndex: 9, type: 'white.elixir' },
+				{ startIndex: 10, type: 'type.identifier.elixir' },
+				{ startIndex: 13, type: 'white.elixir' },
+				{ startIndex: 14, type: 'keyword.elixir' },
+				{ startIndex: 16, type: 'white.elixir' },
+				{ startIndex: 17, type: 'keyword.elixir' }
+			]
+		}
+	],
+	// Keywords - function definition
+	[
+		{
+			line: 'def foo(x) do end',
+			tokens: [
+				{ startIndex: 0, type: 'keyword.declaration.elixir' },
+				{ startIndex: 3, type: 'white.elixir' },
+				{ startIndex: 4, type: 'function.elixir' },
+				{ startIndex: 7, type: 'delimiter.parenthesis.elixir' },
+				{ startIndex: 8, type: 'identifier.elixir' },
+				{ startIndex: 9, type: 'delimiter.parenthesis.elixir' },
+				{ startIndex: 10, type: 'white.elixir' },
+				{ startIndex: 11, type: 'keyword.elixir' },
+				{ startIndex: 13, type: 'white.elixir' },
+				{ startIndex: 14, type: 'keyword.elixir' }
+			]
+		}
+	],
+	// Keywords - macro
+	[
+		{
+			line: 'defmacro mac(name) do quote do def unquote(name)() do nil end end end',
+			tokens: [
+				{ startIndex: 0, type: 'keyword.declaration.elixir' },
+				{ startIndex: 8, type: 'white.elixir' },
+				{ startIndex: 9, type: 'function.elixir' },
+				{ startIndex: 12, type: 'delimiter.parenthesis.elixir' },
+				{ startIndex: 13, type: 'identifier.elixir' },
+				{ startIndex: 17, type: 'delimiter.parenthesis.elixir' },
+				{ startIndex: 18, type: 'white.elixir' },
+				{ startIndex: 19, type: 'keyword.elixir' },
+				{ startIndex: 21, type: 'white.elixir' },
+				{ startIndex: 22, type: 'keyword.elixir' },
+				{ startIndex: 27, type: 'white.elixir' },
+				{ startIndex: 28, type: 'keyword.elixir' },
+				{ startIndex: 30, type: 'white.elixir' },
+				{ startIndex: 31, type: 'keyword.declaration.elixir' },
+				{ startIndex: 34, type: 'white.elixir' },
+				{ startIndex: 35, type: 'keyword.elixir' },
+				{ startIndex: 42, type: 'delimiter.parenthesis.elixir' },
+				{ startIndex: 43, type: 'identifier.elixir' },
+				{ startIndex: 47, type: 'delimiter.parenthesis.elixir' },
+				{ startIndex: 50, type: 'white.elixir' },
+				{ startIndex: 51, type: 'keyword.elixir' },
+				{ startIndex: 53, type: 'white.elixir' },
+				{ startIndex: 54, type: 'constant.language.elixir' },
+				{ startIndex: 57, type: 'white.elixir' },
+				{ startIndex: 58, type: 'keyword.elixir' },
+				{ startIndex: 61, type: 'white.elixir' },
+				{ startIndex: 62, type: 'keyword.elixir' },
+				{ startIndex: 65, type: 'white.elixir' },
+				{ startIndex: 66, type: 'keyword.elixir' }
+			]
+		}
+	],
+	// Comments
+	[
+		{
+			line: 'nil # comment',
+			tokens: [
+				{ startIndex: 0, type: 'constant.language.elixir' },
+				{ startIndex: 3, type: 'white.elixir' },
+				{ startIndex: 4, type: 'comment.punctuation.elixir' },
+				{ startIndex: 5, type: 'comment.elixir' }
+			]
+		}
+	],
+	// Keyword list shorthand
+	[
+		{
+			line: '["key": value]',
+			tokens: [
+				{ startIndex: 0, type: 'delimiter.square.elixir' },
+				{ startIndex: 1, type: 'constant.delimiter.elixir' },
+				{ startIndex: 2, type: 'constant.elixir' },
+				{ startIndex: 5, type: 'constant.delimiter.elixir' },
+				{ startIndex: 7, type: 'white.elixir' },
+				{ startIndex: 8, type: 'identifier.elixir' },
+				{ startIndex: 13, type: 'delimiter.square.elixir' }
+			]
+		}
+	],
+	// Numbers
+	[
+		{
+			line: '[1,1.23,1.23e-10,0xab,0o171,0b01001]',
+			tokens: [
+				{ startIndex: 0, type: 'delimiter.square.elixir' },
+				{ startIndex: 1, type: 'number.elixir' },
+				{ startIndex: 2, type: 'punctuation.elixir' },
+				{ startIndex: 3, type: 'number.float.elixir' },
+				{ startIndex: 7, type: 'punctuation.elixir' },
+				{ startIndex: 8, type: 'number.float.elixir' },
+				{ startIndex: 16, type: 'punctuation.elixir' },
+				{ startIndex: 17, type: 'number.hex.elixir' },
+				{ startIndex: 21, type: 'punctuation.elixir' },
+				{ startIndex: 22, type: 'number.octal.elixir' },
+				{ startIndex: 27, type: 'punctuation.elixir' },
+				{ startIndex: 28, type: 'number.binary.elixir' },
+				{ startIndex: 35, type: 'delimiter.square.elixir' }
+			]
+		}
+	],
+	// Unused bindings
+	[
+		{
+			line: 'def foo(_x) do _y = 1 end',
+			tokens: [
+				{ startIndex: 0, type: 'keyword.declaration.elixir' },
+				{ startIndex: 3, type: 'white.elixir' },
+				{ startIndex: 4, type: 'function.elixir' },
+				{ startIndex: 7, type: 'delimiter.parenthesis.elixir' },
+				{ startIndex: 8, type: 'comment.unused.elixir' },
+				{ startIndex: 10, type: 'delimiter.parenthesis.elixir' },
+				{ startIndex: 11, type: 'white.elixir' },
+				{ startIndex: 12, type: 'keyword.elixir' },
+				{ startIndex: 14, type: 'white.elixir' },
+				{ startIndex: 15, type: 'comment.unused.elixir' },
+				{ startIndex: 17, type: 'white.elixir' },
+				{ startIndex: 18, type: 'operator.elixir' },
+				{ startIndex: 19, type: 'white.elixir' },
+				{ startIndex: 20, type: 'number.elixir' },
+				{ startIndex: 21, type: 'white.elixir' },
+				{ startIndex: 22, type: 'keyword.elixir' }
+			]
+		}
+	],
+	// Function calls
+	[
+		{
+			line: 'foo(x)',
+			tokens: [
+				{ startIndex: 0, type: 'function.call.elixir' },
+				{ startIndex: 3, type: 'delimiter.parenthesis.elixir' },
+				{ startIndex: 4, type: 'identifier.elixir' },
+				{ startIndex: 5, type: 'delimiter.parenthesis.elixir' }
+			]
+		}
+	],
+	[
+		{
+			line: 'foo.()',
+			tokens: [
+				{ startIndex: 0, type: 'function.call.elixir' },
+				{ startIndex: 3, type: 'operator.elixir' },
+				{ startIndex: 4, type: 'delimiter.parenthesis.elixir' }
+			]
+		}
+	],
+	[
+		{
+			line: 'Mod.foo()',
+			tokens: [
+				{ startIndex: 0, type: 'type.identifier.elixir' },
+				{ startIndex: 3, type: 'operator.elixir' },
+				{ startIndex: 4, type: 'function.call.elixir' },
+				{ startIndex: 7, type: 'delimiter.parenthesis.elixir' }
+			]
+		}
+	],
+	// Function call (Erlang module)
+	[
+		{
+			line: ':mo.foo()',
+			tokens: [
+				{ startIndex: 0, type: 'constant.punctuation.elixir' },
+				{ startIndex: 1, type: 'constant.elixir' },
+				{ startIndex: 3, type: 'operator.elixir' },
+				{ startIndex: 4, type: 'function.call.elixir' },
+				{ startIndex: 7, type: 'delimiter.parenthesis.elixir' }
+			]
+		}
+	],
+	// Function call (pipe)
+	[
+		{
+			line: '1 |> abs()',
+			tokens: [
+				{ startIndex: 0, type: 'number.elixir' },
+				{ startIndex: 1, type: 'white.elixir' },
+				{ startIndex: 2, type: 'operator.elixir' },
+				{ startIndex: 4, type: 'white.elixir' },
+				{ startIndex: 5, type: 'function.call.elixir' },
+				{ startIndex: 8, type: 'delimiter.parenthesis.elixir' }
+			]
+		}
+	],
+	// Function reference
+	[
+		{
+			line: '&max(&1,&2)',
+			tokens: [
+				{ startIndex: 0, type: 'operator.elixir' },
+				{ startIndex: 1, type: 'function.call.elixir' },
+				{ startIndex: 4, type: 'delimiter.parenthesis.elixir' },
+				{ startIndex: 5, type: 'operator.elixir' },
+				{ startIndex: 7, type: 'punctuation.elixir' },
+				{ startIndex: 8, type: 'operator.elixir' },
+				{ startIndex: 10, type: 'delimiter.parenthesis.elixir' }
+			]
+		}
+	],
+	// Strings
+	[
+		{
+			line: '"foo"',
+			tokens: [
+				{ startIndex: 0, type: 'string.delimiter.elixir' },
+				{ startIndex: 1, type: 'string.elixir' },
+				{ startIndex: 4, type: 'string.delimiter.elixir' }
+			]
+		}
+	],
+	[
+		{
+			line: '"foo \\u0065\\u0301 #{1}"',
+			tokens: [
+				{ startIndex: 0, type: 'string.delimiter.elixir' },
+				{ startIndex: 1, type: 'string.elixir' },
+				{ startIndex: 5, type: 'constant.character.escape.elixir' },
+				{ startIndex: 17, type: 'string.elixir' },
+				{ startIndex: 18, type: 'delimiter.bracket.embed.elixir' },
+				{ startIndex: 20, type: 'number.elixir' },
+				{ startIndex: 21, type: 'delimiter.bracket.embed.elixir' },
+				{ startIndex: 22, type: 'string.delimiter.elixir' }
+			]
+		}
+	],
+	[
+		{
+			line: '"""heredoc"""',
+			tokens: [
+				{ startIndex: 0, type: 'string.delimiter.elixir' },
+				{ startIndex: 3, type: 'string.elixir' },
+				{ startIndex: 10, type: 'string.delimiter.elixir' }
+			]
+		}
+	],
+	// Atom strings
+	[
+		{
+			line: ':"atom"',
+			tokens: [
+				{ startIndex: 0, type: 'constant.delimiter.elixir' },
+				{ startIndex: 2, type: 'constant.elixir' },
+				{ startIndex: 6, type: 'constant.delimiter.elixir' }
+			]
+		}
+	],
+	// Sigils (string)
+	[
+		{
+			line: '~s{foo}',
+			tokens: [
+				{ startIndex: 0, type: 'string.delimiter.elixir' },
+				{ startIndex: 3, type: 'string.elixir' },
+				{ startIndex: 6, type: 'string.delimiter.elixir' }
+			]
+		}
+	],
+	// Sigils (regexp)
+	[
+		{
+			line: '~r/foo/',
+			tokens: [
+				{ startIndex: 0, type: 'regexp.delimiter.elixir' },
+				{ startIndex: 3, type: 'regexp.elixir' },
+				{ startIndex: 6, type: 'regexp.delimiter.elixir' }
+			]
+		}
+	],
+	// Sigils (other)
+	[
+		{
+			line: '~D/foo/',
+			tokens: [
+				{ startIndex: 0, type: 'sigil.delimiter.elixir' },
+				{ startIndex: 3, type: 'sigil.elixir' },
+				{ startIndex: 6, type: 'sigil.delimiter.elixir' }
+			]
+		}
+	],
+	// Sigils (no interpolation)
+	[
+		{
+			line: '~W/foo#{1}/',
+			tokens: [
+				{ startIndex: 0, type: 'sigil.delimiter.elixir' },
+				{ startIndex: 3, type: 'sigil.elixir' },
+				{ startIndex: 10, type: 'sigil.delimiter.elixir' }
+			]
+		}
+	],
+	// Module attributes
+	[
+		{
+			line: '@attr 1',
+			tokens: [
+				{ startIndex: 0, type: 'variable.elixir' },
+				{ startIndex: 5, type: 'white.elixir' },
+				{ startIndex: 6, type: 'number.elixir' }
+			]
+		}
+	],
+	// Module attributes (docs)
+	[
+		{
+			line: '@doc "foo"',
+			tokens: [{ startIndex: 0, type: 'comment.block.documentation.elixir' }]
+		}
+	],
+	// Operator definition
+	[
+		{
+			line: 'def a ~> b, do: max(a,b)',
+			tokens: [
+				{ startIndex: 0, type: 'keyword.declaration.elixir' },
+				{ startIndex: 3, type: 'white.elixir' },
+				{ startIndex: 4, type: 'identifier.elixir' },
+				{ startIndex: 5, type: 'white.elixir' },
+				{ startIndex: 6, type: 'operator.elixir' },
+				{ startIndex: 8, type: 'white.elixir' },
+				{ startIndex: 9, type: 'identifier.elixir' },
+				{ startIndex: 10, type: 'punctuation.elixir' },
+				{ startIndex: 11, type: 'white.elixir' },
+				{ startIndex: 12, type: 'constant.elixir' },
+				{ startIndex: 14, type: 'constant.punctuation.elixir' },
+				{ startIndex: 15, type: 'white.elixir' },
+				{ startIndex: 16, type: 'function.call.elixir' },
+				{ startIndex: 19, type: 'delimiter.parenthesis.elixir' },
+				{ startIndex: 20, type: 'identifier.elixir' },
+				{ startIndex: 21, type: 'punctuation.elixir' },
+				{ startIndex: 22, type: 'identifier.elixir' },
+				{ startIndex: 23, type: 'delimiter.parenthesis.elixir' }
+			]
+		}
+	],
+	// Constants
+	[
+		{
+			line: '[true,false,nil]',
+			tokens: [
+				{ startIndex: 0, type: 'delimiter.square.elixir' },
+				{ startIndex: 1, type: 'constant.language.elixir' },
+				{ startIndex: 5, type: 'punctuation.elixir' },
+				{ startIndex: 6, type: 'constant.language.elixir' },
+				{ startIndex: 11, type: 'punctuation.elixir' },
+				{ startIndex: 12, type: 'constant.language.elixir' },
+				{ startIndex: 15, type: 'delimiter.square.elixir' }
+			]
+		}
+	]
+]);

+ 631 - 0
src/elixir/elixir.ts

@@ -0,0 +1,631 @@
+/*---------------------------------------------------------------------------------------------
+ *  Copyright (c) Microsoft Corporation. All rights reserved.
+ *  Licensed under the MIT License. See License.txt in the project root for license information.
+ *--------------------------------------------------------------------------------------------*/
+
+import type { languages } from '../fillers/monaco-editor-core';
+
+/**
+ * Editor language configuration for Elixir: the line-comment marker, bracket
+ * pairs, surrounding / auto-closing pairs and regex-based indentation rules.
+ */
+export const conf: languages.LanguageConfiguration = {
+	comments: {
+		lineComment: '#'
+	},
+	brackets: [
+		['{', '}'],
+		['[', ']'],
+		['(', ')']
+	],
+	surroundingPairs: [
+		{ open: '{', close: '}' },
+		{ open: '[', close: ']' },
+		{ open: '(', close: ')' },
+		{ open: "'", close: "'" },
+		{ open: '"', close: '"' }
+	],
+	autoClosingPairs: [
+		{ open: "'", close: "'", notIn: ['string', 'comment'] },
+		{ open: '"', close: '"', notIn: ['comment'] },
+		{ open: '"""', close: '"""' },
+		{ open: '`', close: '`', notIn: ['string', 'comment'] },
+		{ open: '(', close: ')' },
+		{ open: '{', close: '}' },
+		{ open: '[', close: ']' },
+		{ open: '<<', close: '>>' }
+	],
+	indentationRules: {
+		// Indent after a line that is just a block keyword, or whose non-comment
+		// part ends with `do`, `<-`, `->`, `{`, `[` or `=`. The `\b` before `do`
+		// keeps identifiers that merely end in "do" (e.g. `todo`, `undo`) from
+		// being mistaken for the `do` keyword.
+		increaseIndentPattern: /^\s*(after|else|catch|rescue|fn|[^#]*(\bdo|<\-|\->|\{|\[|\=))\s*$/,
+		// Dedent a line that consists only of a closing `}` / `]`, or that starts
+		// with a keyword which closes or continues a block.
+		decreaseIndentPattern: /^\s*((\}|\])\s*$|(after|else|catch|rescue|end)\b)/
+	}
+};
+
+/**
+ * A Monarch lexer for the Elixir language.
+ *
+ * References:
+ *
+ * * Monarch documentation - https://microsoft.github.io/monaco-editor/monarch.html
+ * * Elixir lexer - https://github.com/elixir-makeup/makeup_elixir/blob/master/lib/makeup/lexers/elixir_lexer.ex
+ * * TextMate lexer (elixir-tmbundle) - https://github.com/elixir-editors/elixir-tmbundle/blob/master/Syntaxes/Elixir.tmLanguage
+ * * TextMate lexer (vscode-elixir-ls) - https://github.com/elixir-lsp/vscode-elixir-ls/blob/master/syntaxes/elixir.json
+ */
+export const language = <languages.IMonarchLanguage>{
+	defaultToken: 'source',
+	tokenPostfix: '.elixir',
+
+	brackets: [
+		{ open: '[', close: ']', token: 'delimiter.square' },
+		{ open: '(', close: ')', token: 'delimiter.parenthesis' },
+		{ open: '{', close: '}', token: 'delimiter.curly' },
+		{ open: '<<', close: '>>', token: 'delimiter.angle.special' }
+	],
+
+	// Below are lists/regexps that we reference later.
+
+	declarationKeywords: [
+		'def',
+		'defp',
+		'defn',
+		'defnp',
+		'defguard',
+		'defguardp',
+		'defmacro',
+		'defmacrop',
+		'defdelegate',
+		'defcallback',
+		'defmacrocallback',
+		'defmodule',
+		'defprotocol',
+		'defexception',
+		'defimpl',
+		'defstruct'
+	],
+	operatorKeywords: ['and', 'in', 'not', 'or', 'when'],
+	namespaceKeywords: ['alias', 'import', 'require', 'use'],
+	otherKeywords: [
+		'after',
+		'case',
+		'catch',
+		'cond',
+		'do',
+		'else',
+		'end',
+		'fn',
+		'for',
+		'if',
+		'quote',
+		'raise',
+		'receive',
+		'rescue',
+		'super',
+		'throw',
+		'try',
+		'unless',
+		'unquote_splicing',
+		'unquote',
+		'with'
+	],
+	constants: ['true', 'false', 'nil'],
+	nameBuiltin: ['__MODULE__', '__DIR__', '__ENV__', '__CALLER__', '__STACKTRACE__'],
+
+	// Matches any of the operator names:
+	// <<< >>> ||| &&& ^^^ ~~~ === !== ~>> <~> |~> <|> == != <= >= && || \\ <> ++ -- |> =~ -> <- ~> <~ :: .. = < > + - * / | . ^ & !
+	operator: /-[->]?|!={0,2}|\*|\/|\\\\|&{1,3}|\.\.?|\^(?:\^\^)?|\+\+?|<(?:-|<<|=|>|\|>|~>?)?|=~|={1,3}|>(?:=|>>)?|\|~>|\|>|\|{1,3}|~>>?|~~~|::/,
+
+	// See https://hexdocs.pm/elixir/syntax-reference.html#variables
+	variableName: /[a-z_][a-zA-Z0-9_]*[?!]?/,
+
+	// See https://hexdocs.pm/elixir/syntax-reference.html#atoms
+	atomName: /[a-zA-Z_][a-zA-Z0-9_@]*[?!]?|@specialAtomName|@operator/,
+	specialAtomName: /\.\.\.|<<>>|%\{\}|%|\{\}/,
+
+	aliasPart: /[A-Z][a-zA-Z0-9_]*/,
+	moduleName: /@aliasPart(?:\.@aliasPart)*/,
+
+	// Sigil pairs are: """ """, ''' ''', " ", ' ', / /, | |, < >, { }, [ ], ( )
+	sigilSymmetricDelimiter: /"""|'''|"|'|\/|\|/,
+	sigilStartDelimiter: /@sigilSymmetricDelimiter|<|\{|\[|\(/,
+	sigilEndDelimiter: /@sigilSymmetricDelimiter|>|\}|\]|\)/,
+
+	decimal: /\d(?:_?\d)*/,
+	hex: /[0-9a-fA-F](_?[0-9a-fA-F])*/,
+	octal: /[0-7](_?[0-7])*/,
+	binary: /[01](_?[01])*/,
+
+	// See https://hexdocs.pm/elixir/master/String.html#module-escape-characters
+	// Alternatives, in order: \u{H...} (1-6 hex digits), \uHHHH, \xHH, and finally
+	// a backslash followed by any single character. The braced form must come
+	// before the catch-all, otherwise `\u{1F600}` is split into `\u` plus plain text.
+	escape: /\\u\{[0-9a-fA-F]{1,6}\}|\\u[0-9a-fA-F]{4}|\\x[0-9a-fA-F]{2}|\\./,
+
+	// The keys below correspond to tokenizer states.
+	// We start from the root state and match against its rules
+	// until we explicitly transition into another state.
+	// The `include` simply brings in all operations from the given state
+	// and is useful for improving readability.
+	tokenizer: {
+		root: [
+			{ include: '@whitespace' },
+			{ include: '@comments' },
+			// Keywords start as either an identifier or a string,
+			// but end with a : so it's important to match this first.
+			{ include: '@keywordsShorthand' },
+			{ include: '@numbers' },
+			{ include: '@identifiers' },
+			{ include: '@strings' },
+			{ include: '@atoms' },
+			{ include: '@sigils' },
+			{ include: '@attributes' },
+			{ include: '@symbols' }
+		],
+
+		// Whitespace
+
+		whitespace: [[/\s+/, 'white']],
+
+		// Comments
+
+		comments: [[/(#)(.*)/, ['comment.punctuation', 'comment']]],
+
+		// Keyword list shorthand
+
+		keywordsShorthand: [
+			// A keyword is an atom name directly followed by `:` and then whitespace
+			// (or the end of the line). The look-ahead prevents the first colon of the
+			// `::` operator (e.g. `<<x::binary>>`) from turning `x:` into a keyword.
+			[/(@atomName)(:)(?=\s|$)/, ['constant', 'constant.punctuation']],
+			// Use positive look-ahead to ensure the string is followed by :
+			// and should be considered a keyword.
+			[
+				/"(?=([^"]|#\{.*?\}|\\")*":)/,
+				{ token: 'constant.delimiter', next: '@doubleQuotedStringKeyword' }
+			],
+			[
+				/'(?=([^']|#\{.*?\}|\\')*':)/,
+				{ token: 'constant.delimiter', next: '@singleQuotedStringKeyword' }
+			]
+		],
+
+		doubleQuotedStringKeyword: [
+			[/":/, { token: 'constant.delimiter', next: '@pop' }],
+			{ include: '@stringConstantContentInterpol' }
+		],
+
+		singleQuotedStringKeyword: [
+			[/':/, { token: 'constant.delimiter', next: '@pop' }],
+			{ include: '@stringConstantContentInterpol' }
+		],
+
+		// Numbers
+
+		numbers: [
+			// Prefixed integer literals must be tried before plain decimals,
+			// otherwise the leading 0 would be consumed as a number on its own.
+			[/0b@binary/, 'number.binary'],
+			[/0o@octal/, 'number.octal'],
+			[/0x@hex/, 'number.hex'],
+			// Floats need digits on both sides of the dot; the optional exponent may
+			// carry an explicit sign, e.g. 1.0e10, 1.0e-10 or 1.0e+10.
+			[/@decimal\.@decimal([eE][+-]?@decimal)?/, 'number.float'],
+			[/@decimal/, 'number']
+		],
+
+		// Identifiers
+
+		identifiers: [
+			// Tokenize identifier name in function-like definitions.
+			// Note: given `def a + b, do: nil`, `a` is not a function name,
+			// so we use negative look-ahead to ensure there's no operator.
+			[
+				/\b(defp?|defnp?|defmacrop?|defguardp?|defdelegate)(\s+)(@variableName)(?!\s+@operator)/,
+				[
+					'keyword.declaration',
+					'white',
+					{
+						cases: {
+							unquote: 'keyword',
+							'@default': 'function'
+						}
+					}
+				]
+			],
+			// Tokenize function calls
+			[
+				// In-scope call - an identifier followed by ( or .(
+				/(@variableName)(?=\s*\.?\s*\()/,
+				{
+					cases: {
+						// Tokenize as keyword in cases like `if(..., do: ..., else: ...)`
+						'@declarationKeywords': 'keyword.declaration',
+						'@namespaceKeywords': 'keyword',
+						'@otherKeywords': 'keyword',
+						'@default': 'function.call'
+					}
+				}
+			],
+			[
+				// Referencing function in a module
+				/(@moduleName)(\s*)(\.)(\s*)(@variableName)/,
+				['type.identifier', 'white', 'operator', 'white', 'function.call']
+			],
+			[
+				// Referencing function in an Erlang module
+				/(:)(@atomName)(\s*)(\.)(\s*)(@variableName)/,
+				['constant.punctuation', 'constant', 'white', 'operator', 'white', 'function.call']
+			],
+			[
+				// Piping into a function (tokenized separately as it may not have parentheses)
+				/(\|>)(\s*)(@variableName)/,
+				[
+					'operator',
+					'white',
+					{
+						cases: {
+							'@otherKeywords': 'keyword',
+							'@default': 'function.call'
+						}
+					}
+				]
+			],
+			[
+				// Function reference passed to another function
+				/(&)(\s*)(@variableName)/,
+				['operator', 'white', 'function.call']
+			],
+			// Language keywords, builtins, constants and variables
+			[
+				/@variableName/,
+				{
+					cases: {
+						'@declarationKeywords': 'keyword.declaration',
+						'@operatorKeywords': 'keyword.operator',
+						'@namespaceKeywords': 'keyword',
+						'@otherKeywords': 'keyword',
+						'@constants': 'constant.language',
+						'@nameBuiltin': 'variable.language',
+						'_.*': 'comment.unused',
+						'@default': 'identifier'
+					}
+				}
+			],
+			// Module names
+			[/@moduleName/, 'type.identifier']
+		],
+
+		// Strings
+
+		strings: [
+			[/"""/, { token: 'string.delimiter', next: '@doubleQuotedHeredoc' }],
+			[/'''/, { token: 'string.delimiter', next: '@singleQuotedHeredoc' }],
+			[/"/, { token: 'string.delimiter', next: '@doubleQuotedString' }],
+			[/'/, { token: 'string.delimiter', next: '@singleQuotedString' }]
+		],
+
+		doubleQuotedHeredoc: [
+			[/"""/, { token: 'string.delimiter', next: '@pop' }],
+			{ include: '@stringContentInterpol' }
+		],
+
+		singleQuotedHeredoc: [
+			[/'''/, { token: 'string.delimiter', next: '@pop' }],
+			{ include: '@stringContentInterpol' }
+		],
+
+		doubleQuotedString: [
+			[/"/, { token: 'string.delimiter', next: '@pop' }],
+			{ include: '@stringContentInterpol' }
+		],
+
+		singleQuotedString: [
+			[/'/, { token: 'string.delimiter', next: '@pop' }],
+			{ include: '@stringContentInterpol' }
+		],
+
+		// Atoms
+
+		atoms: [
+			[/(:)(@atomName)/, ['constant.punctuation', 'constant']],
+			[/:"/, { token: 'constant.delimiter', next: '@doubleQuotedStringAtom' }],
+			[/:'/, { token: 'constant.delimiter', next: '@singleQuotedStringAtom' }]
+		],
+
+		doubleQuotedStringAtom: [
+			[/"/, { token: 'constant.delimiter', next: '@pop' }],
+			{ include: '@stringConstantContentInterpol' }
+		],
+
+		singleQuotedStringAtom: [
+			[/'/, { token: 'constant.delimiter', next: '@pop' }],
+			{ include: '@stringConstantContentInterpol' }
+		],
+
+		// Sigils
+
+		// See https://elixir-lang.org/getting-started/sigils.html
+		// Sigils allow for typing values using their textual representation.
+		// All sigils start with ~ followed by a letter indicating sigil type
+		// and then a delimiter pair enclosing the textual representation.
+		// Optional modifiers are allowed after the closing delimiter.
+		// For instance, a regular expression can be written as:
+		// ~r/foo|bar/ ~r{foo|bar} ~r/foo|bar/g
+		//
+		// In general lowercase sigils allow for interpolation
+		// and escaped characters, whereas uppercase sigils don't
+		//
+		// During tokenization we want to distinguish some
+		// specific sigil types, namely string and regexp,
+		// so that they can be themed separately.
+		//
+		// To reasonably handle all those combinations we leverage
+		// dot-separated states, so if we transition to @sigilStart.interpol.s.{.}
+		// then "sigilStart.interpol.s" state will match and also all
+		// the individual dot-separated parameters can be accessed.
+
+		sigils: [
+			[/~[a-z]@sigilStartDelimiter/, { token: '@rematch', next: '@sigil.interpol' }],
+			[/~[A-Z]@sigilStartDelimiter/, { token: '@rematch', next: '@sigil.noInterpol' }]
+		],
+
+		sigil: [
+			[/~([a-zA-Z])\{/, { token: '@rematch', switchTo: '@sigilStart.$S2.$1.{.}' }],
+			[/~([a-zA-Z])\[/, { token: '@rematch', switchTo: '@sigilStart.$S2.$1.[.]' }],
+			[/~([a-zA-Z])\(/, { token: '@rematch', switchTo: '@sigilStart.$S2.$1.(.)' }],
+			[/~([a-zA-Z])\</, { token: '@rematch', switchTo: '@sigilStart.$S2.$1.<.>' }],
+			[
+				/~([a-zA-Z])(@sigilSymmetricDelimiter)/,
+				{ token: '@rematch', switchTo: '@sigilStart.$S2.$1.$2.$2' }
+			]
+		],
+
+		// The definitions below expect states to be of the form:
+		//
+		// sigilStart.<interpol-or-noInterpol>.<sigil-letter>.<start-delimiter>.<end-delimiter>
+		// sigilContinue.<interpol-or-noInterpol>.<sigil-letter>.<start-delimiter>.<end-delimiter>
+		//
+		// The sigilStart state is used only to properly classify the token (as string/regex/sigil)
+		// and immediately switches to the sigilContinue state, which handles the actual content
+		// and waits for the corresponding end delimiter.
+
+		'sigilStart.interpol.s': [
+			[
+				/~s@sigilStartDelimiter/,
+				{
+					token: 'string.delimiter',
+					switchTo: '@sigilContinue.$S2.$S3.$S4.$S5'
+				}
+			]
+		],
+
+		'sigilContinue.interpol.s': [
+			[
+				/(@sigilEndDelimiter)[a-zA-Z]*/,
+				{
+					cases: {
+						'$1==$S5': { token: 'string.delimiter', next: '@pop' },
+						'@default': 'string'
+					}
+				}
+			],
+			{ include: '@stringContentInterpol' }
+		],
+
+		'sigilStart.noInterpol.S': [
+			[
+				/~S@sigilStartDelimiter/,
+				{
+					token: 'string.delimiter',
+					switchTo: '@sigilContinue.$S2.$S3.$S4.$S5'
+				}
+			]
+		],
+
+		'sigilContinue.noInterpol.S': [
+			// Ignore escaped sigil end
+			[/(^|[^\\])\\@sigilEndDelimiter/, 'string'],
+			[
+				/(@sigilEndDelimiter)[a-zA-Z]*/,
+				{
+					cases: {
+						'$1==$S5': { token: 'string.delimiter', next: '@pop' },
+						'@default': 'string'
+					}
+				}
+			],
+			{ include: '@stringContent' }
+		],
+
+		'sigilStart.interpol.r': [
+			[
+				/~r@sigilStartDelimiter/,
+				{
+					token: 'regexp.delimiter',
+					switchTo: '@sigilContinue.$S2.$S3.$S4.$S5'
+				}
+			]
+		],
+
+		'sigilContinue.interpol.r': [
+			[
+				/(@sigilEndDelimiter)[a-zA-Z]*/,
+				{
+					cases: {
+						'$1==$S5': { token: 'regexp.delimiter', next: '@pop' },
+						'@default': 'regexp'
+					}
+				}
+			],
+			{ include: '@regexpContentInterpol' }
+		],
+
+		'sigilStart.noInterpol.R': [
+			[
+				/~R@sigilStartDelimiter/,
+				{
+					token: 'regexp.delimiter',
+					switchTo: '@sigilContinue.$S2.$S3.$S4.$S5'
+				}
+			]
+		],
+
+		'sigilContinue.noInterpol.R': [
+			// Ignore escaped sigil end
+			[/(^|[^\\])\\@sigilEndDelimiter/, 'regexp'],
+			[
+				/(@sigilEndDelimiter)[a-zA-Z]*/,
+				{
+					cases: {
+						'$1==$S5': { token: 'regexp.delimiter', next: '@pop' },
+						'@default': 'regexp'
+					}
+				}
+			],
+			{ include: '@regexpContent' }
+		],
+
+		// Fallback to the generic sigil by default
+		'sigilStart.interpol': [
+			[
+				/~([a-zA-Z])@sigilStartDelimiter/,
+				{
+					token: 'sigil.delimiter',
+					switchTo: '@sigilContinue.$S2.$S3.$S4.$S5'
+				}
+			]
+		],
+
+		'sigilContinue.interpol': [
+			[
+				/(@sigilEndDelimiter)[a-zA-Z]*/,
+				{
+					cases: {
+						'$1==$S5': { token: 'sigil.delimiter', next: '@pop' },
+						'@default': 'sigil'
+					}
+				}
+			],
+			{ include: '@sigilContentInterpol' }
+		],
+
+		'sigilStart.noInterpol': [
+			[
+				/~([a-zA-Z])@sigilStartDelimiter/,
+				{
+					token: 'sigil.delimiter',
+					switchTo: '@sigilContinue.$S2.$S3.$S4.$S5'
+				}
+			]
+		],
+
+		'sigilContinue.noInterpol': [
+			// Ignore escaped sigil end
+			[/(^|[^\\])\\@sigilEndDelimiter/, 'sigil'],
+			[
+				/(@sigilEndDelimiter)[a-zA-Z]*/,
+				{
+					cases: {
+						'$1==$S5': { token: 'sigil.delimiter', next: '@pop' },
+						'@default': 'sigil'
+					}
+				}
+			],
+			{ include: '@sigilContent' }
+		],
+
+		// Attributes
+
+		attributes: [
+			// Module @doc* attributes - tokenized as comments
+			[
+				/\@(module|type)?doc (~[sS])?"""/,
+				{
+					token: 'comment.block.documentation',
+					next: '@doubleQuotedHeredocDocstring'
+				}
+			],
+			[
+				/\@(module|type)?doc (~[sS])?"/,
+				{
+					token: 'comment.block.documentation',
+					next: '@doubleQuotedStringDocstring'
+				}
+			],
+			[/\@(module|type)?doc false/, 'comment.block.documentation'],
+			// Module attributes
+			[/\@(@variableName)/, 'variable']
+		],
+
+		doubleQuotedHeredocDocstring: [
+			[/"""/, { token: 'comment.block.documentation', next: '@pop' }],
+			{ include: '@docstringContent' }
+		],
+
+		doubleQuotedStringDocstring: [
+			[/"/, { token: 'comment.block.documentation', next: '@pop' }],
+			{ include: '@docstringContent' }
+		],
+
+		// Operators, punctuation, brackets
+
+		symbols: [
+			// Code point operator (either with regular character ?a or an escaped one ?\n)
+			[/\?(\\.|[^\\\s])/, 'number.constant'],
+			// Anonymous function arguments
+			[/&\d+/, 'operator'],
+			// Bitshift operators (must go before delimiters, so that << >> don't match first)
+			[/<<<|>>>/, 'operator'],
+			// Delimiter pairs
+			[/[()\[\]\{\}]|<<|>>/, '@brackets'],
+			// Triple dot is a valid name (must go before operators, so that .. doesn't match instead)
+			[/\.\.\./, 'identifier'],
+			// Punctuation => (must go before operators, so it's not tokenized as = then >)
+			[/=>/, 'punctuation'],
+			// Operators
+			[/@operator/, 'operator'],
+			// Punctuation
+			[/[:;,.%]/, 'punctuation']
+		],
+
+		// Generic helpers
+
+		stringContentInterpol: [
+			{ include: '@interpolation' },
+			{ include: '@escapeChar' },
+			{ include: '@stringContent' }
+		],
+
+		stringContent: [[/./, 'string']],
+
+		stringConstantContentInterpol: [
+			{ include: '@interpolation' },
+			{ include: '@escapeChar' },
+			{ include: '@stringConstantContent' }
+		],
+
+		stringConstantContent: [[/./, 'constant']],
+
+		regexpContentInterpol: [
+			{ include: '@interpolation' },
+			{ include: '@escapeChar' },
+			{ include: '@regexpContent' }
+		],
+
+		regexpContent: [
+			// # may be a regular regexp char, so we use a heuristic
+			// assuming a # surrounded by whitespace is actually a comment.
+			[/(\s)(#)(\s.*)$/, ['white', 'comment.punctuation', 'comment']],
+			[/./, 'regexp']
+		],
+
+		sigilContentInterpol: [
+			{ include: '@interpolation' },
+			{ include: '@escapeChar' },
+			{ include: '@sigilContent' }
+		],
+
+		sigilContent: [[/./, 'sigil']],
+
+		docstringContent: [[/./, 'comment.block.documentation']],
+
+		escapeChar: [[/@escape/, 'constant.character.escape']],
+
+		interpolation: [
+			[/#{/, { token: 'delimiter.bracket.embed', next: '@interpolationContinue' }]
+		],
+
+		interpolationContinue: [
+			[/}/, { token: 'delimiter.bracket.embed', next: '@pop' }],
+			// Interpolation brackets may contain arbitrary code,
+			// so we simply match against all the root rules,
+			// until we reach interpolation end (the above matches).
+			{ include: '@root' }
+		]
+	}
+};

+ 1 - 0
src/monaco.contribution.ts

@@ -17,6 +17,7 @@ import './css/css.contribution';
 import './dart/dart.contribution';
 import './dockerfile/dockerfile.contribution';
 import './ecl/ecl.contribution';
+import './elixir/elixir.contribution';
 import './fsharp/fsharp.contribution';
 import './go/go.contribution';
 import './graphql/graphql.contribution';