ruby.ts 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599
  1. /*---------------------------------------------------------------------------------------------
  2. * Copyright (c) Microsoft Corporation. All rights reserved.
  3. * Licensed under the MIT License. See License.txt in the project root for license information.
  4. *--------------------------------------------------------------------------------------------*/
  5. import type { languages } from '../fillers/monaco-editor-core';
  6. export const conf: languages.LanguageConfiguration = {
  7. comments: {
  8. lineComment: '#',
  9. blockComment: ['=begin', '=end']
  10. },
  11. brackets: [
  12. ['(', ')'],
  13. ['{', '}'],
  14. ['[', ']']
  15. ],
  16. autoClosingPairs: [
  17. { open: '{', close: '}' },
  18. { open: '[', close: ']' },
  19. { open: '(', close: ')' },
  20. { open: '"', close: '"' },
  21. { open: "'", close: "'" }
  22. ],
  23. surroundingPairs: [
  24. { open: '{', close: '}' },
  25. { open: '[', close: ']' },
  26. { open: '(', close: ')' },
  27. { open: '"', close: '"' },
  28. { open: "'", close: "'" }
  29. ],
  30. indentationRules: {
  31. increaseIndentPattern: new RegExp(
  32. '^\\s*((begin|class|(private|protected)\\s+def|def|else|elsif|ensure|for|if|module|rescue|unless|until|when|while|case)|([^#]*\\sdo\\b)|([^#]*=\\s*(case|if|unless)))\\b([^#\\{;]|("|\'|/).*\\4)*(#.*)?$'
  33. ),
  34. decreaseIndentPattern: new RegExp(
  35. '^\\s*([}\\]]([,)]?\\s*(#|$)|\\.[a-zA-Z_]\\w*\\b)|(end|rescue|ensure|else|elsif|when)\\b)'
  36. )
  37. }
  38. };
  39. /*
  40. * Ruby language definition
  41. *
  42. * Quite a complex language due to elaborate escape sequences
  43. * and quoting of literal strings/regular expressions, and
  44. * an 'end' keyword that does not always apply to modifiers like until and while,
  45. * and a 'do' keyword that sometimes starts a block, but sometimes is part of
  46. * another statement (like 'while').
  47. *
  48. * (1) end blocks:
  49. * 'end' may end declarations like if or until, but sometimes 'if' or 'until'
  50. * are modifiers where there is no 'end'. Also, 'do' sometimes starts a block
  51. * that is ended by 'end', but sometimes it is part of a 'while', 'for', or 'until'
  52. * To do proper brace matching we do some elaborate state manipulation.
  53. * some examples:
  54. *
  55. * until bla do
  56. * work until tired
  57. * list.each do
  58. * something if test
  59. * end
  60. * end
  61. *
  62. * or
  63. *
  64. * if test
  65. * something (if test then x end)
  66. * bar if bla
  67. * end
  68. *
  69. * or, how about using class as a property..
  70. *
  71. * class Test
  72. * def endpoint
  73. * self.class.endpoint || routes
  74. * end
  75. * end
  76. *
  77. * (2) quoting:
  78. * there are many kinds of strings and escape sequences. But also, one can
  79. * start many string-like things as '%qx' where q specifies the kind of string
  80. * (like a command, escape expanded, regular expression, symbol etc.), and x is
  81. * some character and only another 'x' ends the sequence. Except for brackets
  82. * where the closing bracket ends the sequence.. and except for a nested bracket
  83. * inside the string like entity. Also, such strings can contain interpolated
  84. * ruby expressions again (and span multiple lines). Moreover, expanded
  85. * regular expression can also contain comments.
  86. */
  87. export const language = <languages.IMonarchLanguage>{
  88. tokenPostfix: '.ruby',
  89. keywords: [
  90. '__LINE__',
  91. '__ENCODING__',
  92. '__FILE__',
  93. 'BEGIN',
  94. 'END',
  95. 'alias',
  96. 'and',
  97. 'begin',
  98. 'break',
  99. 'case',
  100. 'class',
  101. 'def',
  102. 'defined?',
  103. 'do',
  104. 'else',
  105. 'elsif',
  106. 'end',
  107. 'ensure',
  108. 'for',
  109. 'false',
  110. 'if',
  111. 'in',
  112. 'module',
  113. 'next',
  114. 'nil',
  115. 'not',
  116. 'or',
  117. 'redo',
  118. 'rescue',
  119. 'retry',
  120. 'return',
  121. 'self',
  122. 'super',
  123. 'then',
  124. 'true',
  125. 'undef',
  126. 'unless',
  127. 'until',
  128. 'when',
  129. 'while',
  130. 'yield'
  131. ],
  132. keywordops: ['::', '..', '...', '?', ':', '=>'],
  133. builtins: [
  134. 'require',
  135. 'public',
  136. 'private',
  137. 'include',
  138. 'extend',
  139. 'attr_reader',
  140. 'protected',
  141. 'private_class_method',
  142. 'protected_class_method',
  143. 'new'
  144. ],
  145. // these are closed by 'end' (if, while and until are handled separately)
  146. declarations: [
  147. 'module',
  148. 'class',
  149. 'def',
  150. 'case',
  151. 'do',
  152. 'begin',
  153. 'for',
  154. 'if',
  155. 'while',
  156. 'until',
  157. 'unless'
  158. ],
  159. linedecls: ['def', 'case', 'do', 'begin', 'for', 'if', 'while', 'until', 'unless'],
  160. operators: [
  161. '^',
  162. '&',
  163. '|',
  164. '<=>',
  165. '==',
  166. '===',
  167. '!~',
  168. '=~',
  169. '>',
  170. '>=',
  171. '<',
  172. '<=',
  173. '<<',
  174. '>>',
  175. '+',
  176. '-',
  177. '*',
  178. '/',
  179. '%',
  180. '**',
  181. '~',
  182. '+@',
  183. '-@',
  184. '[]',
  185. '[]=',
  186. '`',
  187. '+=',
  188. '-=',
  189. '*=',
  190. '**=',
  191. '/=',
  192. '^=',
  193. '%=',
  194. '<<=',
  195. '>>=',
  196. '&=',
  197. '&&=',
  198. '||=',
  199. '|='
  200. ],
  201. brackets: [
  202. { open: '(', close: ')', token: 'delimiter.parenthesis' },
  203. { open: '{', close: '}', token: 'delimiter.curly' },
  204. { open: '[', close: ']', token: 'delimiter.square' }
  205. ],
  206. // we include these common regular expressions
  207. symbols: /[=><!~?:&|+\-*\/\^%\.]+/,
  208. // escape sequences
  209. escape: /(?:[abefnrstv\\"'\n\r]|[0-7]{1,3}|x[0-9A-Fa-f]{1,2}|u[0-9A-Fa-f]{4})/,
  210. escapes: /\\(?:C\-(@escape|.)|c(@escape|.)|@escape)/,
  211. decpart: /\d(_?\d)*/,
  212. decimal: /0|@decpart/,
  213. delim: /[^a-zA-Z0-9\s\n\r]/,
  214. heredelim: /(?:\w+|'[^']*'|"[^"]*"|`[^`]*`)/,
  215. regexpctl: /[(){}\[\]\$\^|\-*+?\.]/,
  216. regexpesc: /\\(?:[AzZbBdDfnrstvwWn0\\\/]|@regexpctl|c[A-Z]|x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4})?/,
  217. // The main tokenizer for our languages
  218. tokenizer: {
  219. // Main entry.
  220. // root.<decl> where decl is the current opening declaration (like 'class')
  221. root: [
  222. // identifiers and keywords
  223. // most complexity here is due to matching 'end' correctly with declarations.
  224. // We distinguish a declaration that comes first on a line, versus declarations further on a line (which are most likey modifiers)
  225. [
  226. /^(\s*)([a-z_]\w*[!?=]?)/,
  227. [
  228. 'white',
  229. {
  230. cases: {
  231. 'for|until|while': {
  232. token: 'keyword.$2',
  233. next: '@dodecl.$2'
  234. },
  235. '@declarations': {
  236. token: 'keyword.$2',
  237. next: '@root.$2'
  238. },
  239. end: { token: 'keyword.$S2', next: '@pop' },
  240. '@keywords': 'keyword',
  241. '@builtins': 'predefined',
  242. '@default': 'identifier'
  243. }
  244. }
  245. ]
  246. ],
  247. [
  248. /[a-z_]\w*[!?=]?/,
  249. {
  250. cases: {
  251. 'if|unless|while|until': {
  252. token: 'keyword.$0x',
  253. next: '@modifier.$0x'
  254. },
  255. for: { token: 'keyword.$2', next: '@dodecl.$2' },
  256. '@linedecls': { token: 'keyword.$0', next: '@root.$0' },
  257. end: { token: 'keyword.$S2', next: '@pop' },
  258. '@keywords': 'keyword',
  259. '@builtins': 'predefined',
  260. '@default': 'identifier'
  261. }
  262. }
  263. ],
  264. [/[A-Z][\w]*[!?=]?/, 'constructor.identifier'], // constant
  265. [/\$[\w]*/, 'global.constant'], // global
  266. [/@[\w]*/, 'namespace.instance.identifier'], // instance
  267. [/@@@[\w]*/, 'namespace.class.identifier'], // class
  268. // here document
  269. [/<<[-~](@heredelim).*/, { token: 'string.heredoc.delimiter', next: '@heredoc.$1' }],
  270. [
  271. /[ \t\r\n]+<<(@heredelim).*/,
  272. { token: 'string.heredoc.delimiter', next: '@heredoc.$1' }
  273. ],
  274. [/^<<(@heredelim).*/, { token: 'string.heredoc.delimiter', next: '@heredoc.$1' }],
  275. // whitespace
  276. { include: '@whitespace' },
  277. // strings
  278. [/"/, { token: 'string.d.delim', next: '@dstring.d."' }],
  279. [/'/, { token: 'string.sq.delim', next: '@sstring.sq' }],
  280. // % literals. For efficiency, rematch in the 'pstring' state
  281. [/%([rsqxwW]|Q?)/, { token: '@rematch', next: 'pstring' }],
  282. // commands and symbols
  283. [/`/, { token: 'string.x.delim', next: '@dstring.x.`' }],
  284. [/:(\w|[$@])\w*[!?=]?/, 'string.s'],
  285. [/:"/, { token: 'string.s.delim', next: '@dstring.s."' }],
  286. [/:'/, { token: 'string.s.delim', next: '@sstring.s' }],
  287. // regular expressions. Lookahead for a (not escaped) closing forwardslash on the same line
  288. [/\/(?=(\\\/|[^\/\n])+\/)/, { token: 'regexp.delim', next: '@regexp' }],
  289. // delimiters and operators
  290. [/[{}()\[\]]/, '@brackets'],
  291. [
  292. /@symbols/,
  293. {
  294. cases: {
  295. '@keywordops': 'keyword',
  296. '@operators': 'operator',
  297. '@default': ''
  298. }
  299. }
  300. ],
  301. [/[;,]/, 'delimiter'],
  302. // numbers
  303. [/0[xX][0-9a-fA-F](_?[0-9a-fA-F])*/, 'number.hex'],
  304. [/0[_oO][0-7](_?[0-7])*/, 'number.octal'],
  305. [/0[bB][01](_?[01])*/, 'number.binary'],
  306. [/0[dD]@decpart/, 'number'],
  307. [
  308. /@decimal((\.@decpart)?([eE][\-+]?@decpart)?)/,
  309. {
  310. cases: {
  311. $1: 'number.float',
  312. '@default': 'number'
  313. }
  314. }
  315. ]
  316. ],
  317. // used to not treat a 'do' as a block opener if it occurs on the same
  318. // line as a 'do' statement: 'while|until|for'
  319. // dodecl.<decl> where decl is the declarations started, like 'while'
  320. dodecl: [
  321. [/^/, { token: '', switchTo: '@root.$S2' }], // get out of do-skipping mode on a new line
  322. [
  323. /[a-z_]\w*[!?=]?/,
  324. {
  325. cases: {
  326. end: { token: 'keyword.$S2', next: '@pop' }, // end on same line
  327. do: { token: 'keyword', switchTo: '@root.$S2' }, // do on same line: not an open bracket here
  328. '@linedecls': {
  329. token: '@rematch',
  330. switchTo: '@root.$S2'
  331. }, // other declaration on same line: rematch
  332. '@keywords': 'keyword',
  333. '@builtins': 'predefined',
  334. '@default': 'identifier'
  335. }
  336. }
  337. ],
  338. { include: '@root' }
  339. ],
  340. // used to prevent potential modifiers ('if|until|while|unless') to match
  341. // with 'end' keywords.
  342. // modifier.<decl>x where decl is the declaration starter, like 'if'
  343. modifier: [
  344. [/^/, '', '@pop'], // it was a modifier: get out of modifier mode on a new line
  345. [
  346. /[a-z_]\w*[!?=]?/,
  347. {
  348. cases: {
  349. end: { token: 'keyword.$S2', next: '@pop' }, // end on same line
  350. 'then|else|elsif|do': {
  351. token: 'keyword',
  352. switchTo: '@root.$S2'
  353. }, // real declaration and not a modifier
  354. '@linedecls': {
  355. token: '@rematch',
  356. switchTo: '@root.$S2'
  357. }, // other declaration => not a modifier
  358. '@keywords': 'keyword',
  359. '@builtins': 'predefined',
  360. '@default': 'identifier'
  361. }
  362. }
  363. ],
  364. { include: '@root' }
  365. ],
  366. // single quote strings (also used for symbols)
  367. // sstring.<kind> where kind is 'sq' (single quote) or 's' (symbol)
  368. sstring: [
  369. [/[^\\']+/, 'string.$S2'],
  370. [/\\\\|\\'|\\$/, 'string.$S2.escape'],
  371. [/\\./, 'string.$S2.invalid'],
  372. [/'/, { token: 'string.$S2.delim', next: '@pop' }]
  373. ],
  374. // double quoted "string".
  375. // dstring.<kind>.<delim> where kind is 'd' (double quoted), 'x' (command), or 's' (symbol)
  376. // and delim is the ending delimiter (" or `)
  377. dstring: [
  378. [/[^\\`"#]+/, 'string.$S2'],
  379. [/#/, 'string.$S2.escape', '@interpolated'],
  380. [/\\$/, 'string.$S2.escape'],
  381. [/@escapes/, 'string.$S2.escape'],
  382. [/\\./, 'string.$S2.escape.invalid'],
  383. [
  384. /[`"]/,
  385. {
  386. cases: {
  387. '$#==$S3': { token: 'string.$S2.delim', next: '@pop' },
  388. '@default': 'string.$S2'
  389. }
  390. }
  391. ]
  392. ],
  393. // literal documents
  394. // heredoc.<close> where close is the closing delimiter
  395. heredoc: [
  396. [
  397. /^(\s*)(@heredelim)$/,
  398. {
  399. cases: {
  400. '$2==$S2': [
  401. 'string.heredoc',
  402. { token: 'string.heredoc.delimiter', next: '@pop' }
  403. ],
  404. '@default': ['string.heredoc', 'string.heredoc']
  405. }
  406. }
  407. ],
  408. [/.*/, 'string.heredoc']
  409. ],
  410. // interpolated sequence
  411. interpolated: [
  412. [/\$\w*/, 'global.constant', '@pop'],
  413. [/@\w*/, 'namespace.class.identifier', '@pop'],
  414. [/@@@\w*/, 'namespace.instance.identifier', '@pop'],
  415. [
  416. /[{]/,
  417. {
  418. token: 'string.escape.curly',
  419. switchTo: '@interpolated_compound'
  420. }
  421. ],
  422. ['', '', '@pop'] // just a # is interpreted as a #
  423. ],
  424. // any code
  425. interpolated_compound: [
  426. [/[}]/, { token: 'string.escape.curly', next: '@pop' }],
  427. { include: '@root' }
  428. ],
  429. // %r quoted regexp
  430. // pregexp.<open>.<close> where open/close are the open/close delimiter
  431. pregexp: [
  432. { include: '@whitespace' },
  433. // turns out that you can quote using regex control characters, aargh!
  434. // for example; %r|kgjgaj| is ok (even though | is used for alternation)
  435. // so, we need to match those first
  436. [
  437. /[^\(\{\[\\]/,
  438. {
  439. cases: {
  440. '$#==$S3': { token: 'regexp.delim', next: '@pop' },
  441. '$#==$S2': { token: 'regexp.delim', next: '@push' }, // nested delimiters are allowed..
  442. '~[)}\\]]': '@brackets.regexp.escape.control',
  443. '~@regexpctl': 'regexp.escape.control',
  444. '@default': 'regexp'
  445. }
  446. }
  447. ],
  448. { include: '@regexcontrol' }
  449. ],
  450. // We match regular expression quite precisely
  451. regexp: [
  452. { include: '@regexcontrol' },
  453. [/[^\\\/]/, 'regexp'],
  454. ['/[ixmp]*', { token: 'regexp.delim' }, '@pop']
  455. ],
  456. regexcontrol: [
  457. [
  458. /(\{)(\d+(?:,\d*)?)(\})/,
  459. [
  460. '@brackets.regexp.escape.control',
  461. 'regexp.escape.control',
  462. '@brackets.regexp.escape.control'
  463. ]
  464. ],
  465. [
  466. /(\[)(\^?)/,
  467. [
  468. '@brackets.regexp.escape.control',
  469. { token: 'regexp.escape.control', next: '@regexrange' }
  470. ]
  471. ],
  472. [/(\()(\?[:=!])/, ['@brackets.regexp.escape.control', 'regexp.escape.control']],
  473. [/\(\?#/, { token: 'regexp.escape.control', next: '@regexpcomment' }],
  474. [/[()]/, '@brackets.regexp.escape.control'],
  475. [/@regexpctl/, 'regexp.escape.control'],
  476. [/\\$/, 'regexp.escape'],
  477. [/@regexpesc/, 'regexp.escape'],
  478. [/\\\./, 'regexp.invalid'],
  479. [/#/, 'regexp.escape', '@interpolated']
  480. ],
  481. regexrange: [
  482. [/-/, 'regexp.escape.control'],
  483. [/\^/, 'regexp.invalid'],
  484. [/\\$/, 'regexp.escape'],
  485. [/@regexpesc/, 'regexp.escape'],
  486. [/[^\]]/, 'regexp'],
  487. [/\]/, '@brackets.regexp.escape.control', '@pop']
  488. ],
  489. regexpcomment: [
  490. [/[^)]+/, 'comment'],
  491. [/\)/, { token: 'regexp.escape.control', next: '@pop' }]
  492. ],
  493. // % quoted strings
  494. // A bit repetitive since we need to often special case the kind of ending delimiter
  495. pstring: [
  496. [/%([qws])\(/, { token: 'string.$1.delim', switchTo: '@qstring.$1.(.)' }],
  497. [/%([qws])\[/, { token: 'string.$1.delim', switchTo: '@qstring.$1.[.]' }],
  498. [/%([qws])\{/, { token: 'string.$1.delim', switchTo: '@qstring.$1.{.}' }],
  499. [/%([qws])</, { token: 'string.$1.delim', switchTo: '@qstring.$1.<.>' }],
  500. [/%([qws])(@delim)/, { token: 'string.$1.delim', switchTo: '@qstring.$1.$2.$2' }],
  501. [/%r\(/, { token: 'regexp.delim', switchTo: '@pregexp.(.)' }],
  502. [/%r\[/, { token: 'regexp.delim', switchTo: '@pregexp.[.]' }],
  503. [/%r\{/, { token: 'regexp.delim', switchTo: '@pregexp.{.}' }],
  504. [/%r</, { token: 'regexp.delim', switchTo: '@pregexp.<.>' }],
  505. [/%r(@delim)/, { token: 'regexp.delim', switchTo: '@pregexp.$1.$1' }],
  506. [/%(x|W|Q?)\(/, { token: 'string.$1.delim', switchTo: '@qqstring.$1.(.)' }],
  507. [/%(x|W|Q?)\[/, { token: 'string.$1.delim', switchTo: '@qqstring.$1.[.]' }],
  508. [/%(x|W|Q?)\{/, { token: 'string.$1.delim', switchTo: '@qqstring.$1.{.}' }],
  509. [/%(x|W|Q?)</, { token: 'string.$1.delim', switchTo: '@qqstring.$1.<.>' }],
  510. [/%(x|W|Q?)(@delim)/, { token: 'string.$1.delim', switchTo: '@qqstring.$1.$2.$2' }],
  511. [/%([rqwsxW]|Q?)./, { token: 'invalid', next: '@pop' }], // recover
  512. [/./, { token: 'invalid', next: '@pop' }] // recover
  513. ],
  514. // non-expanded quoted string.
  515. // qstring.<kind>.<open>.<close>
  516. // kind = q|w|s (single quote, array, symbol)
  517. // open = open delimiter
  518. // close = close delimiter
  519. qstring: [
  520. [/\\$/, 'string.$S2.escape'],
  521. [/\\./, 'string.$S2.escape'],
  522. [
  523. /./,
  524. {
  525. cases: {
  526. '$#==$S4': { token: 'string.$S2.delim', next: '@pop' },
  527. '$#==$S3': { token: 'string.$S2.delim', next: '@push' }, // nested delimiters are allowed..
  528. '@default': 'string.$S2'
  529. }
  530. }
  531. ]
  532. ],
  533. // expanded quoted string.
  534. // qqstring.<kind>.<open>.<close>
  535. // kind = Q|W|x (double quote, array, command)
  536. // open = open delimiter
  537. // close = close delimiter
  538. qqstring: [[/#/, 'string.$S2.escape', '@interpolated'], { include: '@qstring' }],
  539. // whitespace & comments
  540. whitespace: [
  541. [/[ \t\r\n]+/, ''],
  542. [/^\s*=begin\b/, 'comment', '@comment'],
  543. [/#.*$/, 'comment']
  544. ],
  545. comment: [
  546. [/[^=]+/, 'comment'],
  547. [/^\s*=begin\b/, 'comment.invalid'], // nested comment
  548. [/^\s*=end\b.*/, 'comment', '@pop'],
  549. [/[=]/, 'comment']
  550. ]
  551. }
  552. };