// LexingUtils.ts
  /** Upper-case ASCII letters accepted as single-character tokens. */
  type UpperCaseCharacter =
      | "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I"
      | "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R"
      | "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z";

  /** Punctuation and quote characters accepted as single-character tokens. */
  type SpecialCharacters =
      | "$" | "{" | "}" | "[" | "]"
      | "," | "." | ";"
      | "\"" | "'" | "`"
      | "(" | ")"
      | "´" | "~" | "^";

  /** Any single character usable as a token: a letter (either case) or a special character. */
  type Character =
      | UpperCaseCharacter
      | Lowercase<UpperCaseCharacter>
      | SpecialCharacters;
  /**
   * Mutable cursor state used while scanning a string character by character.
   */
  interface ITokenState {
      /** Number of string literals currently open. */
      levels: number;

      /** Current line number; increased every time a line feed is read. */
      line: number;

      /** Current column number; reset whenever a line feed is read. */
      column: number;

      /** Text accumulated so far. */
      content: string;

      /** Index of the next character to be read from the input. */
      index: number;

      /** The character currently being processed. */
      token: string;

      /** The quote character that opened the current string literal; falsy when no string is open. */
      lastOpenLevel: string | null;

      /** Whether an escaping backslash has been seen inside the current string literal. */
      escaping: boolean;
  }
  /**
   * Error raised when the scanner reads a character that is not allowed at the
   * current position.
   *
   * The message contains the offending token and its line/column, and the full
   * scanner state is kept on the instance for callers that need more context.
   */
  class UnexpectedTokenError extends Error {
      /**
       * @param state The scanner state at the moment the unexpected token was read.
       */
      constructor(public state: ITokenState) {
          super(`Unexpected token "${state.token}" @ ${state.line}:${state.column}`);
          // Without this the error would report itself as a plain "Error".
          this.name = "UnexpectedTokenError";
      }
  }
  /** Quote characters that open and close a string literal while scanning. */
  const CharsToEscape = ["'", '"', '`'];

  /**
   * Reads the text found between a starting and an ending token.
   *
   * Scanning starts right after the first occurrence of `start` and stops at the
   * first `end` that is not inside a quoted string (single, double or backtick
   * quotes). Inside a quoted string a backslash escapes exactly the next
   * character, so escaped quotes and escaped backslashes never open or close a
   * string. Quotes and backslashes are kept verbatim in the returned text.
   *
   * Nested occurrences of `start` are not tracked: the first unquoted `end` wins.
   * If no unquoted `end` is found, everything after `start` is returned.
   *
   * @param string The content to be read.
   * @param start The starting token.
   * @param end The ending token.
   * @param options Optional settings; `allowNewLines: false` rejects line feeds.
   * @returns The contents between the tokens, or null if the starting token couldn't be found.
   * @throws UnexpectedTokenError If a line feed is read while `options.allowNewLines` is `false`.
   */
  export function readBetweenTokens(string: string, start: Character, end: Character, options?: {
      allowNewLines?: boolean
  }): string | null {
      const startIndex = string.indexOf(start);

      if (startIndex === -1) {
          return null;
      }

      const state: ITokenState = {
          levels: 0,
          line: 1,
          column: 1,
          content: "",
          index: startIndex + 1,
          lastOpenLevel: "",
          escaping: false,
          token: ""
      };

      while (state.index < string.length) {
          state.token = string.charAt(state.index++);
          state.column++;

          // If a new line was found
          if (state.token === "\n") {
              // If doesn't allow new lines
              if (options?.allowNewLines === false) {
                  throw new UnexpectedTokenError(state);
              }

              state.line++;
              state.column = 1;
          }

          if (state.escaping) {
              // The previous character was an escaping backslash: this character is
              // taken literally, whatever it is, and the escape ends here.
              state.escaping = false;
          } else if (state.lastOpenLevel && state.token === "\\") {
              // A backslash inside a string escapes the next character only
              state.escaping = true;
          } else if (CharsToEscape.includes(state.token) && (!state.lastOpenLevel || state.lastOpenLevel === state.token)) {
              if (state.lastOpenLevel) {
                  // The quote that opened the string was found again: close it
                  state.levels--;
                  state.lastOpenLevel = null;
              } else {
                  // No string is open: this quote opens one
                  state.levels++;
                  state.lastOpenLevel = state.token;
              }
          } else if (state.levels === 0 && state.token === end) {
              // Reached the ending token outside of any string
              break;
          }

          // Add the token to the contents
          state.content += state.token;
      }

      return state.content;
  }
  /**
   * Collects the leading lines that belong to an indented block.
   *
   * Lines are taken from the start of the array until one is found that is not
   * blank and does not start with the given indentation. That line, and
   * everything after it, is left out. Blank (whitespace-only) lines never end
   * the block.
   *
   * @param lines A string array containing the lines to be read.
   * @param ident The detected indentation.
   * @returns The collected lines, each one followed by a line feed.
   */
  export function readLinesUntilOutdent(lines: string[], ident: string) {
      const collected: string[] = [];

      for (const current of lines) {
          if (current === undefined) {
              break;
          }

          const isBlank = current.trim().length === 0;

          // A non-blank line without the indentation ends the block
          if (!isBlank && !current.startsWith(ident)) {
              break;
          }

          collected.push(current + "\n");
      }

      return collected.join("");
  }
  /**
   * Collects lines up to, and including, the first line sitting exactly at the
   * given indentation level.
   *
   * A line stops the reading when it is not empty, starts with the indentation
   * and does not start with the indentation repeated twice. Empty lines, deeper
   * indented lines and lines without the indentation are collected and the
   * reading goes on.
   *
   * @param lines A string array containing the lines to be read.
   * @param ident The detected indentation.
   * @returns The collected lines, each one followed by a line feed.
   */
  export function readLinesUntilIdent(lines: string[], ident: string) {
      const deeperIdent = ident + ident;
      let gathered = "";

      for (const current of lines) {
          if (current === undefined) {
              break;
          }

          gathered += `${current}\n`;

          const isAtIdentLevel =
              current.length !== 0 &&
              current.startsWith(ident) &&
              !current.startsWith(deeperIdent);

          // The line at the expected level is kept, then the reading stops
          if (isAtIdentLevel) {
              break;
          }
      }

      return gathered;
  }
  /**
   * Reads the tag found on the first line of the contents, along with any
   * parenthesised attribute group attached to it.
   *
   * Characters are consumed until a line feed is found outside of an attribute
   * group. Every `(` starts an attribute group that is read with
   * `readBetweenTokens` and may span multiple lines. Text read before the first
   * attribute group is the tag.
   *
   * @param contents The lines to be read, as an array of lines or a single string.
   * @returns The consumed text (`content`), the tag without leading whitespace
   * (`tag`), the last attribute group read including its parentheses
   * (`attributes`, empty if none) and the `line` / `column` counters at the
   * point where the reading stopped.
   */
  export function readTagWithAttributes(contents: string[]|string) {
      const source = Array.isArray(contents) ? contents.join("\n") : contents;

      let cursor = 0;
      let line = 1;
      let column = 0;
      let content = "";
      let tag = "";
      let attributes = "";

      while (cursor < source.length) {
          const char = source[cursor];
          column += 1;

          // A line feed ends the tag: no possible attributes past it
          if (char === "\n") {
              line += 1;
              column = 0;
              break;
          }

          // Found a "start-attribute" token
          if (char === "(") {
              attributes = `(${readBetweenTokens(source.substring(cursor), "(", ")")})`;
              content += attributes;

              // Jump over the whole attribute group, counting the lines it spans
              cursor += attributes.length;
              line += attributes.split("\n").length - 1;
              continue;
          }

          // Anything read before the first attribute group is part of the tag
          if (!attributes) {
              tag += char;
          }

          content += char;
          cursor += 1;
      }

      return {
          content,
          tag: tag.trimStart(),
          attributes,
          line,
          column
      };
  }
  186. }