1use crate::{kind::TypeScriptSyntaxKind, language::TypeScriptLanguage};
2use oak_core::{Lexer, LexerCache, LexerState, OakError, TextEdit, lexer::LexOutput, source::Source};
3
4#[derive(Clone)]
5pub struct TypeScriptLexer<'config> {
6 _config: &'config TypeScriptLanguage,
7}
8
9type State<'a, S> = LexerState<'a, S, TypeScriptLanguage>;
10
11impl<'config> TypeScriptLexer<'config> {
12 pub fn new(config: &'config TypeScriptLanguage) -> Self {
13 Self { _config: config }
14 }
15}
16
17impl<'config> Lexer<TypeScriptLanguage> for TypeScriptLexer<'config> {
18 fn lex<'a, S: Source + ?Sized>(&self, text: &S, _edits: &[TextEdit], cache: &'a mut impl LexerCache<TypeScriptLanguage>) -> LexOutput<TypeScriptLanguage> {
19 let mut state: State<'_, S> = LexerState::new(text);
20 let result = self.run(&mut state);
21 if result.is_ok() {
22 state.add_eof();
23 }
24 state.finish_with_cache(result, cache)
25 }
26}
27
28impl<'config> TypeScriptLexer<'config> {
29 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
30 while state.not_at_end() {
31 let safe_point = state.get_position();
32
33 if self.skip_whitespace(state) {
34 continue;
35 }
36
37 if self.lex_newline(state) {
38 continue;
39 }
40
41 if self.skip_comment(state) {
42 continue;
43 }
44
45 if self.lex_string_literal(state) {
46 continue;
47 }
48
49 if self.lex_template_literal(state) {
50 continue;
51 }
52
53 if self.lex_numeric_literal(state) {
54 continue;
55 }
56
57 if self.lex_identifier_or_keyword(state) {
58 continue;
59 }
60
61 if self.lex_operator_or_punctuation(state) {
62 continue;
63 }
64
65 let start_pos = state.get_position();
67 if let Some(ch) = state.peek() {
68 state.advance(ch.len_utf8());
69 state.add_token(TypeScriptSyntaxKind::Error, start_pos, state.get_position());
70 }
71
72 state.advance_if_dead_lock(safe_point);
73 }
74
75 Ok(())
76 }
77
78 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
79 let start = state.get_position();
80 let mut found = false;
81
82 while let Some(ch) = state.peek() {
83 if ch == ' ' || ch == '\t' {
84 state.advance(ch.len_utf8());
85 found = true;
86 }
87 else {
88 break;
89 }
90 }
91
92 if found {
93 state.add_token(TypeScriptSyntaxKind::Whitespace, start, state.get_position());
94 }
95
96 found
97 }
98
99 fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
100 let start = state.get_position();
101
102 if let Some(ch) = state.peek() {
103 if ch == '\n' {
104 state.advance(1);
105 state.add_token(TypeScriptSyntaxKind::Newline, start, state.get_position());
106 return true;
107 }
108 else if ch == '\r' {
109 state.advance(1);
110 if state.peek() == Some('\n') {
111 state.advance(1);
112 }
113 state.add_token(TypeScriptSyntaxKind::Newline, start, state.get_position());
114 return true;
115 }
116 }
117
118 false
119 }
120
121 fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
122 let start = state.get_position();
123 let rest = state.rest();
124
125 if rest.starts_with("//") {
127 state.advance(2);
128 while let Some(ch) = state.peek() {
129 if ch == '\n' || ch == '\r' {
130 break;
131 }
132 state.advance(ch.len_utf8());
133 }
134 state.add_token(TypeScriptSyntaxKind::LineComment, start, state.get_position());
135 return true;
136 }
137
138 if rest.starts_with("/*") {
140 state.advance(2);
141 while let Some(ch) = state.peek() {
142 if ch == '*' && state.peek_next_n(1) == Some('/') {
143 state.advance(2);
144 break;
145 }
146 state.advance(ch.len_utf8());
147 }
148 state.add_token(TypeScriptSyntaxKind::BlockComment, start, state.get_position());
149 return true;
150 }
151
152 false
153 }
154
155 fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
156 let start = state.get_position();
157
158 if let Some(quote) = state.peek() {
159 if quote == '"' || quote == '\'' {
160 state.advance(1);
161
162 while let Some(ch) = state.peek() {
163 if ch == quote {
164 state.advance(1);
165 break;
166 }
167 else if ch == '\\' {
168 state.advance(1);
169 if let Some(_) = state.peek() {
170 state.advance(1);
171 }
172 }
173 else {
174 state.advance(ch.len_utf8());
175 }
176 }
177
178 state.add_token(TypeScriptSyntaxKind::StringLiteral, start, state.get_position());
179 return true;
180 }
181 }
182
183 false
184 }
185
186 fn lex_template_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
187 let start = state.get_position();
188
189 if state.peek() == Some('`') {
190 state.advance(1);
191
192 while let Some(ch) = state.peek() {
193 if ch == '`' {
194 state.advance(1);
195 break;
196 }
197 else if ch == '\\' {
198 state.advance(1);
199 if let Some(_) = state.peek() {
200 state.advance(1);
201 }
202 }
203 else {
204 state.advance(ch.len_utf8());
205 }
206 }
207
208 state.add_token(TypeScriptSyntaxKind::TemplateString, start, state.get_position());
209 return true;
210 }
211
212 false
213 }
214
215 fn lex_numeric_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
216 let start = state.get_position();
217
218 if let Some(ch) = state.peek() {
219 if ch.is_ascii_digit() {
220 state.advance(1);
221
222 if ch == '0' && (state.peek() == Some('x') || state.peek() == Some('X')) {
224 state.advance(1);
225 while let Some(ch) = state.peek() {
226 if ch.is_ascii_hexdigit() {
227 state.advance(1);
228 }
229 else {
230 break;
231 }
232 }
233 }
234 else {
235 while let Some(ch) = state.peek() {
237 if ch.is_ascii_digit() {
238 state.advance(1);
239 }
240 else if ch == '.' && state.peek_next_n(1).map_or(false, |c| c.is_ascii_digit()) {
241 state.advance(1);
242 while let Some(ch) = state.peek() {
243 if ch.is_ascii_digit() {
244 state.advance(1);
245 }
246 else {
247 break;
248 }
249 }
250 break;
251 }
252 else {
253 break;
254 }
255 }
256 }
257
258 if state.peek() == Some('n') {
260 state.advance(1);
261 state.add_token(TypeScriptSyntaxKind::BigIntLiteral, start, state.get_position());
262 }
263 else {
264 state.add_token(TypeScriptSyntaxKind::NumericLiteral, start, state.get_position());
265 }
266
267 return true;
268 }
269 }
270
271 false
272 }
273
274 fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
275 let start = state.get_position();
276
277 if let Some(ch) = state.peek() {
278 if ch.is_alphabetic() || ch == '_' || ch == '$' {
279 state.advance(ch.len_utf8());
280
281 while let Some(ch) = state.peek() {
282 if ch.is_alphanumeric() || ch == '_' || ch == '$' {
283 state.advance(ch.len_utf8());
284 }
285 else {
286 break;
287 }
288 }
289
290 let end = state.get_position();
292 let text = state.get_text_in(oak_core::Range { start, end });
293 let kind = self.keyword_or_identifier(&text);
294
295 state.add_token(kind, start, state.get_position());
296 return true;
297 }
298 }
299
300 false
301 }
302
303 fn keyword_or_identifier(&self, text: &str) -> TypeScriptSyntaxKind {
304 match text {
305 "abstract" => TypeScriptSyntaxKind::Abstract,
306 "any" => TypeScriptSyntaxKind::Any,
307 "as" => TypeScriptSyntaxKind::As,
308 "asserts" => TypeScriptSyntaxKind::Asserts,
309 "async" => TypeScriptSyntaxKind::Async,
310 "await" => TypeScriptSyntaxKind::Await,
311 "boolean" => TypeScriptSyntaxKind::Boolean,
312 "break" => TypeScriptSyntaxKind::Break,
313 "case" => TypeScriptSyntaxKind::Case,
314 "catch" => TypeScriptSyntaxKind::Catch,
315 "class" => TypeScriptSyntaxKind::Class,
316 "const" => TypeScriptSyntaxKind::Const,
317 "constructor" => TypeScriptSyntaxKind::Constructor,
318 "continue" => TypeScriptSyntaxKind::Continue,
319 "debugger" => TypeScriptSyntaxKind::Debugger,
320 "declare" => TypeScriptSyntaxKind::Declare,
321 "default" => TypeScriptSyntaxKind::Default,
322 "delete" => TypeScriptSyntaxKind::Delete,
323 "do" => TypeScriptSyntaxKind::Do,
324 "else" => TypeScriptSyntaxKind::Else,
325 "enum" => TypeScriptSyntaxKind::Enum,
326 "export" => TypeScriptSyntaxKind::Export,
327 "extends" => TypeScriptSyntaxKind::Extends,
328 "false" => TypeScriptSyntaxKind::False,
329 "finally" => TypeScriptSyntaxKind::Finally,
330 "for" => TypeScriptSyntaxKind::For,
331 "from" => TypeScriptSyntaxKind::From,
332 "function" => TypeScriptSyntaxKind::Function,
333 "get" => TypeScriptSyntaxKind::Get,
334 "global" => TypeScriptSyntaxKind::Global,
335 "if" => TypeScriptSyntaxKind::If,
336 "implements" => TypeScriptSyntaxKind::Implements,
337 "import" => TypeScriptSyntaxKind::Import,
338 "in" => TypeScriptSyntaxKind::In,
339 "infer" => TypeScriptSyntaxKind::Infer,
340 "instanceof" => TypeScriptSyntaxKind::Instanceof,
341 "interface" => TypeScriptSyntaxKind::Interface,
342 "is" => TypeScriptSyntaxKind::Is,
343 "keyof" => TypeScriptSyntaxKind::Keyof,
344 "let" => TypeScriptSyntaxKind::Let,
345 "namespace" => TypeScriptSyntaxKind::Namespace,
346 "never" => TypeScriptSyntaxKind::Never,
347 "new" => TypeScriptSyntaxKind::New,
348 "null" => TypeScriptSyntaxKind::Null,
349 "number" => TypeScriptSyntaxKind::Number,
350 "object" => TypeScriptSyntaxKind::Object,
351 "of" => TypeScriptSyntaxKind::Of,
352 "package" => TypeScriptSyntaxKind::Package,
353 "private" => TypeScriptSyntaxKind::Private,
354 "protected" => TypeScriptSyntaxKind::Protected,
355 "public" => TypeScriptSyntaxKind::Public,
356 "readonly" => TypeScriptSyntaxKind::Readonly,
357 "require" => TypeScriptSyntaxKind::Require,
358 "return" => TypeScriptSyntaxKind::Return,
359 "set" => TypeScriptSyntaxKind::Set,
360 "static" => TypeScriptSyntaxKind::Static,
361 "string" => TypeScriptSyntaxKind::String,
362 "super" => TypeScriptSyntaxKind::Super,
363 "switch" => TypeScriptSyntaxKind::Switch,
364 "symbol" => TypeScriptSyntaxKind::Symbol,
365 "this" => TypeScriptSyntaxKind::This,
366 "throw" => TypeScriptSyntaxKind::Throw,
367 "true" => TypeScriptSyntaxKind::True,
368 "try" => TypeScriptSyntaxKind::Try,
369 "type" => TypeScriptSyntaxKind::Type,
370 "typeof" => TypeScriptSyntaxKind::Typeof,
371 "undefined" => TypeScriptSyntaxKind::Undefined,
372 "unique" => TypeScriptSyntaxKind::Unique,
373 "unknown" => TypeScriptSyntaxKind::Unknown,
374 "var" => TypeScriptSyntaxKind::Var,
375 "void" => TypeScriptSyntaxKind::Void,
376 "while" => TypeScriptSyntaxKind::While,
377 "with" => TypeScriptSyntaxKind::With,
378 "yield" => TypeScriptSyntaxKind::Yield,
379 _ => TypeScriptSyntaxKind::IdentifierName,
380 }
381 }
382
383 fn lex_operator_or_punctuation<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
384 let start = state.get_position();
385 let rest = state.rest();
386
387 if rest.starts_with("===") {
389 state.advance(3);
390 state.add_token(TypeScriptSyntaxKind::EqualEqualEqual, start, state.get_position());
391 return true;
392 }
393 if rest.starts_with("!==") {
394 state.advance(3);
395 state.add_token(TypeScriptSyntaxKind::NotEqualEqual, start, state.get_position());
396 return true;
397 }
398 if rest.starts_with(">>>") {
399 state.advance(3);
400 state.add_token(TypeScriptSyntaxKind::UnsignedRightShift, start, state.get_position());
401 return true;
402 }
403 if rest.starts_with("...") {
404 state.advance(3);
405 state.add_token(TypeScriptSyntaxKind::DotDotDot, start, state.get_position());
406 return true;
407 }
408
409 if rest.starts_with("**") {
411 state.advance(2);
412 state.add_token(TypeScriptSyntaxKind::StarStar, start, state.get_position());
413 return true;
414 }
415 if rest.starts_with("<=") {
416 state.advance(2);
417 state.add_token(TypeScriptSyntaxKind::LessEqual, start, state.get_position());
418 return true;
419 }
420 if rest.starts_with(">=") {
421 state.advance(2);
422 state.add_token(TypeScriptSyntaxKind::GreaterEqual, start, state.get_position());
423 return true;
424 }
425 if rest.starts_with("==") {
426 state.advance(2);
427 state.add_token(TypeScriptSyntaxKind::EqualEqual, start, state.get_position());
428 return true;
429 }
430 if rest.starts_with("!=") {
431 state.advance(2);
432 state.add_token(TypeScriptSyntaxKind::NotEqual, start, state.get_position());
433 return true;
434 }
435 if rest.starts_with("&&") {
436 state.advance(2);
437 state.add_token(TypeScriptSyntaxKind::AmpersandAmpersand, start, state.get_position());
438 return true;
439 }
440 if rest.starts_with("||") {
441 state.advance(2);
442 state.add_token(TypeScriptSyntaxKind::PipePipe, start, state.get_position());
443 return true;
444 }
445 if rest.starts_with("<<") {
446 state.advance(2);
447 state.add_token(TypeScriptSyntaxKind::LeftShift, start, state.get_position());
448 return true;
449 }
450 if rest.starts_with(">>") {
451 state.advance(2);
452 state.add_token(TypeScriptSyntaxKind::RightShift, start, state.get_position());
453 return true;
454 }
455 if rest.starts_with("++") {
456 state.advance(2);
457 state.add_token(TypeScriptSyntaxKind::PlusPlus, start, state.get_position());
458 return true;
459 }
460 if rest.starts_with("--") {
461 state.advance(2);
462 state.add_token(TypeScriptSyntaxKind::MinusMinus, start, state.get_position());
463 return true;
464 }
465 if rest.starts_with("=>") {
466 state.advance(2);
467 state.add_token(TypeScriptSyntaxKind::Arrow, start, state.get_position());
468 return true;
469 }
470 if rest.starts_with("?.") {
471 state.advance(2);
472 state.add_token(TypeScriptSyntaxKind::QuestionDot, start, state.get_position());
473 return true;
474 }
475 if rest.starts_with("??") {
476 state.advance(2);
477 state.add_token(TypeScriptSyntaxKind::QuestionQuestion, start, state.get_position());
478 return true;
479 }
480
481 if let Some(ch) = state.peek() {
483 let kind = match ch {
484 '+' => TypeScriptSyntaxKind::Plus,
485 '-' => TypeScriptSyntaxKind::Minus,
486 '*' => TypeScriptSyntaxKind::Star,
487 '/' => TypeScriptSyntaxKind::Slash,
488 '%' => TypeScriptSyntaxKind::Percent,
489 '<' => TypeScriptSyntaxKind::Less,
490 '>' => TypeScriptSyntaxKind::Greater,
491 '!' => TypeScriptSyntaxKind::Exclamation,
492 '&' => TypeScriptSyntaxKind::Ampersand,
493 '|' => TypeScriptSyntaxKind::Pipe,
494 '^' => TypeScriptSyntaxKind::Caret,
495 '~' => TypeScriptSyntaxKind::Tilde,
496 '=' => TypeScriptSyntaxKind::Equal,
497 '?' => TypeScriptSyntaxKind::Question,
498 '(' => TypeScriptSyntaxKind::LeftParen,
499 ')' => TypeScriptSyntaxKind::RightParen,
500 '{' => TypeScriptSyntaxKind::LeftBrace,
501 '}' => TypeScriptSyntaxKind::RightBrace,
502 '[' => TypeScriptSyntaxKind::LeftBracket,
503 ']' => TypeScriptSyntaxKind::RightBracket,
504 ';' => TypeScriptSyntaxKind::Semicolon,
505 ',' => TypeScriptSyntaxKind::Comma,
506 '.' => TypeScriptSyntaxKind::Dot,
507 ':' => TypeScriptSyntaxKind::Colon,
508 _ => return false,
509 };
510
511 state.advance(1);
512 state.add_token(kind, start, state.get_position());
513 return true;
514 }
515
516 false
517 }
518}