1use crate::{kind::TypeScriptSyntaxKind, language::TypeScriptLanguage};
2use oak_core::{IncrementalCache, Lexer, LexerState, OakError, lexer::LexOutput, source::Source};
3
4#[derive(Clone)]
5pub struct TypeScriptLexer<'config> {
6 config: &'config TypeScriptLanguage,
7}
8
9type State<S> = LexerState<S, TypeScriptLanguage>;
10
11impl<'config> TypeScriptLexer<'config> {
12 pub fn new(config: &'config TypeScriptLanguage) -> Self {
13 Self { config }
14 }
15}
16
17impl<'config> Lexer<TypeScriptLanguage> for TypeScriptLexer<'config> {
18 fn lex_incremental(
19 &self,
20 source: impl Source,
21 changed: usize,
22 cache: IncrementalCache<TypeScriptLanguage>,
23 ) -> LexOutput<TypeScriptLanguage> {
24 let mut state = LexerState::new_with_cache(source, changed, cache);
25 let result = self.run(&mut state);
26 state.finish(result)
27 }
28}
29
30impl<'config> TypeScriptLexer<'config> {
31 fn run<S: Source>(&self, state: &mut State<S>) -> Result<(), OakError> {
32 while state.not_at_end() {
33 let safe_point = state.get_position();
34
35 if self.skip_whitespace(state) {
36 continue;
37 }
38
39 if self.lex_newline(state) {
40 continue;
41 }
42
43 if self.skip_comment(state) {
44 continue;
45 }
46
47 if self.lex_string_literal(state) {
48 continue;
49 }
50
51 if self.lex_template_literal(state) {
52 continue;
53 }
54
55 if self.lex_numeric_literal(state) {
56 continue;
57 }
58
59 if self.lex_identifier_or_keyword(state) {
60 continue;
61 }
62
63 if self.lex_operator_or_punctuation(state) {
64 continue;
65 }
66
67 let start_pos = state.get_position();
69 if let Some(ch) = state.peek() {
70 state.advance(ch.len_utf8());
71 state.add_token(TypeScriptSyntaxKind::Error, start_pos, state.get_position());
72 }
73
74 state.safe_check(safe_point);
75 }
76
77 let eof_pos = state.get_position();
79 state.add_token(TypeScriptSyntaxKind::Eof, eof_pos, eof_pos);
80 Ok(())
81 }
82
83 fn skip_whitespace<S: Source>(&self, state: &mut State<S>) -> bool {
84 let start = state.get_position();
85 let mut found = false;
86
87 while let Some(ch) = state.peek() {
88 if ch == ' ' || ch == '\t' {
89 state.advance(ch.len_utf8());
90 found = true;
91 }
92 else {
93 break;
94 }
95 }
96
97 if found {
98 state.add_token(TypeScriptSyntaxKind::Whitespace, start, state.get_position());
99 }
100
101 found
102 }
103
104 fn lex_newline<S: Source>(&self, state: &mut State<S>) -> bool {
105 let start = state.get_position();
106
107 if let Some(ch) = state.peek() {
108 if ch == '\n' {
109 state.advance(1);
110 state.add_token(TypeScriptSyntaxKind::Newline, start, state.get_position());
111 return true;
112 }
113 else if ch == '\r' {
114 state.advance(1);
115 if state.peek() == Some('\n') {
116 state.advance(1);
117 }
118 state.add_token(TypeScriptSyntaxKind::Newline, start, state.get_position());
119 return true;
120 }
121 }
122
123 false
124 }
125
126 fn skip_comment<S: Source>(&self, state: &mut State<S>) -> bool {
127 let start = state.get_position();
128 let rest = state.rest();
129
130 if rest.starts_with("//") {
132 state.advance(2);
133 while let Some(ch) = state.peek() {
134 if ch == '\n' || ch == '\r' {
135 break;
136 }
137 state.advance(ch.len_utf8());
138 }
139 state.add_token(TypeScriptSyntaxKind::LineComment, start, state.get_position());
140 return true;
141 }
142
143 if rest.starts_with("/*") {
145 state.advance(2);
146 while let Some(ch) = state.peek() {
147 if ch == '*' && state.peek_next_n(1) == Some('/') {
148 state.advance(2);
149 break;
150 }
151 state.advance(ch.len_utf8());
152 }
153 state.add_token(TypeScriptSyntaxKind::BlockComment, start, state.get_position());
154 return true;
155 }
156
157 false
158 }
159
160 fn lex_string_literal<S: Source>(&self, state: &mut State<S>) -> bool {
161 let start = state.get_position();
162
163 if let Some(quote) = state.peek() {
164 if quote == '"' || quote == '\'' {
165 state.advance(1);
166
167 while let Some(ch) = state.peek() {
168 if ch == quote {
169 state.advance(1);
170 break;
171 }
172 else if ch == '\\' {
173 state.advance(1);
174 if let Some(_) = state.peek() {
175 state.advance(1);
176 }
177 }
178 else {
179 state.advance(ch.len_utf8());
180 }
181 }
182
183 state.add_token(TypeScriptSyntaxKind::StringLiteral, start, state.get_position());
184 return true;
185 }
186 }
187
188 false
189 }
190
191 fn lex_template_literal<S: Source>(&self, state: &mut State<S>) -> bool {
192 let start = state.get_position();
193
194 if state.peek() == Some('`') {
195 state.advance(1);
196
197 while let Some(ch) = state.peek() {
198 if ch == '`' {
199 state.advance(1);
200 break;
201 }
202 else if ch == '\\' {
203 state.advance(1);
204 if let Some(_) = state.peek() {
205 state.advance(1);
206 }
207 }
208 else {
209 state.advance(ch.len_utf8());
210 }
211 }
212
213 state.add_token(TypeScriptSyntaxKind::TemplateString, start, state.get_position());
214 return true;
215 }
216
217 false
218 }
219
220 fn lex_numeric_literal<S: Source>(&self, state: &mut State<S>) -> bool {
221 let start = state.get_position();
222
223 if let Some(ch) = state.peek() {
224 if ch.is_ascii_digit() {
225 state.advance(1);
226
227 if ch == '0' && (state.peek() == Some('x') || state.peek() == Some('X')) {
229 state.advance(1);
230 while let Some(ch) = state.peek() {
231 if ch.is_ascii_hexdigit() {
232 state.advance(1);
233 }
234 else {
235 break;
236 }
237 }
238 }
239 else {
240 while let Some(ch) = state.peek() {
242 if ch.is_ascii_digit() {
243 state.advance(1);
244 }
245 else if ch == '.' && state.peek_next_n(1).map_or(false, |c| c.is_ascii_digit()) {
246 state.advance(1);
247 while let Some(ch) = state.peek() {
248 if ch.is_ascii_digit() {
249 state.advance(1);
250 }
251 else {
252 break;
253 }
254 }
255 break;
256 }
257 else {
258 break;
259 }
260 }
261 }
262
263 if state.peek() == Some('n') {
265 state.advance(1);
266 state.add_token(TypeScriptSyntaxKind::BigIntLiteral, start, state.get_position());
267 }
268 else {
269 state.add_token(TypeScriptSyntaxKind::NumericLiteral, start, state.get_position());
270 }
271
272 return true;
273 }
274 }
275
276 false
277 }
278
279 fn lex_identifier_or_keyword<S: Source>(&self, state: &mut State<S>) -> bool {
280 let start = state.get_position();
281
282 if let Some(ch) = state.peek() {
283 if ch.is_alphabetic() || ch == '_' || ch == '$' {
284 state.advance(ch.len_utf8());
285
286 while let Some(ch) = state.peek() {
287 if ch.is_alphanumeric() || ch == '_' || ch == '$' {
288 state.advance(ch.len_utf8());
289 }
290 else {
291 break;
292 }
293 }
294
295 let end = state.get_position();
297 let text = state.get_text_in((start..end).into());
298 let kind = self.keyword_or_identifier(&text);
299
300 state.add_token(kind, start, state.get_position());
301 return true;
302 }
303 }
304
305 false
306 }
307
308 fn keyword_or_identifier(&self, text: &str) -> TypeScriptSyntaxKind {
309 match text {
310 "abstract" => TypeScriptSyntaxKind::Abstract,
311 "any" => TypeScriptSyntaxKind::Any,
312 "as" => TypeScriptSyntaxKind::As,
313 "asserts" => TypeScriptSyntaxKind::Asserts,
314 "async" => TypeScriptSyntaxKind::Async,
315 "await" => TypeScriptSyntaxKind::Await,
316 "boolean" => TypeScriptSyntaxKind::Boolean,
317 "break" => TypeScriptSyntaxKind::Break,
318 "case" => TypeScriptSyntaxKind::Case,
319 "catch" => TypeScriptSyntaxKind::Catch,
320 "class" => TypeScriptSyntaxKind::Class,
321 "const" => TypeScriptSyntaxKind::Const,
322 "constructor" => TypeScriptSyntaxKind::Constructor,
323 "continue" => TypeScriptSyntaxKind::Continue,
324 "debugger" => TypeScriptSyntaxKind::Debugger,
325 "declare" => TypeScriptSyntaxKind::Declare,
326 "default" => TypeScriptSyntaxKind::Default,
327 "delete" => TypeScriptSyntaxKind::Delete,
328 "do" => TypeScriptSyntaxKind::Do,
329 "else" => TypeScriptSyntaxKind::Else,
330 "enum" => TypeScriptSyntaxKind::Enum,
331 "export" => TypeScriptSyntaxKind::Export,
332 "extends" => TypeScriptSyntaxKind::Extends,
333 "false" => TypeScriptSyntaxKind::False,
334 "finally" => TypeScriptSyntaxKind::Finally,
335 "for" => TypeScriptSyntaxKind::For,
336 "from" => TypeScriptSyntaxKind::From,
337 "function" => TypeScriptSyntaxKind::Function,
338 "get" => TypeScriptSyntaxKind::Get,
339 "global" => TypeScriptSyntaxKind::Global,
340 "if" => TypeScriptSyntaxKind::If,
341 "implements" => TypeScriptSyntaxKind::Implements,
342 "import" => TypeScriptSyntaxKind::Import,
343 "in" => TypeScriptSyntaxKind::In,
344 "infer" => TypeScriptSyntaxKind::Infer,
345 "instanceof" => TypeScriptSyntaxKind::Instanceof,
346 "interface" => TypeScriptSyntaxKind::Interface,
347 "is" => TypeScriptSyntaxKind::Is,
348 "keyof" => TypeScriptSyntaxKind::Keyof,
349 "let" => TypeScriptSyntaxKind::Let,
350 "namespace" => TypeScriptSyntaxKind::Namespace,
351 "never" => TypeScriptSyntaxKind::Never,
352 "new" => TypeScriptSyntaxKind::New,
353 "null" => TypeScriptSyntaxKind::Null,
354 "number" => TypeScriptSyntaxKind::Number,
355 "object" => TypeScriptSyntaxKind::Object,
356 "of" => TypeScriptSyntaxKind::Of,
357 "package" => TypeScriptSyntaxKind::Package,
358 "private" => TypeScriptSyntaxKind::Private,
359 "protected" => TypeScriptSyntaxKind::Protected,
360 "public" => TypeScriptSyntaxKind::Public,
361 "readonly" => TypeScriptSyntaxKind::Readonly,
362 "require" => TypeScriptSyntaxKind::Require,
363 "return" => TypeScriptSyntaxKind::Return,
364 "set" => TypeScriptSyntaxKind::Set,
365 "static" => TypeScriptSyntaxKind::Static,
366 "string" => TypeScriptSyntaxKind::String,
367 "super" => TypeScriptSyntaxKind::Super,
368 "switch" => TypeScriptSyntaxKind::Switch,
369 "symbol" => TypeScriptSyntaxKind::Symbol,
370 "this" => TypeScriptSyntaxKind::This,
371 "throw" => TypeScriptSyntaxKind::Throw,
372 "true" => TypeScriptSyntaxKind::True,
373 "try" => TypeScriptSyntaxKind::Try,
374 "type" => TypeScriptSyntaxKind::Type,
375 "typeof" => TypeScriptSyntaxKind::Typeof,
376 "undefined" => TypeScriptSyntaxKind::Undefined,
377 "unique" => TypeScriptSyntaxKind::Unique,
378 "unknown" => TypeScriptSyntaxKind::Unknown,
379 "var" => TypeScriptSyntaxKind::Var,
380 "void" => TypeScriptSyntaxKind::Void,
381 "while" => TypeScriptSyntaxKind::While,
382 "with" => TypeScriptSyntaxKind::With,
383 "yield" => TypeScriptSyntaxKind::Yield,
384 _ => TypeScriptSyntaxKind::IdentifierName,
385 }
386 }
387
388 fn lex_operator_or_punctuation<S: Source>(&self, state: &mut State<S>) -> bool {
389 let start = state.get_position();
390 let rest = state.rest();
391
392 if rest.starts_with("===") {
394 state.advance(3);
395 state.add_token(TypeScriptSyntaxKind::EqualEqualEqual, start, state.get_position());
396 return true;
397 }
398 if rest.starts_with("!==") {
399 state.advance(3);
400 state.add_token(TypeScriptSyntaxKind::NotEqualEqual, start, state.get_position());
401 return true;
402 }
403 if rest.starts_with(">>>") {
404 state.advance(3);
405 state.add_token(TypeScriptSyntaxKind::UnsignedRightShift, start, state.get_position());
406 return true;
407 }
408 if rest.starts_with("...") {
409 state.advance(3);
410 state.add_token(TypeScriptSyntaxKind::DotDotDot, start, state.get_position());
411 return true;
412 }
413
414 if rest.starts_with("**") {
416 state.advance(2);
417 state.add_token(TypeScriptSyntaxKind::StarStar, start, state.get_position());
418 return true;
419 }
420 if rest.starts_with("<=") {
421 state.advance(2);
422 state.add_token(TypeScriptSyntaxKind::LessEqual, start, state.get_position());
423 return true;
424 }
425 if rest.starts_with(">=") {
426 state.advance(2);
427 state.add_token(TypeScriptSyntaxKind::GreaterEqual, start, state.get_position());
428 return true;
429 }
430 if rest.starts_with("==") {
431 state.advance(2);
432 state.add_token(TypeScriptSyntaxKind::EqualEqual, start, state.get_position());
433 return true;
434 }
435 if rest.starts_with("!=") {
436 state.advance(2);
437 state.add_token(TypeScriptSyntaxKind::NotEqual, start, state.get_position());
438 return true;
439 }
440 if rest.starts_with("&&") {
441 state.advance(2);
442 state.add_token(TypeScriptSyntaxKind::AmpersandAmpersand, start, state.get_position());
443 return true;
444 }
445 if rest.starts_with("||") {
446 state.advance(2);
447 state.add_token(TypeScriptSyntaxKind::PipePipe, start, state.get_position());
448 return true;
449 }
450 if rest.starts_with("<<") {
451 state.advance(2);
452 state.add_token(TypeScriptSyntaxKind::LeftShift, start, state.get_position());
453 return true;
454 }
455 if rest.starts_with(">>") {
456 state.advance(2);
457 state.add_token(TypeScriptSyntaxKind::RightShift, start, state.get_position());
458 return true;
459 }
460 if rest.starts_with("++") {
461 state.advance(2);
462 state.add_token(TypeScriptSyntaxKind::PlusPlus, start, state.get_position());
463 return true;
464 }
465 if rest.starts_with("--") {
466 state.advance(2);
467 state.add_token(TypeScriptSyntaxKind::MinusMinus, start, state.get_position());
468 return true;
469 }
470 if rest.starts_with("=>") {
471 state.advance(2);
472 state.add_token(TypeScriptSyntaxKind::Arrow, start, state.get_position());
473 return true;
474 }
475 if rest.starts_with("?.") {
476 state.advance(2);
477 state.add_token(TypeScriptSyntaxKind::QuestionDot, start, state.get_position());
478 return true;
479 }
480 if rest.starts_with("??") {
481 state.advance(2);
482 state.add_token(TypeScriptSyntaxKind::QuestionQuestion, start, state.get_position());
483 return true;
484 }
485
486 if let Some(ch) = state.peek() {
488 let kind = match ch {
489 '+' => TypeScriptSyntaxKind::Plus,
490 '-' => TypeScriptSyntaxKind::Minus,
491 '*' => TypeScriptSyntaxKind::Star,
492 '/' => TypeScriptSyntaxKind::Slash,
493 '%' => TypeScriptSyntaxKind::Percent,
494 '<' => TypeScriptSyntaxKind::Less,
495 '>' => TypeScriptSyntaxKind::Greater,
496 '!' => TypeScriptSyntaxKind::Exclamation,
497 '&' => TypeScriptSyntaxKind::Ampersand,
498 '|' => TypeScriptSyntaxKind::Pipe,
499 '^' => TypeScriptSyntaxKind::Caret,
500 '~' => TypeScriptSyntaxKind::Tilde,
501 '=' => TypeScriptSyntaxKind::Equal,
502 '?' => TypeScriptSyntaxKind::Question,
503 '(' => TypeScriptSyntaxKind::LeftParen,
504 ')' => TypeScriptSyntaxKind::RightParen,
505 '{' => TypeScriptSyntaxKind::LeftBrace,
506 '}' => TypeScriptSyntaxKind::RightBrace,
507 '[' => TypeScriptSyntaxKind::LeftBracket,
508 ']' => TypeScriptSyntaxKind::RightBracket,
509 ';' => TypeScriptSyntaxKind::Semicolon,
510 ',' => TypeScriptSyntaxKind::Comma,
511 '.' => TypeScriptSyntaxKind::Dot,
512 ':' => TypeScriptSyntaxKind::Colon,
513 _ => return false,
514 };
515
516 state.advance(1);
517 state.add_token(kind, start, state.get_position());
518 return true;
519 }
520
521 false
522 }
523}