1use crate::{kind::TypeScriptSyntaxKind, language::TypeScriptLanguage};
2use oak_core::{Lexer, LexerCache, LexerState, OakError, TextEdit, lexer::LexOutput, source::Source};
3
4#[derive(Clone, Debug)]
5pub struct TypeScriptLexer<'config> {
6 _config: &'config TypeScriptLanguage,
7}
8
9type State<'a, S> = LexerState<'a, S, TypeScriptLanguage>;
10
11impl<'config> TypeScriptLexer<'config> {
12 pub fn new(config: &'config TypeScriptLanguage) -> Self {
13 Self { _config: config }
14 }
15}
16
17impl<'config> Lexer<TypeScriptLanguage> for TypeScriptLexer<'config> {
18 fn lex<'a, S: Source + ?Sized>(&self, text: &S, edits: &[TextEdit], cache: &'a mut impl LexerCache<TypeScriptLanguage>) -> LexOutput<TypeScriptLanguage> {
19 let relex_from = edits.iter().map(|e| e.span.start).min().unwrap_or(0);
20 let mut state: State<'_, S> = LexerState::new_with_cache(text, relex_from, cache);
21
22 let result = self.run(&mut state);
23 if result.is_ok() {
24 state.add_eof();
25 }
26 state.finish_with_cache(result, cache)
27 }
28}
29
30impl<'config> TypeScriptLexer<'config> {
31 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
32 while state.not_at_end() {
33 let safe_point = state.get_position();
34
35 if self.skip_whitespace(state) {
36 continue;
37 }
38
39 if self.lex_newline(state) {
40 continue;
41 }
42
43 if self.skip_comment(state) {
44 continue;
45 }
46
47 if self.lex_string_literal(state) {
48 continue;
49 }
50
51 if self.lex_template_literal(state) {
52 continue;
53 }
54
55 if self.lex_numeric_literal(state) {
56 continue;
57 }
58
59 if self.lex_identifier_or_keyword(state) {
60 continue;
61 }
62
63 if self.lex_operator_or_punctuation(state) {
64 continue;
65 }
66
67 let start_pos = state.get_position();
69 if let Some(ch) = state.peek() {
70 state.advance(ch.len_utf8());
71 state.add_token(TypeScriptSyntaxKind::Error, start_pos, state.get_position());
72 }
73
74 state.advance_if_dead_lock(safe_point);
75 }
76
77 Ok(())
78 }
79
80 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
81 let start = state.get_position();
82 let mut found = false;
83
84 while let Some(ch) = state.peek() {
85 if ch == ' ' || ch == '\t' {
86 state.advance(ch.len_utf8());
87 found = true;
88 }
89 else {
90 break;
91 }
92 }
93
94 if found {
95 state.add_token(TypeScriptSyntaxKind::Whitespace, start, state.get_position());
96 }
97
98 found
99 }
100
101 fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
102 let start = state.get_position();
103
104 if let Some(ch) = state.peek() {
105 if ch == '\n' {
106 state.advance(1);
107 state.add_token(TypeScriptSyntaxKind::Newline, start, state.get_position());
108 return true;
109 }
110 else if ch == '\r' {
111 state.advance(1);
112 if state.peek() == Some('\n') {
113 state.advance(1);
114 }
115 state.add_token(TypeScriptSyntaxKind::Newline, start, state.get_position());
116 return true;
117 }
118 }
119
120 false
121 }
122
123 fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
124 let start = state.get_position();
125 let rest = state.rest();
126
127 if rest.starts_with("//") {
129 state.advance(2);
130 while let Some(ch) = state.peek() {
131 if ch == '\n' || ch == '\r' {
132 break;
133 }
134 state.advance(ch.len_utf8());
135 }
136 state.add_token(TypeScriptSyntaxKind::LineComment, start, state.get_position());
137 return true;
138 }
139
140 if rest.starts_with("/*") {
142 state.advance(2);
143 while let Some(ch) = state.peek() {
144 if ch == '*' && state.peek_next_n(1) == Some('/') {
145 state.advance(2);
146 break;
147 }
148 state.advance(ch.len_utf8());
149 }
150 state.add_token(TypeScriptSyntaxKind::BlockComment, start, state.get_position());
151 return true;
152 }
153
154 false
155 }
156
157 fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
158 let start = state.get_position();
159
160 if let Some(quote) = state.peek() {
161 if quote == '"' || quote == '\'' {
162 state.advance(1);
163
164 while let Some(ch) = state.peek() {
165 if ch == quote {
166 state.advance(1);
167 break;
168 }
169 else if ch == '\\' {
170 state.advance(1);
171 if let Some(_) = state.peek() {
172 state.advance(1);
173 }
174 }
175 else {
176 state.advance(ch.len_utf8());
177 }
178 }
179
180 state.add_token(TypeScriptSyntaxKind::StringLiteral, start, state.get_position());
181 return true;
182 }
183 }
184
185 false
186 }
187
188 fn lex_template_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
189 let start = state.get_position();
190
191 if state.peek() == Some('`') {
192 state.advance(1);
193
194 while let Some(ch) = state.peek() {
195 if ch == '`' {
196 state.advance(1);
197 break;
198 }
199 else if ch == '\\' {
200 state.advance(1);
201 if let Some(_) = state.peek() {
202 state.advance(1);
203 }
204 }
205 else {
206 state.advance(ch.len_utf8());
207 }
208 }
209
210 state.add_token(TypeScriptSyntaxKind::TemplateString, start, state.get_position());
211 return true;
212 }
213
214 false
215 }
216
217 fn lex_numeric_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
218 let start = state.get_position();
219
220 if let Some(ch) = state.peek() {
221 if ch.is_ascii_digit() {
222 state.advance(1);
223
224 if ch == '0' && (state.peek() == Some('x') || state.peek() == Some('X')) {
226 state.advance(1);
227 while let Some(ch) = state.peek() {
228 if ch.is_ascii_hexdigit() {
229 state.advance(1);
230 }
231 else {
232 break;
233 }
234 }
235 }
236 else {
237 while let Some(ch) = state.peek() {
239 if ch.is_ascii_digit() {
240 state.advance(1);
241 }
242 else if ch == '.' && state.peek_next_n(1).map_or(false, |c| c.is_ascii_digit()) {
243 state.advance(1);
244 while let Some(ch) = state.peek() {
245 if ch.is_ascii_digit() {
246 state.advance(1);
247 }
248 else {
249 break;
250 }
251 }
252 break;
253 }
254 else {
255 break;
256 }
257 }
258 }
259
260 if state.peek() == Some('n') {
262 state.advance(1);
263 state.add_token(TypeScriptSyntaxKind::BigIntLiteral, start, state.get_position());
264 }
265 else {
266 state.add_token(TypeScriptSyntaxKind::NumericLiteral, start, state.get_position());
267 }
268
269 return true;
270 }
271 }
272
273 false
274 }
275
276 fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
277 let start = state.get_position();
278
279 if let Some(ch) = state.peek() {
280 if ch.is_alphabetic() || ch == '_' || ch == '$' {
281 state.advance(ch.len_utf8());
282
283 while let Some(ch) = state.peek() {
284 if ch.is_alphanumeric() || ch == '_' || ch == '$' {
285 state.advance(ch.len_utf8());
286 }
287 else {
288 break;
289 }
290 }
291
292 let end = state.get_position();
294 let text = state.get_text_in(oak_core::Range { start, end });
295 let kind = self.keyword_or_identifier(&text);
296
297 state.add_token(kind, start, state.get_position());
298 return true;
299 }
300 }
301
302 false
303 }
304
305 fn keyword_or_identifier(&self, text: &str) -> TypeScriptSyntaxKind {
306 TypeScriptSyntaxKind::from_keyword(text).unwrap_or(TypeScriptSyntaxKind::IdentifierName)
307 }
308
309 fn lex_operator_or_punctuation<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
310 let start = state.get_position();
311 let rest = state.rest();
312
313 let ops = [
314 ("===", TypeScriptSyntaxKind::EqualEqualEqual),
315 ("!==", TypeScriptSyntaxKind::NotEqualEqual),
316 (">>>", TypeScriptSyntaxKind::UnsignedRightShift),
317 ("...", TypeScriptSyntaxKind::DotDotDot),
318 ("**=", TypeScriptSyntaxKind::StarStarEqual),
319 ("<<=", TypeScriptSyntaxKind::LeftShiftEqual),
320 (">>=", TypeScriptSyntaxKind::RightShiftEqual),
321 ("&&=", TypeScriptSyntaxKind::AmpersandAmpersandEqual),
322 ("||=", TypeScriptSyntaxKind::PipePipeEqual),
323 ("??=", TypeScriptSyntaxKind::QuestionQuestionEqual),
324 ("**", TypeScriptSyntaxKind::StarStar),
325 ("<=", TypeScriptSyntaxKind::LessEqual),
326 (">=", TypeScriptSyntaxKind::GreaterEqual),
327 ("==", TypeScriptSyntaxKind::EqualEqual),
328 ("!=", TypeScriptSyntaxKind::NotEqual),
329 ("&&", TypeScriptSyntaxKind::AmpersandAmpersand),
330 ("||", TypeScriptSyntaxKind::PipePipe),
331 ("<<", TypeScriptSyntaxKind::LeftShift),
332 (">>", TypeScriptSyntaxKind::RightShift),
333 ("++", TypeScriptSyntaxKind::PlusPlus),
334 ("--", TypeScriptSyntaxKind::MinusMinus),
335 ("=>", TypeScriptSyntaxKind::Arrow),
336 ("?.", TypeScriptSyntaxKind::QuestionDot),
337 ("??", TypeScriptSyntaxKind::QuestionQuestion),
338 ("+=", TypeScriptSyntaxKind::PlusEqual),
339 ("-=", TypeScriptSyntaxKind::MinusEqual),
340 ("*=", TypeScriptSyntaxKind::StarEqual),
341 ("/=", TypeScriptSyntaxKind::SlashEqual),
342 ("%=", TypeScriptSyntaxKind::PercentEqual),
343 ("&=", TypeScriptSyntaxKind::AmpersandEqual),
344 ("|=", TypeScriptSyntaxKind::PipeEqual),
345 ("^=", TypeScriptSyntaxKind::CaretEqual),
346 ];
347
348 for (op, kind) in ops {
349 if rest.starts_with(op) {
350 state.advance(op.len());
351 state.add_token(kind, start, state.get_position());
352 return true;
353 }
354 }
355
356 if let Some(ch) = state.peek() {
357 let kind = match ch {
358 '+' => TypeScriptSyntaxKind::Plus,
359 '-' => TypeScriptSyntaxKind::Minus,
360 '*' => TypeScriptSyntaxKind::Star,
361 '/' => TypeScriptSyntaxKind::Slash,
362 '%' => TypeScriptSyntaxKind::Percent,
363 '<' => TypeScriptSyntaxKind::Less,
364 '>' => TypeScriptSyntaxKind::Greater,
365 '!' => TypeScriptSyntaxKind::Exclamation,
366 '&' => TypeScriptSyntaxKind::Ampersand,
367 '|' => TypeScriptSyntaxKind::Pipe,
368 '^' => TypeScriptSyntaxKind::Caret,
369 '~' => TypeScriptSyntaxKind::Tilde,
370 '=' => TypeScriptSyntaxKind::Equal,
371 '?' => TypeScriptSyntaxKind::Question,
372 '(' => TypeScriptSyntaxKind::LeftParen,
373 ')' => TypeScriptSyntaxKind::RightParen,
374 '{' => TypeScriptSyntaxKind::LeftBrace,
375 '}' => TypeScriptSyntaxKind::RightBrace,
376 '[' => TypeScriptSyntaxKind::LeftBracket,
377 ']' => TypeScriptSyntaxKind::RightBracket,
378 ';' => TypeScriptSyntaxKind::Semicolon,
379 ',' => TypeScriptSyntaxKind::Comma,
380 '.' => TypeScriptSyntaxKind::Dot,
381 ':' => TypeScriptSyntaxKind::Colon,
382 _ => return false,
383 };
384
385 state.advance(1);
386 state.add_token(kind, start, state.get_position());
387 return true;
388 }
389
390 false
391 }
392}