1#![doc = include_str!("readme.md")]
2pub mod token_type;
4
5pub use self::token_type::TypeScriptTokenType;
6use crate::language::TypeScriptLanguage;
7use oak_core::{Lexer, LexerCache, LexerState, OakError, TextEdit, lexer::LexOutput, source::Source};
8
/// Lexer that turns TypeScript source text into `TypeScriptTokenType` tokens.
///
/// The lexer itself is a thin handle: it only stores a borrowed
/// [`TypeScriptLanguage`] configuration that must outlive it (`'config`).
#[derive(Clone, Debug)]
pub struct TypeScriptLexer<'config> {
    /// Language configuration handed to [`TypeScriptLexer::new`].
    config: &'config TypeScriptLanguage,
}
14
/// Shorthand for the `oak_core` [`LexerState`] specialised to [`TypeScriptLanguage`],
/// generic over the source type `S`.
pub(crate) type State<'a, S> = LexerState<'a, S, TypeScriptLanguage>;
16
17impl<'config> TypeScriptLexer<'config> {
18 pub fn new(config: &'config TypeScriptLanguage) -> Self {
20 Self { config }
21 }
22}
23
24impl<'config> Lexer<TypeScriptLanguage> for TypeScriptLexer<'config> {
25 fn lex<'a, S: Source + ?Sized>(&self, text: &S, edits: &[TextEdit], cache: &'a mut impl LexerCache<TypeScriptLanguage>) -> LexOutput<TypeScriptLanguage> {
26 let relex_from = edits.iter().map(|e| e.span.start).min().unwrap_or(0);
27 let mut state: State<'_, S> = LexerState::new_with_cache(text, relex_from, cache);
28
29 let result = self.run(&mut state);
30 if result.is_ok() {
31 state.add_eof()
32 }
33 state.finish_with_cache(result, cache)
34 }
35}
36
37impl<'config> TypeScriptLexer<'config> {
38 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
39 while state.not_at_end() {
40 let safe_point = state.get_position();
41
42 if self.skip_whitespace(state) {
43 continue;
44 }
45
46 if self.lex_newline(state) {
47 continue;
48 }
49
50 if self.skip_comment(state) {
51 continue;
52 }
53
54 if self.lex_string_literal(state) {
55 continue;
56 }
57
58 if self.lex_template_literal(state) {
59 continue;
60 }
61
62 if self.lex_numeric_literal(state) {
63 continue;
64 }
65
66 if self.lex_identifier_or_keyword(state) {
67 continue;
68 }
69
70 if self.lex_operator_or_punctuation(state) {
71 continue;
72 }
73
74 let start_pos = state.get_position();
76 if let Some(ch) = state.peek() {
77 state.advance(ch.len_utf8());
78 state.add_token(TypeScriptTokenType::Error, start_pos, state.get_position());
79 }
80
81 state.advance_if_dead_lock(safe_point);
82 }
83
84 Ok(())
85 }
86
87 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
88 let start = state.get_position();
89 let mut found = false;
90
91 while let Some(ch) = state.peek() {
92 if ch == ' ' || ch == '\t' {
93 state.advance(ch.len_utf8());
94 found = true;
95 }
96 else {
97 break;
98 }
99 }
100
101 if found {
102 state.add_token(TypeScriptTokenType::Whitespace, start, state.get_position());
103 }
104
105 found
106 }
107
108 fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
109 let start = state.get_position();
110
111 if let Some(ch) = state.peek() {
112 if ch == '\n' {
113 state.advance(1);
114 state.add_token(TypeScriptTokenType::Newline, start, state.get_position());
115 return true;
116 }
117 else if ch == '\r' {
118 state.advance(1);
119 if state.peek() == Some('\n') {
120 state.advance(1);
121 }
122 state.add_token(TypeScriptTokenType::Newline, start, state.get_position());
123 return true;
124 }
125 }
126
127 false
128 }
129
130 fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
131 let start = state.get_position();
132 let rest = state.rest();
133
134 if rest.starts_with("//") {
136 state.advance(2);
137 while let Some(ch) = state.peek() {
138 if ch == '\n' || ch == '\r' {
139 break;
140 }
141 state.advance(ch.len_utf8());
142 }
143 state.add_token(TypeScriptTokenType::LineComment, start, state.get_position());
144 return true;
145 }
146
147 if rest.starts_with("/*") {
149 state.advance(2);
150 while let Some(ch) = state.peek() {
151 if ch == '*' && state.peek_next_n(1) == Some('/') {
152 state.advance(2);
153 break;
154 }
155 state.advance(ch.len_utf8());
156 }
157 state.add_token(TypeScriptTokenType::BlockComment, start, state.get_position());
158 return true;
159 }
160
161 false
162 }
163
164 fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
165 let start = state.get_position();
166
167 if let Some(quote) = state.peek() {
168 if quote == '"' || quote == '\'' {
169 state.advance(1);
170
171 while let Some(ch) = state.peek() {
172 if ch == quote {
173 state.advance(1);
174 break;
175 }
176 else if ch == '\\' {
177 state.advance(1);
178 if let Some(_) = state.peek() {
179 state.advance(1);
180 }
181 }
182 else {
183 state.advance(ch.len_utf8());
184 }
185 }
186
187 state.add_token(TypeScriptTokenType::StringLiteral, start, state.get_position());
188 return true;
189 }
190 }
191
192 false
193 }
194
195 fn lex_template_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
196 let start = state.get_position();
197
198 if state.peek() == Some('`') {
199 state.advance(1);
200
201 while let Some(ch) = state.peek() {
202 if ch == '`' {
203 state.advance(1);
204 break;
205 }
206 else if ch == '\\' {
207 state.advance(1);
208 if let Some(_) = state.peek() {
209 state.advance(1);
210 }
211 }
212 else {
213 state.advance(ch.len_utf8());
214 }
215 }
216
217 state.add_token(TypeScriptTokenType::TemplateString, start, state.get_position());
218 return true;
219 }
220
221 false
222 }
223
224 fn lex_numeric_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
225 let start = state.get_position();
226
227 if let Some(ch) = state.peek() {
228 if ch.is_ascii_digit() {
229 state.advance(1);
230
231 if ch == '0' && (state.peek() == Some('x') || state.peek() == Some('X')) {
233 state.advance(1);
234 while let Some(ch) = state.peek() {
235 if ch.is_ascii_hexdigit() {
236 state.advance(1);
237 }
238 else {
239 break;
240 }
241 }
242 }
243 else {
244 while let Some(ch) = state.peek() {
246 if ch.is_ascii_digit() {
247 state.advance(1);
248 }
249 else if ch == '.' && state.peek_next_n(1).map_or(false, |c| c.is_ascii_digit()) {
250 state.advance(1);
251 while let Some(ch) = state.peek() {
252 if ch.is_ascii_digit() {
253 state.advance(1);
254 }
255 else {
256 break;
257 }
258 }
259 break;
260 }
261 else {
262 break;
263 }
264 }
265 }
266
267 if state.peek() == Some('n') {
269 state.advance(1);
270 state.add_token(TypeScriptTokenType::BigIntLiteral, start, state.get_position());
271 }
272 else {
273 state.add_token(TypeScriptTokenType::NumericLiteral, start, state.get_position());
274 }
275
276 return true;
277 }
278 }
279
280 false
281 }
282
283 fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
284 let start = state.get_position();
285
286 if let Some(ch) = state.peek() {
287 if ch.is_alphabetic() || ch == '_' || ch == '$' {
288 state.advance(ch.len_utf8());
289
290 while let Some(ch) = state.peek() {
291 if ch.is_alphanumeric() || ch == '_' || ch == '$' {
292 state.advance(ch.len_utf8());
293 }
294 else {
295 break;
296 }
297 }
298
299 let end = state.get_position();
301 let text = state.get_text_in(oak_core::Range { start, end });
302 let kind = self.keyword_or_identifier(&text);
303
304 state.add_token(kind, start, state.get_position());
305 return true;
306 }
307 }
308
309 false
310 }
311
312 fn keyword_or_identifier(&self, text: &str) -> TypeScriptTokenType {
313 TypeScriptTokenType::from_keyword(text).unwrap_or(TypeScriptTokenType::IdentifierName)
314 }
315
316 fn lex_operator_or_punctuation<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
317 let start = state.get_position();
318 let rest = state.rest();
319
320 let ops = [
321 ("===", TypeScriptTokenType::EqualEqualEqual),
322 ("!==", TypeScriptTokenType::NotEqualEqual),
323 (">>>", TypeScriptTokenType::UnsignedRightShift),
324 ("...", TypeScriptTokenType::DotDotDot),
325 ("**=", TypeScriptTokenType::StarStarEqual),
326 ("<<=", TypeScriptTokenType::LeftShiftEqual),
327 (">>=", TypeScriptTokenType::RightShiftEqual),
328 ("&&=", TypeScriptTokenType::AmpersandAmpersandEqual),
329 ("||=", TypeScriptTokenType::PipePipeEqual),
330 ("??=", TypeScriptTokenType::QuestionQuestionEqual),
331 ("**", TypeScriptTokenType::StarStar),
332 ("<=", TypeScriptTokenType::LessEqual),
333 (">=", TypeScriptTokenType::GreaterEqual),
334 ("==", TypeScriptTokenType::EqualEqual),
335 ("!=", TypeScriptTokenType::NotEqual),
336 ("&&", TypeScriptTokenType::AmpersandAmpersand),
337 ("||", TypeScriptTokenType::PipePipe),
338 ("<<", TypeScriptTokenType::LeftShift),
339 (">>", TypeScriptTokenType::RightShift),
340 ("++", TypeScriptTokenType::PlusPlus),
341 ("--", TypeScriptTokenType::MinusMinus),
342 ("=>", TypeScriptTokenType::Arrow),
343 ("?.", TypeScriptTokenType::QuestionDot),
344 ("??", TypeScriptTokenType::QuestionQuestion),
345 ("+=", TypeScriptTokenType::PlusEqual),
346 ("-=", TypeScriptTokenType::MinusEqual),
347 ("*=", TypeScriptTokenType::StarEqual),
348 ("/=", TypeScriptTokenType::SlashEqual),
349 ("%=", TypeScriptTokenType::PercentEqual),
350 ("&=", TypeScriptTokenType::AmpersandEqual),
351 ("|=", TypeScriptTokenType::PipeEqual),
352 ("^=", TypeScriptTokenType::CaretEqual),
353 ];
354
355 for (op, kind) in ops {
356 if rest.starts_with(op) {
357 state.advance(op.len());
358 state.add_token(kind, start, state.get_position());
359 return true;
360 }
361 }
362
363 if let Some(ch) = state.peek() {
364 let kind = match ch {
365 '+' => TypeScriptTokenType::Plus,
366 '-' => TypeScriptTokenType::Minus,
367 '*' => TypeScriptTokenType::Star,
368 '/' => TypeScriptTokenType::Slash,
369 '%' => TypeScriptTokenType::Percent,
370 '<' => TypeScriptTokenType::Less,
371 '>' => TypeScriptTokenType::Greater,
372 '!' => TypeScriptTokenType::Exclamation,
373 '&' => TypeScriptTokenType::Ampersand,
374 '|' => TypeScriptTokenType::Pipe,
375 '^' => TypeScriptTokenType::Caret,
376 '~' => TypeScriptTokenType::Tilde,
377 '=' => TypeScriptTokenType::Equal,
378 '?' => TypeScriptTokenType::Question,
379 '(' => TypeScriptTokenType::LeftParen,
380 ')' => TypeScriptTokenType::RightParen,
381 '{' => TypeScriptTokenType::LeftBrace,
382 '}' => TypeScriptTokenType::RightBrace,
383 '[' => TypeScriptTokenType::LeftBracket,
384 ']' => TypeScriptTokenType::RightBracket,
385 ';' => TypeScriptTokenType::Semicolon,
386 ',' => TypeScriptTokenType::Comma,
387 '.' => TypeScriptTokenType::Dot,
388 ':' => TypeScriptTokenType::Colon,
389 '@' => TypeScriptTokenType::At,
390 _ => return false,
391 };
392
393 state.advance(1);
394 state.add_token(kind, start, state.get_position());
395 return true;
396 }
397
398 false
399 }
400}