1#![doc = include_str!("readme.md")]
2pub mod token_type;
3
4pub use self::token_type::TypeScriptTokenType;
5use crate::language::TypeScriptLanguage;
6use oak_core::{Lexer, LexerCache, LexerState, OakError, TextEdit, lexer::LexOutput, source::Source};
7
/// Lexer for TypeScript source text.
///
/// Holds a borrowed [`TypeScriptLanguage`] configuration for the lifetime
/// `'config`. The field is underscore-prefixed because none of the lexing
/// routines in this file read it.
#[derive(Clone, Debug)]
pub struct TypeScriptLexer<'config> {
    /// Language configuration this lexer was created with.
    _config: &'config TypeScriptLanguage,
}
12
/// Shorthand for the `oak_core` lexer state specialised to [`TypeScriptLanguage`].
type State<'a, S> = LexerState<'a, S, TypeScriptLanguage>;
14
15impl<'config> TypeScriptLexer<'config> {
16 pub fn new(config: &'config TypeScriptLanguage) -> Self {
17 Self { _config: config }
18 }
19}
20
21impl<'config> Lexer<TypeScriptLanguage> for TypeScriptLexer<'config> {
22 fn lex<'a, S: Source + ?Sized>(&self, text: &S, edits: &[TextEdit], cache: &'a mut impl LexerCache<TypeScriptLanguage>) -> LexOutput<TypeScriptLanguage> {
23 let relex_from = edits.iter().map(|e| e.span.start).min().unwrap_or(0);
24 let mut state: State<'_, S> = LexerState::new_with_cache(text, relex_from, cache);
25
26 let result = self.run(&mut state);
27 if result.is_ok() {
28 state.add_eof()
29 }
30 state.finish_with_cache(result, cache)
31 }
32}
33
34impl<'config> TypeScriptLexer<'config> {
35 fn run<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> Result<(), OakError> {
36 while state.not_at_end() {
37 let safe_point = state.get_position();
38
39 if self.skip_whitespace(state) {
40 continue;
41 }
42
43 if self.lex_newline(state) {
44 continue;
45 }
46
47 if self.skip_comment(state) {
48 continue;
49 }
50
51 if self.lex_string_literal(state) {
52 continue;
53 }
54
55 if self.lex_template_literal(state) {
56 continue;
57 }
58
59 if self.lex_numeric_literal(state) {
60 continue;
61 }
62
63 if self.lex_identifier_or_keyword(state) {
64 continue;
65 }
66
67 if self.lex_operator_or_punctuation(state) {
68 continue;
69 }
70
71 let start_pos = state.get_position();
73 if let Some(ch) = state.peek() {
74 state.advance(ch.len_utf8());
75 state.add_token(TypeScriptTokenType::Error, start_pos, state.get_position());
76 }
77
78 state.advance_if_dead_lock(safe_point);
79 }
80
81 Ok(())
82 }
83
84 fn skip_whitespace<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
85 let start = state.get_position();
86 let mut found = false;
87
88 while let Some(ch) = state.peek() {
89 if ch == ' ' || ch == '\t' {
90 state.advance(ch.len_utf8());
91 found = true;
92 }
93 else {
94 break;
95 }
96 }
97
98 if found {
99 state.add_token(TypeScriptTokenType::Whitespace, start, state.get_position());
100 }
101
102 found
103 }
104
105 fn lex_newline<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
106 let start = state.get_position();
107
108 if let Some(ch) = state.peek() {
109 if ch == '\n' {
110 state.advance(1);
111 state.add_token(TypeScriptTokenType::Newline, start, state.get_position());
112 return true;
113 }
114 else if ch == '\r' {
115 state.advance(1);
116 if state.peek() == Some('\n') {
117 state.advance(1);
118 }
119 state.add_token(TypeScriptTokenType::Newline, start, state.get_position());
120 return true;
121 }
122 }
123
124 false
125 }
126
127 fn skip_comment<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
128 let start = state.get_position();
129 let rest = state.rest();
130
131 if rest.starts_with("//") {
133 state.advance(2);
134 while let Some(ch) = state.peek() {
135 if ch == '\n' || ch == '\r' {
136 break;
137 }
138 state.advance(ch.len_utf8());
139 }
140 state.add_token(TypeScriptTokenType::LineComment, start, state.get_position());
141 return true;
142 }
143
144 if rest.starts_with("/*") {
146 state.advance(2);
147 while let Some(ch) = state.peek() {
148 if ch == '*' && state.peek_next_n(1) == Some('/') {
149 state.advance(2);
150 break;
151 }
152 state.advance(ch.len_utf8());
153 }
154 state.add_token(TypeScriptTokenType::BlockComment, start, state.get_position());
155 return true;
156 }
157
158 false
159 }
160
161 fn lex_string_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
162 let start = state.get_position();
163
164 if let Some(quote) = state.peek() {
165 if quote == '"' || quote == '\'' {
166 state.advance(1);
167
168 while let Some(ch) = state.peek() {
169 if ch == quote {
170 state.advance(1);
171 break;
172 }
173 else if ch == '\\' {
174 state.advance(1);
175 if let Some(_) = state.peek() {
176 state.advance(1);
177 }
178 }
179 else {
180 state.advance(ch.len_utf8());
181 }
182 }
183
184 state.add_token(TypeScriptTokenType::StringLiteral, start, state.get_position());
185 return true;
186 }
187 }
188
189 false
190 }
191
192 fn lex_template_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
193 let start = state.get_position();
194
195 if state.peek() == Some('`') {
196 state.advance(1);
197
198 while let Some(ch) = state.peek() {
199 if ch == '`' {
200 state.advance(1);
201 break;
202 }
203 else if ch == '\\' {
204 state.advance(1);
205 if let Some(_) = state.peek() {
206 state.advance(1);
207 }
208 }
209 else {
210 state.advance(ch.len_utf8());
211 }
212 }
213
214 state.add_token(TypeScriptTokenType::TemplateString, start, state.get_position());
215 return true;
216 }
217
218 false
219 }
220
221 fn lex_numeric_literal<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
222 let start = state.get_position();
223
224 if let Some(ch) = state.peek() {
225 if ch.is_ascii_digit() {
226 state.advance(1);
227
228 if ch == '0' && (state.peek() == Some('x') || state.peek() == Some('X')) {
230 state.advance(1);
231 while let Some(ch) = state.peek() {
232 if ch.is_ascii_hexdigit() {
233 state.advance(1);
234 }
235 else {
236 break;
237 }
238 }
239 }
240 else {
241 while let Some(ch) = state.peek() {
243 if ch.is_ascii_digit() {
244 state.advance(1);
245 }
246 else if ch == '.' && state.peek_next_n(1).map_or(false, |c| c.is_ascii_digit()) {
247 state.advance(1);
248 while let Some(ch) = state.peek() {
249 if ch.is_ascii_digit() {
250 state.advance(1);
251 }
252 else {
253 break;
254 }
255 }
256 break;
257 }
258 else {
259 break;
260 }
261 }
262 }
263
264 if state.peek() == Some('n') {
266 state.advance(1);
267 state.add_token(TypeScriptTokenType::BigIntLiteral, start, state.get_position());
268 }
269 else {
270 state.add_token(TypeScriptTokenType::NumericLiteral, start, state.get_position());
271 }
272
273 return true;
274 }
275 }
276
277 false
278 }
279
280 fn lex_identifier_or_keyword<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
281 let start = state.get_position();
282
283 if let Some(ch) = state.peek() {
284 if ch.is_alphabetic() || ch == '_' || ch == '$' {
285 state.advance(ch.len_utf8());
286
287 while let Some(ch) = state.peek() {
288 if ch.is_alphanumeric() || ch == '_' || ch == '$' {
289 state.advance(ch.len_utf8());
290 }
291 else {
292 break;
293 }
294 }
295
296 let end = state.get_position();
298 let text = state.get_text_in(oak_core::Range { start, end });
299 let kind = self.keyword_or_identifier(&text);
300
301 state.add_token(kind, start, state.get_position());
302 return true;
303 }
304 }
305
306 false
307 }
308
309 fn keyword_or_identifier(&self, text: &str) -> TypeScriptTokenType {
310 TypeScriptTokenType::from_keyword(text).unwrap_or(TypeScriptTokenType::IdentifierName)
311 }
312
313 fn lex_operator_or_punctuation<'a, S: Source + ?Sized>(&self, state: &mut State<'a, S>) -> bool {
314 let start = state.get_position();
315 let rest = state.rest();
316
317 let ops = [
318 ("===", TypeScriptTokenType::EqualEqualEqual),
319 ("!==", TypeScriptTokenType::NotEqualEqual),
320 (">>>", TypeScriptTokenType::UnsignedRightShift),
321 ("...", TypeScriptTokenType::DotDotDot),
322 ("**=", TypeScriptTokenType::StarStarEqual),
323 ("<<=", TypeScriptTokenType::LeftShiftEqual),
324 (">>=", TypeScriptTokenType::RightShiftEqual),
325 ("&&=", TypeScriptTokenType::AmpersandAmpersandEqual),
326 ("||=", TypeScriptTokenType::PipePipeEqual),
327 ("??=", TypeScriptTokenType::QuestionQuestionEqual),
328 ("**", TypeScriptTokenType::StarStar),
329 ("<=", TypeScriptTokenType::LessEqual),
330 (">=", TypeScriptTokenType::GreaterEqual),
331 ("==", TypeScriptTokenType::EqualEqual),
332 ("!=", TypeScriptTokenType::NotEqual),
333 ("&&", TypeScriptTokenType::AmpersandAmpersand),
334 ("||", TypeScriptTokenType::PipePipe),
335 ("<<", TypeScriptTokenType::LeftShift),
336 (">>", TypeScriptTokenType::RightShift),
337 ("++", TypeScriptTokenType::PlusPlus),
338 ("--", TypeScriptTokenType::MinusMinus),
339 ("=>", TypeScriptTokenType::Arrow),
340 ("?.", TypeScriptTokenType::QuestionDot),
341 ("??", TypeScriptTokenType::QuestionQuestion),
342 ("+=", TypeScriptTokenType::PlusEqual),
343 ("-=", TypeScriptTokenType::MinusEqual),
344 ("*=", TypeScriptTokenType::StarEqual),
345 ("/=", TypeScriptTokenType::SlashEqual),
346 ("%=", TypeScriptTokenType::PercentEqual),
347 ("&=", TypeScriptTokenType::AmpersandEqual),
348 ("|=", TypeScriptTokenType::PipeEqual),
349 ("^=", TypeScriptTokenType::CaretEqual),
350 ];
351
352 for (op, kind) in ops {
353 if rest.starts_with(op) {
354 state.advance(op.len());
355 state.add_token(kind, start, state.get_position());
356 return true;
357 }
358 }
359
360 if let Some(ch) = state.peek() {
361 let kind = match ch {
362 '+' => TypeScriptTokenType::Plus,
363 '-' => TypeScriptTokenType::Minus,
364 '*' => TypeScriptTokenType::Star,
365 '/' => TypeScriptTokenType::Slash,
366 '%' => TypeScriptTokenType::Percent,
367 '<' => TypeScriptTokenType::Less,
368 '>' => TypeScriptTokenType::Greater,
369 '!' => TypeScriptTokenType::Exclamation,
370 '&' => TypeScriptTokenType::Ampersand,
371 '|' => TypeScriptTokenType::Pipe,
372 '^' => TypeScriptTokenType::Caret,
373 '~' => TypeScriptTokenType::Tilde,
374 '=' => TypeScriptTokenType::Equal,
375 '?' => TypeScriptTokenType::Question,
376 '(' => TypeScriptTokenType::LeftParen,
377 ')' => TypeScriptTokenType::RightParen,
378 '{' => TypeScriptTokenType::LeftBrace,
379 '}' => TypeScriptTokenType::RightBrace,
380 '[' => TypeScriptTokenType::LeftBracket,
381 ']' => TypeScriptTokenType::RightBracket,
382 ';' => TypeScriptTokenType::Semicolon,
383 ',' => TypeScriptTokenType::Comma,
384 '.' => TypeScriptTokenType::Dot,
385 ':' => TypeScriptTokenType::Colon,
386 '@' => TypeScriptTokenType::At,
387 _ => return false,
388 };
389
390 state.advance(1);
391 state.add_token(kind, start, state.get_position());
392 return true;
393 }
394
395 false
396 }
397}