/// Lexical category attached to every [`Token`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TokenType {
    /// End-of-input marker.
    Eof,
    /// Identifier name that is not in the keyword table.
    Identifier,
    /// Identifier name that is in the keyword table.
    Keyword,
    /// Numeric literal without a trailing `n`.
    Number,
    /// Numeric literal with a trailing `n`.
    BigInt,
    /// Quoted string literal; the token value holds the decoded contents.
    String,
    /// Operator or punctuation.
    Punctuator,
    /// Generic template-literal token.
    Template,
    /// Regular-expression literal.
    Regex,
    /// `#!...` line at the very start of the source.
    Hashbang,
    /// `#name` private identifier.
    PrivateIdentifier,

    /// Complete template literal that contains no `${` substitution.
    NoSubstitutionTemplate,

    /// Opening template segment, ended by `${`.
    TemplateHead,

    /// Template segment between two substitutions, ended by `${`.
    TemplateMiddle,

    /// Final template segment, ended by the closing backtick.
    TemplateTail,
}
23
24#[derive(Debug, Clone)]
25pub struct Token {
26 pub token_type: TokenType,
27 pub value: String,
28 pub line: u32,
29 pub column: u32,
30}
31
/// Summary of the previously lexed token, used to decide whether a `/`
/// begins a regular expression or is a division operator.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LastTokenKind {
    /// No previous token (initial state, or after a reposition).
    None,

    /// The previous token can be the left operand of a division, so `/` is
    /// lexed as `/` or `/=`.
    Dividend,

    /// The previous token cannot be a left operand, so `/` starts a regular
    /// expression literal.
    RegexPrefix,
}
40
41#[derive(Debug, Clone)]
42struct PeekCache {
43 pre_pos: usize,
44 pre_line: u32,
45 pre_column: u32,
46 pre_last_token_kind: LastTokenKind,
47 post_pos: usize,
48 post_line: u32,
49 post_column: u32,
50 post_last_token_kind: LastTokenKind,
51 token: Token,
52}
53
54pub struct Lexer {
55 pub source: Vec<char>,
56 pub pos: usize,
57 pub line: u32,
58 pub column: u32,
59
60 last_token_kind: LastTokenKind,
61
62 cached_peek: Option<PeekCache>,
63
64 pub last_string_had_escape: bool,
65}
66
67impl Lexer {
68 pub fn new(source: &str) -> Self {
69 Lexer {
70 source: source.chars().collect(),
71 pos: 0,
72 line: 1,
73 column: 1,
74 last_token_kind: LastTokenKind::None,
75 cached_peek: None,
76 last_string_had_escape: false,
77 }
78 }
79
80 pub fn next_token(&mut self) -> Option<Token> {
81 if let Some(cache) = self.cached_peek.take() {
82 if cache.pre_pos == self.pos
83 && cache.pre_line == self.line
84 && cache.pre_column == self.column
85 && cache.pre_last_token_kind == self.last_token_kind
86 {
87 self.pos = cache.post_pos;
88 self.line = cache.post_line;
89 self.column = cache.post_column;
90 self.last_token_kind = cache.post_last_token_kind;
91 return Some(cache.token);
92 }
93 }
94
95 self.next_token_uncached()
96 }
97
98 fn next_token_uncached(&mut self) -> Option<Token> {
99 self.skip_whitespace()?;
100 if self.pos >= self.source.len() {
101 return None;
102 }
103
104 if self.pos == 0 && self.source[self.pos] == '#' {
105 if self.pos + 1 < self.source.len() && self.source[self.pos + 1] == '!' {
106 let token = self.read_hashbang();
107 self.last_token_kind = LastTokenKind::RegexPrefix;
108 return Some(token);
109 }
110 }
111
112 let c = self.source[self.pos];
113
114 if c.is_ascii_digit() {
115 let token = self.read_number();
116 self.last_token_kind = LastTokenKind::Dividend;
117 return Some(token);
118 }
119
120 if c == '.'
121 && self.pos + 1 < self.source.len()
122 && self.source[self.pos + 1].is_ascii_digit()
123 {
124 let token = self.read_number();
125 self.last_token_kind = LastTokenKind::Dividend;
126 return Some(token);
127 }
128
129 if c == '"' || c == '\'' {
130 let token = self.read_string(c);
131 self.last_token_kind = LastTokenKind::Dividend;
132 return Some(token);
133 }
134
135 if c == '`' {
136 self.advance();
137 let (value, terminated_by_interp) = self.scan_template_segment();
138 self.last_token_kind = LastTokenKind::Dividend;
139 let token_type = if terminated_by_interp {
140 TokenType::TemplateHead
141 } else {
142 TokenType::NoSubstitutionTemplate
143 };
144 return Some(Token {
145 token_type,
146 value,
147 line: self.line,
148 column: self.column,
149 });
150 }
151
152 if Self::is_identifier_start(c) || c == '\\' {
153 let token = self.read_identifier();
154 self.last_token_kind = if matches!(
155 token.value.as_str(),
156 "return"
157 | "throw"
158 | "case"
159 | "typeof"
160 | "void"
161 | "new"
162 | "delete"
163 | "in"
164 | "instanceof"
165 | "yield"
166 ) {
167 LastTokenKind::RegexPrefix
168 } else {
169 LastTokenKind::Dividend
170 };
171 return Some(token);
172 }
173
174 if c == '#' && self.pos > 0 {
175 if self.pos + 1 < self.source.len() {
176 let next = self.source[self.pos + 1];
177 if Self::is_identifier_start(next) || next == '\\' {
178 let token = self.read_private_identifier();
179 self.last_token_kind = LastTokenKind::Dividend;
180 return Some(token);
181 }
182 }
183 }
184
185 if c == '/' {
186 let token = self.read_comment_or_regex();
187 if token.token_type == TokenType::Eof {
188 return None;
189 }
190 return Some(token);
191 }
192
193 let token = self.read_punctuator();
194 self.last_token_kind = match token.value.as_str() {
195 ")" | "]" | "}" | "++" | "--" => LastTokenKind::Dividend,
196 _ => LastTokenKind::RegexPrefix,
197 };
198 Some(token)
199 }
200
201 pub fn set_pos(&mut self, pos: usize) {
202 self.pos = pos;
203
204 self.cached_peek = None;
205 self.last_token_kind = LastTokenKind::None;
206 }
207
208 pub fn pos(&self) -> usize {
209 self.pos
210 }
211
212 pub fn column(&self) -> u32 {
213 self.column
214 }
215
216 pub fn set_column(&mut self, col: u32) {
217 self.column = col;
218 }
219
220 pub fn last_token_kind(&self) -> LastTokenKind {
221 self.last_token_kind
222 }
223
224 pub fn set_last_token_kind(&mut self, kind: LastTokenKind) {
225 self.last_token_kind = kind;
226 }
227
228 pub fn line(&self) -> u32 {
229 self.line
230 }
231
232 pub fn set_line(&mut self, line: u32) {
233 self.line = line;
234 }
235
236 pub fn get_current_line(&self) -> String {
237 let mut line_start = self.pos;
238 while line_start > 0 && self.source[line_start - 1] != '\n' {
239 line_start -= 1;
240 }
241
242 let mut line_end = self.pos;
243 while line_end < self.source.len() && self.source[line_end] != '\n' {
244 line_end += 1;
245 }
246
247 self.source[line_start..line_end].iter().collect()
248 }
249
250 pub fn peek(&mut self) -> Option<Token> {
251 if let Some(cache) = self.cached_peek.as_ref() {
252 if cache.pre_pos == self.pos
253 && cache.pre_line == self.line
254 && cache.pre_column == self.column
255 && cache.pre_last_token_kind == self.last_token_kind
256 {
257 return Some(cache.token.clone());
258 }
259 }
260
261 let old_pos = self.pos;
262 let old_line = self.line;
263 let old_column = self.column;
264 let old_last_token_kind = self.last_token_kind;
265
266 self.cached_peek = None;
267 let result = self.next_token_uncached();
268
269 let new_pos = self.pos;
270 let new_line = self.line;
271 let new_column = self.column;
272 let new_last_token_kind = self.last_token_kind;
273
274 self.pos = old_pos;
275 self.line = old_line;
276 self.column = old_column;
277 self.last_token_kind = old_last_token_kind;
278
279 if let Some(token) = result {
280 self.cached_peek = Some(PeekCache {
281 pre_pos: old_pos,
282 pre_line: old_line,
283 pre_column: old_column,
284 pre_last_token_kind: old_last_token_kind,
285 post_pos: new_pos,
286 post_line: new_line,
287 post_column: new_column,
288 post_last_token_kind: new_last_token_kind,
289 token: token.clone(),
290 });
291 return Some(token);
292 }
293
294 None
295 }
296
297 fn skip_whitespace(&mut self) -> Option<()> {
298 while self.pos < self.source.len() {
299 match self.source[self.pos] {
300 '\n' | '\r' | '\u{2028}' | '\u{2029}' => {
301 self.line += 1;
302 self.column = 1;
303 self.pos += 1;
304 }
305 ' ' | '\t' | '\u{000B}' | '\u{000C}' | '\u{00A0}' | '\u{1680}' | '\u{202F}'
306 | '\u{205F}' | '\u{3000}' | '\u{FEFF}' => {
307 self.column += 1;
308 self.pos += 1;
309 }
310 c if (c as u32) >= 0x2000 && (c as u32) <= 0x200A => {
311 self.column += 1;
312 self.pos += 1;
313 }
314 _ => break,
315 }
316 }
317 Some(())
318 }
319
320 fn advance(&mut self) {
321 if self.pos < self.source.len() {
322 if self.source[self.pos] == '\n' || self.source[self.pos] == '\r' {
323 self.line += 1;
324 self.column = 1;
325 } else {
326 self.column += 1;
327 }
328 self.pos += 1;
329 }
330 }
331
332 fn read_number(&mut self) -> Token {
333 let start = self.pos;
334
335 if self.source[self.pos] == '0'
336 && self.pos + 1 < self.source.len()
337 && (self.source[self.pos + 1] == 'x' || self.source[self.pos + 1] == 'X')
338 {
339 self.advance();
340 self.advance();
341 while self.pos < self.source.len() {
342 if self.source[self.pos].is_ascii_hexdigit() {
343 self.advance();
344 } else if self.source[self.pos] == '_' {
345 if self.pos + 1 < self.source.len()
346 && (self.source[self.pos + 1].is_ascii_hexdigit())
347 {
348 self.advance();
349 } else {
350 break;
351 }
352 } else {
353 break;
354 }
355 }
356 let is_bigint = self.pos < self.source.len() && self.source[self.pos] == 'n';
357 if is_bigint {
358 self.advance();
359 }
360 let value: String = self.source[start..self.pos].iter().collect();
361 return Token {
362 token_type: if is_bigint {
363 TokenType::BigInt
364 } else {
365 TokenType::Number
366 },
367 value,
368 line: self.line,
369 column: self.column,
370 };
371 }
372
373 if self.source[self.pos] == '0'
374 && self.pos + 1 < self.source.len()
375 && (self.source[self.pos + 1] == 'o' || self.source[self.pos + 1] == 'O')
376 {
377 self.advance();
378 self.advance();
379 while self.pos < self.source.len() {
380 if matches!(self.source[self.pos], '0'..='7') {
381 self.advance();
382 } else if self.source[self.pos] == '_' {
383 if self.pos + 1 < self.source.len()
384 && matches!(self.source[self.pos + 1], '0'..='7')
385 {
386 self.advance();
387 } else {
388 break;
389 }
390 } else {
391 break;
392 }
393 }
394 let is_bigint = self.pos < self.source.len() && self.source[self.pos] == 'n';
395 if is_bigint {
396 self.advance();
397 }
398 let value: String = self.source[start..self.pos].iter().collect();
399 return Token {
400 token_type: if is_bigint {
401 TokenType::BigInt
402 } else {
403 TokenType::Number
404 },
405 value,
406 line: self.line,
407 column: self.column,
408 };
409 }
410
411 if self.source[self.pos] == '0'
412 && self.pos + 1 < self.source.len()
413 && (self.source[self.pos + 1] == 'b' || self.source[self.pos + 1] == 'B')
414 {
415 self.advance();
416 self.advance();
417 while self.pos < self.source.len() {
418 if matches!(self.source[self.pos], '0' | '1') {
419 self.advance();
420 } else if self.source[self.pos] == '_' {
421 if self.pos + 1 < self.source.len()
422 && matches!(self.source[self.pos + 1], '0' | '1')
423 {
424 self.advance();
425 } else {
426 break;
427 }
428 } else {
429 break;
430 }
431 }
432 let is_bigint = self.pos < self.source.len() && self.source[self.pos] == 'n';
433 if is_bigint {
434 self.advance();
435 }
436 let value: String = self.source[start..self.pos].iter().collect();
437 return Token {
438 token_type: if is_bigint {
439 TokenType::BigInt
440 } else {
441 TokenType::Number
442 },
443 value,
444 line: self.line,
445 column: self.column,
446 };
447 }
448
449 let mut has_dot = false;
450 while self.pos < self.source.len() {
451 if self.source[self.pos].is_ascii_digit() {
452 self.advance();
453 } else if self.source[self.pos] == '_' {
454 if self.pos + 1 < self.source.len() && self.source[self.pos + 1].is_ascii_digit() {
455 self.advance();
456 } else {
457 break;
458 }
459 } else if self.source[self.pos] == '.' && !has_dot {
460 has_dot = true;
461 self.advance();
462 } else {
463 break;
464 }
465 }
466
467 if self.pos < self.source.len()
468 && (self.source[self.pos] == 'e' || self.source[self.pos] == 'E')
469 {
470 self.advance();
471 if self.pos < self.source.len()
472 && (self.source[self.pos] == '+' || self.source[self.pos] == '-')
473 {
474 self.advance();
475 }
476 while self.pos < self.source.len() {
477 if self.source[self.pos].is_ascii_digit() {
478 self.advance();
479 } else if self.source[self.pos] == '_' {
480 if self.pos + 1 < self.source.len()
481 && self.source[self.pos + 1].is_ascii_digit()
482 {
483 self.advance();
484 } else {
485 break;
486 }
487 } else {
488 break;
489 }
490 }
491 }
492
493 let value_end = self.pos;
494 let is_bigint = self.pos < self.source.len() && self.source[self.pos] == 'n';
495 if is_bigint {
496 self.advance();
497 }
498 let value: String = self.source[start..value_end].iter().collect();
499 Token {
500 token_type: if is_bigint {
501 TokenType::BigInt
502 } else {
503 TokenType::Number
504 },
505 value,
506 line: self.line,
507 column: self.column,
508 }
509 }
510
511 pub fn get_context(&self, chars: usize) -> String {
512 let start = self.pos.saturating_sub(chars);
513 let end = (self.pos + chars).min(self.source.len());
514 self.source[start..end].iter().collect()
515 }
516
517 fn read_string(&mut self, quote: char) -> Token {
518 self.last_string_had_escape = false;
519 self.advance();
520 let mut value = String::new();
521 while self.pos < self.source.len() && self.source[self.pos] != quote {
522 let ch = self.source[self.pos];
523 if ch != '\\' {
524 value.push(ch);
525 self.advance();
526 continue;
527 }
528
529 self.advance();
530 if self.pos >= self.source.len() {
531 break;
532 }
533
534 self.last_string_had_escape = true;
535 let esc = self.source[self.pos];
536 match esc {
537 '\n' | '\u{2028}' | '\u{2029}' => {}
538 '\r' => {
539 if self.pos + 1 < self.source.len() && self.source[self.pos + 1] == '\n' {
540 self.advance();
541 }
542 }
543 'n' => value.push('\n'),
544 'r' => value.push('\r'),
545 't' => value.push('\t'),
546 'b' => value.push('\x08'),
547 'f' => value.push('\x0c'),
548 'v' => value.push('\x0b'),
549 '0'..='7' => {
550 let first_val = esc as u32 - '0' as u32;
551 if first_val == 0
552 && (self.pos + 1 >= self.source.len()
553 || self.source[self.pos + 1] < '0'
554 || self.source[self.pos + 1] > '7')
555 {
556 value.push('\0');
557 } else {
558 let mut code = first_val;
559 let mut count = 1u32;
560 let mut lookahead = 1;
561 while self.pos + lookahead < self.source.len()
562 && count < 3
563 && self.source[self.pos + lookahead] >= '0'
564 && self.source[self.pos + lookahead] <= '7'
565 {
566 let next =
567 code * 8 + (self.source[self.pos + lookahead] as u32 - '0' as u32);
568 if next > 255 {
569 break;
570 }
571 code = next;
572 count += 1;
573 lookahead += 1;
574 }
575 for _ in 1..lookahead {
576 self.advance();
577 }
578 if let Some(ch) = char::from_u32(code) {
579 value.push(ch);
580 }
581 }
582 }
583 '\\' => value.push('\\'),
584 '\'' => value.push('\''),
585 '"' => value.push('"'),
586 '`' => value.push('`'),
587 'x' => {
588 if self.pos + 2 < self.source.len() {
589 let h1 = self.source[self.pos + 1];
590 let h2 = self.source[self.pos + 2];
591 if let (Some(a), Some(b)) = (h1.to_digit(16), h2.to_digit(16)) {
592 let code = (a << 4) | b;
593 if let Some(c) = char::from_u32(code) {
594 value.push(c);
595 }
596 self.advance();
597 self.advance();
598 } else {
599 value.push('x');
600 }
601 } else {
602 value.push('x');
603 }
604 }
605 'u' => {
606 if self.pos + 1 < self.source.len() && self.source[self.pos + 1] == '{' {
607 let mut code: u32 = 0;
608 let mut i = self.pos + 2;
609 while i < self.source.len() && self.source[i] != '}' {
610 if let Some(d) = self.source[i].to_digit(16) {
611 code = code.wrapping_mul(16).wrapping_add(d);
612 i += 1;
613 } else {
614 break;
615 }
616 }
617 if i < self.source.len() && self.source[i] == '}' {
618 if let Some(decoded) = char::from_u32(code) {
619 value.push(decoded);
620 }
621
622 let steps = i - self.pos;
623 for _ in 0..steps {
624 self.advance();
625 }
626 } else {
627 value.push('u');
628 }
629 } else if self.pos + 4 < self.source.len() {
630 let h1 = self.source[self.pos + 1];
631 let h2 = self.source[self.pos + 2];
632 let h3 = self.source[self.pos + 3];
633 let h4 = self.source[self.pos + 4];
634 if let (Some(a), Some(b), Some(c), Some(d)) = (
635 h1.to_digit(16),
636 h2.to_digit(16),
637 h3.to_digit(16),
638 h4.to_digit(16),
639 ) {
640 let code = (a << 12) | (b << 8) | (c << 4) | d;
641 if let Some(decoded) = char::from_u32(code) {
642 value.push(decoded);
643 }
644 self.advance();
645 self.advance();
646 self.advance();
647 self.advance();
648 } else {
649 value.push('u');
650 }
651 } else {
652 value.push('u');
653 }
654 }
655 _ => value.push(esc),
656 }
657 self.advance();
658 }
659 if self.pos < self.source.len() {
660 self.advance();
661 }
662 Token {
663 token_type: TokenType::String,
664 value,
665 line: self.line,
666 column: self.column,
667 }
668 }
669
670 fn read_hashbang(&mut self) -> Token {
671 let start = self.pos;
672
673 self.pos += 2;
674
675 while self.pos < self.source.len() {
676 let c = self.source[self.pos];
677 if c == '\n' || c == '\r' {
678 break;
679 }
680 self.pos += 1;
681 }
682 let value: String = self.source[start..self.pos].iter().collect();
683 Token {
684 token_type: TokenType::Hashbang,
685 value,
686 line: self.line,
687 column: self.column,
688 }
689 }
690
691 fn read_identifier(&mut self) -> Token {
692 let start = self.pos;
693 let mut value = String::new();
694 while self.pos < self.source.len() {
695 let c = self.source[self.pos];
696 if Self::is_identifier_part(c) {
697 value.push(c);
698 self.advance();
699 } else if c == '\\' {
700 if let Some(ch) = self.read_identifier_escape() {
701 value.push(ch);
702 } else {
703 break;
704 }
705 } else {
706 break;
707 }
708 }
709 if value.is_empty() {
710 value = self.source[start..self.pos].iter().collect();
711 }
712 let token_type = if Self::is_keyword(&value) {
713 TokenType::Keyword
714 } else {
715 TokenType::Identifier
716 };
717 Token {
718 token_type,
719 value,
720 line: self.line,
721 column: self.column,
722 }
723 }
724
725 fn read_identifier_escape(&mut self) -> Option<char> {
726 if self.pos >= self.source.len() || self.source[self.pos] != '\\' {
727 return None;
728 }
729 self.advance();
730 if self.pos >= self.source.len() || self.source[self.pos] != 'u' {
731 return None;
732 }
733 self.advance();
734
735 if self.pos < self.source.len() && self.source[self.pos] == '{' {
736 self.advance();
737 let mut hex = String::new();
738 while self.pos < self.source.len() && self.source[self.pos] != '}' {
739 let c = self.source[self.pos];
740 if !c.is_ascii_hexdigit() {
741 return None;
742 }
743 hex.push(c);
744 self.advance();
745 }
746 if self.pos >= self.source.len() || self.source[self.pos] != '}' {
747 return None;
748 }
749 self.advance();
750 let code = u32::from_str_radix(&hex, 16).ok()?;
751 return char::from_u32(code);
752 }
753
754 if self.pos + 3 >= self.source.len() {
755 return None;
756 }
757 let mut code: u32 = 0;
758 for _ in 0..4 {
759 let c = self.source[self.pos];
760 let d = c.to_digit(16)?;
761 code = (code << 4) | d;
762 self.advance();
763 }
764 char::from_u32(code)
765 }
766
767 fn read_private_identifier(&mut self) -> Token {
768 self.advance();
769 let mut value = String::from("#");
770 while self.pos < self.source.len() {
771 let c = self.source[self.pos];
772 if Self::is_identifier_part(c) {
773 value.push(c);
774 self.advance();
775 } else if c == '\\' {
776 if let Some(ch) = self.read_identifier_escape() {
777 value.push(ch);
778 } else {
779 break;
780 }
781 } else {
782 break;
783 }
784 }
785 Token {
786 token_type: TokenType::PrivateIdentifier,
787 value,
788 line: self.line,
789 column: self.column,
790 }
791 }
792
793 fn read_comment_or_regex(&mut self) -> Token {
794 self.advance();
795 if self.pos < self.source.len() {
796 if self.source[self.pos] == '/' {
797 while self.pos < self.source.len()
798 && self.source[self.pos] != '\n'
799 && self.source[self.pos] != '\r'
800 && self.source[self.pos] != '\u{2028}'
801 && self.source[self.pos] != '\u{2029}'
802 {
803 self.advance();
804 }
805 return self.next_token().unwrap_or(Token {
806 token_type: TokenType::Eof,
807 value: String::new(),
808 line: self.line,
809 column: self.column,
810 });
811 }
812
813 if self.source[self.pos] == '*' {
814 self.advance();
815 while self.pos < self.source.len() {
816 if self.source[self.pos] == '*'
817 && self.pos + 1 < self.source.len()
818 && self.source[self.pos + 1] == '/'
819 {
820 self.advance();
821 self.advance();
822 break;
823 }
824 self.advance();
825 }
826 return self.next_token().unwrap_or(Token {
827 token_type: TokenType::Eof,
828 value: String::new(),
829 line: self.line,
830 column: self.column,
831 });
832 }
833
834 let next_char = self.source[self.pos];
835 if self.last_token_kind == LastTokenKind::Dividend {
836 if next_char == '=' {
837 self.advance();
838 self.last_token_kind = LastTokenKind::Dividend;
839 return Token {
840 token_type: TokenType::Punctuator,
841 value: "/=".to_string(),
842 line: self.line,
843 column: self.column,
844 };
845 }
846
847 self.last_token_kind = LastTokenKind::RegexPrefix;
848 return Token {
849 token_type: TokenType::Punctuator,
850 value: "/".to_string(),
851 line: self.line,
852 column: self.column,
853 };
854 }
855
856 let mut pattern = String::new();
857 let mut flags = String::new();
858
859 while self.pos < self.source.len() {
860 let c = self.source[self.pos];
861 if c == '/' {
862 self.advance();
863 break;
864 } else if c == '\\' {
865 pattern.push(c);
866 self.advance();
867 if self.pos < self.source.len() {
868 pattern.push(self.source[self.pos]);
869 self.advance();
870 }
871 } else if c == '[' {
872 pattern.push(c);
873 self.advance();
874 while self.pos < self.source.len() {
875 let cc = self.source[self.pos];
876 pattern.push(cc);
877 self.advance();
878 if cc == ']' {
879 break;
880 }
881 }
882 } else if c == '\n' || c == '\r' {
883 break;
884 } else {
885 pattern.push(c);
886 self.advance();
887 }
888 }
889
890 while self.pos < self.source.len() {
891 let c = self.source[self.pos];
892 if c.is_ascii_alphabetic() {
893 flags.push(c);
894 self.advance();
895 } else {
896 break;
897 }
898 }
899
900 self.last_token_kind = LastTokenKind::Dividend;
901
902 return Token {
903 token_type: TokenType::Regex,
904 value: format!("{}/{}", pattern, flags),
905 line: self.line,
906 column: self.column,
907 };
908 }
909 self.last_token_kind = LastTokenKind::RegexPrefix;
910 Token {
911 token_type: TokenType::Punctuator,
912 value: "/".to_string(),
913 line: self.line,
914 column: self.column,
915 }
916 }
917
918 fn read_punctuator(&mut self) -> Token {
919 let c = self.source[self.pos];
920 self.advance();
921
922 let value: String = if self.pos < self.source.len() {
923 let next = self.source[self.pos];
924 match c {
925 '<' if next == '<' => {
926 self.advance();
927 if self.pos < self.source.len() && self.source[self.pos] == '=' {
928 self.advance();
929 "<<=".to_string()
930 } else {
931 "<<".to_string()
932 }
933 }
934 '<' if next == '=' => {
935 self.advance();
936 "<=".to_string()
937 }
938 '>' if next == '>' => {
939 self.advance();
940 if self.pos < self.source.len() && self.source[self.pos] == '>' {
941 self.advance();
942 if self.pos < self.source.len() && self.source[self.pos] == '=' {
943 self.advance();
944 ">>>=".to_string()
945 } else {
946 ">>>".to_string()
947 }
948 } else if self.pos < self.source.len() && self.source[self.pos] == '=' {
949 self.advance();
950 ">>=".to_string()
951 } else {
952 ">>".to_string()
953 }
954 }
955 '>' if next == '=' => {
956 self.advance();
957 ">=".to_string()
958 }
959 '=' if next == '>' => {
960 self.advance();
961 "=>".to_string()
962 }
963 '.' if next == '.' => {
964 self.advance();
965 if self.pos < self.source.len() && self.source[self.pos] == '.' {
966 self.advance();
967 "...".to_string()
968 } else {
969 "..".to_string()
970 }
971 }
972 '=' if next == '=' => {
973 self.advance();
974 if self.pos < self.source.len() && self.source[self.pos] == '=' {
975 self.advance();
976 "===".to_string()
977 } else {
978 "==".to_string()
979 }
980 }
981 '!' if next == '=' => {
982 self.advance();
983 if self.pos < self.source.len() && self.source[self.pos] == '=' {
984 self.advance();
985 "!==".to_string()
986 } else {
987 "!=".to_string()
988 }
989 }
990 '*' if next == '=' => {
991 self.advance();
992 "*=".to_string()
993 }
994 '/' if next == '=' => {
995 self.advance();
996 "/=".to_string()
997 }
998 '%' if next == '=' => {
999 self.advance();
1000 "%=".to_string()
1001 }
1002 '+' if next == '=' => {
1003 self.advance();
1004 "+=".to_string()
1005 }
1006 '-' if next == '=' => {
1007 self.advance();
1008 "-=".to_string()
1009 }
1010 '&' if next == '=' => {
1011 self.advance();
1012 "&=".to_string()
1013 }
1014 '|' if next == '=' => {
1015 self.advance();
1016 "|=".to_string()
1017 }
1018 '^' if next == '=' => {
1019 self.advance();
1020 "^=".to_string()
1021 }
1022 '<' if next == '=' => {
1023 self.advance();
1024 "<=".to_string()
1025 }
1026 '>' if next == '=' => {
1027 self.advance();
1028 ">=".to_string()
1029 }
1030 '&' if next == '&' => {
1031 self.advance();
1032 if self.pos < self.source.len() && self.source[self.pos] == '=' {
1033 self.advance();
1034 "&&=".to_string()
1035 } else {
1036 "&&".to_string()
1037 }
1038 }
1039 '|' if next == '|' => {
1040 self.advance();
1041 if self.pos < self.source.len() && self.source[self.pos] == '=' {
1042 self.advance();
1043 "||=".to_string()
1044 } else {
1045 "||".to_string()
1046 }
1047 }
1048 '+' if next == '+' => {
1049 self.advance();
1050 "++".to_string()
1051 }
1052 '-' if next == '-' => {
1053 self.advance();
1054 "--".to_string()
1055 }
1056 '?' if next == '?' => {
1057 self.advance();
1058
1059 if self.pos < self.source.len() && self.source[self.pos] == '=' {
1060 self.advance();
1061 "??=".to_string()
1062 } else {
1063 "??".to_string()
1064 }
1065 }
1066 '?' if next == '.' => {
1067 self.advance();
1068 "?.".to_string()
1069 }
1070 '*' if next == '*' => {
1071 self.advance();
1072 if self.pos < self.source.len() && self.source[self.pos] == '=' {
1073 self.advance();
1074 "**=".to_string()
1075 } else {
1076 "**".to_string()
1077 }
1078 }
1079 _ => c.to_string(),
1080 }
1081 } else {
1082 c.to_string()
1083 };
1084
1085 Token {
1086 token_type: TokenType::Punctuator,
1087 value,
1088 line: self.line,
1089 column: self.column,
1090 }
1091 }
1092
1093 fn is_identifier_start(c: char) -> bool {
1094 if c == '$' || c == '_' {
1095 return true;
1096 }
1097 if c.is_ascii() {
1098 return c.is_ascii_alphabetic();
1099 }
1100 crate::builtins::unicode_data::XID_START.contains(c as u32)
1101 || matches!(c, '\u{2118}' | '\u{212E}' | '\u{309B}' | '\u{309C}')
1102 }
1103
1104 fn is_identifier_part(c: char) -> bool {
1105 if c == '$' || c == '_' {
1106 return true;
1107 }
1108 if c.is_ascii() {
1109 return c.is_ascii_alphanumeric();
1110 }
1111 if c == '\u{200C}' || c == '\u{200D}' {
1112 return true;
1113 }
1114 Self::is_identifier_start(c)
1115 || crate::builtins::unicode_data::XID_CONTINUE.contains(c as u32)
1116 }
1117
/// Reports whether `s` is one of the words this lexer tags as
/// `TokenType::Keyword`.
///
/// The comparison is exact and case-sensitive. The table also contains
/// words such as `async`, `get`, `from` and `of`.
fn is_keyword(s: &str) -> bool {
    const KEYWORDS: &[&str] = &[
        "break", "case", "catch", "class", "const", "continue", "debugger", "default",
        "delete", "do", "else", "export", "extends", "finally", "for", "function", "if",
        "import", "in", "instanceof", "let", "new", "return", "super", "switch", "this",
        "throw", "try", "typeof", "var", "void", "while", "with", "yield", "async", "await",
        "static", "get", "set", "true", "false", "null", "from", "as", "of",
    ];
    KEYWORDS.iter().any(|keyword| *keyword == s)
}
1168
1169 pub fn read_template_chars(&mut self) -> Option<String> {
1170 let mut result = String::new();
1171 while self.pos < self.source.len() {
1172 let c = self.source[self.pos];
1173
1174 if c == '$' && self.pos + 1 < self.source.len() && self.source[self.pos + 1] == '{' {
1175 break;
1176 }
1177
1178 if c == '`' {
1179 break;
1180 }
1181 result.push(c);
1182 self.advance();
1183 }
1184 if result.is_empty() {
1185 None
1186 } else {
1187 Some(result)
1188 }
1189 }
1190
1191 fn scan_template_segment(&mut self) -> (String, bool) {
1192 let mut value = String::new();
1193 while self.pos < self.source.len() {
1194 let c = self.source[self.pos];
1195
1196 if c == '$' && self.pos + 1 < self.source.len() && self.source[self.pos + 1] == '{' {
1197 self.advance();
1198 self.advance();
1199 return (value, true);
1200 }
1201
1202 if c == '`' {
1203 self.advance();
1204 return (value, false);
1205 }
1206
1207 if c == '\\' {
1208 self.advance();
1209 if self.pos >= self.source.len() {
1210 break;
1211 }
1212 let esc = self.source[self.pos];
1213 match esc {
1214 'n' => value.push('\n'),
1215 'r' => value.push('\r'),
1216 't' => value.push('\t'),
1217 'b' => value.push('\x08'),
1218 'f' => value.push('\x0c'),
1219 'v' => value.push('\x0b'),
1220 '0' => value.push('\0'),
1221 '\\' => value.push('\\'),
1222 '\'' => value.push('\''),
1223 '"' => value.push('"'),
1224 '`' => value.push('`'),
1225 '$' => value.push('$'),
1226 '\n' => {
1227 self.line += 1;
1228 self.column = 1;
1229 self.pos += 1;
1230 continue;
1231 }
1232 '\r' => {
1233 self.line += 1;
1234 self.column = 1;
1235 self.pos += 1;
1236
1237 if self.pos < self.source.len() && self.source[self.pos] == '\n' {
1238 self.pos += 1;
1239 }
1240 continue;
1241 }
1242 'x' => {
1243 if self.pos + 2 < self.source.len() {
1244 let h1 = self.source[self.pos + 1];
1245 let h2 = self.source[self.pos + 2];
1246 if let (Some(a), Some(b)) = (h1.to_digit(16), h2.to_digit(16)) {
1247 let code = (a << 4) | b;
1248 if let Some(ch) = char::from_u32(code) {
1249 value.push(ch);
1250 }
1251 self.advance();
1252 self.advance();
1253 } else {
1254 value.push('x');
1255 }
1256 } else {
1257 value.push('x');
1258 }
1259 }
1260 'u' => {
1261 if self.pos + 1 < self.source.len() && self.source[self.pos + 1] == '{' {
1262 self.advance();
1263 self.advance();
1264 let mut hex = String::new();
1265 while self.pos < self.source.len() && self.source[self.pos] != '}' {
1266 hex.push(self.source[self.pos]);
1267 self.advance();
1268 }
1269
1270 if let Ok(code) = u32::from_str_radix(&hex, 16) {
1271 if let Some(ch) = char::from_u32(code) {
1272 value.push(ch);
1273 }
1274 }
1275 } else if self.pos + 4 < self.source.len() {
1276 let h1 = self.source[self.pos + 1];
1277 let h2 = self.source[self.pos + 2];
1278 let h3 = self.source[self.pos + 3];
1279 let h4 = self.source[self.pos + 4];
1280 if let (Some(a), Some(b), Some(c), Some(d)) = (
1281 h1.to_digit(16),
1282 h2.to_digit(16),
1283 h3.to_digit(16),
1284 h4.to_digit(16),
1285 ) {
1286 let code = (a << 12) | (b << 8) | (c << 4) | d;
1287 if let Some(decoded) = char::from_u32(code) {
1288 value.push(decoded);
1289 }
1290 self.advance();
1291 self.advance();
1292 self.advance();
1293 self.advance();
1294 } else {
1295 value.push('u');
1296 }
1297 } else {
1298 value.push('u');
1299 }
1300 }
1301 _ => value.push(esc),
1302 }
1303 self.advance();
1304 continue;
1305 }
1306
1307 if c == '\n' {
1308 value.push('\n');
1309 self.pos += 1;
1310 self.line += 1;
1311 self.column = 1;
1312 continue;
1313 }
1314 if c == '\r' {
1315 value.push('\n');
1316 self.pos += 1;
1317 self.line += 1;
1318 self.column = 1;
1319 if self.pos < self.source.len() && self.source[self.pos] == '\n' {
1320 self.pos += 1;
1321 }
1322 continue;
1323 }
1324
1325 value.push(c);
1326 self.advance();
1327 }
1328
1329 (value, false)
1330 }
1331
1332 pub fn scan_template_continuation(&mut self) -> Option<Token> {
1333 let (value, terminated_by_interp) = self.scan_template_segment();
1334 let token_type = if terminated_by_interp {
1335 TokenType::TemplateMiddle
1336 } else {
1337 TokenType::TemplateTail
1338 };
1339 self.last_token_kind = LastTokenKind::Dividend;
1340 Some(Token {
1341 token_type,
1342 value,
1343 line: self.line,
1344 column: self.column,
1345 })
1346 }
1347
1348 pub fn source_from_pos(&self) -> String {
1349 self.source[self.pos..].iter().collect()
1350 }
1351
1352 pub fn advance_char(&mut self) -> Option<char> {
1353 if self.pos < self.source.len() {
1354 let c = self.source[self.pos];
1355 self.advance();
1356 Some(c)
1357 } else {
1358 None
1359 }
1360 }
1361
1362 pub fn at_str(&self, s: &str) -> bool {
1363 let chars: Vec<char> = s.chars().collect();
1364 if self.pos + chars.len() > self.source.len() {
1365 return false;
1366 }
1367 for (i, c) in chars.iter().enumerate() {
1368 if self.source[self.pos + i] != *c {
1369 return false;
1370 }
1371 }
1372 true
1373 }
1374}