1use crate::{JSError, raise_tokenize_error};
2use num_bigint::BigInt;
3use num_traits::{Num, ToPrimitive};
4
/// A single lexical token produced by `tokenize`.
///
/// Literal variants carry their decoded payload; punctuators and keywords are
/// unit variants named after the source spelling they stand for.
#[derive(Debug, Clone)]
pub enum Token {
    /// Numeric literal, already converted to a double.
    Number(f64),
    /// BigInt literal text with `_` separators removed and the trailing `n`
    /// dropped; a `0b`/`0o`/`0x` prefix, if present, is kept in the text.
    BigInt(String),
    /// Quoted string literal, decoded to UTF-16 code units.
    StringLit(Vec<u16>),
    /// Template literal, split into text chunks and `${...}` expressions.
    TemplateString(Vec<TemplatePart>),
    /// Any word that is not one of the recognised keywords.
    Identifier(String),
    /// A `#name` word; the stored text excludes the leading `#`.
    PrivateIdentifier(String),
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Multiply,
    /// `**`
    Exponent,
    /// `/` when it follows a token that can end an expression.
    Divide,
    /// Regular expression literal: (pattern between the slashes, flags).
    Regex(String, String),
    /// `%`
    Mod,
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `[`
    LBracket,
    /// `]`
    RBracket,
    /// `{`
    LBrace,
    /// `}`
    RBrace,
    /// `:`
    Colon,
    /// `.`
    Dot,
    /// `,`
    Comma,
    // Keywords: each variant below is produced for the lowercase word of the
    // same name (`let`, `var`, `const`, ...).
    Let,
    Var,
    Const,
    Class,
    Extends,
    Super,
    This,
    Static,
    New,
    InstanceOf,
    TypeOf,
    In,
    Delete,
    Void,
    Function,
    Return,
    If,
    Else,
    For,
    While,
    Do,
    Switch,
    Case,
    Default,
    Break,
    Continue,
    Try,
    Catch,
    Finally,
    Throw,
    /// `=`
    Assign,
    /// `;`
    Semicolon,
    /// `==`
    Equal,
    /// `===`
    StrictEqual,
    /// `!=`
    NotEqual,
    /// `!==`
    StrictNotEqual,
    /// `<`
    LessThan,
    /// `>`
    GreaterThan,
    /// `<=`
    LessEqual,
    /// `>=`
    GreaterEqual,
    /// The word `true`.
    True,
    /// The word `false`.
    False,
    /// The word `null`.
    Null,
    /// `=>`
    Arrow,
    /// `...`
    Spread,
    /// `?.`
    OptionalChain,
    /// `?`
    QuestionMark,
    /// `??`
    NullishCoalescing,
    /// `!`
    LogicalNot,
    /// `&&`
    LogicalAnd,
    /// `||`
    LogicalOr,
    /// `^`
    BitXor,
    /// `&&=`
    LogicalAndAssign,
    /// `||=`
    LogicalOrAssign,
    /// `^=`
    BitXorAssign,
    /// `??=`
    NullishAssign,
    /// `+=`
    AddAssign,
    /// `-=`
    SubAssign,
    /// `*=`
    MulAssign,
    /// `/=`
    DivAssign,
    /// `%=`
    ModAssign,
    /// `++`
    Increment,
    /// `--`
    Decrement,
    /// The word `async`.
    Async,
    /// The word `await`.
    Await,
    /// The word `yield`.
    Yield,
    /// `yield` immediately followed by `*` (no whitespace in between).
    YieldStar,
    /// `function` immediately followed by `*` (no whitespace in between).
    FunctionStar,
    /// A `\n` in the source, including newlines inside block comments.
    LineTerminator,
    /// `**=`
    PowAssign,
    /// `&`
    BitAnd,
    /// `~`
    BitNot,
    /// `&=`
    BitAndAssign,
    /// `|`
    BitOr,
    /// `|=`
    BitOrAssign,
    /// `<<`
    LeftShift,
    /// `<<=`
    LeftShiftAssign,
    /// `>>`
    RightShift,
    /// `>>=`
    RightShiftAssign,
    /// `>>>`
    UnsignedRightShift,
    /// `>>>=`
    UnsignedRightShiftAssign,
    /// The word `as`.
    As,
    /// The word `import`.
    Import,
    /// The word `export`.
    Export,
}
118
119impl Token {
120 pub fn as_identifier_string(&self) -> Option<String> {
122 match self {
123 Token::Identifier(s) => Some(s.clone()),
124 Token::Let => Some("let".to_string()),
125 Token::Var => Some("var".to_string()),
126 Token::Const => Some("const".to_string()),
127 Token::Class => Some("class".to_string()),
128 Token::Extends => Some("extends".to_string()),
129 Token::Super => Some("super".to_string()),
130 Token::This => Some("this".to_string()),
131 Token::Static => Some("static".to_string()),
132 Token::New => Some("new".to_string()),
133 Token::InstanceOf => Some("instanceof".to_string()),
134 Token::TypeOf => Some("typeof".to_string()),
135 Token::In => Some("in".to_string()),
136 Token::Delete => Some("delete".to_string()),
137 Token::Void => Some("void".to_string()),
138 Token::Function => Some("function".to_string()),
139 Token::Return => Some("return".to_string()),
140 Token::If => Some("if".to_string()),
141 Token::Else => Some("else".to_string()),
142 Token::For => Some("for".to_string()),
143 Token::While => Some("while".to_string()),
144 Token::Do => Some("do".to_string()),
145 Token::Switch => Some("switch".to_string()),
146 Token::Case => Some("case".to_string()),
147 Token::Default => Some("default".to_string()),
148 Token::Break => Some("break".to_string()),
149 Token::Continue => Some("continue".to_string()),
150 Token::Try => Some("try".to_string()),
151 Token::Catch => Some("catch".to_string()),
152 Token::Finally => Some("finally".to_string()),
153 Token::Throw => Some("throw".to_string()),
154 Token::True => Some("true".to_string()),
155 Token::False => Some("false".to_string()),
156 Token::Null => Some("null".to_string()),
157 Token::Async => Some("async".to_string()),
158 Token::Await => Some("await".to_string()),
159 Token::Yield => Some("yield".to_string()),
160 Token::FunctionStar => Some("function*".to_string()),
161 _ => None,
162 }
163 }
164}
165
/// A [`Token`] together with the position at which the tokenizer recorded it.
#[derive(Debug, Clone)]
pub struct TokenData {
    /// The token itself.
    pub token: Token,
    /// Line number as counted by the tokenizer, which starts at 1 and
    /// advances on every `\n`.
    pub line: usize,
    /// Column as counted by the tokenizer, which starts at 1 on each line and
    /// advances by one per `char`.
    pub column: usize,
}
172
/// One segment of a template literal, in source order.
#[derive(Debug, Clone)]
pub enum TemplatePart {
    /// Literal text between substitutions, decoded to UTF-16 code units.
    String(Vec<u16>),
    /// Tokens of the expression found inside a `${ ... }` substitution.
    Expr(Vec<TokenData>),
}
178
179pub fn tokenize(expr: &str) -> Result<Vec<TokenData>, JSError> {
180 let mut tokens = Vec::new();
181 let chars: Vec<char> = expr.chars().collect();
182 let mut i = 0;
183 let mut line = 1;
184 let mut column = 1;
185
186 while i < chars.len() {
187 let start_col = column;
188 match chars[i] {
189 ' ' | '\t' | '\r' => {
190 i += 1;
191 column += 1;
192 }
193 '\n' => {
194 tokens.push(TokenData {
195 token: Token::LineTerminator,
196 line,
197 column,
198 });
199 i += 1;
200 line += 1;
201 column = 1;
202 }
203 '+' => {
204 if i + 1 < chars.len() && chars[i + 1] == '+' {
205 tokens.push(TokenData {
206 token: Token::Increment,
207 line,
208 column: start_col,
209 });
210 i += 2;
211 column += 2;
212 } else if i + 1 < chars.len() && chars[i + 1] == '=' {
213 tokens.push(TokenData {
214 token: Token::AddAssign,
215 line,
216 column: start_col,
217 });
218 i += 2;
219 column += 2;
220 } else {
221 tokens.push(TokenData {
222 token: Token::Plus,
223 line,
224 column: start_col,
225 });
226 i += 1;
227 column += 1;
228 }
229 }
230 '-' => {
231 if i + 1 < chars.len() && chars[i + 1] == '-' {
232 tokens.push(TokenData {
233 token: Token::Decrement,
234 line,
235 column: start_col,
236 });
237 i += 2;
238 column += 2;
239 } else if i + 1 < chars.len() && chars[i + 1] == '=' {
240 tokens.push(TokenData {
241 token: Token::SubAssign,
242 line,
243 column: start_col,
244 });
245 i += 2;
246 column += 2;
247 } else {
248 tokens.push(TokenData {
249 token: Token::Minus,
250 line,
251 column: start_col,
252 });
253 i += 1;
254 column += 1;
255 }
256 }
257 '*' => {
258 if i + 2 < chars.len() && chars[i + 1] == '*' && chars[i + 2] == '=' {
260 tokens.push(TokenData {
261 token: Token::PowAssign,
262 line,
263 column: start_col,
264 });
265 i += 3;
266 column += 3;
267 } else if i + 1 < chars.len() && chars[i + 1] == '*' {
268 tokens.push(TokenData {
269 token: Token::Exponent,
270 line,
271 column: start_col,
272 });
273 i += 2;
274 column += 2;
275 } else if i + 1 < chars.len() && chars[i + 1] == '=' {
276 tokens.push(TokenData {
277 token: Token::MulAssign,
278 line,
279 column: start_col,
280 });
281 i += 2;
282 column += 2;
283 } else {
284 tokens.push(TokenData {
285 token: Token::Multiply,
286 line,
287 column: start_col,
288 });
289 i += 1;
290 column += 1;
291 }
292 }
293 '/' => {
294 if i + 1 < chars.len() && chars[i + 1] == '=' {
295 tokens.push(TokenData {
296 token: Token::DivAssign,
297 line,
298 column: start_col,
299 });
300 i += 2;
301 column += 2;
302 } else if i + 1 < chars.len() && chars[i + 1] == '/' {
303 while i < chars.len() && chars[i] != '\n' {
305 i += 1;
306 column += 1;
307 }
308 } else if i + 1 < chars.len() && chars[i + 1] == '*' {
310 i += 2; column += 2;
313 let mut terminated = false;
314 while i + 1 < chars.len() {
315 if chars[i] == '*' && chars[i + 1] == '/' {
316 i += 2; column += 2;
318 terminated = true;
319 break;
320 }
321 if chars[i] == '\n' {
322 tokens.push(TokenData {
323 token: Token::LineTerminator,
324 line,
325 column,
326 });
327 line += 1;
328 column = 1;
329 } else {
330 column += 1;
331 }
332 i += 1;
333 }
334 if !terminated {
335 return Err(raise_tokenize_error!("Unterminated comment", line, column)); }
337 } else {
338 let mut prev_end_expr = false;
345 let last_token = tokens.iter().rev().find(|t| !matches!(t.token, Token::LineTerminator));
347
348 if let Some(token_data) = last_token {
349 match token_data.token {
350 Token::Number(_)
351 | Token::BigInt(_)
352 | Token::StringLit(_)
353 | Token::Identifier(_)
354 | Token::RBracket
355 | Token::RParen
356 | Token::RBrace
357 | Token::True
358 | Token::False
359 | Token::Increment
360 | Token::Decrement => {
361 prev_end_expr = true;
362 }
363 _ => {}
364 }
365 }
366
367 if prev_end_expr {
368 tokens.push(TokenData {
369 token: Token::Divide,
370 line,
371 column: start_col,
372 });
373 i += 1;
374 column += 1;
375 } else {
376 let mut j = i + 1;
378 let mut col_j = column + 1;
379 let mut in_class = false;
380 while j < chars.len() {
381 if chars[j] == '\\' {
382 j += 2;
384 col_j += 2;
385 continue;
386 }
387 if !in_class && chars[j] == '/' {
388 break;
389 }
390 if chars[j] == '[' {
391 in_class = true;
392 } else if chars[j] == ']' {
393 in_class = false;
394 }
395 j += 1;
396 col_j += 1;
397 }
398 if j >= chars.len() || chars[j] != '/' {
399 return Err(raise_tokenize_error!("Unterminated regex literal", line, column)); }
401 let pattern: String = chars[i + 1..j].iter().collect();
403 j += 1; col_j += 1;
405
406 let mut flags = String::new();
408 while j < chars.len() && chars[j].is_alphabetic() {
409 flags.push(chars[j]);
410 j += 1;
411 col_j += 1;
412 }
413 tokens.push(TokenData {
414 token: Token::Regex(pattern, flags),
415 line,
416 column: start_col,
417 });
418 i = j;
419 column = col_j;
420 }
421 }
422 }
423 '%' => {
424 if i + 1 < chars.len() && chars[i + 1] == '=' {
425 tokens.push(TokenData {
426 token: Token::ModAssign,
427 line,
428 column: start_col,
429 });
430 i += 2;
431 column += 2;
432 } else {
433 tokens.push(TokenData {
434 token: Token::Mod,
435 line,
436 column: start_col,
437 });
438 i += 1;
439 column += 1;
440 }
441 }
442 '(' => {
443 tokens.push(TokenData {
444 token: Token::LParen,
445 line,
446 column: start_col,
447 });
448 i += 1;
449 column += 1;
450 }
451 ')' => {
452 tokens.push(TokenData {
453 token: Token::RParen,
454 line,
455 column: start_col,
456 });
457 i += 1;
458 column += 1;
459 }
460 '[' => {
461 tokens.push(TokenData {
462 token: Token::LBracket,
463 line,
464 column: start_col,
465 });
466 i += 1;
467 column += 1;
468 }
469 ']' => {
470 tokens.push(TokenData {
471 token: Token::RBracket,
472 line,
473 column: start_col,
474 });
475 i += 1;
476 column += 1;
477 }
478 '{' => {
479 tokens.push(TokenData {
480 token: Token::LBrace,
481 line,
482 column: start_col,
483 });
484 i += 1;
485 column += 1;
486 }
487 '}' => {
488 tokens.push(TokenData {
489 token: Token::RBrace,
490 line,
491 column: start_col,
492 });
493 i += 1;
494 column += 1;
495 }
496 ':' => {
497 tokens.push(TokenData {
498 token: Token::Colon,
499 line,
500 column: start_col,
501 });
502 i += 1;
503 column += 1;
504 }
505 '.' => {
506 if i + 2 < chars.len() && chars[i + 1] == '.' && chars[i + 2] == '.' {
507 tokens.push(TokenData {
508 token: Token::Spread,
509 line,
510 column: start_col,
511 });
512 i += 3;
513 column += 3;
514 } else {
515 tokens.push(TokenData {
516 token: Token::Dot,
517 line,
518 column: start_col,
519 });
520 i += 1;
521 column += 1;
522 }
523 }
524 '?' => {
525 if i + 2 < chars.len() && chars[i + 1] == '?' && chars[i + 2] == '=' {
527 tokens.push(TokenData {
528 token: Token::NullishAssign,
529 line,
530 column: start_col,
531 });
532 i += 3;
533 column += 3;
534 } else if i + 1 < chars.len() && chars[i + 1] == '?' {
535 tokens.push(TokenData {
536 token: Token::NullishCoalescing,
537 line,
538 column: start_col,
539 });
540 i += 2;
541 column += 2;
542 } else if i + 1 < chars.len() && chars[i + 1] == '.' {
543 tokens.push(TokenData {
544 token: Token::OptionalChain,
545 line,
546 column: start_col,
547 });
548 i += 2;
549 column += 2;
550 } else {
551 tokens.push(TokenData {
552 token: Token::QuestionMark,
553 line,
554 column: start_col,
555 });
556 i += 1;
557 column += 1;
558 }
559 }
560 '!' => {
561 if i + 2 < chars.len() && chars[i + 1] == '=' && chars[i + 2] == '=' {
562 tokens.push(TokenData {
563 token: Token::StrictNotEqual,
564 line,
565 column: start_col,
566 });
567 i += 3;
568 column += 3;
569 } else if i + 1 < chars.len() && chars[i + 1] == '=' {
570 tokens.push(TokenData {
571 token: Token::NotEqual,
572 line,
573 column: start_col,
574 });
575 i += 2;
576 column += 2;
577 } else {
578 tokens.push(TokenData {
579 token: Token::LogicalNot,
580 line,
581 column: start_col,
582 });
583 i += 1;
584 column += 1;
585 }
586 }
587 '=' => {
588 if i + 1 < chars.len() && chars[i + 1] == '=' {
589 if i + 2 < chars.len() && chars[i + 2] == '=' {
590 tokens.push(TokenData {
591 token: Token::StrictEqual,
592 line,
593 column: start_col,
594 });
595 i += 3;
596 column += 3;
597 } else {
598 tokens.push(TokenData {
599 token: Token::Equal,
600 line,
601 column: start_col,
602 });
603 i += 2;
604 column += 2;
605 }
606 } else if i + 1 < chars.len() && chars[i + 1] == '>' {
607 tokens.push(TokenData {
608 token: Token::Arrow,
609 line,
610 column: start_col,
611 });
612 i += 2;
613 column += 2;
614 } else if i + 1 < chars.len() && chars[i + 1] == '+' {
615 tokens.push(TokenData {
616 token: Token::AddAssign,
617 line,
618 column: start_col,
619 });
620 i += 2;
621 column += 2;
622 } else if i + 1 < chars.len() && chars[i + 1] == '-' {
623 tokens.push(TokenData {
624 token: Token::SubAssign,
625 line,
626 column: start_col,
627 });
628 i += 2;
629 column += 2;
630 } else if i + 1 < chars.len() && chars[i + 1] == '*' {
631 tokens.push(TokenData {
632 token: Token::MulAssign,
633 line,
634 column: start_col,
635 });
636 i += 2;
637 column += 2;
638 } else if i + 1 < chars.len() && chars[i + 1] == '/' {
639 tokens.push(TokenData {
640 token: Token::DivAssign,
641 line,
642 column: start_col,
643 });
644 i += 2;
645 column += 2;
646 } else if i + 1 < chars.len() && chars[i + 1] == '%' {
647 tokens.push(TokenData {
648 token: Token::ModAssign,
649 line,
650 column: start_col,
651 });
652 i += 2;
653 column += 2;
654 } else {
655 tokens.push(TokenData {
656 token: Token::Assign,
657 line,
658 column: start_col,
659 });
660 i += 1;
661 column += 1;
662 }
663 }
664 '<' => {
665 if i + 1 < chars.len() && chars[i + 1] == '=' {
666 tokens.push(TokenData {
667 token: Token::LessEqual,
668 line,
669 column: start_col,
670 });
671 i += 2;
672 column += 2;
673 } else if i + 2 < chars.len() && chars[i + 1] == '<' && chars[i + 2] == '=' {
674 tokens.push(TokenData {
676 token: Token::LeftShiftAssign,
677 line,
678 column: start_col,
679 });
680 i += 3;
681 column += 3;
682 } else if i + 1 < chars.len() && chars[i + 1] == '<' {
683 tokens.push(TokenData {
684 token: Token::LeftShift,
685 line,
686 column: start_col,
687 });
688 i += 2;
689 column += 2;
690 } else {
691 tokens.push(TokenData {
692 token: Token::LessThan,
693 line,
694 column: start_col,
695 });
696 i += 1;
697 column += 1;
698 }
699 }
700 '>' => {
701 if i + 1 < chars.len() && chars[i + 1] == '=' {
702 tokens.push(TokenData {
703 token: Token::GreaterEqual,
704 line,
705 column: start_col,
706 });
707 i += 2;
708 column += 2;
709 } else if i + 3 < chars.len() && chars[i + 1] == '>' && chars[i + 2] == '>' && chars[i + 3] == '=' {
710 tokens.push(TokenData {
712 token: Token::UnsignedRightShiftAssign,
713 line,
714 column: start_col,
715 });
716 i += 4;
717 column += 4;
718 } else if i + 2 < chars.len() && chars[i + 1] == '>' && chars[i + 2] == '>' {
719 tokens.push(TokenData {
721 token: Token::UnsignedRightShift,
722 line,
723 column: start_col,
724 });
725 i += 3;
726 column += 3;
727 } else if i + 2 < chars.len() && chars[i + 1] == '>' && chars[i + 2] == '=' {
728 tokens.push(TokenData {
730 token: Token::RightShiftAssign,
731 line,
732 column: start_col,
733 });
734 i += 3;
735 column += 3;
736 } else if i + 1 < chars.len() && chars[i + 1] == '>' {
737 tokens.push(TokenData {
738 token: Token::RightShift,
739 line,
740 column: start_col,
741 });
742 i += 2;
743 column += 2;
744 } else {
745 tokens.push(TokenData {
746 token: Token::GreaterThan,
747 line,
748 column: start_col,
749 });
750 i += 1;
751 column += 1;
752 }
753 }
754 '&' => {
755 if i + 2 < chars.len() && chars[i + 1] == '&' && chars[i + 2] == '=' {
757 tokens.push(TokenData {
758 token: Token::LogicalAndAssign,
759 line,
760 column: start_col,
761 });
762 i += 3;
763 column += 3;
764 } else if i + 1 < chars.len() && chars[i + 1] == '&' {
765 tokens.push(TokenData {
766 token: Token::LogicalAnd,
767 line,
768 column: start_col,
769 });
770 i += 2;
771 column += 2;
772 } else if i + 1 < chars.len() && chars[i + 1] == '=' {
773 tokens.push(TokenData {
775 token: Token::BitAndAssign,
776 line,
777 column: start_col,
778 });
779 i += 2;
780 column += 2;
781 } else {
782 tokens.push(TokenData {
783 token: Token::BitAnd,
784 line,
785 column: start_col,
786 });
787 i += 1;
788 column += 1;
789 }
790 }
791 '|' => {
792 if i + 2 < chars.len() && chars[i + 1] == '|' && chars[i + 2] == '=' {
794 tokens.push(TokenData {
795 token: Token::LogicalOrAssign,
796 line,
797 column: start_col,
798 });
799 i += 3;
800 column += 3;
801 } else if i + 1 < chars.len() && chars[i + 1] == '|' {
802 tokens.push(TokenData {
803 token: Token::LogicalOr,
804 line,
805 column: start_col,
806 });
807 i += 2;
808 column += 2;
809 } else if i + 1 < chars.len() && chars[i + 1] == '=' {
810 tokens.push(TokenData {
812 token: Token::BitOrAssign,
813 line,
814 column: start_col,
815 });
816 i += 2;
817 column += 2;
818 } else {
819 tokens.push(TokenData {
820 token: Token::BitOr,
821 line,
822 column: start_col,
823 });
824 i += 1;
825 column += 1;
826 }
827 }
828 '^' => {
829 if i + 1 < chars.len() && chars[i + 1] == '=' {
831 tokens.push(TokenData {
832 token: Token::BitXorAssign,
833 line,
834 column: start_col,
835 });
836 i += 2;
837 column += 2;
838 } else {
839 tokens.push(TokenData {
840 token: Token::BitXor,
841 line,
842 column: start_col,
843 });
844 i += 1;
845 column += 1;
846 }
847 }
848 '~' => {
849 tokens.push(TokenData {
850 token: Token::BitNot,
851 line,
852 column: start_col,
853 });
854 i += 1;
855 column += 1;
856 }
857 '0'..='9' => {
858 let start = i;
859 let mut radix = 10;
860
861 if chars[i] == '0' && i + 1 < chars.len() {
862 match chars[i + 1] {
863 'b' | 'B' => {
864 radix = 2;
865 i += 2;
866 column += 2;
867 }
868 'o' | 'O' => {
869 radix = 8;
870 i += 2;
871 column += 2;
872 }
873 'x' | 'X' => {
874 radix = 16;
875 i += 2;
876 column += 2;
877 }
878 _ => {}
879 }
880 }
881
882 if radix != 10 {
883 while i < chars.len() && (chars[i].is_digit(radix) || chars[i] == '_') {
884 i += 1;
885 column += 1;
886 }
887
888 if i < chars.len() && chars[i] == 'n' {
889 let mut num_str: String = chars[start..i].iter().collect();
890 num_str.retain(|c| c != '_');
891 tokens.push(TokenData {
892 token: Token::BigInt(num_str),
893 line,
894 column: start_col,
895 });
896 i += 1;
897 column += 1;
898 continue;
899 }
900
901 let mut num_str: String = chars[start + 2..i].iter().collect();
902 num_str.retain(|c| c != '_');
903 if num_str.is_empty() {
904 return Err(raise_tokenize_error!("Invalid binary/octal/hex literal", line, column));
905 }
906
907 match BigInt::from_str_radix(&num_str, radix) {
908 Ok(n) => {
909 let f = n.to_f64().unwrap_or(f64::INFINITY);
910 tokens.push(TokenData {
911 token: Token::Number(f),
912 line,
913 column: start_col,
914 });
915 }
916 Err(_) => return Err(raise_tokenize_error!("Invalid BigInt literal", line, column)),
917 }
918 continue;
919 }
920
921 while i < chars.len() && (chars[i].is_ascii_digit() || chars[i] == '_') {
923 i += 1;
924 column += 1;
925 }
926
927 if i < chars.len() && chars[i] == 'n' {
929 let mut num_str: String = chars[start..i].iter().collect();
930 num_str.retain(|c| c != '_');
931 if num_str.is_empty() || !num_str.chars().all(|c| c.is_ascii_digit()) {
932 return Err(raise_tokenize_error!("Invalid BigInt literal", line, column));
933 }
934 tokens.push(TokenData {
935 token: Token::BigInt(num_str),
936 line,
937 column: start_col,
938 });
939 i += 1; column += 1;
941 continue;
942 }
943
944 if i < chars.len() && chars[i] == '.' {
946 i += 1;
947 column += 1;
948 while i < chars.len() && (chars[i].is_ascii_digit() || chars[i] == '_') {
949 i += 1;
950 column += 1;
951 }
952 }
953
954 if i < chars.len() && (chars[i] == 'e' || chars[i] == 'E') {
956 let mut j = i + 1;
957 let mut col_j = column + 1;
958 if j < chars.len() && (chars[j] == '+' || chars[j] == '-') {
960 j += 1;
961 col_j += 1;
962 }
963 if j >= chars.len() || !(chars[j].is_ascii_digit()) {
965 return Err(raise_tokenize_error!("Invalid exponent in number literal", line, column));
966 }
967 while j < chars.len() && (chars[j].is_ascii_digit() || chars[j] == '_') {
968 j += 1;
969 col_j += 1;
970 }
971 i = j;
972 column = col_j;
973 }
974
975 let mut num_str: String = chars[start..i].iter().collect();
977 num_str.retain(|c| c != '_');
978 match num_str.parse::<f64>() {
980 Ok(n) => tokens.push(TokenData {
981 token: Token::Number(n),
982 line,
983 column: start_col,
984 }),
985 Err(_) => return Err(raise_tokenize_error!("Invalid number literal", line, column)),
986 }
987 }
988 '"' => {
989 i += 1; column += 1;
991 let mut start = i;
992 let str_lit = parse_string_literal(&chars, &mut start, '"', line, column)?;
993 tokens.push(TokenData {
994 token: Token::StringLit(str_lit),
995 line,
996 column: start_col,
997 });
998
999 for &chars_k in chars[i..start].iter() {
1000 if chars_k == '\n' {
1001 line += 1;
1002 column = 1;
1003 } else {
1004 column += 1;
1005 }
1006 }
1007
1008 i = start + 1; column += 1;
1010 }
1011 '\'' => {
1012 i += 1; column += 1;
1014 let mut start = i;
1015 let str_lit = parse_string_literal(&chars, &mut start, '\'', line, column)?;
1016 tokens.push(TokenData {
1017 token: Token::StringLit(str_lit),
1018 line,
1019 column: start_col,
1020 });
1021
1022 for &chars_k in chars[i..start].iter() {
1023 if chars_k == '\n' {
1024 line += 1;
1025 column = 1;
1026 } else {
1027 column += 1;
1028 }
1029 }
1030
1031 i = start + 1; column += 1;
1033 }
1034 '`' => {
1035 i += 1; column += 1;
1037 let mut parts = Vec::new();
1038 let mut current_start = i;
1039 let mut part_start_line = line;
1040 let mut part_start_col = column;
1041 while i < chars.len() && chars[i] != '`' {
1042 if chars[i] == '$' && i + 1 < chars.len() && chars[i + 1] == '{' {
1043 if current_start < i {
1045 let mut start_idx = current_start;
1046 let str_part = parse_string_literal(&chars, &mut start_idx, '$', part_start_line, part_start_col)?;
1047 parts.push(TemplatePart::String(str_part));
1048
1049 i = start_idx; }
1051 i += 2; column += 2;
1053 let expr_start = i;
1054 let mut brace_count = 1;
1055 while i < chars.len() && brace_count > 0 {
1056 if chars[i] == '{' {
1057 brace_count += 1;
1058 } else if chars[i] == '}' {
1059 brace_count -= 1;
1060 }
1061 if chars[i] == '\n' {
1062 line += 1;
1063 column = 1;
1064 } else {
1065 column += 1;
1066 }
1067 i += 1;
1068 }
1069 if brace_count != 0 {
1070 return Err(raise_tokenize_error!("Unterminated template literal expression", line, column));
1071 }
1072 let expr_str: String = chars[expr_start..i - 1].iter().collect();
1073 let mut expr_tokens = tokenize(&expr_str)?;
1075 if part_start_line > 1 || part_start_col > 1 {
1078 let line_offset = part_start_line - 1;
1079 let col_offset = part_start_col - 1;
1080 for t in expr_tokens.iter_mut() {
1081 t.line += line_offset;
1083 if t.line == part_start_line {
1086 t.column += col_offset;
1087 }
1088 }
1089 }
1090 parts.push(TemplatePart::Expr(expr_tokens));
1091 current_start = i;
1092 part_start_line = line;
1093 part_start_col = column;
1094 } else {
1095 if chars[i] == '\\' {
1097 if chars[i] == '\n' {
1098 line += 1;
1099 column = 1;
1100 } else {
1101 column += 1;
1102 }
1103 i += 1;
1104 if i < chars.len() {
1105 if chars[i] == '\n' {
1106 line += 1;
1107 column = 1;
1108 } else {
1109 column += 1;
1110 }
1111 i += 1;
1112 }
1113 } else {
1114 if chars[i] == '\n' {
1115 line += 1;
1116 column = 1;
1117 } else {
1118 column += 1;
1119 }
1120 i += 1;
1121 }
1122 }
1123 }
1124 if i >= chars.len() {
1125 return Err(raise_tokenize_error!("Unterminated template literal", line, column));
1126 }
1127 if current_start < i {
1129 let mut start_idx = current_start;
1130 let str_part = parse_string_literal(&chars, &mut start_idx, '`', part_start_line, part_start_col)?;
1131 parts.push(TemplatePart::String(str_part));
1132 }
1133 tokens.push(TokenData {
1134 token: Token::TemplateString(parts),
1135 line,
1136 column: start_col,
1137 });
1138 i += 1; column += 1;
1140 }
1141 'a'..='z' | 'A'..='Z' | '_' | '$' | '#' => {
1142 let start = i;
1143 while i < chars.len() && (chars[i].is_alphanumeric() || chars[i] == '_' || chars[i] == '$' || chars[i] == '#') {
1144 i += 1;
1145 column += 1;
1146 }
1147 let ident: String = chars[start..i].iter().collect();
1148 let token = if let Some(stripped) = ident.strip_prefix('#') {
1149 Token::PrivateIdentifier(stripped.to_string())
1150 } else {
1151 match ident.as_str() {
1152 "let" => Token::Let,
1153 "var" => Token::Var,
1154 "const" => Token::Const,
1155 "class" => Token::Class,
1156 "extends" => Token::Extends,
1157 "super" => Token::Super,
1158 "this" => Token::This,
1159 "static" => Token::Static,
1160 "new" => Token::New,
1161 "instanceof" => Token::InstanceOf,
1162 "typeof" => Token::TypeOf,
1163 "delete" => Token::Delete,
1164 "void" => Token::Void,
1165 "in" => Token::In,
1166 "as" => Token::As,
1167 "import" => Token::Import,
1168 "export" => Token::Export,
1169 "try" => Token::Try,
1170 "catch" => Token::Catch,
1171 "finally" => Token::Finally,
1172 "throw" => Token::Throw,
1173 "function" => {
1174 if i < chars.len() && chars[i] == '*' {
1176 i += 1; column += 1;
1178 Token::FunctionStar
1179 } else {
1180 Token::Function
1181 }
1182 }
1183 "return" => Token::Return,
1184 "if" => Token::If,
1185 "else" => Token::Else,
1186 "for" => Token::For,
1187 "while" => Token::While,
1188 "do" => Token::Do,
1189 "switch" => Token::Switch,
1190 "case" => Token::Case,
1191 "default" => Token::Default,
1192 "break" => Token::Break,
1193 "continue" => Token::Continue,
1194 "true" => Token::True,
1195 "false" => Token::False,
1196 "null" => Token::Null,
1197 "async" => Token::Async,
1198 "await" => Token::Await,
1199 "yield" => {
1200 if i < chars.len() && chars[i] == '*' {
1202 i += 1; column += 1;
1204 Token::YieldStar
1205 } else {
1206 Token::Yield
1207 }
1208 }
1209 _ => Token::Identifier(ident),
1210 }
1211 };
1212 tokens.push(TokenData {
1213 token,
1214 line,
1215 column: start_col,
1216 });
1217 }
1218 ',' => {
1219 tokens.push(TokenData {
1220 token: Token::Comma,
1221 line,
1222 column: start_col,
1223 });
1224 i += 1;
1225 column += 1;
1226 }
1227 ';' => {
1228 tokens.push(TokenData {
1229 token: Token::Semicolon,
1230 line,
1231 column: start_col,
1232 });
1233 i += 1;
1234 column += 1;
1235 }
1236
1237 _ => return Err(raise_tokenize_error!(format!("Unexpected character '{}'", chars[i]), line, column)),
1238 }
1239 }
1240 Ok(tokens)
1241}
1242
1243fn parse_string_literal(
1244 chars: &[char],
1245 start: &mut usize,
1246 end_char: char,
1247 start_line: usize,
1248 start_col: usize,
1249) -> Result<Vec<u16>, JSError> {
1250 let mut result = Vec::new();
1251 let mut current_line = start_line;
1252 let mut current_col = start_col;
1253
1254 while *start < chars.len() && chars[*start] != end_char {
1255 if chars[*start] == '\\' {
1256 *start += 1;
1257 current_col += 1;
1258 if *start >= chars.len() {
1259 return Err(raise_tokenize_error!("Unexpected end of string literal", current_line, current_col));
1260 }
1261 match chars[*start] {
1262 'n' => {
1263 result.push('\n' as u16);
1264 current_col += 1;
1265 }
1266 't' => {
1267 result.push('\t' as u16);
1268 current_col += 1;
1269 }
1270 'r' => {
1271 result.push('\r' as u16);
1272 current_col += 1;
1273 }
1274 'b' => {
1275 result.push(0x08);
1276 current_col += 1;
1277 }
1278 'f' => {
1279 result.push(0x0C);
1280 current_col += 1;
1281 }
1282 'v' => {
1283 result.push(0x0B);
1284 current_col += 1;
1285 }
1286 '0' => {
1287 result.push(0x00);
1288 current_col += 1;
1289 }
1290 '\\' => {
1291 result.push('\\' as u16);
1292 current_col += 1;
1293 }
1294 '"' => {
1295 result.push('"' as u16);
1296 current_col += 1;
1297 }
1298 '\'' => {
1299 result.push('\'' as u16);
1300 current_col += 1;
1301 }
1302 '`' => {
1303 result.push('`' as u16);
1304 current_col += 1;
1305 }
1306 '\n' => {
1307 current_line += 1;
1309 current_col = 1;
1310 }
1311 '\r' => {
1312 if *start + 1 < chars.len() && chars[*start + 1] == '\n' {
1314 *start += 1;
1315 }
1316 current_line += 1;
1318 current_col = 1;
1319 }
1320 'u' => {
1321 *start += 1;
1323 current_col += 1;
1324 if *start >= chars.len() {
1325 return Err(raise_tokenize_error!("Unexpected end of string literal", current_line, current_col));
1326 }
1327 if chars[*start] == '{' {
1328 *start += 1; current_col += 1;
1331 let mut hex_str = String::new();
1332 while *start < chars.len() && chars[*start] != '}' {
1333 hex_str.push(chars[*start]);
1334 *start += 1;
1335 current_col += 1;
1336 }
1337 if *start >= chars.len() || chars[*start] != '}' {
1338 return Err(raise_tokenize_error!(
1339 "Unterminated unicode escape sequence",
1340 current_line,
1341 current_col
1342 )); }
1344 match u32::from_str_radix(&hex_str, 16) {
1346 Ok(cp) if cp <= 0x10FFFF => {
1347 if cp <= 0xFFFF {
1348 result.push(cp as u16);
1349 } else {
1350 let u = cp - 0x10000;
1352 let high = 0xD800u16 + ((u >> 10) as u16);
1353 let low = 0xDC00u16 + ((u & 0x3FF) as u16);
1354 result.push(high);
1355 result.push(low);
1356 }
1357 }
1358 _ => {
1359 return Err(raise_tokenize_error!("Invalid unicode escape sequence", current_line, current_col));
1360 }
1361 }
1362 current_col += 1;
1364 } else {
1365 if *start + 4 > chars.len() {
1367 return Err(raise_tokenize_error!(
1368 "Unexpected end of unicode escape sequence",
1369 current_line,
1370 current_col
1371 ));
1372 }
1373 let hex_str: String = chars[*start..*start + 4].iter().collect();
1374 *start += 3; current_col += 3;
1376 match u16::from_str_radix(&hex_str, 16) {
1377 Ok(code) => {
1378 result.push(code);
1379 }
1380 Err(_) => {
1381 return Err(raise_tokenize_error!("Invalid unicode escape sequence", current_line, current_col)); }
1383 }
1384 current_col += 1;
1385 }
1386 }
1387 'x' => {
1388 *start += 1;
1390 current_col += 1;
1391 if *start + 2 > chars.len() {
1392 return Err(raise_tokenize_error!(
1393 "Unexpected end of hex escape sequence",
1394 current_line,
1395 current_col
1396 ));
1397 }
1398 let hex_str: String = chars[*start..*start + 2].iter().collect();
1399 *start += 1; current_col += 1;
1401 match u8::from_str_radix(&hex_str, 16) {
1402 Ok(code) => {
1403 result.push(code as u16);
1404 }
1405 Err(_) => {
1406 return Err(raise_tokenize_error!("Invalid hex escape sequence", current_line, current_col));
1407 }
1408 }
1409 current_col += 1;
1410 }
1411 other => {
1412 result.push(other as u16);
1414 current_col += 1;
1415 }
1416 }
1417 } else {
1418 if (end_char == '"' || end_char == '\'') && (chars[*start] == '\n' || chars[*start] == '\r') {
1420 return Err(raise_tokenize_error!(
1421 "Unterminated string literal (newline in string)",
1422 current_line,
1423 current_col
1424 ));
1425 }
1426 let ch = chars[*start];
1428 for code_unit in ch.to_string().encode_utf16() {
1429 result.push(code_unit);
1430 }
1431 if ch == '\n' {
1432 current_line += 1;
1433 current_col = 1;
1434 } else {
1435 current_col += 1;
1436 }
1437 }
1438 *start += 1;
1439 }
1440 if *start >= chars.len() {
1441 return Err(raise_tokenize_error!("Unterminated string literal", current_line, current_col)); }
1443 Ok(result)
1444}