1use crate::{JSError, raise_tokenize_error};
2
3#[derive(Debug, Clone)]
4pub enum Token {
5 Number(f64),
6 BigInt(String),
8 StringLit(Vec<u16>),
9 TemplateString(Vec<TemplatePart>),
10 Identifier(String),
11 Plus,
12 Minus,
13 Multiply,
14 Exponent,
16 Divide,
17 Regex(String, String),
19 Mod,
20 LParen,
21 RParen,
22 LBracket,
23 RBracket,
24 LBrace,
25 RBrace,
26 Colon,
27 Dot,
28 Comma,
29 Let,
30 Var,
31 Const,
32 Class,
33 Extends,
34 Super,
35 This,
36 Static,
37 New,
38 InstanceOf,
39 TypeOf,
40 In,
41 Delete,
42 Void,
43 Function,
44 Return,
45 If,
46 Else,
47 For,
48 While,
49 Do,
50 Switch,
51 Case,
52 Default,
53 Break,
54 Continue,
55 Try,
56 Catch,
57 Finally,
58 Throw,
59 Assign,
60 Semicolon,
61 Equal,
62 StrictEqual,
63 NotEqual,
64 StrictNotEqual,
65 LessThan,
66 GreaterThan,
67 LessEqual,
68 GreaterEqual,
69 True,
70 False,
71 Null,
72 Arrow,
73 Spread,
74 OptionalChain,
75 QuestionMark,
76 NullishCoalescing,
77 LogicalNot,
78 LogicalAnd,
79 LogicalOr,
80 BitXor,
81 LogicalAndAssign,
82 LogicalOrAssign,
83 BitXorAssign,
84 NullishAssign,
85 AddAssign,
86 SubAssign,
87 MulAssign,
88 DivAssign,
89 ModAssign,
90 Increment,
91 Decrement,
92 Async,
93 Await,
94 Yield,
95 YieldStar,
96 FunctionStar,
97 LineTerminator,
98 PowAssign,
100 BitAnd,
101 BitAndAssign,
102 BitOr,
103 BitOrAssign,
104 LeftShift,
105 LeftShiftAssign,
106 RightShift,
107 RightShiftAssign,
108 UnsignedRightShift,
109 UnsignedRightShiftAssign,
110 As,
111 Import,
112 Export,
113}
114
115impl Token {
116 pub fn as_identifier_string(&self) -> Option<String> {
118 match self {
119 Token::Identifier(s) => Some(s.clone()),
120 Token::Let => Some("let".to_string()),
121 Token::Var => Some("var".to_string()),
122 Token::Const => Some("const".to_string()),
123 Token::Class => Some("class".to_string()),
124 Token::Extends => Some("extends".to_string()),
125 Token::Super => Some("super".to_string()),
126 Token::This => Some("this".to_string()),
127 Token::Static => Some("static".to_string()),
128 Token::New => Some("new".to_string()),
129 Token::InstanceOf => Some("instanceof".to_string()),
130 Token::TypeOf => Some("typeof".to_string()),
131 Token::In => Some("in".to_string()),
132 Token::Delete => Some("delete".to_string()),
133 Token::Void => Some("void".to_string()),
134 Token::As => Some("as".to_string()),
135 Token::Import => Some("import".to_string()),
136 Token::Export => Some("export".to_string()),
137 Token::Function => Some("function".to_string()),
138 Token::Return => Some("return".to_string()),
139 Token::If => Some("if".to_string()),
140 Token::Else => Some("else".to_string()),
141 Token::For => Some("for".to_string()),
142 Token::While => Some("while".to_string()),
143 Token::Do => Some("do".to_string()),
144 Token::Switch => Some("switch".to_string()),
145 Token::Case => Some("case".to_string()),
146 Token::Default => Some("default".to_string()),
147 Token::Break => Some("break".to_string()),
148 Token::Continue => Some("continue".to_string()),
149 Token::Try => Some("try".to_string()),
150 Token::Catch => Some("catch".to_string()),
151 Token::Finally => Some("finally".to_string()),
152 Token::Throw => Some("throw".to_string()),
153 Token::True => Some("true".to_string()),
154 Token::False => Some("false".to_string()),
155 Token::Null => Some("null".to_string()),
156 Token::Async => Some("async".to_string()),
157 Token::Await => Some("await".to_string()),
158 Token::Yield => Some("yield".to_string()),
159 Token::FunctionStar => Some("function*".to_string()),
160 _ => None,
161 }
162 }
163}
164
165#[derive(Debug, Clone)]
166pub enum TemplatePart {
167 String(Vec<u16>),
168 Expr(Vec<Token>),
169}
170
171pub fn tokenize(expr: &str) -> Result<Vec<Token>, JSError> {
172 let mut tokens = Vec::new();
173 let chars: Vec<char> = expr.chars().collect();
174 let mut i = 0;
175 while i < chars.len() {
176 match chars[i] {
177 ' ' | '\t' | '\r' => i += 1,
178 '\n' => {
179 tokens.push(Token::LineTerminator);
180 i += 1;
181 }
182 '+' => {
183 if i + 1 < chars.len() && chars[i + 1] == '+' {
184 tokens.push(Token::Increment);
185 i += 2;
186 } else if i + 1 < chars.len() && chars[i + 1] == '=' {
187 tokens.push(Token::AddAssign);
188 i += 2;
189 } else {
190 tokens.push(Token::Plus);
191 i += 1;
192 }
193 }
194 '-' => {
195 if i + 1 < chars.len() && chars[i + 1] == '-' {
196 tokens.push(Token::Decrement);
197 i += 2;
198 } else if i + 1 < chars.len() && chars[i + 1] == '=' {
199 tokens.push(Token::SubAssign);
200 i += 2;
201 } else {
202 tokens.push(Token::Minus);
203 i += 1;
204 }
205 }
206 '*' => {
207 if i + 2 < chars.len() && chars[i + 1] == '*' && chars[i + 2] == '=' {
209 tokens.push(Token::PowAssign);
210 i += 3;
211 } else if i + 1 < chars.len() && chars[i + 1] == '*' {
212 tokens.push(Token::Exponent);
213 i += 2;
214 } else if i + 1 < chars.len() && chars[i + 1] == '=' {
215 tokens.push(Token::MulAssign);
216 i += 2;
217 } else {
218 tokens.push(Token::Multiply);
219 i += 1;
220 }
221 }
222 '/' => {
223 if i + 1 < chars.len() && chars[i + 1] == '=' {
224 tokens.push(Token::DivAssign);
225 i += 2;
226 } else if i + 1 < chars.len() && chars[i + 1] == '/' {
227 while i < chars.len() && chars[i] != '\n' {
229 i += 1;
230 }
231 } else if i + 1 < chars.len() && chars[i + 1] == '*' {
233 i += 2; while i + 1 < chars.len() {
236 if chars[i] == '*' && chars[i + 1] == '/' {
237 i += 2; break;
239 }
240 if chars[i] == '\n' {
241 tokens.push(Token::LineTerminator);
242 }
243 i += 1;
244 }
245 if i >= chars.len() {
246 return Err(raise_tokenize_error!()); }
248 } else {
249 let mut prev_end_expr = false;
256 if let Some(
257 Token::Number(_)
258 | Token::BigInt(_)
259 | Token::StringLit(_)
260 | Token::Identifier(_)
261 | Token::RBracket
262 | Token::RParen
263 | Token::RBrace
264 | Token::True
265 | Token::False
266 | Token::Increment
267 | Token::Decrement,
268 ) = tokens.iter().rev().find(|t| !matches!(t, Token::LineTerminator))
269 {
270 prev_end_expr = true;
271 }
272
273 if prev_end_expr {
274 tokens.push(Token::Divide);
275 i += 1;
276 } else {
277 let mut j = i + 1;
279 let mut in_class = false;
280 while j < chars.len() {
281 if chars[j] == '\\' {
282 j += 2;
284 continue;
285 }
286 if !in_class && chars[j] == '/' {
287 break;
288 }
289 if chars[j] == '[' {
290 in_class = true;
291 } else if chars[j] == ']' {
292 in_class = false;
293 }
294 j += 1;
295 }
296 if j >= chars.len() || chars[j] != '/' {
297 return Err(raise_tokenize_error!()); }
299 let pattern: String = chars[i + 1..j].iter().collect();
301 j += 1; let mut flags = String::new();
305 while j < chars.len() && chars[j].is_alphabetic() {
306 flags.push(chars[j]);
307 j += 1;
308 }
309 tokens.push(Token::Regex(pattern, flags));
310 i = j;
311 }
312 }
313 }
314 '%' => {
315 if i + 1 < chars.len() && chars[i + 1] == '=' {
316 tokens.push(Token::ModAssign);
317 i += 2;
318 } else {
319 tokens.push(Token::Mod);
320 i += 1;
321 }
322 }
323 '(' => {
324 tokens.push(Token::LParen);
325 i += 1;
326 }
327 ')' => {
328 tokens.push(Token::RParen);
329 i += 1;
330 }
331 '[' => {
332 tokens.push(Token::LBracket);
333 i += 1;
334 }
335 ']' => {
336 tokens.push(Token::RBracket);
337 i += 1;
338 }
339 '{' => {
340 tokens.push(Token::LBrace);
341 i += 1;
342 }
343 '}' => {
344 tokens.push(Token::RBrace);
345 i += 1;
346 }
347 ':' => {
348 tokens.push(Token::Colon);
349 i += 1;
350 }
351 '.' => {
352 if i + 2 < chars.len() && chars[i + 1] == '.' && chars[i + 2] == '.' {
353 tokens.push(Token::Spread);
354 i += 3;
355 } else {
356 tokens.push(Token::Dot);
357 i += 1;
358 }
359 }
360 '?' => {
361 if i + 2 < chars.len() && chars[i + 1] == '?' && chars[i + 2] == '=' {
363 tokens.push(Token::NullishAssign);
364 i += 3;
365 } else if i + 1 < chars.len() && chars[i + 1] == '?' {
366 tokens.push(Token::NullishCoalescing);
367 i += 2;
368 } else if i + 1 < chars.len() && chars[i + 1] == '.' {
369 tokens.push(Token::OptionalChain);
370 i += 2;
371 } else {
372 tokens.push(Token::QuestionMark);
373 i += 1;
374 }
375 }
376 '!' => {
377 if i + 2 < chars.len() && chars[i + 1] == '=' && chars[i + 2] == '=' {
378 tokens.push(Token::StrictNotEqual);
379 i += 3;
380 } else if i + 1 < chars.len() && chars[i + 1] == '=' {
381 tokens.push(Token::NotEqual);
382 i += 2;
383 } else {
384 tokens.push(Token::LogicalNot);
385 i += 1;
386 }
387 }
388 '=' => {
389 if i + 1 < chars.len() && chars[i + 1] == '=' {
390 if i + 2 < chars.len() && chars[i + 2] == '=' {
391 tokens.push(Token::StrictEqual);
392 i += 3;
393 } else {
394 tokens.push(Token::Equal);
395 i += 2;
396 }
397 } else if i + 1 < chars.len() && chars[i + 1] == '>' {
398 tokens.push(Token::Arrow);
399 i += 2;
400 } else if i + 1 < chars.len() && chars[i + 1] == '+' {
401 tokens.push(Token::AddAssign);
402 i += 2;
403 } else if i + 1 < chars.len() && chars[i + 1] == '-' {
404 tokens.push(Token::SubAssign);
405 i += 2;
406 } else if i + 1 < chars.len() && chars[i + 1] == '*' {
407 tokens.push(Token::MulAssign);
408 i += 2;
409 } else if i + 1 < chars.len() && chars[i + 1] == '/' {
410 tokens.push(Token::DivAssign);
411 i += 2;
412 } else if i + 1 < chars.len() && chars[i + 1] == '%' {
413 tokens.push(Token::ModAssign);
414 i += 2;
415 } else {
416 tokens.push(Token::Assign);
417 i += 1;
418 }
419 }
420 '<' => {
421 if i + 1 < chars.len() && chars[i + 1] == '=' {
422 tokens.push(Token::LessEqual);
423 i += 2;
424 } else if i + 2 < chars.len() && chars[i + 1] == '<' && chars[i + 2] == '=' {
425 tokens.push(Token::LeftShiftAssign);
427 i += 3;
428 } else if i + 1 < chars.len() && chars[i + 1] == '<' {
429 tokens.push(Token::LeftShift);
430 i += 2;
431 } else {
432 tokens.push(Token::LessThan);
433 i += 1;
434 }
435 }
436 '>' => {
437 if i + 1 < chars.len() && chars[i + 1] == '=' {
438 tokens.push(Token::GreaterEqual);
439 i += 2;
440 } else if i + 3 < chars.len() && chars[i + 1] == '>' && chars[i + 2] == '>' && chars[i + 3] == '=' {
441 tokens.push(Token::UnsignedRightShiftAssign);
443 i += 4;
444 } else if i + 2 < chars.len() && chars[i + 1] == '>' && chars[i + 2] == '>' {
445 tokens.push(Token::UnsignedRightShift);
447 i += 3;
448 } else if i + 2 < chars.len() && chars[i + 1] == '>' && chars[i + 2] == '=' {
449 tokens.push(Token::RightShiftAssign);
451 i += 3;
452 } else if i + 1 < chars.len() && chars[i + 1] == '>' {
453 tokens.push(Token::RightShift);
454 i += 2;
455 } else {
456 tokens.push(Token::GreaterThan);
457 i += 1;
458 }
459 }
460 '&' => {
461 if i + 2 < chars.len() && chars[i + 1] == '&' && chars[i + 2] == '=' {
463 tokens.push(Token::LogicalAndAssign);
464 i += 3;
465 } else if i + 1 < chars.len() && chars[i + 1] == '&' {
466 tokens.push(Token::LogicalAnd);
467 i += 2;
468 } else if i + 1 < chars.len() && chars[i + 1] == '=' {
469 tokens.push(Token::BitAndAssign);
471 i += 2;
472 } else {
473 tokens.push(Token::BitAnd);
474 i += 1;
475 }
476 }
477 '|' => {
478 if i + 2 < chars.len() && chars[i + 1] == '|' && chars[i + 2] == '=' {
480 tokens.push(Token::LogicalOrAssign);
481 i += 3;
482 } else if i + 1 < chars.len() && chars[i + 1] == '|' {
483 tokens.push(Token::LogicalOr);
484 i += 2;
485 } else if i + 1 < chars.len() && chars[i + 1] == '=' {
486 tokens.push(Token::BitOrAssign);
488 i += 2;
489 } else {
490 tokens.push(Token::BitOr);
491 i += 1;
492 }
493 }
494 '^' => {
495 if i + 1 < chars.len() && chars[i + 1] == '=' {
497 tokens.push(Token::BitXorAssign);
498 i += 2;
499 } else {
500 tokens.push(Token::BitXor);
501 i += 1;
502 }
503 }
504 '0'..='9' => {
505 let start = i;
506 while i < chars.len() && (chars[i].is_ascii_digit() || chars[i] == '_') {
508 i += 1;
509 }
510
511 if i < chars.len() && chars[i] == 'n' {
513 let mut num_str: String = chars[start..i].iter().collect();
514 num_str.retain(|c| c != '_');
515 if num_str.is_empty() || !num_str.chars().all(|c| c.is_ascii_digit()) {
516 return Err(raise_tokenize_error!());
517 }
518 tokens.push(Token::BigInt(num_str));
519 i += 1; continue;
521 }
522
523 if i < chars.len() && chars[i] == '.' {
525 i += 1;
526 while i < chars.len() && (chars[i].is_ascii_digit() || chars[i] == '_') {
527 i += 1;
528 }
529 }
530
531 if i < chars.len() && (chars[i] == 'e' || chars[i] == 'E') {
533 let mut j = i + 1;
534 if j < chars.len() && (chars[j] == '+' || chars[j] == '-') {
536 j += 1;
537 }
538 if j >= chars.len() || !(chars[j].is_ascii_digit()) {
540 return Err(raise_tokenize_error!());
541 }
542 while j < chars.len() && (chars[j].is_ascii_digit() || chars[j] == '_') {
543 j += 1;
544 }
545 i = j;
546 }
547
548 let mut num_str: String = chars[start..i].iter().collect();
550 num_str.retain(|c| c != '_');
551 match num_str.parse::<f64>() {
553 Ok(n) => tokens.push(Token::Number(n)),
554 Err(_) => return Err(raise_tokenize_error!()),
555 }
556 }
557 '"' => {
558 i += 1; let mut start = i;
560 let str_lit = parse_string_literal(&chars, &mut start, '"')?;
561 tokens.push(Token::StringLit(str_lit));
562 i = start + 1; }
564 '\'' => {
565 i += 1; let mut start = i;
567 let str_lit = parse_string_literal(&chars, &mut start, '\'')?;
568 tokens.push(Token::StringLit(str_lit));
569 i = start + 1; }
571 '`' => {
572 i += 1; let mut parts = Vec::new();
574 let mut current_start = i;
575 while i < chars.len() && chars[i] != '`' {
576 if chars[i] == '$' && i + 1 < chars.len() && chars[i + 1] == '{' {
577 if current_start < i {
579 let mut start_idx = current_start;
580 let str_part = parse_string_literal(&chars, &mut start_idx, '$')?;
581 parts.push(TemplatePart::String(str_part));
582 i = start_idx; }
584 i += 2; let expr_start = i;
586 let mut brace_count = 1;
587 while i < chars.len() && brace_count > 0 {
588 if chars[i] == '{' {
589 brace_count += 1;
590 } else if chars[i] == '}' {
591 brace_count -= 1;
592 }
593 i += 1;
594 }
595 if brace_count != 0 {
596 return Err(raise_tokenize_error!());
597 }
598 let expr_str: String = chars[expr_start..i - 1].iter().collect();
599 let expr_tokens = tokenize(&expr_str)?;
601 parts.push(TemplatePart::Expr(expr_tokens));
602 current_start = i;
603 } else {
604 i += 1;
605 }
606 }
607 if i >= chars.len() {
608 return Err(raise_tokenize_error!());
609 }
610 if current_start < i {
612 let mut start_idx = current_start;
613 let str_part = parse_string_literal(&chars, &mut start_idx, '`')?;
614 parts.push(TemplatePart::String(str_part));
615 }
616 tokens.push(Token::TemplateString(parts));
617 i += 1; }
619 'a'..='z' | 'A'..='Z' | '_' | '$' => {
620 let start = i;
621 while i < chars.len() && (chars[i].is_alphanumeric() || chars[i] == '_' || chars[i] == '$') {
622 i += 1;
623 }
624 let ident: String = chars[start..i].iter().collect();
625 match ident.as_str() {
626 "let" => tokens.push(Token::Let),
627 "var" => tokens.push(Token::Var),
628 "const" => tokens.push(Token::Const),
629 "class" => tokens.push(Token::Class),
630 "extends" => tokens.push(Token::Extends),
631 "super" => tokens.push(Token::Super),
632 "this" => tokens.push(Token::This),
633 "static" => tokens.push(Token::Static),
634 "new" => tokens.push(Token::New),
635 "instanceof" => tokens.push(Token::InstanceOf),
636 "typeof" => tokens.push(Token::TypeOf),
637 "delete" => tokens.push(Token::Delete),
638 "void" => tokens.push(Token::Void),
639 "in" => tokens.push(Token::In),
640 "as" => tokens.push(Token::As),
641 "import" => tokens.push(Token::Import),
642 "export" => tokens.push(Token::Export),
643 "try" => tokens.push(Token::Try),
644 "catch" => tokens.push(Token::Catch),
645 "finally" => tokens.push(Token::Finally),
646 "throw" => tokens.push(Token::Throw),
647 "function" => {
648 if i < chars.len() && chars[i] == '*' {
650 tokens.push(Token::FunctionStar);
651 i += 1; } else {
653 tokens.push(Token::Function);
654 }
655 }
656 "return" => tokens.push(Token::Return),
657 "if" => tokens.push(Token::If),
658 "else" => tokens.push(Token::Else),
659 "for" => tokens.push(Token::For),
660 "while" => tokens.push(Token::While),
661 "do" => tokens.push(Token::Do),
662 "switch" => tokens.push(Token::Switch),
663 "case" => tokens.push(Token::Case),
664 "default" => tokens.push(Token::Default),
665 "break" => tokens.push(Token::Break),
666 "continue" => tokens.push(Token::Continue),
667 "true" => tokens.push(Token::True),
668 "false" => tokens.push(Token::False),
669 "null" => tokens.push(Token::Null),
670 "async" => tokens.push(Token::Async),
671 "await" => tokens.push(Token::Await),
672 "yield" => {
673 if i < chars.len() && chars[i] == '*' {
675 tokens.push(Token::YieldStar);
676 i += 1; } else {
678 tokens.push(Token::Yield);
679 }
680 }
681 _ => tokens.push(Token::Identifier(ident)),
682 }
683 }
684 ',' => {
685 tokens.push(Token::Comma);
686 i += 1;
687 }
688 ';' => {
689 tokens.push(Token::Semicolon);
690 i += 1;
691 }
692 _ => return Err(raise_tokenize_error!()),
693 }
694 }
695 Ok(tokens)
696}
697
698fn parse_string_literal(chars: &[char], start: &mut usize, end_char: char) -> Result<Vec<u16>, JSError> {
699 let mut result = Vec::new();
700 while *start < chars.len() && chars[*start] != end_char {
701 if chars[*start] == '\\' {
702 *start += 1;
703 if *start >= chars.len() {
704 return Err(raise_tokenize_error!());
705 }
706 match chars[*start] {
707 'n' => result.push('\n' as u16),
708 't' => result.push('\t' as u16),
709 'r' => result.push('\r' as u16),
710 '\\' => result.push('\\' as u16),
711 '"' => result.push('"' as u16),
712 '\'' => result.push('\'' as u16),
713 '`' => result.push('`' as u16),
714 'u' => {
715 *start += 1;
717 if *start >= chars.len() {
718 return Err(raise_tokenize_error!());
719 }
720 if chars[*start] == '{' {
721 *start += 1; let mut hex_str = String::new();
724 while *start < chars.len() && chars[*start] != '}' {
725 hex_str.push(chars[*start]);
726 *start += 1;
727 }
728 if *start >= chars.len() || chars[*start] != '}' {
729 return Err(raise_tokenize_error!()); }
731 match u32::from_str_radix(&hex_str, 16) {
733 Ok(cp) if cp <= 0x10FFFF => {
734 if cp <= 0xFFFF {
735 result.push(cp as u16);
736 } else {
737 let u = cp - 0x10000;
739 let high = 0xD800u16 + ((u >> 10) as u16);
740 let low = 0xDC00u16 + ((u & 0x3FF) as u16);
741 result.push(high);
742 result.push(low);
743 }
744 }
745 _ => return Err(raise_tokenize_error!()),
746 }
747 } else {
749 if *start + 4 > chars.len() {
751 return Err(raise_tokenize_error!());
752 }
753 let hex_str: String = chars[*start..*start + 4].iter().collect();
754 *start += 3; match u16::from_str_radix(&hex_str, 16) {
756 Ok(code) => {
757 result.push(code);
758 }
759 Err(_) => return Err(raise_tokenize_error!()), }
761 }
762 }
763 'x' => {
764 *start += 1;
766 if *start + 2 > chars.len() {
767 return Err(raise_tokenize_error!());
768 }
769 let hex_str: String = chars[*start..*start + 2].iter().collect();
770 *start += 1; match u8::from_str_radix(&hex_str, 16) {
772 Ok(code) => {
773 result.push(code as u16);
774 }
775 Err(_) => return Err(raise_tokenize_error!()),
776 }
777 }
778 other => {
781 result.push('\\' as u16);
782 result.push(other as u16);
783 }
784 }
785 } else {
786 let ch = chars[*start];
788 for code_unit in ch.to_string().encode_utf16() {
789 result.push(code_unit);
790 }
791 }
792 *start += 1;
793 }
794 if *start >= chars.len() {
795 return Err(raise_tokenize_error!()); }
797 Ok(result)
798}