1use rhai_codegen::expose_under_internals;
4
5use crate::engine::Precedence;
6use crate::func::native::OnParseTokenCallback;
7use crate::{Engine, Identifier, LexError, Position, SmartString, StaticVec, INT, UNSIGNED_INT};
8#[cfg(feature = "no_std")]
9use std::prelude::v1::*;
10use std::{
11 cell::RefCell,
12 char, fmt,
13 iter::{repeat, FusedIterator, Peekable},
14 rc::Rc,
15 str::{Chars, FromStr},
16};
17
/// A block of state used to control the tokenizer.
///
/// It is shared via [`TokenizerControl`], which wraps it in `Rc<RefCell<..>>`.
#[derive(Debug, Clone, Eq, PartialEq, Default, Hash)]
pub struct TokenizerControlBlock {
    /// Flag telling whether the tokenizer is currently within text.
    // NOTE(review): presumably used to switch back to text parsing after a
    // string interpolation; confirm against the tokenizer loop.
    pub is_within_text: bool,
    /// Global comments (only present with the `metadata` feature).
    #[cfg(feature = "metadata")]
    pub global_comments: String,
    /// Compressed text, if any; starts out as `None` in [`TokenizerControlBlock::new`].
    // NOTE(review): presumably a whitespace-compressed copy of the script that
    // is only collected when this is `Some`; confirm against the writer.
    pub compressed: Option<String>,
}
33
34impl TokenizerControlBlock {
35 #[inline]
37 #[must_use]
38 pub const fn new() -> Self {
39 Self {
40 is_within_text: false,
41 #[cfg(feature = "metadata")]
42 global_comments: String::new(),
43 compressed: None,
44 }
45 }
46}
47
/// A shared, interior-mutable handle to a [`TokenizerControlBlock`].
pub type TokenizerControl = Rc<RefCell<TokenizerControlBlock>>;
50
/// Short local alias for [`LexError`].
type LERR = LexError;
52
/// Separator character for numbers.
// NOTE(review): presumably the digit-group separator accepted inside numeric
// literals; the code using it is not visible in this chunk.
const NUMBER_SEPARATOR: char = '_';
55
/// A stream of tokens: a [`Peekable`] wrapper around a [`TokenIterator`].
pub type TokenStream<'a> = Peekable<TokenIterator<'a>>;
58
/// A language token.
///
/// For variants that stand for one fixed piece of syntax, the documented text
/// is the string that [`Token::literal_syntax`] returns for that variant.
#[derive(Debug, PartialEq, Clone, Hash)]
#[non_exhaustive]
pub enum Token {
    /// An `INT` constant.
    IntegerConstant(INT),
    /// A `FLOAT` constant, paired with an [`Identifier`].
    ///
    /// Not available under `no_float`.
    // NOTE(review): the `Identifier` presumably holds the number's original text; confirm.
    #[cfg(not(feature = "no_float"))]
    FloatConstant(Box<(crate::types::FloatWrapper<crate::FLOAT>, Identifier)>),
    /// A `Decimal` constant, paired with an [`Identifier`].
    ///
    /// Requires the `decimal` feature.
    // NOTE(review): the `Identifier` presumably holds the number's original text; confirm.
    #[cfg(feature = "decimal")]
    DecimalConstant(Box<(rust_decimal::Decimal, Identifier)>),
    /// An identifier.
    Identifier(Box<Identifier>),
    /// A character constant.
    CharConstant(char),
    /// A string constant.
    StringConstant(Box<SmartString>),
    /// An interpolated string.
    InterpolatedString(Box<SmartString>),
    /// `{`
    LeftBrace,
    /// `}`
    RightBrace,
    /// `(`
    LeftParen,
    /// `)`
    RightParen,
    /// `[`
    LeftBracket,
    /// `]`
    RightBracket,
    /// `()`
    Unit,
    /// `+`
    Plus,
    /// `+` (unary)
    UnaryPlus,
    /// `-`
    Minus,
    /// `-` (unary)
    UnaryMinus,
    /// `*`
    Multiply,
    /// `/`
    Divide,
    /// `%`
    Modulo,
    /// `**`
    PowerOf,
    /// `<<`
    LeftShift,
    /// `>>`
    RightShift,
    /// `;`
    SemiColon,
    /// `:`
    Colon,
    /// `::`
    DoubleColon,
    /// `=>`
    DoubleArrow,
    /// `_`
    Underscore,
    /// `,`
    Comma,
    /// `.`
    Period,
    /// `?.`
    ///
    /// Not available under `no_object`.
    #[cfg(not(feature = "no_object"))]
    Elvis,
    /// `??`
    DoubleQuestion,
    /// `?[`
    ///
    /// Not available under `no_index`.
    #[cfg(not(feature = "no_index"))]
    QuestionBracket,
    /// `..`
    ExclusiveRange,
    /// `..=`
    InclusiveRange,
    /// `#{`
    MapStart,
    /// `=`
    Equals,
    /// `true`
    True,
    /// `false`
    False,
    /// `let`
    Let,
    /// `const`
    Const,
    /// `if`
    If,
    /// `else`
    Else,
    /// `switch`
    Switch,
    /// `do`
    Do,
    /// `while`
    While,
    /// `until`
    Until,
    /// `loop`
    Loop,
    /// `for`
    For,
    /// `in`
    In,
    /// `!in`
    NotIn,
    /// `<`
    LessThan,
    /// `>`
    GreaterThan,
    /// `<=`
    LessThanEqualsTo,
    /// `>=`
    GreaterThanEqualsTo,
    /// `==`
    EqualsTo,
    /// `!=`
    NotEqualsTo,
    /// `!`
    Bang,
    /// `|`
    Pipe,
    /// `||`
    Or,
    /// `^`
    XOr,
    /// `&`
    Ampersand,
    /// `&&`
    And,
    /// `fn`
    ///
    /// Not available under `no_function`.
    #[cfg(not(feature = "no_function"))]
    Fn,
    /// `continue`
    Continue,
    /// `break`
    Break,
    /// `return`
    Return,
    /// `throw`
    Throw,
    /// `try`
    Try,
    /// `catch`
    Catch,
    /// `+=`
    PlusAssign,
    /// `-=`
    MinusAssign,
    /// `*=`
    MultiplyAssign,
    /// `/=`
    DivideAssign,
    /// `<<=`
    LeftShiftAssign,
    /// `>>=`
    RightShiftAssign,
    /// `&=`
    AndAssign,
    /// `|=`
    OrAssign,
    /// `^=`
    XOrAssign,
    /// `%=`
    ModuloAssign,
    /// `**=`
    PowerOfAssign,
    /// `private`
    ///
    /// Not available under `no_function`.
    #[cfg(not(feature = "no_function"))]
    Private,
    /// `import`
    ///
    /// Not available under `no_module`.
    #[cfg(not(feature = "no_module"))]
    Import,
    /// `export`
    ///
    /// Not available under `no_module`.
    #[cfg(not(feature = "no_module"))]
    Export,
    /// `as`
    ///
    /// Not available under `no_module`.
    #[cfg(not(feature = "no_module"))]
    As,
    /// A lexer error.
    LexError(Box<LexError>),
    /// A comment block.
    Comment(Box<String>),
    /// A reserved symbol.
    Reserved(Box<Identifier>),
    /// A custom keyword.
    ///
    /// Not available under `no_custom_syntax`.
    #[cfg(not(feature = "no_custom_syntax"))]
    Custom(Box<Identifier>),
    /// End of the input stream.
    ///
    /// Also used as the placeholder for empty slots in `KEYWORDS_LIST`.
    EOF,
}
278
279impl fmt::Display for Token {
280 #[inline(always)]
281 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
282 #[allow(clippy::enum_glob_use)]
283 use Token::*;
284
285 match self {
286 IntegerConstant(i) => write!(f, "{i}"),
287 #[cfg(not(feature = "no_float"))]
288 FloatConstant(v) => write!(f, "{}", v.0),
289 #[cfg(feature = "decimal")]
290 DecimalConstant(d) => write!(f, "{}", d.0),
291 StringConstant(s) => write!(f, r#""{s}""#),
292 InterpolatedString(..) => f.write_str("string"),
293 CharConstant(c) => write!(f, "{c}"),
294 Identifier(s) => f.write_str(s),
295 Reserved(s) => f.write_str(s),
296 #[cfg(not(feature = "no_custom_syntax"))]
297 Custom(s) => f.write_str(s),
298 LexError(err) => write!(f, "{err}"),
299 Comment(s) => f.write_str(s),
300
301 EOF => f.write_str("{EOF}"),
302
303 token => f.write_str(token.literal_syntax()),
304 }
305 }
306}
307
// Bounds checked by `Token::lookup_symbol_from_syntax` before it indexes
// `KEYWORD_ASSOC_VALUES` and `KEYWORDS_LIST`.

/// Minimum byte length of a syntax string accepted by the keyword lookup.
const MIN_KEYWORD_LEN: usize = 1;
/// Maximum byte length of a syntax string accepted by the keyword lookup
/// (the longest entry in `KEYWORDS_LIST`, `continue`, is 8 bytes).
const MAX_KEYWORD_LEN: usize = 8;
/// Smallest hash value accepted by the keyword lookup.
const MIN_KEYWORD_HASH_VALUE: usize = 1;
/// Largest hash value accepted by the keyword lookup; also the last valid
/// index into the 153-entry `KEYWORDS_LIST`.
const MAX_KEYWORD_HASH_VALUE: usize = 152;
316
/// Per-byte association values used by `Token::lookup_symbol_from_syntax`
/// to hash a syntax string into an index of `KEYWORDS_LIST`.
///
/// The first byte of the string is looked up directly; the second byte (if any)
/// is looked up at `byte + 1`, which is why the table has 257 entries.
/// A value of 153 pushes the hash past `MAX_KEYWORD_HASH_VALUE`, so bytes
/// mapped to it can never start a match.
// NOTE(review): the layout looks like a pre-computed perfect-hash table
// (e.g. generated by GNU `gperf`); confirm the generator before regenerating.
static KEYWORD_ASSOC_VALUES: [u8; 257] = [
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 115, 153, 100, 153, 110,
    105, 40, 80, 2, 20, 25, 125, 95, 15, 40, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 55,
    35, 10, 5, 0, 30, 110, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 120, 105, 100, 85, 90, 153, 125, 5,
    0, 125, 35, 10, 100, 153, 20, 0, 153, 10, 0, 45, 55, 0, 153, 50, 55, 5, 0, 153, 0, 0, 35, 153,
    45, 50, 30, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
    153,
];
/// Table of standard keywords and symbols, indexed by the hash value computed
/// in `Token::lookup_symbol_from_syntax`.
///
/// Unused slots hold `("", Token::EOF)`, which the lookup treats as "no match".
/// Entries for feature-gated tokens are replaced by an empty slot when the
/// feature is disabled, so every index stays at the same position.
static KEYWORDS_LIST: [(&str, Token); 153] = [
    ("", Token::EOF),
    (">", Token::GreaterThan),
    (">=", Token::GreaterThanEqualsTo),
    (")", Token::RightParen),
    ("", Token::EOF),
    ("const", Token::Const),
    ("=", Token::Equals),
    ("==", Token::EqualsTo),
    ("continue", Token::Continue),
    ("", Token::EOF),
    ("catch", Token::Catch),
    ("<", Token::LessThan),
    ("<=", Token::LessThanEqualsTo),
    ("for", Token::For),
    ("loop", Token::Loop),
    ("", Token::EOF),
    (".", Token::Period),
    ("<<", Token::LeftShift),
    ("<<=", Token::LeftShiftAssign),
    ("", Token::EOF),
    ("false", Token::False),
    ("*", Token::Multiply),
    ("*=", Token::MultiplyAssign),
    ("let", Token::Let),
    ("", Token::EOF),
    ("while", Token::While),
    ("+", Token::Plus),
    ("+=", Token::PlusAssign),
    ("", Token::EOF),
    ("", Token::EOF),
    ("throw", Token::Throw),
    ("}", Token::RightBrace),
    (">>", Token::RightShift),
    (">>=", Token::RightShiftAssign),
    ("", Token::EOF),
    ("", Token::EOF),
    (";", Token::SemiColon),
    ("=>", Token::DoubleArrow),
    ("", Token::EOF),
    ("else", Token::Else),
    ("", Token::EOF),
    ("/", Token::Divide),
    ("/=", Token::DivideAssign),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("{", Token::LeftBrace),
    ("**", Token::PowerOf),
    ("**=", Token::PowerOfAssign),
    ("", Token::EOF),
    ("", Token::EOF),
    ("|", Token::Pipe),
    ("|=", Token::OrAssign),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    (":", Token::Colon),
    ("..", Token::ExclusiveRange),
    ("..=", Token::InclusiveRange),
    ("", Token::EOF),
    ("until", Token::Until),
    ("switch", Token::Switch),
    #[cfg(not(feature = "no_function"))]
    ("private", Token::Private),
    #[cfg(feature = "no_function")]
    ("", Token::EOF),
    ("try", Token::Try),
    ("true", Token::True),
    ("break", Token::Break),
    ("return", Token::Return),
    #[cfg(not(feature = "no_function"))]
    ("fn", Token::Fn),
    #[cfg(feature = "no_function")]
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    #[cfg(not(feature = "no_module"))]
    ("import", Token::Import),
    #[cfg(feature = "no_module")]
    ("", Token::EOF),
    #[cfg(not(feature = "no_object"))]
    ("?.", Token::Elvis),
    #[cfg(feature = "no_object")]
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    #[cfg(not(feature = "no_module"))]
    ("export", Token::Export),
    #[cfg(feature = "no_module")]
    ("", Token::EOF),
    ("in", Token::In),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("(", Token::LeftParen),
    ("||", Token::Or),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("^", Token::XOr),
    ("^=", Token::XOrAssign),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("_", Token::Underscore),
    ("::", Token::DoubleColon),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("-", Token::Minus),
    ("-=", Token::MinusAssign),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("]", Token::RightBracket),
    ("()", Token::Unit),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("&", Token::Ampersand),
    ("&=", Token::AndAssign),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("%", Token::Modulo),
    ("%=", Token::ModuloAssign),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("!", Token::Bang),
    ("!=", Token::NotEqualsTo),
    ("!in", Token::NotIn),
    ("", Token::EOF),
    ("", Token::EOF),
    ("[", Token::LeftBracket),
    ("if", Token::If),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    (",", Token::Comma),
    ("do", Token::Do),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    #[cfg(not(feature = "no_module"))]
    ("as", Token::As),
    #[cfg(feature = "no_module")]
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    #[cfg(not(feature = "no_index"))]
    ("?[", Token::QuestionBracket),
    #[cfg(feature = "no_index")]
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("??", Token::DoubleQuestion),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("&&", Token::And),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("#{", Token::MapStart),
];
509
// Bounds for the reserved-word tables below (`RESERVED_ASSOC_VALUES` and
// `RESERVED_LIST`).
// NOTE(review): the lookup function that consumes these is not visible in this
// chunk; presumably it mirrors `Token::lookup_symbol_from_syntax`. Confirm.

/// Minimum byte length of a reserved keyword/symbol.
const MIN_RESERVED_LEN: usize = 1;
/// Maximum byte length of a reserved keyword/symbol
/// (the longest entry in `RESERVED_LIST`, `is_def_var`, is 10 bytes).
const MAX_RESERVED_LEN: usize = 10;
/// Smallest hash value of a reserved keyword/symbol.
const MIN_RESERVED_HASH_VALUE: usize = 1;
/// Largest hash value of a reserved keyword/symbol; also the last valid index
/// into the 150-entry `RESERVED_LIST`.
const MAX_RESERVED_HASH_VALUE: usize = 149;
518
/// Per-byte association values for hashing reserved keywords/symbols into an
/// index of `RESERVED_LIST`.
///
/// A value of 150 is larger than `MAX_RESERVED_HASH_VALUE`.
// NOTE(review): the consuming lookup is not visible in this chunk; presumably a
// byte mapped to 150 can never contribute to a match, as with the 153 filler in
// `KEYWORD_ASSOC_VALUES`. The layout looks like a pre-computed perfect-hash
// table (e.g. GNU `gperf` output); confirm before regenerating.
static RESERVED_ASSOC_VALUES: [u8; 256] = [
    150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
    150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 10, 150, 5, 35, 150, 150,
    150, 45, 35, 30, 30, 150, 20, 15, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 35,
    30, 15, 5, 25, 0, 25, 150, 150, 150, 150, 150, 65, 150, 150, 150, 150, 150, 150, 150, 150, 150,
    150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 40, 150, 150, 150, 150, 150, 0, 150, 0,
    0, 0, 15, 45, 10, 15, 150, 150, 35, 25, 10, 50, 0, 150, 5, 0, 15, 0, 5, 25, 45, 15, 150, 150,
    25, 150, 20, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
    150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
    150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
    150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
    150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
    150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
    150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
];
/// Table of reserved keywords and symbols, indexed by hash value.
///
/// Each entry is the syntax text followed by three flags. Unused slots hold
/// `("", false, false, false)`. Where the first flag is a `cfg!(...)`
/// expression, its value depends on the enabled crate features.
// NOTE(review): the meaning of the three flags is defined by the lookup code,
// which is not visible in this chunk. Presumably they are "is reserved",
// "can be called as a function" and "can be called in method style"; confirm
// against the consumer before relying on this.
static RESERVED_LIST: [(&str, bool, bool, bool); 150] = [
    ("", false, false, false),
    ("?", true, false, false),
    ("as", cfg!(feature = "no_module"), false, false),
    ("use", true, false, false),
    ("case", true, false, false),
    ("async", true, false, false),
    ("public", true, false, false),
    ("package", true, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("super", true, false, false),
    ("#", true, false, false),
    ("private", cfg!(feature = "no_function"), false, false),
    ("var", true, false, false),
    ("protected", true, false, false),
    ("spawn", true, false, false),
    ("shared", true, false, false),
    ("is", true, false, false),
    ("===", true, false, false),
    ("sync", true, false, false),
    ("curry", true, true, true),
    ("static", true, false, false),
    ("default", true, false, false),
    ("!==", true, false, false),
    ("is_shared", cfg!(not(feature = "no_closure")), true, true),
    ("print", true, true, false),
    ("", false, false, false),
    ("#!", true, false, false),
    ("", false, false, false),
    ("this", true, false, false),
    ("is_def_var", true, true, false),
    ("thread", true, false, false),
    ("?.", cfg!(feature = "no_object"), false, false),
    ("", false, false, false),
    ("is_def_fn", cfg!(not(feature = "no_function")), true, false),
    ("yield", true, false, false),
    ("", false, false, false),
    ("fn", cfg!(feature = "no_function"), false, false),
    ("new", true, false, false),
    ("call", true, true, true),
    ("match", true, false, false),
    ("~", true, false, false),
    ("!.", true, false, false),
    ("", false, false, false),
    ("eval", true, true, false),
    ("await", true, false, false),
    ("", false, false, false),
    (":=", true, false, false),
    ("...", true, false, false),
    ("null", true, false, false),
    ("debug", true, true, false),
    ("@", true, false, false),
    ("type_of", true, true, true),
    ("", false, false, false),
    ("with", true, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("<-", true, false, false),
    ("", false, false, false),
    ("void", true, false, false),
    ("", false, false, false),
    ("import", cfg!(feature = "no_module"), false, false),
    ("--", true, false, false),
    ("nil", true, false, false),
    ("exit", false, false, false),
    ("", false, false, false),
    ("export", cfg!(feature = "no_module"), false, false),
    ("<|", true, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("$", true, false, false),
    ("->", true, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("|>", true, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("module", true, false, false),
    ("?[", cfg!(feature = "no_index"), false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("Fn", true, true, false),
    ("::<", true, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("++", true, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    (":;", true, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("*)", true, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("(*", true, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("go", true, false, false),
    ("", false, false, false),
    ("goto", true, false, false),
];
686
687impl Token {
688 #[must_use]
690 pub const fn is_literal(&self) -> bool {
691 #[allow(clippy::enum_glob_use)]
692 use Token::*;
693
694 match self {
695 IntegerConstant(..) => false,
696 #[cfg(not(feature = "no_float"))]
697 FloatConstant(..) => false,
698 #[cfg(feature = "decimal")]
699 DecimalConstant(..) => false,
700 StringConstant(..)
701 | InterpolatedString(..)
702 | CharConstant(..)
703 | Identifier(..)
704 | Reserved(..) => false,
705 #[cfg(not(feature = "no_custom_syntax"))]
706 Custom(..) => false,
707 LexError(..) | Comment(..) => false,
708
709 EOF => false,
710
711 _ => true,
712 }
713 }
714 #[must_use]
720 pub const fn literal_syntax(&self) -> &'static str {
721 #[allow(clippy::enum_glob_use)]
722 use Token::*;
723
724 match self {
725 LeftBrace => "{",
726 RightBrace => "}",
727 LeftParen => "(",
728 RightParen => ")",
729 LeftBracket => "[",
730 RightBracket => "]",
731 Unit => "()",
732 Plus => "+",
733 UnaryPlus => "+",
734 Minus => "-",
735 UnaryMinus => "-",
736 Multiply => "*",
737 Divide => "/",
738 SemiColon => ";",
739 Colon => ":",
740 DoubleColon => "::",
741 DoubleArrow => "=>",
742 Underscore => "_",
743 Comma => ",",
744 Period => ".",
745 #[cfg(not(feature = "no_object"))]
746 Elvis => "?.",
747 DoubleQuestion => "??",
748 #[cfg(not(feature = "no_index"))]
749 QuestionBracket => "?[",
750 ExclusiveRange => "..",
751 InclusiveRange => "..=",
752 MapStart => "#{",
753 Equals => "=",
754 True => "true",
755 False => "false",
756 Let => "let",
757 Const => "const",
758 If => "if",
759 Else => "else",
760 Switch => "switch",
761 Do => "do",
762 While => "while",
763 Until => "until",
764 Loop => "loop",
765 For => "for",
766 In => "in",
767 NotIn => "!in",
768 LessThan => "<",
769 GreaterThan => ">",
770 Bang => "!",
771 LessThanEqualsTo => "<=",
772 GreaterThanEqualsTo => ">=",
773 EqualsTo => "==",
774 NotEqualsTo => "!=",
775 Pipe => "|",
776 Or => "||",
777 Ampersand => "&",
778 And => "&&",
779 Continue => "continue",
780 Break => "break",
781 Return => "return",
782 Throw => "throw",
783 Try => "try",
784 Catch => "catch",
785 PlusAssign => "+=",
786 MinusAssign => "-=",
787 MultiplyAssign => "*=",
788 DivideAssign => "/=",
789 LeftShiftAssign => "<<=",
790 RightShiftAssign => ">>=",
791 AndAssign => "&=",
792 OrAssign => "|=",
793 XOrAssign => "^=",
794 LeftShift => "<<",
795 RightShift => ">>",
796 XOr => "^",
797 Modulo => "%",
798 ModuloAssign => "%=",
799 PowerOf => "**",
800 PowerOfAssign => "**=",
801
802 #[cfg(not(feature = "no_function"))]
803 Fn => "fn",
804 #[cfg(not(feature = "no_function"))]
805 Private => "private",
806
807 #[cfg(not(feature = "no_module"))]
808 Import => "import",
809 #[cfg(not(feature = "no_module"))]
810 Export => "export",
811 #[cfg(not(feature = "no_module"))]
812 As => "as",
813
814 _ => panic!("token is not a literal symbol"),
815 }
816 }
817
818 #[inline]
820 #[must_use]
821 pub const fn is_op_assignment(&self) -> bool {
822 #[allow(clippy::enum_glob_use)]
823 use Token::*;
824
825 matches!(
826 self,
827 PlusAssign
828 | MinusAssign
829 | MultiplyAssign
830 | DivideAssign
831 | LeftShiftAssign
832 | RightShiftAssign
833 | ModuloAssign
834 | PowerOfAssign
835 | AndAssign
836 | OrAssign
837 | XOrAssign
838 )
839 }
840
841 #[must_use]
843 pub const fn get_base_op_from_assignment(&self) -> Option<Self> {
844 #[allow(clippy::enum_glob_use)]
845 use Token::*;
846
847 Some(match self {
848 PlusAssign => Plus,
849 MinusAssign => Minus,
850 MultiplyAssign => Multiply,
851 DivideAssign => Divide,
852 LeftShiftAssign => LeftShift,
853 RightShiftAssign => RightShift,
854 ModuloAssign => Modulo,
855 PowerOfAssign => PowerOf,
856 AndAssign => Ampersand,
857 OrAssign => Pipe,
858 XOrAssign => XOr,
859 _ => return None,
860 })
861 }
862
863 #[inline]
865 #[must_use]
866 pub const fn has_op_assignment(&self) -> bool {
867 #[allow(clippy::enum_glob_use)]
868 use Token::*;
869
870 matches!(
871 self,
872 Plus | Minus
873 | Multiply
874 | Divide
875 | LeftShift
876 | RightShift
877 | Modulo
878 | PowerOf
879 | Ampersand
880 | Pipe
881 | XOr
882 )
883 }
884
885 #[must_use]
887 pub const fn convert_to_op_assignment(&self) -> Option<Self> {
888 #[allow(clippy::enum_glob_use)]
889 use Token::*;
890
891 Some(match self {
892 Plus => PlusAssign,
893 Minus => MinusAssign,
894 Multiply => MultiplyAssign,
895 Divide => DivideAssign,
896 LeftShift => LeftShiftAssign,
897 RightShift => RightShiftAssign,
898 Modulo => ModuloAssign,
899 PowerOf => PowerOfAssign,
900 Ampersand => AndAssign,
901 Pipe => OrAssign,
902 XOr => XOrAssign,
903 _ => return None,
904 })
905 }
906
907 #[inline]
909 #[must_use]
910 pub fn lookup_symbol_from_syntax(syntax: &str) -> Option<Self> {
911 let utf8 = syntax.as_bytes();
914 let len = utf8.len();
915
916 if !(MIN_KEYWORD_LEN..=MAX_KEYWORD_LEN).contains(&len) {
917 return None;
918 }
919
920 let mut hash_val = len;
921
922 match len {
923 1 => (),
924 _ => hash_val += KEYWORD_ASSOC_VALUES[(utf8[1] as usize) + 1] as usize,
925 }
926 hash_val += KEYWORD_ASSOC_VALUES[utf8[0] as usize] as usize;
927
928 if !(MIN_KEYWORD_HASH_VALUE..=MAX_KEYWORD_HASH_VALUE).contains(&hash_val) {
929 return None;
930 }
931
932 match KEYWORDS_LIST[hash_val] {
933 (_, Self::EOF) => None,
934 (s, ref t) if s.len() == len && s.as_bytes()[0] == utf8[0] && s == syntax => {
937 Some(t.clone())
938 }
939 _ => None,
940 }
941 }
942
943 #[must_use]
946 pub const fn is_next_unary(&self) -> bool {
947 #[allow(clippy::enum_glob_use)]
948 use Token::*;
949
950 match self {
951 SemiColon | Colon | Comma | DoubleQuestion | ExclusiveRange | InclusiveRange | LeftBrace | LeftParen | LeftBracket | Plus |
966 PlusAssign |
967 UnaryPlus |
968 Minus |
969 MinusAssign |
970 UnaryMinus |
971 Multiply |
972 MultiplyAssign |
973 Divide |
974 DivideAssign |
975 Modulo |
976 ModuloAssign |
977 PowerOf |
978 PowerOfAssign |
979 LeftShift |
980 LeftShiftAssign |
981 RightShift |
982 RightShiftAssign |
983 Equals |
984 EqualsTo |
985 NotEqualsTo |
986 LessThan |
987 GreaterThan |
988 Bang |
989 LessThanEqualsTo |
990 GreaterThanEqualsTo |
991 Pipe |
992 Ampersand |
993 If |
994 While |
996 Until |
997 In |
998 NotIn |
999 And |
1000 AndAssign |
1001 Or |
1002 OrAssign |
1003 XOr |
1004 XOrAssign |
1005 Return |
1006 Throw => true,
1007
1008 #[cfg(not(feature = "no_index"))]
1009 QuestionBracket => true, LexError(..) => true,
1012
1013 _ => false,
1014 }
1015 }
1016
1017 #[must_use]
1019 pub const fn precedence(&self) -> Option<Precedence> {
1020 #[allow(clippy::enum_glob_use)]
1021 use Token::*;
1022
1023 Precedence::new(match self {
1024 Or | XOr | Pipe => 30,
1025
1026 And | Ampersand => 60,
1027
1028 EqualsTo | NotEqualsTo => 90,
1029
1030 In | NotIn => 110,
1031
1032 LessThan | LessThanEqualsTo | GreaterThan | GreaterThanEqualsTo => 130,
1033
1034 DoubleQuestion => 135,
1035
1036 ExclusiveRange | InclusiveRange => 140,
1037
1038 Plus | Minus => 150,
1039
1040 Divide | Multiply | Modulo => 180,
1041
1042 PowerOf => 190,
1043
1044 LeftShift | RightShift => 210,
1045
1046 _ => 0,
1047 })
1048 }
1049
1050 #[must_use]
1052 pub const fn is_bind_right(&self) -> bool {
1053 #[allow(clippy::enum_glob_use)]
1054 use Token::*;
1055
1056 match self {
1057 PowerOf => true,
1059
1060 _ => false,
1061 }
1062 }
1063
1064 #[must_use]
1066 pub const fn is_standard_symbol(&self) -> bool {
1067 #[allow(clippy::enum_glob_use)]
1068 use Token::*;
1069
1070 match self {
1071 LeftBrace | RightBrace | LeftParen | RightParen | LeftBracket | RightBracket | Plus
1072 | UnaryPlus | Minus | UnaryMinus | Multiply | Divide | Modulo | PowerOf | LeftShift
1073 | RightShift | SemiColon | Colon | DoubleColon | Comma | Period | DoubleQuestion
1074 | ExclusiveRange | InclusiveRange | MapStart | Equals | LessThan | GreaterThan
1075 | LessThanEqualsTo | GreaterThanEqualsTo | EqualsTo | NotEqualsTo | Bang | Pipe
1076 | Or | XOr | Ampersand | And | PlusAssign | MinusAssign | MultiplyAssign
1077 | DivideAssign | LeftShiftAssign | RightShiftAssign | AndAssign | OrAssign
1078 | XOrAssign | ModuloAssign | PowerOfAssign => true,
1079
1080 #[cfg(not(feature = "no_object"))]
1081 Elvis => true,
1082
1083 #[cfg(not(feature = "no_index"))]
1084 QuestionBracket => true,
1085
1086 _ => false,
1087 }
1088 }
1089
1090 #[inline]
1092 #[must_use]
1093 pub const fn is_standard_keyword(&self) -> bool {
1094 #[allow(clippy::enum_glob_use)]
1095 use Token::*;
1096
1097 match self {
1098 #[cfg(not(feature = "no_function"))]
1099 Fn | Private => true,
1100
1101 #[cfg(not(feature = "no_module"))]
1102 Import | Export | As => true,
1103
1104 True | False | Let | Const | If | Else | Do | While | Until | Loop | For | In
1105 | Continue | Break | Return | Throw | Try | Catch => true,
1106
1107 _ => false,
1108 }
1109 }
1110
1111 #[inline(always)]
1113 #[must_use]
1114 pub const fn is_reserved(&self) -> bool {
1115 matches!(self, Self::Reserved(..))
1116 }
1117
1118 #[cfg(not(feature = "no_custom_syntax"))]
1120 #[inline(always)]
1121 #[must_use]
1122 pub const fn is_custom(&self) -> bool {
1123 matches!(self, Self::Custom(..))
1124 }
1125}
1126
1127impl From<Token> for String {
1128 #[inline(always)]
1129 fn from(token: Token) -> Self {
1130 token.to_string()
1131 }
1132}
1133
/// State of the tokenizer.
#[derive(Debug, Clone, Eq, PartialEq, Default)]
pub struct TokenizeState {
    /// Maximum length of a string; `None` means no limit is checked.
    ///
    /// Not available under `unchecked`.
    #[cfg(not(feature = "unchecked"))]
    pub max_string_len: Option<std::num::NonZeroUsize>,
    /// Flag stating that the next token cannot be a unary operator.
    pub next_token_cannot_be_unary: bool,
    /// Shared [`TokenizerControl`] handle.
    // NOTE(review): presumably lets code outside the tokenizer steer it; confirm.
    pub tokenizer_control: TokenizerControl,
    /// Comment nesting level.
    // NOTE(review): presumably non-zero while inside a block comment; the code
    // that updates it is not visible in this chunk.
    pub comment_level: usize,
    /// Include comments?
    pub include_comments: bool,
    /// The terminator text of the string literal currently being parsed, if any.
    ///
    /// The string-literal parsers set this to `Some` when they start and reset
    /// it to `None` once the literal is terminated.
    pub is_within_text_terminated_by: Option<SmartString>,
    /// Textual syntax of the current token, if any.
    ///
    /// The string-literal parsers only write to it when it is already `Some`.
    pub last_token: Option<SmartString>,
}
1158
/// Trait that encapsulates a peekable character input stream.
pub trait InputStream {
    /// Un-get a character back into the stream.
    // NOTE(review): how many characters may be un-got at once is up to the
    // implementor and not visible here.
    fn unget(&mut self, ch: char);
    /// Get the next character from the stream, or `None` if there is none.
    fn get_next(&mut self) -> Option<char>;
    /// Peek the next character in the stream, or `None` if there is none.
    #[must_use]
    fn peek_next(&mut self) -> Option<char>;

    /// Advance `pos` by one character, then consume and return the next character.
    ///
    /// `pos` is advanced even when the stream has no more characters.
    #[inline(always)]
    fn eat_next_and_advance(&mut self, pos: &mut Position) -> Option<char> {
        pos.advance();
        self.get_next()
    }
}
1179
1180pub fn parse_raw_string_literal(
1217 stream: &mut (impl InputStream + ?Sized),
1218 state: &mut TokenizeState,
1219 pos: &mut Position,
1220 mut hash_count: usize,
1221) -> Result<(SmartString, Position), (LexError, Position)> {
1222 let start = *pos;
1223 let mut first_char = Position::NONE;
1224
1225 if hash_count == 0 {
1226 hash_count = 1;
1229
1230 while let Some('#') = stream.peek_next() {
1231 stream.eat_next_and_advance(pos);
1232 hash_count += 1;
1233 }
1234
1235 match stream.get_next() {
1237 Some('"') => pos.advance(),
1238 Some(c) => return Err((LERR::UnexpectedInput(c.to_string()), start)),
1239 None => return Err((LERR::UnterminatedString, start)),
1240 }
1241 }
1242
1243 let collect: SmartString = repeat('#').take(hash_count).collect();
1244 if let Some(ref mut last) = state.last_token {
1245 last.clear();
1246 last.push_str(&collect);
1247 last.push('"');
1248 }
1249 state.is_within_text_terminated_by = Some(collect);
1250
1251 let mut seen_hashes: Option<usize> = None;
1256 let mut result = SmartString::new_const();
1257
1258 loop {
1259 let next_char = match stream.get_next() {
1260 Some(ch) => ch,
1261 None => break, };
1263 pos.advance();
1264
1265 match (next_char, &mut seen_hashes) {
1266 ('"', None) => seen_hashes = Some(0),
1268 ('"', Some(count)) => {
1270 result.push('"');
1272 result.extend(repeat('#').take(*count as usize));
1273 seen_hashes = Some(0);
1274 }
1275 ('#', Some(count)) => {
1277 *count += 1;
1278 if *count == hash_count {
1279 state.is_within_text_terminated_by = None;
1280 break;
1281 }
1282 }
1283 (c, Some(count)) => {
1285 result.push('"');
1287 result.extend(repeat('#').take(*count as usize));
1288 result.push(c);
1289 seen_hashes = None;
1290 }
1291 ('\n', _) => {
1293 result.push('\n');
1294 pos.new_line();
1295 }
1296 (c, None) => result.push(c),
1298 }
1299
1300 #[cfg(not(feature = "unchecked"))]
1302 if let Some(max) = state.max_string_len {
1303 if result.len() > max.get() {
1304 return Err((LexError::StringTooLong(max.get()), start));
1305 }
1306 }
1307
1308 if first_char.is_none() {
1309 first_char = *pos;
1310 }
1311 }
1312
1313 Ok((result, first_char))
1314}
1315
1316pub fn parse_string_literal(
1347 stream: &mut (impl InputStream + ?Sized),
1348 state: &mut TokenizeState,
1349 pos: &mut Position,
1350 termination_char: char,
1351 verbatim: bool,
1352 allow_line_continuation: bool,
1353 allow_interpolation: bool,
1354) -> Result<(SmartString, bool, Position), (LexError, Position)> {
1355 let mut result = SmartString::new_const();
1356 let mut escape = SmartString::new_const();
1357
1358 let start = *pos;
1359 let mut first_char = Position::NONE;
1360 let mut interpolated = false;
1361 #[cfg(not(feature = "no_position"))]
1362 let mut skip_space_until = 0;
1363
1364 state.is_within_text_terminated_by = Some(termination_char.to_string().into());
1365 if let Some(ref mut last) = state.last_token {
1366 last.clear();
1367 last.push(termination_char);
1368 }
1369
1370 loop {
1371 debug_assert!(
1372 !verbatim || escape.is_empty(),
1373 "verbatim strings should not have any escapes"
1374 );
1375
1376 let next_char = match stream.get_next() {
1377 Some(ch) => {
1378 pos.advance();
1379 ch
1380 }
1381 None if verbatim => {
1382 debug_assert_eq!(escape, "", "verbatim strings should not have any escapes");
1383 pos.advance();
1384 break;
1385 }
1386 None if allow_line_continuation && !escape.is_empty() => {
1387 debug_assert_eq!(escape, "\\", "unexpected escape {escape} at end of line");
1388 pos.advance();
1389 break;
1390 }
1391 None => {
1392 pos.advance();
1393 state.is_within_text_terminated_by = None;
1394 return Err((LERR::UnterminatedString, start));
1395 }
1396 };
1397
1398 if let Some(ref mut last) = state.last_token {
1399 last.push(next_char);
1400 }
1401
1402 if allow_interpolation
1404 && next_char == '$'
1405 && escape.is_empty()
1406 && stream.peek_next().map_or(false, |ch| ch == '{')
1407 {
1408 interpolated = true;
1409 state.is_within_text_terminated_by = None;
1410 break;
1411 }
1412
1413 #[cfg(not(feature = "unchecked"))]
1415 if let Some(max) = state.max_string_len {
1416 if result.len() > max.get() {
1417 return Err((LexError::StringTooLong(max.get()), start));
1418 }
1419 }
1420
1421 if termination_char == next_char && escape.is_empty() {
1423 if stream.peek_next().map_or(false, |c| c == termination_char) {
1425 stream.eat_next_and_advance(pos);
1426 if let Some(ref mut last) = state.last_token {
1427 last.push(termination_char);
1428 }
1429 } else {
1430 state.is_within_text_terminated_by = None;
1431 break;
1432 }
1433 }
1434
1435 if first_char.is_none() {
1436 first_char = *pos;
1437 }
1438
1439 match next_char {
1440 '\r' if stream.peek_next().map_or(false, |ch| ch == '\n') => (),
1442 'r' if !escape.is_empty() => {
1444 escape.clear();
1445 result.push_str("\r");
1446 }
1447 'n' if !escape.is_empty() => {
1449 escape.clear();
1450 result.push_str("\n");
1451 }
1452 '\\' if !verbatim && escape.is_empty() => {
1454 escape.push_str("\\");
1455 }
1456 '\\' if !escape.is_empty() => {
1458 escape.clear();
1459 result.push_str("\\");
1460 }
1461 't' if !escape.is_empty() => {
1463 escape.clear();
1464 result.push_str("\t");
1465 }
1466 ch @ ('x' | 'u' | 'U') if !escape.is_empty() => {
1468 let mut seq = escape.clone();
1469 escape.clear();
1470 seq.push(ch);
1471
1472 let mut out_val: u32 = 0;
1473 let len = match ch {
1474 'x' => 2,
1475 'u' => 4,
1476 'U' => 8,
1477 c => unreachable!("x or u or U expected but gets '{}'", c),
1478 };
1479
1480 for _ in 0..len {
1481 let c = stream
1482 .get_next()
1483 .ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?;
1484
1485 pos.advance();
1486 seq.push(c);
1487 if let Some(ref mut last) = state.last_token {
1488 last.push(c);
1489 }
1490
1491 out_val *= 16;
1492 out_val += c
1493 .to_digit(16)
1494 .ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?;
1495 }
1496
1497 result.push(
1498 char::from_u32(out_val)
1499 .ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?,
1500 );
1501 }
1502
1503 '\n' if verbatim => {
1505 debug_assert_eq!(escape, "", "verbatim strings should not have any escapes");
1506 pos.new_line();
1507 result.push_str("\n");
1508 }
1509
1510 '\n' if allow_line_continuation && !escape.is_empty() => {
1512 debug_assert_eq!(escape, "\\", "unexpected escape {escape} at end of line");
1513 escape.clear();
1514 pos.new_line();
1515
1516 #[cfg(not(feature = "no_position"))]
1517 {
1518 let start_position = start.position().unwrap();
1519 skip_space_until = start_position + 1;
1520 }
1521 }
1522
1523 '\n' => {
1525 pos.rewind();
1526 state.is_within_text_terminated_by = None;
1527 return Err((LERR::UnterminatedString, start));
1528 }
1529
1530 ch if termination_char == ch && !escape.is_empty() => {
1532 escape.clear();
1533 result.push(termination_char);
1534 }
1535
1536 ch if !escape.is_empty() => {
1538 escape.push(ch);
1539
1540 return Err((LERR::MalformedEscapeSequence(escape.to_string()), *pos));
1541 }
1542
1543 #[cfg(not(feature = "no_position"))]
1545 ch if ch.is_whitespace() && pos.position().unwrap() < skip_space_until => (),
1546
1547 ch => {
1549 escape.clear();
1550 result.push(ch);
1551
1552 #[cfg(not(feature = "no_position"))]
1553 {
1554 skip_space_until = 0;
1555 }
1556 }
1557 }
1558 }
1559
1560 #[cfg(not(feature = "unchecked"))]
1562 if let Some(max) = state.max_string_len {
1563 if result.len() > max.get() {
1564 return Err((LexError::StringTooLong(max.get()), start));
1565 }
1566 }
1567
1568 Ok((result, interpolated, first_char))
1569}
1570
1571fn scan_block_comment(
1573 stream: &mut (impl InputStream + ?Sized),
1574 level: usize,
1575 pos: &mut Position,
1576 comment: Option<&mut String>,
1577) -> usize {
1578 let mut level = level;
1579 let mut comment = comment;
1580
1581 while let Some(c) = stream.get_next() {
1582 pos.advance();
1583
1584 if let Some(comment) = comment.as_mut() {
1585 comment.push(c);
1586 }
1587
1588 match c {
1589 '/' => {
1590 if let Some(c2) = stream.peek_next().filter(|&ch| ch == '*') {
1591 stream.eat_next_and_advance(pos);
1592 if let Some(comment) = comment.as_mut() {
1593 comment.push(c2);
1594 }
1595 level += 1;
1596 }
1597 }
1598 '*' => {
1599 if let Some(c2) = stream.peek_next().filter(|&ch| ch == '/') {
1600 stream.eat_next_and_advance(pos);
1601 if let Some(comment) = comment.as_mut() {
1602 comment.push(c2);
1603 }
1604 level -= 1;
1605 }
1606 }
1607 '\n' => pos.new_line(),
1608 _ => (),
1609 }
1610
1611 if level == 0 {
1612 break;
1613 }
1614 }
1615
1616 level
1617}
1618
/// Test if the given character is an ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`).
#[inline(always)]
const fn is_hex_digit(c: char) -> bool {
    matches!(c, '0'..='9' | 'a'..='f' | 'A'..='F')
}
1624
/// Test if the given character is an ASCII decimal digit (`0-9`).
#[inline(always)]
const fn is_numeric_digit(c: char) -> bool {
    matches!(c, '0'..='9')
}
1630
/// Test if the given character is an octal digit (`0-7`).
#[inline(always)]
const fn is_octal_digit(c: char) -> bool {
    match c {
        '0'..='7' => true,
        _ => false,
    }
}
1636
/// Test if the given character is a binary digit (`0` or `1`).
#[inline(always)]
const fn is_binary_digit(c: char) -> bool {
    matches!(c, '0' | '1')
}
1642
/// Test if the comment text begins like a doc-comment.
///
/// The text qualifies when it starts with exactly three slashes (`///` but not `////`)
/// or with `/**` that is not immediately followed by another `*`.
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
#[inline]
#[must_use]
pub fn is_doc_comment(comment: &str) -> bool {
    let line_doc = comment
        .strip_prefix("///")
        .map_or(false, |rest| !rest.starts_with('/'));

    let block_doc = comment
        .strip_prefix("/**")
        .map_or(false, |rest| !rest.starts_with('*'));

    line_doc || block_doc
}
1652
1653#[inline(always)]
1656#[must_use]
1657pub fn get_next_token(
1658 stream: &mut (impl InputStream + ?Sized),
1659 state: &mut TokenizeState,
1660 pos: &mut Position,
1661) -> (Token, Position) {
1662 let result = get_next_token_inner(stream, state, pos);
1663
1664 state.next_token_cannot_be_unary = !result.0.is_next_unary();
1666
1667 result
1668}
1669
1670#[must_use]
1672fn get_next_token_inner(
1673 stream: &mut (impl InputStream + ?Sized),
1674 state: &mut TokenizeState,
1675 pos: &mut Position,
1676) -> (Token, Position) {
1677 state.last_token.as_mut().map(SmartString::clear);
1678
1679 if state.comment_level > 0 {
1681 let start_pos = *pos;
1682 let mut comment = String::new();
1683 let comment_buf = state.include_comments.then_some(&mut comment);
1684
1685 state.comment_level = scan_block_comment(stream, state.comment_level, pos, comment_buf);
1686
1687 let return_comment = state.include_comments;
1688
1689 #[cfg(not(feature = "no_function"))]
1690 #[cfg(feature = "metadata")]
1691 let return_comment = return_comment || is_doc_comment(&comment);
1692
1693 if return_comment {
1694 return (Token::Comment(comment.into()), start_pos);
1695 }
1696
1697 if state.comment_level > 0 {
1699 return (Token::EOF, *pos);
1700 }
1701 }
1702
1703 match state.is_within_text_terminated_by.take() {
1705 Some(ch) if ch.starts_with('#') => {
1706 return parse_raw_string_literal(stream, state, pos, ch.len()).map_or_else(
1707 |(err, err_pos)| (Token::LexError(err.into()), err_pos),
1708 |(result, start_pos)| (Token::StringConstant(result.into()), start_pos),
1709 )
1710 }
1711 Some(ch) => {
1712 let c = ch.chars().next().unwrap();
1713
1714 return parse_string_literal(stream, state, pos, c, true, false, true).map_or_else(
1715 |(err, err_pos)| (Token::LexError(err.into()), err_pos),
1716 |(result, interpolated, start_pos)| {
1717 if interpolated {
1718 (Token::InterpolatedString(result.into()), start_pos)
1719 } else {
1720 (Token::StringConstant(result.into()), start_pos)
1721 }
1722 },
1723 );
1724 }
1725 None => (),
1726 }
1727
1728 let mut negated: Option<Position> = None;
1729
1730 while let Some(c) = stream.get_next() {
1731 pos.advance();
1732
1733 let start_pos = *pos;
1734 let cc = stream.peek_next().unwrap_or('\0');
1735
1736 match (c, cc) {
1738 ('0'..='9', ..) => {
1740 let mut result = SmartString::new_const();
1741 let mut radix_base: Option<u32> = None;
1742 let mut valid: fn(char) -> bool = is_numeric_digit;
1743 let mut _has_period = false;
1744 let mut _has_e = false;
1745
1746 result.push(c);
1747
1748 while let Some(next_char) = stream.peek_next() {
1749 match next_char {
1750 NUMBER_SEPARATOR => {
1751 stream.eat_next_and_advance(pos);
1752 }
1753 ch if valid(ch) => {
1754 result.push(ch);
1755 stream.eat_next_and_advance(pos);
1756 }
1757 #[cfg(any(not(feature = "no_float"), feature = "decimal"))]
1758 '.' if !_has_period && radix_base.is_none() => {
1759 stream.get_next().unwrap();
1760
1761 match stream.peek_next() {
1763 Some('0'..='9') => {
1765 result.push_str(".");
1766 pos.advance();
1767 _has_period = true;
1768 }
1769 Some(NUMBER_SEPARATOR) => {
1771 stream.unget('.');
1772 break;
1773 }
1774 Some('.') => {
1776 stream.unget('.');
1777 break;
1778 }
1779 Some(ch) if !is_id_first_alphabetic(ch) => {
1781 result.push_str(".");
1782 pos.advance();
1783 result.push_str("0");
1784 _has_period = true;
1785 }
1786 _ => {
1788 stream.unget('.');
1789 break;
1790 }
1791 }
1792 }
1793 #[cfg(not(feature = "no_float"))]
1794 'e' if !_has_e && radix_base.is_none() => {
1795 stream.get_next().unwrap();
1796
1797 match stream.peek_next() {
1799 Some('0'..='9') => {
1801 result.push_str("e");
1802 pos.advance();
1803 _has_e = true;
1804 _has_period = true;
1805 }
1806 Some('+' | '-') => {
1808 result.push_str("e");
1809 pos.advance();
1810 result.push(stream.get_next().unwrap());
1811 pos.advance();
1812 _has_e = true;
1813 _has_period = true;
1814 }
1815 _ => {
1817 stream.unget('e');
1818 break;
1819 }
1820 }
1821 }
1822 ch @ ('x' | 'o' | 'b' | 'X' | 'O' | 'B')
1824 if c == '0' && result.len() <= 1 =>
1825 {
1826 result.push(ch);
1827 stream.eat_next_and_advance(pos);
1828
1829 valid = match ch {
1830 'x' | 'X' => is_hex_digit,
1831 'o' | 'O' => is_octal_digit,
1832 'b' | 'B' => is_binary_digit,
1833 c => unreachable!("x/X or o/O or b/B expected but gets '{}'", c),
1834 };
1835
1836 radix_base = Some(match ch {
1837 'x' | 'X' => 16,
1838 'o' | 'O' => 8,
1839 'b' | 'B' => 2,
1840 c => unreachable!("x/X or o/O or b/B expected but gets '{}'", c),
1841 });
1842 }
1843
1844 _ => break,
1845 }
1846 }
1847
1848 let num_pos = negated.map_or(start_pos, |negated_pos| {
1849 result.insert(0, '-');
1850 negated_pos
1851 });
1852
1853 if let Some(ref mut last) = state.last_token {
1854 *last = result.clone();
1855 }
1856
1857 let token = if let Some(radix) = radix_base {
1859 let result = &result[2..];
1860
1861 UNSIGNED_INT::from_str_radix(result, radix)
1862 .map(|v| v as INT)
1863 .map_or_else(
1864 |_| Token::LexError(LERR::MalformedNumber(result.to_string()).into()),
1865 Token::IntegerConstant,
1866 )
1867 } else {
1868 (|| {
1869 let num = INT::from_str(&result).map(Token::IntegerConstant);
1870
1871 #[cfg(not(feature = "no_float"))]
1873 if num.is_err() {
1874 if let Ok(v) = crate::types::FloatWrapper::from_str(&result) {
1875 return Token::FloatConstant((v, result).into());
1876 }
1877 }
1878
1879 #[cfg(feature = "decimal")]
1881 if num.is_err() {
1882 if let Ok(v) = rust_decimal::Decimal::from_str(&result) {
1883 return Token::DecimalConstant((v, result).into());
1884 }
1885 }
1886
1887 #[cfg(feature = "decimal")]
1889 if num.is_err() {
1890 if let Ok(v) = rust_decimal::Decimal::from_scientific(&result) {
1891 return Token::DecimalConstant((v, result).into());
1892 }
1893 }
1894
1895 num.unwrap_or_else(|_| {
1896 Token::LexError(LERR::MalformedNumber(result.to_string()).into())
1897 })
1898 })()
1899 };
1900
1901 return (token, num_pos);
1902 }
1903
1904 ('"', ..) => {
1906 return parse_string_literal(stream, state, pos, c, false, true, false)
1907 .map_or_else(
1908 |(err, err_pos)| (Token::LexError(err.into()), err_pos),
1909 |(result, ..)| (Token::StringConstant(result.into()), start_pos),
1910 );
1911 }
1912 ('`', ..) => {
1914 match stream.peek_next() {
1916 Some('\r') => {
1918 stream.eat_next_and_advance(pos);
1919 if stream.peek_next() == Some('\n') {
1921 stream.eat_next_and_advance(pos);
1922 }
1923 pos.new_line();
1924 }
1925 Some('\n') => {
1927 stream.eat_next_and_advance(pos);
1928 pos.new_line();
1929 }
1930 _ => (),
1931 }
1932
1933 return parse_string_literal(stream, state, pos, c, true, false, true).map_or_else(
1934 |(err, err_pos)| (Token::LexError(err.into()), err_pos),
1935 |(result, interpolated, ..)| {
1936 if interpolated {
1937 (Token::InterpolatedString(result.into()), start_pos)
1938 } else {
1939 (Token::StringConstant(result.into()), start_pos)
1940 }
1941 },
1942 );
1943 }
1944
1945 ('#', '"' | '#') => {
1947 return parse_raw_string_literal(stream, state, pos, 0).map_or_else(
1948 |(err, err_pos)| (Token::LexError(err.into()), err_pos),
1949 |(result, ..)| (Token::StringConstant(result.into()), start_pos),
1950 );
1951 }
1952
1953 ('\'', '\'') => {
1955 return (
1956 Token::LexError(LERR::MalformedChar(String::new()).into()),
1957 start_pos,
1958 )
1959 }
1960 ('\'', ..) => {
1961 return parse_string_literal(stream, state, pos, c, false, false, false)
1962 .map_or_else(
1963 |(err, err_pos)| (Token::LexError(err.into()), err_pos),
1964 |(result, ..)| {
1965 let mut chars = result.chars();
1966 let first = chars.next().unwrap();
1967
1968 if chars.next().is_some() {
1969 (
1970 Token::LexError(LERR::MalformedChar(result.to_string()).into()),
1971 start_pos,
1972 )
1973 } else {
1974 (Token::CharConstant(first), start_pos)
1975 }
1976 },
1977 )
1978 }
1979
1980 ('{', ..) => return (Token::LeftBrace, start_pos),
1982 ('}', ..) => return (Token::RightBrace, start_pos),
1983
1984 ('(', ')') => {
1986 stream.eat_next_and_advance(pos);
1987 return (Token::Unit, start_pos);
1988 }
1989
1990 ('(', '*') => {
1992 stream.eat_next_and_advance(pos);
1993 return (Token::Reserved(Box::new("(*".into())), start_pos);
1994 }
1995 ('(', ..) => return (Token::LeftParen, start_pos),
1996 (')', ..) => return (Token::RightParen, start_pos),
1997
1998 ('[', ..) => return (Token::LeftBracket, start_pos),
2000 (']', ..) => return (Token::RightBracket, start_pos),
2001
2002 #[cfg(not(feature = "no_object"))]
2004 ('#', '{') => {
2005 stream.eat_next_and_advance(pos);
2006 return (Token::MapStart, start_pos);
2007 }
2008 ('#', '!') => return (Token::Reserved(Box::new("#!".into())), start_pos),
2010
2011 ('#', ' ') => {
2012 stream.eat_next_and_advance(pos);
2013 let token = if stream.peek_next() == Some('{') {
2014 stream.eat_next_and_advance(pos);
2015 "# {"
2016 } else {
2017 "#"
2018 };
2019 return (Token::Reserved(Box::new(token.into())), start_pos);
2020 }
2021
2022 ('#', ..) => return (Token::Reserved(Box::new("#".into())), start_pos),
2023
2024 ('+', '=') => {
2026 stream.eat_next_and_advance(pos);
2027 return (Token::PlusAssign, start_pos);
2028 }
2029 ('+', '+') => {
2030 stream.eat_next_and_advance(pos);
2031 return (Token::Reserved(Box::new("++".into())), start_pos);
2032 }
2033 ('+', ..) if !state.next_token_cannot_be_unary => return (Token::UnaryPlus, start_pos),
2034 ('+', ..) => return (Token::Plus, start_pos),
2035
2036 ('-', '0'..='9') if !state.next_token_cannot_be_unary => negated = Some(start_pos),
2037 ('-', '0'..='9') => return (Token::Minus, start_pos),
2038 ('-', '=') => {
2039 stream.eat_next_and_advance(pos);
2040 return (Token::MinusAssign, start_pos);
2041 }
2042 ('-', '>') => {
2043 stream.eat_next_and_advance(pos);
2044 return (Token::Reserved(Box::new("->".into())), start_pos);
2045 }
2046 ('-', '-') => {
2047 stream.eat_next_and_advance(pos);
2048 return (Token::Reserved(Box::new("--".into())), start_pos);
2049 }
2050 ('-', ..) if !state.next_token_cannot_be_unary => {
2051 return (Token::UnaryMinus, start_pos)
2052 }
2053 ('-', ..) => return (Token::Minus, start_pos),
2054
2055 ('*', ')') => {
2056 stream.eat_next_and_advance(pos);
2057 return (Token::Reserved(Box::new("*)".into())), start_pos);
2058 }
2059 ('*', '=') => {
2060 stream.eat_next_and_advance(pos);
2061 return (Token::MultiplyAssign, start_pos);
2062 }
2063 ('*', '*') => {
2064 stream.eat_next_and_advance(pos);
2065
2066 return (
2067 if stream.peek_next() == Some('=') {
2068 stream.eat_next_and_advance(pos);
2069 Token::PowerOfAssign
2070 } else {
2071 Token::PowerOf
2072 },
2073 start_pos,
2074 );
2075 }
2076 ('*', ..) => return (Token::Multiply, start_pos),
2077
2078 ('/', '/') => {
2080 stream.eat_next_and_advance(pos);
2081
2082 let mut comment: Option<String> = match stream.peek_next() {
2083 #[cfg(not(feature = "no_function"))]
2084 #[cfg(feature = "metadata")]
2085 Some('/') => {
2086 stream.eat_next_and_advance(pos);
2087
2088 match stream.peek_next() {
2090 Some('/') => None,
2091 _ => Some("///".into()),
2092 }
2093 }
2094 #[cfg(feature = "metadata")]
2095 Some('!') => {
2096 stream.eat_next_and_advance(pos);
2097 Some("//!".into())
2098 }
2099 _ if state.include_comments => Some("//".into()),
2100 _ => None,
2101 };
2102
2103 while let Some(c) = stream.get_next() {
2104 if c == '\r' {
2105 if stream.peek_next() == Some('\n') {
2107 stream.eat_next_and_advance(pos);
2108 }
2109 pos.new_line();
2110 break;
2111 }
2112 if c == '\n' {
2113 pos.new_line();
2114 break;
2115 }
2116 if let Some(comment) = comment.as_mut() {
2117 comment.push(c);
2118 }
2119 pos.advance();
2120 }
2121
2122 match comment {
2123 #[cfg(feature = "metadata")]
2124 Some(comment) if comment.starts_with("//!") => {
2125 let g = &mut state.tokenizer_control.borrow_mut().global_comments;
2126 if !g.is_empty() {
2127 *g += "\n";
2128 }
2129 *g += &comment;
2130 }
2131 Some(comment) => return (Token::Comment(comment.into()), start_pos),
2132 None => (),
2133 }
2134 }
2135 ('/', '*') => {
2136 state.comment_level += 1;
2137 stream.eat_next_and_advance(pos);
2138
2139 let mut comment: Option<String> = match stream.peek_next() {
2140 #[cfg(not(feature = "no_function"))]
2141 #[cfg(feature = "metadata")]
2142 Some('*') => {
2143 stream.eat_next_and_advance(pos);
2144
2145 match stream.peek_next() {
2147 Some('*') => None,
2148 _ => Some("/**".into()),
2149 }
2150 }
2151 _ if state.include_comments => Some("/*".into()),
2152 _ => None,
2153 };
2154
2155 state.comment_level =
2156 scan_block_comment(stream, state.comment_level, pos, comment.as_mut());
2157
2158 if let Some(comment) = comment {
2159 return (Token::Comment(comment.into()), start_pos);
2160 }
2161 }
2162
2163 ('/', '=') => {
2164 stream.eat_next_and_advance(pos);
2165 return (Token::DivideAssign, start_pos);
2166 }
2167 ('/', ..) => return (Token::Divide, start_pos),
2168
2169 (';', ..) => return (Token::SemiColon, start_pos),
2170 (',', ..) => return (Token::Comma, start_pos),
2171
2172 ('.', '.') => {
2173 stream.eat_next_and_advance(pos);
2174 return (
2175 match stream.peek_next() {
2176 Some('.') => {
2177 stream.eat_next_and_advance(pos);
2178 Token::Reserved(Box::new("...".into()))
2179 }
2180 Some('=') => {
2181 stream.eat_next_and_advance(pos);
2182 Token::InclusiveRange
2183 }
2184 _ => Token::ExclusiveRange,
2185 },
2186 start_pos,
2187 );
2188 }
2189 ('.', ..) => return (Token::Period, start_pos),
2190
2191 ('=', '=') => {
2192 stream.eat_next_and_advance(pos);
2193
2194 if stream.peek_next() == Some('=') {
2195 stream.eat_next_and_advance(pos);
2196 return (Token::Reserved(Box::new("===".into())), start_pos);
2197 }
2198
2199 return (Token::EqualsTo, start_pos);
2200 }
2201 ('=', '>') => {
2202 stream.eat_next_and_advance(pos);
2203 return (Token::DoubleArrow, start_pos);
2204 }
2205 ('=', ..) => return (Token::Equals, start_pos),
2206
2207 #[cfg(not(feature = "no_module"))]
2208 (':', ':') => {
2209 stream.eat_next_and_advance(pos);
2210
2211 if stream.peek_next() == Some('<') {
2212 stream.eat_next_and_advance(pos);
2213 return (Token::Reserved(Box::new("::<".into())), start_pos);
2214 }
2215
2216 return (Token::DoubleColon, start_pos);
2217 }
2218 (':', '=') => {
2219 stream.eat_next_and_advance(pos);
2220 return (Token::Reserved(Box::new(":=".into())), start_pos);
2221 }
2222 (':', ';') => {
2223 stream.eat_next_and_advance(pos);
2224 return (Token::Reserved(Box::new(":;".into())), start_pos);
2225 }
2226 (':', ..) => return (Token::Colon, start_pos),
2227
2228 ('<', '=') => {
2229 stream.eat_next_and_advance(pos);
2230 return (Token::LessThanEqualsTo, start_pos);
2231 }
2232 ('<', '-') => {
2233 stream.eat_next_and_advance(pos);
2234 return (Token::Reserved(Box::new("<-".into())), start_pos);
2235 }
2236 ('<', '<') => {
2237 stream.eat_next_and_advance(pos);
2238
2239 return (
2240 if stream.peek_next() == Some('=') {
2241 stream.eat_next_and_advance(pos);
2242 Token::LeftShiftAssign
2243 } else {
2244 Token::LeftShift
2245 },
2246 start_pos,
2247 );
2248 }
2249 ('<', '|') => {
2250 stream.eat_next_and_advance(pos);
2251 return (Token::Reserved(Box::new("<|".into())), start_pos);
2252 }
2253 ('<', ..) => return (Token::LessThan, start_pos),
2254
2255 ('>', '=') => {
2256 stream.eat_next_and_advance(pos);
2257 return (Token::GreaterThanEqualsTo, start_pos);
2258 }
2259 ('>', '>') => {
2260 stream.eat_next_and_advance(pos);
2261
2262 return (
2263 if stream.peek_next() == Some('=') {
2264 stream.eat_next_and_advance(pos);
2265 Token::RightShiftAssign
2266 } else {
2267 Token::RightShift
2268 },
2269 start_pos,
2270 );
2271 }
2272 ('>', ..) => return (Token::GreaterThan, start_pos),
2273
2274 ('!', 'i') => {
2275 stream.get_next().unwrap();
2276 if stream.peek_next() == Some('n') {
2277 stream.get_next().unwrap();
2278 match stream.peek_next() {
2279 Some(c) if is_id_continue(c) => {
2280 stream.unget('n');
2281 stream.unget('i');
2282 return (Token::Bang, start_pos);
2283 }
2284 _ => {
2285 pos.advance();
2286 pos.advance();
2287 return (Token::NotIn, start_pos);
2288 }
2289 }
2290 }
2291
2292 stream.unget('i');
2293 return (Token::Bang, start_pos);
2294 }
2295 ('!', '=') => {
2296 stream.eat_next_and_advance(pos);
2297
2298 if stream.peek_next() == Some('=') {
2299 stream.eat_next_and_advance(pos);
2300 return (Token::Reserved(Box::new("!==".into())), start_pos);
2301 }
2302
2303 return (Token::NotEqualsTo, start_pos);
2304 }
2305 ('!', '.') => {
2306 stream.eat_next_and_advance(pos);
2307 return (Token::Reserved(Box::new("!.".into())), start_pos);
2308 }
2309 ('!', ..) => return (Token::Bang, start_pos),
2310
2311 ('|', '|') => {
2312 stream.eat_next_and_advance(pos);
2313 return (Token::Or, start_pos);
2314 }
2315 ('|', '=') => {
2316 stream.eat_next_and_advance(pos);
2317 return (Token::OrAssign, start_pos);
2318 }
2319 ('|', '>') => {
2320 stream.eat_next_and_advance(pos);
2321 return (Token::Reserved(Box::new("|>".into())), start_pos);
2322 }
2323 ('|', ..) => return (Token::Pipe, start_pos),
2324
2325 ('&', '&') => {
2326 stream.eat_next_and_advance(pos);
2327 return (Token::And, start_pos);
2328 }
2329 ('&', '=') => {
2330 stream.eat_next_and_advance(pos);
2331 return (Token::AndAssign, start_pos);
2332 }
2333 ('&', ..) => return (Token::Ampersand, start_pos),
2334
2335 ('^', '=') => {
2336 stream.eat_next_and_advance(pos);
2337 return (Token::XOrAssign, start_pos);
2338 }
2339 ('^', ..) => return (Token::XOr, start_pos),
2340
2341 ('~', ..) => return (Token::Reserved(Box::new("~".into())), start_pos),
2342
2343 ('%', '=') => {
2344 stream.eat_next_and_advance(pos);
2345 return (Token::ModuloAssign, start_pos);
2346 }
2347 ('%', ..) => return (Token::Modulo, start_pos),
2348
2349 ('@', ..) => return (Token::Reserved(Box::new("@".into())), start_pos),
2350
2351 ('$', ..) => return (Token::Reserved(Box::new("$".into())), start_pos),
2352
2353 ('?', '.') => {
2354 stream.eat_next_and_advance(pos);
2355 return (
2356 #[cfg(not(feature = "no_object"))]
2357 Token::Elvis,
2358 #[cfg(feature = "no_object")]
2359 Token::Reserved(Box::new("?.".into())),
2360 start_pos,
2361 );
2362 }
2363 ('?', '?') => {
2364 stream.eat_next_and_advance(pos);
2365 return (Token::DoubleQuestion, start_pos);
2366 }
2367 ('?', '[') => {
2368 stream.eat_next_and_advance(pos);
2369 return (
2370 #[cfg(not(feature = "no_index"))]
2371 Token::QuestionBracket,
2372 #[cfg(feature = "no_index")]
2373 Token::Reserved(Box::new("?[".into())),
2374 start_pos,
2375 );
2376 }
2377 ('?', ..) => return (Token::Reserved(Box::new("?".into())), start_pos),
2378
2379 _ if is_id_first_alphabetic(c) || c == '_' => {
2381 return parse_identifier_token(stream, state, pos, start_pos, c);
2382 }
2383
2384 ('\n', ..) => pos.new_line(),
2386
2387 (ch, ..) if ch.is_ascii_whitespace() => (),
2389
2390 _ => {
2391 return (
2392 Token::LexError(LERR::UnexpectedInput(c.to_string()).into()),
2393 start_pos,
2394 )
2395 }
2396 }
2397 }
2398
2399 pos.advance();
2400
2401 (Token::EOF, *pos)
2402}
2403
2404fn parse_identifier_token(
2406 stream: &mut (impl InputStream + ?Sized),
2407 state: &mut TokenizeState,
2408 pos: &mut Position,
2409 start_pos: Position,
2410 first_char: char,
2411) -> (Token, Position) {
2412 let mut identifier = SmartString::new_const();
2413 identifier.push(first_char);
2414 if let Some(ref mut last) = state.last_token {
2415 last.clear();
2416 last.push(first_char);
2417 }
2418
2419 while let Some(next_char) = stream.peek_next() {
2420 match next_char {
2421 x if is_id_continue(x) => {
2422 stream.eat_next_and_advance(pos);
2423 identifier.push(x);
2424 if let Some(ref mut last) = state.last_token {
2425 last.push(x);
2426 }
2427 }
2428 _ => break,
2429 }
2430 }
2431
2432 if let Some(token) = Token::lookup_symbol_from_syntax(&identifier) {
2433 return (token, start_pos);
2434 }
2435
2436 if is_reserved_keyword_or_symbol(&identifier).0 {
2437 return (Token::Reserved(Box::new(identifier)), start_pos);
2438 }
2439
2440 if !is_valid_identifier(&identifier) {
2441 return (
2442 Token::LexError(LERR::MalformedIdentifier(identifier.to_string()).into()),
2443 start_pos,
2444 );
2445 }
2446
2447 (Token::Identifier(identifier.into()), start_pos)
2448}
2449
2450#[must_use]
2453pub fn is_valid_identifier(name: &str) -> bool {
2454 let mut first_alphabetic = false;
2455
2456 for ch in name.chars() {
2457 match ch {
2458 '_' => (),
2459 _ if is_id_first_alphabetic(ch) => first_alphabetic = true,
2460 _ if !first_alphabetic => return false,
2461 _ if char::is_ascii_alphanumeric(&ch) => (),
2462 _ => return false,
2463 }
2464 }
2465
2466 first_alphabetic
2467}
2468
2469#[inline(always)]
2472#[must_use]
2473pub fn is_valid_function_name(name: &str) -> bool {
2474 is_valid_identifier(name)
2475 && !is_reserved_keyword_or_symbol(name).0
2476 && Token::lookup_symbol_from_syntax(name).is_none()
2477}
2478
/// Can the character start the alphabetic part of an identifier?
///
/// With the `unicode-xid-ident` feature this is the Unicode `XID_Start` property;
/// otherwise only ASCII letters qualify.
#[inline(always)]
#[must_use]
#[allow(clippy::missing_const_for_fn)]
pub fn is_id_first_alphabetic(x: char) -> bool {
    #[cfg(feature = "unicode-xid-ident")]
    return <char as unicode_xid::UnicodeXID>::is_xid_start(x);
    #[cfg(not(feature = "unicode-xid-ident"))]
    return matches!(x, 'a'..='z' | 'A'..='Z');
}
2489
/// Can the character continue an identifier?
///
/// With the `unicode-xid-ident` feature this is the Unicode `XID_Continue` property;
/// otherwise ASCII letters, ASCII digits and the underscore qualify.
#[inline(always)]
#[must_use]
#[allow(clippy::missing_const_for_fn)]
pub fn is_id_continue(x: char) -> bool {
    #[cfg(feature = "unicode-xid-ident")]
    return <char as unicode_xid::UnicodeXID>::is_xid_continue(x);
    #[cfg(not(feature = "unicode-xid-ident"))]
    return matches!(x, '_' | '0'..='9' | 'a'..='z' | 'A'..='Z');
}
2500
2501#[inline]
2513#[must_use]
2514pub fn is_reserved_keyword_or_symbol(syntax: &str) -> (bool, bool, bool) {
2515 let utf8 = syntax.as_bytes();
2518 let len = utf8.len();
2519
2520 if !(MIN_RESERVED_LEN..=MAX_RESERVED_LEN).contains(&len) {
2521 return (false, false, false);
2522 }
2523
2524 let mut hash_val = len;
2525
2526 match len {
2527 1 => (),
2528 _ => hash_val += RESERVED_ASSOC_VALUES[utf8[1] as usize] as usize,
2529 }
2530 hash_val += RESERVED_ASSOC_VALUES[utf8[0] as usize] as usize;
2531 hash_val += RESERVED_ASSOC_VALUES[utf8[len - 1] as usize] as usize;
2532
2533 if !(MIN_RESERVED_HASH_VALUE..=MAX_RESERVED_HASH_VALUE).contains(&hash_val) {
2534 return (false, false, false);
2535 }
2536
2537 match RESERVED_LIST[hash_val] {
2538 ("", ..) => (false, false, false),
2539 (s, true, a, b) => {
2540 let is_reserved = s.len() == len && s.as_bytes()[0] == utf8[0] && s == syntax;
2543 (is_reserved, is_reserved && a, is_reserved && a && b)
2544 }
2545 _ => (false, false, false),
2546 }
2547}
2548
/// A character stream that reads from a sequence of string inputs, one after another,
/// with room to push back up to two characters.
pub struct MultiInputsStream<'a> {
    /// Characters that have been pushed back via `unget`; slot 0 is filled before slot 1,
    /// and the most recently pushed character is handed out first.
    pub buf: [Option<char>; 2],
    /// Index into `streams` of the input currently being read.
    pub index: usize,
    /// The character iterators of the individual inputs, in reading order.
    pub streams: StaticVec<Peekable<Chars<'a>>>,
}
2561
2562impl InputStream for MultiInputsStream<'_> {
2563 #[inline]
2564 fn unget(&mut self, ch: char) {
2565 match self.buf {
2566 [None, ..] => self.buf[0] = Some(ch),
2567 [_, None] => self.buf[1] = Some(ch),
2568 _ => unreachable!("cannot unget more than 2 characters!"),
2569 }
2570 }
2571 fn get_next(&mut self) -> Option<char> {
2572 match self.buf {
2573 [None, ..] => (),
2574 [ch @ Some(_), None] => {
2575 self.buf[0] = None;
2576 return ch;
2577 }
2578 [_, ch @ Some(_)] => {
2579 self.buf[1] = None;
2580 return ch;
2581 }
2582 }
2583
2584 loop {
2585 if self.index >= self.streams.len() {
2586 return None;
2588 }
2589 if let Some(ch) = self.streams[self.index].next() {
2590 return Some(ch);
2592 }
2593 self.index += 1;
2595 }
2596 }
2597 fn peek_next(&mut self) -> Option<char> {
2598 match self.buf {
2599 [None, ..] => (),
2600 [ch @ Some(_), None] => return ch,
2601 [_, ch @ Some(_)] => return ch,
2602 }
2603
2604 loop {
2605 if self.index >= self.streams.len() {
2606 return None;
2608 }
2609 if let Some(&ch) = self.streams[self.index].peek() {
2610 return Some(ch);
2612 }
2613 self.index += 1;
2615 }
2616 }
2617}
2618
/// An iterator over the tokens of a script, yielding `(Token, Position)` pairs.
pub struct TokenIterator<'a> {
    /// The engine, consulted for custom keywords and disabled symbols.
    pub engine: &'a Engine,
    /// The tokenizer state passed to `get_next_token` on every step.
    pub state: TokenizeState,
    /// The current position in the input.
    pub pos: Position,
    /// The character stream being tokenized.
    pub stream: MultiInputsStream<'a>,
    /// An optional callback that may replace each token before it is yielded.
    pub token_mapper: Option<&'a OnParseTokenCallback>,
}
2633
impl<'a> Iterator for TokenIterator<'a> {
    type Item = (Token, Position);

    /// Produce the next token and its position.
    ///
    /// On top of the raw token from `get_next_token`, this:
    /// * resumes a back-tick text string when the control block asks for it,
    /// * turns well-known foreign symbols into descriptive `LexError`s,
    /// * maps custom keywords to `Token::Custom` and disabled symbols to `Token::Reserved`,
    /// * runs the optional token mapper, and
    /// * appends the token's text to the compressed script when compression is on.
    ///
    /// Every path visible here returns `Some`; at the end of input `Token::EOF` is returned.
    fn next(&mut self) -> Option<Self::Item> {
        let (within_interpolated, compress_script) = {
            let control = &mut *self.state.tokenizer_control.borrow_mut();

            // The control block requests that scanning continues inside a back-tick string.
            if control.is_within_text {
                // Switch to text mode terminated by back-tick
                self.state.is_within_text_terminated_by = Some("`".to_string().into());
                // Reset it
                control.is_within_text = false;
            }

            (
                self.state.is_within_text_terminated_by.is_some(),
                control.compressed.is_some(),
            )
        };

        let (token, pos) = match get_next_token(&mut self.stream, &mut self.state, &mut self.pos) {
            // {EOF}
            r @ (Token::EOF, _) => return Some(r),
            // A string constant came back but a terminator is still pending:
            // report the string as unterminated and clear the marker.
            (Token::StringConstant(..), pos) if self.state.is_within_text_terminated_by.is_some() => {
                self.state.is_within_text_terminated_by = None;
                return Some((Token::LexError(LERR::UnterminatedString.into()), pos));
            }
            // Reserved keyword/symbol: paired with whether it is registered as a custom keyword
            (Token::Reserved(s), pos) => (match
                (s.as_str(),
                    #[cfg(not(feature = "no_custom_syntax"))]
                    self.engine.custom_keywords.contains_key(&*s),
                    #[cfg(feature = "no_custom_syntax")]
                    false
                )
            {
                ("===", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                    "'===' is not a valid operator. This is not JavaScript! Should it be '=='?".to_string(),
                ).into()),
                ("!==", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                    "'!==' is not a valid operator. This is not JavaScript! Should it be '!='?".to_string(),
                ).into()),
                ("->", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                    "'->' is not a valid symbol. This is not C or C++!".to_string()).into()),
                ("<-", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                    "'<-' is not a valid symbol. This is not Go! Should it be '<='?".to_string(),
                ).into()),
                (":=", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                    "':=' is not a valid assignment operator. This is not Go or Pascal! Should it be simply '='?".to_string(),
                ).into()),
                (":;", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                    "':;' is not a valid symbol. Should it be '::'?".to_string(),
                ).into()),
                ("::<", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                    "'::<>' is not a valid symbol. This is not Rust! Should it be '::'?".to_string(),
                ).into()),
                ("(*" | "*)", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                    "'(* .. *)' is not a valid comment format. This is not Pascal! Should it be '/* .. */'?".to_string(),
                ).into()),
                ("# {", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                    "'#' is not a valid symbol. Should it be '#{'?".to_string(),
                ).into()),
                // Reserved keyword/operator that is registered as custom
                #[cfg(not(feature = "no_custom_syntax"))]
                (.., true) => Token::Custom(s),
                #[cfg(feature = "no_custom_syntax")]
                (.., true) => unreachable!("no custom operators"),
                // Reserved keyword/symbol that has been disabled on the engine
                (token, false) if self.engine.is_symbol_disabled(token) => {
                    let msg = format!("reserved {} '{token}' is disabled", if is_valid_identifier(token) { "keyword"} else {"symbol"});
                    Token::LexError(LERR::ImproperSymbol(s.to_string(), msg).into())
                },
                // Any other reserved keyword/symbol is passed through
                (.., false) => Token::Reserved(s),
            }, pos),
            // Identifier registered as a custom keyword
            #[cfg(not(feature = "no_custom_syntax"))]
            (Token::Identifier(s), pos) if self.engine.custom_keywords.contains_key(&*s) => {
                (Token::Custom(s), pos)
            }
            // Literal keyword/symbol registered as a custom keyword
            #[cfg(not(feature = "no_custom_syntax"))]
            (token, pos) if token.is_literal() && self.engine.custom_keywords.contains_key(token.literal_syntax()) => {
                // The assertion states the expectation that the built-in meaning is disabled.
                debug_assert!(self.engine.is_symbol_disabled(token.literal_syntax()), "{:?} is an active keyword", token);

                (Token::Custom(Box::new(token.literal_syntax().into())), pos)
            }
            // Literal keyword/symbol that has been disabled on the engine
            (token, pos) if token.is_literal() && self.engine.is_symbol_disabled(token.literal_syntax()) => {
                (Token::Reserved(Box::new(token.literal_syntax().into())), pos)
            }
            // Everything else is passed through unchanged
            r => r,
        };

        // Run the token mapper, if any
        let token = match self.token_mapper {
            Some(func) => func(token, pos, &self.state),
            None => token,
        };

        // Collect the compressed script, if requested
        if compress_script {
            let control = &mut *self.state.tokenizer_control.borrow_mut();

            if token != Token::EOF {
                if let Some(ref mut compressed) = control.compressed {
                    use std::fmt::Write;

                    let last_token = self.state.last_token.as_ref().unwrap();
                    let mut buf = SmartString::new_const();

                    if last_token.is_empty() {
                        // No raw text was recorded - fall back to the token's display form.
                        write!(buf, "{token}").unwrap();
                    } else if within_interpolated
                        && matches!(
                            token,
                            Token::StringConstant(..) | Token::InterpolatedString(..)
                        )
                    {
                        // Continuation of a text string: append the raw text minus its
                        // first byte directly, bypassing the separator logic below.
                        *compressed += &last_token[1..];
                    } else {
                        buf = last_token.clone();
                    }

                    // Insert a space when the new text and the existing output would
                    // otherwise run together into a single identifier-like word.
                    if !buf.is_empty() && !compressed.is_empty() {
                        let cur = buf.chars().next().unwrap();

                        if cur == '_' || is_id_first_alphabetic(cur) || is_id_continue(cur) {
                            let prev = compressed.chars().last().unwrap();

                            if prev == '_' || is_id_first_alphabetic(prev) || is_id_continue(prev) {
                                *compressed += " ";
                            }
                        }
                    }

                    *compressed += &buf;
                }
            }
        }

        Some((token, pos))
    }
}
2785
// Marker impl: advertises `TokenIterator` as a fused iterator, i.e. one that
// keeps yielding `None` once it has yielded `None` for the first time.
// NOTE(review): the trait has no methods, so this is purely a promise about
// `TokenIterator::next` -- confirm that `next` upholds it.
impl FusedIterator for TokenIterator<'_> {}
2787
2788impl Engine {
2789 #[expose_under_internals]
2792 #[inline(always)]
2793 #[must_use]
2794 fn lex<'a>(
2795 &'a self,
2796 inputs: impl IntoIterator<Item = &'a (impl AsRef<str> + 'a)>,
2797 ) -> (TokenIterator<'a>, TokenizerControl) {
2798 self.lex_raw(inputs, self.token_mapper.as_deref())
2799 }
2800 #[expose_under_internals]
2803 #[inline(always)]
2804 #[must_use]
2805 fn lex_with_map<'a>(
2806 &'a self,
2807 inputs: impl IntoIterator<Item = &'a (impl AsRef<str> + 'a)>,
2808 token_mapper: &'a OnParseTokenCallback,
2809 ) -> (TokenIterator<'a>, TokenizerControl) {
2810 self.lex_raw(inputs, Some(token_mapper))
2811 }
2812 #[inline]
2814 #[must_use]
2815 pub(crate) fn lex_raw<'a>(
2816 &'a self,
2817 inputs: impl IntoIterator<Item = &'a (impl AsRef<str> + 'a)>,
2818 token_mapper: Option<&'a OnParseTokenCallback>,
2819 ) -> (TokenIterator<'a>, TokenizerControl) {
2820 let buffer: TokenizerControl = RefCell::new(TokenizerControlBlock::new()).into();
2821 let buffer2 = buffer.clone();
2822
2823 (
2824 TokenIterator {
2825 engine: self,
2826 state: TokenizeState {
2827 #[cfg(not(feature = "unchecked"))]
2828 max_string_len: std::num::NonZeroUsize::new(self.max_string_size()),
2829 next_token_cannot_be_unary: false,
2830 tokenizer_control: buffer,
2831 comment_level: 0,
2832 include_comments: false,
2833 is_within_text_terminated_by: None,
2834 last_token: None,
2835 },
2836 pos: Position::new(1, 0),
2837 stream: MultiInputsStream {
2838 buf: [None, None],
2839 streams: inputs
2840 .into_iter()
2841 .map(|s| s.as_ref().chars().peekable())
2842 .collect(),
2843 index: 0,
2844 },
2845 token_mapper,
2846 },
2847 buffer2,
2848 )
2849 }
2850}