use rhai_codegen::expose_under_internals;

use crate::engine::Precedence;
use crate::func::native::OnParseTokenCallback;
use crate::{Engine, Identifier, LexError, Position, SmartString, StaticVec, INT, UNSIGNED_INT};
#[cfg(feature = "no_std")]
use std::prelude::v1::*;
use std::{
    cell::RefCell,
    char, fmt,
    fmt::Write,
    iter::{repeat, FusedIterator, Peekable},
    rc::Rc,
    str::{Chars, FromStr},
};

#[derive(Debug, Clone, Eq, PartialEq, Default, Hash)]
pub struct TokenizerControlBlock {
    pub is_within_text: bool,
    #[cfg(not(feature = "no_custom_syntax"))]
    pub in_char_mode: bool,
    #[cfg(feature = "metadata")]
    pub global_comments: String,
    pub compressed: Option<String>,
}

impl TokenizerControlBlock {
    #[inline]
    #[must_use]
    pub const fn new() -> Self {
        Self {
            is_within_text: false,
            #[cfg(not(feature = "no_custom_syntax"))]
            in_char_mode: false,
            #[cfg(feature = "metadata")]
            global_comments: String::new(),
            compressed: None,
        }
    }
}

pub type TokenizerControl = Rc<RefCell<TokenizerControlBlock>>;
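// Illustrative sketch (not from the original source): the parser and tokenizer
// share one `TokenizerControlBlock` through this `TokenizerControl` handle, so
// either side can flip flags such as `is_within_text`:
//
//     let control: TokenizerControl = Rc::new(RefCell::new(TokenizerControlBlock::new()));
//     control.borrow_mut().is_within_text = true;
//     assert!(control.borrow().is_within_text);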

type LERR = LexError;

const NUMBER_SEPARATOR: char = '_';

pub type TokenStream<'a> = Peekable<TokenIterator<'a>>;

65#[derive(Debug, PartialEq, Clone, Hash)]
68#[non_exhaustive]
69pub enum Token {
70 IntegerConstant(INT),
72 #[cfg(not(feature = "no_float"))]
76 FloatConstant(Box<(crate::types::FloatWrapper<crate::FLOAT>, Identifier)>),
77 #[cfg(feature = "decimal")]
81 DecimalConstant(Box<(rust_decimal::Decimal, Identifier)>),
82 Identifier(Box<Identifier>),
84 CharConstant(char),
86 StringConstant(Box<SmartString>),
88 InterpolatedString(Box<SmartString>),
90 LeftBrace,
92 RightBrace,
94 LeftParen,
96 RightParen,
98 LeftBracket,
100 RightBracket,
102 Unit,
104 Plus,
106 UnaryPlus,
108 Minus,
110 UnaryMinus,
112 Multiply,
114 Divide,
116 Modulo,
118 PowerOf,
120 LeftShift,
122 RightShift,
124 SemiColon,
126 Colon,
128 DoubleColon,
130 DoubleArrow,
132 Underscore,
134 Comma,
136 Period,
138 #[cfg(not(feature = "no_object"))]
142 Elvis,
143 DoubleQuestion,
145 #[cfg(not(feature = "no_index"))]
149 QuestionBracket,
150 ExclusiveRange,
152 InclusiveRange,
154 MapStart,
156 Equals,
158 True,
160 False,
162 Let,
164 Const,
166 If,
168 Else,
170 Switch,
172 Do,
174 While,
176 Until,
178 Loop,
180 For,
182 In,
184 NotIn,
186 LessThan,
188 GreaterThan,
190 LessThanEqualsTo,
192 GreaterThanEqualsTo,
194 EqualsTo,
196 NotEqualsTo,
198 Bang,
200 Pipe,
202 Or,
204 XOr,
206 Ampersand,
208 And,
210 #[cfg(not(feature = "no_function"))]
214 Fn,
215 Continue,
217 Break,
219 Return,
221 Throw,
223 Try,
225 Catch,
227 PlusAssign,
229 MinusAssign,
231 MultiplyAssign,
233 DivideAssign,
235 LeftShiftAssign,
237 RightShiftAssign,
239 AndAssign,
241 OrAssign,
243 XOrAssign,
245 ModuloAssign,
247 PowerOfAssign,
249 #[cfg(not(feature = "no_function"))]
253 Private,
254 #[cfg(not(feature = "no_module"))]
258 Import,
259 #[cfg(not(feature = "no_module"))]
263 Export,
264 #[cfg(not(feature = "no_module"))]
268 As,
269 LexError(Box<LexError>),
271 Comment(Box<String>),
273 Reserved(Box<Identifier>),
275 #[cfg(not(feature = "no_custom_syntax"))]
279 Custom(Box<Identifier>),
280 #[cfg(not(feature = "no_custom_syntax"))]
284 UnprocessedRawChar(char),
285 EOF,
288}
289
290impl fmt::Display for Token {
291 #[inline(always)]
292 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
293 #[allow(clippy::enum_glob_use)]
294 use Token::*;
295
296 match self {
297 IntegerConstant(i) => write!(f, "{i}"),
298 #[cfg(not(feature = "no_float"))]
299 FloatConstant(v) => write!(f, "{}", v.0),
300 #[cfg(feature = "decimal")]
301 DecimalConstant(d) => write!(f, "{}", d.0),
302 StringConstant(s) => write!(f, r#""{s}""#),
303 InterpolatedString(..) => f.write_str("string"),
304 CharConstant(c) => write!(f, "{c}"),
305 Identifier(s) => f.write_str(s),
306 Reserved(s) => f.write_str(s),
307 #[cfg(not(feature = "no_custom_syntax"))]
308 Custom(s) => f.write_str(s),
309 #[cfg(not(feature = "no_custom_syntax"))]
310 UnprocessedRawChar(c) => f.write_char(*c),
311 LexError(err) => write!(f, "{err}"),
312 Comment(s) => f.write_str(s),
313
314 EOF => f.write_str("{EOF}"),
315
316 token => f.write_str(token.literal_syntax()),
317 }
318 }
319}
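
// Illustrative examples (not in the original source): `Display` falls back to
// `literal_syntax()` for punctuation and keyword tokens:
//
//     assert_eq!(Token::PlusAssign.to_string(), "+=");
//     assert_eq!(Token::Let.to_string(), "let");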
320
321const MIN_KEYWORD_LEN: usize = 1;
326const MAX_KEYWORD_LEN: usize = 8;
327const MIN_KEYWORD_HASH_VALUE: usize = 1;
328const MAX_KEYWORD_HASH_VALUE: usize = 152;
329
330static KEYWORD_ASSOC_VALUES: [u8; 257] = [
331 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
332 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 115, 153, 100, 153, 110,
333 105, 40, 80, 2, 20, 25, 125, 95, 15, 40, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 55,
334 35, 10, 5, 0, 30, 110, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
335 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 120, 105, 100, 85, 90, 153, 125, 5,
336 0, 125, 35, 10, 100, 153, 20, 0, 153, 10, 0, 45, 55, 0, 153, 50, 55, 5, 0, 153, 0, 0, 35, 153,
337 45, 50, 30, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
338 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
339 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
340 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
341 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
342 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
343 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
344 153,
345];
346static KEYWORDS_LIST: [(&str, Token); 153] = [
347 ("", Token::EOF),
348 (">", Token::GreaterThan),
349 (">=", Token::GreaterThanEqualsTo),
350 (")", Token::RightParen),
351 ("", Token::EOF),
352 ("const", Token::Const),
353 ("=", Token::Equals),
354 ("==", Token::EqualsTo),
355 ("continue", Token::Continue),
356 ("", Token::EOF),
357 ("catch", Token::Catch),
358 ("<", Token::LessThan),
359 ("<=", Token::LessThanEqualsTo),
360 ("for", Token::For),
361 ("loop", Token::Loop),
362 ("", Token::EOF),
363 (".", Token::Period),
364 ("<<", Token::LeftShift),
365 ("<<=", Token::LeftShiftAssign),
366 ("", Token::EOF),
367 ("false", Token::False),
368 ("*", Token::Multiply),
369 ("*=", Token::MultiplyAssign),
370 ("let", Token::Let),
371 ("", Token::EOF),
372 ("while", Token::While),
373 ("+", Token::Plus),
374 ("+=", Token::PlusAssign),
375 ("", Token::EOF),
376 ("", Token::EOF),
377 ("throw", Token::Throw),
378 ("}", Token::RightBrace),
379 (">>", Token::RightShift),
380 (">>=", Token::RightShiftAssign),
381 ("", Token::EOF),
382 ("", Token::EOF),
383 (";", Token::SemiColon),
384 ("=>", Token::DoubleArrow),
385 ("", Token::EOF),
386 ("else", Token::Else),
387 ("", Token::EOF),
388 ("/", Token::Divide),
389 ("/=", Token::DivideAssign),
390 ("", Token::EOF),
391 ("", Token::EOF),
392 ("", Token::EOF),
393 ("{", Token::LeftBrace),
394 ("**", Token::PowerOf),
395 ("**=", Token::PowerOfAssign),
396 ("", Token::EOF),
397 ("", Token::EOF),
398 ("|", Token::Pipe),
399 ("|=", Token::OrAssign),
400 ("", Token::EOF),
401 ("", Token::EOF),
402 ("", Token::EOF),
403 (":", Token::Colon),
404 ("..", Token::ExclusiveRange),
405 ("..=", Token::InclusiveRange),
406 ("", Token::EOF),
407 ("until", Token::Until),
408 ("switch", Token::Switch),
409 #[cfg(not(feature = "no_function"))]
410 ("private", Token::Private),
411 #[cfg(feature = "no_function")]
412 ("", Token::EOF),
413 ("try", Token::Try),
414 ("true", Token::True),
415 ("break", Token::Break),
416 ("return", Token::Return),
417 #[cfg(not(feature = "no_function"))]
418 ("fn", Token::Fn),
419 #[cfg(feature = "no_function")]
420 ("", Token::EOF),
421 ("", Token::EOF),
422 ("", Token::EOF),
423 ("", Token::EOF),
424 #[cfg(not(feature = "no_module"))]
425 ("import", Token::Import),
426 #[cfg(feature = "no_module")]
427 ("", Token::EOF),
428 #[cfg(not(feature = "no_object"))]
429 ("?.", Token::Elvis),
430 #[cfg(feature = "no_object")]
431 ("", Token::EOF),
432 ("", Token::EOF),
433 ("", Token::EOF),
434 ("", Token::EOF),
435 #[cfg(not(feature = "no_module"))]
436 ("export", Token::Export),
437 #[cfg(feature = "no_module")]
438 ("", Token::EOF),
439 ("in", Token::In),
440 ("", Token::EOF),
441 ("", Token::EOF),
442 ("", Token::EOF),
443 ("(", Token::LeftParen),
444 ("||", Token::Or),
445 ("", Token::EOF),
446 ("", Token::EOF),
447 ("", Token::EOF),
448 ("^", Token::XOr),
449 ("^=", Token::XOrAssign),
450 ("", Token::EOF),
451 ("", Token::EOF),
452 ("", Token::EOF),
453 ("_", Token::Underscore),
454 ("::", Token::DoubleColon),
455 ("", Token::EOF),
456 ("", Token::EOF),
457 ("", Token::EOF),
458 ("-", Token::Minus),
459 ("-=", Token::MinusAssign),
460 ("", Token::EOF),
461 ("", Token::EOF),
462 ("", Token::EOF),
463 ("]", Token::RightBracket),
464 ("()", Token::Unit),
465 ("", Token::EOF),
466 ("", Token::EOF),
467 ("", Token::EOF),
468 ("&", Token::Ampersand),
469 ("&=", Token::AndAssign),
470 ("", Token::EOF),
471 ("", Token::EOF),
472 ("", Token::EOF),
473 ("%", Token::Modulo),
474 ("%=", Token::ModuloAssign),
475 ("", Token::EOF),
476 ("", Token::EOF),
477 ("", Token::EOF),
478 ("!", Token::Bang),
479 ("!=", Token::NotEqualsTo),
480 ("!in", Token::NotIn),
481 ("", Token::EOF),
482 ("", Token::EOF),
483 ("[", Token::LeftBracket),
484 ("if", Token::If),
485 ("", Token::EOF),
486 ("", Token::EOF),
487 ("", Token::EOF),
488 (",", Token::Comma),
489 ("do", Token::Do),
490 ("", Token::EOF),
491 ("", Token::EOF),
492 ("", Token::EOF),
493 ("", Token::EOF),
494 #[cfg(not(feature = "no_module"))]
495 ("as", Token::As),
496 #[cfg(feature = "no_module")]
497 ("", Token::EOF),
498 ("", Token::EOF),
499 ("", Token::EOF),
500 ("", Token::EOF),
501 ("", Token::EOF),
502 #[cfg(not(feature = "no_index"))]
503 ("?[", Token::QuestionBracket),
504 #[cfg(feature = "no_index")]
505 ("", Token::EOF),
506 ("", Token::EOF),
507 ("", Token::EOF),
508 ("", Token::EOF),
509 ("", Token::EOF),
510 ("??", Token::DoubleQuestion),
511 ("", Token::EOF),
512 ("", Token::EOF),
513 ("", Token::EOF),
514 ("", Token::EOF),
515 ("&&", Token::And),
516 ("", Token::EOF),
517 ("", Token::EOF),
518 ("", Token::EOF),
519 ("", Token::EOF),
520 ("#{", Token::MapStart),
521];
522
523const MIN_RESERVED_LEN: usize = 1;
528const MAX_RESERVED_LEN: usize = 10;
529const MIN_RESERVED_HASH_VALUE: usize = 1;
530const MAX_RESERVED_HASH_VALUE: usize = 149;
531
532static RESERVED_ASSOC_VALUES: [u8; 256] = [
533 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
534 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 10, 150, 5, 35, 150, 150,
535 150, 45, 35, 30, 30, 150, 20, 15, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 35,
536 30, 15, 5, 25, 0, 25, 150, 150, 150, 150, 150, 65, 150, 150, 150, 150, 150, 150, 150, 150, 150,
537 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 40, 150, 150, 150, 150, 150, 0, 150, 0,
538 0, 0, 15, 45, 10, 15, 150, 150, 35, 25, 10, 50, 0, 150, 5, 0, 15, 0, 5, 25, 45, 15, 150, 150,
539 25, 150, 20, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
540 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
541 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
542 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
543 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
544 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
545 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
546];
547static RESERVED_LIST: [(&str, bool, bool, bool); 150] = [
548 ("", false, false, false),
549 ("?", true, false, false),
550 ("as", cfg!(feature = "no_module"), false, false),
551 ("use", true, false, false),
552 ("case", true, false, false),
553 ("async", true, false, false),
554 ("public", true, false, false),
555 ("package", true, false, false),
556 ("", false, false, false),
557 ("", false, false, false),
558 ("super", true, false, false),
559 ("#", true, false, false),
560 ("private", cfg!(feature = "no_function"), false, false),
561 ("var", true, false, false),
562 ("protected", true, false, false),
563 ("spawn", true, false, false),
564 ("shared", true, false, false),
565 ("is", true, false, false),
566 ("===", true, false, false),
567 ("sync", true, false, false),
568 ("curry", true, true, true),
569 ("static", true, false, false),
570 ("default", true, false, false),
571 ("!==", true, false, false),
572 ("is_shared", cfg!(not(feature = "no_closure")), true, true),
573 ("print", true, true, false),
574 ("", false, false, false),
575 ("#!", true, false, false),
576 ("", false, false, false),
577 ("this", true, false, false),
578 ("is_def_var", true, true, false),
579 ("thread", true, false, false),
580 ("?.", cfg!(feature = "no_object"), false, false),
581 ("", false, false, false),
582 ("is_def_fn", cfg!(not(feature = "no_function")), true, false),
583 ("yield", true, false, false),
584 ("", false, false, false),
585 ("fn", cfg!(feature = "no_function"), false, false),
586 ("new", true, false, false),
587 ("call", true, true, true),
588 ("match", true, false, false),
589 ("~", true, false, false),
590 ("!.", true, false, false),
591 ("", false, false, false),
592 ("eval", true, true, false),
593 ("await", true, false, false),
594 ("", false, false, false),
595 (":=", true, false, false),
596 ("...", true, false, false),
597 ("null", true, false, false),
598 ("debug", true, true, false),
599 ("@", true, false, false),
600 ("type_of", true, true, true),
601 ("", false, false, false),
602 ("with", true, false, false),
603 ("", false, false, false),
604 ("", false, false, false),
605 ("<-", true, false, false),
606 ("", false, false, false),
607 ("void", true, false, false),
608 ("", false, false, false),
609 ("import", cfg!(feature = "no_module"), false, false),
610 ("--", true, false, false),
611 ("nil", true, false, false),
612 ("exit", false, false, false),
613 ("", false, false, false),
614 ("export", cfg!(feature = "no_module"), false, false),
615 ("<|", true, false, false),
616 ("", false, false, false),
617 ("", false, false, false),
618 ("", false, false, false),
619 ("$", true, false, false),
620 ("->", true, false, false),
621 ("", false, false, false),
622 ("", false, false, false),
623 ("", false, false, false),
624 ("", false, false, false),
625 ("|>", true, false, false),
626 ("", false, false, false),
627 ("", false, false, false),
628 ("", false, false, false),
629 ("module", true, false, false),
630 ("?[", cfg!(feature = "no_index"), false, false),
631 ("", false, false, false),
632 ("", false, false, false),
633 ("", false, false, false),
634 ("", false, false, false),
635 ("Fn", true, true, false),
636 ("::<", true, false, false),
637 ("", false, false, false),
638 ("", false, false, false),
639 ("", false, false, false),
640 ("++", true, false, false),
641 ("", false, false, false),
642 ("", false, false, false),
643 ("", false, false, false),
644 ("", false, false, false),
645 (":;", true, false, false),
646 ("", false, false, false),
647 ("", false, false, false),
648 ("", false, false, false),
649 ("", false, false, false),
650 ("*)", true, false, false),
651 ("", false, false, false),
652 ("", false, false, false),
653 ("", false, false, false),
654 ("", false, false, false),
655 ("(*", true, false, false),
656 ("", false, false, false),
657 ("", false, false, false),
658 ("", false, false, false),
659 ("", false, false, false),
660 ("", false, false, false),
661 ("", false, false, false),
662 ("", false, false, false),
663 ("", false, false, false),
664 ("", false, false, false),
665 ("", false, false, false),
666 ("", false, false, false),
667 ("", false, false, false),
668 ("", false, false, false),
669 ("", false, false, false),
670 ("", false, false, false),
671 ("", false, false, false),
672 ("", false, false, false),
673 ("", false, false, false),
674 ("", false, false, false),
675 ("", false, false, false),
676 ("", false, false, false),
677 ("", false, false, false),
678 ("", false, false, false),
679 ("", false, false, false),
680 ("", false, false, false),
681 ("", false, false, false),
682 ("", false, false, false),
683 ("", false, false, false),
684 ("", false, false, false),
685 ("", false, false, false),
686 ("", false, false, false),
687 ("", false, false, false),
688 ("", false, false, false),
689 ("", false, false, false),
690 ("", false, false, false),
691 ("", false, false, false),
692 ("", false, false, false),
693 ("", false, false, false),
694 ("", false, false, false),
695 ("go", true, false, false),
696 ("", false, false, false),
697 ("goto", true, false, false),
698];
699
700impl Token {
701 #[must_use]
703 pub const fn is_literal(&self) -> bool {
704 #[allow(clippy::enum_glob_use)]
705 use Token::*;
706
707 match self {
708 IntegerConstant(..) => false,
709 #[cfg(not(feature = "no_float"))]
710 FloatConstant(..) => false,
711 #[cfg(feature = "decimal")]
712 DecimalConstant(..) => false,
713 StringConstant(..)
714 | InterpolatedString(..)
715 | CharConstant(..)
716 | Identifier(..)
717 | Reserved(..) => false,
718 #[cfg(not(feature = "no_custom_syntax"))]
719 Custom(..) => false,
720 LexError(..) | Comment(..) => false,
721
722 EOF => false,
723
724 _ => true,
725 }
726 }
727 #[must_use]
733 pub const fn literal_syntax(&self) -> &'static str {
734 #[allow(clippy::enum_glob_use)]
735 use Token::*;
736
737 match self {
738 LeftBrace => "{",
739 RightBrace => "}",
740 LeftParen => "(",
741 RightParen => ")",
742 LeftBracket => "[",
743 RightBracket => "]",
744 Unit => "()",
745 Plus => "+",
746 UnaryPlus => "+",
747 Minus => "-",
748 UnaryMinus => "-",
749 Multiply => "*",
750 Divide => "/",
751 SemiColon => ";",
752 Colon => ":",
753 DoubleColon => "::",
754 DoubleArrow => "=>",
755 Underscore => "_",
756 Comma => ",",
757 Period => ".",
758 #[cfg(not(feature = "no_object"))]
759 Elvis => "?.",
760 DoubleQuestion => "??",
761 #[cfg(not(feature = "no_index"))]
762 QuestionBracket => "?[",
763 ExclusiveRange => "..",
764 InclusiveRange => "..=",
765 MapStart => "#{",
766 Equals => "=",
767 True => "true",
768 False => "false",
769 Let => "let",
770 Const => "const",
771 If => "if",
772 Else => "else",
773 Switch => "switch",
774 Do => "do",
775 While => "while",
776 Until => "until",
777 Loop => "loop",
778 For => "for",
779 In => "in",
780 NotIn => "!in",
781 LessThan => "<",
782 GreaterThan => ">",
783 Bang => "!",
784 LessThanEqualsTo => "<=",
785 GreaterThanEqualsTo => ">=",
786 EqualsTo => "==",
787 NotEqualsTo => "!=",
788 Pipe => "|",
789 Or => "||",
790 Ampersand => "&",
791 And => "&&",
792 Continue => "continue",
793 Break => "break",
794 Return => "return",
795 Throw => "throw",
796 Try => "try",
797 Catch => "catch",
798 PlusAssign => "+=",
799 MinusAssign => "-=",
800 MultiplyAssign => "*=",
801 DivideAssign => "/=",
802 LeftShiftAssign => "<<=",
803 RightShiftAssign => ">>=",
804 AndAssign => "&=",
805 OrAssign => "|=",
806 XOrAssign => "^=",
807 LeftShift => "<<",
808 RightShift => ">>",
809 XOr => "^",
810 Modulo => "%",
811 ModuloAssign => "%=",
812 PowerOf => "**",
813 PowerOfAssign => "**=",
814
815 #[cfg(not(feature = "no_function"))]
816 Fn => "fn",
817 #[cfg(not(feature = "no_function"))]
818 Private => "private",
819
820 #[cfg(not(feature = "no_module"))]
821 Import => "import",
822 #[cfg(not(feature = "no_module"))]
823 Export => "export",
824 #[cfg(not(feature = "no_module"))]
825 As => "as",
826
827 _ => panic!("token is not a literal symbol"),
828 }
829 }
830
831 #[inline]
833 #[must_use]
834 pub const fn is_op_assignment(&self) -> bool {
835 #[allow(clippy::enum_glob_use)]
836 use Token::*;
837
838 matches!(
839 self,
840 PlusAssign
841 | MinusAssign
842 | MultiplyAssign
843 | DivideAssign
844 | LeftShiftAssign
845 | RightShiftAssign
846 | ModuloAssign
847 | PowerOfAssign
848 | AndAssign
849 | OrAssign
850 | XOrAssign
851 )
852 }
853
854 #[must_use]
856 pub const fn get_base_op_from_assignment(&self) -> Option<Self> {
857 #[allow(clippy::enum_glob_use)]
858 use Token::*;
859
860 Some(match self {
861 PlusAssign => Plus,
862 MinusAssign => Minus,
863 MultiplyAssign => Multiply,
864 DivideAssign => Divide,
865 LeftShiftAssign => LeftShift,
866 RightShiftAssign => RightShift,
867 ModuloAssign => Modulo,
868 PowerOfAssign => PowerOf,
869 AndAssign => Ampersand,
870 OrAssign => Pipe,
871 XOrAssign => XOr,
872 _ => return None,
873 })
874 }
875
876 #[inline]
878 #[must_use]
879 pub const fn has_op_assignment(&self) -> bool {
880 #[allow(clippy::enum_glob_use)]
881 use Token::*;
882
883 matches!(
884 self,
885 Plus | Minus
886 | Multiply
887 | Divide
888 | LeftShift
889 | RightShift
890 | Modulo
891 | PowerOf
892 | Ampersand
893 | Pipe
894 | XOr
895 )
896 }
897
898 #[must_use]
900 pub const fn convert_to_op_assignment(&self) -> Option<Self> {
901 #[allow(clippy::enum_glob_use)]
902 use Token::*;
903
904 Some(match self {
905 Plus => PlusAssign,
906 Minus => MinusAssign,
907 Multiply => MultiplyAssign,
908 Divide => DivideAssign,
909 LeftShift => LeftShiftAssign,
910 RightShift => RightShiftAssign,
911 Modulo => ModuloAssign,
912 PowerOf => PowerOfAssign,
913 Ampersand => AndAssign,
914 Pipe => OrAssign,
915 XOr => XOrAssign,
916 _ => return None,
917 })
918 }
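
    // Illustrative examples (not in the original source): the two mappings are inverses.
    //
    //     assert_eq!(Token::Plus.convert_to_op_assignment(), Some(Token::PlusAssign));
    //     assert_eq!(Token::PlusAssign.get_base_op_from_assignment(), Some(Token::Plus));
    //     assert_eq!(Token::EqualsTo.convert_to_op_assignment(), None);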
919
920 #[inline]
922 #[must_use]
923 pub fn lookup_symbol_from_syntax(syntax: &str) -> Option<Self> {
924 let utf8 = syntax.as_bytes();
927 let len = utf8.len();
928
929 if !(MIN_KEYWORD_LEN..=MAX_KEYWORD_LEN).contains(&len) {
930 return None;
931 }
932
933 let mut hash_val = len;
934
935 match len {
936 1 => (),
937 _ => hash_val += KEYWORD_ASSOC_VALUES[(utf8[1] as usize) + 1] as usize,
938 }
939 hash_val += KEYWORD_ASSOC_VALUES[utf8[0] as usize] as usize;
940
941 if !(MIN_KEYWORD_HASH_VALUE..=MAX_KEYWORD_HASH_VALUE).contains(&hash_val) {
942 return None;
943 }
944
945 match KEYWORDS_LIST[hash_val] {
946 (_, Self::EOF) => None,
947 (s, ref t) if s.len() == len && s.as_bytes()[0] == utf8[0] && s == syntax => {
950 Some(t.clone())
951 }
952 _ => None,
953 }
954 }
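
    // Illustrative examples (not in the original source): the perfect-hash lookup
    // only matches exact literal syntax; anything else returns `None`.
    //
    //     assert_eq!(Token::lookup_symbol_from_syntax("+="), Some(Token::PlusAssign));
    //     assert_eq!(Token::lookup_symbol_from_syntax("while"), Some(Token::While));
    //     assert_eq!(Token::lookup_symbol_from_syntax("foo"), None);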
955
956 #[must_use]
959 pub const fn is_next_unary(&self) -> bool {
960 #[allow(clippy::enum_glob_use)]
961 use Token::*;
962
963 match self {
            SemiColon |
            Colon |
            Comma |
            DoubleQuestion |
            ExclusiveRange |
            InclusiveRange |
            LeftBrace |
            LeftParen |
            LeftBracket |
            Plus |
979 PlusAssign |
980 UnaryPlus |
981 Minus |
982 MinusAssign |
983 UnaryMinus |
984 Multiply |
985 MultiplyAssign |
986 Divide |
987 DivideAssign |
988 Modulo |
989 ModuloAssign |
990 PowerOf |
991 PowerOfAssign |
992 LeftShift |
993 LeftShiftAssign |
994 RightShift |
995 RightShiftAssign |
996 Equals |
997 EqualsTo |
998 NotEqualsTo |
999 LessThan |
1000 GreaterThan |
1001 Bang |
1002 LessThanEqualsTo |
1003 GreaterThanEqualsTo |
1004 Pipe |
1005 Ampersand |
1006 If |
1007 While |
1009 Until |
1010 In |
1011 NotIn |
1012 And |
1013 AndAssign |
1014 Or |
1015 OrAssign |
1016 XOr |
1017 XOrAssign |
1018 Return |
1019 Throw => true,
1020
1021 #[cfg(not(feature = "no_index"))]
            QuestionBracket => true,

            LexError(..) => true,
1025
1026 _ => false,
1027 }
1028 }
1029
1030 #[must_use]
1032 pub const fn precedence(&self) -> Option<Precedence> {
1033 #[allow(clippy::enum_glob_use)]
1034 use Token::*;
1035
1036 Precedence::new(match self {
1037 Or | XOr | Pipe => 30,
1038
1039 And | Ampersand => 60,
1040
1041 EqualsTo | NotEqualsTo => 90,
1042
1043 In | NotIn => 110,
1044
1045 LessThan | LessThanEqualsTo | GreaterThan | GreaterThanEqualsTo => 130,
1046
1047 DoubleQuestion => 135,
1048
1049 ExclusiveRange | InclusiveRange => 140,
1050
1051 Plus | Minus => 150,
1052
1053 Divide | Multiply | Modulo => 180,
1054
1055 PowerOf => 190,
1056
1057 LeftShift | RightShift => 210,
1058
1059 _ => 0,
1060 })
1061 }
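
    // Illustrative sketch (not in the original source): higher values bind tighter
    // (e.g. `*`/`/`/`%` at 180 vs. `+`/`-` at 150); non-operators fall through to 0,
    // which `Precedence::new` is expected to map to `None`.
    //
    //     assert!(Token::Multiply.precedence().is_some());
    //     assert!(Token::LeftBrace.precedence().is_none());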
1062
1063 #[must_use]
1065 pub const fn is_bind_right(&self) -> bool {
1066 #[allow(clippy::enum_glob_use)]
1067 use Token::*;
1068
1069 match self {
1070 PowerOf => true,
1072
1073 _ => false,
1074 }
1075 }
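
    // Note (added, illustrative): only `**` binds right-to-left, so `2 ** 3 ** 2`
    // groups as `2 ** (3 ** 2)`; every other binary operator is left-associative.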
1076
1077 #[must_use]
1079 pub const fn is_standard_symbol(&self) -> bool {
1080 #[allow(clippy::enum_glob_use)]
1081 use Token::*;
1082
1083 match self {
1084 LeftBrace | RightBrace | LeftParen | RightParen | LeftBracket | RightBracket | Plus
1085 | UnaryPlus | Minus | UnaryMinus | Multiply | Divide | Modulo | PowerOf | LeftShift
1086 | RightShift | SemiColon | Colon | DoubleColon | Comma | Period | DoubleQuestion
1087 | ExclusiveRange | InclusiveRange | MapStart | Equals | LessThan | GreaterThan
1088 | LessThanEqualsTo | GreaterThanEqualsTo | EqualsTo | NotEqualsTo | Bang | Pipe
1089 | Or | XOr | Ampersand | And | PlusAssign | MinusAssign | MultiplyAssign
1090 | DivideAssign | LeftShiftAssign | RightShiftAssign | AndAssign | OrAssign
1091 | XOrAssign | ModuloAssign | PowerOfAssign => true,
1092
1093 #[cfg(not(feature = "no_object"))]
1094 Elvis => true,
1095
1096 #[cfg(not(feature = "no_index"))]
1097 QuestionBracket => true,
1098
1099 _ => false,
1100 }
1101 }
1102
1103 #[inline]
1105 #[must_use]
1106 pub const fn is_standard_keyword(&self) -> bool {
1107 #[allow(clippy::enum_glob_use)]
1108 use Token::*;
1109
1110 match self {
1111 #[cfg(not(feature = "no_function"))]
1112 Fn | Private => true,
1113
1114 #[cfg(not(feature = "no_module"))]
1115 Import | Export | As => true,
1116
1117 True | False | Let | Const | If | Else | Do | While | Until | Loop | For | In
1118 | Continue | Break | Return | Throw | Try | Catch => true,
1119
1120 _ => false,
1121 }
1122 }
1123
1124 #[inline(always)]
1126 #[must_use]
1127 pub const fn is_reserved(&self) -> bool {
1128 matches!(self, Self::Reserved(..))
1129 }
1130
1131 #[cfg(not(feature = "no_custom_syntax"))]
1133 #[inline(always)]
1134 #[must_use]
1135 pub const fn is_custom(&self) -> bool {
1136 matches!(self, Self::Custom(..))
1137 }
1138}
1139
1140impl From<Token> for String {
1141 #[inline(always)]
1142 fn from(token: Token) -> Self {
1143 (&token).into()
1144 }
1145}
1146
1147impl From<&Token> for String {
1148 #[inline(always)]
1149 fn from(token: &Token) -> Self {
1150 token.to_string()
1151 }
1152}
1153
1154impl From<Token> for SmartString {
1155 #[inline(always)]
1156 fn from(token: Token) -> Self {
1157 (&token).into()
1158 }
1159}
1160
1161impl From<&Token> for SmartString {
1162 #[inline(always)]
1163 fn from(token: &Token) -> Self {
1164 let mut buf = Self::new_const();
1165 write!(&mut buf, "{token}").unwrap();
1166 buf
1167 }
1168}
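
// Illustrative examples (not in the original source): the conversions reuse the
// `Display` implementation above.
//
//     let s: String = Token::MapStart.into();
//     assert_eq!(s, "#{");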
1169
1170#[derive(Debug, Clone, Eq, PartialEq, Default)]
1173pub struct TokenizeState {
1174 #[cfg(not(feature = "unchecked"))]
1178 pub max_string_len: Option<std::num::NonZeroUsize>,
1179 pub next_token_cannot_be_unary: bool,
1181 pub tokenizer_control: TokenizerControl,
1183 pub comment_level: usize,
1185 pub include_comments: bool,
1187 pub is_within_text_terminated_by: Option<SmartString>,
1189 pub last_token: Option<SmartString>,
1193}
1194
/// Trait that encapsulates a peekable character input stream.
pub trait InputStream {
    /// Un-get a character back into the stream so that it is returned again by
    /// the next call to `get_next` or `peek_next`.
    fn unget(&mut self, ch: char);
    /// Get the next character from the stream.
    fn get_next(&mut self) -> Option<char>;
    /// Peek the next character in the stream without consuming it.
    #[must_use]
    fn peek_next(&mut self) -> Option<char>;

    /// Consume the next character from the stream and advance the position.
    #[inline(always)]
    fn eat_next_and_advance(&mut self, pos: &mut Position) -> Option<char> {
        pos.advance();
        self.get_next()
    }
}
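
// Illustrative sketch (not part of the original source): a minimal `InputStream`
// over a single `&str`, mirroring the `MultiInputsStream` implementation further
// below but with only a one-character unget buffer. The name `SingleCharStream`
// is hypothetical.
//
//     struct SingleCharStream<'a> {
//         buf: Option<char>,
//         chars: Peekable<Chars<'a>>,
//     }
//
//     impl InputStream for SingleCharStream<'_> {
//         fn unget(&mut self, ch: char) {
//             self.buf = Some(ch);
//         }
//         fn get_next(&mut self) -> Option<char> {
//             self.buf.take().or_else(|| self.chars.next())
//         }
//         fn peek_next(&mut self) -> Option<char> {
//             self.buf.or_else(|| self.chars.peek().copied())
//         }
//     }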
1215
1216pub fn parse_raw_string_literal(
1253 stream: &mut (impl InputStream + ?Sized),
1254 state: &mut TokenizeState,
1255 pos: &mut Position,
1256 mut hash_count: usize,
1257) -> Result<(SmartString, Position), (LexError, Position)> {
1258 let start = *pos;
1259 let mut first_char = Position::NONE;
1260
1261 if hash_count == 0 {
1262 hash_count = 1;
1265
1266 while let Some('#') = stream.peek_next() {
1267 stream.eat_next_and_advance(pos);
1268 hash_count += 1;
1269 }
1270
1271 match stream.get_next() {
1273 Some('"') => pos.advance(),
1274 Some(c) => return Err((LERR::UnexpectedInput(c.to_string()), start)),
1275 None => return Err((LERR::UnterminatedString, start)),
1276 }
1277 }
1278
1279 let collect: SmartString = repeat('#').take(hash_count).collect();
1280 if let Some(ref mut last) = state.last_token {
1281 last.clear();
1282 last.push_str(&collect);
1283 last.push('"');
1284 }
1285 state.is_within_text_terminated_by = Some(collect);
1286
1287 let mut seen_hashes: Option<usize> = None;
1292 let mut result = SmartString::new_const();
1293
1294 while let Some(next_char) = stream.get_next() {
1295 pos.advance();
1296
1297 match (next_char, &mut seen_hashes) {
1298 ('"', None) => seen_hashes = Some(0),
1300 ('"', Some(count)) => {
1302 result.push('"');
1304 result.extend(repeat('#').take(*count));
1305 seen_hashes = Some(0);
1306 }
1307 ('#', Some(count)) => {
1309 *count += 1;
1310 if *count == hash_count {
1311 state.is_within_text_terminated_by = None;
1312 break;
1313 }
1314 }
1315 (c, Some(count)) => {
1317 result.push('"');
1319 result.extend(repeat('#').take(*count));
1320 result.push(c);
1321 seen_hashes = None;
1322 }
1323 ('\n', _) => {
1325 result.push('\n');
1326 pos.new_line();
1327 }
1328 (c, None) => result.push(c),
1330 }
1331
1332 #[cfg(not(feature = "unchecked"))]
1334 if let Some(max) = state.max_string_len {
1335 if result.len() > max.get() {
1336 return Err((LexError::StringTooLong(max.get()), start));
1337 }
1338 }
1339
1340 if first_char.is_none() {
1341 first_char = *pos;
1342 }
1343 }
1344
1345 Ok((result, first_char))
1346}
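
// Illustrative examples (not in the original source): a raw string is introduced
// by one or more `#` followed by `"`, and ends at a `"` followed by the same
// number of `#`, with no escape processing in between.
//
//     #"Hello, "world"!"#      // => Hello, "world"!
//     ##"raw "# inside"##      // => raw "# inside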
1347
1348pub fn parse_string_literal(
1379 stream: &mut (impl InputStream + ?Sized),
1380 state: &mut TokenizeState,
1381 pos: &mut Position,
1382 termination_char: char,
1383 verbatim: bool,
1384 allow_line_continuation: bool,
1385 allow_interpolation: bool,
1386) -> Result<(SmartString, bool, Position), (LexError, Position)> {
1387 let mut result = SmartString::new_const();
1388 let mut escape = SmartString::new_const();
1389
1390 let start = *pos;
1391 let mut first_char = Position::NONE;
1392 let mut interpolated = false;
1393 #[cfg(not(feature = "no_position"))]
1394 let mut skip_space_until = 0;
1395
1396 state.is_within_text_terminated_by = Some(termination_char.to_string().into());
1397 if let Some(ref mut last) = state.last_token {
1398 last.clear();
1399 last.push(termination_char);
1400 }
1401
1402 loop {
1403 debug_assert!(
1404 !verbatim || escape.is_empty(),
1405 "verbatim strings should not have any escapes"
1406 );
1407
1408 let next_char = match stream.get_next() {
1409 Some(ch) => {
1410 pos.advance();
1411 ch
1412 }
1413 None if verbatim => {
1414 debug_assert_eq!(escape, "", "verbatim strings should not have any escapes");
1415 pos.advance();
1416 break;
1417 }
1418 None if allow_line_continuation && !escape.is_empty() => {
1419 debug_assert_eq!(escape, "\\", "unexpected escape {escape} at end of line");
1420 pos.advance();
1421 break;
1422 }
1423 None => {
1424 pos.advance();
1425 state.is_within_text_terminated_by = None;
1426 return Err((LERR::UnterminatedString, start));
1427 }
1428 };
1429
1430 if let Some(ref mut last) = state.last_token {
1431 last.push(next_char);
1432 }
1433
1434 if allow_interpolation
1436 && next_char == '$'
1437 && escape.is_empty()
1438 && stream.peek_next() == Some('{')
1439 {
1440 interpolated = true;
1441 state.is_within_text_terminated_by = None;
1442 break;
1443 }
1444
1445 #[cfg(not(feature = "unchecked"))]
1447 if let Some(max) = state.max_string_len {
1448 if result.len() > max.get() {
1449 return Err((LexError::StringTooLong(max.get()), start));
1450 }
1451 }
1452
1453 if termination_char == next_char && escape.is_empty() {
1455 if stream.peek_next() == Some(termination_char) {
1457 stream.eat_next_and_advance(pos);
1458 if let Some(ref mut last) = state.last_token {
1459 last.push(termination_char);
1460 }
1461 } else {
1462 state.is_within_text_terminated_by = None;
1463 break;
1464 }
1465 }
1466
1467 if first_char.is_none() {
1468 first_char = *pos;
1469 }
1470
1471 match next_char {
1472 '\r' if stream.peek_next() == Some('\n') => (),
1474 'r' if !escape.is_empty() => {
1476 escape.clear();
1477 result.push_str("\r");
1478 }
1479 'n' if !escape.is_empty() => {
1481 escape.clear();
1482 result.push_str("\n");
1483 }
1484 '\\' if !verbatim && escape.is_empty() => {
1486 escape.push_str("\\");
1487 }
1488 '\\' if !escape.is_empty() => {
1490 escape.clear();
1491 result.push_str("\\");
1492 }
1493 't' if !escape.is_empty() => {
1495 escape.clear();
1496 result.push_str("\t");
1497 }
1498 ch @ ('x' | 'u' | 'U') if !escape.is_empty() => {
1500 let mut seq = escape.clone();
1501 escape.clear();
1502 seq.push(ch);
1503
1504 let mut out_val: u32 = 0;
1505 let len = match ch {
1506 'x' => 2,
1507 'u' => 4,
1508 'U' => 8,
1509 c => unreachable!("x or u or U expected but gets '{}'", c),
1510 };
1511
1512 for _ in 0..len {
1513 let c = stream
1514 .get_next()
1515 .ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?;
1516
1517 pos.advance();
1518 seq.push(c);
1519 if let Some(ref mut last) = state.last_token {
1520 last.push(c);
1521 }
1522
1523 out_val *= 16;
1524 out_val += c
1525 .to_digit(16)
1526 .ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?;
1527 }
1528
1529 result.push(
1530 char::from_u32(out_val)
1531 .ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?,
1532 );
1533 }
1534
1535 '\n' if verbatim => {
1537 debug_assert_eq!(escape, "", "verbatim strings should not have any escapes");
1538 pos.new_line();
1539 result.push_str("\n");
1540 }
1541
1542 '\n' if allow_line_continuation && !escape.is_empty() => {
1544 debug_assert_eq!(escape, "\\", "unexpected escape {escape} at end of line");
1545 escape.clear();
1546 pos.new_line();
1547
1548 #[cfg(not(feature = "no_position"))]
1549 {
1550 let start_position = start.position().unwrap();
1551 skip_space_until = start_position + 1;
1552 }
1553 }
1554
1555 '\n' => {
1557 pos.rewind();
1558 state.is_within_text_terminated_by = None;
1559 return Err((LERR::UnterminatedString, start));
1560 }
1561
1562 ch if termination_char == ch && !escape.is_empty() => {
1564 escape.clear();
1565 result.push(termination_char);
1566 }
1567
1568 ch if !escape.is_empty() => {
1570 escape.push(ch);
1571
1572 return Err((LERR::MalformedEscapeSequence(escape.to_string()), *pos));
1573 }
1574
1575 #[cfg(not(feature = "no_position"))]
1577 ch if ch.is_whitespace() && pos.position().unwrap() < skip_space_until => (),
1578
1579 ch => {
1581 escape.clear();
1582 result.push(ch);
1583
1584 #[cfg(not(feature = "no_position"))]
1585 {
1586 skip_space_until = 0;
1587 }
1588 }
1589 }
1590 }
1591
1592 #[cfg(not(feature = "unchecked"))]
1594 if let Some(max) = state.max_string_len {
1595 if result.len() > max.get() {
1596 return Err((LexError::StringTooLong(max.get()), start));
1597 }
1598 }
1599
1600 Ok((result, interpolated, first_char))
1601}
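
// Illustrative examples (not in the original source), as handled by the code above:
//
//     "a\tb\x41"       // escape sequences decode, yielding: a<TAB>bA
//     "say ""hi"""     // a doubled terminator produces one literal quote: say "hi"
//     `${x} apples`    // back-quoted strings are verbatim and may interpolate via `${...}`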
1602
1603fn scan_block_comment(
1605 stream: &mut (impl InputStream + ?Sized),
1606 level: usize,
1607 pos: &mut Position,
1608 comment: Option<&mut String>,
1609) -> usize {
1610 let mut level = level;
1611 let mut comment = comment;
1612
1613 while let Some(c) = stream.get_next() {
1614 pos.advance();
1615
1616 if let Some(comment) = comment.as_mut() {
1617 comment.push(c);
1618 }
1619
1620 match c {
1621 '/' => {
1622 if let Some(c2) = stream.peek_next().filter(|&ch| ch == '*') {
1623 stream.eat_next_and_advance(pos);
1624 if let Some(comment) = comment.as_mut() {
1625 comment.push(c2);
1626 }
1627 level += 1;
1628 }
1629 }
1630 '*' => {
1631 if let Some(c2) = stream.peek_next().filter(|&ch| ch == '/') {
1632 stream.eat_next_and_advance(pos);
1633 if let Some(comment) = comment.as_mut() {
1634 comment.push(c2);
1635 }
1636 level -= 1;
1637 }
1638 }
1639 '\n' => pos.new_line(),
1640 _ => (),
1641 }
1642
1643 if level == 0 {
1644 break;
1645 }
1646 }
1647
1648 level
1649}
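
// Note (added): block comments nest, so the scanner tracks a depth counter:
//
//     /* outer /* inner */ still inside the outer comment */
//
// `scan_block_comment` returns the remaining nesting level, which is non-zero
// when the input ends inside an unterminated comment.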
1650
1651#[inline(always)]
1653const fn is_hex_digit(c: char) -> bool {
1654 c.is_ascii_hexdigit()
1655}
1656
1657#[inline(always)]
1659const fn is_numeric_digit(c: char) -> bool {
1660 c.is_ascii_digit()
1661}
1662
1663#[inline(always)]
1665const fn is_octal_digit(c: char) -> bool {
1666 matches!(c, '0'..='7')
1667}
1668
1669#[inline(always)]
1671const fn is_binary_digit(c: char) -> bool {
1672 c == '0' || c == '1'
1673}
1674
1675#[cfg(not(feature = "no_function"))]
1677#[cfg(feature = "metadata")]
1678#[inline]
1679#[must_use]
1680pub fn is_doc_comment(comment: &str) -> bool {
1681 (comment.starts_with("///") && !comment.starts_with("////"))
1682 || (comment.starts_with("/**") && !comment.starts_with("/***"))
1683}
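
// Illustrative examples (not in the original source):
//
//     assert!(is_doc_comment("/// line doc-comment"));
//     assert!(!is_doc_comment("//// not a doc-comment"));
//     assert!(is_doc_comment("/** block doc-comment */"));
//     assert!(!is_doc_comment("/*** not a doc-comment */"));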
1684
1685#[inline(always)]
1688#[must_use]
1689pub fn get_next_token(
1690 stream: &mut (impl InputStream + ?Sized),
1691 state: &mut TokenizeState,
1692 pos: &mut Position,
1693) -> (Token, Position) {
1694 let result = get_next_token_inner(stream, state, pos);
1695
1696 state.next_token_cannot_be_unary = !result.0.is_next_unary();
1698
1699 result
1700}
1701
1702#[must_use]
1704fn get_next_token_inner(
1705 stream: &mut (impl InputStream + ?Sized),
1706 state: &mut TokenizeState,
1707 pos: &mut Position,
1708) -> (Token, Position) {
1709 state.last_token.as_mut().map(SmartString::clear);
1710
1711 if state.comment_level > 0 {
1713 let start_pos = *pos;
1714 let mut comment = String::new();
1715 let comment_buf = state.include_comments.then_some(&mut comment);
1716
1717 state.comment_level = scan_block_comment(stream, state.comment_level, pos, comment_buf);
1718
1719 let return_comment = state.include_comments;
1720
1721 #[cfg(not(feature = "no_function"))]
1722 #[cfg(feature = "metadata")]
1723 let return_comment = return_comment || is_doc_comment(&comment);
1724
1725 if return_comment {
1726 return (Token::Comment(comment.into()), start_pos);
1727 }
1728
1729 if state.comment_level > 0 {
1731 return (Token::EOF, *pos);
1732 }
1733 }
1734
1735 match state.is_within_text_terminated_by.take() {
1737 Some(ch) if ch.starts_with('#') => {
1738 return parse_raw_string_literal(stream, state, pos, ch.len()).map_or_else(
1739 |(err, err_pos)| (Token::LexError(err.into()), err_pos),
1740 |(result, start_pos)| (Token::StringConstant(result.into()), start_pos),
1741 )
1742 }
1743 Some(ch) => {
1744 let c = ch.chars().next().unwrap();
1745
1746 return parse_string_literal(stream, state, pos, c, true, false, true).map_or_else(
1747 |(err, err_pos)| (Token::LexError(err.into()), err_pos),
1748 |(result, interpolated, start_pos)| {
1749 if interpolated {
1750 (Token::InterpolatedString(result.into()), start_pos)
1751 } else {
1752 (Token::StringConstant(result.into()), start_pos)
1753 }
1754 },
1755 );
1756 }
1757 None => (),
1758 }
1759
1760 let mut negated: Option<Position> = None;
1761
1762 while let Some(c) = stream.get_next() {
1763 pos.advance();
1764
1765 let start_pos = *pos;
1766 let cc = stream.peek_next().unwrap_or('\0');
1767
1768 match (c, cc) {
1770 ('0'..='9', ..) => {
1772 let mut result = SmartString::new_const();
1773 let mut radix_base: Option<u32> = None;
1774 let mut valid: fn(char) -> bool = is_numeric_digit;
1775 let mut _has_period = false;
1776 let mut _has_e = false;
1777
1778 result.push(c);
1779
1780 while let Some(next_char) = stream.peek_next() {
1781 match next_char {
1782 NUMBER_SEPARATOR => {
1783 stream.eat_next_and_advance(pos);
1784 }
1785 ch if valid(ch) => {
1786 result.push(ch);
1787 stream.eat_next_and_advance(pos);
1788 }
1789 #[cfg(any(not(feature = "no_float"), feature = "decimal"))]
1790 '.' if !_has_period && radix_base.is_none() => {
1791 stream.get_next().unwrap();
1792
1793 match stream.peek_next() {
1795 Some('0'..='9') => {
1797 result.push_str(".");
1798 pos.advance();
1799 _has_period = true;
1800 }
1801 Some(NUMBER_SEPARATOR) => {
1803 stream.unget('.');
1804 break;
1805 }
1806 Some('.') => {
1808 stream.unget('.');
1809 break;
1810 }
1811 Some(ch) if !is_id_first_alphabetic(ch) => {
1813 result.push_str(".");
1814 pos.advance();
1815 result.push_str("0");
1816 _has_period = true;
1817 }
1818 _ => {
1820 stream.unget('.');
1821 break;
1822 }
1823 }
1824 }
1825 #[cfg(not(feature = "no_float"))]
1826 ch @ ('e' | 'E') if !_has_e && radix_base.is_none() => {
1827 stream.get_next().unwrap();
1828
1829 match stream.peek_next() {
1831 Some('0'..='9') => {
1833 result.push('e');
1834 pos.advance();
1835 _has_e = true;
1836 _has_period = true;
1837 }
1838 Some('+' | '-') => {
1840 result.push('e');
1841 pos.advance();
1842 result.push(stream.get_next().unwrap());
1843 pos.advance();
1844 _has_e = true;
1845 _has_period = true;
1846 }
1847 _ => {
1849 stream.unget(ch);
1850 break;
1851 }
1852 }
1853 }
1854 ch @ ('x' | 'o' | 'b' | 'X' | 'O' | 'B')
1856 if c == '0' && result.len() <= 1 =>
1857 {
1858 result.push(ch);
1859 stream.eat_next_and_advance(pos);
1860
1861 valid = match ch {
1862 'x' | 'X' => is_hex_digit,
1863 'o' | 'O' => is_octal_digit,
1864 'b' | 'B' => is_binary_digit,
1865 c => unreachable!("x/X or o/O or b/B expected but gets '{}'", c),
1866 };
1867
1868 radix_base = Some(match ch {
1869 'x' | 'X' => 16,
1870 'o' | 'O' => 8,
1871 'b' | 'B' => 2,
1872 c => unreachable!("x/X or o/O or b/B expected but gets '{}'", c),
1873 });
1874 }
1875
1876 _ => break,
1877 }
1878 }
1879
1880 let num_pos = negated.map_or(start_pos, |negated_pos| {
1881 result.insert(0, '-');
1882 negated_pos
1883 });
1884
1885 if let Some(ref mut last) = state.last_token {
1886 *last = result.clone();
1887 }
1888
1889 let token = if let Some(radix) = radix_base {
1891 let result = &result[2..];
1892
1893 UNSIGNED_INT::from_str_radix(result, radix)
1894 .map(|v| v as INT)
1895 .map_or_else(
1896 |_| Token::LexError(LERR::MalformedNumber(result.to_string()).into()),
1897 Token::IntegerConstant,
1898 )
1899 } else {
1900 (|| {
1901 let num = INT::from_str(&result).map(Token::IntegerConstant);
1902
1903 #[cfg(not(feature = "no_float"))]
1905 if num.is_err() {
1906 if let Ok(v) = crate::types::FloatWrapper::from_str(&result) {
1907 return Token::FloatConstant((v, result).into());
1908 }
1909 }
1910
1911 #[cfg(feature = "decimal")]
1913 if num.is_err() {
1914 if let Ok(v) = rust_decimal::Decimal::from_str(&result) {
1915 return Token::DecimalConstant((v, result).into());
1916 }
1917 }
1918
1919 #[cfg(feature = "decimal")]
1921 if num.is_err() {
1922 if let Ok(v) = rust_decimal::Decimal::from_scientific(&result) {
1923 return Token::DecimalConstant((v, result).into());
1924 }
1925 }
1926
1927 num.unwrap_or_else(|_| {
1928 Token::LexError(LERR::MalformedNumber(result.to_string()).into())
1929 })
1930 })()
1931 };
1932
1933 return (token, num_pos);
1934 }
1935
1936 ('"', ..) => {
1938 return parse_string_literal(stream, state, pos, c, false, true, false)
1939 .map_or_else(
1940 |(err, err_pos)| (Token::LexError(err.into()), err_pos),
1941 |(result, ..)| (Token::StringConstant(result.into()), start_pos),
1942 );
1943 }
1944 ('`', ..) => {
1946 match stream.peek_next() {
1948 Some('\r') => {
1950 stream.eat_next_and_advance(pos);
1951 if stream.peek_next() == Some('\n') {
1953 stream.eat_next_and_advance(pos);
1954 }
1955 pos.new_line();
1956 }
1957 Some('\n') => {
1959 stream.eat_next_and_advance(pos);
1960 pos.new_line();
1961 }
1962 _ => (),
1963 }
1964
1965 return parse_string_literal(stream, state, pos, c, true, false, true).map_or_else(
1966 |(err, err_pos)| (Token::LexError(err.into()), err_pos),
1967 |(result, interpolated, ..)| {
1968 if interpolated {
1969 (Token::InterpolatedString(result.into()), start_pos)
1970 } else {
1971 (Token::StringConstant(result.into()), start_pos)
1972 }
1973 },
1974 );
1975 }
1976
1977 ('#', '"' | '#') => {
1979 return parse_raw_string_literal(stream, state, pos, 0).map_or_else(
1980 |(err, err_pos)| (Token::LexError(err.into()), err_pos),
1981 |(result, ..)| (Token::StringConstant(result.into()), start_pos),
1982 );
1983 }
1984
1985 ('\'', '\'') => {
1987 return (
1988 Token::LexError(LERR::MalformedChar(String::new()).into()),
1989 start_pos,
1990 )
1991 }
1992 ('\'', ..) => {
1993 return parse_string_literal(stream, state, pos, c, false, false, false)
1994 .map_or_else(
1995 |(err, err_pos)| (Token::LexError(err.into()), err_pos),
1996 |(result, ..)| {
1997 let mut chars = result.chars();
1998 let first = chars.next().unwrap();
1999
2000 if chars.next().is_some() {
2001 (
2002 Token::LexError(LERR::MalformedChar(result.to_string()).into()),
2003 start_pos,
2004 )
2005 } else {
2006 (Token::CharConstant(first), start_pos)
2007 }
2008 },
2009 )
2010 }
2011
2012 ('{', ..) => return (Token::LeftBrace, start_pos),
2014 ('}', ..) => return (Token::RightBrace, start_pos),
2015
2016 ('(', ')') => {
2018 stream.eat_next_and_advance(pos);
2019 return (Token::Unit, start_pos);
2020 }
2021
2022 ('(', '*') => {
2024 stream.eat_next_and_advance(pos);
2025 return (Token::Reserved(Box::new("(*".into())), start_pos);
2026 }
2027 ('(', ..) => return (Token::LeftParen, start_pos),
2028 (')', ..) => return (Token::RightParen, start_pos),
2029
2030 ('[', ..) => return (Token::LeftBracket, start_pos),
2032 (']', ..) => return (Token::RightBracket, start_pos),
2033
2034 #[cfg(not(feature = "no_object"))]
2036 ('#', '{') => {
2037 stream.eat_next_and_advance(pos);
2038 return (Token::MapStart, start_pos);
2039 }
2040 ('#', '!') => return (Token::Reserved(Box::new("#!".into())), start_pos),
2042
2043 ('#', ' ') => {
2044 stream.eat_next_and_advance(pos);
2045 let token = if stream.peek_next() == Some('{') {
2046 stream.eat_next_and_advance(pos);
2047 "# {"
2048 } else {
2049 "#"
2050 };
2051 return (Token::Reserved(Box::new(token.into())), start_pos);
2052 }
2053
2054 ('#', ..) => return (Token::Reserved(Box::new("#".into())), start_pos),
2055
2056 ('+', '=') => {
2058 stream.eat_next_and_advance(pos);
2059 return (Token::PlusAssign, start_pos);
2060 }
2061 ('+', '+') => {
2062 stream.eat_next_and_advance(pos);
2063 return (Token::Reserved(Box::new("++".into())), start_pos);
2064 }
2065 ('+', ..) if !state.next_token_cannot_be_unary => return (Token::UnaryPlus, start_pos),
2066 ('+', ..) => return (Token::Plus, start_pos),
2067
2068 ('-', '0'..='9') if !state.next_token_cannot_be_unary => negated = Some(start_pos),
2069 ('-', '0'..='9') => return (Token::Minus, start_pos),
2070 ('-', '=') => {
2071 stream.eat_next_and_advance(pos);
2072 return (Token::MinusAssign, start_pos);
2073 }
2074 ('-', '>') => {
2075 stream.eat_next_and_advance(pos);
2076 return (Token::Reserved(Box::new("->".into())), start_pos);
2077 }
2078 ('-', '-') => {
2079 stream.eat_next_and_advance(pos);
2080 return (Token::Reserved(Box::new("--".into())), start_pos);
2081 }
2082 ('-', ..) if !state.next_token_cannot_be_unary => {
2083 return (Token::UnaryMinus, start_pos)
2084 }
2085 ('-', ..) => return (Token::Minus, start_pos),
2086
2087 ('*', ')') => {
2088 stream.eat_next_and_advance(pos);
2089 return (Token::Reserved(Box::new("*)".into())), start_pos);
2090 }
2091 ('*', '=') => {
2092 stream.eat_next_and_advance(pos);
2093 return (Token::MultiplyAssign, start_pos);
2094 }
2095 ('*', '*') => {
2096 stream.eat_next_and_advance(pos);
2097
2098 return (
2099 if stream.peek_next() == Some('=') {
2100 stream.eat_next_and_advance(pos);
2101 Token::PowerOfAssign
2102 } else {
2103 Token::PowerOf
2104 },
2105 start_pos,
2106 );
2107 }
2108 ('*', ..) => return (Token::Multiply, start_pos),
2109
2110 ('/', '/') => {
2112 stream.eat_next_and_advance(pos);
2113
2114 let mut comment: Option<String> = match stream.peek_next() {
2115 #[cfg(not(feature = "no_function"))]
2116 #[cfg(feature = "metadata")]
2117 Some('/') => {
2118 stream.eat_next_and_advance(pos);
2119
2120 match stream.peek_next() {
2122 Some('/') => None,
2123 _ => Some("///".into()),
2124 }
2125 }
2126 #[cfg(feature = "metadata")]
2127 Some('!') => {
2128 stream.eat_next_and_advance(pos);
2129 Some("//!".into())
2130 }
2131 _ if state.include_comments => Some("//".into()),
2132 _ => None,
2133 };
2134
2135 while let Some(c) = stream.get_next() {
2136 if c == '\r' {
2137 if stream.peek_next() == Some('\n') {
2139 stream.eat_next_and_advance(pos);
2140 }
2141 pos.new_line();
2142 break;
2143 }
2144 if c == '\n' {
2145 pos.new_line();
2146 break;
2147 }
2148 if let Some(comment) = comment.as_mut() {
2149 comment.push(c);
2150 }
2151 pos.advance();
2152 }
2153
2154 match comment {
2155 #[cfg(feature = "metadata")]
2156 Some(comment) if comment.starts_with("//!") => {
2157 let g = &mut state.tokenizer_control.borrow_mut().global_comments;
2158 if !g.is_empty() {
2159 *g += "\n";
2160 }
2161 *g += &comment;
2162 }
2163 Some(comment) => return (Token::Comment(comment.into()), start_pos),
2164 None => (),
2165 }
2166 }
2167 ('/', '*') => {
2168 state.comment_level += 1;
2169 stream.eat_next_and_advance(pos);
2170
2171 let mut comment: Option<String> = match stream.peek_next() {
2172 #[cfg(not(feature = "no_function"))]
2173 #[cfg(feature = "metadata")]
2174 Some('*') => {
2175 stream.eat_next_and_advance(pos);
2176
2177 match stream.peek_next() {
2179 Some('*') => None,
2180 _ => Some("/**".into()),
2181 }
2182 }
2183 _ if state.include_comments => Some("/*".into()),
2184 _ => None,
2185 };
2186
2187 state.comment_level =
2188 scan_block_comment(stream, state.comment_level, pos, comment.as_mut());
2189
2190 if let Some(comment) = comment {
2191 return (Token::Comment(comment.into()), start_pos);
2192 }
2193 }
2194
2195 ('/', '=') => {
2196 stream.eat_next_and_advance(pos);
2197 return (Token::DivideAssign, start_pos);
2198 }
2199 ('/', ..) => return (Token::Divide, start_pos),
2200
2201 (';', ..) => return (Token::SemiColon, start_pos),
2202 (',', ..) => return (Token::Comma, start_pos),
2203
2204 ('.', '.') => {
2205 stream.eat_next_and_advance(pos);
2206 return (
2207 match stream.peek_next() {
2208 Some('.') => {
2209 stream.eat_next_and_advance(pos);
2210 Token::Reserved(Box::new("...".into()))
2211 }
2212 Some('=') => {
2213 stream.eat_next_and_advance(pos);
2214 Token::InclusiveRange
2215 }
2216 _ => Token::ExclusiveRange,
2217 },
2218 start_pos,
2219 );
2220 }
2221 ('.', ..) => return (Token::Period, start_pos),
2222
2223 ('=', '=') => {
2224 stream.eat_next_and_advance(pos);
2225
2226 if stream.peek_next() == Some('=') {
2227 stream.eat_next_and_advance(pos);
2228 return (Token::Reserved(Box::new("===".into())), start_pos);
2229 }
2230
2231 return (Token::EqualsTo, start_pos);
2232 }
2233 ('=', '>') => {
2234 stream.eat_next_and_advance(pos);
2235 return (Token::DoubleArrow, start_pos);
2236 }
2237 ('=', ..) => return (Token::Equals, start_pos),
2238
2239 #[cfg(not(feature = "no_module"))]
2240 (':', ':') => {
2241 stream.eat_next_and_advance(pos);
2242
2243 if stream.peek_next() == Some('<') {
2244 stream.eat_next_and_advance(pos);
2245 return (Token::Reserved(Box::new("::<".into())), start_pos);
2246 }
2247
2248 return (Token::DoubleColon, start_pos);
2249 }
2250 (':', '=') => {
2251 stream.eat_next_and_advance(pos);
2252 return (Token::Reserved(Box::new(":=".into())), start_pos);
2253 }
2254 (':', ';') => {
2255 stream.eat_next_and_advance(pos);
2256 return (Token::Reserved(Box::new(":;".into())), start_pos);
2257 }
2258 (':', ..) => return (Token::Colon, start_pos),
2259
2260 ('<', '=') => {
2261 stream.eat_next_and_advance(pos);
2262 return (Token::LessThanEqualsTo, start_pos);
2263 }
2264 ('<', '-') => {
2265 stream.eat_next_and_advance(pos);
2266 return (Token::Reserved(Box::new("<-".into())), start_pos);
2267 }
2268 ('<', '<') => {
2269 stream.eat_next_and_advance(pos);
2270
2271 return (
2272 if stream.peek_next() == Some('=') {
2273 stream.eat_next_and_advance(pos);
2274 Token::LeftShiftAssign
2275 } else {
2276 Token::LeftShift
2277 },
2278 start_pos,
2279 );
2280 }
2281 ('<', '|') => {
2282 stream.eat_next_and_advance(pos);
2283 return (Token::Reserved(Box::new("<|".into())), start_pos);
2284 }
2285 ('<', ..) => return (Token::LessThan, start_pos),
2286
2287 ('>', '=') => {
2288 stream.eat_next_and_advance(pos);
2289 return (Token::GreaterThanEqualsTo, start_pos);
2290 }
2291 ('>', '>') => {
2292 stream.eat_next_and_advance(pos);
2293
2294 return (
2295 if stream.peek_next() == Some('=') {
2296 stream.eat_next_and_advance(pos);
2297 Token::RightShiftAssign
2298 } else {
2299 Token::RightShift
2300 },
2301 start_pos,
2302 );
2303 }
2304 ('>', ..) => return (Token::GreaterThan, start_pos),
2305
2306 ('!', 'i') => {
2307 stream.get_next().unwrap();
2308 if stream.peek_next() == Some('n') {
2309 stream.get_next().unwrap();
2310 match stream.peek_next() {
2311 Some(c) if is_id_continue(c) => {
2312 stream.unget('n');
2313 stream.unget('i');
2314 return (Token::Bang, start_pos);
2315 }
2316 _ => {
2317 pos.advance();
2318 pos.advance();
2319 return (Token::NotIn, start_pos);
2320 }
2321 }
2322 }
2323
2324 stream.unget('i');
2325 return (Token::Bang, start_pos);
2326 }
2327 ('!', '=') => {
2328 stream.eat_next_and_advance(pos);
2329
2330 if stream.peek_next() == Some('=') {
2331 stream.eat_next_and_advance(pos);
2332 return (Token::Reserved(Box::new("!==".into())), start_pos);
2333 }
2334
2335 return (Token::NotEqualsTo, start_pos);
2336 }
2337 ('!', '.') => {
2338 stream.eat_next_and_advance(pos);
2339 return (Token::Reserved(Box::new("!.".into())), start_pos);
2340 }
2341 ('!', ..) => return (Token::Bang, start_pos),
2342
2343 ('|', '|') => {
2344 stream.eat_next_and_advance(pos);
2345 return (Token::Or, start_pos);
2346 }
2347 ('|', '=') => {
2348 stream.eat_next_and_advance(pos);
2349 return (Token::OrAssign, start_pos);
2350 }
2351 ('|', '>') => {
2352 stream.eat_next_and_advance(pos);
2353 return (Token::Reserved(Box::new("|>".into())), start_pos);
2354 }
2355 ('|', ..) => return (Token::Pipe, start_pos),
2356
2357 ('&', '&') => {
2358 stream.eat_next_and_advance(pos);
2359 return (Token::And, start_pos);
2360 }
2361 ('&', '=') => {
2362 stream.eat_next_and_advance(pos);
2363 return (Token::AndAssign, start_pos);
2364 }
2365 ('&', ..) => return (Token::Ampersand, start_pos),
2366
2367 ('^', '=') => {
2368 stream.eat_next_and_advance(pos);
2369 return (Token::XOrAssign, start_pos);
2370 }
2371 ('^', ..) => return (Token::XOr, start_pos),
2372
2373 ('~', ..) => return (Token::Reserved(Box::new("~".into())), start_pos),
2374
2375 ('%', '=') => {
2376 stream.eat_next_and_advance(pos);
2377 return (Token::ModuloAssign, start_pos);
2378 }
2379 ('%', ..) => return (Token::Modulo, start_pos),
2380
2381 ('@', ..) => return (Token::Reserved(Box::new("@".into())), start_pos),
2382
2383 ('$', ..) => return (Token::Reserved(Box::new("$".into())), start_pos),
2384
2385 ('?', '.') => {
2386 stream.eat_next_and_advance(pos);
2387 return (
2388 #[cfg(not(feature = "no_object"))]
2389 Token::Elvis,
2390 #[cfg(feature = "no_object")]
2391 Token::Reserved(Box::new("?.".into())),
2392 start_pos,
2393 );
2394 }
2395 ('?', '?') => {
2396 stream.eat_next_and_advance(pos);
2397 return (Token::DoubleQuestion, start_pos);
2398 }
2399 ('?', '[') => {
2400 stream.eat_next_and_advance(pos);
2401 return (
2402 #[cfg(not(feature = "no_index"))]
2403 Token::QuestionBracket,
2404 #[cfg(feature = "no_index")]
2405 Token::Reserved(Box::new("?[".into())),
2406 start_pos,
2407 );
2408 }
2409 ('?', ..) => return (Token::Reserved(Box::new("?".into())), start_pos),
2410
2411 _ if is_id_first_alphabetic(c) || c == '_' => {
2413 return parse_identifier_token(stream, state, pos, start_pos, c);
2414 }
2415
2416 ('\n', ..) => pos.new_line(),
2418
2419 (ch, ..) if ch.is_ascii_whitespace() => (),
2421
2422 _ => {
2423 return (
2424 Token::LexError(LERR::UnexpectedInput(c.to_string()).into()),
2425 start_pos,
2426 )
2427 }
2428 }
2429 }
2430
2431 pos.advance();
2432
2433 (Token::EOF, *pos)
2434}
2435
2436fn parse_identifier_token(
2438 stream: &mut (impl InputStream + ?Sized),
2439 state: &mut TokenizeState,
2440 pos: &mut Position,
2441 start_pos: Position,
2442 first_char: char,
2443) -> (Token, Position) {
2444 let mut identifier = SmartString::new_const();
2445 identifier.push(first_char);
2446 if let Some(ref mut last) = state.last_token {
2447 last.clear();
2448 last.push(first_char);
2449 }
2450
2451 while let Some(next_char) = stream.peek_next() {
2452 match next_char {
2453 x if is_id_continue(x) => {
2454 stream.eat_next_and_advance(pos);
2455 identifier.push(x);
2456 if let Some(ref mut last) = state.last_token {
2457 last.push(x);
2458 }
2459 }
2460 _ => break,
2461 }
2462 }
2463
2464 if let Some(token) = Token::lookup_symbol_from_syntax(&identifier) {
2465 return (token, start_pos);
2466 }
2467
2468 if is_reserved_keyword_or_symbol(&identifier).0 {
2469 return (Token::Reserved(Box::new(identifier)), start_pos);
2470 }
2471
2472 if !is_valid_identifier(&identifier) {
2473 return (
2474 Token::LexError(LERR::MalformedIdentifier(identifier.to_string()).into()),
2475 start_pos,
2476 );
2477 }
2478
2479 (Token::Identifier(identifier.into()), start_pos)
2480}
2481
#[must_use]
pub fn is_valid_identifier(name: &str) -> bool {
    let mut first_alphabetic = false;

    for ch in name.chars() {
        match ch {
            '_' => (),
            _ if is_id_first_alphabetic(ch) => first_alphabetic = true,
            _ if !first_alphabetic => return false,
            _ if char::is_ascii_alphanumeric(&ch) => (),
            _ => return false,
        }
    }

    first_alphabetic
}

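/// Is the text a valid name for a script function?
///
/// It must be a valid identifier that is neither a reserved word nor an
/// existing keyword or symbol of the language.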
#[inline(always)]
#[must_use]
pub fn is_valid_function_name(name: &str) -> bool {
    is_valid_identifier(name)
        && !is_reserved_keyword_or_symbol(name).0
        && Token::lookup_symbol_from_syntax(name).is_none()
}

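/// Can this character start an identifier?
///
/// With the `unicode-xid-ident` feature this follows Unicode `XID_Start`;
/// otherwise only ASCII alphabetic characters are accepted.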
#[inline(always)]
#[must_use]
#[allow(clippy::missing_const_for_fn)]
pub fn is_id_first_alphabetic(x: char) -> bool {
    #[cfg(feature = "unicode-xid-ident")]
    return unicode_xid::UnicodeXID::is_xid_start(x);
    #[cfg(not(feature = "unicode-xid-ident"))]
    return x.is_ascii_alphabetic();
}

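/// Can this character continue an identifier?
///
/// With the `unicode-xid-ident` feature this follows Unicode `XID_Continue`;
/// otherwise ASCII alphanumeric characters and `_` are accepted.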
#[inline(always)]
#[must_use]
#[allow(clippy::missing_const_for_fn)]
pub fn is_id_continue(x: char) -> bool {
    #[cfg(feature = "unicode-xid-ident")]
    return unicode_xid::UnicodeXID::is_xid_continue(x);
    #[cfg(not(feature = "unicode-xid-ident"))]
    return x.is_ascii_alphanumeric() || x == '_';
}

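/// Is a piece of syntax a reserved keyword or reserved symbol?
///
/// The lookup is a hash-table check: the length plus the association values of the
/// first, second and last bytes index into `RESERVED_LIST`, and the candidate entry
/// is then compared against the input text.
///
/// The first flag is `true` if the text is reserved; the other two flags carry the
/// extra per-entry properties stored in `RESERVED_LIST` and are only meaningful when
/// the first flag is `true`.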
#[inline]
#[must_use]
pub fn is_reserved_keyword_or_symbol(syntax: &str) -> (bool, bool, bool) {
    let utf8 = syntax.as_bytes();
    let len = utf8.len();

    if !(MIN_RESERVED_LEN..=MAX_RESERVED_LEN).contains(&len) {
        return (false, false, false);
    }

    let mut hash_val = len;

    match len {
        1 => (),
        _ => hash_val += RESERVED_ASSOC_VALUES[utf8[1] as usize] as usize,
    }
    hash_val += RESERVED_ASSOC_VALUES[utf8[0] as usize] as usize;
    hash_val += RESERVED_ASSOC_VALUES[utf8[len - 1] as usize] as usize;

    if !(MIN_RESERVED_HASH_VALUE..=MAX_RESERVED_HASH_VALUE).contains(&hash_val) {
        return (false, false, false);
    }

    match RESERVED_LIST[hash_val] {
        ("", ..) => (false, false, false),
        (s, true, a, b) => {
            let is_reserved = s.len() == len && s.as_bytes()[0] == utf8[0] && s == syntax;
            (is_reserved, is_reserved && a, is_reserved && a && b)
        }
        _ => (false, false, false),
    }
}

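/// An [`InputStream`] that reads characters from one or more string slices in sequence.
///
/// `buf` is a two-character push-back buffer used by `unget`, `index` is the position
/// of the string currently being read, and `streams` holds the character iterators of
/// all the input strings.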
pub struct MultiInputsStream<'a> {
    pub buf: [Option<char>; 2],
    pub index: usize,
    pub streams: StaticVec<Peekable<Chars<'a>>>,
}

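// Characters pushed back via `unget` are drained from `buf` before any further input
// is taken from the underlying streams; when one stream is exhausted, `index` moves on
// to the next one.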
impl InputStream for MultiInputsStream<'_> {
    #[inline]
    fn unget(&mut self, ch: char) {
        match self.buf {
            [None, ..] => self.buf[0] = Some(ch),
            [_, None] => self.buf[1] = Some(ch),
            _ => unreachable!("cannot unget more than 2 characters!"),
        }
    }
    fn get_next(&mut self) -> Option<char> {
        match self.buf {
            [None, ..] => (),
            [ch @ Some(_), None] => {
                self.buf[0] = None;
                return ch;
            }
            [_, ch @ Some(_)] => {
                self.buf[1] = None;
                return ch;
            }
        }

        loop {
            if self.index >= self.streams.len() {
                return None;
            }
            if let Some(ch) = self.streams[self.index].next() {
                return Some(ch);
            }
            self.index += 1;
        }
    }
    fn peek_next(&mut self) -> Option<char> {
        match self.buf {
            [None, ..] => (),
            [ch @ Some(_), None] => return ch,
            [_, ch @ Some(_)] => return ch,
        }

        loop {
            if self.index >= self.streams.len() {
                return None;
            }
            if let Some(&ch) = self.streams[self.index].peek() {
                return Some(ch);
            }
            self.index += 1;
        }
    }
}

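/// An iterator that yields `(Token, Position)` pairs tokenized from a script.
///
/// It drives a [`MultiInputsStream`] with the [`Engine`]'s settings and optionally
/// passes every token through a user-provided token mapper callback.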
pub struct TokenIterator<'a> {
    pub engine: &'a Engine,
    pub state: TokenizeState,
    pub pos: Position,
    pub stream: MultiInputsStream<'a>,
    pub token_mapper: Option<&'a OnParseTokenCallback>,
}

impl<'a> Iterator for TokenIterator<'a> {
    type Item = (Token, Position);

    fn next(&mut self) -> Option<Self::Item> {
        let (within_interpolated, _char_mode, compress_script) = {
            let control = &mut *self.state.tokenizer_control.borrow_mut();

            if control.is_within_text {
                self.state.is_within_text_terminated_by = Some("`".to_string().into());
                control.is_within_text = false;
            }

            #[cfg(not(feature = "no_custom_syntax"))]
            let in_char_mode = std::mem::take(&mut control.in_char_mode);

            (
                self.state.is_within_text_terminated_by.is_some(),
                #[cfg(not(feature = "no_custom_syntax"))]
                in_char_mode,
                #[cfg(feature = "no_custom_syntax")]
                false,
                control.compressed.is_some(),
            )
        };

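        // Under custom syntax, raw-character mode bypasses tokenization entirely and
        // yields each character as `Token::UnprocessedRawChar`.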
        #[cfg(not(feature = "no_custom_syntax"))]
        if _char_mode {
            if let Some(ch) = self.stream.get_next() {
                let pos = self.pos;
                match ch {
                    '\n' => self.pos.new_line(),
                    _ => self.pos.advance(),
                }
                return Some((Token::UnprocessedRawChar(ch), pos));
            }
        }

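        // Fetch the next token, then post-process reserved words, custom keywords and
        // disabled symbols.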
        let (token, pos) = match get_next_token(&mut self.stream, &mut self.state, &mut self.pos) {
            r @ (Token::EOF, _) => return Some(r),
            (Token::StringConstant(..), pos)
                if self.state.is_within_text_terminated_by.is_some() =>
            {
                self.state.is_within_text_terminated_by = None;
                return Some((Token::LexError(LERR::UnterminatedString.into()), pos));
            }
            (Token::Reserved(s), pos) => (
                match (
                    s.as_str(),
                    #[cfg(not(feature = "no_custom_syntax"))]
                    self.engine.custom_keywords.contains_key(&*s),
                    #[cfg(feature = "no_custom_syntax")]
                    false,
                ) {
                    ("===", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                        "'===' is not a valid operator. This is not JavaScript! Should it be '=='?".to_string(),
                    ).into()),
                    ("!==", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                        "'!==' is not a valid operator. This is not JavaScript! Should it be '!='?".to_string(),
                    ).into()),
                    ("->", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                        "'->' is not a valid symbol. This is not C or C++!".to_string(),
                    ).into()),
                    ("<-", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                        "'<-' is not a valid symbol. This is not Go! Should it be '<='?".to_string(),
                    ).into()),
                    (":=", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                        "':=' is not a valid assignment operator. This is not Go or Pascal! Should it be simply '='?".to_string(),
                    ).into()),
                    (":;", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                        "':;' is not a valid symbol. Should it be '::'?".to_string(),
                    ).into()),
                    ("::<", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                        "'::<>' is not a valid symbol. This is not Rust! Should it be '::'?".to_string(),
                    ).into()),
                    ("(*" | "*)", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                        "'(* .. *)' is not a valid comment format. This is not Pascal! Should it be '/* .. */'?".to_string(),
                    ).into()),
                    ("# {", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                        "'#' is not a valid symbol. Should it be '#{'?".to_string(),
                    ).into()),
                    #[cfg(not(feature = "no_custom_syntax"))]
                    (.., true) => Token::Custom(s),
                    #[cfg(feature = "no_custom_syntax")]
                    (.., true) => unreachable!("no custom operators"),
                    (token, false) if self.engine.is_symbol_disabled(token) => {
                        let msg = format!(
                            "reserved {} '{token}' is disabled",
                            if is_valid_identifier(token) { "keyword" } else { "symbol" }
                        );
                        Token::LexError(LERR::ImproperSymbol(s.to_string(), msg).into())
                    }
                    (.., false) => Token::Reserved(s),
                },
                pos,
            ),
            #[cfg(not(feature = "no_custom_syntax"))]
            (Token::Identifier(s), pos) if self.engine.custom_keywords.contains_key(&*s) => {
                (Token::Custom(s), pos)
            }
            #[cfg(not(feature = "no_custom_syntax"))]
            (token, pos)
                if token.is_literal()
                    && self.engine.custom_keywords.contains_key(token.literal_syntax()) =>
            {
                debug_assert!(
                    self.engine.is_symbol_disabled(token.literal_syntax()),
                    "{:?} is an active keyword",
                    token
                );
                (Token::Custom(Box::new(token.literal_syntax().into())), pos)
            }
            (token, pos)
                if token.is_literal()
                    && self.engine.is_symbol_disabled(token.literal_syntax()) =>
            {
                (Token::Reserved(Box::new(token.literal_syntax().into())), pos)
            }
            r => r,
        };

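        // Run the optional token mapper/filter callback, if any.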
        let token = match self.token_mapper {
            Some(func) => func(token, pos, &self.state),
            None => token,
        };

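        // When script compression is requested, append this token's minimal textual form
        // to the compressed output, inserting a space only where two identifier-like
        // characters would otherwise run together.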
        if compress_script {
            let control = &mut *self.state.tokenizer_control.borrow_mut();

            if token != Token::EOF {
                if let Some(ref mut compressed) = control.compressed {
                    use std::fmt::Write;

                    let last_token = self.state.last_token.as_ref().unwrap();
                    let mut buf = SmartString::new_const();

                    if last_token.is_empty() {
                        write!(buf, "{token}").unwrap();
                    } else if within_interpolated
                        && matches!(
                            token,
                            Token::StringConstant(..) | Token::InterpolatedString(..)
                        )
                    {
                        *compressed += &last_token[1..];
                    } else {
                        buf = last_token.clone();
                    }

                    if !buf.is_empty() && !compressed.is_empty() {
                        let cur = buf.chars().next().unwrap();

                        if cur == '_' || is_id_first_alphabetic(cur) || is_id_continue(cur) {
                            let prev = compressed.chars().last().unwrap();

                            if prev == '_' || is_id_first_alphabetic(prev) || is_id_continue(prev) {
                                *compressed += " ";
                            }
                        }
                    }

                    *compressed += &buf;
                }
            }
        }

        Some((token, pos))
    }
}

impl FusedIterator for TokenIterator<'_> {}

impl Engine {
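    /// Tokenize an input text stream, applying the engine's own token mapper (if any).
    ///
    /// Returns the token iterator together with a shared [`TokenizerControl`] handle.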
    #[expose_under_internals]
    #[inline(always)]
    #[must_use]
    fn lex<'a>(
        &'a self,
        inputs: impl IntoIterator<Item = &'a (impl AsRef<str> + 'a)>,
    ) -> (TokenIterator<'a>, TokenizerControl) {
        self.lex_raw(inputs, self.token_mapper.as_deref())
    }
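    /// Tokenize an input text stream with a user-supplied token mapper callback.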
    #[expose_under_internals]
    #[inline(always)]
    #[must_use]
    fn lex_with_map<'a>(
        &'a self,
        inputs: impl IntoIterator<Item = &'a (impl AsRef<str> + 'a)>,
        token_mapper: &'a OnParseTokenCallback,
    ) -> (TokenIterator<'a>, TokenizerControl) {
        self.lex_raw(inputs, Some(token_mapper))
    }
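    /// Tokenize an input text stream with an optional token mapper.
    ///
    /// The returned [`TokenizerControl`] is shared with the tokenizer state so that
    /// the caller can communicate with the tokenizer while iterating (e.g. text mode
    /// and script compression, as used in `TokenIterator::next`).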
    #[inline]
    #[must_use]
    pub(crate) fn lex_raw<'a>(
        &'a self,
        inputs: impl IntoIterator<Item = &'a (impl AsRef<str> + 'a)>,
        token_mapper: Option<&'a OnParseTokenCallback>,
    ) -> (TokenIterator<'a>, TokenizerControl) {
        let buffer: TokenizerControl = RefCell::new(TokenizerControlBlock::new()).into();
        let buffer2 = buffer.clone();

        (
            TokenIterator {
                engine: self,
                state: TokenizeState {
                    #[cfg(not(feature = "unchecked"))]
                    max_string_len: std::num::NonZeroUsize::new(self.max_string_size()),
                    next_token_cannot_be_unary: false,
                    tokenizer_control: buffer,
                    comment_level: 0,
                    include_comments: false,
                    is_within_text_terminated_by: None,
                    last_token: None,
                },
                pos: Position::new(1, 0),
                stream: MultiInputsStream {
                    buf: [None, None],
                    streams: inputs
                        .into_iter()
                        .map(|s| s.as_ref().chars().peekable())
                        .collect(),
                    index: 0,
                },
                token_mapper,
            },
            buffer2,
        )
    }
}