1use crate::engine::Precedence;
4use crate::func::native::OnParseTokenCallback;
5use crate::{Engine, Identifier, LexError, Position, SmartString, StaticVec, INT, UNSIGNED_INT};
6#[cfg(feature = "no_std")]
7use std::prelude::v1::*;
8use std::{
9 cell::RefCell,
10 char, fmt,
11 iter::{FusedIterator, Peekable},
12 rc::Rc,
13 str::{Chars, FromStr},
14};
15
/// Shared state used to steer a tokenizer from outside the token stream.
///
/// NOTE(review): the field descriptions below are partly inferred from names and
/// types; confirm against the code that reads and writes this block.
#[derive(Debug, Clone, Eq, PartialEq, Default, Hash)]
pub struct TokenizerControlBlock {
    /// Flag stating that the tokenizer is currently positioned inside a text string
    /// (presumably used to resume text parsing after an interpolation — TODO confirm).
    pub is_within_text: bool,
    /// Accumulated global comments.
    ///
    /// Only present when the `metadata` feature is enabled.
    #[cfg(feature = "metadata")]
    pub global_comments: String,
    /// Optional buffer for a compressed form of the script; `None` when not collected.
    pub compressed: Option<String>,
}
31
32impl TokenizerControlBlock {
33 #[inline]
35 #[must_use]
36 pub const fn new() -> Self {
37 Self {
38 is_within_text: false,
39 #[cfg(feature = "metadata")]
40 global_comments: String::new(),
41 compressed: None,
42 }
43 }
44}
45
/// Reference-counted, interior-mutable handle to a [`TokenizerControlBlock`].
pub type TokenizerControl = Rc<RefCell<TokenizerControlBlock>>;
48
/// Short local alias for [`LexError`].
type LERR = LexError;
50
/// The `_` character.
///
/// NOTE(review): presumably accepted as a visual separator inside number literals;
/// its use is not visible in this part of the file — confirm.
const NUMBER_SEPARATOR: char = '_';
53
/// A [`Peekable`] wrapper around a [`TokenIterator`].
pub type TokenStream<'a> = Peekable<TokenIterator<'a>>;
56
/// A token produced by the tokenizer.
///
/// Variants that stand for a fixed piece of syntax are documented with the text
/// that [`Token::literal_syntax`] returns for them.
#[derive(Debug, PartialEq, Clone, Hash)]
#[non_exhaustive]
pub enum Token {
    /// An integer constant.
    IntegerConstant(INT),
    /// A floating-point constant, paired with an [`Identifier`]
    /// (presumably the original source text — TODO confirm).
    ///
    /// Not available under `no_float`.
    #[cfg(not(feature = "no_float"))]
    FloatConstant(Box<(crate::types::FloatWrapper<crate::FLOAT>, Identifier)>),
    /// A decimal constant, paired with an [`Identifier`]
    /// (presumably the original source text — TODO confirm).
    ///
    /// Requires the `decimal` feature.
    #[cfg(feature = "decimal")]
    DecimalConstant(Box<(rust_decimal::Decimal, Identifier)>),
    /// An identifier.
    Identifier(Box<Identifier>),
    /// A character constant.
    CharConstant(char),
    /// A string constant.
    StringConstant(Box<SmartString>),
    /// A segment of an interpolated string.
    InterpolatedString(Box<SmartString>),
    /// `{`
    LeftBrace,
    /// `}`
    RightBrace,
    /// `(`
    LeftParen,
    /// `)`
    RightParen,
    /// `[`
    LeftBracket,
    /// `]`
    RightBracket,
    /// `()`
    Unit,
    /// `+`
    Plus,
    /// `+` (unary)
    UnaryPlus,
    /// `-`
    Minus,
    /// `-` (unary)
    UnaryMinus,
    /// `*`
    Multiply,
    /// `/`
    Divide,
    /// `%`
    Modulo,
    /// `**`
    PowerOf,
    /// `<<`
    LeftShift,
    /// `>>`
    RightShift,
    /// `;`
    SemiColon,
    /// `:`
    Colon,
    /// `::`
    DoubleColon,
    /// `=>`
    DoubleArrow,
    /// `_`
    Underscore,
    /// `,`
    Comma,
    /// `.`
    Period,
    /// `?.`
    ///
    /// Not available under `no_object`.
    #[cfg(not(feature = "no_object"))]
    Elvis,
    /// `??`
    DoubleQuestion,
    /// `?[`
    ///
    /// Not available under `no_index`.
    #[cfg(not(feature = "no_index"))]
    QuestionBracket,
    /// `..`
    ExclusiveRange,
    /// `..=`
    InclusiveRange,
    /// `#{`
    MapStart,
    /// `=`
    Equals,
    /// `true`
    True,
    /// `false`
    False,
    /// `let`
    Let,
    /// `const`
    Const,
    /// `if`
    If,
    /// `else`
    Else,
    /// `switch`
    Switch,
    /// `do`
    Do,
    /// `while`
    While,
    /// `until`
    Until,
    /// `loop`
    Loop,
    /// `for`
    For,
    /// `in`
    In,
    /// `!in`
    NotIn,
    /// `<`
    LessThan,
    /// `>`
    GreaterThan,
    /// `<=`
    LessThanEqualsTo,
    /// `>=`
    GreaterThanEqualsTo,
    /// `==`
    EqualsTo,
    /// `!=`
    NotEqualsTo,
    /// `!`
    Bang,
    /// `|`
    Pipe,
    /// `||`
    Or,
    /// `^`
    XOr,
    /// `&`
    Ampersand,
    /// `&&`
    And,
    /// `fn`
    ///
    /// Not available under `no_function`.
    #[cfg(not(feature = "no_function"))]
    Fn,
    /// `continue`
    Continue,
    /// `break`
    Break,
    /// `return`
    Return,
    /// `throw`
    Throw,
    /// `try`
    Try,
    /// `catch`
    Catch,
    /// `+=`
    PlusAssign,
    /// `-=`
    MinusAssign,
    /// `*=`
    MultiplyAssign,
    /// `/=`
    DivideAssign,
    /// `<<=`
    LeftShiftAssign,
    /// `>>=`
    RightShiftAssign,
    /// `&=`
    AndAssign,
    /// `|=`
    OrAssign,
    /// `^=`
    XOrAssign,
    /// `%=`
    ModuloAssign,
    /// `**=`
    PowerOfAssign,
    /// `private`
    ///
    /// Not available under `no_function`.
    #[cfg(not(feature = "no_function"))]
    Private,
    /// `import`
    ///
    /// Not available under `no_module`.
    #[cfg(not(feature = "no_module"))]
    Import,
    /// `export`
    ///
    /// Not available under `no_module`.
    #[cfg(not(feature = "no_module"))]
    Export,
    /// `as`
    ///
    /// Not available under `no_module`.
    #[cfg(not(feature = "no_module"))]
    As,
    /// A lexer error.
    LexError(Box<LexError>),
    /// A comment block.
    Comment(Box<String>),
    /// A reserved symbol or keyword.
    Reserved(Box<Identifier>),
    /// A custom keyword.
    ///
    /// Not available under `no_custom_syntax`.
    #[cfg(not(feature = "no_custom_syntax"))]
    Custom(Box<Identifier>),
    /// End of the input stream; also used as the "empty slot" marker in `KEYWORDS_LIST`.
    EOF,
}
276
277impl fmt::Display for Token {
278 #[inline(always)]
279 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
280 #[allow(clippy::enum_glob_use)]
281 use Token::*;
282
283 match self {
284 IntegerConstant(i) => write!(f, "{i}"),
285 #[cfg(not(feature = "no_float"))]
286 FloatConstant(v) => write!(f, "{}", v.0),
287 #[cfg(feature = "decimal")]
288 DecimalConstant(d) => write!(f, "{}", d.0),
289 StringConstant(s) => write!(f, r#""{s}""#),
290 InterpolatedString(..) => f.write_str("string"),
291 CharConstant(c) => write!(f, "{c}"),
292 Identifier(s) => f.write_str(s),
293 Reserved(s) => f.write_str(s),
294 #[cfg(not(feature = "no_custom_syntax"))]
295 Custom(s) => f.write_str(s),
296 LexError(err) => write!(f, "{err}"),
297 Comment(s) => f.write_str(s),
298
299 EOF => f.write_str("{EOF}"),
300
301 token => f.write_str(token.literal_syntax()),
302 }
303 }
304}
305
// Bounds used by `Token::lookup_symbol_from_syntax` to reject inputs early:
// anything whose byte length or computed hash falls outside these ranges is not a keyword/symbol.

/// Smallest byte length accepted by the keyword lookup.
const MIN_KEYWORD_LEN: usize = 1;
/// Largest byte length accepted by the keyword lookup.
const MAX_KEYWORD_LEN: usize = 8;
/// Smallest hash value accepted by the keyword lookup.
const MIN_KEYWORD_HASH_VALUE: usize = 1;
/// Largest hash value accepted by the keyword lookup (the last index of `KEYWORDS_LIST`).
const MAX_KEYWORD_HASH_VALUE: usize = 152;
314
/// Per-byte weights for the keyword hash computed in `Token::lookup_symbol_from_syntax`.
///
/// The first byte of the input indexes this table directly, while the second byte
/// indexes it at `byte + 1`; the table therefore has 257 entries so that a second
/// byte of `0xFF` stays in bounds.
static KEYWORD_ASSOC_VALUES: [u8; 257] = [
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 115, 153, 100, 153, 110,
    105, 40, 80, 2, 20, 25, 125, 95, 15, 40, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 55,
    35, 10, 5, 0, 30, 110, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 120, 105, 100, 85, 90, 153, 125, 5,
    0, 125, 35, 10, 100, 153, 20, 0, 153, 10, 0, 45, 55, 0, 153, 50, 55, 5, 0, 153, 0, 0, 35, 153,
    45, 50, 30, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
    153,
];
/// Hash-indexed table of keywords and symbols, consulted by `Token::lookup_symbol_from_syntax`.
///
/// The index of each entry is the hash of its text (byte length plus the
/// `KEYWORD_ASSOC_VALUES` weights of its first two bytes), so entries must never be
/// reordered. Unused slots hold `("", Token::EOF)`, which the lookup treats as a miss.
/// Feature-gated tokens are replaced by such a placeholder when compiled out, keeping
/// every other index unchanged.
static KEYWORDS_LIST: [(&str, Token); 153] = [
    ("", Token::EOF),
    (">", Token::GreaterThan),
    (">=", Token::GreaterThanEqualsTo),
    (")", Token::RightParen),
    ("", Token::EOF),
    ("const", Token::Const),
    ("=", Token::Equals),
    ("==", Token::EqualsTo),
    ("continue", Token::Continue),
    ("", Token::EOF),
    ("catch", Token::Catch),
    ("<", Token::LessThan),
    ("<=", Token::LessThanEqualsTo),
    ("for", Token::For),
    ("loop", Token::Loop),
    ("", Token::EOF),
    (".", Token::Period),
    ("<<", Token::LeftShift),
    ("<<=", Token::LeftShiftAssign),
    ("", Token::EOF),
    ("false", Token::False),
    ("*", Token::Multiply),
    ("*=", Token::MultiplyAssign),
    ("let", Token::Let),
    ("", Token::EOF),
    ("while", Token::While),
    ("+", Token::Plus),
    ("+=", Token::PlusAssign),
    ("", Token::EOF),
    ("", Token::EOF),
    ("throw", Token::Throw),
    ("}", Token::RightBrace),
    (">>", Token::RightShift),
    (">>=", Token::RightShiftAssign),
    ("", Token::EOF),
    ("", Token::EOF),
    (";", Token::SemiColon),
    ("=>", Token::DoubleArrow),
    ("", Token::EOF),
    ("else", Token::Else),
    ("", Token::EOF),
    ("/", Token::Divide),
    ("/=", Token::DivideAssign),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("{", Token::LeftBrace),
    ("**", Token::PowerOf),
    ("**=", Token::PowerOfAssign),
    ("", Token::EOF),
    ("", Token::EOF),
    ("|", Token::Pipe),
    ("|=", Token::OrAssign),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    (":", Token::Colon),
    ("..", Token::ExclusiveRange),
    ("..=", Token::InclusiveRange),
    ("", Token::EOF),
    ("until", Token::Until),
    ("switch", Token::Switch),
    #[cfg(not(feature = "no_function"))]
    ("private", Token::Private),
    #[cfg(feature = "no_function")]
    ("", Token::EOF),
    ("try", Token::Try),
    ("true", Token::True),
    ("break", Token::Break),
    ("return", Token::Return),
    #[cfg(not(feature = "no_function"))]
    ("fn", Token::Fn),
    #[cfg(feature = "no_function")]
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    #[cfg(not(feature = "no_module"))]
    ("import", Token::Import),
    #[cfg(feature = "no_module")]
    ("", Token::EOF),
    #[cfg(not(feature = "no_object"))]
    ("?.", Token::Elvis),
    #[cfg(feature = "no_object")]
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    #[cfg(not(feature = "no_module"))]
    ("export", Token::Export),
    #[cfg(feature = "no_module")]
    ("", Token::EOF),
    ("in", Token::In),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("(", Token::LeftParen),
    ("||", Token::Or),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("^", Token::XOr),
    ("^=", Token::XOrAssign),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("_", Token::Underscore),
    ("::", Token::DoubleColon),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("-", Token::Minus),
    ("-=", Token::MinusAssign),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("]", Token::RightBracket),
    ("()", Token::Unit),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("&", Token::Ampersand),
    ("&=", Token::AndAssign),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("%", Token::Modulo),
    ("%=", Token::ModuloAssign),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("!", Token::Bang),
    ("!=", Token::NotEqualsTo),
    ("!in", Token::NotIn),
    ("", Token::EOF),
    ("", Token::EOF),
    ("[", Token::LeftBracket),
    ("if", Token::If),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    (",", Token::Comma),
    ("do", Token::Do),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    #[cfg(not(feature = "no_module"))]
    ("as", Token::As),
    #[cfg(feature = "no_module")]
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    #[cfg(not(feature = "no_index"))]
    ("?[", Token::QuestionBracket),
    #[cfg(feature = "no_index")]
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("??", Token::DoubleQuestion),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("&&", Token::And),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("#{", Token::MapStart),
];
507
// Length and hash bounds for the reserved-word table below.
// NOTE(review): the lookup that consumes these is not visible in this part of the file;
// presumably it mirrors the keyword lookup — confirm.

/// Smallest byte length of a reserved word/symbol.
const MIN_RESERVED_LEN: usize = 1;
/// Largest byte length of a reserved word/symbol.
const MAX_RESERVED_LEN: usize = 10;
/// Smallest valid hash value for the reserved-word table.
const MIN_RESERVED_HASH_VALUE: usize = 1;
/// Largest valid hash value for the reserved-word table (the last index of `RESERVED_LIST`).
const MAX_RESERVED_HASH_VALUE: usize = 149;
516
/// Per-byte weights for hashing reserved words and symbols into `RESERVED_LIST`.
///
/// NOTE(review): the hashing code that reads this table is not visible in this part
/// of the file; unlike `KEYWORD_ASSOC_VALUES` it has exactly 256 entries, so it is
/// presumably never indexed at `byte + 1` — confirm.
static RESERVED_ASSOC_VALUES: [u8; 256] = [
    150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
    150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 10, 150, 5, 35, 150, 150,
    150, 45, 35, 30, 30, 150, 20, 15, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 35,
    30, 15, 5, 25, 0, 25, 150, 150, 150, 150, 150, 65, 150, 150, 150, 150, 150, 150, 150, 150, 150,
    150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 40, 150, 150, 150, 150, 150, 0, 150, 0,
    0, 0, 15, 45, 10, 15, 150, 150, 35, 25, 10, 50, 0, 150, 5, 0, 15, 0, 5, 25, 45, 15, 150, 150,
    25, 150, 20, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
    150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
    150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
    150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
    150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
    150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
    150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
];
/// Hash-indexed table of reserved words and symbols.
///
/// Each entry is `(text, flag, flag, flag)`; unused slots hold `("", false, false, false)`.
/// Several first flags are computed with `cfg!`, so they depend on the enabled features
/// (e.g. `fn` is flagged only when `no_function` is on).
///
/// NOTE(review): the code that reads this table is not visible in this part of the file.
/// The first flag presumably means "is reserved"; the second and third presumably
/// describe whether the word may be used in function-call and method-call style —
/// confirm against the lookup code. Entries must not be reordered, as the index is
/// presumably a hash of the text.
static RESERVED_LIST: [(&str, bool, bool, bool); 150] = [
    ("", false, false, false),
    ("?", true, false, false),
    ("as", cfg!(feature = "no_module"), false, false),
    ("use", true, false, false),
    ("case", true, false, false),
    ("async", true, false, false),
    ("public", true, false, false),
    ("package", true, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("super", true, false, false),
    ("#", true, false, false),
    ("private", cfg!(feature = "no_function"), false, false),
    ("var", true, false, false),
    ("protected", true, false, false),
    ("spawn", true, false, false),
    ("shared", true, false, false),
    ("is", true, false, false),
    ("===", true, false, false),
    ("sync", true, false, false),
    ("curry", true, true, true),
    ("static", true, false, false),
    ("default", true, false, false),
    ("!==", true, false, false),
    ("is_shared", cfg!(not(feature = "no_closure")), true, true),
    ("print", true, true, false),
    ("", false, false, false),
    ("#!", true, false, false),
    ("", false, false, false),
    ("this", true, false, false),
    ("is_def_var", true, true, false),
    ("thread", true, false, false),
    ("?.", cfg!(feature = "no_object"), false, false),
    ("", false, false, false),
    ("is_def_fn", cfg!(not(feature = "no_function")), true, false),
    ("yield", true, false, false),
    ("", false, false, false),
    ("fn", cfg!(feature = "no_function"), false, false),
    ("new", true, false, false),
    ("call", true, true, true),
    ("match", true, false, false),
    ("~", true, false, false),
    ("!.", true, false, false),
    ("", false, false, false),
    ("eval", true, true, false),
    ("await", true, false, false),
    ("", false, false, false),
    (":=", true, false, false),
    ("...", true, false, false),
    ("null", true, false, false),
    ("debug", true, true, false),
    ("@", true, false, false),
    ("type_of", true, true, true),
    ("", false, false, false),
    ("with", true, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("<-", true, false, false),
    ("", false, false, false),
    ("void", true, false, false),
    ("", false, false, false),
    ("import", cfg!(feature = "no_module"), false, false),
    ("--", true, false, false),
    ("nil", true, false, false),
    ("exit", false, false, false),
    ("", false, false, false),
    ("export", cfg!(feature = "no_module"), false, false),
    ("<|", true, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("$", true, false, false),
    ("->", true, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("|>", true, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("module", true, false, false),
    ("?[", cfg!(feature = "no_index"), false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("Fn", true, true, false),
    ("::<", true, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("++", true, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    (":;", true, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("*)", true, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("(*", true, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("go", true, false, false),
    ("", false, false, false),
    ("goto", true, false, false),
];
684
685impl Token {
686 #[must_use]
688 pub const fn is_literal(&self) -> bool {
689 #[allow(clippy::enum_glob_use)]
690 use Token::*;
691
692 match self {
693 IntegerConstant(..) => false,
694 #[cfg(not(feature = "no_float"))]
695 FloatConstant(..) => false,
696 #[cfg(feature = "decimal")]
697 DecimalConstant(..) => false,
698 StringConstant(..)
699 | InterpolatedString(..)
700 | CharConstant(..)
701 | Identifier(..)
702 | Reserved(..) => false,
703 #[cfg(not(feature = "no_custom_syntax"))]
704 Custom(..) => false,
705 LexError(..) | Comment(..) => false,
706
707 EOF => false,
708
709 _ => true,
710 }
711 }
712 #[must_use]
718 pub const fn literal_syntax(&self) -> &'static str {
719 #[allow(clippy::enum_glob_use)]
720 use Token::*;
721
722 match self {
723 LeftBrace => "{",
724 RightBrace => "}",
725 LeftParen => "(",
726 RightParen => ")",
727 LeftBracket => "[",
728 RightBracket => "]",
729 Unit => "()",
730 Plus => "+",
731 UnaryPlus => "+",
732 Minus => "-",
733 UnaryMinus => "-",
734 Multiply => "*",
735 Divide => "/",
736 SemiColon => ";",
737 Colon => ":",
738 DoubleColon => "::",
739 DoubleArrow => "=>",
740 Underscore => "_",
741 Comma => ",",
742 Period => ".",
743 #[cfg(not(feature = "no_object"))]
744 Elvis => "?.",
745 DoubleQuestion => "??",
746 #[cfg(not(feature = "no_index"))]
747 QuestionBracket => "?[",
748 ExclusiveRange => "..",
749 InclusiveRange => "..=",
750 MapStart => "#{",
751 Equals => "=",
752 True => "true",
753 False => "false",
754 Let => "let",
755 Const => "const",
756 If => "if",
757 Else => "else",
758 Switch => "switch",
759 Do => "do",
760 While => "while",
761 Until => "until",
762 Loop => "loop",
763 For => "for",
764 In => "in",
765 NotIn => "!in",
766 LessThan => "<",
767 GreaterThan => ">",
768 Bang => "!",
769 LessThanEqualsTo => "<=",
770 GreaterThanEqualsTo => ">=",
771 EqualsTo => "==",
772 NotEqualsTo => "!=",
773 Pipe => "|",
774 Or => "||",
775 Ampersand => "&",
776 And => "&&",
777 Continue => "continue",
778 Break => "break",
779 Return => "return",
780 Throw => "throw",
781 Try => "try",
782 Catch => "catch",
783 PlusAssign => "+=",
784 MinusAssign => "-=",
785 MultiplyAssign => "*=",
786 DivideAssign => "/=",
787 LeftShiftAssign => "<<=",
788 RightShiftAssign => ">>=",
789 AndAssign => "&=",
790 OrAssign => "|=",
791 XOrAssign => "^=",
792 LeftShift => "<<",
793 RightShift => ">>",
794 XOr => "^",
795 Modulo => "%",
796 ModuloAssign => "%=",
797 PowerOf => "**",
798 PowerOfAssign => "**=",
799
800 #[cfg(not(feature = "no_function"))]
801 Fn => "fn",
802 #[cfg(not(feature = "no_function"))]
803 Private => "private",
804
805 #[cfg(not(feature = "no_module"))]
806 Import => "import",
807 #[cfg(not(feature = "no_module"))]
808 Export => "export",
809 #[cfg(not(feature = "no_module"))]
810 As => "as",
811
812 _ => panic!("token is not a literal symbol"),
813 }
814 }
815
816 #[inline]
818 #[must_use]
819 pub const fn is_op_assignment(&self) -> bool {
820 #[allow(clippy::enum_glob_use)]
821 use Token::*;
822
823 matches!(
824 self,
825 PlusAssign
826 | MinusAssign
827 | MultiplyAssign
828 | DivideAssign
829 | LeftShiftAssign
830 | RightShiftAssign
831 | ModuloAssign
832 | PowerOfAssign
833 | AndAssign
834 | OrAssign
835 | XOrAssign
836 )
837 }
838
839 #[must_use]
841 pub const fn get_base_op_from_assignment(&self) -> Option<Self> {
842 #[allow(clippy::enum_glob_use)]
843 use Token::*;
844
845 Some(match self {
846 PlusAssign => Plus,
847 MinusAssign => Minus,
848 MultiplyAssign => Multiply,
849 DivideAssign => Divide,
850 LeftShiftAssign => LeftShift,
851 RightShiftAssign => RightShift,
852 ModuloAssign => Modulo,
853 PowerOfAssign => PowerOf,
854 AndAssign => Ampersand,
855 OrAssign => Pipe,
856 XOrAssign => XOr,
857 _ => return None,
858 })
859 }
860
861 #[inline]
863 #[must_use]
864 pub const fn has_op_assignment(&self) -> bool {
865 #[allow(clippy::enum_glob_use)]
866 use Token::*;
867
868 matches!(
869 self,
870 Plus | Minus
871 | Multiply
872 | Divide
873 | LeftShift
874 | RightShift
875 | Modulo
876 | PowerOf
877 | Ampersand
878 | Pipe
879 | XOr
880 )
881 }
882
883 #[must_use]
885 pub const fn convert_to_op_assignment(&self) -> Option<Self> {
886 #[allow(clippy::enum_glob_use)]
887 use Token::*;
888
889 Some(match self {
890 Plus => PlusAssign,
891 Minus => MinusAssign,
892 Multiply => MultiplyAssign,
893 Divide => DivideAssign,
894 LeftShift => LeftShiftAssign,
895 RightShift => RightShiftAssign,
896 Modulo => ModuloAssign,
897 PowerOf => PowerOfAssign,
898 Ampersand => AndAssign,
899 Pipe => OrAssign,
900 XOr => XOrAssign,
901 _ => return None,
902 })
903 }
904
905 #[inline]
907 #[must_use]
908 pub fn lookup_symbol_from_syntax(syntax: &str) -> Option<Self> {
909 let utf8 = syntax.as_bytes();
912 let len = utf8.len();
913
914 if !(MIN_KEYWORD_LEN..=MAX_KEYWORD_LEN).contains(&len) {
915 return None;
916 }
917
918 let mut hash_val = len;
919
920 match len {
921 1 => (),
922 _ => hash_val += KEYWORD_ASSOC_VALUES[(utf8[1] as usize) + 1] as usize,
923 }
924 hash_val += KEYWORD_ASSOC_VALUES[utf8[0] as usize] as usize;
925
926 if !(MIN_KEYWORD_HASH_VALUE..=MAX_KEYWORD_HASH_VALUE).contains(&hash_val) {
927 return None;
928 }
929
930 match KEYWORDS_LIST[hash_val] {
931 (_, Self::EOF) => None,
932 (s, ref t) if s.len() == len && s.as_bytes()[0] == utf8[0] && s == syntax => {
935 Some(t.clone())
936 }
937 _ => None,
938 }
939 }
940
941 #[must_use]
944 pub const fn is_next_unary(&self) -> bool {
945 #[allow(clippy::enum_glob_use)]
946 use Token::*;
947
948 match self {
949 SemiColon | Colon | Comma | DoubleQuestion | ExclusiveRange | InclusiveRange | LeftBrace | LeftParen | LeftBracket | Plus |
964 PlusAssign |
965 UnaryPlus |
966 Minus |
967 MinusAssign |
968 UnaryMinus |
969 Multiply |
970 MultiplyAssign |
971 Divide |
972 DivideAssign |
973 Modulo |
974 ModuloAssign |
975 PowerOf |
976 PowerOfAssign |
977 LeftShift |
978 LeftShiftAssign |
979 RightShift |
980 RightShiftAssign |
981 Equals |
982 EqualsTo |
983 NotEqualsTo |
984 LessThan |
985 GreaterThan |
986 Bang |
987 LessThanEqualsTo |
988 GreaterThanEqualsTo |
989 Pipe |
990 Ampersand |
991 If |
992 While |
994 Until |
995 In |
996 NotIn |
997 And |
998 AndAssign |
999 Or |
1000 OrAssign |
1001 XOr |
1002 XOrAssign |
1003 Return |
1004 Throw => true,
1005
1006 #[cfg(not(feature = "no_index"))]
1007 QuestionBracket => true, LexError(..) => true,
1010
1011 _ => false,
1012 }
1013 }
1014
1015 #[must_use]
1017 pub const fn precedence(&self) -> Option<Precedence> {
1018 #[allow(clippy::enum_glob_use)]
1019 use Token::*;
1020
1021 Precedence::new(match self {
1022 Or | XOr | Pipe => 30,
1023
1024 And | Ampersand => 60,
1025
1026 EqualsTo | NotEqualsTo => 90,
1027
1028 In | NotIn => 110,
1029
1030 LessThan | LessThanEqualsTo | GreaterThan | GreaterThanEqualsTo => 130,
1031
1032 DoubleQuestion => 135,
1033
1034 ExclusiveRange | InclusiveRange => 140,
1035
1036 Plus | Minus => 150,
1037
1038 Divide | Multiply | Modulo => 180,
1039
1040 PowerOf => 190,
1041
1042 LeftShift | RightShift => 210,
1043
1044 _ => 0,
1045 })
1046 }
1047
1048 #[must_use]
1050 pub const fn is_bind_right(&self) -> bool {
1051 #[allow(clippy::enum_glob_use)]
1052 use Token::*;
1053
1054 match self {
1055 PowerOf => true,
1057
1058 _ => false,
1059 }
1060 }
1061
1062 #[must_use]
1064 pub const fn is_standard_symbol(&self) -> bool {
1065 #[allow(clippy::enum_glob_use)]
1066 use Token::*;
1067
1068 match self {
1069 LeftBrace | RightBrace | LeftParen | RightParen | LeftBracket | RightBracket | Plus
1070 | UnaryPlus | Minus | UnaryMinus | Multiply | Divide | Modulo | PowerOf | LeftShift
1071 | RightShift | SemiColon | Colon | DoubleColon | Comma | Period | DoubleQuestion
1072 | ExclusiveRange | InclusiveRange | MapStart | Equals | LessThan | GreaterThan
1073 | LessThanEqualsTo | GreaterThanEqualsTo | EqualsTo | NotEqualsTo | Bang | Pipe
1074 | Or | XOr | Ampersand | And | PlusAssign | MinusAssign | MultiplyAssign
1075 | DivideAssign | LeftShiftAssign | RightShiftAssign | AndAssign | OrAssign
1076 | XOrAssign | ModuloAssign | PowerOfAssign => true,
1077
1078 #[cfg(not(feature = "no_object"))]
1079 Elvis => true,
1080
1081 #[cfg(not(feature = "no_index"))]
1082 QuestionBracket => true,
1083
1084 _ => false,
1085 }
1086 }
1087
1088 #[inline]
1090 #[must_use]
1091 pub const fn is_standard_keyword(&self) -> bool {
1092 #[allow(clippy::enum_glob_use)]
1093 use Token::*;
1094
1095 match self {
1096 #[cfg(not(feature = "no_function"))]
1097 Fn | Private => true,
1098
1099 #[cfg(not(feature = "no_module"))]
1100 Import | Export | As => true,
1101
1102 True | False | Let | Const | If | Else | Do | While | Until | Loop | For | In
1103 | Continue | Break | Return | Throw | Try | Catch => true,
1104
1105 _ => false,
1106 }
1107 }
1108
1109 #[inline(always)]
1111 #[must_use]
1112 pub const fn is_reserved(&self) -> bool {
1113 matches!(self, Self::Reserved(..))
1114 }
1115
1116 #[cfg(not(feature = "no_custom_syntax"))]
1118 #[inline(always)]
1119 #[must_use]
1120 pub const fn is_custom(&self) -> bool {
1121 matches!(self, Self::Custom(..))
1122 }
1123}
1124
1125impl From<Token> for String {
1126 #[inline(always)]
1127 fn from(token: Token) -> Self {
1128 token.to_string()
1129 }
1130}
1131
/// Mutable state carried by the tokenizer between calls.
#[derive(Debug, Clone, Eq, PartialEq, Default)]
pub struct TokenizeState {
    /// Maximum length of a string literal; `None` means no limit.
    ///
    /// `parse_string_literal` compares this against the byte length of the parsed
    /// text and fails with `StringTooLong` when exceeded.
    ///
    /// Not available under `unchecked`.
    #[cfg(not(feature = "unchecked"))]
    pub max_string_len: Option<std::num::NonZeroUsize>,
    /// Set by `get_next_token` after each token: `true` when the token just
    /// returned does not allow a following operator to be unary.
    pub next_token_cannot_be_unary: bool,
    /// Shared handle to the tokenizer control block.
    pub tokenizer_control: TokenizerControl,
    /// Current block-comment nesting level
    /// (presumably non-zero while inside an unfinished block comment — TODO confirm).
    pub comment_level: usize,
    /// Whether comments are included in the output
    /// (presumably as `Token::Comment` — TODO confirm).
    pub include_comments: bool,
    /// While a string literal is being parsed, the character that terminates it.
    ///
    /// `parse_string_literal` sets this on entry and resets it to `None` when the
    /// literal ends or is found to be unterminated.
    pub is_within_text_terminated_by: Option<char>,
    /// When `Some`, a buffer receiving the raw source text of the token being parsed;
    /// `parse_string_literal` clears it and appends every character it consumes.
    pub last_token: Option<SmartString>,
}
1156
/// A stream of characters that the tokenizer reads from.
pub trait InputStream {
    /// Put a character back into the stream.
    ///
    /// NOTE(review): how many characters may be un-got, and the order in which they
    /// are returned, depends on the implementation — not visible here.
    fn unget(&mut self, ch: char);
    /// Consume and return the next character, or `None` at the end of the input.
    fn get_next(&mut self) -> Option<char>;
    /// Return the next character without consuming it, or `None` at the end of the input.
    #[must_use]
    fn peek_next(&mut self) -> Option<char>;

    /// Advance `pos` by one character, then consume and return the next character.
    ///
    /// The position is advanced even when the stream is exhausted and `None` is returned.
    #[inline(always)]
    fn eat_next_and_advance(&mut self, pos: &mut Position) -> Option<char> {
        pos.advance();
        self.get_next()
    }
}
1177
/// Parse the body of a string literal from `stream`, up to and including its terminator.
///
/// The opening delimiter is expected to have been consumed already.
///
/// # Parameters
///
/// * `stream` - character source.
/// * `state` - tokenizer state; `is_within_text_terminated_by` is set on entry and
///   cleared when the literal ends, and `last_token` (if `Some`) receives the raw text.
/// * `pos` - current position, advanced as characters are consumed.
/// * `termination_char` - the character that closes the literal. Two of them in a
///   row stand for one literal occurrence of the character.
/// * `verbatim` - when `true`, backslashes are ordinary characters, line feeds are
///   kept, and end of input ends the literal without error.
/// * `allow_line_continuation` - when `true`, a backslash directly before a line feed
///   continues the string on the next line; leading whitespace on that line is
///   skipped up to the column of the start of the string.
/// * `allow_interpolation` - when `true`, an unescaped `${` stops parsing so that the
///   caller can handle an interpolated expression.
///
/// # Returns
///
/// `(text, interpolated, first_char)`: the parsed text, whether parsing stopped at a
/// `${`, and the position recorded after the first character of the body was consumed
/// (`Position::NONE` if there was none).
///
/// # Errors
///
/// * `UnterminatedString` (at the starting position) on end of input or on a bare line
///   feed in a non-verbatim string.
/// * `MalformedEscapeSequence` for an unknown escape, or a `\x`, `\u` or `\U` escape
///   that is cut short, contains a non-hex digit, or is not a valid `char`.
/// * `StringTooLong` (not under `unchecked`) when the text exceeds `state.max_string_len`.
pub fn parse_string_literal(
    stream: &mut (impl InputStream + ?Sized),
    state: &mut TokenizeState,
    pos: &mut Position,
    termination_char: char,
    verbatim: bool,
    allow_line_continuation: bool,
    allow_interpolation: bool,
) -> Result<(SmartString, bool, Position), (LexError, Position)> {
    // `result` accumulates the decoded text; `escape` holds a pending backslash.
    let mut result = SmartString::new_const();
    let mut escape = SmartString::new_const();

    let start = *pos;
    let mut first_char = Position::NONE;
    let mut interpolated = false;
    // Column before which whitespace is skipped after a line continuation (0 = off).
    #[cfg(not(feature = "no_position"))]
    let mut skip_space_until = 0;

    state.is_within_text_terminated_by = Some(termination_char);
    if let Some(ref mut last) = state.last_token {
        last.clear();
        last.push(termination_char);
    }

    loop {
        debug_assert!(
            !verbatim || escape.is_empty(),
            "verbatim strings should not have any escapes"
        );

        let next_char = match stream.get_next() {
            Some(ch) => {
                pos.advance();
                ch
            }
            // End of input inside a verbatim string: accept what has been read so far.
            // `is_within_text_terminated_by` deliberately stays set in this case.
            None if verbatim => {
                debug_assert_eq!(escape, "", "verbatim strings should not have any escapes");
                pos.advance();
                break;
            }
            // End of input right after a line-continuation backslash
            None if allow_line_continuation && !escape.is_empty() => {
                debug_assert_eq!(escape, "\\", "unexpected escape {escape} at end of line");
                pos.advance();
                break;
            }
            // End of input anywhere else: the string is unterminated
            None => {
                pos.advance();
                state.is_within_text_terminated_by = None;
                return Err((LERR::UnterminatedString, start));
            }
        };

        if let Some(ref mut last) = state.last_token {
            last.push(next_char);
        }

        // String interpolation? Stop at an unescaped `${`, leaving the `{` unread.
        if allow_interpolation
            && next_char == '$'
            && escape.is_empty()
            && stream.peek_next().map_or(false, |ch| ch == '{')
        {
            interpolated = true;
            state.is_within_text_terminated_by = None;
            break;
        }

        // Check string length
        #[cfg(not(feature = "unchecked"))]
        if let Some(max) = state.max_string_len {
            if result.len() > max.get() {
                return Err((LexError::StringTooLong(max.get()), start));
            }
        }

        // Close wrapper
        if termination_char == next_char && escape.is_empty() {
            // Doubled wrapper: consume the second one; the first is pushed to the
            // result by the catch-all arm of the `match` below.
            if stream.peek_next().map_or(false, |c| c == termination_char) {
                stream.eat_next_and_advance(pos);
                if let Some(ref mut last) = state.last_token {
                    last.push(termination_char);
                }
            } else {
                state.is_within_text_terminated_by = None;
                break;
            }
        }

        if first_char.is_none() {
            first_char = *pos;
        }

        match next_char {
            // CR - ignored when followed by LF
            '\r' if stream.peek_next().map_or(false, |ch| ch == '\n') => (),
            // \r
            'r' if !escape.is_empty() => {
                escape.clear();
                result.push('\r');
            }
            // \n
            'n' if !escape.is_empty() => {
                escape.clear();
                result.push('\n');
            }
            // Start of an escape sequence
            '\\' if !verbatim && escape.is_empty() => {
                escape.push('\\');
            }
            // \\
            '\\' if !escape.is_empty() => {
                escape.clear();
                result.push('\\');
            }
            // \t
            't' if !escape.is_empty() => {
                escape.clear();
                result.push('\t');
            }
            // \x??, \u????, \U????????
            ch @ ('x' | 'u' | 'U') if !escape.is_empty() => {
                // `seq` collects the raw escape text for error reporting.
                let mut seq = escape.clone();
                escape.clear();
                seq.push(ch);

                let mut out_val: u32 = 0;
                // Number of hex digits required by each escape form
                let len = match ch {
                    'x' => 2,
                    'u' => 4,
                    'U' => 8,
                    c => unreachable!("x or u or U expected but gets '{}'", c),
                };

                for _ in 0..len {
                    let c = stream
                        .get_next()
                        .ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?;

                    pos.advance();
                    seq.push(c);
                    if let Some(ref mut last) = state.last_token {
                        last.push(c);
                    }

                    out_val *= 16;
                    out_val += c
                        .to_digit(16)
                        .ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?;
                }

                result.push(
                    char::from_u32(out_val)
                        .ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?,
                );
            }

            // LF - verbatim strings keep it
            '\n' if verbatim => {
                debug_assert_eq!(escape, "", "verbatim strings should not have any escapes");
                pos.new_line();
                result.push('\n');
            }

            // LF - line continuation
            '\n' if allow_line_continuation && !escape.is_empty() => {
                debug_assert_eq!(escape, "\\", "unexpected escape {escape} at end of line");
                escape.clear();
                pos.new_line();

                // Skip leading whitespace on the next line up to the start column.
                #[cfg(not(feature = "no_position"))]
                {
                    let start_position = start.position().unwrap();
                    skip_space_until = start_position + 1;
                }
            }

            // LF - unterminated string
            '\n' => {
                pos.rewind();
                state.is_within_text_terminated_by = None;
                return Err((LERR::UnterminatedString, start));
            }

            // Escaped termination character
            ch if termination_char == ch && !escape.is_empty() => {
                escape.clear();
                result.push(termination_char);
            }

            // Unknown escape sequence
            ch if !escape.is_empty() => {
                escape.push(ch);

                return Err((LERR::MalformedEscapeSequence(escape.to_string()), *pos));
            }

            // Whitespace to skip after a line continuation
            #[cfg(not(feature = "no_position"))]
            ch if ch.is_whitespace() && pos.position().unwrap() < skip_space_until => (),

            // All other characters
            ch => {
                escape.clear();
                result.push(ch);

                // The first real character ends whitespace skipping.
                #[cfg(not(feature = "no_position"))]
                {
                    skip_space_until = 0;
                }
            }
        }
    }

    // Check string length once more, now including the last character pushed
    #[cfg(not(feature = "unchecked"))]
    if let Some(max) = state.max_string_len {
        if result.len() > max.get() {
            return Err((LexError::StringTooLong(max.get()), start));
        }
    }

    Ok((result, interpolated, first_char))
}
1432
1433fn scan_block_comment(
1435 stream: &mut (impl InputStream + ?Sized),
1436 level: usize,
1437 pos: &mut Position,
1438 comment: Option<&mut String>,
1439) -> usize {
1440 let mut level = level;
1441 let mut comment = comment;
1442
1443 while let Some(c) = stream.get_next() {
1444 pos.advance();
1445
1446 if let Some(comment) = comment.as_mut() {
1447 comment.push(c);
1448 }
1449
1450 match c {
1451 '/' => {
1452 if let Some(c2) = stream.peek_next().filter(|&ch| ch == '*') {
1453 stream.eat_next_and_advance(pos);
1454 if let Some(comment) = comment.as_mut() {
1455 comment.push(c2);
1456 }
1457 level += 1;
1458 }
1459 }
1460 '*' => {
1461 if let Some(c2) = stream.peek_next().filter(|&ch| ch == '/') {
1462 stream.eat_next_and_advance(pos);
1463 if let Some(comment) = comment.as_mut() {
1464 comment.push(c2);
1465 }
1466 level -= 1;
1467 }
1468 }
1469 '\n' => pos.new_line(),
1470 _ => (),
1471 }
1472
1473 if level == 0 {
1474 break;
1475 }
1476 }
1477
1478 level
1479}
1480
/// Test whether a character is an ASCII hexadecimal digit (`0-9`, `a-f` or `A-F`).
#[inline(always)]
const fn is_hex_digit(c: char) -> bool {
    matches!(c, '0'..='9' | 'a'..='f' | 'A'..='F')
}
1486
/// Test whether a character is an ASCII decimal digit (`0-9`).
#[inline(always)]
const fn is_numeric_digit(c: char) -> bool {
    matches!(c, '0'..='9')
}
1492
/// Test whether a character is an octal digit (`0-7`).
#[inline(always)]
const fn is_octal_digit(c: char) -> bool {
    match c {
        '0'..='7' => true,
        _ => false,
    }
}
1498
/// Test whether a character is a binary digit (`0` or `1`).
#[inline(always)]
const fn is_binary_digit(c: char) -> bool {
    matches!(c, '0' | '1')
}
1504
/// Test whether the text of a comment marks it as a doc-comment.
///
/// A doc-comment starts with exactly three slashes (`///` but not `////`) or with `/**`
/// that is not followed by a third asterisk.
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
#[inline]
#[must_use]
pub fn is_doc_comment(comment: &str) -> bool {
    // The comment must begin with `prefix`, and the character right after it must not be
    // `repeated`.
    let opens_with = |prefix: &str, repeated: char| {
        comment
            .strip_prefix(prefix)
            .map_or(false, |rest| !rest.starts_with(repeated))
    };

    opens_with("///", '/') || opens_with("/**", '*')
}
1514
1515#[inline(always)]
1518#[must_use]
1519pub fn get_next_token(
1520 stream: &mut (impl InputStream + ?Sized),
1521 state: &mut TokenizeState,
1522 pos: &mut Position,
1523) -> (Token, Position) {
1524 let result = get_next_token_inner(stream, state, pos);
1525
1526 state.next_token_cannot_be_unary = !result.0.is_next_unary();
1528
1529 result
1530}
1531
1532#[must_use]
1534fn get_next_token_inner(
1535 stream: &mut (impl InputStream + ?Sized),
1536 state: &mut TokenizeState,
1537 pos: &mut Position,
1538) -> (Token, Position) {
1539 state.last_token.as_mut().map(SmartString::clear);
1540
1541 if state.comment_level > 0 {
1543 let start_pos = *pos;
1544 let mut comment = String::new();
1545 let comment_buf = state.include_comments.then_some(&mut comment);
1546
1547 state.comment_level = scan_block_comment(stream, state.comment_level, pos, comment_buf);
1548
1549 let return_comment = state.include_comments;
1550
1551 #[cfg(not(feature = "no_function"))]
1552 #[cfg(feature = "metadata")]
1553 let return_comment = return_comment || is_doc_comment(&comment);
1554
1555 if return_comment {
1556 return (Token::Comment(comment.into()), start_pos);
1557 }
1558
1559 if state.comment_level > 0 {
1561 return (Token::EOF, *pos);
1562 }
1563 }
1564
1565 if let Some(ch) = state.is_within_text_terminated_by.take() {
1567 return parse_string_literal(stream, state, pos, ch, true, false, true).map_or_else(
1568 |(err, err_pos)| (Token::LexError(err.into()), err_pos),
1569 |(result, interpolated, start_pos)| {
1570 if interpolated {
1571 (Token::InterpolatedString(result.into()), start_pos)
1572 } else {
1573 (Token::StringConstant(result.into()), start_pos)
1574 }
1575 },
1576 );
1577 }
1578
1579 let mut negated: Option<Position> = None;
1580
1581 while let Some(c) = stream.get_next() {
1582 pos.advance();
1583
1584 let start_pos = *pos;
1585 let cc = stream.peek_next().unwrap_or('\0');
1586
1587 match (c, cc) {
1589 ('0'..='9', ..) => {
1591 let mut result = SmartString::new_const();
1592 let mut radix_base: Option<u32> = None;
1593 let mut valid: fn(char) -> bool = is_numeric_digit;
1594 let mut _has_period = false;
1595 let mut _has_e = false;
1596
1597 result.push(c);
1598
1599 while let Some(next_char) = stream.peek_next() {
1600 match next_char {
1601 NUMBER_SEPARATOR => {
1602 stream.eat_next_and_advance(pos);
1603 }
1604 ch if valid(ch) => {
1605 result.push(ch);
1606 stream.eat_next_and_advance(pos);
1607 }
1608 #[cfg(any(not(feature = "no_float"), feature = "decimal"))]
1609 '.' if !_has_period && radix_base.is_none() => {
1610 stream.get_next().unwrap();
1611
1612 match stream.peek_next() {
1614 Some('0'..='9') => {
1616 result.push('.');
1617 pos.advance();
1618 _has_period = true;
1619 }
1620 Some(NUMBER_SEPARATOR) => {
1622 stream.unget('.');
1623 break;
1624 }
1625 Some('.') => {
1627 stream.unget('.');
1628 break;
1629 }
1630 Some(ch) if !is_id_first_alphabetic(ch) => {
1632 result.push('.');
1633 pos.advance();
1634 result.push('0');
1635 _has_period = true;
1636 }
1637 _ => {
1639 stream.unget('.');
1640 break;
1641 }
1642 }
1643 }
1644 #[cfg(not(feature = "no_float"))]
1645 'e' if !_has_e && radix_base.is_none() => {
1646 stream.get_next().unwrap();
1647
1648 match stream.peek_next() {
1650 Some('0'..='9') => {
1652 result.push('e');
1653 pos.advance();
1654 _has_e = true;
1655 _has_period = true;
1656 }
1657 Some('+' | '-') => {
1659 result.push('e');
1660 pos.advance();
1661 result.push(stream.get_next().unwrap());
1662 pos.advance();
1663 _has_e = true;
1664 _has_period = true;
1665 }
1666 _ => {
1668 stream.unget('e');
1669 break;
1670 }
1671 }
1672 }
1673 ch @ ('x' | 'o' | 'b' | 'X' | 'O' | 'B')
1675 if c == '0' && result.len() <= 1 =>
1676 {
1677 result.push(ch);
1678 stream.eat_next_and_advance(pos);
1679
1680 valid = match ch {
1681 'x' | 'X' => is_hex_digit,
1682 'o' | 'O' => is_octal_digit,
1683 'b' | 'B' => is_binary_digit,
1684 c => unreachable!("x/X or o/O or b/B expected but gets '{}'", c),
1685 };
1686
1687 radix_base = Some(match ch {
1688 'x' | 'X' => 16,
1689 'o' | 'O' => 8,
1690 'b' | 'B' => 2,
1691 c => unreachable!("x/X or o/O or b/B expected but gets '{}'", c),
1692 });
1693 }
1694
1695 _ => break,
1696 }
1697 }
1698
1699 let num_pos = negated.map_or(start_pos, |negated_pos| {
1700 result.insert(0, '-');
1701 negated_pos
1702 });
1703
1704 if let Some(ref mut last) = state.last_token {
1705 *last = result.clone();
1706 }
1707
1708 let token = if let Some(radix) = radix_base {
1710 let result = &result[2..];
1711
1712 UNSIGNED_INT::from_str_radix(result, radix)
1713 .map(|v| v as INT)
1714 .map_or_else(
1715 |_| Token::LexError(LERR::MalformedNumber(result.to_string()).into()),
1716 Token::IntegerConstant,
1717 )
1718 } else {
1719 (|| {
1720 let num = INT::from_str(&result).map(Token::IntegerConstant);
1721
1722 #[cfg(not(feature = "no_float"))]
1724 if num.is_err() {
1725 if let Ok(v) = crate::types::FloatWrapper::from_str(&result) {
1726 return Token::FloatConstant((v, result).into());
1727 }
1728 }
1729
1730 #[cfg(feature = "decimal")]
1732 if num.is_err() {
1733 if let Ok(v) = rust_decimal::Decimal::from_str(&result) {
1734 return Token::DecimalConstant((v, result).into());
1735 }
1736 }
1737
1738 #[cfg(feature = "decimal")]
1740 if num.is_err() {
1741 if let Ok(v) = rust_decimal::Decimal::from_scientific(&result) {
1742 return Token::DecimalConstant((v, result).into());
1743 }
1744 }
1745
1746 num.unwrap_or_else(|_| {
1747 Token::LexError(LERR::MalformedNumber(result.to_string()).into())
1748 })
1749 })()
1750 };
1751
1752 return (token, num_pos);
1753 }
1754
1755 ('"', ..) => {
1757 return parse_string_literal(stream, state, pos, c, false, true, false)
1758 .map_or_else(
1759 |(err, err_pos)| (Token::LexError(err.into()), err_pos),
1760 |(result, ..)| (Token::StringConstant(result.into()), start_pos),
1761 );
1762 }
1763 ('`', ..) => {
1765 match stream.peek_next() {
1767 Some('\r') => {
1769 stream.eat_next_and_advance(pos);
1770 if stream.peek_next() == Some('\n') {
1772 stream.eat_next_and_advance(pos);
1773 }
1774 pos.new_line();
1775 }
1776 Some('\n') => {
1778 stream.eat_next_and_advance(pos);
1779 pos.new_line();
1780 }
1781 _ => (),
1782 }
1783
1784 return parse_string_literal(stream, state, pos, c, true, false, true).map_or_else(
1785 |(err, err_pos)| (Token::LexError(err.into()), err_pos),
1786 |(result, interpolated, ..)| {
1787 if interpolated {
1788 (Token::InterpolatedString(result.into()), start_pos)
1789 } else {
1790 (Token::StringConstant(result.into()), start_pos)
1791 }
1792 },
1793 );
1794 }
1795
1796 ('\'', '\'') => {
1798 return (
1799 Token::LexError(LERR::MalformedChar(String::new()).into()),
1800 start_pos,
1801 )
1802 }
1803 ('\'', ..) => {
1804 return parse_string_literal(stream, state, pos, c, false, false, false)
1805 .map_or_else(
1806 |(err, err_pos)| (Token::LexError(err.into()), err_pos),
1807 |(result, ..)| {
1808 let mut chars = result.chars();
1809 let first = chars.next().unwrap();
1810
1811 if chars.next().is_some() {
1812 (
1813 Token::LexError(LERR::MalformedChar(result.to_string()).into()),
1814 start_pos,
1815 )
1816 } else {
1817 (Token::CharConstant(first), start_pos)
1818 }
1819 },
1820 )
1821 }
1822
1823 ('{', ..) => return (Token::LeftBrace, start_pos),
1825 ('}', ..) => return (Token::RightBrace, start_pos),
1826
1827 ('(', ')') => {
1829 stream.eat_next_and_advance(pos);
1830 return (Token::Unit, start_pos);
1831 }
1832
1833 ('(', '*') => {
1835 stream.eat_next_and_advance(pos);
1836 return (Token::Reserved(Box::new("(*".into())), start_pos);
1837 }
1838 ('(', ..) => return (Token::LeftParen, start_pos),
1839 (')', ..) => return (Token::RightParen, start_pos),
1840
1841 ('[', ..) => return (Token::LeftBracket, start_pos),
1843 (']', ..) => return (Token::RightBracket, start_pos),
1844
1845 #[cfg(not(feature = "no_object"))]
1847 ('#', '{') => {
1848 stream.eat_next_and_advance(pos);
1849 return (Token::MapStart, start_pos);
1850 }
1851 ('#', '!') => return (Token::Reserved(Box::new("#!".into())), start_pos),
1853
1854 ('#', ' ') => {
1855 stream.eat_next_and_advance(pos);
1856 let token = if stream.peek_next() == Some('{') {
1857 stream.eat_next_and_advance(pos);
1858 "# {"
1859 } else {
1860 "#"
1861 };
1862 return (Token::Reserved(Box::new(token.into())), start_pos);
1863 }
1864
1865 ('#', ..) => return (Token::Reserved(Box::new("#".into())), start_pos),
1866
1867 ('+', '=') => {
1869 stream.eat_next_and_advance(pos);
1870 return (Token::PlusAssign, start_pos);
1871 }
1872 ('+', '+') => {
1873 stream.eat_next_and_advance(pos);
1874 return (Token::Reserved(Box::new("++".into())), start_pos);
1875 }
1876 ('+', ..) if !state.next_token_cannot_be_unary => return (Token::UnaryPlus, start_pos),
1877 ('+', ..) => return (Token::Plus, start_pos),
1878
1879 ('-', '0'..='9') if !state.next_token_cannot_be_unary => negated = Some(start_pos),
1880 ('-', '0'..='9') => return (Token::Minus, start_pos),
1881 ('-', '=') => {
1882 stream.eat_next_and_advance(pos);
1883 return (Token::MinusAssign, start_pos);
1884 }
1885 ('-', '>') => {
1886 stream.eat_next_and_advance(pos);
1887 return (Token::Reserved(Box::new("->".into())), start_pos);
1888 }
1889 ('-', '-') => {
1890 stream.eat_next_and_advance(pos);
1891 return (Token::Reserved(Box::new("--".into())), start_pos);
1892 }
1893 ('-', ..) if !state.next_token_cannot_be_unary => {
1894 return (Token::UnaryMinus, start_pos)
1895 }
1896 ('-', ..) => return (Token::Minus, start_pos),
1897
1898 ('*', ')') => {
1899 stream.eat_next_and_advance(pos);
1900 return (Token::Reserved(Box::new("*)".into())), start_pos);
1901 }
1902 ('*', '=') => {
1903 stream.eat_next_and_advance(pos);
1904 return (Token::MultiplyAssign, start_pos);
1905 }
1906 ('*', '*') => {
1907 stream.eat_next_and_advance(pos);
1908
1909 return (
1910 if stream.peek_next() == Some('=') {
1911 stream.eat_next_and_advance(pos);
1912 Token::PowerOfAssign
1913 } else {
1914 Token::PowerOf
1915 },
1916 start_pos,
1917 );
1918 }
1919 ('*', ..) => return (Token::Multiply, start_pos),
1920
1921 ('/', '/') => {
1923 stream.eat_next_and_advance(pos);
1924
1925 let mut comment: Option<String> = match stream.peek_next() {
1926 #[cfg(not(feature = "no_function"))]
1927 #[cfg(feature = "metadata")]
1928 Some('/') => {
1929 stream.eat_next_and_advance(pos);
1930
1931 match stream.peek_next() {
1933 Some('/') => None,
1934 _ => Some("///".into()),
1935 }
1936 }
1937 #[cfg(feature = "metadata")]
1938 Some('!') => {
1939 stream.eat_next_and_advance(pos);
1940 Some("//!".into())
1941 }
1942 _ if state.include_comments => Some("//".into()),
1943 _ => None,
1944 };
1945
1946 while let Some(c) = stream.get_next() {
1947 if c == '\r' {
1948 if stream.peek_next() == Some('\n') {
1950 stream.eat_next_and_advance(pos);
1951 }
1952 pos.new_line();
1953 break;
1954 }
1955 if c == '\n' {
1956 pos.new_line();
1957 break;
1958 }
1959 if let Some(comment) = comment.as_mut() {
1960 comment.push(c);
1961 }
1962 pos.advance();
1963 }
1964
1965 match comment {
1966 #[cfg(feature = "metadata")]
1967 Some(comment) if comment.starts_with("//!") => {
1968 let g = &mut state.tokenizer_control.borrow_mut().global_comments;
1969 if !g.is_empty() {
1970 g.push('\n');
1971 }
1972 g.push_str(&comment);
1973 }
1974 Some(comment) => return (Token::Comment(comment.into()), start_pos),
1975 None => (),
1976 }
1977 }
1978 ('/', '*') => {
1979 state.comment_level += 1;
1980 stream.eat_next_and_advance(pos);
1981
1982 let mut comment: Option<String> = match stream.peek_next() {
1983 #[cfg(not(feature = "no_function"))]
1984 #[cfg(feature = "metadata")]
1985 Some('*') => {
1986 stream.eat_next_and_advance(pos);
1987
1988 match stream.peek_next() {
1990 Some('*') => None,
1991 _ => Some("/**".into()),
1992 }
1993 }
1994 _ if state.include_comments => Some("/*".into()),
1995 _ => None,
1996 };
1997
1998 state.comment_level =
1999 scan_block_comment(stream, state.comment_level, pos, comment.as_mut());
2000
2001 if let Some(comment) = comment {
2002 return (Token::Comment(comment.into()), start_pos);
2003 }
2004 }
2005
2006 ('/', '=') => {
2007 stream.eat_next_and_advance(pos);
2008 return (Token::DivideAssign, start_pos);
2009 }
2010 ('/', ..) => return (Token::Divide, start_pos),
2011
2012 (';', ..) => return (Token::SemiColon, start_pos),
2013 (',', ..) => return (Token::Comma, start_pos),
2014
2015 ('.', '.') => {
2016 stream.eat_next_and_advance(pos);
2017 return (
2018 match stream.peek_next() {
2019 Some('.') => {
2020 stream.eat_next_and_advance(pos);
2021 Token::Reserved(Box::new("...".into()))
2022 }
2023 Some('=') => {
2024 stream.eat_next_and_advance(pos);
2025 Token::InclusiveRange
2026 }
2027 _ => Token::ExclusiveRange,
2028 },
2029 start_pos,
2030 );
2031 }
2032 ('.', ..) => return (Token::Period, start_pos),
2033
2034 ('=', '=') => {
2035 stream.eat_next_and_advance(pos);
2036
2037 if stream.peek_next() == Some('=') {
2038 stream.eat_next_and_advance(pos);
2039 return (Token::Reserved(Box::new("===".into())), start_pos);
2040 }
2041
2042 return (Token::EqualsTo, start_pos);
2043 }
2044 ('=', '>') => {
2045 stream.eat_next_and_advance(pos);
2046 return (Token::DoubleArrow, start_pos);
2047 }
2048 ('=', ..) => return (Token::Equals, start_pos),
2049
2050 #[cfg(not(feature = "no_module"))]
2051 (':', ':') => {
2052 stream.eat_next_and_advance(pos);
2053
2054 if stream.peek_next() == Some('<') {
2055 stream.eat_next_and_advance(pos);
2056 return (Token::Reserved(Box::new("::<".into())), start_pos);
2057 }
2058
2059 return (Token::DoubleColon, start_pos);
2060 }
2061 (':', '=') => {
2062 stream.eat_next_and_advance(pos);
2063 return (Token::Reserved(Box::new(":=".into())), start_pos);
2064 }
2065 (':', ';') => {
2066 stream.eat_next_and_advance(pos);
2067 return (Token::Reserved(Box::new(":;".into())), start_pos);
2068 }
2069 (':', ..) => return (Token::Colon, start_pos),
2070
2071 ('<', '=') => {
2072 stream.eat_next_and_advance(pos);
2073 return (Token::LessThanEqualsTo, start_pos);
2074 }
2075 ('<', '-') => {
2076 stream.eat_next_and_advance(pos);
2077 return (Token::Reserved(Box::new("<-".into())), start_pos);
2078 }
2079 ('<', '<') => {
2080 stream.eat_next_and_advance(pos);
2081
2082 return (
2083 if stream.peek_next() == Some('=') {
2084 stream.eat_next_and_advance(pos);
2085 Token::LeftShiftAssign
2086 } else {
2087 Token::LeftShift
2088 },
2089 start_pos,
2090 );
2091 }
2092 ('<', '|') => {
2093 stream.eat_next_and_advance(pos);
2094 return (Token::Reserved(Box::new("<|".into())), start_pos);
2095 }
2096 ('<', ..) => return (Token::LessThan, start_pos),
2097
2098 ('>', '=') => {
2099 stream.eat_next_and_advance(pos);
2100 return (Token::GreaterThanEqualsTo, start_pos);
2101 }
2102 ('>', '>') => {
2103 stream.eat_next_and_advance(pos);
2104
2105 return (
2106 if stream.peek_next() == Some('=') {
2107 stream.eat_next_and_advance(pos);
2108 Token::RightShiftAssign
2109 } else {
2110 Token::RightShift
2111 },
2112 start_pos,
2113 );
2114 }
2115 ('>', ..) => return (Token::GreaterThan, start_pos),
2116
2117 ('!', 'i') => {
2118 stream.get_next().unwrap();
2119 if stream.peek_next() == Some('n') {
2120 stream.get_next().unwrap();
2121 match stream.peek_next() {
2122 Some(c) if is_id_continue(c) => {
2123 stream.unget('n');
2124 stream.unget('i');
2125 return (Token::Bang, start_pos);
2126 }
2127 _ => {
2128 pos.advance();
2129 pos.advance();
2130 return (Token::NotIn, start_pos);
2131 }
2132 }
2133 }
2134
2135 stream.unget('i');
2136 return (Token::Bang, start_pos);
2137 }
2138 ('!', '=') => {
2139 stream.eat_next_and_advance(pos);
2140
2141 if stream.peek_next() == Some('=') {
2142 stream.eat_next_and_advance(pos);
2143 return (Token::Reserved(Box::new("!==".into())), start_pos);
2144 }
2145
2146 return (Token::NotEqualsTo, start_pos);
2147 }
2148 ('!', '.') => {
2149 stream.eat_next_and_advance(pos);
2150 return (Token::Reserved(Box::new("!.".into())), start_pos);
2151 }
2152 ('!', ..) => return (Token::Bang, start_pos),
2153
2154 ('|', '|') => {
2155 stream.eat_next_and_advance(pos);
2156 return (Token::Or, start_pos);
2157 }
2158 ('|', '=') => {
2159 stream.eat_next_and_advance(pos);
2160 return (Token::OrAssign, start_pos);
2161 }
2162 ('|', '>') => {
2163 stream.eat_next_and_advance(pos);
2164 return (Token::Reserved(Box::new("|>".into())), start_pos);
2165 }
2166 ('|', ..) => return (Token::Pipe, start_pos),
2167
2168 ('&', '&') => {
2169 stream.eat_next_and_advance(pos);
2170 return (Token::And, start_pos);
2171 }
2172 ('&', '=') => {
2173 stream.eat_next_and_advance(pos);
2174 return (Token::AndAssign, start_pos);
2175 }
2176 ('&', ..) => return (Token::Ampersand, start_pos),
2177
2178 ('^', '=') => {
2179 stream.eat_next_and_advance(pos);
2180 return (Token::XOrAssign, start_pos);
2181 }
2182 ('^', ..) => return (Token::XOr, start_pos),
2183
2184 ('~', ..) => return (Token::Reserved(Box::new("~".into())), start_pos),
2185
2186 ('%', '=') => {
2187 stream.eat_next_and_advance(pos);
2188 return (Token::ModuloAssign, start_pos);
2189 }
2190 ('%', ..) => return (Token::Modulo, start_pos),
2191
2192 ('@', ..) => return (Token::Reserved(Box::new("@".into())), start_pos),
2193
2194 ('$', ..) => return (Token::Reserved(Box::new("$".into())), start_pos),
2195
2196 ('?', '.') => {
2197 stream.eat_next_and_advance(pos);
2198 return (
2199 #[cfg(not(feature = "no_object"))]
2200 Token::Elvis,
2201 #[cfg(feature = "no_object")]
2202 Token::Reserved(Box::new("?.".into())),
2203 start_pos,
2204 );
2205 }
2206 ('?', '?') => {
2207 stream.eat_next_and_advance(pos);
2208 return (Token::DoubleQuestion, start_pos);
2209 }
2210 ('?', '[') => {
2211 stream.eat_next_and_advance(pos);
2212 return (
2213 #[cfg(not(feature = "no_index"))]
2214 Token::QuestionBracket,
2215 #[cfg(feature = "no_index")]
2216 Token::Reserved(Box::new("?[".into())),
2217 start_pos,
2218 );
2219 }
2220 ('?', ..) => return (Token::Reserved(Box::new("?".into())), start_pos),
2221
2222 _ if is_id_first_alphabetic(c) || c == '_' => {
2224 return parse_identifier_token(stream, state, pos, start_pos, c);
2225 }
2226
2227 ('\n', ..) => pos.new_line(),
2229
2230 (ch, ..) if ch.is_ascii_whitespace() => (),
2232
2233 _ => {
2234 return (
2235 Token::LexError(LERR::UnexpectedInput(c.to_string()).into()),
2236 start_pos,
2237 )
2238 }
2239 }
2240 }
2241
2242 pos.advance();
2243
2244 (Token::EOF, *pos)
2245}
2246
2247fn parse_identifier_token(
2249 stream: &mut (impl InputStream + ?Sized),
2250 state: &mut TokenizeState,
2251 pos: &mut Position,
2252 start_pos: Position,
2253 first_char: char,
2254) -> (Token, Position) {
2255 let mut identifier = SmartString::new_const();
2256 identifier.push(first_char);
2257 if let Some(ref mut last) = state.last_token {
2258 last.clear();
2259 last.push(first_char);
2260 }
2261
2262 while let Some(next_char) = stream.peek_next() {
2263 match next_char {
2264 x if is_id_continue(x) => {
2265 stream.eat_next_and_advance(pos);
2266 identifier.push(x);
2267 if let Some(ref mut last) = state.last_token {
2268 last.push(x);
2269 }
2270 }
2271 _ => break,
2272 }
2273 }
2274
2275 if let Some(token) = Token::lookup_symbol_from_syntax(&identifier) {
2276 return (token, start_pos);
2277 }
2278
2279 if is_reserved_keyword_or_symbol(&identifier).0 {
2280 return (Token::Reserved(Box::new(identifier)), start_pos);
2281 }
2282
2283 if !is_valid_identifier(&identifier) {
2284 return (
2285 Token::LexError(LERR::MalformedIdentifier(identifier.to_string()).into()),
2286 start_pos,
2287 );
2288 }
2289
2290 (Token::Identifier(identifier.into()), start_pos)
2291}
2292
2293#[must_use]
2296pub fn is_valid_identifier(name: &str) -> bool {
2297 let mut first_alphabetic = false;
2298
2299 for ch in name.chars() {
2300 match ch {
2301 '_' => (),
2302 _ if is_id_first_alphabetic(ch) => first_alphabetic = true,
2303 _ if !first_alphabetic => return false,
2304 _ if char::is_ascii_alphanumeric(&ch) => (),
2305 _ => return false,
2306 }
2307 }
2308
2309 first_alphabetic
2310}
2311
2312#[inline(always)]
2315#[must_use]
2316pub fn is_valid_function_name(name: &str) -> bool {
2317 is_valid_identifier(name)
2318 && !is_reserved_keyword_or_symbol(name).0
2319 && Token::lookup_symbol_from_syntax(name).is_none()
2320}
2321
/// Test whether a character can start an identifier (underscores are handled by callers).
///
/// With the `unicode-xid-ident` feature this is the Unicode `XID_Start` property;
/// otherwise only ASCII letters qualify.
#[inline(always)]
#[must_use]
#[allow(clippy::missing_const_for_fn)]
pub fn is_id_first_alphabetic(x: char) -> bool {
    #[cfg(feature = "unicode-xid-ident")]
    {
        unicode_xid::UnicodeXID::is_xid_start(x)
    }
    #[cfg(not(feature = "unicode-xid-ident"))]
    {
        matches!(x, 'a'..='z' | 'A'..='Z')
    }
}
2332
/// Test whether a character can appear inside an identifier after its first character.
///
/// With the `unicode-xid-ident` feature this is the Unicode `XID_Continue` property;
/// otherwise ASCII letters, ASCII digits and the underscore qualify.
#[inline(always)]
#[must_use]
#[allow(clippy::missing_const_for_fn)]
pub fn is_id_continue(x: char) -> bool {
    #[cfg(feature = "unicode-xid-ident")]
    {
        unicode_xid::UnicodeXID::is_xid_continue(x)
    }
    #[cfg(not(feature = "unicode-xid-ident"))]
    {
        matches!(x, '_' | '0'..='9' | 'a'..='z' | 'A'..='Z')
    }
}
2343
2344#[inline]
2356#[must_use]
2357pub fn is_reserved_keyword_or_symbol(syntax: &str) -> (bool, bool, bool) {
2358 let utf8 = syntax.as_bytes();
2361 let len = utf8.len();
2362
2363 if !(MIN_RESERVED_LEN..=MAX_RESERVED_LEN).contains(&len) {
2364 return (false, false, false);
2365 }
2366
2367 let mut hash_val = len;
2368
2369 match len {
2370 1 => (),
2371 _ => hash_val += RESERVED_ASSOC_VALUES[utf8[1] as usize] as usize,
2372 }
2373 hash_val += RESERVED_ASSOC_VALUES[utf8[0] as usize] as usize;
2374 hash_val += RESERVED_ASSOC_VALUES[utf8[len - 1] as usize] as usize;
2375
2376 if !(MIN_RESERVED_HASH_VALUE..=MAX_RESERVED_HASH_VALUE).contains(&hash_val) {
2377 return (false, false, false);
2378 }
2379
2380 match RESERVED_LIST[hash_val] {
2381 ("", ..) => (false, false, false),
2382 (s, true, a, b) => {
2383 let is_reserved = s.len() == len && s.as_bytes()[0] == utf8[0] && s == syntax;
2386 (is_reserved, is_reserved && a, is_reserved && a && b)
2387 }
2388 _ => (false, false, false),
2389 }
2390}
2391
/// A character stream that reads from several string inputs one after another, with a small
/// buffer for characters that have been pushed back.
pub struct MultiInputsStream<'a> {
    /// Characters pushed back via `unget`; at most two can be held at a time.
    pub buf: [Option<char>; 2],
    /// Index into `streams` of the input currently being read.
    pub index: usize,
    /// The input character streams, read in order.
    pub streams: StaticVec<Peekable<Chars<'a>>>,
}
2404
2405impl InputStream for MultiInputsStream<'_> {
2406 #[inline]
2407 fn unget(&mut self, ch: char) {
2408 match self.buf {
2409 [None, ..] => self.buf[0] = Some(ch),
2410 [_, None] => self.buf[1] = Some(ch),
2411 _ => unreachable!("cannot unget more than 2 characters!"),
2412 }
2413 }
2414 fn get_next(&mut self) -> Option<char> {
2415 match self.buf {
2416 [None, ..] => (),
2417 [ch @ Some(_), None] => {
2418 self.buf[0] = None;
2419 return ch;
2420 }
2421 [_, ch @ Some(_)] => {
2422 self.buf[1] = None;
2423 return ch;
2424 }
2425 }
2426
2427 loop {
2428 if self.index >= self.streams.len() {
2429 return None;
2431 }
2432 if let Some(ch) = self.streams[self.index].next() {
2433 return Some(ch);
2435 }
2436 self.index += 1;
2438 }
2439 }
2440 fn peek_next(&mut self) -> Option<char> {
2441 match self.buf {
2442 [None, ..] => (),
2443 [ch @ Some(_), None] => return ch,
2444 [_, ch @ Some(_)] => return ch,
2445 }
2446
2447 loop {
2448 if self.index >= self.streams.len() {
2449 return None;
2451 }
2452 if let Some(&ch) = self.streams[self.index].peek() {
2453 return Some(ch);
2455 }
2456 self.index += 1;
2458 }
2459 }
2460}
2461
/// An iterator over the tokens, with positions, of a stream of input characters.
pub struct TokenIterator<'a> {
    /// The engine, consulted for custom keywords and disabled symbols.
    pub engine: &'a Engine,
    /// Current state of the tokenizer.
    pub state: TokenizeState,
    /// Current position in the input.
    pub pos: Position,
    /// The input character stream.
    pub stream: MultiInputsStream<'a>,
    /// Optional callback applied to every token before it is yielded.
    pub token_mapper: Option<&'a OnParseTokenCallback>,
}
2476
impl<'a> Iterator for TokenIterator<'a> {
    type Item = (Token, Position);

    /// Produce the next token and its position.
    ///
    /// The raw token from `get_next_token` is post-processed: reserved symbols are turned
    /// into custom tokens, descriptive lex errors or left as reserved; identifiers and
    /// literal tokens registered as custom keywords become custom tokens; disabled literal
    /// tokens become reserved. The optional token mapper is then applied, and the token's
    /// text is appended to the compressed script when compression is active.
    ///
    /// This never returns `None`; the end of input is signalled by `Token::EOF`.
    fn next(&mut self) -> Option<Self::Item> {
        // Pick up requests from the shared control block. The borrow is confined to this
        // block so that it is released before tokenizing.
        let (within_interpolated, compress_script) = {
            let control = &mut *self.state.tokenizer_control.borrow_mut();

            // A request to resume a text string is turned into tokenizer state and cleared.
            if control.is_within_text {
                self.state.is_within_text_terminated_by = Some('`');
                control.is_within_text = false;
            }

            (
                self.state.is_within_text_terminated_by.is_some(),
                control.compressed.is_some(),
            )
        };

        let (token, pos) = match get_next_token(&mut self.stream, &mut self.state, &mut self.pos) {
            // End of input is passed through untouched (no mapping, no compression).
            r @ (Token::EOF, _) => return Some(r),
            // A plain string constant came back while the text-string flag is still set.
            // NOTE(review): presumably this means the text string ended without its
            // terminator - confirm against `parse_string_literal`.
            (Token::StringConstant(..), pos) if self.state.is_within_text_terminated_by.is_some() => {
                self.state.is_within_text_terminated_by = None;
                return Some((Token::LexError(LERR::UnterminatedString.into()), pos));
            }
            // Reserved keyword/symbol: matched on its text and on whether it is registered
            // as a custom keyword (always `false` without custom syntax support).
            (Token::Reserved(s), pos) => (match
                (s.as_str(),
                    #[cfg(not(feature = "no_custom_syntax"))]
                    self.engine.custom_keywords.contains_key(&*s),
                    #[cfg(feature = "no_custom_syntax")]
                    false
                )
            {
                // Symbols borrowed from other languages get a descriptive error.
                ("===", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                    "'===' is not a valid operator. This is not JavaScript! Should it be '=='?".to_string(),
                ).into()),
                ("!==", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                    "'!==' is not a valid operator. This is not JavaScript! Should it be '!='?".to_string(),
                ).into()),
                ("->", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                    "'->' is not a valid symbol. This is not C or C++!".to_string()).into()),
                ("<-", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                    "'<-' is not a valid symbol. This is not Go! Should it be '<='?".to_string(),
                ).into()),
                (":=", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                    "':=' is not a valid assignment operator. This is not Go or Pascal! Should it be simply '='?".to_string(),
                ).into()),
                (":;", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                    "':;' is not a valid symbol. Should it be '::'?".to_string(),
                ).into()),
                ("::<", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                    "'::<>' is not a valid symbol. This is not Rust! Should it be '::'?".to_string(),
                ).into()),
                ("(*" | "*)", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                    "'(* .. *)' is not a valid comment format. This is not Pascal! Should it be '/* .. */'?".to_string(),
                ).into()),
                ("# {", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                    "'#' is not a valid symbol. Should it be '#{'?".to_string(),
                ).into()),
                // Reserved text registered as a custom keyword becomes a custom token.
                #[cfg(not(feature = "no_custom_syntax"))]
                (.., true) => Token::Custom(s),
                #[cfg(feature = "no_custom_syntax")]
                (.., true) => unreachable!("no custom operators"),
                // Reserved text that the engine has disabled is an error.
                (token, false) if self.engine.is_symbol_disabled(token) => {
                    let msg = format!("reserved {} '{token}' is disabled", if is_valid_identifier(token) { "keyword"} else {"symbol"});
                    Token::LexError(LERR::ImproperSymbol(s.to_string(), msg).into())
                },
                // Any other reserved keyword/symbol is passed through.
                (.., false) => Token::Reserved(s),
            }, pos),
            // Identifier registered as a custom keyword.
            #[cfg(not(feature = "no_custom_syntax"))]
            (Token::Identifier(s), pos) if self.engine.custom_keywords.contains_key(&*s) => {
                (Token::Custom(s), pos)
            }
            // Literal token whose syntax is registered as a custom keyword; the assertion
            // checks that its standard meaning has been disabled.
            #[cfg(not(feature = "no_custom_syntax"))]
            (token, pos) if token.is_literal() && self.engine.custom_keywords.contains_key(token.literal_syntax()) => {
                debug_assert!(self.engine.is_symbol_disabled(token.literal_syntax()), "{:?} is an active keyword", token);

                (Token::Custom(Box::new(token.literal_syntax().into())), pos)
            }
            // Literal token that the engine has disabled is reported as reserved.
            (token, pos) if token.is_literal() && self.engine.is_symbol_disabled(token.literal_syntax()) => {
                (Token::Reserved(Box::new(token.literal_syntax().into())), pos)
            }
            // Everything else is passed through unchanged.
            r => r,
        };

        // Run the token through the mapper, if one is installed.
        let token = match self.token_mapper {
            Some(func) => func(token, pos, &self.state),
            None => token,
        };

        // Append the text of this token to the compressed script.
        if compress_script {
            let control = &mut *self.state.tokenizer_control.borrow_mut();

            if token != Token::EOF {
                if let Some(ref mut compressed) = control.compressed {
                    use std::fmt::Write;

                    let last_token = self.state.last_token.as_ref().unwrap();
                    let mut buf = SmartString::new_const();

                    if last_token.is_empty() {
                        // No raw text was recorded: use the token's display form.
                        write!(buf, "{token}").unwrap();
                    } else if within_interpolated
                        && matches!(
                            token,
                            Token::StringConstant(..) | Token::InterpolatedString(..)
                        )
                    {
                        // Continuation of a text string: append the raw text directly,
                        // skipping its first byte.
                        compressed.push_str(&last_token[1..]);
                    } else {
                        buf = last_token.clone();
                    }

                    // Keep two adjacent identifier-like characters apart with a space.
                    if !buf.is_empty() && !compressed.is_empty() {
                        let cur = buf.chars().next().unwrap();

                        if cur == '_' || is_id_first_alphabetic(cur) || is_id_continue(cur) {
                            let prev = compressed.chars().last().unwrap();

                            if prev == '_' || is_id_first_alphabetic(prev) || is_id_continue(prev) {
                                compressed.push(' ');
                            }
                        }
                    }

                    compressed.push_str(&buf);
                }
            }
        }

        Some((token, pos))
    }
}
2628
// `TokenIterator::next` always returns `Some` (end of input is reported as `Token::EOF`),
// so the fused-iterator contract holds trivially.
impl FusedIterator for TokenIterator<'_> {}
2630
impl Engine {
    /// Tokenize the input strings.
    ///
    /// Returns the token iterator together with the control block it shares with the
    /// tokenizer. Delegates to [`lex_raw`] without a token mapper.
    ///
    /// Only available with the `internals` feature.
    #[cfg(feature = "internals")]
    #[inline(always)]
    #[must_use]
    pub fn lex<'a>(
        &'a self,
        inputs: impl IntoIterator<Item = &'a (impl AsRef<str> + 'a)>,
    ) -> (TokenIterator<'a>, TokenizerControl) {
        lex_raw(self, inputs, None)
    }
    /// Tokenize the input strings, passing every token through `token_mapper`.
    ///
    /// Returns the token iterator together with the control block it shares with the
    /// tokenizer. Delegates to [`lex_raw`] with the given mapper.
    ///
    /// Only available with the `internals` feature.
    #[cfg(feature = "internals")]
    #[inline(always)]
    #[must_use]
    pub fn lex_with_map<'a>(
        &'a self,
        inputs: impl IntoIterator<Item = &'a (impl AsRef<str> + 'a)>,
        token_mapper: &'a OnParseTokenCallback,
    ) -> (TokenIterator<'a>, TokenizerControl) {
        lex_raw(self, inputs, Some(token_mapper))
    }
}
2664
2665#[inline]
2671#[must_use]
2672pub fn lex_raw<'a>(
2673 engine: &'a Engine,
2674 inputs: impl IntoIterator<Item = &'a (impl AsRef<str> + 'a)>,
2675 token_mapper: Option<&'a OnParseTokenCallback>,
2676) -> (TokenIterator<'a>, TokenizerControl) {
2677 let buffer: TokenizerControl = RefCell::new(TokenizerControlBlock::new()).into();
2678 let buffer2 = buffer.clone();
2679
2680 (
2681 TokenIterator {
2682 engine,
2683 state: TokenizeState {
2684 #[cfg(not(feature = "unchecked"))]
2685 max_string_len: std::num::NonZeroUsize::new(engine.max_string_size()),
2686 next_token_cannot_be_unary: false,
2687 tokenizer_control: buffer,
2688 comment_level: 0,
2689 include_comments: false,
2690 is_within_text_terminated_by: None,
2691 last_token: None,
2692 },
2693 pos: Position::new(1, 0),
2694 stream: MultiInputsStream {
2695 buf: [None, None],
2696 streams: inputs
2697 .into_iter()
2698 .map(|s| s.as_ref().chars().peekable())
2699 .collect(),
2700 index: 0,
2701 },
2702 token_mapper,
2703 },
2704 buffer2,
2705 )
2706}