1use crate::engine::{
4 Precedence, KEYWORD_DEBUG, KEYWORD_EVAL, KEYWORD_FN_PTR, KEYWORD_FN_PTR_CALL,
5 KEYWORD_FN_PTR_CURRY, KEYWORD_IS_DEF_VAR, KEYWORD_PRINT, KEYWORD_THIS, KEYWORD_TYPE_OF,
6};
7use crate::func::native::OnParseTokenCallback;
8use crate::{Engine, LexError, StaticVec, INT};
9#[cfg(feature = "no_std")]
10use std::prelude::v1::*;
11use std::{
12 borrow::Cow,
13 cell::Cell,
14 char, fmt,
15 iter::{FusedIterator, Peekable},
16 num::NonZeroUsize,
17 ops::{Add, AddAssign},
18 rc::Rc,
19 str::{Chars, FromStr},
20};
21
/// A small, copyable block of tokenizer control flags.
///
/// It is shared through the [`TokenizerControl`] handle (an `Rc<Cell<..>>`).
#[derive(Debug, Clone, Eq, PartialEq, Hash, Copy)]
pub struct TokenizerControlBlock {
    /// Flag for "currently inside a text string".
    // NOTE(review): who sets/reads this flag is not visible in this part of the file — confirm.
    pub is_within_text: bool,
}
29
30impl TokenizerControlBlock {
31 #[inline(always)]
33 #[must_use]
34 pub const fn new() -> Self {
35 Self {
36 is_within_text: false,
37 }
38 }
39}
40
/// Reference-counted, interior-mutable handle to a [`TokenizerControlBlock`].
pub type TokenizerControl = Rc<Cell<TokenizerControlBlock>>;

/// Short alias for [`LexError`], used when constructing lexer errors.
type LERR = LexError;

/// Separator character allowed between the digits of a number literal.
/// It is accepted while scanning a number and filtered out before the digits are parsed.
const NUMBER_SEPARATOR: char = '_';

/// A peekable stream of tokens produced by a `TokenIterator`.
pub type TokenStream<'a> = Peekable<TokenIterator<'a>>;
51
/// A location (line number + character position) within a script.
///
/// Both fields are compiled out when the `no_position` feature is enabled,
/// in which case every `Position` reports itself as "none".
#[derive(Eq, PartialEq, Ord, PartialOrd, Hash, Clone, Copy)]
pub struct Position {
    /// Line number; `0` is only used by [`Position::NONE`].
    #[cfg(not(feature = "no_position"))]
    line: u16,
    /// Character position within the line; `0` means the beginning of the line.
    #[cfg(not(feature = "no_position"))]
    pos: u16,
}
69
70impl Position {
71 pub const NONE: Self = Self {
73 #[cfg(not(feature = "no_position"))]
74 line: 0,
75 #[cfg(not(feature = "no_position"))]
76 pos: 0,
77 };
78 pub const START: Self = Self {
80 #[cfg(not(feature = "no_position"))]
81 line: 1,
82 #[cfg(not(feature = "no_position"))]
83 pos: 0,
84 };
85
86 #[inline(always)]
96 #[must_use]
97 pub fn new(line: u16, position: u16) -> Self {
98 assert!(line != 0, "line cannot be zero");
99
100 let _pos = position;
101
102 Self {
103 #[cfg(not(feature = "no_position"))]
104 line,
105 #[cfg(not(feature = "no_position"))]
106 pos: _pos,
107 }
108 }
109 #[inline]
115 #[must_use]
116 pub const fn new_const(line: u16, position: u16) -> Option<Self> {
117 if line == 0 {
118 return None;
119 }
120 let _pos = position;
121
122 Some(Self {
123 #[cfg(not(feature = "no_position"))]
124 line,
125 #[cfg(not(feature = "no_position"))]
126 pos: _pos,
127 })
128 }
129 #[inline]
131 #[must_use]
132 pub const fn line(self) -> Option<usize> {
133 #[cfg(not(feature = "no_position"))]
134 return if self.is_none() {
135 None
136 } else {
137 Some(self.line as usize)
138 };
139
140 #[cfg(feature = "no_position")]
141 return None;
142 }
143 #[inline]
145 #[must_use]
146 pub const fn position(self) -> Option<usize> {
147 #[cfg(not(feature = "no_position"))]
148 return if self.is_none() || self.pos == 0 {
149 None
150 } else {
151 Some(self.pos as usize)
152 };
153
154 #[cfg(feature = "no_position")]
155 return None;
156 }
157 #[inline]
159 pub(crate) fn advance(&mut self) {
160 #[cfg(not(feature = "no_position"))]
161 {
162 assert!(!self.is_none(), "cannot advance Position::none");
163
164 if self.pos < u16::MAX {
166 self.pos += 1;
167 }
168 }
169 }
170 #[inline]
176 pub(crate) fn rewind(&mut self) {
177 #[cfg(not(feature = "no_position"))]
178 {
179 assert!(!self.is_none(), "cannot rewind Position::none");
180 assert!(self.pos > 0, "cannot rewind at position 0");
181 self.pos -= 1;
182 }
183 }
184 #[inline]
186 pub(crate) fn new_line(&mut self) {
187 #[cfg(not(feature = "no_position"))]
188 {
189 assert!(!self.is_none(), "cannot advance Position::none");
190
191 if self.line < u16::MAX {
193 self.line += 1;
194 self.pos = 0;
195 }
196 }
197 }
198 #[inline]
200 #[must_use]
201 pub const fn is_beginning_of_line(self) -> bool {
202 #[cfg(not(feature = "no_position"))]
203 return self.pos == 0 && !self.is_none();
204 #[cfg(feature = "no_position")]
205 return false;
206 }
207 #[inline]
209 #[must_use]
210 pub const fn is_none(self) -> bool {
211 #[cfg(not(feature = "no_position"))]
212 return self.line == 0 && self.pos == 0;
213 #[cfg(feature = "no_position")]
214 return true;
215 }
216 #[inline]
218 pub(crate) fn debug_print(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result {
219 #[cfg(not(feature = "no_position"))]
220 if !self.is_none() {
221 write!(_f, " @ {:?}", self)?;
222 }
223
224 Ok(())
225 }
226}
227
228impl Default for Position {
229 #[inline(always)]
230 fn default() -> Self {
231 Self::START
232 }
233}
234
235impl fmt::Display for Position {
236 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
237 if self.is_none() {
238 write!(f, "none")?;
239 } else {
240 #[cfg(not(feature = "no_position"))]
241 write!(f, "line {}, position {}", self.line, self.pos)?;
242 #[cfg(feature = "no_position")]
243 unreachable!();
244 }
245
246 Ok(())
247 }
248}
249
250impl fmt::Debug for Position {
251 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
252 #[cfg(not(feature = "no_position"))]
253 write!(f, "{}:{}", self.line, self.pos)?;
254 #[cfg(feature = "no_position")]
255 f.write_str("none")?;
256
257 Ok(())
258 }
259}
260
261impl Add for Position {
262 type Output = Self;
263
264 fn add(self, rhs: Self) -> Self::Output {
265 if rhs.is_none() {
266 self
267 } else {
268 #[cfg(not(feature = "no_position"))]
269 return Self {
270 line: self.line + rhs.line - 1,
271 pos: if rhs.is_beginning_of_line() {
272 self.pos
273 } else {
274 self.pos + rhs.pos - 1
275 },
276 };
277 #[cfg(feature = "no_position")]
278 unreachable!();
279 }
280 }
281}
282
283impl AddAssign for Position {
284 fn add_assign(&mut self, rhs: Self) {
285 *self = *self + rhs;
286 }
287}
288
/// A language token produced by the tokenizer.
#[derive(Debug, PartialEq, Clone, Hash)]
pub enum Token {
    /// An integer constant.
    IntegerConstant(INT),
    /// A floating-point constant.
    ///
    /// Not available under the `no_float` feature.
    #[cfg(not(feature = "no_float"))]
    FloatConstant(crate::ast::FloatWrapper<crate::FLOAT>),
    /// A decimal constant.
    ///
    /// Requires the `decimal` feature.
    #[cfg(feature = "decimal")]
    DecimalConstant(rust_decimal::Decimal),
    /// An identifier.
    Identifier(Box<str>),
    /// A character constant.
    CharConstant(char),
    /// A string constant.
    StringConstant(Box<str>),
    /// A segment of text that ended because a string interpolation (`${`) starts.
    InterpolatedString(Box<str>),
    /// `{`
    LeftBrace,
    /// `}`
    RightBrace,
    /// `(`
    LeftParen,
    /// `)`
    RightParen,
    /// `[`
    LeftBracket,
    /// `]`
    RightBracket,
    /// `+`
    Plus,
    /// `+` (unary)
    UnaryPlus,
    /// `-`
    Minus,
    /// `-` (unary)
    UnaryMinus,
    /// `*`
    Multiply,
    /// `/`
    Divide,
    /// `%`
    Modulo,
    /// `**`
    PowerOf,
    /// `<<`
    LeftShift,
    /// `>>`
    RightShift,
    /// `;`
    SemiColon,
    /// `:`
    Colon,
    /// `::`
    DoubleColon,
    /// `=>`
    DoubleArrow,
    /// `_`
    Underscore,
    /// `,`
    Comma,
    /// `.`
    Period,
    /// `..`
    ExclusiveRange,
    /// `..=`
    InclusiveRange,
    /// `#{`
    MapStart,
    /// `=`
    Equals,
    /// `true`
    True,
    /// `false`
    False,
    /// `let`
    Let,
    /// `const`
    Const,
    /// `if`
    If,
    /// `else`
    Else,
    /// `switch`
    Switch,
    /// `do`
    Do,
    /// `while`
    While,
    /// `until`
    Until,
    /// `loop`
    Loop,
    /// `for`
    For,
    /// `in`
    In,
    /// `<`
    LessThan,
    /// `>`
    GreaterThan,
    /// `<=`
    LessThanEqualsTo,
    /// `>=`
    GreaterThanEqualsTo,
    /// `==`
    EqualsTo,
    /// `!=`
    NotEqualsTo,
    /// `!`
    Bang,
    /// `|`
    Pipe,
    /// `||`
    Or,
    /// `^`
    XOr,
    /// `&`
    Ampersand,
    /// `&&`
    And,
    /// `fn`
    ///
    /// Not available under the `no_function` feature.
    #[cfg(not(feature = "no_function"))]
    Fn,
    /// `continue`
    Continue,
    /// `break`
    Break,
    /// `return`
    Return,
    /// `throw`
    Throw,
    /// `try`
    Try,
    /// `catch`
    Catch,
    /// `+=`
    PlusAssign,
    /// `-=`
    MinusAssign,
    /// `*=`
    MultiplyAssign,
    /// `/=`
    DivideAssign,
    /// `<<=`
    LeftShiftAssign,
    /// `>>=`
    RightShiftAssign,
    /// `&=`
    AndAssign,
    /// `|=`
    OrAssign,
    /// `^=`
    XOrAssign,
    /// `%=`
    ModuloAssign,
    /// `**=`
    PowerOfAssign,
    /// `private`
    ///
    /// Not available under the `no_function` feature.
    #[cfg(not(feature = "no_function"))]
    Private,
    /// `import`
    ///
    /// Not available under the `no_module` feature.
    #[cfg(not(feature = "no_module"))]
    Import,
    /// `export`
    ///
    /// Not available under the `no_module` feature.
    #[cfg(not(feature = "no_module"))]
    Export,
    /// `as`
    ///
    /// Not available under the `no_module` feature.
    #[cfg(not(feature = "no_module"))]
    As,
    /// A lexer error.
    LexError(LexError),
    /// The text of a comment.
    Comment(Box<str>),
    /// A reserved symbol or keyword.
    Reserved(Box<str>),
    /// A custom keyword.
    // NOTE(review): where custom keywords get registered is not visible here — confirm.
    Custom(Box<str>),
    /// End of the input; displayed as `{EOF}`.
    EOF,
}
487
488impl Token {
489 #[must_use]
491 pub const fn literal_syntax(&self) -> &'static str {
492 use Token::*;
493
494 match self {
495 LeftBrace => "{",
496 RightBrace => "}",
497 LeftParen => "(",
498 RightParen => ")",
499 LeftBracket => "[",
500 RightBracket => "]",
501 Plus => "+",
502 UnaryPlus => "+",
503 Minus => "-",
504 UnaryMinus => "-",
505 Multiply => "*",
506 Divide => "/",
507 SemiColon => ";",
508 Colon => ":",
509 DoubleColon => "::",
510 DoubleArrow => "=>",
511 Underscore => "_",
512 Comma => ",",
513 Period => ".",
514 ExclusiveRange => "..",
515 InclusiveRange => "..=",
516 MapStart => "#{",
517 Equals => "=",
518 True => "true",
519 False => "false",
520 Let => "let",
521 Const => "const",
522 If => "if",
523 Else => "else",
524 Switch => "switch",
525 Do => "do",
526 While => "while",
527 Until => "until",
528 Loop => "loop",
529 For => "for",
530 In => "in",
531 LessThan => "<",
532 GreaterThan => ">",
533 Bang => "!",
534 LessThanEqualsTo => "<=",
535 GreaterThanEqualsTo => ">=",
536 EqualsTo => "==",
537 NotEqualsTo => "!=",
538 Pipe => "|",
539 Or => "||",
540 Ampersand => "&",
541 And => "&&",
542 Continue => "continue",
543 Break => "break",
544 Return => "return",
545 Throw => "throw",
546 Try => "try",
547 Catch => "catch",
548 PlusAssign => "+=",
549 MinusAssign => "-=",
550 MultiplyAssign => "*=",
551 DivideAssign => "/=",
552 LeftShiftAssign => "<<=",
553 RightShiftAssign => ">>=",
554 AndAssign => "&=",
555 OrAssign => "|=",
556 XOrAssign => "^=",
557 LeftShift => "<<",
558 RightShift => ">>",
559 XOr => "^",
560 Modulo => "%",
561 ModuloAssign => "%=",
562 PowerOf => "**",
563 PowerOfAssign => "**=",
564
565 #[cfg(not(feature = "no_function"))]
566 Fn => "fn",
567 #[cfg(not(feature = "no_function"))]
568 Private => "private",
569
570 #[cfg(not(feature = "no_module"))]
571 Import => "import",
572 #[cfg(not(feature = "no_module"))]
573 Export => "export",
574 #[cfg(not(feature = "no_module"))]
575 As => "as",
576
577 _ => "ERROR: NOT A KEYWORD",
578 }
579 }
580
581 #[must_use]
583 pub fn syntax(&self) -> Cow<'static, str> {
584 use Token::*;
585
586 match self {
587 IntegerConstant(i) => i.to_string().into(),
588 #[cfg(not(feature = "no_float"))]
589 FloatConstant(f) => f.to_string().into(),
590 #[cfg(feature = "decimal")]
591 DecimalConstant(d) => d.to_string().into(),
592 StringConstant(_) => "string".into(),
593 InterpolatedString(_) => "string".into(),
594 CharConstant(c) => c.to_string().into(),
595 Identifier(s) => s.to_string().into(),
596 Reserved(s) => s.to_string().into(),
597 Custom(s) => s.to_string().into(),
598 LexError(err) => err.to_string().into(),
599 Comment(s) => s.to_string().into(),
600
601 EOF => "{EOF}".into(),
602
603 token => token.literal_syntax().into(),
604 }
605 }
606
607 #[inline]
609 #[must_use]
610 pub const fn is_op_assignment(&self) -> bool {
611 matches!(
612 self,
613 Self::PlusAssign
614 | Self::MinusAssign
615 | Self::MultiplyAssign
616 | Self::DivideAssign
617 | Self::LeftShiftAssign
618 | Self::RightShiftAssign
619 | Self::ModuloAssign
620 | Self::PowerOfAssign
621 | Self::AndAssign
622 | Self::OrAssign
623 | Self::XOrAssign
624 )
625 }
626
627 #[must_use]
629 pub const fn map_op_assignment(&self) -> Option<Self> {
630 Some(match self {
631 Self::PlusAssign => Self::Plus,
632 Self::MinusAssign => Self::Minus,
633 Self::MultiplyAssign => Self::Multiply,
634 Self::DivideAssign => Self::Divide,
635 Self::LeftShiftAssign => Self::LeftShift,
636 Self::RightShiftAssign => Self::RightShift,
637 Self::ModuloAssign => Self::Modulo,
638 Self::PowerOfAssign => Self::PowerOf,
639 Self::AndAssign => Self::Ampersand,
640 Self::OrAssign => Self::Pipe,
641 Self::XOrAssign => Self::XOr,
642 _ => return None,
643 })
644 }
645
646 #[inline]
648 #[must_use]
649 pub const fn has_op_assignment(&self) -> bool {
650 matches!(
651 self,
652 Self::Plus
653 | Self::Minus
654 | Self::Multiply
655 | Self::Divide
656 | Self::LeftShift
657 | Self::RightShift
658 | Self::Modulo
659 | Self::PowerOf
660 | Self::Ampersand
661 | Self::Pipe
662 | Self::XOr
663 )
664 }
665
666 #[must_use]
668 pub const fn make_op_assignment(&self) -> Option<Self> {
669 Some(match self {
670 Self::Plus => Self::PlusAssign,
671 Self::Minus => Self::MinusAssign,
672 Self::Multiply => Self::MultiplyAssign,
673 Self::Divide => Self::DivideAssign,
674 Self::LeftShift => Self::LeftShiftAssign,
675 Self::RightShift => Self::RightShiftAssign,
676 Self::Modulo => Self::ModuloAssign,
677 Self::PowerOf => Self::PowerOfAssign,
678 Self::Ampersand => Self::AndAssign,
679 Self::Pipe => Self::OrAssign,
680 Self::XOr => Self::XOrAssign,
681 _ => return None,
682 })
683 }
684
685 #[must_use]
687 pub fn lookup_from_syntax(syntax: impl AsRef<str>) -> Option<Self> {
688 use Token::*;
689
690 let syntax = syntax.as_ref();
691
692 Some(match syntax {
693 "{" => LeftBrace,
694 "}" => RightBrace,
695 "(" => LeftParen,
696 ")" => RightParen,
697 "[" => LeftBracket,
698 "]" => RightBracket,
699 "+" => Plus,
700 "-" => Minus,
701 "*" => Multiply,
702 "/" => Divide,
703 ";" => SemiColon,
704 ":" => Colon,
705 "::" => DoubleColon,
706 "=>" => DoubleArrow,
707 "_" => Underscore,
708 "," => Comma,
709 "." => Period,
710 ".." => ExclusiveRange,
711 "..=" => InclusiveRange,
712 "#{" => MapStart,
713 "=" => Equals,
714 "true" => True,
715 "false" => False,
716 "let" => Let,
717 "const" => Const,
718 "if" => If,
719 "else" => Else,
720 "switch" => Switch,
721 "do" => Do,
722 "while" => While,
723 "until" => Until,
724 "loop" => Loop,
725 "for" => For,
726 "in" => In,
727 "<" => LessThan,
728 ">" => GreaterThan,
729 "!" => Bang,
730 "<=" => LessThanEqualsTo,
731 ">=" => GreaterThanEqualsTo,
732 "==" => EqualsTo,
733 "!=" => NotEqualsTo,
734 "|" => Pipe,
735 "||" => Or,
736 "&" => Ampersand,
737 "&&" => And,
738 "continue" => Continue,
739 "break" => Break,
740 "return" => Return,
741 "throw" => Throw,
742 "try" => Try,
743 "catch" => Catch,
744 "+=" => PlusAssign,
745 "-=" => MinusAssign,
746 "*=" => MultiplyAssign,
747 "/=" => DivideAssign,
748 "<<=" => LeftShiftAssign,
749 ">>=" => RightShiftAssign,
750 "&=" => AndAssign,
751 "|=" => OrAssign,
752 "^=" => XOrAssign,
753 "<<" => LeftShift,
754 ">>" => RightShift,
755 "^" => XOr,
756 "%" => Modulo,
757 "%=" => ModuloAssign,
758 "**" => PowerOf,
759 "**=" => PowerOfAssign,
760
761 #[cfg(not(feature = "no_function"))]
762 "fn" => Fn,
763 #[cfg(not(feature = "no_function"))]
764 "private" => Private,
765
766 #[cfg(feature = "no_function")]
767 "fn" | "private" => Reserved(syntax.into()),
768
769 #[cfg(not(feature = "no_module"))]
770 "import" => Import,
771 #[cfg(not(feature = "no_module"))]
772 "export" => Export,
773 #[cfg(not(feature = "no_module"))]
774 "as" => As,
775
776 #[cfg(feature = "no_module")]
777 "import" | "export" | "as" => Reserved(syntax.into()),
778
779 "===" | "!==" | "->" | "<-" | ":=" | "~" | "::<" | "(*" | "*)" | "#" | "#!"
780 | "public" | "protected" | "super" | "new" | "use" | "module" | "package" | "var"
781 | "static" | "shared" | "with" | "goto" | "exit" | "match" | "case" | "default"
782 | "void" | "null" | "nil" | "spawn" | "thread" | "go" | "sync" | "async" | "await"
783 | "yield" => Reserved(syntax.into()),
784
785 KEYWORD_PRINT | KEYWORD_DEBUG | KEYWORD_TYPE_OF | KEYWORD_EVAL | KEYWORD_FN_PTR
786 | KEYWORD_FN_PTR_CALL | KEYWORD_FN_PTR_CURRY | KEYWORD_THIS | KEYWORD_IS_DEF_VAR => {
787 Reserved(syntax.into())
788 }
789
790 #[cfg(not(feature = "no_function"))]
791 crate::engine::KEYWORD_IS_DEF_FN => Reserved(syntax.into()),
792
793 _ => return None,
794 })
795 }
796
797 #[inline(always)]
799 #[must_use]
800 pub const fn is_eof(&self) -> bool {
801 matches!(self, Self::EOF)
802 }
803
804 #[must_use]
807 pub const fn is_next_unary(&self) -> bool {
808 use Token::*;
809
810 match self {
811 LexError(_) |
812 SemiColon | Colon | Comma | ExclusiveRange | InclusiveRange | LeftBrace | LeftParen | LeftBracket | Plus |
825 PlusAssign |
826 UnaryPlus |
827 Minus |
828 MinusAssign |
829 UnaryMinus |
830 Multiply |
831 MultiplyAssign |
832 Divide |
833 DivideAssign |
834 Modulo |
835 ModuloAssign |
836 PowerOf |
837 PowerOfAssign |
838 LeftShift |
839 LeftShiftAssign |
840 RightShift |
841 RightShiftAssign |
842 Equals |
843 EqualsTo |
844 NotEqualsTo |
845 LessThan |
846 GreaterThan |
847 Bang |
848 LessThanEqualsTo |
849 GreaterThanEqualsTo |
850 Pipe |
851 Ampersand |
852 If |
853 While |
855 Until |
856 In |
857 And |
858 AndAssign |
859 Or |
860 OrAssign |
861 XOr |
862 XOrAssign |
863 Return |
864 Throw => true,
865
866 _ => false,
867 }
868 }
869
870 #[must_use]
872 pub const fn precedence(&self) -> Option<Precedence> {
873 use Token::*;
874
875 Precedence::new(match self {
876 Equals | PlusAssign | MinusAssign | MultiplyAssign | DivideAssign | PowerOfAssign
878 | LeftShiftAssign | RightShiftAssign | AndAssign | OrAssign | XOrAssign
879 | ModuloAssign => 0,
880
881 ExclusiveRange | InclusiveRange => 10,
882
883 Or | XOr | Pipe => 30,
884
885 And | Ampersand => 60,
886
887 EqualsTo | NotEqualsTo => 90,
888
889 In => 110,
890
891 LessThan | LessThanEqualsTo | GreaterThan | GreaterThanEqualsTo => 130,
892
893 Plus | Minus => 150,
894
895 Divide | Multiply | Modulo => 180,
896
897 PowerOf => 190,
898
899 LeftShift | RightShift => 210,
900
901 Period => 240,
902
903 _ => 0,
904 })
905 }
906
907 #[must_use]
909 pub const fn is_bind_right(&self) -> bool {
910 use Token::*;
911
912 match self {
913 Equals | PlusAssign | MinusAssign | MultiplyAssign | DivideAssign | PowerOfAssign
915 | LeftShiftAssign | RightShiftAssign | AndAssign | OrAssign | XOrAssign
916 | ModuloAssign => true,
917
918 Period => true,
920
921 PowerOf => true,
923
924 _ => false,
925 }
926 }
927
928 #[must_use]
930 pub const fn is_standard_symbol(&self) -> bool {
931 use Token::*;
932
933 match self {
934 LeftBrace | RightBrace | LeftParen | RightParen | LeftBracket | RightBracket | Plus
935 | UnaryPlus | Minus | UnaryMinus | Multiply | Divide | Modulo | PowerOf | LeftShift
936 | RightShift | SemiColon | Colon | DoubleColon | Comma | Period | ExclusiveRange
937 | InclusiveRange | MapStart | Equals | LessThan | GreaterThan | LessThanEqualsTo
938 | GreaterThanEqualsTo | EqualsTo | NotEqualsTo | Bang | Pipe | Or | XOr | Ampersand
939 | And | PlusAssign | MinusAssign | MultiplyAssign | DivideAssign | LeftShiftAssign
940 | RightShiftAssign | AndAssign | OrAssign | XOrAssign | ModuloAssign
941 | PowerOfAssign => true,
942
943 _ => false,
944 }
945 }
946
947 #[inline]
949 #[must_use]
950 pub const fn is_standard_keyword(&self) -> bool {
951 use Token::*;
952
953 match self {
954 #[cfg(not(feature = "no_function"))]
955 Fn | Private => true,
956
957 #[cfg(not(feature = "no_module"))]
958 Import | Export | As => true,
959
960 True | False | Let | Const | If | Else | Do | While | Until | Loop | For | In
961 | Continue | Break | Return | Throw | Try | Catch => true,
962
963 _ => false,
964 }
965 }
966
967 #[inline(always)]
969 #[must_use]
970 pub const fn is_reserved(&self) -> bool {
971 matches!(self, Self::Reserved(_))
972 }
973
974 #[cfg(not(feature = "no_function"))]
976 #[inline]
977 pub(crate) fn into_function_name_for_override(self) -> Result<Box<str>, Self> {
978 match self {
979 Self::Custom(s) | Self::Identifier(s) if is_valid_function_name(&*s) => Ok(s),
980 _ => Err(self),
981 }
982 }
983
984 #[inline(always)]
986 #[must_use]
987 pub const fn is_custom(&self) -> bool {
988 matches!(self, Self::Custom(_))
989 }
990}
991
992impl From<Token> for String {
993 #[inline(always)]
994 fn from(token: Token) -> Self {
995 token.syntax().into()
996 }
997}
998
/// Mutable state of the tokenizer that is carried from one token to the next.
#[derive(Debug, Clone, Eq, PartialEq, Default)]
pub struct TokenizeState {
    /// Maximum length of a string literal (compared against the byte length of the
    /// parsed text); `None` means no limit is enforced.
    pub max_string_size: Option<NonZeroUsize>,
    /// Set after every token to the negation of that token's `is_next_unary()`;
    /// when `false`, a following `+` is tokenized as a unary operator.
    pub next_token_cannot_be_unary: bool,
    /// Nesting level of the block comment the tokenizer is currently inside
    /// (`0` = not inside a block comment).
    pub comment_level: usize,
    /// Collect the text of comments and return them as `Token::Comment`?
    pub include_comments: bool,
    /// Set to the terminating character while a text string is being parsed.
    /// If it is still set when the next token is requested, the tokenizer resumes
    /// parsing text terminated by this character.
    pub is_within_text_terminated_by: Option<char>,
}
1014
/// A character input stream with one-character look-ahead, as consumed by the tokenizer.
pub trait InputStream {
    /// Put a character back into the stream.
    // NOTE(review): presumably the next `get_next`/`peek_next` returns this character
    // again — confirm against the implementors.
    fn unget(&mut self, ch: char);
    /// Take the next character off the stream; `None` at the end of input.
    fn get_next(&mut self) -> Option<char>;
    /// Look at the next character without consuming it; `None` at the end of input.
    #[must_use]
    fn peek_next(&mut self) -> Option<char>;
}
1028
/// Parse the text of a string (or character) literal from `stream`.
///
/// Reading starts at the current stream position; the callers in this file invoke it
/// after the opening delimiter has already been consumed.
///
/// # Arguments
///
/// * `stream` - character source.
/// * `state` - `is_within_text_terminated_by` is set to `Some(termination_char)` on entry and
///   cleared when the terminator is found, when an interpolation starts, or when an
///   unterminated-string error is returned (other exits leave it set). `max_string_size`
///   supplies the length limit.
/// * `pos` - advanced for every character consumed.
/// * `termination_char` - character that ends the literal. Two of them in a row, or one
///   preceded by a backslash, produce a single literal terminator character instead.
/// * `continuation` - allow a backslash at the end of a line to continue the literal on the
///   next line.
/// * `verbatim` - no escape sequences are processed, line breaks are kept in the text, and
///   the end of input ends the literal without an error.
/// * `allow_interpolation` - stop at an unescaped `$` that is followed by `{`.
///
/// # Returns
///
/// `Ok((text, interpolated))`, where `interpolated` is `true` if parsing stopped at the
/// start of an interpolation.
///
/// # Errors
///
/// * `UnterminatedString` (at the start position) - end of input or an un-continued line
///   break before the terminator (non-verbatim only).
/// * `StringTooLong` - the text exceeds `state.max_string_size`.
/// * `MalformedEscapeSequence` - unknown escape, or bad/missing hex digits after
///   `\x`, `\u` or `\U`.
///
/// # Panics
///
/// Panics if an escape is pending while `verbatim` is set, or if a line continuation is
/// reached with a pending escape other than a single backslash.
pub fn parse_string_literal(
    stream: &mut impl InputStream,
    state: &mut TokenizeState,
    pos: &mut Position,
    termination_char: char,
    continuation: bool,
    verbatim: bool,
    allow_interpolation: bool,
) -> Result<(Box<str>, bool), (LexError, Position)> {
    // `result` accumulates the parsed text; `escape` holds a pending escape prefix
    // (empty = not inside an escape sequence).
    let mut result = String::with_capacity(12);
    let mut escape = String::with_capacity(12);

    let start = *pos;
    let mut interpolated = false;
    // After a line continuation, whitespace is skipped while the character position
    // is below this value (0 = not skipping).
    #[cfg(not(feature = "no_position"))]
    let mut skip_whitespace_until = 0;

    state.is_within_text_terminated_by = Some(termination_char);

    loop {
        assert!(
            !verbatim || escape.is_empty(),
            "verbatim strings should not have any escapes"
        );

        let next_char = match stream.get_next() {
            Some(ch) => {
                pos.advance();
                ch
            }
            // End of input inside a verbatim string: the string simply ends here
            None if verbatim => {
                assert_eq!(escape, "", "verbatim strings should not have any escapes");
                pos.advance();
                break;
            }
            // End of input right after a line-continuation backslash
            None if continuation && !escape.is_empty() => {
                assert_eq!(escape, "\\", "unexpected escape {} at end of line", escape);
                pos.advance();
                break;
            }
            // End of input anywhere else: the string is unterminated
            None => {
                result += &escape;
                pos.advance();
                state.is_within_text_terminated_by = None;
                return Err((LERR::UnterminatedString, start));
            }
        };

        // String interpolation: an unescaped `$` followed by `{`.
        // The `$` has been consumed; the `{` has only been peeked.
        if allow_interpolation
            && next_char == '$'
            && escape.is_empty()
            && stream.peek_next().map(|ch| ch == '{').unwrap_or(false)
        {
            interpolated = true;
            state.is_within_text_terminated_by = None;
            break;
        }

        // Enforce the length limit as the text grows
        if let Some(max) = state.max_string_size {
            if result.len() > max.get() {
                return Err((LexError::StringTooLong(max.get()), *pos));
            }
        }

        // NOTE: the order of the arms below is significant.
        match next_char {
            // `\r` directly before `\n` is dropped, so `\r\n` is handled as `\n`
            '\r' if stream.peek_next().map(|ch| ch == '\n').unwrap_or(false) => (),
            // `\` - start of an escape sequence (never in verbatim mode)
            '\\' if !verbatim && escape.is_empty() => {
                escape.push('\\');
            }
            // `\\` - escaped backslash
            '\\' if !escape.is_empty() => {
                escape.clear();
                result.push('\\');
            }
            // `\t` - tab
            't' if !escape.is_empty() => {
                escape.clear();
                result.push('\t');
            }
            // `\n` - line feed
            'n' if !escape.is_empty() => {
                escape.clear();
                result.push('\n');
            }
            // `\r` - carriage return
            'r' if !escape.is_empty() => {
                escape.clear();
                result.push('\r');
            }
            // `\x??`, `\u????`, `\U????????` - character given by hex digits
            ch @ 'x' | ch @ 'u' | ch @ 'U' if !escape.is_empty() => {
                // `seq` collects the raw escape text for error reporting
                let mut seq = escape.clone();
                escape.clear();
                seq.push(ch);

                let mut out_val: u32 = 0;
                // Number of hex digits required by each form
                let len = match ch {
                    'x' => 2,
                    'u' => 4,
                    'U' => 8,
                    _ => unreachable!(),
                };

                for _ in 0..len {
                    let c = stream
                        .get_next()
                        .ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?;

                    seq.push(c);
                    pos.advance();

                    out_val *= 16;
                    out_val += c
                        .to_digit(16)
                        .ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?;
                }

                // The value must be a valid Unicode scalar value
                result.push(
                    char::from_u32(out_val)
                        .ok_or_else(|| (LERR::MalformedEscapeSequence(seq), *pos))?,
                );
            }

            // Backslash-escaped terminator - a literal terminator character
            _ if termination_char == next_char && !escape.is_empty() => {
                escape.clear();
                result.push(next_char)
            }

            // Doubled terminator - a single literal terminator character
            _ if termination_char == next_char
                && escape.is_empty()
                && stream.peek_next().map_or(false, |c| c == termination_char) =>
            {
                eat_next(stream, pos);
                result.push(termination_char)
            }

            // Lone terminator - end of the literal
            _ if termination_char == next_char && escape.is_empty() => {
                state.is_within_text_terminated_by = None;
                break;
            }

            // Line break inside a verbatim string is kept as part of the text
            '\n' if verbatim => {
                assert_eq!(escape, "", "verbatim strings should not have any escapes");
                pos.new_line();
                result.push(next_char);
            }

            // Line continuation: backslash directly before the line break
            '\n' if continuation && !escape.is_empty() => {
                assert_eq!(escape, "\\", "unexpected escape {} at end of line", escape);
                escape.clear();
                pos.new_line();

                // Skip whitespace on the continued line up to the column where the
                // literal started
                #[cfg(not(feature = "no_position"))]
                {
                    let start_position = start.position().expect("start position");
                    skip_whitespace_until = start_position + 1;
                }
            }

            // Any other line break: the string is unterminated
            '\n' => {
                pos.rewind();
                state.is_within_text_terminated_by = None;
                return Err((LERR::UnterminatedString, start));
            }

            // Unknown escape sequence
            _ if !escape.is_empty() => {
                escape.push(next_char);

                return Err((LERR::MalformedEscapeSequence(escape), *pos));
            }

            // Whitespace being skipped after a line continuation
            #[cfg(not(feature = "no_position"))]
            _ if next_char.is_whitespace()
                && pos.position().expect("position") < skip_whitespace_until => {}

            // All other characters are part of the text
            _ => {
                escape.clear();
                result.push(next_char);

                // The first regular character ends whitespace skipping
                #[cfg(not(feature = "no_position"))]
                {
                    skip_whitespace_until = 0;
                }
            }
        }
    }

    // Final length check, covering the last character pushed
    if let Some(max) = state.max_string_size {
        if result.len() > max.get() {
            return Err((LexError::StringTooLong(max.get()), *pos));
        }
    }

    Ok((result.into(), interpolated))
}
1266
1267#[inline(always)]
1269fn eat_next(stream: &mut impl InputStream, pos: &mut Position) -> Option<char> {
1270 pos.advance();
1271 stream.get_next()
1272}
1273
1274fn scan_block_comment(
1276 stream: &mut impl InputStream,
1277 level: usize,
1278 pos: &mut Position,
1279 comment: Option<&mut String>,
1280) -> usize {
1281 let mut level = level;
1282 let mut comment = comment;
1283
1284 while let Some(c) = stream.get_next() {
1285 pos.advance();
1286
1287 if let Some(comment) = comment.as_mut() {
1288 comment.push(c);
1289 }
1290
1291 match c {
1292 '/' => {
1293 if let Some(c2) = stream.peek_next().filter(|&c2| c2 == '*') {
1294 eat_next(stream, pos);
1295 if let Some(comment) = comment.as_mut() {
1296 comment.push(c2);
1297 }
1298 level += 1;
1299 }
1300 }
1301 '*' => {
1302 if let Some(c2) = stream.peek_next().filter(|&c2| c2 == '/') {
1303 eat_next(stream, pos);
1304 if let Some(comment) = comment.as_mut() {
1305 comment.push(c2);
1306 }
1307 level -= 1;
1308 }
1309 }
1310 '\n' => pos.new_line(),
1311 _ => (),
1312 }
1313
1314 if level == 0 {
1315 break;
1316 }
1317 }
1318
1319 level
1320}
1321
1322#[inline]
1325#[must_use]
1326pub fn get_next_token(
1327 stream: &mut impl InputStream,
1328 state: &mut TokenizeState,
1329 pos: &mut Position,
1330) -> Option<(Token, Position)> {
1331 let result = get_next_token_inner(stream, state, pos);
1332
1333 if let Some((ref token, _)) = result {
1335 state.next_token_cannot_be_unary = !token.is_next_unary();
1336 }
1337
1338 result
1339}
1340
/// Test if the given character is an ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`).
#[inline(always)]
fn is_hex_digit(c: char) -> bool {
    c.is_ascii_hexdigit()
}
1346
/// Test if the given character is an ASCII decimal digit (`0-9`).
#[inline(always)]
fn is_numeric_digit(c: char) -> bool {
    c.is_ascii_digit()
}
1352
/// Test if the comment text is a doc-comment.
///
/// A doc-comment starts with exactly three slashes (`///` but not `////`) or with a
/// slash followed by exactly two asterisks (`/**` but not `/***`).
///
/// Only available with the `metadata` feature and without `no_function`.
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
#[inline]
#[must_use]
pub fn is_doc_comment(comment: impl AsRef<str>) -> bool {
    let text = comment.as_ref();

    let is_line_doc = text.starts_with("///") && !text.starts_with("////");
    let is_block_doc = text.starts_with("/**") && !text.starts_with("/***");

    is_line_doc || is_block_doc
}
1364
1365#[must_use]
1367fn get_next_token_inner(
1368 stream: &mut impl InputStream,
1369 state: &mut TokenizeState,
1370 pos: &mut Position,
1371) -> Option<(Token, Position)> {
1372 if state.comment_level > 0 {
1374 let start_pos = *pos;
1375 let mut comment = if state.include_comments {
1376 Some(String::new())
1377 } else {
1378 None
1379 };
1380
1381 state.comment_level =
1382 scan_block_comment(stream, state.comment_level, pos, comment.as_mut());
1383
1384 let return_comment = state.include_comments;
1385
1386 #[cfg(not(feature = "no_function"))]
1387 #[cfg(feature = "metadata")]
1388 let return_comment = return_comment || is_doc_comment(comment.as_ref().expect("`Some`"));
1389
1390 if return_comment {
1391 return Some((Token::Comment(comment.expect("`Some`").into()), start_pos));
1392 }
1393 if state.comment_level > 0 {
1394 return None;
1396 }
1397 }
1398
1399 if let Some(ch) = state.is_within_text_terminated_by.take() {
1401 let start_pos = *pos;
1402
1403 return parse_string_literal(stream, state, pos, ch, false, true, true).map_or_else(
1404 |(err, err_pos)| Some((Token::LexError(err), err_pos)),
1405 |(result, interpolated)| {
1406 if interpolated {
1407 Some((Token::InterpolatedString(result), start_pos))
1408 } else {
1409 Some((Token::StringConstant(result), start_pos))
1410 }
1411 },
1412 );
1413 }
1414
1415 let mut negated: Option<Position> = None;
1416
1417 while let Some(c) = stream.get_next() {
1418 pos.advance();
1419
1420 let start_pos = *pos;
1421
1422 match (c, stream.peek_next().unwrap_or('\0')) {
1423 ('\n', _) => pos.new_line(),
1425
1426 ('0'..='9', _) => {
1428 let mut result = smallvec::SmallVec::<[char; 16]>::new();
1429 let mut radix_base: Option<u32> = None;
1430 let mut valid: fn(char) -> bool = is_numeric_digit;
1431 result.push(c);
1432
1433 while let Some(next_char) = stream.peek_next() {
1434 match next_char {
1435 ch if valid(ch) || ch == NUMBER_SEPARATOR => {
1436 result.push(next_char);
1437 eat_next(stream, pos);
1438 }
1439 #[cfg(any(not(feature = "no_float"), feature = "decimal"))]
1440 '.' => {
1441 stream.get_next().expect("`.`");
1442
1443 match stream.peek_next().unwrap_or('\0') {
1445 '0'..='9' => {
1447 result.push(next_char);
1448 pos.advance();
1449 }
1450 '_' => {
1452 stream.unget(next_char);
1453 break;
1454 }
1455 '.' => {
1457 stream.unget(next_char);
1458 break;
1459 }
1460 ch if !is_id_first_alphabetic(ch) => {
1462 result.push(next_char);
1463 pos.advance();
1464 result.push('0');
1465 }
1466 _ => {
1468 stream.unget(next_char);
1469 break;
1470 }
1471 }
1472 }
1473 #[cfg(not(feature = "no_float"))]
1474 'e' => {
1475 stream.get_next().expect("`e`");
1476
1477 match stream.peek_next().unwrap_or('\0') {
1479 '0'..='9' => {
1481 result.push(next_char);
1482 pos.advance();
1483 }
1484 '+' | '-' => {
1486 result.push(next_char);
1487 pos.advance();
1488 result.push(stream.get_next().expect("`+` or `-`"));
1489 pos.advance();
1490 }
1491 _ => {
1493 stream.unget(next_char);
1494 break;
1495 }
1496 }
1497 }
1498 ch @ 'x' | ch @ 'o' | ch @ 'b' | ch @ 'X' | ch @ 'O' | ch @ 'B'
1500 if c == '0' && result.len() <= 1 =>
1501 {
1502 result.push(next_char);
1503 eat_next(stream, pos);
1504
1505 valid = match ch {
1506 'x' | 'X' => is_hex_digit,
1507 'o' | 'O' => is_numeric_digit,
1508 'b' | 'B' => is_numeric_digit,
1509 _ => unreachable!(),
1510 };
1511
1512 radix_base = Some(match ch {
1513 'x' | 'X' => 16,
1514 'o' | 'O' => 8,
1515 'b' | 'B' => 2,
1516 _ => unreachable!(),
1517 });
1518 }
1519
1520 _ => break,
1521 }
1522 }
1523
1524 let num_pos = negated.map_or(start_pos, |negated_pos| {
1525 result.insert(0, '-');
1526 negated_pos
1527 });
1528
1529 return Some((
1531 if let Some(radix) = radix_base {
1532 let out: String = result
1533 .iter()
1534 .skip(2)
1535 .filter(|&&c| c != NUMBER_SEPARATOR)
1536 .collect();
1537
1538 INT::from_str_radix(&out, radix)
1539 .map(Token::IntegerConstant)
1540 .unwrap_or_else(|_| {
1541 Token::LexError(LERR::MalformedNumber(result.into_iter().collect()))
1542 })
1543 } else {
1544 let out: String =
1545 result.iter().filter(|&&c| c != NUMBER_SEPARATOR).collect();
1546 let num = INT::from_str(&out).map(Token::IntegerConstant);
1547
1548 #[cfg(not(feature = "no_float"))]
1550 let num = num.or_else(|_| {
1551 crate::ast::FloatWrapper::from_str(&out).map(Token::FloatConstant)
1552 });
1553
1554 #[cfg(feature = "decimal")]
1556 let num = num.or_else(|_| {
1557 rust_decimal::Decimal::from_str(&out).map(Token::DecimalConstant)
1558 });
1559
1560 #[cfg(feature = "decimal")]
1562 let num = num.or_else(|_| {
1563 rust_decimal::Decimal::from_scientific(&out).map(Token::DecimalConstant)
1564 });
1565
1566 num.unwrap_or_else(|_| {
1567 Token::LexError(LERR::MalformedNumber(result.into_iter().collect()))
1568 })
1569 },
1570 num_pos,
1571 ));
1572 }
1573
1574 #[cfg(not(feature = "unicode-xid-ident"))]
1576 ('a'..='z', _) | ('_', _) | ('A'..='Z', _) => {
1577 return get_identifier(stream, pos, start_pos, c);
1578 }
1579 #[cfg(feature = "unicode-xid-ident")]
1580 (ch, _) if unicode_xid::UnicodeXID::is_xid_start(ch) || ch == '_' => {
1581 return get_identifier(stream, pos, start_pos, c);
1582 }
1583
1584 ('"', _) => {
1586 return parse_string_literal(stream, state, pos, c, true, false, false)
1587 .map_or_else(
1588 |(err, err_pos)| Some((Token::LexError(err), err_pos)),
1589 |(result, _)| Some((Token::StringConstant(result), start_pos)),
1590 );
1591 }
1592 ('`', _) => {
1594 match stream.peek_next() {
1596 Some('\r') => {
1598 eat_next(stream, pos);
1599 pos.new_line();
1600 if stream.peek_next().map(|ch| ch == '\n').unwrap_or(false) {
1602 eat_next(stream, pos);
1603 }
1604 }
1605 Some('\n') => {
1607 eat_next(stream, pos);
1608 pos.new_line();
1609 }
1610 _ => (),
1611 }
1612
1613 return parse_string_literal(stream, state, pos, c, false, true, true).map_or_else(
1614 |(err, err_pos)| Some((Token::LexError(err), err_pos)),
1615 |(result, interpolated)| {
1616 if interpolated {
1617 Some((Token::InterpolatedString(result), start_pos))
1618 } else {
1619 Some((Token::StringConstant(result), start_pos))
1620 }
1621 },
1622 );
1623 }
1624
1625 ('\'', '\'') => {
1627 return Some((
1628 Token::LexError(LERR::MalformedChar("".to_string())),
1629 start_pos,
1630 ))
1631 }
1632 ('\'', _) => {
1633 return Some(
1634 parse_string_literal(stream, state, pos, c, false, false, false).map_or_else(
1635 |(err, err_pos)| (Token::LexError(err), err_pos),
1636 |(result, _)| {
1637 let mut chars = result.chars();
1638 let first = chars.next().expect("not empty");
1639
1640 if chars.next().is_some() {
1641 (
1642 Token::LexError(LERR::MalformedChar(result.to_string())),
1643 start_pos,
1644 )
1645 } else {
1646 (Token::CharConstant(first), start_pos)
1647 }
1648 },
1649 ),
1650 )
1651 }
1652
1653 ('{', _) => return Some((Token::LeftBrace, start_pos)),
1655 ('}', _) => return Some((Token::RightBrace, start_pos)),
1656
1657 ('(', '*') => {
1659 eat_next(stream, pos);
1660 return Some((Token::Reserved("(*".into()), start_pos));
1661 }
1662 ('(', _) => return Some((Token::LeftParen, start_pos)),
1663 (')', _) => return Some((Token::RightParen, start_pos)),
1664
1665 ('[', _) => return Some((Token::LeftBracket, start_pos)),
1667 (']', _) => return Some((Token::RightBracket, start_pos)),
1668
1669 #[cfg(not(feature = "no_object"))]
1671 ('#', '{') => {
1672 eat_next(stream, pos);
1673 return Some((Token::MapStart, start_pos));
1674 }
1675 ('#', '!') => return Some((Token::Reserved("#!".into()), start_pos)),
1677
1678 ('#', _) => return Some((Token::Reserved("#".into()), start_pos)),
1679
1680 ('+', '=') => {
1682 eat_next(stream, pos);
1683 return Some((Token::PlusAssign, start_pos));
1684 }
1685 ('+', '+') => {
1686 eat_next(stream, pos);
1687 return Some((Token::Reserved("++".into()), start_pos));
1688 }
1689 ('+', _) if !state.next_token_cannot_be_unary => {
1690 return Some((Token::UnaryPlus, start_pos))
1691 }
1692 ('+', _) => return Some((Token::Plus, start_pos)),
1693
1694 ('-', '0'..='9') if !state.next_token_cannot_be_unary => negated = Some(start_pos),
1695 ('-', '0'..='9') => return Some((Token::Minus, start_pos)),
1696 ('-', '=') => {
1697 eat_next(stream, pos);
1698 return Some((Token::MinusAssign, start_pos));
1699 }
1700 ('-', '>') => {
1701 eat_next(stream, pos);
1702 return Some((Token::Reserved("->".into()), start_pos));
1703 }
1704 ('-', '-') => {
1705 eat_next(stream, pos);
1706 return Some((Token::Reserved("--".into()), start_pos));
1707 }
1708 ('-', _) if !state.next_token_cannot_be_unary => {
1709 return Some((Token::UnaryMinus, start_pos))
1710 }
1711 ('-', _) => return Some((Token::Minus, start_pos)),
1712
1713 ('*', ')') => {
1714 eat_next(stream, pos);
1715 return Some((Token::Reserved("*)".into()), start_pos));
1716 }
1717 ('*', '=') => {
1718 eat_next(stream, pos);
1719 return Some((Token::MultiplyAssign, start_pos));
1720 }
1721 ('*', '*') => {
1722 eat_next(stream, pos);
1723
1724 return Some((
1725 if stream.peek_next() == Some('=') {
1726 eat_next(stream, pos);
1727 Token::PowerOfAssign
1728 } else {
1729 Token::PowerOf
1730 },
1731 start_pos,
1732 ));
1733 }
1734 ('*', _) => return Some((Token::Multiply, start_pos)),
1735
1736 ('/', '/') => {
1738 eat_next(stream, pos);
1739
1740 let mut comment = match stream.peek_next() {
1741 #[cfg(not(feature = "no_function"))]
1742 #[cfg(feature = "metadata")]
1743 Some('/') => {
1744 eat_next(stream, pos);
1745
1746 match stream.peek_next() {
1748 Some('/') => None,
1749 _ => Some("///".to_string()),
1750 }
1751 }
1752 _ if state.include_comments => Some("//".to_string()),
1753 _ => None,
1754 };
1755
1756 while let Some(c) = stream.get_next() {
1757 if c == '\n' {
1758 pos.new_line();
1759 break;
1760 }
1761 if let Some(comment) = comment.as_mut() {
1762 comment.push(c);
1763 }
1764 pos.advance();
1765 }
1766
1767 if let Some(comment) = comment {
1768 return Some((Token::Comment(comment.into()), start_pos));
1769 }
1770 }
1771 ('/', '*') => {
1772 state.comment_level = 1;
1773 eat_next(stream, pos);
1774
1775 let mut comment = match stream.peek_next() {
1776 #[cfg(not(feature = "no_function"))]
1777 #[cfg(feature = "metadata")]
1778 Some('*') => {
1779 eat_next(stream, pos);
1780
1781 match stream.peek_next() {
1783 Some('*') => None,
1784 _ => Some("/**".to_string()),
1785 }
1786 }
1787 _ if state.include_comments => Some("/*".to_string()),
1788 _ => None,
1789 };
1790
1791 state.comment_level =
1792 scan_block_comment(stream, state.comment_level, pos, comment.as_mut());
1793
1794 if let Some(comment) = comment {
1795 return Some((Token::Comment(comment.into()), start_pos));
1796 }
1797 }
1798
1799 ('/', '=') => {
1800 eat_next(stream, pos);
1801 return Some((Token::DivideAssign, start_pos));
1802 }
1803 ('/', _) => return Some((Token::Divide, start_pos)),
1804
1805 (';', _) => return Some((Token::SemiColon, start_pos)),
1806 (',', _) => return Some((Token::Comma, start_pos)),
1807
1808 ('.', '.') => {
1809 eat_next(stream, pos);
1810 return Some((
1811 match stream.peek_next() {
1812 Some('.') => {
1813 eat_next(stream, pos);
1814 Token::Reserved("...".into())
1815 }
1816 Some('=') => {
1817 eat_next(stream, pos);
1818 Token::InclusiveRange
1819 }
1820 _ => Token::ExclusiveRange,
1821 },
1822 start_pos,
1823 ));
1824 }
1825 ('.', _) => return Some((Token::Period, start_pos)),
1826
1827 ('=', '=') => {
1828 eat_next(stream, pos);
1829
1830 if stream.peek_next() == Some('=') {
1831 eat_next(stream, pos);
1832 return Some((Token::Reserved("===".into()), start_pos));
1833 }
1834
1835 return Some((Token::EqualsTo, start_pos));
1836 }
1837 ('=', '>') => {
1838 eat_next(stream, pos);
1839 return Some((Token::DoubleArrow, start_pos));
1840 }
1841 ('=', _) => return Some((Token::Equals, start_pos)),
1842
1843 #[cfg(not(feature = "no_module"))]
1844 (':', ':') => {
1845 eat_next(stream, pos);
1846
1847 if stream.peek_next() == Some('<') {
1848 eat_next(stream, pos);
1849 return Some((Token::Reserved("::<".into()), start_pos));
1850 }
1851
1852 return Some((Token::DoubleColon, start_pos));
1853 }
1854 (':', '=') => {
1855 eat_next(stream, pos);
1856 return Some((Token::Reserved(":=".into()), start_pos));
1857 }
1858 (':', _) => return Some((Token::Colon, start_pos)),
1859
1860 ('<', '=') => {
1861 eat_next(stream, pos);
1862 return Some((Token::LessThanEqualsTo, start_pos));
1863 }
1864 ('<', '-') => {
1865 eat_next(stream, pos);
1866 return Some((Token::Reserved("<-".into()), start_pos));
1867 }
1868 ('<', '<') => {
1869 eat_next(stream, pos);
1870
1871 return Some((
1872 if stream.peek_next() == Some('=') {
1873 eat_next(stream, pos);
1874 Token::LeftShiftAssign
1875 } else {
1876 Token::LeftShift
1877 },
1878 start_pos,
1879 ));
1880 }
1881 ('<', _) => return Some((Token::LessThan, start_pos)),
1882
1883 ('>', '=') => {
1884 eat_next(stream, pos);
1885 return Some((Token::GreaterThanEqualsTo, start_pos));
1886 }
1887 ('>', '>') => {
1888 eat_next(stream, pos);
1889
1890 return Some((
1891 if stream.peek_next() == Some('=') {
1892 eat_next(stream, pos);
1893 Token::RightShiftAssign
1894 } else {
1895 Token::RightShift
1896 },
1897 start_pos,
1898 ));
1899 }
1900 ('>', _) => return Some((Token::GreaterThan, start_pos)),
1901
1902 ('!', '=') => {
1903 eat_next(stream, pos);
1904
1905 if stream.peek_next() == Some('=') {
1906 eat_next(stream, pos);
1907 return Some((Token::Reserved("!==".into()), start_pos));
1908 }
1909
1910 return Some((Token::NotEqualsTo, start_pos));
1911 }
1912 ('!', _) => return Some((Token::Bang, start_pos)),
1913
1914 ('|', '|') => {
1915 eat_next(stream, pos);
1916 return Some((Token::Or, start_pos));
1917 }
1918 ('|', '=') => {
1919 eat_next(stream, pos);
1920 return Some((Token::OrAssign, start_pos));
1921 }
1922 ('|', _) => return Some((Token::Pipe, start_pos)),
1923
1924 ('&', '&') => {
1925 eat_next(stream, pos);
1926 return Some((Token::And, start_pos));
1927 }
1928 ('&', '=') => {
1929 eat_next(stream, pos);
1930 return Some((Token::AndAssign, start_pos));
1931 }
1932 ('&', _) => return Some((Token::Ampersand, start_pos)),
1933
1934 ('^', '=') => {
1935 eat_next(stream, pos);
1936 return Some((Token::XOrAssign, start_pos));
1937 }
1938 ('^', _) => return Some((Token::XOr, start_pos)),
1939
1940 ('~', _) => return Some((Token::Reserved("~".into()), start_pos)),
1941
1942 ('%', '=') => {
1943 eat_next(stream, pos);
1944 return Some((Token::ModuloAssign, start_pos));
1945 }
1946 ('%', _) => return Some((Token::Modulo, start_pos)),
1947
1948 ('@', _) => return Some((Token::Reserved("@".into()), start_pos)),
1949
1950 ('$', _) => return Some((Token::Reserved("$".into()), start_pos)),
1951
1952 (ch, _) if ch.is_whitespace() => (),
1953
1954 (ch, _) => {
1955 return Some((
1956 Token::LexError(LERR::UnexpectedInput(ch.to_string())),
1957 start_pos,
1958 ))
1959 }
1960 }
1961 }
1962
1963 pos.advance();
1964
1965 Some((Token::EOF, *pos))
1966}
1967
1968fn get_identifier(
1970 stream: &mut impl InputStream,
1971 pos: &mut Position,
1972 start_pos: Position,
1973 first_char: char,
1974) -> Option<(Token, Position)> {
1975 let mut result = smallvec::SmallVec::<[char; 8]>::new();
1976 result.push(first_char);
1977
1978 while let Some(next_char) = stream.peek_next() {
1979 match next_char {
1980 x if is_id_continue(x) => {
1981 result.push(x);
1982 eat_next(stream, pos);
1983 }
1984 _ => break,
1985 }
1986 }
1987
1988 let is_valid_identifier = is_valid_identifier(result.iter().cloned());
1989
1990 let identifier: String = result.into_iter().collect();
1991
1992 if let Some(token) = Token::lookup_from_syntax(&identifier) {
1993 return Some((token, start_pos));
1994 }
1995
1996 if !is_valid_identifier {
1997 return Some((
1998 Token::LexError(LERR::MalformedIdentifier(identifier)),
1999 start_pos,
2000 ));
2001 }
2002
2003 Some((Token::Identifier(identifier.into()), start_pos))
2004}
2005
2006#[inline]
2008#[must_use]
2009pub fn is_keyword_function(name: impl AsRef<str>) -> bool {
2010 match name.as_ref() {
2011 KEYWORD_PRINT | KEYWORD_DEBUG | KEYWORD_TYPE_OF | KEYWORD_EVAL | KEYWORD_FN_PTR
2012 | KEYWORD_FN_PTR_CALL | KEYWORD_FN_PTR_CURRY | KEYWORD_IS_DEF_VAR => true,
2013
2014 #[cfg(not(feature = "no_function"))]
2015 crate::engine::KEYWORD_IS_DEF_FN => true,
2016
2017 _ => false,
2018 }
2019}
2020
2021#[must_use]
2023pub fn is_valid_identifier(name: impl Iterator<Item = char>) -> bool {
2024 let mut first_alphabetic = false;
2025
2026 for ch in name {
2027 match ch {
2028 '_' => (),
2029 _ if is_id_first_alphabetic(ch) => first_alphabetic = true,
2030 _ if !first_alphabetic => return false,
2031 _ if char::is_ascii_alphanumeric(&ch) => (),
2032 _ => return false,
2033 }
2034 }
2035
2036 first_alphabetic
2037}
2038
2039#[inline(always)]
2041#[must_use]
2042pub fn is_valid_function_name(name: impl AsRef<str>) -> bool {
2043 is_valid_identifier(name.as_ref().chars())
2044}
2045
/// Can this character start the alphabetic part of an identifier?
///
/// Variant compiled when the `unicode-xid-ident` feature is enabled: the
/// decision is delegated to `UnicodeXID::is_xid_start`.
#[cfg(feature = "unicode-xid-ident")]
#[inline(always)]
#[must_use]
pub fn is_id_first_alphabetic(x: char) -> bool {
    <char as unicode_xid::UnicodeXID>::is_xid_start(x)
}
2053
/// Can this character appear after the first character of an identifier?
///
/// Variant compiled when the `unicode-xid-ident` feature is enabled: the
/// decision is delegated to `UnicodeXID::is_xid_continue`.
#[cfg(feature = "unicode-xid-ident")]
#[inline(always)]
#[must_use]
pub fn is_id_continue(x: char) -> bool {
    <char as unicode_xid::UnicodeXID>::is_xid_continue(x)
}
2061
/// Can this character start the alphabetic part of an identifier?
///
/// Variant compiled when the `unicode-xid-ident` feature is disabled: only
/// the ASCII letters `a`-`z` and `A`-`Z` qualify.
#[cfg(not(feature = "unicode-xid-ident"))]
#[inline(always)]
#[must_use]
pub fn is_id_first_alphabetic(x: char) -> bool {
    matches!(x, 'a'..='z' | 'A'..='Z')
}
2069
/// Can this character appear after the first character of an identifier?
///
/// Variant compiled when the `unicode-xid-ident` feature is disabled: ASCII
/// letters, ASCII digits and the underscore qualify.
#[cfg(not(feature = "unicode-xid-ident"))]
#[inline(always)]
#[must_use]
pub fn is_id_continue(x: char) -> bool {
    matches!(x, '_' | '0'..='9' | 'a'..='z' | 'A'..='Z')
}
2077
/// A character stream that reads through several input strings in sequence,
/// with room for one pushed-back character.
pub struct MultiInputsStream<'a> {
    /// A single pushed-back character, if any. When present it is the next
    /// character handed out, ahead of anything in `streams`.
    pub buf: Option<char>,
    /// Index into `streams` of the input currently being read.
    pub index: usize,
    /// The input character streams, read one after another in order.
    pub streams: StaticVec<Peekable<Chars<'a>>>,
}
2090
2091impl InputStream for MultiInputsStream<'_> {
2092 #[inline]
2093 fn unget(&mut self, ch: char) {
2094 if self.buf.is_some() {
2095 panic!("cannot unget two characters in a row");
2096 }
2097
2098 self.buf = Some(ch);
2099 }
2100 fn get_next(&mut self) -> Option<char> {
2101 if let Some(ch) = self.buf.take() {
2102 return Some(ch);
2103 }
2104
2105 loop {
2106 if self.index >= self.streams.len() {
2107 return None;
2109 } else if let Some(ch) = self.streams[self.index].next() {
2110 return Some(ch);
2112 } else {
2113 self.index += 1;
2115 }
2116 }
2117 }
2118 fn peek_next(&mut self) -> Option<char> {
2119 if let Some(ch) = self.buf {
2120 return Some(ch);
2121 }
2122
2123 loop {
2124 if self.index >= self.streams.len() {
2125 return None;
2127 } else if let Some(&ch) = self.streams[self.index].peek() {
2128 return Some(ch);
2130 } else {
2131 self.index += 1;
2133 }
2134 }
2135 }
2136}
2137
/// An iterator that yields `(Token, Position)` pairs read from a
/// [`MultiInputsStream`].
pub struct TokenIterator<'a> {
    /// The engine; its `custom_keywords` and `disabled_symbols` are consulted
    /// when classifying each token.
    pub engine: &'a Engine,
    /// Tokenizer state, passed to `get_next_token` on every step.
    pub state: TokenizeState,
    /// Current position, passed to `get_next_token` on every step.
    pub pos: Position,
    /// Shared control block; its `is_within_text` flag is read (and reset)
    /// at the start of every step.
    pub tokenizer_control: TokenizerControl,
    /// The input character stream.
    pub stream: MultiInputsStream<'a>,
    /// Optional callback applied to every token just before it is returned.
    pub token_mapper: Option<&'a OnParseTokenCallback>,
}
2154
impl<'a> Iterator for TokenIterator<'a> {
    type Item = (Token, Position);

    /// Produce the next token together with its position.
    ///
    /// The raw token from `get_next_token` is post-processed: reserved
    /// symbols are turned into errors or custom tokens, identifiers and
    /// keywords registered in `engine.custom_keywords` become
    /// [`Token::Custom`], symbols in `engine.disabled_symbols` become
    /// [`Token::Reserved`], and finally the optional `token_mapper` is applied.
    fn next(&mut self) -> Option<Self::Item> {
        let mut control = self.tokenizer_control.get();

        // A request to continue inside a text string was posted through the
        // shared control block.
        if control.is_within_text {
            // Switch the tokenizer state to text mode terminated by a back-tick.
            self.state.is_within_text_terminated_by = Some('`');
            // Reset the request so that it is honored only once.
            control.is_within_text = false;
            // `Cell::get` returned a copy, so the change must be written back.
            self.tokenizer_control.set(control);
        }

        let (token, pos) = match get_next_token(&mut self.stream, &mut self.state, &mut self.pos) {
            // Nothing more to read.
            None => return None,
            // A string constant was produced while the text-mode terminator is
            // still set: report it as an unterminated string.
            // NOTE(review): presumably the string parser clears the terminator
            // when it finds the closing delimiter - confirm in `parse_string_literal`.
            Some((Token::StringConstant(_), pos)) if self.state.is_within_text_terminated_by.is_some() => {
                self.state.is_within_text_terminated_by = None;
                return Some((Token::LexError(LERR::UnterminatedString), pos));
            }
            // Reserved keyword/symbol: decide based on its text and on whether
            // it is registered as a custom keyword.
            Some((Token::Reserved(s), pos)) => (match
                (&*s, self.engine.custom_keywords.contains_key(&*s))
            {
                // Well-known symbols from other languages get a tailored error
                // message, unless they are registered as custom keywords.
                ("===", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                    "'===' is not a valid operator. This is not JavaScript! Should it be '=='?".to_string(),
                )),
                ("!==", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                    "'!==' is not a valid operator. This is not JavaScript! Should it be '!='?".to_string(),
                )),
                ("->", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                    "'->' is not a valid symbol. This is not C or C++!".to_string())),
                ("<-", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                    "'<-' is not a valid symbol. This is not Go! Should it be '<='?".to_string(),
                )),
                (":=", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                    "':=' is not a valid assignment operator. This is not Go or Pascal! Should it be simply '='?".to_string(),
                )),
                ("::<", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                    "'::<>' is not a valid symbol. This is not Rust! Should it be '::'?".to_string(),
                )),
                ("(*", false) | ("*)", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                    "'(* .. *)' is not a valid comment format. This is not Pascal! Should it be '/* .. */'?".to_string(),
                )),
                ("#", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                    "'#' is not a valid symbol. Should it be '#{'?".to_string(),
                )),
                // Reserved keyword/symbol that is registered as custom.
                (_, true) => Token::Custom(s),
                // Not custom and not shaped like an identifier, i.e. a reserved symbol.
                (token, false) if !is_valid_identifier(token.chars()) => {
                    let msg = format!("'{}' is a reserved symbol", token);
                    Token::LexError(LERR::ImproperSymbol(s.to_string(), msg))
                },
                // Not custom, shaped like an identifier, but disabled in the engine.
                (token, false) if self.engine.disabled_symbols.contains(token) => {
                    let msg = format!("reserved symbol '{}' is disabled", token);
                    Token::LexError(LERR::ImproperSymbol(s.to_string(), msg))
                },
                // Any other reserved keyword is passed through unchanged.
                (_, false) => Token::Reserved(s),
            }, pos),
            // Identifier that is registered as a custom keyword.
            Some((Token::Identifier(s), pos)) if self.engine.custom_keywords.contains_key(&*s) => {
                (Token::Custom(s), pos)
            }
            // Any other token whose syntax is registered as a custom keyword:
            // this is only expected when that syntax has also been disabled.
            Some((token, pos)) if self.engine.custom_keywords.contains_key(&*token.syntax()) => {
                if self.engine.disabled_symbols.contains(&*token.syntax()) {
                    // Disabled standard keyword/symbol re-purposed as custom.
                    (Token::Custom(token.syntax().into()), pos)
                } else {
                    // An active standard keyword is treated as impossible to
                    // be custom at the same time; reaching here panics.
                    unreachable!("{:?} is an active keyword", token)
                }
            }
            // Disabled symbol that is not custom: surfaces as a reserved token.
            Some((token, pos)) if self.engine.disabled_symbols.contains(&*token.syntax()) => {
                (Token::Reserved(token.syntax().into()), pos)
            }
            // Everything else is returned as is.
            Some(r) => r,
        };

        // Run the token mapper, if one was supplied.
        let token = match self.token_mapper {
            Some(map_func) => map_func(token, pos, &self.state),
            None => token,
        };

        Some((token, pos))
    }
}
2254
// Marker impl declaring `TokenIterator` as a fused iterator.
// NOTE(review): this promises `None` forever after the first `None`; that
// depends on `get_next_token`, which is not fully visible here - confirm.
impl FusedIterator for TokenIterator<'_> {}
2256
2257impl Engine {
2258 #[cfg(feature = "internals")]
2261 #[inline(always)]
2262 #[must_use]
2263 pub fn lex<'a>(
2264 &'a self,
2265 input: impl IntoIterator<Item = &'a (impl AsRef<str> + 'a)>,
2266 ) -> (TokenIterator<'a>, TokenizerControl) {
2267 self.lex_raw(input, None)
2268 }
2269 #[cfg(feature = "internals")]
2272 #[inline(always)]
2273 #[must_use]
2274 pub fn lex_with_map<'a>(
2275 &'a self,
2276 input: impl IntoIterator<Item = &'a (impl AsRef<str> + 'a)>,
2277 token_mapper: &'a OnParseTokenCallback,
2278 ) -> (TokenIterator<'a>, TokenizerControl) {
2279 self.lex_raw(input, Some(token_mapper))
2280 }
2281 #[inline]
2283 #[must_use]
2284 pub(crate) fn lex_raw<'a>(
2285 &'a self,
2286 input: impl IntoIterator<Item = &'a (impl AsRef<str> + 'a)>,
2287 token_mapper: Option<&'a OnParseTokenCallback>,
2288 ) -> (TokenIterator<'a>, TokenizerControl) {
2289 let buffer: TokenizerControl = Cell::new(TokenizerControlBlock::new()).into();
2290 let buffer2 = buffer.clone();
2291
2292 (
2293 TokenIterator {
2294 engine: self,
2295 state: TokenizeState {
2296 #[cfg(not(feature = "unchecked"))]
2297 max_string_size: self.limits.max_string_size,
2298 #[cfg(feature = "unchecked")]
2299 max_string_size: None,
2300 next_token_cannot_be_unary: false,
2301 comment_level: 0,
2302 include_comments: false,
2303 is_within_text_terminated_by: None,
2304 },
2305 pos: Position::new(1, 0),
2306 tokenizer_control: buffer,
2307 stream: MultiInputsStream {
2308 buf: None,
2309 streams: input
2310 .into_iter()
2311 .map(|s| s.as_ref().chars().peekable())
2312 .collect(),
2313 index: 0,
2314 },
2315 token_mapper,
2316 },
2317 buffer2,
2318 )
2319 }
2320}