quad_compat_rhai/
tokenizer.rs

1//! Main module defining the lexer and parser.
2
3use crate::engine::{
4    Precedence, KEYWORD_DEBUG, KEYWORD_EVAL, KEYWORD_FN_PTR, KEYWORD_FN_PTR_CALL,
5    KEYWORD_FN_PTR_CURRY, KEYWORD_IS_DEF_VAR, KEYWORD_PRINT, KEYWORD_THIS, KEYWORD_TYPE_OF,
6};
7use crate::func::native::OnParseTokenCallback;
8use crate::{Engine, LexError, StaticVec, INT};
9#[cfg(feature = "no_std")]
10use std::prelude::v1::*;
11use std::{
12    borrow::Cow,
13    cell::Cell,
14    char, fmt,
15    iter::{FusedIterator, Peekable},
16    num::NonZeroUsize,
17    ops::{Add, AddAssign},
18    rc::Rc,
19    str::{Chars, FromStr},
20};
21
/// _(internals)_ A type containing commands to control the tokenizer.
///
/// The block is a small `Copy` value holding a single flag.
#[derive(Debug, Clone, Eq, PartialEq, Hash, Copy)]
pub struct TokenizerControlBlock {
    /// Is the current tokenizer position within an interpolated text string?
    /// This flag allows switching the tokenizer back to _text_ parsing after an interpolation stream.
    pub is_within_text: bool,
}
29
30impl TokenizerControlBlock {
31    /// Create a new `TokenizerControlBlock`.
32    #[inline(always)]
33    #[must_use]
34    pub const fn new() -> Self {
35        Self {
36            is_within_text: false,
37        }
38    }
39}
40
/// _(internals)_ A shared object that allows control of the tokenizer from outside.
///
/// This is a reference-counted [`Cell`] wrapping a [`TokenizerControlBlock`], so every
/// clone of the handle observes and mutates the same control block.
pub type TokenizerControl = Rc<Cell<TokenizerControlBlock>>;

// Short local alias for [`LexError`].
type LERR = LexError;

/// Separator character for numbers.
const NUMBER_SEPARATOR: char = '_';

/// A stream of tokens.
///
/// This is a [`Peekable`] wrapper around a [`TokenIterator`], so the next token can be
/// inspected without consuming it.
pub type TokenStream<'a> = Peekable<TokenIterator<'a>>;
51
/// A location (line number + character position) in the input script.
///
/// # Limitations
///
/// In order to keep footprint small, both line number and character position have 16-bit resolution,
/// meaning they go up to a maximum of 65,535 lines and 65,535 characters per line.
///
/// Advancing beyond the maximum line length or maximum number of lines is not an error but has no effect.
///
/// # Ordering
///
/// The derived ordering compares fields in declaration order: line number first, then
/// character position.
///
/// # Features
///
/// Under the `no_position` feature both fields are compiled out and the type carries no data.
#[derive(Eq, PartialEq, Ord, PartialOrd, Hash, Clone, Copy)]
pub struct Position {
    /// Line number - 0 = none
    #[cfg(not(feature = "no_position"))]
    line: u16,
    /// Character position - 0 = BOL
    #[cfg(not(feature = "no_position"))]
    pos: u16,
}
69
70impl Position {
    /// A [`Position`] representing no position.
    ///
    /// Both the line number and the character position are zero.
    pub const NONE: Self = Self {
        #[cfg(not(feature = "no_position"))]
        line: 0,
        #[cfg(not(feature = "no_position"))]
        pos: 0,
    };
    /// A [`Position`] representing the first position.
    ///
    /// This is line 1 at the beginning of the line (character position zero).
    pub const START: Self = Self {
        #[cfg(not(feature = "no_position"))]
        line: 1,
        #[cfg(not(feature = "no_position"))]
        pos: 0,
    };
85
86    /// Create a new [`Position`].
87    ///
88    /// `line` must not be zero.
89    ///
90    /// If `position` is zero, then it is at the beginning of a line.
91    ///
92    /// # Panics
93    ///
94    /// Panics if `line` is zero.
95    #[inline(always)]
96    #[must_use]
97    pub fn new(line: u16, position: u16) -> Self {
98        assert!(line != 0, "line cannot be zero");
99
100        let _pos = position;
101
102        Self {
103            #[cfg(not(feature = "no_position"))]
104            line,
105            #[cfg(not(feature = "no_position"))]
106            pos: _pos,
107        }
108    }
109    /// Create a new [`Position`].
110    ///
111    /// If `line` is zero, then [`None`] is returned.
112    ///
113    /// If `position` is zero, then it is at the beginning of a line.
114    #[inline]
115    #[must_use]
116    pub const fn new_const(line: u16, position: u16) -> Option<Self> {
117        if line == 0 {
118            return None;
119        }
120        let _pos = position;
121
122        Some(Self {
123            #[cfg(not(feature = "no_position"))]
124            line,
125            #[cfg(not(feature = "no_position"))]
126            pos: _pos,
127        })
128    }
129    /// Get the line number (1-based), or [`None`] if there is no position.
130    #[inline]
131    #[must_use]
132    pub const fn line(self) -> Option<usize> {
133        #[cfg(not(feature = "no_position"))]
134        return if self.is_none() {
135            None
136        } else {
137            Some(self.line as usize)
138        };
139
140        #[cfg(feature = "no_position")]
141        return None;
142    }
143    /// Get the character position (1-based), or [`None`] if at beginning of a line.
144    #[inline]
145    #[must_use]
146    pub const fn position(self) -> Option<usize> {
147        #[cfg(not(feature = "no_position"))]
148        return if self.is_none() || self.pos == 0 {
149            None
150        } else {
151            Some(self.pos as usize)
152        };
153
154        #[cfg(feature = "no_position")]
155        return None;
156    }
157    /// Advance by one character position.
158    #[inline]
159    pub(crate) fn advance(&mut self) {
160        #[cfg(not(feature = "no_position"))]
161        {
162            assert!(!self.is_none(), "cannot advance Position::none");
163
164            // Advance up to maximum position
165            if self.pos < u16::MAX {
166                self.pos += 1;
167            }
168        }
169    }
170    /// Go backwards by one character position.
171    ///
172    /// # Panics
173    ///
174    /// Panics if already at beginning of a line - cannot rewind to a previous line.
175    #[inline]
176    pub(crate) fn rewind(&mut self) {
177        #[cfg(not(feature = "no_position"))]
178        {
179            assert!(!self.is_none(), "cannot rewind Position::none");
180            assert!(self.pos > 0, "cannot rewind at position 0");
181            self.pos -= 1;
182        }
183    }
184    /// Advance to the next line.
185    #[inline]
186    pub(crate) fn new_line(&mut self) {
187        #[cfg(not(feature = "no_position"))]
188        {
189            assert!(!self.is_none(), "cannot advance Position::none");
190
191            // Advance up to maximum position
192            if self.line < u16::MAX {
193                self.line += 1;
194                self.pos = 0;
195            }
196        }
197    }
198    /// Is this [`Position`] at the beginning of a line?
199    #[inline]
200    #[must_use]
201    pub const fn is_beginning_of_line(self) -> bool {
202        #[cfg(not(feature = "no_position"))]
203        return self.pos == 0 && !self.is_none();
204        #[cfg(feature = "no_position")]
205        return false;
206    }
207    /// Is there no [`Position`]?
208    #[inline]
209    #[must_use]
210    pub const fn is_none(self) -> bool {
211        #[cfg(not(feature = "no_position"))]
212        return self.line == 0 && self.pos == 0;
213        #[cfg(feature = "no_position")]
214        return true;
215    }
216    /// Print this [`Position`] for debug purposes.
217    #[inline]
218    pub(crate) fn debug_print(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result {
219        #[cfg(not(feature = "no_position"))]
220        if !self.is_none() {
221            write!(_f, " @ {:?}", self)?;
222        }
223
224        Ok(())
225    }
226}
227
228impl Default for Position {
229    #[inline(always)]
230    fn default() -> Self {
231        Self::START
232    }
233}
234
235impl fmt::Display for Position {
236    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
237        if self.is_none() {
238            write!(f, "none")?;
239        } else {
240            #[cfg(not(feature = "no_position"))]
241            write!(f, "line {}, position {}", self.line, self.pos)?;
242            #[cfg(feature = "no_position")]
243            unreachable!();
244        }
245
246        Ok(())
247    }
248}
249
250impl fmt::Debug for Position {
251    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
252        #[cfg(not(feature = "no_position"))]
253        write!(f, "{}:{}", self.line, self.pos)?;
254        #[cfg(feature = "no_position")]
255        f.write_str("none")?;
256
257        Ok(())
258    }
259}
260
261impl Add for Position {
262    type Output = Self;
263
264    fn add(self, rhs: Self) -> Self::Output {
265        if rhs.is_none() {
266            self
267        } else {
268            #[cfg(not(feature = "no_position"))]
269            return Self {
270                line: self.line + rhs.line - 1,
271                pos: if rhs.is_beginning_of_line() {
272                    self.pos
273                } else {
274                    self.pos + rhs.pos - 1
275                },
276            };
277            #[cfg(feature = "no_position")]
278            unreachable!();
279        }
280    }
281}
282
283impl AddAssign for Position {
284    fn add_assign(&mut self, rhs: Self) {
285        *self = *self + rhs;
286    }
287}
288
/// _(internals)_ A Rhai language token.
/// Exported under the `internals` feature only.
///
/// Variants fall into a few groups: literal constants and identifiers (which carry
/// their value), fixed symbols and keywords (unit variants), and special tokens
/// (lexer errors, comments, reserved and custom keywords, end of input).
///
/// Some keyword variants only exist when the corresponding feature is enabled;
/// see the notes on the individual variants.
#[derive(Debug, PartialEq, Clone, Hash)]
pub enum Token {
    /// An `INT` constant.
    IntegerConstant(INT),
    /// A `FLOAT` constant.
    ///
    /// Reserved under the `no_float` feature.
    #[cfg(not(feature = "no_float"))]
    FloatConstant(crate::ast::FloatWrapper<crate::FLOAT>),
    /// A [`Decimal`][rust_decimal::Decimal] constant.
    ///
    /// Requires the `decimal` feature.
    #[cfg(feature = "decimal")]
    DecimalConstant(rust_decimal::Decimal),
    /// An identifier.
    Identifier(Box<str>),
    /// A character constant.
    CharConstant(char),
    /// A string constant.
    StringConstant(Box<str>),
    /// An interpolated string.
    InterpolatedString(Box<str>),
    /// `{`
    LeftBrace,
    /// `}`
    RightBrace,
    /// `(`
    LeftParen,
    /// `)`
    RightParen,
    /// `[`
    LeftBracket,
    /// `]`
    RightBracket,
    /// `+`
    Plus,
    /// `+` (unary)
    UnaryPlus,
    /// `-`
    Minus,
    /// `-` (unary)
    UnaryMinus,
    /// `*`
    Multiply,
    /// `/`
    Divide,
    /// `%`
    Modulo,
    /// `**`
    PowerOf,
    /// `<<`
    LeftShift,
    /// `>>`
    RightShift,
    /// `;`
    SemiColon,
    /// `:`
    Colon,
    /// `::`
    DoubleColon,
    /// `=>`
    DoubleArrow,
    /// `_`
    Underscore,
    /// `,`
    Comma,
    /// `.`
    Period,
    /// `..`
    ExclusiveRange,
    /// `..=`
    InclusiveRange,
    /// `#{`
    MapStart,
    /// `=`
    Equals,
    /// `true`
    True,
    /// `false`
    False,
    /// `let`
    Let,
    /// `const`
    Const,
    /// `if`
    If,
    /// `else`
    Else,
    /// `switch`
    Switch,
    /// `do`
    Do,
    /// `while`
    While,
    /// `until`
    Until,
    /// `loop`
    Loop,
    /// `for`
    For,
    /// `in`
    In,
    /// `<`
    LessThan,
    /// `>`
    GreaterThan,
    /// `<=`
    LessThanEqualsTo,
    /// `>=`
    GreaterThanEqualsTo,
    /// `==`
    EqualsTo,
    /// `!=`
    NotEqualsTo,
    /// `!`
    Bang,
    /// `|`
    Pipe,
    /// `||`
    Or,
    /// `^`
    XOr,
    /// `&`
    Ampersand,
    /// `&&`
    And,
    /// `fn`
    ///
    /// Reserved under the `no_function` feature.
    #[cfg(not(feature = "no_function"))]
    Fn,
    /// `continue`
    Continue,
    /// `break`
    Break,
    /// `return`
    Return,
    /// `throw`
    Throw,
    /// `try`
    Try,
    /// `catch`
    Catch,
    /// `+=`
    PlusAssign,
    /// `-=`
    MinusAssign,
    /// `*=`
    MultiplyAssign,
    /// `/=`
    DivideAssign,
    /// `<<=`
    LeftShiftAssign,
    /// `>>=`
    RightShiftAssign,
    /// `&=`
    AndAssign,
    /// `|=`
    OrAssign,
    /// `^=`
    XOrAssign,
    /// `%=`
    ModuloAssign,
    /// `**=`
    PowerOfAssign,
    /// `private`
    ///
    /// Reserved under the `no_function` feature.
    #[cfg(not(feature = "no_function"))]
    Private,
    /// `import`
    ///
    /// Reserved under the `no_module` feature.
    #[cfg(not(feature = "no_module"))]
    Import,
    /// `export`
    ///
    /// Reserved under the `no_module` feature.
    #[cfg(not(feature = "no_module"))]
    Export,
    /// `as`
    ///
    /// Reserved under the `no_module` feature.
    #[cfg(not(feature = "no_module"))]
    As,
    /// A lexer error.
    LexError(LexError),
    /// A comment block.
    Comment(Box<str>),
    /// A reserved symbol.
    Reserved(Box<str>),
    /// A custom keyword.
    Custom(Box<str>),
    /// End of the input stream.
    EOF,
}
487
488impl Token {
489    /// Get the literal syntax of the token.
490    #[must_use]
491    pub const fn literal_syntax(&self) -> &'static str {
492        use Token::*;
493
494        match self {
495            LeftBrace => "{",
496            RightBrace => "}",
497            LeftParen => "(",
498            RightParen => ")",
499            LeftBracket => "[",
500            RightBracket => "]",
501            Plus => "+",
502            UnaryPlus => "+",
503            Minus => "-",
504            UnaryMinus => "-",
505            Multiply => "*",
506            Divide => "/",
507            SemiColon => ";",
508            Colon => ":",
509            DoubleColon => "::",
510            DoubleArrow => "=>",
511            Underscore => "_",
512            Comma => ",",
513            Period => ".",
514            ExclusiveRange => "..",
515            InclusiveRange => "..=",
516            MapStart => "#{",
517            Equals => "=",
518            True => "true",
519            False => "false",
520            Let => "let",
521            Const => "const",
522            If => "if",
523            Else => "else",
524            Switch => "switch",
525            Do => "do",
526            While => "while",
527            Until => "until",
528            Loop => "loop",
529            For => "for",
530            In => "in",
531            LessThan => "<",
532            GreaterThan => ">",
533            Bang => "!",
534            LessThanEqualsTo => "<=",
535            GreaterThanEqualsTo => ">=",
536            EqualsTo => "==",
537            NotEqualsTo => "!=",
538            Pipe => "|",
539            Or => "||",
540            Ampersand => "&",
541            And => "&&",
542            Continue => "continue",
543            Break => "break",
544            Return => "return",
545            Throw => "throw",
546            Try => "try",
547            Catch => "catch",
548            PlusAssign => "+=",
549            MinusAssign => "-=",
550            MultiplyAssign => "*=",
551            DivideAssign => "/=",
552            LeftShiftAssign => "<<=",
553            RightShiftAssign => ">>=",
554            AndAssign => "&=",
555            OrAssign => "|=",
556            XOrAssign => "^=",
557            LeftShift => "<<",
558            RightShift => ">>",
559            XOr => "^",
560            Modulo => "%",
561            ModuloAssign => "%=",
562            PowerOf => "**",
563            PowerOfAssign => "**=",
564
565            #[cfg(not(feature = "no_function"))]
566            Fn => "fn",
567            #[cfg(not(feature = "no_function"))]
568            Private => "private",
569
570            #[cfg(not(feature = "no_module"))]
571            Import => "import",
572            #[cfg(not(feature = "no_module"))]
573            Export => "export",
574            #[cfg(not(feature = "no_module"))]
575            As => "as",
576
577            _ => "ERROR: NOT A KEYWORD",
578        }
579    }
580
581    /// Get the syntax of the token.
582    #[must_use]
583    pub fn syntax(&self) -> Cow<'static, str> {
584        use Token::*;
585
586        match self {
587            IntegerConstant(i) => i.to_string().into(),
588            #[cfg(not(feature = "no_float"))]
589            FloatConstant(f) => f.to_string().into(),
590            #[cfg(feature = "decimal")]
591            DecimalConstant(d) => d.to_string().into(),
592            StringConstant(_) => "string".into(),
593            InterpolatedString(_) => "string".into(),
594            CharConstant(c) => c.to_string().into(),
595            Identifier(s) => s.to_string().into(),
596            Reserved(s) => s.to_string().into(),
597            Custom(s) => s.to_string().into(),
598            LexError(err) => err.to_string().into(),
599            Comment(s) => s.to_string().into(),
600
601            EOF => "{EOF}".into(),
602
603            token => token.literal_syntax().into(),
604        }
605    }
606
607    /// Is this token an op-assignment operator?
608    #[inline]
609    #[must_use]
610    pub const fn is_op_assignment(&self) -> bool {
611        matches!(
612            self,
613            Self::PlusAssign
614                | Self::MinusAssign
615                | Self::MultiplyAssign
616                | Self::DivideAssign
617                | Self::LeftShiftAssign
618                | Self::RightShiftAssign
619                | Self::ModuloAssign
620                | Self::PowerOfAssign
621                | Self::AndAssign
622                | Self::OrAssign
623                | Self::XOrAssign
624        )
625    }
626
627    /// Get the corresponding operator of the token if it is an op-assignment operator.
628    #[must_use]
629    pub const fn map_op_assignment(&self) -> Option<Self> {
630        Some(match self {
631            Self::PlusAssign => Self::Plus,
632            Self::MinusAssign => Self::Minus,
633            Self::MultiplyAssign => Self::Multiply,
634            Self::DivideAssign => Self::Divide,
635            Self::LeftShiftAssign => Self::LeftShift,
636            Self::RightShiftAssign => Self::RightShift,
637            Self::ModuloAssign => Self::Modulo,
638            Self::PowerOfAssign => Self::PowerOf,
639            Self::AndAssign => Self::Ampersand,
640            Self::OrAssign => Self::Pipe,
641            Self::XOrAssign => Self::XOr,
642            _ => return None,
643        })
644    }
645
646    /// Has this token a corresponding op-assignment operator?
647    #[inline]
648    #[must_use]
649    pub const fn has_op_assignment(&self) -> bool {
650        matches!(
651            self,
652            Self::Plus
653                | Self::Minus
654                | Self::Multiply
655                | Self::Divide
656                | Self::LeftShift
657                | Self::RightShift
658                | Self::Modulo
659                | Self::PowerOf
660                | Self::Ampersand
661                | Self::Pipe
662                | Self::XOr
663        )
664    }
665
666    /// Get the corresponding op-assignment operator of the token.
667    #[must_use]
668    pub const fn make_op_assignment(&self) -> Option<Self> {
669        Some(match self {
670            Self::Plus => Self::PlusAssign,
671            Self::Minus => Self::MinusAssign,
672            Self::Multiply => Self::MultiplyAssign,
673            Self::Divide => Self::DivideAssign,
674            Self::LeftShift => Self::LeftShiftAssign,
675            Self::RightShift => Self::RightShiftAssign,
676            Self::Modulo => Self::ModuloAssign,
677            Self::PowerOf => Self::PowerOfAssign,
678            Self::Ampersand => Self::AndAssign,
679            Self::Pipe => Self::OrAssign,
680            Self::XOr => Self::XOrAssign,
681            _ => return None,
682        })
683    }
684
685    /// Reverse lookup a token from a piece of syntax.
686    #[must_use]
687    pub fn lookup_from_syntax(syntax: impl AsRef<str>) -> Option<Self> {
688        use Token::*;
689
690        let syntax = syntax.as_ref();
691
692        Some(match syntax {
693            "{" => LeftBrace,
694            "}" => RightBrace,
695            "(" => LeftParen,
696            ")" => RightParen,
697            "[" => LeftBracket,
698            "]" => RightBracket,
699            "+" => Plus,
700            "-" => Minus,
701            "*" => Multiply,
702            "/" => Divide,
703            ";" => SemiColon,
704            ":" => Colon,
705            "::" => DoubleColon,
706            "=>" => DoubleArrow,
707            "_" => Underscore,
708            "," => Comma,
709            "." => Period,
710            ".." => ExclusiveRange,
711            "..=" => InclusiveRange,
712            "#{" => MapStart,
713            "=" => Equals,
714            "true" => True,
715            "false" => False,
716            "let" => Let,
717            "const" => Const,
718            "if" => If,
719            "else" => Else,
720            "switch" => Switch,
721            "do" => Do,
722            "while" => While,
723            "until" => Until,
724            "loop" => Loop,
725            "for" => For,
726            "in" => In,
727            "<" => LessThan,
728            ">" => GreaterThan,
729            "!" => Bang,
730            "<=" => LessThanEqualsTo,
731            ">=" => GreaterThanEqualsTo,
732            "==" => EqualsTo,
733            "!=" => NotEqualsTo,
734            "|" => Pipe,
735            "||" => Or,
736            "&" => Ampersand,
737            "&&" => And,
738            "continue" => Continue,
739            "break" => Break,
740            "return" => Return,
741            "throw" => Throw,
742            "try" => Try,
743            "catch" => Catch,
744            "+=" => PlusAssign,
745            "-=" => MinusAssign,
746            "*=" => MultiplyAssign,
747            "/=" => DivideAssign,
748            "<<=" => LeftShiftAssign,
749            ">>=" => RightShiftAssign,
750            "&=" => AndAssign,
751            "|=" => OrAssign,
752            "^=" => XOrAssign,
753            "<<" => LeftShift,
754            ">>" => RightShift,
755            "^" => XOr,
756            "%" => Modulo,
757            "%=" => ModuloAssign,
758            "**" => PowerOf,
759            "**=" => PowerOfAssign,
760
761            #[cfg(not(feature = "no_function"))]
762            "fn" => Fn,
763            #[cfg(not(feature = "no_function"))]
764            "private" => Private,
765
766            #[cfg(feature = "no_function")]
767            "fn" | "private" => Reserved(syntax.into()),
768
769            #[cfg(not(feature = "no_module"))]
770            "import" => Import,
771            #[cfg(not(feature = "no_module"))]
772            "export" => Export,
773            #[cfg(not(feature = "no_module"))]
774            "as" => As,
775
776            #[cfg(feature = "no_module")]
777            "import" | "export" | "as" => Reserved(syntax.into()),
778
779            "===" | "!==" | "->" | "<-" | ":=" | "~" | "::<" | "(*" | "*)" | "#" | "#!"
780            | "public" | "protected" | "super" | "new" | "use" | "module" | "package" | "var"
781            | "static" | "shared" | "with" | "goto" | "exit" | "match" | "case" | "default"
782            | "void" | "null" | "nil" | "spawn" | "thread" | "go" | "sync" | "async" | "await"
783            | "yield" => Reserved(syntax.into()),
784
785            KEYWORD_PRINT | KEYWORD_DEBUG | KEYWORD_TYPE_OF | KEYWORD_EVAL | KEYWORD_FN_PTR
786            | KEYWORD_FN_PTR_CALL | KEYWORD_FN_PTR_CURRY | KEYWORD_THIS | KEYWORD_IS_DEF_VAR => {
787                Reserved(syntax.into())
788            }
789
790            #[cfg(not(feature = "no_function"))]
791            crate::engine::KEYWORD_IS_DEF_FN => Reserved(syntax.into()),
792
793            _ => return None,
794        })
795    }
796
797    // Is this token [`EOF`][Token::EOF]?
798    #[inline(always)]
799    #[must_use]
800    pub const fn is_eof(&self) -> bool {
801        matches!(self, Self::EOF)
802    }
803
804    // If another operator is after these, it's probably an unary operator
805    // (not sure about `fn` name).
806    #[must_use]
807    pub const fn is_next_unary(&self) -> bool {
808        use Token::*;
809
810        match self {
811            LexError(_)      |
812            SemiColon        | // ; - is unary
813            Colon            | // #{ foo: - is unary
814            Comma            | // ( ... , -expr ) - is unary
815            //Period           |
816            ExclusiveRange            | // .. - is unary
817            InclusiveRange   | // ..= - is unary
818            LeftBrace        | // { -expr } - is unary
819            // RightBrace    | { expr } - expr not unary & is closing
820            LeftParen        | // ( -expr ) - is unary
821            // RightParen    | // ( expr ) - expr not unary & is closing
822            LeftBracket      | // [ -expr ] - is unary
823            // RightBracket  | // [ expr ] - expr not unary & is closing
824            Plus             |
825            PlusAssign       |
826            UnaryPlus        |
827            Minus            |
828            MinusAssign      |
829            UnaryMinus       |
830            Multiply         |
831            MultiplyAssign   |
832            Divide           |
833            DivideAssign     |
834            Modulo           |
835            ModuloAssign     |
836            PowerOf          |
837            PowerOfAssign    |
838            LeftShift        |
839            LeftShiftAssign  |
840            RightShift       |
841            RightShiftAssign |
842            Equals           |
843            EqualsTo         |
844            NotEqualsTo      |
845            LessThan         |
846            GreaterThan      |
847            Bang             |
848            LessThanEqualsTo |
849            GreaterThanEqualsTo |
850            Pipe             |
851            Ampersand        |
852            If               |
853            //Do               |
854            While            |
855            Until            |
856            In               |
857            And              |
858            AndAssign        |
859            Or               |
860            OrAssign         |
861            XOr              |
862            XOrAssign        |
863            Return           |
864            Throw                           => true,
865
866            _ => false,
867        }
868    }
869
870    /// Get the precedence number of the token.
871    #[must_use]
872    pub const fn precedence(&self) -> Option<Precedence> {
873        use Token::*;
874
875        Precedence::new(match self {
876            // Assignments are not considered expressions - set to zero
877            Equals | PlusAssign | MinusAssign | MultiplyAssign | DivideAssign | PowerOfAssign
878            | LeftShiftAssign | RightShiftAssign | AndAssign | OrAssign | XOrAssign
879            | ModuloAssign => 0,
880
881            ExclusiveRange | InclusiveRange => 10,
882
883            Or | XOr | Pipe => 30,
884
885            And | Ampersand => 60,
886
887            EqualsTo | NotEqualsTo => 90,
888
889            In => 110,
890
891            LessThan | LessThanEqualsTo | GreaterThan | GreaterThanEqualsTo => 130,
892
893            Plus | Minus => 150,
894
895            Divide | Multiply | Modulo => 180,
896
897            PowerOf => 190,
898
899            LeftShift | RightShift => 210,
900
901            Period => 240,
902
903            _ => 0,
904        })
905    }
906
907    /// Does an expression bind to the right (instead of left)?
908    #[must_use]
909    pub const fn is_bind_right(&self) -> bool {
910        use Token::*;
911
912        match self {
913            // Assignments bind to the right
914            Equals | PlusAssign | MinusAssign | MultiplyAssign | DivideAssign | PowerOfAssign
915            | LeftShiftAssign | RightShiftAssign | AndAssign | OrAssign | XOrAssign
916            | ModuloAssign => true,
917
918            // Property access binds to the right
919            Period => true,
920
921            // Exponentiation binds to the right
922            PowerOf => true,
923
924            _ => false,
925        }
926    }
927
928    /// Is this token a standard symbol used in the language?
929    #[must_use]
930    pub const fn is_standard_symbol(&self) -> bool {
931        use Token::*;
932
933        match self {
934            LeftBrace | RightBrace | LeftParen | RightParen | LeftBracket | RightBracket | Plus
935            | UnaryPlus | Minus | UnaryMinus | Multiply | Divide | Modulo | PowerOf | LeftShift
936            | RightShift | SemiColon | Colon | DoubleColon | Comma | Period | ExclusiveRange
937            | InclusiveRange | MapStart | Equals | LessThan | GreaterThan | LessThanEqualsTo
938            | GreaterThanEqualsTo | EqualsTo | NotEqualsTo | Bang | Pipe | Or | XOr | Ampersand
939            | And | PlusAssign | MinusAssign | MultiplyAssign | DivideAssign | LeftShiftAssign
940            | RightShiftAssign | AndAssign | OrAssign | XOrAssign | ModuloAssign
941            | PowerOfAssign => true,
942
943            _ => false,
944        }
945    }
946
947    /// Is this token a standard keyword?
948    #[inline]
949    #[must_use]
950    pub const fn is_standard_keyword(&self) -> bool {
951        use Token::*;
952
953        match self {
954            #[cfg(not(feature = "no_function"))]
955            Fn | Private => true,
956
957            #[cfg(not(feature = "no_module"))]
958            Import | Export | As => true,
959
960            True | False | Let | Const | If | Else | Do | While | Until | Loop | For | In
961            | Continue | Break | Return | Throw | Try | Catch => true,
962
963            _ => false,
964        }
965    }
966
967    /// Is this token a reserved keyword or symbol?
968    #[inline(always)]
969    #[must_use]
970    pub const fn is_reserved(&self) -> bool {
971        matches!(self, Self::Reserved(_))
972    }
973
974    /// Convert a token into a function name, if possible.
975    #[cfg(not(feature = "no_function"))]
976    #[inline]
977    pub(crate) fn into_function_name_for_override(self) -> Result<Box<str>, Self> {
978        match self {
979            Self::Custom(s) | Self::Identifier(s) if is_valid_function_name(&*s) => Ok(s),
980            _ => Err(self),
981        }
982    }
983
984    /// Is this token a custom keyword?
985    #[inline(always)]
986    #[must_use]
987    pub const fn is_custom(&self) -> bool {
988        matches!(self, Self::Custom(_))
989    }
990}
991
992impl From<Token> for String {
993    #[inline(always)]
994    fn from(token: Token) -> Self {
995        token.syntax().into()
996    }
997}
998
/// _(internals)_ State of the tokenizer.
/// Exported under the `internals` feature only.
///
/// The derived [`Default`] leaves every option unset: no size limit, flags `false`,
/// comment level zero and no text terminator.
#[derive(Debug, Clone, Eq, PartialEq, Default)]
pub struct TokenizeState {
    /// Maximum length of a string, or [`None`] for no limit.
    pub max_string_size: Option<NonZeroUsize>,
    /// Is the next token prevented from being a unary operator?
    ///
    /// Note the negated sense: `true` means the next token can _not_ be unary.
    pub next_token_cannot_be_unary: bool,
    /// Block comment level.
    ///
    /// NOTE(review): presumably the current nesting depth of block comments, with zero
    /// meaning "not inside a block comment" - confirm against the tokenizer.
    pub comment_level: usize,
    /// Include comments?
    pub include_comments: bool,
    /// If the current tokenizer position is within the text stream of an interpolated
    /// string, the character that terminates that text; otherwise [`None`].
    pub is_within_text_terminated_by: Option<char>,
}
1014
/// _(internals)_ Trait that encapsulates a peekable character input stream.
/// Exported under the `internals` feature only.
///
/// Implementors supply characters one at a time, allow looking at the next character
/// without consuming it, and allow a character to be pushed back.
pub trait InputStream {
    /// Un-get a character back into the `InputStream`.
    /// The next [`get_next`][InputStream::get_next] or [`peek_next`][InputStream::peek_next]
    /// will return this character instead.
    fn unget(&mut self, ch: char);
    /// Get the next character from the `InputStream`.
    ///
    /// NOTE(review): presumably returns [`None`] once the input is exhausted - confirm
    /// against the implementors.
    fn get_next(&mut self) -> Option<char>;
    /// Peek the next character in the `InputStream`.
    #[must_use]
    fn peek_next(&mut self) -> Option<char>;
}
1028
/// _(internals)_ Parse a string literal ended by `termination_char`.
/// Exported under the `internals` feature only.
///
/// Scanning starts at the next character of `stream`; the callers in this module consume the
/// opening delimiter before calling this function.
///
/// Returns the parsed string and a boolean indicating whether the string is
/// terminated by an interpolation `${`.
///
/// # Arguments
///
/// * `stream` - source of characters.
/// * `state` - `max_string_size` is read as the length limit;
///   `is_within_text_terminated_by` is set to `Some(termination_char)` on entry and reset to
///   `None` when the literal is closed, when an interpolation starts, or when an
///   `UnterminatedString` error is returned. The other error returns leave it set.
/// * `pos` - current position, advanced as characters are consumed.
/// * `termination_char` - the character that closes the literal. Two of them in a row, or one
///   preceded by `\`, stand for the character itself.
/// * `continuation` - if `true`, a `\` directly before a line break (or before the end of input)
///   continues the string on the following line.
/// * `verbatim` - if `true`, `\` does not start an escape sequence and line breaks are kept as
///   part of the text.
/// * `allow_interpolation` - if `true`, an unescaped `${` ends the current text segment.
///
/// # Returns
///
/// |Type                             |Return Value                |`state.is_within_text_terminated_by`|
/// |---------------------------------|:--------------------------:|:----------------------------------:|
/// |`"hello"`                        |`StringConstant("hello")`   |`None`                              |
/// |`"hello`_{LF}_ or _{EOF}_        |`LexError`                  |`None`                              |
/// |`"hello\`_{EOF}_ or _{LF}{EOF}_  |`StringConstant("hello")`   |`Some('"')`                         |
/// |`` `hello``_{EOF}_               |`StringConstant("hello")`   |``Some('`')``                       |
/// |`` `hello``_{LF}{EOF}_           |`StringConstant("hello\n")` |``Some('`')``                       |
/// |`` `hello ${``                   |`InterpolatedString("hello ")`<br/>next token is `{`|`None`      |
/// |`` } hello` ``                   |`StringConstant(" hello")`  |`None`                              |
/// |`} hello`_{EOF}_                 |`StringConstant(" hello")`  |``Some('`')``                       |
///
/// This function does not throw a `LexError` for the following conditions:
///
/// * Unterminated literal string at _{EOF}_
///
/// * Unterminated normal string with continuation at _{EOF}_
///
/// This is to facilitate using this function to parse a script line-by-line, where the end of the
/// line (i.e. _{EOF}_) is not necessarily the end of the script.
///
/// Any time a [`StringConstant`][`Token::StringConstant`] is returned with
/// `state.is_within_text_terminated_by` set to `Some(_)` is one of the above conditions.
///
/// # Errors
///
/// * `UnterminatedString`, positioned at the start of the literal, when a non-verbatim string
///   reaches a line break or the end of input without a pending line continuation.
/// * `StringTooLong`, positioned at the current character, when the text collected so far is
///   longer (in bytes) than `state.max_string_size`.
/// * `MalformedEscapeSequence`, positioned at the current character, for an unknown escape, for
///   a `\x`/`\u`/`\U` escape with missing or non-hexadecimal digits, or for one whose value is
///   not a valid `char`.
///
/// # Panics
///
/// Panics if an escape is pending while `verbatim` is `true`, or if a line continuation is
/// reached while the pending escape is anything other than a single `\`. Both are internal
/// consistency checks. Unless the `no_position` feature is on, also panics if `start` or the
/// current position carries no character position.
pub fn parse_string_literal(
    stream: &mut impl InputStream,
    state: &mut TokenizeState,
    pos: &mut Position,
    termination_char: char,
    continuation: bool,
    verbatim: bool,
    allow_interpolation: bool,
) -> Result<(Box<str>, bool), (LexError, Position)> {
    // Text collected so far.
    let mut result = String::with_capacity(12);
    // Pending escape sequence; empty when the previous character was not an unconsumed `\`.
    let mut escape = String::with_capacity(12);

    let start = *pos;
    let mut interpolated = false;
    // After a line continuation, whitespace at positions below this value is dropped.
    #[cfg(not(feature = "no_position"))]
    let mut skip_whitespace_until = 0;

    // Mark the tokenizer as being inside text until one of the exits below clears it.
    state.is_within_text_terminated_by = Some(termination_char);

    loop {
        assert!(
            !verbatim || escape.is_empty(),
            "verbatim strings should not have any escapes"
        );

        let next_char = match stream.get_next() {
            Some(ch) => {
                pos.advance();
                ch
            }
            // End of input inside a verbatim string: return what we have and leave
            // `is_within_text_terminated_by` set so that parsing can resume later.
            None if verbatim => {
                assert_eq!(escape, "", "verbatim strings should not have any escapes");
                pos.advance();
                break;
            }
            // End of input right after a continuation `\`: same treatment.
            None if continuation && !escape.is_empty() => {
                assert_eq!(escape, "\\", "unexpected escape {} at end of line", escape);
                pos.advance();
                break;
            }
            // End of input anywhere else: the string is unterminated.
            None => {
                result += &escape;
                pos.advance();
                state.is_within_text_terminated_by = None;
                return Err((LERR::UnterminatedString, start));
            }
        };

        // String interpolation?
        // Only the `$` is consumed here; the `{` stays in the stream as the next token.
        if allow_interpolation
            && next_char == '$'
            && escape.is_empty()
            && stream.peek_next().map(|ch| ch == '{').unwrap_or(false)
        {
            interpolated = true;
            state.is_within_text_terminated_by = None;
            break;
        }

        // Enforce the size limit while scanning so an over-long string fails early.
        if let Some(max) = state.max_string_size {
            if result.len() > max.get() {
                return Err((LexError::StringTooLong(max.get()), *pos));
            }
        }

        // NOTE: the order of the arms below matters; earlier guards shadow later ones.
        match next_char {
            // \r - ignore if followed by \n
            '\r' if stream.peek_next().map(|ch| ch == '\n').unwrap_or(false) => (),
            // \...
            '\\' if !verbatim && escape.is_empty() => {
                escape.push('\\');
            }
            // \\
            '\\' if !escape.is_empty() => {
                escape.clear();
                result.push('\\');
            }
            // \t
            't' if !escape.is_empty() => {
                escape.clear();
                result.push('\t');
            }
            // \n
            'n' if !escape.is_empty() => {
                escape.clear();
                result.push('\n');
            }
            // \r
            'r' if !escape.is_empty() => {
                escape.clear();
                result.push('\r');
            }
            // \x??, \u????, \U????????
            ch @ 'x' | ch @ 'u' | ch @ 'U' if !escape.is_empty() => {
                // `seq` accumulates the raw escape text for use in error messages.
                let mut seq = escape.clone();
                escape.clear();
                seq.push(ch);

                let mut out_val: u32 = 0;
                // Number of hexadecimal digits that must follow.
                let len = match ch {
                    'x' => 2,
                    'u' => 4,
                    'U' => 8,
                    _ => unreachable!(),
                };

                for _ in 0..len {
                    let c = stream
                        .get_next()
                        .ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?;

                    seq.push(c);
                    pos.advance();

                    out_val *= 16;
                    out_val += c
                        .to_digit(16)
                        .ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?;
                }

                // The value must also be a valid Unicode scalar value.
                result.push(
                    char::from_u32(out_val)
                        .ok_or_else(|| (LERR::MalformedEscapeSequence(seq), *pos))?,
                );
            }

            // \{termination_char} - escaped
            _ if termination_char == next_char && !escape.is_empty() => {
                escape.clear();
                result.push(next_char)
            }

            // Double wrapper
            // Two termination characters in a row stand for one literal termination character.
            _ if termination_char == next_char
                && escape.is_empty()
                && stream.peek_next().map_or(false, |c| c == termination_char) =>
            {
                eat_next(stream, pos);
                result.push(termination_char)
            }

            // Close wrapper
            _ if termination_char == next_char && escape.is_empty() => {
                state.is_within_text_terminated_by = None;
                break;
            }

            // Verbatim
            // Line breaks are part of the text in a verbatim string.
            '\n' if verbatim => {
                assert_eq!(escape, "", "verbatim strings should not have any escapes");
                pos.new_line();
                result.push(next_char);
            }

            // Line continuation
            '\n' if continuation && !escape.is_empty() => {
                assert_eq!(escape, "\\", "unexpected escape {} at end of line", escape);
                escape.clear();
                pos.new_line();

                // On the continued line, drop leading whitespace up to the position just past
                // where the literal started.
                #[cfg(not(feature = "no_position"))]
                {
                    let start_position = start.position().expect("start position");
                    skip_whitespace_until = start_position + 1;
                }
            }

            // Unterminated string
            '\n' => {
                pos.rewind();
                state.is_within_text_terminated_by = None;
                return Err((LERR::UnterminatedString, start));
            }

            // Unknown escape sequence
            _ if !escape.is_empty() => {
                escape.push(next_char);

                return Err((LERR::MalformedEscapeSequence(escape), *pos));
            }

            // Whitespace to skip
            #[cfg(not(feature = "no_position"))]
            _ if next_char.is_whitespace()
                && pos.position().expect("position") < skip_whitespace_until => {}

            // All other characters
            _ => {
                escape.clear();
                result.push(next_char);

                // The first kept character ends whitespace skipping for this line.
                #[cfg(not(feature = "no_position"))]
                {
                    skip_whitespace_until = 0;
                }
            }
        }
    }

    // Final size check: the in-loop check runs before each character is appended.
    if let Some(max) = state.max_string_size {
        if result.len() > max.get() {
            return Err((LexError::StringTooLong(max.get()), *pos));
        }
    }

    Ok((result.into(), interpolated))
}
1266
1267/// Consume the next character.
1268#[inline(always)]
1269fn eat_next(stream: &mut impl InputStream, pos: &mut Position) -> Option<char> {
1270    pos.advance();
1271    stream.get_next()
1272}
1273
1274/// Scan for a block comment until the end.
1275fn scan_block_comment(
1276    stream: &mut impl InputStream,
1277    level: usize,
1278    pos: &mut Position,
1279    comment: Option<&mut String>,
1280) -> usize {
1281    let mut level = level;
1282    let mut comment = comment;
1283
1284    while let Some(c) = stream.get_next() {
1285        pos.advance();
1286
1287        if let Some(comment) = comment.as_mut() {
1288            comment.push(c);
1289        }
1290
1291        match c {
1292            '/' => {
1293                if let Some(c2) = stream.peek_next().filter(|&c2| c2 == '*') {
1294                    eat_next(stream, pos);
1295                    if let Some(comment) = comment.as_mut() {
1296                        comment.push(c2);
1297                    }
1298                    level += 1;
1299                }
1300            }
1301            '*' => {
1302                if let Some(c2) = stream.peek_next().filter(|&c2| c2 == '/') {
1303                    eat_next(stream, pos);
1304                    if let Some(comment) = comment.as_mut() {
1305                        comment.push(c2);
1306                    }
1307                    level -= 1;
1308                }
1309            }
1310            '\n' => pos.new_line(),
1311            _ => (),
1312        }
1313
1314        if level == 0 {
1315            break;
1316        }
1317    }
1318
1319    level
1320}
1321
1322/// _(internals)_ Get the next token from the `stream`.
1323/// Exported under the `internals` feature only.
1324#[inline]
1325#[must_use]
1326pub fn get_next_token(
1327    stream: &mut impl InputStream,
1328    state: &mut TokenizeState,
1329    pos: &mut Position,
1330) -> Option<(Token, Position)> {
1331    let result = get_next_token_inner(stream, state, pos);
1332
1333    // Save the last token's state
1334    if let Some((ref token, _)) = result {
1335        state.next_token_cannot_be_unary = !token.is_next_unary();
1336    }
1337
1338    result
1339}
1340
/// Check whether `c` is an ASCII hexadecimal digit: `0`-`9`, `a`-`f` or `A`-`F`.
#[inline(always)]
fn is_hex_digit(c: char) -> bool {
    c.is_ascii_hexdigit()
}
1346
/// Check whether `c` is one of the ASCII decimal digits `0`-`9`.
#[inline(always)]
fn is_numeric_digit(c: char) -> bool {
    c.is_ascii_digit()
}
1352
/// Decide whether the text of a comment marks it as a doc-comment.
///
/// A doc-comment starts with exactly three slashes (`///` but not `////`) or with a slash and
/// exactly two asterisks (`/**` but not `/***`). Only the beginning of the text is examined.
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
#[inline]
#[must_use]
pub fn is_doc_comment(comment: impl AsRef<str>) -> bool {
    let text = comment.as_ref();

    let line_doc = text.starts_with("///") && !text.starts_with("////");
    let block_doc = text.starts_with("/**") && !text.starts_with("/***");

    line_doc || block_doc
}
1364
1365/// Get the next token.
1366#[must_use]
1367fn get_next_token_inner(
1368    stream: &mut impl InputStream,
1369    state: &mut TokenizeState,
1370    pos: &mut Position,
1371) -> Option<(Token, Position)> {
1372    // Still inside a comment?
1373    if state.comment_level > 0 {
1374        let start_pos = *pos;
1375        let mut comment = if state.include_comments {
1376            Some(String::new())
1377        } else {
1378            None
1379        };
1380
1381        state.comment_level =
1382            scan_block_comment(stream, state.comment_level, pos, comment.as_mut());
1383
1384        let return_comment = state.include_comments;
1385
1386        #[cfg(not(feature = "no_function"))]
1387        #[cfg(feature = "metadata")]
1388        let return_comment = return_comment || is_doc_comment(comment.as_ref().expect("`Some`"));
1389
1390        if return_comment {
1391            return Some((Token::Comment(comment.expect("`Some`").into()), start_pos));
1392        }
1393        if state.comment_level > 0 {
1394            // Reached EOF without ending comment block
1395            return None;
1396        }
1397    }
1398
1399    // Within text?
1400    if let Some(ch) = state.is_within_text_terminated_by.take() {
1401        let start_pos = *pos;
1402
1403        return parse_string_literal(stream, state, pos, ch, false, true, true).map_or_else(
1404            |(err, err_pos)| Some((Token::LexError(err), err_pos)),
1405            |(result, interpolated)| {
1406                if interpolated {
1407                    Some((Token::InterpolatedString(result), start_pos))
1408                } else {
1409                    Some((Token::StringConstant(result), start_pos))
1410                }
1411            },
1412        );
1413    }
1414
1415    let mut negated: Option<Position> = None;
1416
1417    while let Some(c) = stream.get_next() {
1418        pos.advance();
1419
1420        let start_pos = *pos;
1421
1422        match (c, stream.peek_next().unwrap_or('\0')) {
1423            // \n
1424            ('\n', _) => pos.new_line(),
1425
1426            // digit ...
1427            ('0'..='9', _) => {
1428                let mut result = smallvec::SmallVec::<[char; 16]>::new();
1429                let mut radix_base: Option<u32> = None;
1430                let mut valid: fn(char) -> bool = is_numeric_digit;
1431                result.push(c);
1432
1433                while let Some(next_char) = stream.peek_next() {
1434                    match next_char {
1435                        ch if valid(ch) || ch == NUMBER_SEPARATOR => {
1436                            result.push(next_char);
1437                            eat_next(stream, pos);
1438                        }
1439                        #[cfg(any(not(feature = "no_float"), feature = "decimal"))]
1440                        '.' => {
1441                            stream.get_next().expect("`.`");
1442
1443                            // Check if followed by digits or something that cannot start a property name
1444                            match stream.peek_next().unwrap_or('\0') {
1445                                // digits after period - accept the period
1446                                '0'..='9' => {
1447                                    result.push(next_char);
1448                                    pos.advance();
1449                                }
1450                                // _ - cannot follow a decimal point
1451                                '_' => {
1452                                    stream.unget(next_char);
1453                                    break;
1454                                }
1455                                // .. - reserved symbol, not a floating-point number
1456                                '.' => {
1457                                    stream.unget(next_char);
1458                                    break;
1459                                }
1460                                // symbol after period - probably a float
1461                                ch if !is_id_first_alphabetic(ch) => {
1462                                    result.push(next_char);
1463                                    pos.advance();
1464                                    result.push('0');
1465                                }
1466                                // Not a floating-point number
1467                                _ => {
1468                                    stream.unget(next_char);
1469                                    break;
1470                                }
1471                            }
1472                        }
1473                        #[cfg(not(feature = "no_float"))]
1474                        'e' => {
1475                            stream.get_next().expect("`e`");
1476
1477                            // Check if followed by digits or +/-
1478                            match stream.peek_next().unwrap_or('\0') {
1479                                // digits after e - accept the e
1480                                '0'..='9' => {
1481                                    result.push(next_char);
1482                                    pos.advance();
1483                                }
1484                                // +/- after e - accept the e and the sign
1485                                '+' | '-' => {
1486                                    result.push(next_char);
1487                                    pos.advance();
1488                                    result.push(stream.get_next().expect("`+` or `-`"));
1489                                    pos.advance();
1490                                }
1491                                // Not a floating-point number
1492                                _ => {
1493                                    stream.unget(next_char);
1494                                    break;
1495                                }
1496                            }
1497                        }
1498                        // 0x????, 0o????, 0b???? at beginning
1499                        ch @ 'x' | ch @ 'o' | ch @ 'b' | ch @ 'X' | ch @ 'O' | ch @ 'B'
1500                            if c == '0' && result.len() <= 1 =>
1501                        {
1502                            result.push(next_char);
1503                            eat_next(stream, pos);
1504
1505                            valid = match ch {
1506                                'x' | 'X' => is_hex_digit,
1507                                'o' | 'O' => is_numeric_digit,
1508                                'b' | 'B' => is_numeric_digit,
1509                                _ => unreachable!(),
1510                            };
1511
1512                            radix_base = Some(match ch {
1513                                'x' | 'X' => 16,
1514                                'o' | 'O' => 8,
1515                                'b' | 'B' => 2,
1516                                _ => unreachable!(),
1517                            });
1518                        }
1519
1520                        _ => break,
1521                    }
1522                }
1523
1524                let num_pos = negated.map_or(start_pos, |negated_pos| {
1525                    result.insert(0, '-');
1526                    negated_pos
1527                });
1528
1529                // Parse number
1530                return Some((
1531                    if let Some(radix) = radix_base {
1532                        let out: String = result
1533                            .iter()
1534                            .skip(2)
1535                            .filter(|&&c| c != NUMBER_SEPARATOR)
1536                            .collect();
1537
1538                        INT::from_str_radix(&out, radix)
1539                            .map(Token::IntegerConstant)
1540                            .unwrap_or_else(|_| {
1541                                Token::LexError(LERR::MalformedNumber(result.into_iter().collect()))
1542                            })
1543                    } else {
1544                        let out: String =
1545                            result.iter().filter(|&&c| c != NUMBER_SEPARATOR).collect();
1546                        let num = INT::from_str(&out).map(Token::IntegerConstant);
1547
1548                        // If integer parsing is unnecessary, try float instead
1549                        #[cfg(not(feature = "no_float"))]
1550                        let num = num.or_else(|_| {
1551                            crate::ast::FloatWrapper::from_str(&out).map(Token::FloatConstant)
1552                        });
1553
1554                        // Then try decimal
1555                        #[cfg(feature = "decimal")]
1556                        let num = num.or_else(|_| {
1557                            rust_decimal::Decimal::from_str(&out).map(Token::DecimalConstant)
1558                        });
1559
1560                        // Then try decimal in scientific notation
1561                        #[cfg(feature = "decimal")]
1562                        let num = num.or_else(|_| {
1563                            rust_decimal::Decimal::from_scientific(&out).map(Token::DecimalConstant)
1564                        });
1565
1566                        num.unwrap_or_else(|_| {
1567                            Token::LexError(LERR::MalformedNumber(result.into_iter().collect()))
1568                        })
1569                    },
1570                    num_pos,
1571                ));
1572            }
1573
1574            // letter or underscore ...
1575            #[cfg(not(feature = "unicode-xid-ident"))]
1576            ('a'..='z', _) | ('_', _) | ('A'..='Z', _) => {
1577                return get_identifier(stream, pos, start_pos, c);
1578            }
1579            #[cfg(feature = "unicode-xid-ident")]
1580            (ch, _) if unicode_xid::UnicodeXID::is_xid_start(ch) || ch == '_' => {
1581                return get_identifier(stream, pos, start_pos, c);
1582            }
1583
1584            // " - string literal
1585            ('"', _) => {
1586                return parse_string_literal(stream, state, pos, c, true, false, false)
1587                    .map_or_else(
1588                        |(err, err_pos)| Some((Token::LexError(err), err_pos)),
1589                        |(result, _)| Some((Token::StringConstant(result), start_pos)),
1590                    );
1591            }
1592            // ` - string literal
1593            ('`', _) => {
1594                // Start from the next line if at the end of line
1595                match stream.peek_next() {
1596                    // `\r - start from next line
1597                    Some('\r') => {
1598                        eat_next(stream, pos);
1599                        pos.new_line();
1600                        // `\r\n
1601                        if stream.peek_next().map(|ch| ch == '\n').unwrap_or(false) {
1602                            eat_next(stream, pos);
1603                        }
1604                    }
1605                    // `\n - start from next line
1606                    Some('\n') => {
1607                        eat_next(stream, pos);
1608                        pos.new_line();
1609                    }
1610                    _ => (),
1611                }
1612
1613                return parse_string_literal(stream, state, pos, c, false, true, true).map_or_else(
1614                    |(err, err_pos)| Some((Token::LexError(err), err_pos)),
1615                    |(result, interpolated)| {
1616                        if interpolated {
1617                            Some((Token::InterpolatedString(result), start_pos))
1618                        } else {
1619                            Some((Token::StringConstant(result), start_pos))
1620                        }
1621                    },
1622                );
1623            }
1624
1625            // ' - character literal
1626            ('\'', '\'') => {
1627                return Some((
1628                    Token::LexError(LERR::MalformedChar("".to_string())),
1629                    start_pos,
1630                ))
1631            }
1632            ('\'', _) => {
1633                return Some(
1634                    parse_string_literal(stream, state, pos, c, false, false, false).map_or_else(
1635                        |(err, err_pos)| (Token::LexError(err), err_pos),
1636                        |(result, _)| {
1637                            let mut chars = result.chars();
1638                            let first = chars.next().expect("not empty");
1639
1640                            if chars.next().is_some() {
1641                                (
1642                                    Token::LexError(LERR::MalformedChar(result.to_string())),
1643                                    start_pos,
1644                                )
1645                            } else {
1646                                (Token::CharConstant(first), start_pos)
1647                            }
1648                        },
1649                    ),
1650                )
1651            }
1652
1653            // Braces
1654            ('{', _) => return Some((Token::LeftBrace, start_pos)),
1655            ('}', _) => return Some((Token::RightBrace, start_pos)),
1656
1657            // Parentheses
1658            ('(', '*') => {
1659                eat_next(stream, pos);
1660                return Some((Token::Reserved("(*".into()), start_pos));
1661            }
1662            ('(', _) => return Some((Token::LeftParen, start_pos)),
1663            (')', _) => return Some((Token::RightParen, start_pos)),
1664
1665            // Indexing
1666            ('[', _) => return Some((Token::LeftBracket, start_pos)),
1667            (']', _) => return Some((Token::RightBracket, start_pos)),
1668
1669            // Map literal
1670            #[cfg(not(feature = "no_object"))]
1671            ('#', '{') => {
1672                eat_next(stream, pos);
1673                return Some((Token::MapStart, start_pos));
1674            }
1675            // Shebang
1676            ('#', '!') => return Some((Token::Reserved("#!".into()), start_pos)),
1677
1678            ('#', _) => return Some((Token::Reserved("#".into()), start_pos)),
1679
1680            // Operators
1681            ('+', '=') => {
1682                eat_next(stream, pos);
1683                return Some((Token::PlusAssign, start_pos));
1684            }
1685            ('+', '+') => {
1686                eat_next(stream, pos);
1687                return Some((Token::Reserved("++".into()), start_pos));
1688            }
1689            ('+', _) if !state.next_token_cannot_be_unary => {
1690                return Some((Token::UnaryPlus, start_pos))
1691            }
1692            ('+', _) => return Some((Token::Plus, start_pos)),
1693
1694            ('-', '0'..='9') if !state.next_token_cannot_be_unary => negated = Some(start_pos),
1695            ('-', '0'..='9') => return Some((Token::Minus, start_pos)),
1696            ('-', '=') => {
1697                eat_next(stream, pos);
1698                return Some((Token::MinusAssign, start_pos));
1699            }
1700            ('-', '>') => {
1701                eat_next(stream, pos);
1702                return Some((Token::Reserved("->".into()), start_pos));
1703            }
1704            ('-', '-') => {
1705                eat_next(stream, pos);
1706                return Some((Token::Reserved("--".into()), start_pos));
1707            }
1708            ('-', _) if !state.next_token_cannot_be_unary => {
1709                return Some((Token::UnaryMinus, start_pos))
1710            }
1711            ('-', _) => return Some((Token::Minus, start_pos)),
1712
1713            ('*', ')') => {
1714                eat_next(stream, pos);
1715                return Some((Token::Reserved("*)".into()), start_pos));
1716            }
1717            ('*', '=') => {
1718                eat_next(stream, pos);
1719                return Some((Token::MultiplyAssign, start_pos));
1720            }
1721            ('*', '*') => {
1722                eat_next(stream, pos);
1723
1724                return Some((
1725                    if stream.peek_next() == Some('=') {
1726                        eat_next(stream, pos);
1727                        Token::PowerOfAssign
1728                    } else {
1729                        Token::PowerOf
1730                    },
1731                    start_pos,
1732                ));
1733            }
1734            ('*', _) => return Some((Token::Multiply, start_pos)),
1735
1736            // Comments
1737            ('/', '/') => {
1738                eat_next(stream, pos);
1739
1740                let mut comment = match stream.peek_next() {
1741                    #[cfg(not(feature = "no_function"))]
1742                    #[cfg(feature = "metadata")]
1743                    Some('/') => {
1744                        eat_next(stream, pos);
1745
1746                        // Long streams of `///...` are not doc-comments
1747                        match stream.peek_next() {
1748                            Some('/') => None,
1749                            _ => Some("///".to_string()),
1750                        }
1751                    }
1752                    _ if state.include_comments => Some("//".to_string()),
1753                    _ => None,
1754                };
1755
1756                while let Some(c) = stream.get_next() {
1757                    if c == '\n' {
1758                        pos.new_line();
1759                        break;
1760                    }
1761                    if let Some(comment) = comment.as_mut() {
1762                        comment.push(c);
1763                    }
1764                    pos.advance();
1765                }
1766
1767                if let Some(comment) = comment {
1768                    return Some((Token::Comment(comment.into()), start_pos));
1769                }
1770            }
1771            ('/', '*') => {
1772                state.comment_level = 1;
1773                eat_next(stream, pos);
1774
1775                let mut comment = match stream.peek_next() {
1776                    #[cfg(not(feature = "no_function"))]
1777                    #[cfg(feature = "metadata")]
1778                    Some('*') => {
1779                        eat_next(stream, pos);
1780
1781                        // Long streams of `/****...` are not doc-comments
1782                        match stream.peek_next() {
1783                            Some('*') => None,
1784                            _ => Some("/**".to_string()),
1785                        }
1786                    }
1787                    _ if state.include_comments => Some("/*".to_string()),
1788                    _ => None,
1789                };
1790
1791                state.comment_level =
1792                    scan_block_comment(stream, state.comment_level, pos, comment.as_mut());
1793
1794                if let Some(comment) = comment {
1795                    return Some((Token::Comment(comment.into()), start_pos));
1796                }
1797            }
1798
1799            ('/', '=') => {
1800                eat_next(stream, pos);
1801                return Some((Token::DivideAssign, start_pos));
1802            }
1803            ('/', _) => return Some((Token::Divide, start_pos)),
1804
1805            (';', _) => return Some((Token::SemiColon, start_pos)),
1806            (',', _) => return Some((Token::Comma, start_pos)),
1807
1808            ('.', '.') => {
1809                eat_next(stream, pos);
1810                return Some((
1811                    match stream.peek_next() {
1812                        Some('.') => {
1813                            eat_next(stream, pos);
1814                            Token::Reserved("...".into())
1815                        }
1816                        Some('=') => {
1817                            eat_next(stream, pos);
1818                            Token::InclusiveRange
1819                        }
1820                        _ => Token::ExclusiveRange,
1821                    },
1822                    start_pos,
1823                ));
1824            }
1825            ('.', _) => return Some((Token::Period, start_pos)),
1826
1827            ('=', '=') => {
1828                eat_next(stream, pos);
1829
1830                if stream.peek_next() == Some('=') {
1831                    eat_next(stream, pos);
1832                    return Some((Token::Reserved("===".into()), start_pos));
1833                }
1834
1835                return Some((Token::EqualsTo, start_pos));
1836            }
1837            ('=', '>') => {
1838                eat_next(stream, pos);
1839                return Some((Token::DoubleArrow, start_pos));
1840            }
1841            ('=', _) => return Some((Token::Equals, start_pos)),
1842
1843            #[cfg(not(feature = "no_module"))]
1844            (':', ':') => {
1845                eat_next(stream, pos);
1846
1847                if stream.peek_next() == Some('<') {
1848                    eat_next(stream, pos);
1849                    return Some((Token::Reserved("::<".into()), start_pos));
1850                }
1851
1852                return Some((Token::DoubleColon, start_pos));
1853            }
1854            (':', '=') => {
1855                eat_next(stream, pos);
1856                return Some((Token::Reserved(":=".into()), start_pos));
1857            }
1858            (':', _) => return Some((Token::Colon, start_pos)),
1859
1860            ('<', '=') => {
1861                eat_next(stream, pos);
1862                return Some((Token::LessThanEqualsTo, start_pos));
1863            }
1864            ('<', '-') => {
1865                eat_next(stream, pos);
1866                return Some((Token::Reserved("<-".into()), start_pos));
1867            }
1868            ('<', '<') => {
1869                eat_next(stream, pos);
1870
1871                return Some((
1872                    if stream.peek_next() == Some('=') {
1873                        eat_next(stream, pos);
1874                        Token::LeftShiftAssign
1875                    } else {
1876                        Token::LeftShift
1877                    },
1878                    start_pos,
1879                ));
1880            }
1881            ('<', _) => return Some((Token::LessThan, start_pos)),
1882
1883            ('>', '=') => {
1884                eat_next(stream, pos);
1885                return Some((Token::GreaterThanEqualsTo, start_pos));
1886            }
1887            ('>', '>') => {
1888                eat_next(stream, pos);
1889
1890                return Some((
1891                    if stream.peek_next() == Some('=') {
1892                        eat_next(stream, pos);
1893                        Token::RightShiftAssign
1894                    } else {
1895                        Token::RightShift
1896                    },
1897                    start_pos,
1898                ));
1899            }
1900            ('>', _) => return Some((Token::GreaterThan, start_pos)),
1901
1902            ('!', '=') => {
1903                eat_next(stream, pos);
1904
1905                if stream.peek_next() == Some('=') {
1906                    eat_next(stream, pos);
1907                    return Some((Token::Reserved("!==".into()), start_pos));
1908                }
1909
1910                return Some((Token::NotEqualsTo, start_pos));
1911            }
1912            ('!', _) => return Some((Token::Bang, start_pos)),
1913
1914            ('|', '|') => {
1915                eat_next(stream, pos);
1916                return Some((Token::Or, start_pos));
1917            }
1918            ('|', '=') => {
1919                eat_next(stream, pos);
1920                return Some((Token::OrAssign, start_pos));
1921            }
1922            ('|', _) => return Some((Token::Pipe, start_pos)),
1923
1924            ('&', '&') => {
1925                eat_next(stream, pos);
1926                return Some((Token::And, start_pos));
1927            }
1928            ('&', '=') => {
1929                eat_next(stream, pos);
1930                return Some((Token::AndAssign, start_pos));
1931            }
1932            ('&', _) => return Some((Token::Ampersand, start_pos)),
1933
1934            ('^', '=') => {
1935                eat_next(stream, pos);
1936                return Some((Token::XOrAssign, start_pos));
1937            }
1938            ('^', _) => return Some((Token::XOr, start_pos)),
1939
1940            ('~', _) => return Some((Token::Reserved("~".into()), start_pos)),
1941
1942            ('%', '=') => {
1943                eat_next(stream, pos);
1944                return Some((Token::ModuloAssign, start_pos));
1945            }
1946            ('%', _) => return Some((Token::Modulo, start_pos)),
1947
1948            ('@', _) => return Some((Token::Reserved("@".into()), start_pos)),
1949
1950            ('$', _) => return Some((Token::Reserved("$".into()), start_pos)),
1951
1952            (ch, _) if ch.is_whitespace() => (),
1953
1954            (ch, _) => {
1955                return Some((
1956                    Token::LexError(LERR::UnexpectedInput(ch.to_string())),
1957                    start_pos,
1958                ))
1959            }
1960        }
1961    }
1962
1963    pos.advance();
1964
1965    Some((Token::EOF, *pos))
1966}
1967
1968/// Get the next identifier.
1969fn get_identifier(
1970    stream: &mut impl InputStream,
1971    pos: &mut Position,
1972    start_pos: Position,
1973    first_char: char,
1974) -> Option<(Token, Position)> {
1975    let mut result = smallvec::SmallVec::<[char; 8]>::new();
1976    result.push(first_char);
1977
1978    while let Some(next_char) = stream.peek_next() {
1979        match next_char {
1980            x if is_id_continue(x) => {
1981                result.push(x);
1982                eat_next(stream, pos);
1983            }
1984            _ => break,
1985        }
1986    }
1987
1988    let is_valid_identifier = is_valid_identifier(result.iter().cloned());
1989
1990    let identifier: String = result.into_iter().collect();
1991
1992    if let Some(token) = Token::lookup_from_syntax(&identifier) {
1993        return Some((token, start_pos));
1994    }
1995
1996    if !is_valid_identifier {
1997        return Some((
1998            Token::LexError(LERR::MalformedIdentifier(identifier)),
1999            start_pos,
2000        ));
2001    }
2002
2003    Some((Token::Identifier(identifier.into()), start_pos))
2004}
2005
2006/// Is this keyword allowed as a function?
2007#[inline]
2008#[must_use]
2009pub fn is_keyword_function(name: impl AsRef<str>) -> bool {
2010    match name.as_ref() {
2011        KEYWORD_PRINT | KEYWORD_DEBUG | KEYWORD_TYPE_OF | KEYWORD_EVAL | KEYWORD_FN_PTR
2012        | KEYWORD_FN_PTR_CALL | KEYWORD_FN_PTR_CURRY | KEYWORD_IS_DEF_VAR => true,
2013
2014        #[cfg(not(feature = "no_function"))]
2015        crate::engine::KEYWORD_IS_DEF_FN => true,
2016
2017        _ => false,
2018    }
2019}
2020
2021/// Is a text string a valid identifier?
2022#[must_use]
2023pub fn is_valid_identifier(name: impl Iterator<Item = char>) -> bool {
2024    let mut first_alphabetic = false;
2025
2026    for ch in name {
2027        match ch {
2028            '_' => (),
2029            _ if is_id_first_alphabetic(ch) => first_alphabetic = true,
2030            _ if !first_alphabetic => return false,
2031            _ if char::is_ascii_alphanumeric(&ch) => (),
2032            _ => return false,
2033        }
2034    }
2035
2036    first_alphabetic
2037}
2038
2039/// Is a text string a valid scripted function name?
2040#[inline(always)]
2041#[must_use]
2042pub fn is_valid_function_name(name: impl AsRef<str>) -> bool {
2043    is_valid_identifier(name.as_ref().chars())
2044}
2045
/// Can this character start an identifier?
///
/// With the `unicode-xid-ident` feature, the decision is delegated to
/// `unicode_xid::UnicodeXID::is_xid_start`.
#[cfg(feature = "unicode-xid-ident")]
#[inline(always)]
#[must_use]
pub fn is_id_first_alphabetic(x: char) -> bool {
    <char as unicode_xid::UnicodeXID>::is_xid_start(x)
}
2053
/// Can this character appear inside an identifier?
///
/// With the `unicode-xid-ident` feature, the decision is delegated to
/// `unicode_xid::UnicodeXID::is_xid_continue`.
#[cfg(feature = "unicode-xid-ident")]
#[inline(always)]
#[must_use]
pub fn is_id_continue(x: char) -> bool {
    <char as unicode_xid::UnicodeXID>::is_xid_continue(x)
}
2061
/// Can this character start an identifier?
///
/// Without the `unicode-xid-ident` feature, only ASCII letters (`a-z`, `A-Z`) qualify.
#[cfg(not(feature = "unicode-xid-ident"))]
#[inline(always)]
#[must_use]
pub fn is_id_first_alphabetic(x: char) -> bool {
    matches!(x, 'a'..='z' | 'A'..='Z')
}
2069
/// Can this character appear inside an identifier?
///
/// Without the `unicode-xid-ident` feature, only ASCII letters, ASCII digits and the
/// underscore qualify.
#[cfg(not(feature = "unicode-xid-ident"))]
#[inline(always)]
#[must_use]
pub fn is_id_continue(x: char) -> bool {
    matches!(x, '_' | '0'..='9' | 'a'..='z' | 'A'..='Z')
}
2077
/// _(internals)_ A type that implements the [`InputStream`] trait.
/// Exported under the `internals` feature only.
///
/// Multiple character streams are joined together to form one single stream:
/// once one stream is exhausted, reading continues with the next one.
pub struct MultiInputsStream<'a> {
    /// Buffered character, if any.
    ///
    /// Holds a character that was pushed back via `unget`; it is returned before
    /// anything is read from the underlying streams.
    pub buf: Option<char>,
    /// The current stream index (into `streams`).
    pub index: usize,
    /// The input character streams, read in order.
    pub streams: StaticVec<Peekable<Chars<'a>>>,
}
2090
2091impl InputStream for MultiInputsStream<'_> {
2092    #[inline]
2093    fn unget(&mut self, ch: char) {
2094        if self.buf.is_some() {
2095            panic!("cannot unget two characters in a row");
2096        }
2097
2098        self.buf = Some(ch);
2099    }
2100    fn get_next(&mut self) -> Option<char> {
2101        if let Some(ch) = self.buf.take() {
2102            return Some(ch);
2103        }
2104
2105        loop {
2106            if self.index >= self.streams.len() {
2107                // No more streams
2108                return None;
2109            } else if let Some(ch) = self.streams[self.index].next() {
2110                // Next character in current stream
2111                return Some(ch);
2112            } else {
2113                // Jump to the next stream
2114                self.index += 1;
2115            }
2116        }
2117    }
2118    fn peek_next(&mut self) -> Option<char> {
2119        if let Some(ch) = self.buf {
2120            return Some(ch);
2121        }
2122
2123        loop {
2124            if self.index >= self.streams.len() {
2125                // No more streams
2126                return None;
2127            } else if let Some(&ch) = self.streams[self.index].peek() {
2128                // Next character in current stream
2129                return Some(ch);
2130            } else {
2131                // Jump to the next stream
2132                self.index += 1;
2133            }
2134        }
2135    }
2136}
2137
/// _(internals)_ An iterator on a [`Token`] stream.
/// Exported under the `internals` feature only.
///
/// Each call to `next` pulls one raw token from `stream`, post-processes it against
/// the engine's custom keywords and disabled symbols, and finally passes it through
/// `token_mapper` (if any).
pub struct TokenIterator<'a> {
    /// Reference to the scripting `Engine`.
    ///
    /// Consulted for its custom keywords and disabled symbols.
    pub engine: &'a Engine,
    /// Current state.
    pub state: TokenizeState,
    /// Current position.
    pub pos: Position,
    /// Shared object to allow controlling the tokenizer externally.
    ///
    /// Checked at the start of every `next` call.
    pub tokenizer_control: TokenizerControl,
    /// Input character stream.
    pub stream: MultiInputsStream<'a>,
    /// A processor function that maps a token to another.
    ///
    /// When present, it is called with the token, its position and the current state.
    pub token_mapper: Option<&'a OnParseTokenCallback>,
}
2154
2155impl<'a> Iterator for TokenIterator<'a> {
2156    type Item = (Token, Position);
2157
2158    fn next(&mut self) -> Option<Self::Item> {
2159        let mut control = self.tokenizer_control.get();
2160
2161        if control.is_within_text {
2162            // Switch to text mode terminated by back-tick
2163            self.state.is_within_text_terminated_by = Some('`');
2164            // Reset it
2165            control.is_within_text = false;
2166            self.tokenizer_control.set(control);
2167        }
2168
2169        let (token, pos) = match get_next_token(&mut self.stream, &mut self.state, &mut self.pos) {
2170            // {EOF}
2171            None => return None,
2172            // {EOF} after unterminated string.
2173            // The only case where `TokenizeState.is_within_text_terminated_by` is set is when
2174            // a verbatim string or a string with continuation encounters {EOF}.
2175            // This is necessary to handle such cases for line-by-line parsing, but for an entire
2176            // script it is a syntax error.
2177            Some((Token::StringConstant(_), pos)) if self.state.is_within_text_terminated_by.is_some() => {
2178                self.state.is_within_text_terminated_by = None;
2179                return Some((Token::LexError(LERR::UnterminatedString), pos));
2180            }
2181            // Reserved keyword/symbol
2182            Some((Token::Reserved(s), pos)) => (match
2183                (&*s, self.engine.custom_keywords.contains_key(&*s))
2184            {
2185                ("===", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
2186                    "'===' is not a valid operator. This is not JavaScript! Should it be '=='?".to_string(),
2187                )),
2188                ("!==", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
2189                    "'!==' is not a valid operator. This is not JavaScript! Should it be '!='?".to_string(),
2190                )),
2191                ("->", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
2192                    "'->' is not a valid symbol. This is not C or C++!".to_string())),
2193                ("<-", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
2194                    "'<-' is not a valid symbol. This is not Go! Should it be '<='?".to_string(),
2195                )),
2196                (":=", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
2197                    "':=' is not a valid assignment operator. This is not Go or Pascal! Should it be simply '='?".to_string(),
2198                )),
2199                ("::<", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
2200                    "'::<>' is not a valid symbol. This is not Rust! Should it be '::'?".to_string(),
2201                )),
2202                ("(*", false) | ("*)", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
2203                    "'(* .. *)' is not a valid comment format. This is not Pascal! Should it be '/* .. */'?".to_string(),
2204                )),
2205                ("#", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
2206                    "'#' is not a valid symbol. Should it be '#{'?".to_string(),
2207                )),
2208                // Reserved keyword/operator that is custom.
2209                (_, true) => Token::Custom(s),
2210                // Reserved operator that is not custom.
2211                (token, false) if !is_valid_identifier(token.chars()) => {
2212                    let msg = format!("'{}' is a reserved symbol", token);
2213                    Token::LexError(LERR::ImproperSymbol(s.to_string(), msg))
2214                },
2215                // Reserved keyword that is not custom and disabled.
2216                (token, false) if self.engine.disabled_symbols.contains(token) => {
2217                    let msg = format!("reserved symbol '{}' is disabled", token);
2218                    Token::LexError(LERR::ImproperSymbol(s.to_string(), msg))
2219                },
2220                // Reserved keyword/operator that is not custom.
2221                (_, false) => Token::Reserved(s),
2222            }, pos),
2223            // Custom keyword
2224            Some((Token::Identifier(s), pos)) if self.engine.custom_keywords.contains_key(&*s) => {
2225                (Token::Custom(s), pos)
2226            }
2227            // Custom standard keyword/symbol - must be disabled
2228            Some((token, pos)) if self.engine.custom_keywords.contains_key(&*token.syntax()) => {
2229                if self.engine.disabled_symbols.contains(&*token.syntax()) {
2230                    // Disabled standard keyword/symbol
2231                    (Token::Custom(token.syntax().into()), pos)
2232                } else {
2233                    // Active standard keyword - should never be a custom keyword!
2234                    unreachable!("{:?} is an active keyword", token)
2235                }
2236            }
2237            // Disabled symbol
2238            Some((token, pos)) if self.engine.disabled_symbols.contains(&*token.syntax()) => {
2239                (Token::Reserved(token.syntax().into()), pos)
2240            }
2241            // Normal symbol
2242            Some(r) => r,
2243        };
2244
2245        // Run the mapper, if any
2246        let token = match self.token_mapper {
2247            Some(map_func) => map_func(token, pos, &self.state),
2248            None => token,
2249        };
2250
2251        Some((token, pos))
2252    }
2253}
2254
// Marks `TokenIterator` as a fused iterator: per the `FusedIterator` contract, once
// `next` has returned `None` it keeps returning `None`.
// NOTE(review): this relies on `get_next_token` continuing to report end-of-stream
// after the input is exhausted — confirm against its implementation.
impl FusedIterator for TokenIterator<'_> {}
2256
2257impl Engine {
2258    /// _(internals)_ Tokenize an input text stream.
2259    /// Exported under the `internals` feature only.
2260    #[cfg(feature = "internals")]
2261    #[inline(always)]
2262    #[must_use]
2263    pub fn lex<'a>(
2264        &'a self,
2265        input: impl IntoIterator<Item = &'a (impl AsRef<str> + 'a)>,
2266    ) -> (TokenIterator<'a>, TokenizerControl) {
2267        self.lex_raw(input, None)
2268    }
2269    /// _(internals)_ Tokenize an input text stream with a mapping function.
2270    /// Exported under the `internals` feature only.
2271    #[cfg(feature = "internals")]
2272    #[inline(always)]
2273    #[must_use]
2274    pub fn lex_with_map<'a>(
2275        &'a self,
2276        input: impl IntoIterator<Item = &'a (impl AsRef<str> + 'a)>,
2277        token_mapper: &'a OnParseTokenCallback,
2278    ) -> (TokenIterator<'a>, TokenizerControl) {
2279        self.lex_raw(input, Some(token_mapper))
2280    }
2281    /// Tokenize an input text stream with an optional mapping function.
2282    #[inline]
2283    #[must_use]
2284    pub(crate) fn lex_raw<'a>(
2285        &'a self,
2286        input: impl IntoIterator<Item = &'a (impl AsRef<str> + 'a)>,
2287        token_mapper: Option<&'a OnParseTokenCallback>,
2288    ) -> (TokenIterator<'a>, TokenizerControl) {
2289        let buffer: TokenizerControl = Cell::new(TokenizerControlBlock::new()).into();
2290        let buffer2 = buffer.clone();
2291
2292        (
2293            TokenIterator {
2294                engine: self,
2295                state: TokenizeState {
2296                    #[cfg(not(feature = "unchecked"))]
2297                    max_string_size: self.limits.max_string_size,
2298                    #[cfg(feature = "unchecked")]
2299                    max_string_size: None,
2300                    next_token_cannot_be_unary: false,
2301                    comment_level: 0,
2302                    include_comments: false,
2303                    is_within_text_terminated_by: None,
2304                },
2305                pos: Position::new(1, 0),
2306                tokenizer_control: buffer,
2307                stream: MultiInputsStream {
2308                    buf: None,
2309                    streams: input
2310                        .into_iter()
2311                        .map(|s| s.as_ref().chars().peekable())
2312                        .collect(),
2313                    index: 0,
2314                },
2315                token_mapper,
2316            },
2317            buffer2,
2318        )
2319    }
2320}