// rhai/tokenizer.rs

1//! Main module defining the lexer and parser.
2
3use rhai_codegen::expose_under_internals;
4
5use crate::engine::Precedence;
6use crate::func::native::OnParseTokenCallback;
7use crate::{Engine, Identifier, LexError, Position, SmartString, StaticVec, INT, UNSIGNED_INT};
8#[cfg(feature = "no_std")]
9use std::prelude::v1::*;
10use std::{
11    cell::RefCell,
12    char, fmt,
13    iter::{repeat, FusedIterator, Peekable},
14    rc::Rc,
15    str::{Chars, FromStr},
16};
17
/// _(internals)_ A type containing commands to control the tokenizer.
#[derive(Debug, Clone, Eq, PartialEq, Default, Hash)]
pub struct TokenizerControlBlock {
    /// Is the current tokenizer position within an interpolated text string?
    ///
    /// This flag allows switching the tokenizer back to _text_ parsing after an interpolation stream.
    pub is_within_text: bool,
    /// Global comments collected during tokenization.
    ///
    /// Available only under the `metadata` feature.
    #[cfg(feature = "metadata")]
    pub global_comments: String,
    /// Whitespace-compressed version of the script (if any).
    ///
    /// Set to `Some` in order to collect a compressed script.
    pub compressed: Option<String>,
}
33
34impl TokenizerControlBlock {
35    /// Create a new `TokenizerControlBlock`.
36    #[inline]
37    #[must_use]
38    pub const fn new() -> Self {
39        Self {
40            is_within_text: false,
41            #[cfg(feature = "metadata")]
42            global_comments: String::new(),
43            compressed: None,
44        }
45    }
46}
47
/// _(internals)_ A shared object that allows control of the tokenizer from outside.
pub type TokenizerControl = Rc<RefCell<TokenizerControlBlock>>;

/// Short local alias for [`LexError`] used throughout this module.
type LERR = LexError;

/// Separator character for numbers (e.g. `123_456`).
const NUMBER_SEPARATOR: char = '_';

/// A stream of tokens.
pub type TokenStream<'a> = Peekable<TokenIterator<'a>>;
58
/// _(internals)_ A Rhai language token.
/// Exported under the `internals` feature only.
#[derive(Debug, PartialEq, Clone, Hash)]
#[non_exhaustive]
pub enum Token {
    /// An `INT` constant.
    IntegerConstant(INT),
    /// A `FLOAT` constant, including its text representation.
    ///
    /// Reserved under the `no_float` feature.
    #[cfg(not(feature = "no_float"))]
    FloatConstant(Box<(crate::types::FloatWrapper<crate::FLOAT>, Identifier)>),
    /// A [`Decimal`][rust_decimal::Decimal] constant, including its text representation.
    ///
    /// Requires the `decimal` feature.
    #[cfg(feature = "decimal")]
    DecimalConstant(Box<(rust_decimal::Decimal, Identifier)>),
    /// An identifier.
    Identifier(Box<Identifier>),
    /// A character constant.
    CharConstant(char),
    /// A string constant.
    StringConstant(Box<SmartString>),
    /// An interpolated string.
    InterpolatedString(Box<SmartString>),
    /// `{`
    LeftBrace,
    /// `}`
    RightBrace,
    /// `(`
    LeftParen,
    /// `)`
    RightParen,
    /// `[`
    LeftBracket,
    /// `]`
    RightBracket,
    /// `()`
    Unit,
    /// `+`
    Plus,
    /// `+` (unary)
    UnaryPlus,
    /// `-`
    Minus,
    /// `-` (unary)
    UnaryMinus,
    /// `*`
    Multiply,
    /// `/`
    Divide,
    /// `%`
    Modulo,
    /// `**`
    PowerOf,
    /// `<<`
    LeftShift,
    /// `>>`
    RightShift,
    /// `;`
    SemiColon,
    /// `:`
    Colon,
    /// `::`
    DoubleColon,
    /// `=>`
    DoubleArrow,
    /// `_`
    Underscore,
    /// `,`
    Comma,
    /// `.`
    Period,
    /// `?.`
    ///
    /// Reserved under the `no_object` feature.
    #[cfg(not(feature = "no_object"))]
    Elvis,
    /// `??`
    DoubleQuestion,
    /// `?[`
    ///
    /// Reserved under the `no_index` feature.
    #[cfg(not(feature = "no_index"))]
    QuestionBracket,
    /// `..`
    ExclusiveRange,
    /// `..=`
    InclusiveRange,
    /// `#{`
    MapStart,
    /// `=`
    Equals,
    /// `true`
    True,
    /// `false`
    False,
    /// `let`
    Let,
    /// `const`
    Const,
    /// `if`
    If,
    /// `else`
    Else,
    /// `switch`
    Switch,
    /// `do`
    Do,
    /// `while`
    While,
    /// `until`
    Until,
    /// `loop`
    Loop,
    /// `for`
    For,
    /// `in`
    In,
    /// `!in`
    NotIn,
    /// `<`
    LessThan,
    /// `>`
    GreaterThan,
    /// `<=`
    LessThanEqualsTo,
    /// `>=`
    GreaterThanEqualsTo,
    /// `==`
    EqualsTo,
    /// `!=`
    NotEqualsTo,
    /// `!`
    Bang,
    /// `|`
    Pipe,
    /// `||`
    Or,
    /// `^`
    XOr,
    /// `&`
    Ampersand,
    /// `&&`
    And,
    /// `fn`
    ///
    /// Reserved under the `no_function` feature.
    #[cfg(not(feature = "no_function"))]
    Fn,
    /// `continue`
    Continue,
    /// `break`
    Break,
    /// `return`
    Return,
    /// `throw`
    Throw,
    /// `try`
    Try,
    /// `catch`
    Catch,
    /// `+=`
    PlusAssign,
    /// `-=`
    MinusAssign,
    /// `*=`
    MultiplyAssign,
    /// `/=`
    DivideAssign,
    /// `<<=`
    LeftShiftAssign,
    /// `>>=`
    RightShiftAssign,
    /// `&=`
    AndAssign,
    /// `|=`
    OrAssign,
    /// `^=`
    XOrAssign,
    /// `%=`
    ModuloAssign,
    /// `**=`
    PowerOfAssign,
    /// `private`
    ///
    /// Reserved under the `no_function` feature.
    #[cfg(not(feature = "no_function"))]
    Private,
    /// `import`
    ///
    /// Reserved under the `no_module` feature.
    #[cfg(not(feature = "no_module"))]
    Import,
    /// `export`
    ///
    /// Reserved under the `no_module` feature.
    #[cfg(not(feature = "no_module"))]
    Export,
    /// `as`
    ///
    /// Reserved under the `no_module` feature.
    #[cfg(not(feature = "no_module"))]
    As,
    /// A lexer error.
    LexError(Box<LexError>),
    /// A comment block.
    Comment(Box<String>),
    /// A reserved symbol.
    Reserved(Box<Identifier>),
    /// A custom keyword.
    ///
    /// Not available under `no_custom_syntax`.
    #[cfg(not(feature = "no_custom_syntax"))]
    Custom(Box<Identifier>),
    /// End of the input stream.
    /// Used as a placeholder for the end of input.
    EOF,
}
278
279impl fmt::Display for Token {
280    #[inline(always)]
281    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
282        #[allow(clippy::enum_glob_use)]
283        use Token::*;
284
285        match self {
286            IntegerConstant(i) => write!(f, "{i}"),
287            #[cfg(not(feature = "no_float"))]
288            FloatConstant(v) => write!(f, "{}", v.0),
289            #[cfg(feature = "decimal")]
290            DecimalConstant(d) => write!(f, "{}", d.0),
291            StringConstant(s) => write!(f, r#""{s}""#),
292            InterpolatedString(..) => f.write_str("string"),
293            CharConstant(c) => write!(f, "{c}"),
294            Identifier(s) => f.write_str(s),
295            Reserved(s) => f.write_str(s),
296            #[cfg(not(feature = "no_custom_syntax"))]
297            Custom(s) => f.write_str(s),
298            LexError(err) => write!(f, "{err}"),
299            Comment(s) => f.write_str(s),
300
301            EOF => f.write_str("{EOF}"),
302
303            token => f.write_str(token.literal_syntax()),
304        }
305    }
306}
307
// Table-driven keyword recognizer generated by GNU `gperf` on the file `tools/keywords.txt`.
//
// When adding new keywords, make sure to update `tools/keywords.txt` and re-generate this.

// Length bounds of all recognized keywords/symbols.
const MIN_KEYWORD_LEN: usize = 1;
const MAX_KEYWORD_LEN: usize = 8;
// Valid range of `gperf` hash values; anything outside cannot be a keyword.
const MIN_KEYWORD_HASH_VALUE: usize = 1;
const MAX_KEYWORD_HASH_VALUE: usize = 152;

// `gperf` association values, indexed by byte value.  There are 257 entries
// (not 256) because the second character of a candidate is looked up at
// `byte + 1` (see `Token::lookup_symbol_from_syntax`).  The sentinel value 153
// pushes the hash past `MAX_KEYWORD_HASH_VALUE`, rejecting the candidate.
static KEYWORD_ASSOC_VALUES: [u8; 257] = [
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 115, 153, 100, 153, 110,
    105, 40, 80, 2, 20, 25, 125, 95, 15, 40, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 55,
    35, 10, 5, 0, 30, 110, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 120, 105, 100, 85, 90, 153, 125, 5,
    0, 125, 35, 10, 100, 153, 20, 0, 153, 10, 0, 45, 55, 0, 153, 50, 55, 5, 0, 153, 0, 0, 35, 153,
    45, 50, 30, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
    153,
];
// Perfect-hash bucket list for keyword lookup, indexed by the `gperf` hash
// value computed in `Token::lookup_symbol_from_syntax`.
// `("", Token::EOF)` entries are empty (unused) slots in the hash table.
static KEYWORDS_LIST: [(&str, Token); 153] = [
    ("", Token::EOF),
    (">", Token::GreaterThan),
    (">=", Token::GreaterThanEqualsTo),
    (")", Token::RightParen),
    ("", Token::EOF),
    ("const", Token::Const),
    ("=", Token::Equals),
    ("==", Token::EqualsTo),
    ("continue", Token::Continue),
    ("", Token::EOF),
    ("catch", Token::Catch),
    ("<", Token::LessThan),
    ("<=", Token::LessThanEqualsTo),
    ("for", Token::For),
    ("loop", Token::Loop),
    ("", Token::EOF),
    (".", Token::Period),
    ("<<", Token::LeftShift),
    ("<<=", Token::LeftShiftAssign),
    ("", Token::EOF),
    ("false", Token::False),
    ("*", Token::Multiply),
    ("*=", Token::MultiplyAssign),
    ("let", Token::Let),
    ("", Token::EOF),
    ("while", Token::While),
    ("+", Token::Plus),
    ("+=", Token::PlusAssign),
    ("", Token::EOF),
    ("", Token::EOF),
    ("throw", Token::Throw),
    ("}", Token::RightBrace),
    (">>", Token::RightShift),
    (">>=", Token::RightShiftAssign),
    ("", Token::EOF),
    ("", Token::EOF),
    (";", Token::SemiColon),
    ("=>", Token::DoubleArrow),
    ("", Token::EOF),
    ("else", Token::Else),
    ("", Token::EOF),
    ("/", Token::Divide),
    ("/=", Token::DivideAssign),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("{", Token::LeftBrace),
    ("**", Token::PowerOf),
    ("**=", Token::PowerOfAssign),
    ("", Token::EOF),
    ("", Token::EOF),
    ("|", Token::Pipe),
    ("|=", Token::OrAssign),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    (":", Token::Colon),
    ("..", Token::ExclusiveRange),
    ("..=", Token::InclusiveRange),
    ("", Token::EOF),
    ("until", Token::Until),
    ("switch", Token::Switch),
    // Feature-gated slots keep the table length constant: exactly one of
    // each `#[cfg]` pair is compiled in.
    #[cfg(not(feature = "no_function"))]
    ("private", Token::Private),
    #[cfg(feature = "no_function")]
    ("", Token::EOF),
    ("try", Token::Try),
    ("true", Token::True),
    ("break", Token::Break),
    ("return", Token::Return),
    #[cfg(not(feature = "no_function"))]
    ("fn", Token::Fn),
    #[cfg(feature = "no_function")]
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    #[cfg(not(feature = "no_module"))]
    ("import", Token::Import),
    #[cfg(feature = "no_module")]
    ("", Token::EOF),
    #[cfg(not(feature = "no_object"))]
    ("?.", Token::Elvis),
    #[cfg(feature = "no_object")]
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    #[cfg(not(feature = "no_module"))]
    ("export", Token::Export),
    #[cfg(feature = "no_module")]
    ("", Token::EOF),
    ("in", Token::In),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("(", Token::LeftParen),
    ("||", Token::Or),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("^", Token::XOr),
    ("^=", Token::XOrAssign),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("_", Token::Underscore),
    ("::", Token::DoubleColon),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("-", Token::Minus),
    ("-=", Token::MinusAssign),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("]", Token::RightBracket),
    ("()", Token::Unit),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("&", Token::Ampersand),
    ("&=", Token::AndAssign),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("%", Token::Modulo),
    ("%=", Token::ModuloAssign),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("!", Token::Bang),
    ("!=", Token::NotEqualsTo),
    ("!in", Token::NotIn),
    ("", Token::EOF),
    ("", Token::EOF),
    ("[", Token::LeftBracket),
    ("if", Token::If),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    (",", Token::Comma),
    ("do", Token::Do),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    #[cfg(not(feature = "no_module"))]
    ("as", Token::As),
    #[cfg(feature = "no_module")]
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    #[cfg(not(feature = "no_index"))]
    ("?[", Token::QuestionBracket),
    #[cfg(feature = "no_index")]
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("??", Token::DoubleQuestion),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("&&", Token::And),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("#{", Token::MapStart),
];
509
// Table-driven reserved symbol recognizer generated by GNU `gperf` on the file `tools/reserved.txt`.
//
// When adding new reserved symbols, make sure to update `tools/reserved.txt` and re-generate this.

// Length bounds of all reserved symbols.
const MIN_RESERVED_LEN: usize = 1;
const MAX_RESERVED_LEN: usize = 10;
// Valid range of `gperf` hash values for reserved symbols.
const MIN_RESERVED_HASH_VALUE: usize = 1;
const MAX_RESERVED_HASH_VALUE: usize = 149;

// `gperf` association values, indexed by byte value.  NOTE(review): by analogy
// with `KEYWORD_ASSOC_VALUES`, the sentinel 150 presumably pushes the hash past
// `MAX_RESERVED_HASH_VALUE` to reject non-reserved text — confirm against the
// reserved-symbol lookup routine (not visible in this chunk).
static RESERVED_ASSOC_VALUES: [u8; 256] = [
    150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
    150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 10, 150, 5, 35, 150, 150,
    150, 45, 35, 30, 30, 150, 20, 15, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 35,
    30, 15, 5, 25, 0, 25, 150, 150, 150, 150, 150, 65, 150, 150, 150, 150, 150, 150, 150, 150, 150,
    150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 40, 150, 150, 150, 150, 150, 0, 150, 0,
    0, 0, 15, 45, 10, 15, 150, 150, 35, 25, 10, 50, 0, 150, 5, 0, 15, 0, 5, 25, 45, 15, 150, 150,
    25, 150, 20, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
    150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
    150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
    150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
    150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
    150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
    150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
];
// Perfect-hash bucket list for reserved symbols, indexed by the `gperf` hash value.
// `("", false, false, false)` entries are empty (unused) slots in the hash table.
//
// NOTE(review): the first flag decides whether the word is actually reserved
// under the current feature set (hence the `cfg!` expressions); the meaning of
// the two trailing flags is established where this table is consumed — confirm
// against the lookup routine (not visible in this chunk).
static RESERVED_LIST: [(&str, bool, bool, bool); 150] = [
    ("", false, false, false),
    ("?", true, false, false),
    ("as", cfg!(feature = "no_module"), false, false),
    ("use", true, false, false),
    ("case", true, false, false),
    ("async", true, false, false),
    ("public", true, false, false),
    ("package", true, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("super", true, false, false),
    ("#", true, false, false),
    ("private", cfg!(feature = "no_function"), false, false),
    ("var", true, false, false),
    ("protected", true, false, false),
    ("spawn", true, false, false),
    ("shared", true, false, false),
    ("is", true, false, false),
    ("===", true, false, false),
    ("sync", true, false, false),
    ("curry", true, true, true),
    ("static", true, false, false),
    ("default", true, false, false),
    ("!==", true, false, false),
    ("is_shared", cfg!(not(feature = "no_closure")), true, true),
    ("print", true, true, false),
    ("", false, false, false),
    ("#!", true, false, false),
    ("", false, false, false),
    ("this", true, false, false),
    ("is_def_var", true, true, false),
    ("thread", true, false, false),
    ("?.", cfg!(feature = "no_object"), false, false),
    ("", false, false, false),
    ("is_def_fn", cfg!(not(feature = "no_function")), true, false),
    ("yield", true, false, false),
    ("", false, false, false),
    ("fn", cfg!(feature = "no_function"), false, false),
    ("new", true, false, false),
    ("call", true, true, true),
    ("match", true, false, false),
    ("~", true, false, false),
    ("!.", true, false, false),
    ("", false, false, false),
    ("eval", true, true, false),
    ("await", true, false, false),
    ("", false, false, false),
    (":=", true, false, false),
    ("...", true, false, false),
    ("null", true, false, false),
    ("debug", true, true, false),
    ("@", true, false, false),
    ("type_of", true, true, true),
    ("", false, false, false),
    ("with", true, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("<-", true, false, false),
    ("", false, false, false),
    ("void", true, false, false),
    ("", false, false, false),
    ("import", cfg!(feature = "no_module"), false, false),
    ("--", true, false, false),
    ("nil", true, false, false),
    ("exit", false, false, false),
    ("", false, false, false),
    ("export", cfg!(feature = "no_module"), false, false),
    ("<|", true, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("$", true, false, false),
    ("->", true, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("|>", true, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("module", true, false, false),
    ("?[", cfg!(feature = "no_index"), false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("Fn", true, true, false),
    ("::<", true, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("++", true, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    (":;", true, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("*)", true, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("(*", true, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("go", true, false, false),
    ("", false, false, false),
    ("goto", true, false, false),
];
686
687impl Token {
688    /// Is the token a literal symbol?
689    #[must_use]
690    pub const fn is_literal(&self) -> bool {
691        #[allow(clippy::enum_glob_use)]
692        use Token::*;
693
694        match self {
695            IntegerConstant(..) => false,
696            #[cfg(not(feature = "no_float"))]
697            FloatConstant(..) => false,
698            #[cfg(feature = "decimal")]
699            DecimalConstant(..) => false,
700            StringConstant(..)
701            | InterpolatedString(..)
702            | CharConstant(..)
703            | Identifier(..)
704            | Reserved(..) => false,
705            #[cfg(not(feature = "no_custom_syntax"))]
706            Custom(..) => false,
707            LexError(..) | Comment(..) => false,
708
709            EOF => false,
710
711            _ => true,
712        }
713    }
    /// Get the literal syntax of the token.
    ///
    /// # Panics
    ///
    /// Panics if the token is not a literal symbol
    /// (check with [`is_literal`][Token::is_literal] first).
    #[must_use]
    pub const fn literal_syntax(&self) -> &'static str {
        #[allow(clippy::enum_glob_use)]
        use Token::*;

        match self {
            // Brackets and punctuation
            LeftBrace => "{",
            RightBrace => "}",
            LeftParen => "(",
            RightParen => ")",
            LeftBracket => "[",
            RightBracket => "]",
            Unit => "()",
            Plus => "+",
            UnaryPlus => "+",
            Minus => "-",
            UnaryMinus => "-",
            Multiply => "*",
            Divide => "/",
            SemiColon => ";",
            Colon => ":",
            DoubleColon => "::",
            DoubleArrow => "=>",
            Underscore => "_",
            Comma => ",",
            Period => ".",
            #[cfg(not(feature = "no_object"))]
            Elvis => "?.",
            DoubleQuestion => "??",
            #[cfg(not(feature = "no_index"))]
            QuestionBracket => "?[",
            ExclusiveRange => "..",
            InclusiveRange => "..=",
            MapStart => "#{",
            Equals => "=",
            // Keywords
            True => "true",
            False => "false",
            Let => "let",
            Const => "const",
            If => "if",
            Else => "else",
            Switch => "switch",
            Do => "do",
            While => "while",
            Until => "until",
            Loop => "loop",
            For => "for",
            In => "in",
            NotIn => "!in",
            // Comparison and logical operators
            LessThan => "<",
            GreaterThan => ">",
            Bang => "!",
            LessThanEqualsTo => "<=",
            GreaterThanEqualsTo => ">=",
            EqualsTo => "==",
            NotEqualsTo => "!=",
            Pipe => "|",
            Or => "||",
            Ampersand => "&",
            And => "&&",
            // Control-flow keywords
            Continue => "continue",
            Break => "break",
            Return => "return",
            Throw => "throw",
            Try => "try",
            Catch => "catch",
            // Compound assignment and remaining binary operators
            PlusAssign => "+=",
            MinusAssign => "-=",
            MultiplyAssign => "*=",
            DivideAssign => "/=",
            LeftShiftAssign => "<<=",
            RightShiftAssign => ">>=",
            AndAssign => "&=",
            OrAssign => "|=",
            XOrAssign => "^=",
            LeftShift => "<<",
            RightShift => ">>",
            XOr => "^",
            Modulo => "%",
            ModuloAssign => "%=",
            PowerOf => "**",
            PowerOfAssign => "**=",

            // Feature-gated keywords
            #[cfg(not(feature = "no_function"))]
            Fn => "fn",
            #[cfg(not(feature = "no_function"))]
            Private => "private",

            #[cfg(not(feature = "no_module"))]
            Import => "import",
            #[cfg(not(feature = "no_module"))]
            Export => "export",
            #[cfg(not(feature = "no_module"))]
            As => "as",

            _ => panic!("token is not a literal symbol"),
        }
    }
817
818    /// Is this token an op-assignment operator?
819    #[inline]
820    #[must_use]
821    pub const fn is_op_assignment(&self) -> bool {
822        #[allow(clippy::enum_glob_use)]
823        use Token::*;
824
825        matches!(
826            self,
827            PlusAssign
828                | MinusAssign
829                | MultiplyAssign
830                | DivideAssign
831                | LeftShiftAssign
832                | RightShiftAssign
833                | ModuloAssign
834                | PowerOfAssign
835                | AndAssign
836                | OrAssign
837                | XOrAssign
838        )
839    }
840
841    /// Get the corresponding operator of the token if it is an op-assignment operator.
842    #[must_use]
843    pub const fn get_base_op_from_assignment(&self) -> Option<Self> {
844        #[allow(clippy::enum_glob_use)]
845        use Token::*;
846
847        Some(match self {
848            PlusAssign => Plus,
849            MinusAssign => Minus,
850            MultiplyAssign => Multiply,
851            DivideAssign => Divide,
852            LeftShiftAssign => LeftShift,
853            RightShiftAssign => RightShift,
854            ModuloAssign => Modulo,
855            PowerOfAssign => PowerOf,
856            AndAssign => Ampersand,
857            OrAssign => Pipe,
858            XOrAssign => XOr,
859            _ => return None,
860        })
861    }
862
863    /// Has this token a corresponding op-assignment operator?
864    #[inline]
865    #[must_use]
866    pub const fn has_op_assignment(&self) -> bool {
867        #[allow(clippy::enum_glob_use)]
868        use Token::*;
869
870        matches!(
871            self,
872            Plus | Minus
873                | Multiply
874                | Divide
875                | LeftShift
876                | RightShift
877                | Modulo
878                | PowerOf
879                | Ampersand
880                | Pipe
881                | XOr
882        )
883    }
884
885    /// Get the corresponding op-assignment operator of the token.
886    #[must_use]
887    pub const fn convert_to_op_assignment(&self) -> Option<Self> {
888        #[allow(clippy::enum_glob_use)]
889        use Token::*;
890
891        Some(match self {
892            Plus => PlusAssign,
893            Minus => MinusAssign,
894            Multiply => MultiplyAssign,
895            Divide => DivideAssign,
896            LeftShift => LeftShiftAssign,
897            RightShift => RightShiftAssign,
898            Modulo => ModuloAssign,
899            PowerOf => PowerOfAssign,
900            Ampersand => AndAssign,
901            Pipe => OrAssign,
902            XOr => XOrAssign,
903            _ => return None,
904        })
905    }
906
907    /// Reverse lookup a symbol token from a piece of syntax.
908    #[inline]
909    #[must_use]
910    pub fn lookup_symbol_from_syntax(syntax: &str) -> Option<Self> {
911        // This implementation is based upon a pre-calculated table generated
912        // by GNU `gperf` on the list of keywords.
913        let utf8 = syntax.as_bytes();
914        let len = utf8.len();
915
916        if !(MIN_KEYWORD_LEN..=MAX_KEYWORD_LEN).contains(&len) {
917            return None;
918        }
919
920        let mut hash_val = len;
921
922        match len {
923            1 => (),
924            _ => hash_val += KEYWORD_ASSOC_VALUES[(utf8[1] as usize) + 1] as usize,
925        }
926        hash_val += KEYWORD_ASSOC_VALUES[utf8[0] as usize] as usize;
927
928        if !(MIN_KEYWORD_HASH_VALUE..=MAX_KEYWORD_HASH_VALUE).contains(&hash_val) {
929            return None;
930        }
931
932        match KEYWORDS_LIST[hash_val] {
933            (_, Self::EOF) => None,
934            // Fail early to avoid calling memcmp().
935            // Since we are already working with bytes, mind as well check the first one.
936            (s, ref t) if s.len() == len && s.as_bytes()[0] == utf8[0] && s == syntax => {
937                Some(t.clone())
938            }
939            _ => None,
940        }
941    }
942
    /// If an operator immediately follows this token, is that operator likely
    /// a _unary_ operator?
    ///
    /// For example, after `(`, `,` or any binary operator, a following `-`
    /// must be unary negation rather than subtraction.
    /// (Not sure about `fn` name.)
    #[must_use]
    pub const fn is_next_unary(&self) -> bool {
        #[allow(clippy::enum_glob_use)]
        use Token::*;

        match self {
            SemiColon        | // ; - is unary
            Colon            | // #{ foo: - is unary
            Comma            | // ( ... , -expr ) - is unary
            //Period         |
            //Elvis          |
            DoubleQuestion   | // ?? - is unary
            ExclusiveRange   | // .. - is unary
            InclusiveRange   | // ..= - is unary
            LeftBrace        | // { -expr } - is unary
            // RightBrace    | // { expr } - expr not unary & is closing
            LeftParen        | // ( -expr ) - is unary
            // RightParen    | // ( expr ) - expr not unary & is closing
            LeftBracket      | // [ -expr ] - is unary
            // RightBracket  | // [ expr ] - expr not unary & is closing
            Plus             |
            PlusAssign       |
            UnaryPlus        |
            Minus            |
            MinusAssign      |
            UnaryMinus       |
            Multiply         |
            MultiplyAssign   |
            Divide           |
            DivideAssign     |
            Modulo           |
            ModuloAssign     |
            PowerOf          |
            PowerOfAssign    |
            LeftShift        |
            LeftShiftAssign  |
            RightShift       |
            RightShiftAssign |
            Equals           |
            EqualsTo         |
            NotEqualsTo      |
            LessThan         |
            GreaterThan      |
            Bang             |
            LessThanEqualsTo |
            GreaterThanEqualsTo |
            Pipe             |
            Ampersand        |
            If               |
            //Do             |
            While            |
            Until            |
            In               |
            NotIn            |
            And              |
            AndAssign        |
            Or               |
            OrAssign         |
            XOr              |
            XOrAssign        |
            Return           |
            Throw               => true,

            #[cfg(not(feature = "no_index"))]
            QuestionBracket     => true,    // ?[ - is unary

            LexError(..)        => true,

            _                   => false,
        }
    }
1016
1017    /// Get the precedence number of the token.
1018    #[must_use]
1019    pub const fn precedence(&self) -> Option<Precedence> {
1020        #[allow(clippy::enum_glob_use)]
1021        use Token::*;
1022
1023        Precedence::new(match self {
1024            Or | XOr | Pipe => 30,
1025
1026            And | Ampersand => 60,
1027
1028            EqualsTo | NotEqualsTo => 90,
1029
1030            In | NotIn => 110,
1031
1032            LessThan | LessThanEqualsTo | GreaterThan | GreaterThanEqualsTo => 130,
1033
1034            DoubleQuestion => 135,
1035
1036            ExclusiveRange | InclusiveRange => 140,
1037
1038            Plus | Minus => 150,
1039
1040            Divide | Multiply | Modulo => 180,
1041
1042            PowerOf => 190,
1043
1044            LeftShift | RightShift => 210,
1045
1046            _ => 0,
1047        })
1048    }
1049
1050    /// Does an expression bind to the right (instead of left)?
1051    #[must_use]
1052    pub const fn is_bind_right(&self) -> bool {
1053        #[allow(clippy::enum_glob_use)]
1054        use Token::*;
1055
1056        match self {
1057            // Exponentiation binds to the right
1058            PowerOf => true,
1059
1060            _ => false,
1061        }
1062    }
1063
    /// Is this token a standard symbol used in the language?
    ///
    /// Standard symbols are the punctuation, delimiter and operator tokens built into the
    /// language; reserved and custom symbols fall through to the catch-all arm.
    #[must_use]
    pub const fn is_standard_symbol(&self) -> bool {
        #[allow(clippy::enum_glob_use)]
        use Token::*;

        match self {
            LeftBrace | RightBrace | LeftParen | RightParen | LeftBracket | RightBracket | Plus
            | UnaryPlus | Minus | UnaryMinus | Multiply | Divide | Modulo | PowerOf | LeftShift
            | RightShift | SemiColon | Colon | DoubleColon | Comma | Period | DoubleQuestion
            | ExclusiveRange | InclusiveRange | MapStart | Equals | LessThan | GreaterThan
            | LessThanEqualsTo | GreaterThanEqualsTo | EqualsTo | NotEqualsTo | Bang | Pipe
            | Or | XOr | Ampersand | And | PlusAssign | MinusAssign | MultiplyAssign
            | DivideAssign | LeftShiftAssign | RightShiftAssign | AndAssign | OrAssign
            | XOrAssign | ModuloAssign | PowerOfAssign => true,

            // Only present when object support is compiled in.
            #[cfg(not(feature = "no_object"))]
            Elvis => true,

            // `?[` - only present when indexing is compiled in.
            #[cfg(not(feature = "no_index"))]
            QuestionBracket => true,

            _ => false,
        }
    }
1089
1090    /// Is this token a standard keyword?
1091    #[inline]
1092    #[must_use]
1093    pub const fn is_standard_keyword(&self) -> bool {
1094        #[allow(clippy::enum_glob_use)]
1095        use Token::*;
1096
1097        match self {
1098            #[cfg(not(feature = "no_function"))]
1099            Fn | Private => true,
1100
1101            #[cfg(not(feature = "no_module"))]
1102            Import | Export | As => true,
1103
1104            True | False | Let | Const | If | Else | Do | While | Until | Loop | For | In
1105            | Continue | Break | Return | Throw | Try | Catch => true,
1106
1107            _ => false,
1108        }
1109    }
1110
    /// Is this token a reserved keyword or symbol?
    ///
    /// Reserved tokens are carried in the [`Reserved`][Token::Reserved] variant.
    #[inline(always)]
    #[must_use]
    pub const fn is_reserved(&self) -> bool {
        matches!(self, Self::Reserved(..))
    }
1117
    /// Is this token a custom keyword?
    ///
    /// Custom tokens are carried in the [`Custom`][Token::Custom] variant; the method only
    /// exists when custom syntax support is compiled in.
    #[cfg(not(feature = "no_custom_syntax"))]
    #[inline(always)]
    #[must_use]
    pub const fn is_custom(&self) -> bool {
        matches!(self, Self::Custom(..))
    }
1125}
1126
impl From<Token> for String {
    /// Convert a [`Token`] into its textual representation.
    #[inline(always)]
    fn from(token: Token) -> Self {
        // Delegate to the token's `Display` implementation.
        token.to_string()
    }
}
1133
/// _(internals)_ State of the tokenizer.
/// Exported under the `internals` feature only.
#[derive(Debug, Clone, Eq, PartialEq, Default)]
pub struct TokenizeState {
    /// Maximum length of a string.
    ///
    /// Not available under `unchecked`.
    #[cfg(not(feature = "unchecked"))]
    pub max_string_len: Option<std::num::NonZeroUsize>,
    /// Can the next token be a unary operator?
    ///
    /// Updated after each token based on [`Token::is_next_unary`].
    pub next_token_cannot_be_unary: bool,
    /// Shared object to allow controlling the tokenizer externally.
    pub tokenizer_control: TokenizerControl,
    /// Current nesting level of block comments (zero when not inside a block comment).
    pub comment_level: usize,
    /// Include comments?
    pub include_comments: bool,
    /// Is the current tokenizer position within the text stream of an interpolated string?
    ///
    /// Holds the terminator sequence that will close the current string, if any.
    pub is_within_text_terminated_by: Option<SmartString>,
    /// Textual syntax of the current token, if any.
    ///
    /// Set to `Some` to begin tracking this information.
    pub last_token: Option<SmartString>,
}
1158
/// _(internals)_ Trait that encapsulates a peekable character input stream.
/// Exported under the `internals` feature only.
pub trait InputStream {
    /// Un-get a character back into the `InputStream`.
    /// The next [`get_next`][InputStream::get_next] or [`peek_next`][InputStream::peek_next]
    /// will return this character instead.
    fn unget(&mut self, ch: char);
    /// Get the next character from the `InputStream`.
    fn get_next(&mut self) -> Option<char>;
    /// Peek the next character in the `InputStream`.
    #[must_use]
    fn peek_next(&mut self) -> Option<char>;

    /// Consume the next character and advance the position by one column.
    ///
    /// Returns the consumed character, if any.
    /// NOTE: the position is advanced even when the stream is already exhausted.
    #[inline(always)]
    fn eat_next_and_advance(&mut self, pos: &mut Position) -> Option<char> {
        pos.advance();
        self.get_next()
    }
}
1179
1180/// _(internals)_ Parse a raw string literal. Exported under the `internals` feature only.
1181///
1182/// Raw string literals do not process any escapes. They start with the character `#` (`U+0023`)
1183/// repeated any number of times, then finally a `"` (`U+0022`, double-quote).
1184///
1185/// The raw string _body_ can contain any sequence of Unicode characters. It is terminated only by
1186/// another `"` (`U+0022`, double-quote) character, followed by the same number of `#` (`U+0023`)
1187/// characters.
1188///
1189/// All Unicode characters contained in the raw string body represent themselves, including the
1190/// characters `"` (`U+0022`, double-quote), except when followed by at least as many `#` (`U+0023`)
1191/// characters as were used to start the raw string literal, `\` (`U+005C`) etc., and do not have
1192/// any special meaning.
1193///
1194/// Returns the parsed string.
1195///
1196/// # Returns
1197///
1198/// | Type                      | Return Value                                                 |`state.is_within_text_terminated_by`  |
1199/// |---------------------------|:------------------------------------------------------------:|:------------------------------------:|
1200/// |`#"hello"#`                |[`StringConstant("hello")`][Token::StringConstant]            |`None`                                |
1201/// |`#"hello`_{EOF}_           |[`StringConstant("hello")`][Token::StringConstant]            |`Some("#")`                           |
1202/// |`####"hello`_{EOF}_        |[`StringConstant("hello")`][Token::StringConstant]            |`Some("####")`                        |
1203/// |`#" "hello" "`_{EOF}_      |[`LexError`]                                                  |`None`                                |
1204/// |`#""hello""#`              |[`StringConstant("\"hello\"")`][Token::StringConstant]        |`None`                                |
1205/// |`##"hello #"# world"##`    |[`StringConstant("hello #\"# world")`][Token::StringConstant] |`None`                                |
1206/// |`#"R"#`                    |[`StringConstant("R")`][Token::StringConstant]                |`None`                                |
1207/// |`#"\x52"#`                 |[`StringConstant("\\x52")`][Token::StringConstant]            |`None`                                |
1208///
1209/// This function does _not_ throw a [`LexError`] for an unterminated raw string at _{EOF}_
1210///
1211/// This is to facilitate using this function to parse a script line-by-line, where the end of the
1212/// line (i.e. _{EOF}_) is not necessarily the end of the script.
1213///
1214/// Any time a [`StringConstant`][Token::StringConstant] is returned with
1215/// `state.is_within_text_terminated_by` set to `Some(_)` is one of the above conditions.
1216pub fn parse_raw_string_literal(
1217    stream: &mut (impl InputStream + ?Sized),
1218    state: &mut TokenizeState,
1219    pos: &mut Position,
1220    mut hash_count: usize,
1221) -> Result<(SmartString, Position), (LexError, Position)> {
1222    let start = *pos;
1223    let mut first_char = Position::NONE;
1224
1225    if hash_count == 0 {
1226        // Count the number of '#'s
1227        // Start with 1 because the first '#' is already consumed
1228        hash_count = 1;
1229
1230        while let Some('#') = stream.peek_next() {
1231            stream.eat_next_and_advance(pos);
1232            hash_count += 1;
1233        }
1234
1235        // Match '"'
1236        match stream.get_next() {
1237            Some('"') => pos.advance(),
1238            Some(c) => return Err((LERR::UnexpectedInput(c.to_string()), start)),
1239            None => return Err((LERR::UnterminatedString, start)),
1240        }
1241    }
1242
1243    let collect: SmartString = repeat('#').take(hash_count).collect();
1244    if let Some(ref mut last) = state.last_token {
1245        last.clear();
1246        last.push_str(&collect);
1247        last.push('"');
1248    }
1249    state.is_within_text_terminated_by = Some(collect);
1250
1251    // Match everything until the same number of '#'s are seen, prepended by a '"'
1252
1253    // Counts the number of '#' characters seen after a quotation mark.
1254    // Becomes Some(0) after a quote is seen, but resets to None if a hash doesn't follow.
1255    let mut seen_hashes: Option<usize> = None;
1256    let mut result = SmartString::new_const();
1257
1258    loop {
1259        let next_char = match stream.get_next() {
1260            Some(ch) => ch,
1261            None => break, // Allow unterminated string
1262        };
1263        pos.advance();
1264
1265        match (next_char, &mut seen_hashes) {
1266            // Begin attempt to close string
1267            ('"', None) => seen_hashes = Some(0),
1268            // Restart attempt to close string
1269            ('"', Some(count)) => {
1270                // result.reserve(*count as usize+c.len());
1271                result.push('"');
1272                result.extend(repeat('#').take(*count as usize));
1273                seen_hashes = Some(0);
1274            }
1275            // Continue attempt to close string
1276            ('#', Some(count)) => {
1277                *count += 1;
1278                if *count == hash_count {
1279                    state.is_within_text_terminated_by = None;
1280                    break;
1281                }
1282            }
1283            // Fail to close the string - add previous quote and hashes
1284            (c, Some(count)) => {
1285                // result.reserve(*count as usize +1+c.len());
1286                result.push('"');
1287                result.extend(repeat('#').take(*count as usize));
1288                result.push(c);
1289                seen_hashes = None;
1290            }
1291            // New line
1292            ('\n', _) => {
1293                result.push('\n');
1294                pos.new_line();
1295            }
1296            // Normal new character seen
1297            (c, None) => result.push(c),
1298        }
1299
1300        // Check string length
1301        #[cfg(not(feature = "unchecked"))]
1302        if let Some(max) = state.max_string_len {
1303            if result.len() > max.get() {
1304                return Err((LexError::StringTooLong(max.get()), start));
1305            }
1306        }
1307
1308        if first_char.is_none() {
1309            first_char = *pos;
1310        }
1311    }
1312
1313    Ok((result, first_char))
1314}
1315
1316/// _(internals)_ Parse a string literal ended by a specified termination character.
1317/// Exported under the `internals` feature only.
1318///
1319/// Returns the parsed string and a boolean indicating whether the string is
1320/// terminated by an interpolation `${`.
1321///
1322/// # Returns
1323///
1324/// | Type                            | Return Value                                        |`state.is_within_text_terminated_by`|
1325/// |---------------------------------|:---------------------------------------------------:|:----------------------------------:|
1326/// |`"hello"`                        |[`StringConstant("hello")`][Token::StringConstant]   |`None`                              |
1327/// |`"hello`_{LF}_ or _{EOF}_        |[`LexError`]                                         |`None`                              |
1328/// |`"hello\`_{EOF}_ or _{LF}{EOF}_  |[`StringConstant("hello")`][Token::StringConstant]   |`Some('"')`                         |
1329/// |`` `hello``_{EOF}_               |[`StringConstant("hello")`][Token::StringConstant]   |``Some('`')``                       |
1330/// |`` `hello``_{LF}{EOF}_           |[`StringConstant("hello\n")`][Token::StringConstant] |``Some('`')``                       |
1331/// |`` `hello ${``                   |[`InterpolatedString("hello ")`][Token::InterpolatedString]<br/>next token is `{`|`None`  |
1332/// |`` } hello` ``                   |[`StringConstant(" hello")`][Token::StringConstant]  |`None`                              |
1333/// |`} hello`_{EOF}_                 |[`StringConstant(" hello")`][Token::StringConstant]  |``Some('`')``                       |
1334///
1335/// This function does not throw a [`LexError`] for the following conditions:
1336///
1337/// * Unterminated literal string at _{EOF}_
1338///
1339/// * Unterminated normal string with continuation at _{EOF}_
1340///
1341/// This is to facilitate using this function to parse a script line-by-line, where the end of the
1342/// line (i.e. _{EOF}_) is not necessarily the end of the script.
1343///
1344/// Any time a [`StringConstant`][Token::StringConstant] is returned with
1345/// `state.is_within_text_terminated_by` set to `Some(_)` is one of the above conditions.
pub fn parse_string_literal(
    stream: &mut (impl InputStream + ?Sized),
    state: &mut TokenizeState,
    pos: &mut Position,
    termination_char: char,
    verbatim: bool,
    allow_line_continuation: bool,
    allow_interpolation: bool,
) -> Result<(SmartString, bool, Position), (LexError, Position)> {
    let mut result = SmartString::new_const();
    // Pending escape sequence - holds the leading `\` while one is active; empty otherwise.
    let mut escape = SmartString::new_const();

    let start = *pos;
    let mut first_char = Position::NONE;
    let mut interpolated = false;
    // After a line continuation, leading whitespace up to this column is skipped.
    #[cfg(not(feature = "no_position"))]
    let mut skip_space_until = 0;

    state.is_within_text_terminated_by = Some(termination_char.to_string().into());
    if let Some(ref mut last) = state.last_token {
        last.clear();
        last.push(termination_char);
    }

    loop {
        debug_assert!(
            !verbatim || escape.is_empty(),
            "verbatim strings should not have any escapes"
        );

        let next_char = match stream.get_next() {
            Some(ch) => {
                pos.advance();
                ch
            }
            // EOF inside a verbatim string - allowed (may continue on the next line).
            None if verbatim => {
                debug_assert_eq!(escape, "", "verbatim strings should not have any escapes");
                pos.advance();
                break;
            }
            // EOF right after a line-continuation backslash - also allowed.
            None if allow_line_continuation && !escape.is_empty() => {
                debug_assert_eq!(escape, "\\", "unexpected escape {escape} at end of line");
                pos.advance();
                break;
            }
            None => {
                pos.advance();
                state.is_within_text_terminated_by = None;
                return Err((LERR::UnterminatedString, start));
            }
        };

        if let Some(ref mut last) = state.last_token {
            last.push(next_char);
        }

        // String interpolation? An unescaped `${` switches to expression parsing.
        if allow_interpolation
            && next_char == '$'
            && escape.is_empty()
            && stream.peek_next().map_or(false, |ch| ch == '{')
        {
            interpolated = true;
            state.is_within_text_terminated_by = None;
            break;
        }

        // Check string length
        #[cfg(not(feature = "unchecked"))]
        if let Some(max) = state.max_string_len {
            if result.len() > max.get() {
                return Err((LexError::StringTooLong(max.get()), start));
            }
        }

        // Close wrapper - an unescaped termination character ends the string
        if termination_char == next_char && escape.is_empty() {
            // Double wrapper (e.g. `""` inside a string) - treated as a literal character
            if stream.peek_next().map_or(false, |c| c == termination_char) {
                stream.eat_next_and_advance(pos);
                if let Some(ref mut last) = state.last_token {
                    last.push(termination_char);
                }
            } else {
                state.is_within_text_terminated_by = None;
                break;
            }
        }

        if first_char.is_none() {
            first_char = *pos;
        }

        // NOTE: the escape-letter arms below (`r`, `n`, `t`, `x`/`u`/`U` etc.) only fire
        // while an escape is pending, i.e. immediately after a backslash.
        match next_char {
            // \r - ignore if followed by \n
            '\r' if stream.peek_next().map_or(false, |ch| ch == '\n') => (),
            // \r
            'r' if !escape.is_empty() => {
                escape.clear();
                result.push_str("\r");
            }
            // \n
            'n' if !escape.is_empty() => {
                escape.clear();
                result.push_str("\n");
            }
            // \...
            '\\' if !verbatim && escape.is_empty() => {
                escape.push_str("\\");
            }
            // \\
            '\\' if !escape.is_empty() => {
                escape.clear();
                result.push_str("\\");
            }
            // \t
            't' if !escape.is_empty() => {
                escape.clear();
                result.push_str("\t");
            }
            // \x??, \u????, \U???????? - hex character escapes of 2, 4 or 8 digits
            ch @ ('x' | 'u' | 'U') if !escape.is_empty() => {
                let mut seq = escape.clone();
                escape.clear();
                seq.push(ch);

                let mut out_val: u32 = 0;
                let len = match ch {
                    'x' => 2,
                    'u' => 4,
                    'U' => 8,
                    c => unreachable!("x or u or U expected but gets '{}'", c),
                };

                for _ in 0..len {
                    let c = stream
                        .get_next()
                        .ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?;

                    pos.advance();
                    seq.push(c);
                    if let Some(ref mut last) = state.last_token {
                        last.push(c);
                    }

                    out_val *= 16;
                    out_val += c
                        .to_digit(16)
                        .ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?;
                }

                result.push(
                    char::from_u32(out_val)
                        .ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?,
                );
            }

            // LF - Verbatim
            '\n' if verbatim => {
                debug_assert_eq!(escape, "", "verbatim strings should not have any escapes");
                pos.new_line();
                result.push_str("\n");
            }

            // LF - Line continuation (backslash at end of line)
            '\n' if allow_line_continuation && !escape.is_empty() => {
                debug_assert_eq!(escape, "\\", "unexpected escape {escape} at end of line");
                escape.clear();
                pos.new_line();

                #[cfg(not(feature = "no_position"))]
                {
                    let start_position = start.position().unwrap();
                    skip_space_until = start_position + 1;
                }
            }

            // LF - Unterminated string
            '\n' => {
                pos.rewind();
                state.is_within_text_terminated_by = None;
                return Err((LERR::UnterminatedString, start));
            }

            // \{termination_char} - escaped termination character
            ch if termination_char == ch && !escape.is_empty() => {
                escape.clear();
                result.push(termination_char);
            }

            // Unknown escape sequence
            ch if !escape.is_empty() => {
                escape.push(ch);

                return Err((LERR::MalformedEscapeSequence(escape.to_string()), *pos));
            }

            // Whitespace to skip after a line continuation, up to the starting column
            #[cfg(not(feature = "no_position"))]
            ch if ch.is_whitespace() && pos.position().unwrap() < skip_space_until => (),

            // All other characters
            ch => {
                escape.clear();
                result.push(ch);

                #[cfg(not(feature = "no_position"))]
                {
                    skip_space_until = 0;
                }
            }
        }
    }

    // Check string length
    #[cfg(not(feature = "unchecked"))]
    if let Some(max) = state.max_string_len {
        if result.len() > max.get() {
            return Err((LexError::StringTooLong(max.get()), start));
        }
    }

    Ok((result, interpolated, first_char))
}
1570
1571/// Scan for a block comment until the end.
1572fn scan_block_comment(
1573    stream: &mut (impl InputStream + ?Sized),
1574    level: usize,
1575    pos: &mut Position,
1576    comment: Option<&mut String>,
1577) -> usize {
1578    let mut level = level;
1579    let mut comment = comment;
1580
1581    while let Some(c) = stream.get_next() {
1582        pos.advance();
1583
1584        if let Some(comment) = comment.as_mut() {
1585            comment.push(c);
1586        }
1587
1588        match c {
1589            '/' => {
1590                if let Some(c2) = stream.peek_next().filter(|&ch| ch == '*') {
1591                    stream.eat_next_and_advance(pos);
1592                    if let Some(comment) = comment.as_mut() {
1593                        comment.push(c2);
1594                    }
1595                    level += 1;
1596                }
1597            }
1598            '*' => {
1599                if let Some(c2) = stream.peek_next().filter(|&ch| ch == '/') {
1600                    stream.eat_next_and_advance(pos);
1601                    if let Some(comment) = comment.as_mut() {
1602                        comment.push(c2);
1603                    }
1604                    level -= 1;
1605                }
1606            }
1607            '\n' => pos.new_line(),
1608            _ => (),
1609        }
1610
1611        if level == 0 {
1612            break;
1613        }
1614    }
1615
1616    level
1617}
1618
/// Test if the given character is a hex digit (i.e. 0-9, a-f, A-F).
#[inline(always)]
const fn is_hex_digit(c: char) -> bool {
    matches!(c, '0'..='9' | 'a'..='f' | 'A'..='F')
}
1624
/// Test if the given character is a decimal digit (i.e. 0-9).
#[inline(always)]
const fn is_numeric_digit(c: char) -> bool {
    matches!(c, '0'..='9')
}
1630
/// Test if the given character is an octal digit (i.e. 0-7).
#[inline(always)]
const fn is_octal_digit(c: char) -> bool {
    c >= '0' && c <= '7'
}
1636
/// Test if the given character is a binary digit (i.e. 0 or 1).
#[inline(always)]
const fn is_binary_digit(c: char) -> bool {
    matches!(c, '0' | '1')
}
1642
/// Test if the comment block is a doc-comment.
///
/// A doc-comment starts with exactly `///` (but not `////`)
/// or exactly `/**` (but not `/***`).
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
#[inline]
#[must_use]
pub fn is_doc_comment(comment: &str) -> bool {
    if let Some(rest) = comment.strip_prefix("///") {
        !rest.starts_with('/')
    } else if let Some(rest) = comment.strip_prefix("/**") {
        !rest.starts_with('*')
    } else {
        false
    }
}
1652
1653/// _(internals)_ Get the next token from the input stream.
1654/// Exported under the `internals` feature only.
1655#[inline(always)]
1656#[must_use]
1657pub fn get_next_token(
1658    stream: &mut (impl InputStream + ?Sized),
1659    state: &mut TokenizeState,
1660    pos: &mut Position,
1661) -> (Token, Position) {
1662    let result = get_next_token_inner(stream, state, pos);
1663
1664    // Save the last token's state
1665    state.next_token_cannot_be_unary = !result.0.is_next_unary();
1666
1667    result
1668}
1669
1670/// Get the next token.
1671#[must_use]
1672fn get_next_token_inner(
1673    stream: &mut (impl InputStream + ?Sized),
1674    state: &mut TokenizeState,
1675    pos: &mut Position,
1676) -> (Token, Position) {
1677    state.last_token.as_mut().map(SmartString::clear);
1678
1679    // Still inside a comment?
1680    if state.comment_level > 0 {
1681        let start_pos = *pos;
1682        let mut comment = String::new();
1683        let comment_buf = state.include_comments.then_some(&mut comment);
1684
1685        state.comment_level = scan_block_comment(stream, state.comment_level, pos, comment_buf);
1686
1687        let return_comment = state.include_comments;
1688
1689        #[cfg(not(feature = "no_function"))]
1690        #[cfg(feature = "metadata")]
1691        let return_comment = return_comment || is_doc_comment(&comment);
1692
1693        if return_comment {
1694            return (Token::Comment(comment.into()), start_pos);
1695        }
1696
1697        // Reached EOF without ending comment block?
1698        if state.comment_level > 0 {
1699            return (Token::EOF, *pos);
1700        }
1701    }
1702
1703    // Within text?
1704    match state.is_within_text_terminated_by.take() {
1705        Some(ch) if ch.starts_with('#') => {
1706            return parse_raw_string_literal(stream, state, pos, ch.len()).map_or_else(
1707                |(err, err_pos)| (Token::LexError(err.into()), err_pos),
1708                |(result, start_pos)| (Token::StringConstant(result.into()), start_pos),
1709            )
1710        }
1711        Some(ch) => {
1712            let c = ch.chars().next().unwrap();
1713
1714            return parse_string_literal(stream, state, pos, c, true, false, true).map_or_else(
1715                |(err, err_pos)| (Token::LexError(err.into()), err_pos),
1716                |(result, interpolated, start_pos)| {
1717                    if interpolated {
1718                        (Token::InterpolatedString(result.into()), start_pos)
1719                    } else {
1720                        (Token::StringConstant(result.into()), start_pos)
1721                    }
1722                },
1723            );
1724        }
1725        None => (),
1726    }
1727
1728    let mut negated: Option<Position> = None;
1729
1730    while let Some(c) = stream.get_next() {
1731        pos.advance();
1732
1733        let start_pos = *pos;
1734        let cc = stream.peek_next().unwrap_or('\0');
1735
1736        // Identifiers and strings that can have non-ASCII characters
1737        match (c, cc) {
1738            // digit ...
1739            ('0'..='9', ..) => {
1740                let mut result = SmartString::new_const();
1741                let mut radix_base: Option<u32> = None;
1742                let mut valid: fn(char) -> bool = is_numeric_digit;
1743                let mut _has_period = false;
1744                let mut _has_e = false;
1745
1746                result.push(c);
1747
1748                while let Some(next_char) = stream.peek_next() {
1749                    match next_char {
1750                        NUMBER_SEPARATOR => {
1751                            stream.eat_next_and_advance(pos);
1752                        }
1753                        ch if valid(ch) => {
1754                            result.push(ch);
1755                            stream.eat_next_and_advance(pos);
1756                        }
1757                        #[cfg(any(not(feature = "no_float"), feature = "decimal"))]
1758                        '.' if !_has_period && radix_base.is_none() => {
1759                            stream.get_next().unwrap();
1760
1761                            // Check if followed by digits or something that cannot start a property name
1762                            match stream.peek_next() {
1763                                // digits after period - accept the period
1764                                Some('0'..='9') => {
1765                                    result.push_str(".");
1766                                    pos.advance();
1767                                    _has_period = true;
1768                                }
1769                                // _ - cannot follow a decimal point
1770                                Some(NUMBER_SEPARATOR) => {
1771                                    stream.unget('.');
1772                                    break;
1773                                }
1774                                // .. - reserved symbol, not a floating-point number
1775                                Some('.') => {
1776                                    stream.unget('.');
1777                                    break;
1778                                }
1779                                // symbol after period - probably a float
1780                                Some(ch) if !is_id_first_alphabetic(ch) => {
1781                                    result.push_str(".");
1782                                    pos.advance();
1783                                    result.push_str("0");
1784                                    _has_period = true;
1785                                }
1786                                // Not a floating-point number
1787                                _ => {
1788                                    stream.unget('.');
1789                                    break;
1790                                }
1791                            }
1792                        }
1793                        #[cfg(not(feature = "no_float"))]
1794                        'e' if !_has_e && radix_base.is_none() => {
1795                            stream.get_next().unwrap();
1796
1797                            // Check if followed by digits or +/-
1798                            match stream.peek_next() {
1799                                // digits after e - accept the e (no decimal points allowed)
1800                                Some('0'..='9') => {
1801                                    result.push_str("e");
1802                                    pos.advance();
1803                                    _has_e = true;
1804                                    _has_period = true;
1805                                }
1806                                // +/- after e - accept the e and the sign (no decimal points allowed)
1807                                Some('+' | '-') => {
1808                                    result.push_str("e");
1809                                    pos.advance();
1810                                    result.push(stream.get_next().unwrap());
1811                                    pos.advance();
1812                                    _has_e = true;
1813                                    _has_period = true;
1814                                }
1815                                // Not a floating-point number
1816                                _ => {
1817                                    stream.unget('e');
1818                                    break;
1819                                }
1820                            }
1821                        }
1822                        // 0x????, 0o????, 0b???? at beginning
1823                        ch @ ('x' | 'o' | 'b' | 'X' | 'O' | 'B')
1824                            if c == '0' && result.len() <= 1 =>
1825                        {
1826                            result.push(ch);
1827                            stream.eat_next_and_advance(pos);
1828
1829                            valid = match ch {
1830                                'x' | 'X' => is_hex_digit,
1831                                'o' | 'O' => is_octal_digit,
1832                                'b' | 'B' => is_binary_digit,
1833                                c => unreachable!("x/X or o/O or b/B expected but gets '{}'", c),
1834                            };
1835
1836                            radix_base = Some(match ch {
1837                                'x' | 'X' => 16,
1838                                'o' | 'O' => 8,
1839                                'b' | 'B' => 2,
1840                                c => unreachable!("x/X or o/O or b/B expected but gets '{}'", c),
1841                            });
1842                        }
1843
1844                        _ => break,
1845                    }
1846                }
1847
1848                let num_pos = negated.map_or(start_pos, |negated_pos| {
1849                    result.insert(0, '-');
1850                    negated_pos
1851                });
1852
1853                if let Some(ref mut last) = state.last_token {
1854                    *last = result.clone();
1855                }
1856
1857                // Parse number
1858                let token = if let Some(radix) = radix_base {
1859                    let result = &result[2..];
1860
1861                    UNSIGNED_INT::from_str_radix(result, radix)
1862                        .map(|v| v as INT)
1863                        .map_or_else(
1864                            |_| Token::LexError(LERR::MalformedNumber(result.to_string()).into()),
1865                            Token::IntegerConstant,
1866                        )
1867                } else {
1868                    (|| {
1869                        let num = INT::from_str(&result).map(Token::IntegerConstant);
1870
1871                        // If integer parsing is unnecessary, try float instead
1872                        #[cfg(not(feature = "no_float"))]
1873                        if num.is_err() {
1874                            if let Ok(v) = crate::types::FloatWrapper::from_str(&result) {
1875                                return Token::FloatConstant((v, result).into());
1876                            }
1877                        }
1878
1879                        // Then try decimal
1880                        #[cfg(feature = "decimal")]
1881                        if num.is_err() {
1882                            if let Ok(v) = rust_decimal::Decimal::from_str(&result) {
1883                                return Token::DecimalConstant((v, result).into());
1884                            }
1885                        }
1886
1887                        // Then try decimal in scientific notation
1888                        #[cfg(feature = "decimal")]
1889                        if num.is_err() {
1890                            if let Ok(v) = rust_decimal::Decimal::from_scientific(&result) {
1891                                return Token::DecimalConstant((v, result).into());
1892                            }
1893                        }
1894
1895                        num.unwrap_or_else(|_| {
1896                            Token::LexError(LERR::MalformedNumber(result.to_string()).into())
1897                        })
1898                    })()
1899                };
1900
1901                return (token, num_pos);
1902            }
1903
1904            // " - string literal
1905            ('"', ..) => {
1906                return parse_string_literal(stream, state, pos, c, false, true, false)
1907                    .map_or_else(
1908                        |(err, err_pos)| (Token::LexError(err.into()), err_pos),
1909                        |(result, ..)| (Token::StringConstant(result.into()), start_pos),
1910                    );
1911            }
1912            // ` - string literal
1913            ('`', ..) => {
1914                // Start from the next line if at the end of line
1915                match stream.peek_next() {
1916                    // `\r - start from next line
1917                    Some('\r') => {
1918                        stream.eat_next_and_advance(pos);
1919                        // `\r\n
1920                        if stream.peek_next() == Some('\n') {
1921                            stream.eat_next_and_advance(pos);
1922                        }
1923                        pos.new_line();
1924                    }
1925                    // `\n - start from next line
1926                    Some('\n') => {
1927                        stream.eat_next_and_advance(pos);
1928                        pos.new_line();
1929                    }
1930                    _ => (),
1931                }
1932
1933                return parse_string_literal(stream, state, pos, c, true, false, true).map_or_else(
1934                    |(err, err_pos)| (Token::LexError(err.into()), err_pos),
1935                    |(result, interpolated, ..)| {
1936                        if interpolated {
1937                            (Token::InterpolatedString(result.into()), start_pos)
1938                        } else {
1939                            (Token::StringConstant(result.into()), start_pos)
1940                        }
1941                    },
1942                );
1943            }
1944
1945            // r - raw string literal
1946            ('#', '"' | '#') => {
1947                return parse_raw_string_literal(stream, state, pos, 0).map_or_else(
1948                    |(err, err_pos)| (Token::LexError(err.into()), err_pos),
1949                    |(result, ..)| (Token::StringConstant(result.into()), start_pos),
1950                );
1951            }
1952
1953            // ' - character literal
1954            ('\'', '\'') => {
1955                return (
1956                    Token::LexError(LERR::MalformedChar(String::new()).into()),
1957                    start_pos,
1958                )
1959            }
1960            ('\'', ..) => {
1961                return parse_string_literal(stream, state, pos, c, false, false, false)
1962                    .map_or_else(
1963                        |(err, err_pos)| (Token::LexError(err.into()), err_pos),
1964                        |(result, ..)| {
1965                            let mut chars = result.chars();
1966                            let first = chars.next().unwrap();
1967
1968                            if chars.next().is_some() {
1969                                (
1970                                    Token::LexError(LERR::MalformedChar(result.to_string()).into()),
1971                                    start_pos,
1972                                )
1973                            } else {
1974                                (Token::CharConstant(first), start_pos)
1975                            }
1976                        },
1977                    )
1978            }
1979
1980            // Braces
1981            ('{', ..) => return (Token::LeftBrace, start_pos),
1982            ('}', ..) => return (Token::RightBrace, start_pos),
1983
1984            // Unit
1985            ('(', ')') => {
1986                stream.eat_next_and_advance(pos);
1987                return (Token::Unit, start_pos);
1988            }
1989
1990            // Parentheses
1991            ('(', '*') => {
1992                stream.eat_next_and_advance(pos);
1993                return (Token::Reserved(Box::new("(*".into())), start_pos);
1994            }
1995            ('(', ..) => return (Token::LeftParen, start_pos),
1996            (')', ..) => return (Token::RightParen, start_pos),
1997
1998            // Indexing
1999            ('[', ..) => return (Token::LeftBracket, start_pos),
2000            (']', ..) => return (Token::RightBracket, start_pos),
2001
2002            // Map literal
2003            #[cfg(not(feature = "no_object"))]
2004            ('#', '{') => {
2005                stream.eat_next_and_advance(pos);
2006                return (Token::MapStart, start_pos);
2007            }
2008            // Shebang
2009            ('#', '!') => return (Token::Reserved(Box::new("#!".into())), start_pos),
2010
2011            ('#', ' ') => {
2012                stream.eat_next_and_advance(pos);
2013                let token = if stream.peek_next() == Some('{') {
2014                    stream.eat_next_and_advance(pos);
2015                    "# {"
2016                } else {
2017                    "#"
2018                };
2019                return (Token::Reserved(Box::new(token.into())), start_pos);
2020            }
2021
2022            ('#', ..) => return (Token::Reserved(Box::new("#".into())), start_pos),
2023
2024            // Operators
2025            ('+', '=') => {
2026                stream.eat_next_and_advance(pos);
2027                return (Token::PlusAssign, start_pos);
2028            }
2029            ('+', '+') => {
2030                stream.eat_next_and_advance(pos);
2031                return (Token::Reserved(Box::new("++".into())), start_pos);
2032            }
2033            ('+', ..) if !state.next_token_cannot_be_unary => return (Token::UnaryPlus, start_pos),
2034            ('+', ..) => return (Token::Plus, start_pos),
2035
2036            ('-', '0'..='9') if !state.next_token_cannot_be_unary => negated = Some(start_pos),
2037            ('-', '0'..='9') => return (Token::Minus, start_pos),
2038            ('-', '=') => {
2039                stream.eat_next_and_advance(pos);
2040                return (Token::MinusAssign, start_pos);
2041            }
2042            ('-', '>') => {
2043                stream.eat_next_and_advance(pos);
2044                return (Token::Reserved(Box::new("->".into())), start_pos);
2045            }
2046            ('-', '-') => {
2047                stream.eat_next_and_advance(pos);
2048                return (Token::Reserved(Box::new("--".into())), start_pos);
2049            }
2050            ('-', ..) if !state.next_token_cannot_be_unary => {
2051                return (Token::UnaryMinus, start_pos)
2052            }
2053            ('-', ..) => return (Token::Minus, start_pos),
2054
2055            ('*', ')') => {
2056                stream.eat_next_and_advance(pos);
2057                return (Token::Reserved(Box::new("*)".into())), start_pos);
2058            }
2059            ('*', '=') => {
2060                stream.eat_next_and_advance(pos);
2061                return (Token::MultiplyAssign, start_pos);
2062            }
2063            ('*', '*') => {
2064                stream.eat_next_and_advance(pos);
2065
2066                return (
2067                    if stream.peek_next() == Some('=') {
2068                        stream.eat_next_and_advance(pos);
2069                        Token::PowerOfAssign
2070                    } else {
2071                        Token::PowerOf
2072                    },
2073                    start_pos,
2074                );
2075            }
2076            ('*', ..) => return (Token::Multiply, start_pos),
2077
2078            // Comments
2079            ('/', '/') => {
2080                stream.eat_next_and_advance(pos);
2081
2082                let mut comment: Option<String> = match stream.peek_next() {
2083                    #[cfg(not(feature = "no_function"))]
2084                    #[cfg(feature = "metadata")]
2085                    Some('/') => {
2086                        stream.eat_next_and_advance(pos);
2087
2088                        // Long streams of `///...` are not doc-comments
2089                        match stream.peek_next() {
2090                            Some('/') => None,
2091                            _ => Some("///".into()),
2092                        }
2093                    }
2094                    #[cfg(feature = "metadata")]
2095                    Some('!') => {
2096                        stream.eat_next_and_advance(pos);
2097                        Some("//!".into())
2098                    }
2099                    _ if state.include_comments => Some("//".into()),
2100                    _ => None,
2101                };
2102
2103                while let Some(c) = stream.get_next() {
2104                    if c == '\r' {
2105                        // \r\n
2106                        if stream.peek_next() == Some('\n') {
2107                            stream.eat_next_and_advance(pos);
2108                        }
2109                        pos.new_line();
2110                        break;
2111                    }
2112                    if c == '\n' {
2113                        pos.new_line();
2114                        break;
2115                    }
2116                    if let Some(comment) = comment.as_mut() {
2117                        comment.push(c);
2118                    }
2119                    pos.advance();
2120                }
2121
2122                match comment {
2123                    #[cfg(feature = "metadata")]
2124                    Some(comment) if comment.starts_with("//!") => {
2125                        let g = &mut state.tokenizer_control.borrow_mut().global_comments;
2126                        if !g.is_empty() {
2127                            *g += "\n";
2128                        }
2129                        *g += &comment;
2130                    }
2131                    Some(comment) => return (Token::Comment(comment.into()), start_pos),
2132                    None => (),
2133                }
2134            }
2135            ('/', '*') => {
2136                state.comment_level += 1;
2137                stream.eat_next_and_advance(pos);
2138
2139                let mut comment: Option<String> = match stream.peek_next() {
2140                    #[cfg(not(feature = "no_function"))]
2141                    #[cfg(feature = "metadata")]
2142                    Some('*') => {
2143                        stream.eat_next_and_advance(pos);
2144
2145                        // Long streams of `/****...` are not doc-comments
2146                        match stream.peek_next() {
2147                            Some('*') => None,
2148                            _ => Some("/**".into()),
2149                        }
2150                    }
2151                    _ if state.include_comments => Some("/*".into()),
2152                    _ => None,
2153                };
2154
2155                state.comment_level =
2156                    scan_block_comment(stream, state.comment_level, pos, comment.as_mut());
2157
2158                if let Some(comment) = comment {
2159                    return (Token::Comment(comment.into()), start_pos);
2160                }
2161            }
2162
2163            ('/', '=') => {
2164                stream.eat_next_and_advance(pos);
2165                return (Token::DivideAssign, start_pos);
2166            }
2167            ('/', ..) => return (Token::Divide, start_pos),
2168
2169            (';', ..) => return (Token::SemiColon, start_pos),
2170            (',', ..) => return (Token::Comma, start_pos),
2171
2172            ('.', '.') => {
2173                stream.eat_next_and_advance(pos);
2174                return (
2175                    match stream.peek_next() {
2176                        Some('.') => {
2177                            stream.eat_next_and_advance(pos);
2178                            Token::Reserved(Box::new("...".into()))
2179                        }
2180                        Some('=') => {
2181                            stream.eat_next_and_advance(pos);
2182                            Token::InclusiveRange
2183                        }
2184                        _ => Token::ExclusiveRange,
2185                    },
2186                    start_pos,
2187                );
2188            }
2189            ('.', ..) => return (Token::Period, start_pos),
2190
2191            ('=', '=') => {
2192                stream.eat_next_and_advance(pos);
2193
2194                if stream.peek_next() == Some('=') {
2195                    stream.eat_next_and_advance(pos);
2196                    return (Token::Reserved(Box::new("===".into())), start_pos);
2197                }
2198
2199                return (Token::EqualsTo, start_pos);
2200            }
2201            ('=', '>') => {
2202                stream.eat_next_and_advance(pos);
2203                return (Token::DoubleArrow, start_pos);
2204            }
2205            ('=', ..) => return (Token::Equals, start_pos),
2206
2207            #[cfg(not(feature = "no_module"))]
2208            (':', ':') => {
2209                stream.eat_next_and_advance(pos);
2210
2211                if stream.peek_next() == Some('<') {
2212                    stream.eat_next_and_advance(pos);
2213                    return (Token::Reserved(Box::new("::<".into())), start_pos);
2214                }
2215
2216                return (Token::DoubleColon, start_pos);
2217            }
2218            (':', '=') => {
2219                stream.eat_next_and_advance(pos);
2220                return (Token::Reserved(Box::new(":=".into())), start_pos);
2221            }
2222            (':', ';') => {
2223                stream.eat_next_and_advance(pos);
2224                return (Token::Reserved(Box::new(":;".into())), start_pos);
2225            }
2226            (':', ..) => return (Token::Colon, start_pos),
2227
2228            ('<', '=') => {
2229                stream.eat_next_and_advance(pos);
2230                return (Token::LessThanEqualsTo, start_pos);
2231            }
2232            ('<', '-') => {
2233                stream.eat_next_and_advance(pos);
2234                return (Token::Reserved(Box::new("<-".into())), start_pos);
2235            }
2236            ('<', '<') => {
2237                stream.eat_next_and_advance(pos);
2238
2239                return (
2240                    if stream.peek_next() == Some('=') {
2241                        stream.eat_next_and_advance(pos);
2242                        Token::LeftShiftAssign
2243                    } else {
2244                        Token::LeftShift
2245                    },
2246                    start_pos,
2247                );
2248            }
2249            ('<', '|') => {
2250                stream.eat_next_and_advance(pos);
2251                return (Token::Reserved(Box::new("<|".into())), start_pos);
2252            }
2253            ('<', ..) => return (Token::LessThan, start_pos),
2254
2255            ('>', '=') => {
2256                stream.eat_next_and_advance(pos);
2257                return (Token::GreaterThanEqualsTo, start_pos);
2258            }
2259            ('>', '>') => {
2260                stream.eat_next_and_advance(pos);
2261
2262                return (
2263                    if stream.peek_next() == Some('=') {
2264                        stream.eat_next_and_advance(pos);
2265                        Token::RightShiftAssign
2266                    } else {
2267                        Token::RightShift
2268                    },
2269                    start_pos,
2270                );
2271            }
2272            ('>', ..) => return (Token::GreaterThan, start_pos),
2273
2274            ('!', 'i') => {
2275                stream.get_next().unwrap();
2276                if stream.peek_next() == Some('n') {
2277                    stream.get_next().unwrap();
2278                    match stream.peek_next() {
2279                        Some(c) if is_id_continue(c) => {
2280                            stream.unget('n');
2281                            stream.unget('i');
2282                            return (Token::Bang, start_pos);
2283                        }
2284                        _ => {
2285                            pos.advance();
2286                            pos.advance();
2287                            return (Token::NotIn, start_pos);
2288                        }
2289                    }
2290                }
2291
2292                stream.unget('i');
2293                return (Token::Bang, start_pos);
2294            }
2295            ('!', '=') => {
2296                stream.eat_next_and_advance(pos);
2297
2298                if stream.peek_next() == Some('=') {
2299                    stream.eat_next_and_advance(pos);
2300                    return (Token::Reserved(Box::new("!==".into())), start_pos);
2301                }
2302
2303                return (Token::NotEqualsTo, start_pos);
2304            }
2305            ('!', '.') => {
2306                stream.eat_next_and_advance(pos);
2307                return (Token::Reserved(Box::new("!.".into())), start_pos);
2308            }
2309            ('!', ..) => return (Token::Bang, start_pos),
2310
2311            ('|', '|') => {
2312                stream.eat_next_and_advance(pos);
2313                return (Token::Or, start_pos);
2314            }
2315            ('|', '=') => {
2316                stream.eat_next_and_advance(pos);
2317                return (Token::OrAssign, start_pos);
2318            }
2319            ('|', '>') => {
2320                stream.eat_next_and_advance(pos);
2321                return (Token::Reserved(Box::new("|>".into())), start_pos);
2322            }
2323            ('|', ..) => return (Token::Pipe, start_pos),
2324
2325            ('&', '&') => {
2326                stream.eat_next_and_advance(pos);
2327                return (Token::And, start_pos);
2328            }
2329            ('&', '=') => {
2330                stream.eat_next_and_advance(pos);
2331                return (Token::AndAssign, start_pos);
2332            }
2333            ('&', ..) => return (Token::Ampersand, start_pos),
2334
2335            ('^', '=') => {
2336                stream.eat_next_and_advance(pos);
2337                return (Token::XOrAssign, start_pos);
2338            }
2339            ('^', ..) => return (Token::XOr, start_pos),
2340
2341            ('~', ..) => return (Token::Reserved(Box::new("~".into())), start_pos),
2342
2343            ('%', '=') => {
2344                stream.eat_next_and_advance(pos);
2345                return (Token::ModuloAssign, start_pos);
2346            }
2347            ('%', ..) => return (Token::Modulo, start_pos),
2348
2349            ('@', ..) => return (Token::Reserved(Box::new("@".into())), start_pos),
2350
2351            ('$', ..) => return (Token::Reserved(Box::new("$".into())), start_pos),
2352
2353            ('?', '.') => {
2354                stream.eat_next_and_advance(pos);
2355                return (
2356                    #[cfg(not(feature = "no_object"))]
2357                    Token::Elvis,
2358                    #[cfg(feature = "no_object")]
2359                    Token::Reserved(Box::new("?.".into())),
2360                    start_pos,
2361                );
2362            }
2363            ('?', '?') => {
2364                stream.eat_next_and_advance(pos);
2365                return (Token::DoubleQuestion, start_pos);
2366            }
2367            ('?', '[') => {
2368                stream.eat_next_and_advance(pos);
2369                return (
2370                    #[cfg(not(feature = "no_index"))]
2371                    Token::QuestionBracket,
2372                    #[cfg(feature = "no_index")]
2373                    Token::Reserved(Box::new("?[".into())),
2374                    start_pos,
2375                );
2376            }
2377            ('?', ..) => return (Token::Reserved(Box::new("?".into())), start_pos),
2378
2379            // letter or underscore ...
2380            _ if is_id_first_alphabetic(c) || c == '_' => {
2381                return parse_identifier_token(stream, state, pos, start_pos, c);
2382            }
2383
2384            // \n
2385            ('\n', ..) => pos.new_line(),
2386
2387            // Whitespace - follows Rust's SPACE, TAB, CR, LF, FF which is the same as WhatWG.
2388            (ch, ..) if ch.is_ascii_whitespace() => (),
2389
2390            _ => {
2391                return (
2392                    Token::LexError(LERR::UnexpectedInput(c.to_string()).into()),
2393                    start_pos,
2394                )
2395            }
2396        }
2397    }
2398
2399    pos.advance();
2400
2401    (Token::EOF, *pos)
2402}
2403
2404/// Get the next token, parsing it as an identifier.
2405fn parse_identifier_token(
2406    stream: &mut (impl InputStream + ?Sized),
2407    state: &mut TokenizeState,
2408    pos: &mut Position,
2409    start_pos: Position,
2410    first_char: char,
2411) -> (Token, Position) {
2412    let mut identifier = SmartString::new_const();
2413    identifier.push(first_char);
2414    if let Some(ref mut last) = state.last_token {
2415        last.clear();
2416        last.push(first_char);
2417    }
2418
2419    while let Some(next_char) = stream.peek_next() {
2420        match next_char {
2421            x if is_id_continue(x) => {
2422                stream.eat_next_and_advance(pos);
2423                identifier.push(x);
2424                if let Some(ref mut last) = state.last_token {
2425                    last.push(x);
2426                }
2427            }
2428            _ => break,
2429        }
2430    }
2431
2432    if let Some(token) = Token::lookup_symbol_from_syntax(&identifier) {
2433        return (token, start_pos);
2434    }
2435
2436    if is_reserved_keyword_or_symbol(&identifier).0 {
2437        return (Token::Reserved(Box::new(identifier)), start_pos);
2438    }
2439
2440    if !is_valid_identifier(&identifier) {
2441        return (
2442            Token::LexError(LERR::MalformedIdentifier(identifier.to_string()).into()),
2443            start_pos,
2444        );
2445    }
2446
2447    (Token::Identifier(identifier.into()), start_pos)
2448}
2449
2450/// _(internals)_ Is a text string a valid identifier?
2451/// Exported under the `internals` feature only.
2452#[must_use]
2453pub fn is_valid_identifier(name: &str) -> bool {
2454    let mut first_alphabetic = false;
2455
2456    for ch in name.chars() {
2457        match ch {
2458            '_' => (),
2459            _ if is_id_first_alphabetic(ch) => first_alphabetic = true,
2460            _ if !first_alphabetic => return false,
2461            _ if char::is_ascii_alphanumeric(&ch) => (),
2462            _ => return false,
2463        }
2464    }
2465
2466    first_alphabetic
2467}
2468
2469/// _(internals)_ Is a text string a valid script-defined function name?
2470/// Exported under the `internals` feature only.
2471#[inline(always)]
2472#[must_use]
2473pub fn is_valid_function_name(name: &str) -> bool {
2474    is_valid_identifier(name)
2475        && !is_reserved_keyword_or_symbol(name).0
2476        && Token::lookup_symbol_from_syntax(name).is_none()
2477}
2478
/// Is a character valid to start an identifier?
///
/// With the `unicode-xid-ident` feature this follows Unicode XID_Start;
/// otherwise only ASCII letters qualify.
#[inline(always)]
#[must_use]
#[allow(clippy::missing_const_for_fn)]
pub fn is_id_first_alphabetic(x: char) -> bool {
    #[cfg(feature = "unicode-xid-ident")]
    return unicode_xid::UnicodeXID::is_xid_start(x);
    #[cfg(not(feature = "unicode-xid-ident"))]
    return matches!(x, 'a'..='z' | 'A'..='Z');
}
2489
/// Is a character valid for an identifier?
///
/// With the `unicode-xid-ident` feature this follows Unicode XID_Continue;
/// otherwise ASCII letters, digits and the underscore qualify.
#[inline(always)]
#[must_use]
#[allow(clippy::missing_const_for_fn)]
pub fn is_id_continue(x: char) -> bool {
    #[cfg(feature = "unicode-xid-ident")]
    return unicode_xid::UnicodeXID::is_xid_continue(x);
    #[cfg(not(feature = "unicode-xid-ident"))]
    return matches!(x, '0'..='9' | 'a'..='z' | 'A'..='Z' | '_');
}
2500
/// Is a piece of syntax a reserved keyword or reserved symbol?
///
/// # Return values
///
/// The first `bool` indicates whether it is a reserved keyword or symbol.
///
/// The second `bool` indicates whether the keyword can be called normally as a function.
/// `false` if it is not a reserved keyword.
///
/// The third `bool` indicates whether the keyword can be called in method-call style.
/// `false` if it is not a reserved keyword or it cannot be called as a function.
#[inline]
#[must_use]
pub fn is_reserved_keyword_or_symbol(syntax: &str) -> (bool, bool, bool) {
    // This implementation is based upon a pre-calculated table generated
    // by GNU `gperf` on the list of keywords.
    let utf8 = syntax.as_bytes();
    let len = utf8.len();

    // Quick rejection: length outside the range of any reserved word.
    if !(MIN_RESERVED_LEN..=MAX_RESERVED_LEN).contains(&len) {
        return (false, false, false);
    }

    // Perfect-hash value: the length plus the association values of the
    // second byte (for multi-byte syntax), the first byte, and the last byte.
    let mut hash_val = len;

    match len {
        1 => (),
        _ => hash_val += RESERVED_ASSOC_VALUES[utf8[1] as usize] as usize,
    }
    hash_val += RESERVED_ASSOC_VALUES[utf8[0] as usize] as usize;
    hash_val += RESERVED_ASSOC_VALUES[utf8[len - 1] as usize] as usize;

    // Quick rejection: hash outside the populated range of the table.
    if !(MIN_RESERVED_HASH_VALUE..=MAX_RESERVED_HASH_VALUE).contains(&hash_val) {
        return (false, false, false);
    }

    match RESERVED_LIST[hash_val] {
        // Empty slot in the hash table - not reserved.
        ("", ..) => (false, false, false),
        (s, true, a, b) => {
            // Fail early to avoid calling memcmp().
            // Since we are already working with bytes, might as well check the first one.
            let is_reserved = s.len() == len && s.as_bytes()[0] == utf8[0] && s == syntax;
            (is_reserved, is_reserved && a, is_reserved && a && b)
        }
        // Slot holds a non-reserved entry.
        _ => (false, false, false),
    }
}
2548
/// _(internals)_ A type that implements the [`InputStream`] trait.
/// Exported under the `internals` feature only.
///
/// Multiple character streams are jointed together to form one single stream.
pub struct MultiInputsStream<'a> {
    /// Buffered characters, if any.
    ///
    /// Holds up to two characters pushed back via `unget`; slot 0 is filled
    /// first, and the most recently ungot character is returned first.
    pub buf: [Option<char>; 2],
    /// The current stream index.
    ///
    /// Advanced past each exhausted stream in `streams`.
    pub index: usize,
    /// Input character streams.
    pub streams: StaticVec<Peekable<Chars<'a>>>,
}
2561
2562impl InputStream for MultiInputsStream<'_> {
2563    #[inline]
2564    fn unget(&mut self, ch: char) {
2565        match self.buf {
2566            [None, ..] => self.buf[0] = Some(ch),
2567            [_, None] => self.buf[1] = Some(ch),
2568            _ => unreachable!("cannot unget more than 2 characters!"),
2569        }
2570    }
2571    fn get_next(&mut self) -> Option<char> {
2572        match self.buf {
2573            [None, ..] => (),
2574            [ch @ Some(_), None] => {
2575                self.buf[0] = None;
2576                return ch;
2577            }
2578            [_, ch @ Some(_)] => {
2579                self.buf[1] = None;
2580                return ch;
2581            }
2582        }
2583
2584        loop {
2585            if self.index >= self.streams.len() {
2586                // No more streams
2587                return None;
2588            }
2589            if let Some(ch) = self.streams[self.index].next() {
2590                // Next character in main stream
2591                return Some(ch);
2592            }
2593            // Jump to the next stream
2594            self.index += 1;
2595        }
2596    }
2597    fn peek_next(&mut self) -> Option<char> {
2598        match self.buf {
2599            [None, ..] => (),
2600            [ch @ Some(_), None] => return ch,
2601            [_, ch @ Some(_)] => return ch,
2602        }
2603
2604        loop {
2605            if self.index >= self.streams.len() {
2606                // No more streams
2607                return None;
2608            }
2609            if let Some(&ch) = self.streams[self.index].peek() {
2610                // Next character in main stream
2611                return Some(ch);
2612            }
2613            // Jump to the next stream
2614            self.index += 1;
2615        }
2616    }
2617}
2618
/// _(internals)_ An iterator on a [`Token`] stream.
/// Exported under the `internals` feature only.
pub struct TokenIterator<'a> {
    /// Reference to the scripting `Engine`.
    pub engine: &'a Engine,
    /// Current state.
    pub state: TokenizeState,
    /// Current position.
    pub pos: Position,
    /// Input character stream.
    pub stream: MultiInputsStream<'a>,
    /// A processor function that maps a token to another.
    ///
    /// If set, it is applied to every token produced (after reserved/custom
    /// keyword resolution) and its return value replaces the token.
    pub token_mapper: Option<&'a OnParseTokenCallback>,
}
2633
impl<'a> Iterator for TokenIterator<'a> {
    type Item = (Token, Position);

    /// Get the next token, running it through reserved/custom keyword
    /// resolution, the optional token mapper, and (when enabled) the
    /// script compressor.
    fn next(&mut self) -> Option<Self::Item> {
        // Snapshot flags from the shared control block before tokenizing.
        // The borrow is scoped so it is released before `get_next_token`.
        let (within_interpolated, compress_script) = {
            let control = &mut *self.state.tokenizer_control.borrow_mut();

            if control.is_within_text {
                // Switch to text mode terminated by back-tick
                self.state.is_within_text_terminated_by = Some("`".to_string().into());
                // Reset it
                control.is_within_text = false;
            }

            (
                self.state.is_within_text_terminated_by.is_some(),
                control.compressed.is_some(),
            )
        };

        let (token, pos) = match get_next_token(&mut self.stream, &mut self.state, &mut self.pos) {
            // {EOF}
            r @ (Token::EOF, _) => return Some(r),
            // {EOF} after unterminated string.
            // The only case where `TokenizeState.is_within_text_terminated_by` is set is when
            // a verbatim string or a string with continuation encounters {EOF}.
            // This is necessary to handle such cases for line-by-line parsing, but for an entire
            // script it is a syntax error.
            (Token::StringConstant(..), pos) if self.state.is_within_text_terminated_by.is_some() => {
                self.state.is_within_text_terminated_by = None;
                return Some((Token::LexError(LERR::UnterminatedString.into()), pos));
            }
            // Reserved keyword/symbol.
            // Match on (syntax, is-custom-keyword); with `no_custom_syntax`
            // the second element is hard-wired to `false`.
            (Token::Reserved(s), pos) => (match
                (s.as_str(),
                    #[cfg(not(feature = "no_custom_syntax"))]
                    self.engine.custom_keywords.contains_key(&*s),
                    #[cfg(feature = "no_custom_syntax")]
                    false
                )
            {
                // Helpful errors for operators borrowed from other languages.
                ("===", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                    "'===' is not a valid operator. This is not JavaScript! Should it be '=='?".to_string(),
                ).into()),
                ("!==", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                    "'!==' is not a valid operator. This is not JavaScript! Should it be '!='?".to_string(),
                ).into()),
                ("->", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                    "'->' is not a valid symbol. This is not C or C++!".to_string()).into()),
                ("<-", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                    "'<-' is not a valid symbol. This is not Go! Should it be '<='?".to_string(),
                ).into()),
                (":=", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                    "':=' is not a valid assignment operator. This is not Go or Pascal! Should it be simply '='?".to_string(),
                ).into()),
                (":;", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                    "':;' is not a valid symbol. Should it be '::'?".to_string(),
                ).into()),
                ("::<", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                    "'::<>' is not a valid symbol. This is not Rust! Should it be '::'?".to_string(),
                ).into()),
                ("(*" | "*)", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                    "'(* .. *)' is not a valid comment format. This is not Pascal! Should it be '/* .. */'?".to_string(),
                ).into()),
                ("# {", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                    "'#' is not a valid symbol. Should it be '#{'?".to_string(),
                ).into()),
                // Reserved keyword/operator that is custom.
                #[cfg(not(feature = "no_custom_syntax"))]
                (.., true) => Token::Custom(s),
                #[cfg(feature = "no_custom_syntax")]
                (.., true) => unreachable!("no custom operators"),
                // Reserved keyword that is not custom and disabled.
                (token, false) if self.engine.is_symbol_disabled(token) => {
                    let msg = format!("reserved {} '{token}' is disabled", if is_valid_identifier(token) { "keyword"} else {"symbol"});
                    Token::LexError(LERR::ImproperSymbol(s.to_string(), msg).into())
                },
                // Reserved keyword/operator that is not custom.
                (.., false) => Token::Reserved(s),
            }, pos),
            // Custom keyword
            #[cfg(not(feature = "no_custom_syntax"))]
            (Token::Identifier(s), pos) if self.engine.custom_keywords.contains_key(&*s) => {
                (Token::Custom(s), pos)
            }
            // Custom keyword/symbol - must be disabled
            #[cfg(not(feature = "no_custom_syntax"))]
            (token, pos) if token.is_literal() && self.engine.custom_keywords.contains_key(token.literal_syntax()) => {
                // Active standard keyword should never be a custom keyword!
                debug_assert!(self.engine.is_symbol_disabled(token.literal_syntax()), "{:?} is an active keyword", token);

                (Token::Custom(Box::new(token.literal_syntax().into())), pos)
            }
            // Disabled symbol
            (token, pos) if token.is_literal() && self.engine.is_symbol_disabled(token.literal_syntax()) => {
                (Token::Reserved(Box::new(token.literal_syntax().into())), pos)
            }
            // Normal symbol
            r => r,
        };

        // Run the mapper, if any
        let token = match self.token_mapper {
            Some(func) => func(token, pos, &self.state),
            None => token,
        };

        // Collect the compressed script, if needed
        if compress_script {
            let control = &mut *self.state.tokenizer_control.borrow_mut();

            if token != Token::EOF {
                if let Some(ref mut compressed) = control.compressed {
                    use std::fmt::Write;

                    let last_token = self.state.last_token.as_ref().unwrap();
                    let mut buf = SmartString::new_const();

                    if last_token.is_empty() {
                        // No raw text recorded; fall back to the token's display form.
                        write!(buf, "{token}").unwrap();
                    } else if within_interpolated
                        && matches!(
                            token,
                            Token::StringConstant(..) | Token::InterpolatedString(..)
                        )
                    {
                        // Within an interpolated string: append the last token's
                        // raw text minus its first character.
                        *compressed += &last_token[1..];
                    } else {
                        buf = last_token.clone();
                    }

                    // Insert a separating space where two adjacent identifier-like
                    // tokens would otherwise fuse into a single word.
                    if !buf.is_empty() && !compressed.is_empty() {
                        let cur = buf.chars().next().unwrap();

                        if cur == '_' || is_id_first_alphabetic(cur) || is_id_continue(cur) {
                            let prev = compressed.chars().last().unwrap();

                            if prev == '_' || is_id_first_alphabetic(prev) || is_id_continue(prev) {
                                *compressed += " ";
                            }
                        }
                    }

                    *compressed += &buf;
                }
            }
        }

        Some((token, pos))
    }
}
2785
// Marker: `TokenIterator` upholds the `FusedIterator` contract
// (never yields `Some` again after returning `None`).
impl FusedIterator for TokenIterator<'_> {}
2787
2788impl Engine {
2789    /// _(internals)_ Tokenize an input text stream.
2790    /// Exported under the `internals` feature only.
2791    #[expose_under_internals]
2792    #[inline(always)]
2793    #[must_use]
2794    fn lex<'a>(
2795        &'a self,
2796        inputs: impl IntoIterator<Item = &'a (impl AsRef<str> + 'a)>,
2797    ) -> (TokenIterator<'a>, TokenizerControl) {
2798        self.lex_raw(inputs, self.token_mapper.as_deref())
2799    }
2800    /// _(internals)_ Tokenize an input text stream with a mapping function.
2801    /// Exported under the `internals` feature only.
2802    #[expose_under_internals]
2803    #[inline(always)]
2804    #[must_use]
2805    fn lex_with_map<'a>(
2806        &'a self,
2807        inputs: impl IntoIterator<Item = &'a (impl AsRef<str> + 'a)>,
2808        token_mapper: &'a OnParseTokenCallback,
2809    ) -> (TokenIterator<'a>, TokenizerControl) {
2810        self.lex_raw(inputs, Some(token_mapper))
2811    }
2812    /// Tokenize an input text stream with an optional mapping function.
2813    #[inline]
2814    #[must_use]
2815    pub(crate) fn lex_raw<'a>(
2816        &'a self,
2817        inputs: impl IntoIterator<Item = &'a (impl AsRef<str> + 'a)>,
2818        token_mapper: Option<&'a OnParseTokenCallback>,
2819    ) -> (TokenIterator<'a>, TokenizerControl) {
2820        let buffer: TokenizerControl = RefCell::new(TokenizerControlBlock::new()).into();
2821        let buffer2 = buffer.clone();
2822
2823        (
2824            TokenIterator {
2825                engine: self,
2826                state: TokenizeState {
2827                    #[cfg(not(feature = "unchecked"))]
2828                    max_string_len: std::num::NonZeroUsize::new(self.max_string_size()),
2829                    next_token_cannot_be_unary: false,
2830                    tokenizer_control: buffer,
2831                    comment_level: 0,
2832                    include_comments: false,
2833                    is_within_text_terminated_by: None,
2834                    last_token: None,
2835                },
2836                pos: Position::new(1, 0),
2837                stream: MultiInputsStream {
2838                    buf: [None, None],
2839                    streams: inputs
2840                        .into_iter()
2841                        .map(|s| s.as_ref().chars().peekable())
2842                        .collect(),
2843                    index: 0,
2844                },
2845                token_mapper,
2846            },
2847            buffer2,
2848        )
2849    }
2850}