rhai/
tokenizer.rs

1//! Main module defining the lexer and parser.
2
3use rhai_codegen::expose_under_internals;
4
5use crate::engine::Precedence;
6use crate::func::native::OnParseTokenCallback;
7use crate::{Engine, Identifier, LexError, Position, SmartString, StaticVec, INT, UNSIGNED_INT};
8#[cfg(feature = "no_std")]
9use std::prelude::v1::*;
10use std::{
11    cell::RefCell,
12    char, fmt,
13    fmt::Write,
14    iter::{repeat, FusedIterator, Peekable},
15    rc::Rc,
16    str::{Chars, FromStr},
17};
18
/// _(internals)_ A type containing commands to control the tokenizer.
#[derive(Debug, Clone, Eq, PartialEq, Default, Hash)]
pub struct TokenizerControlBlock {
    /// Is the current tokenizer position within an interpolated text string?
    ///
    /// This flag allows switching the tokenizer back to _text_ parsing after an interpolation stream.
    pub is_within_text: bool,
    /// Return the next character in the input stream instead of the next token?
    ///
    /// Not available under `no_custom_syntax`.
    #[cfg(not(feature = "no_custom_syntax"))]
    pub in_char_mode: bool,
    /// Global comments.
    ///
    /// Only available under the `metadata` feature.
    #[cfg(feature = "metadata")]
    pub global_comments: String,
    /// Whitespace-compressed version of the script (if any).
    ///
    /// Set to `Some` in order to collect a compressed script.
    pub compressed: Option<String>,
}
37
impl TokenizerControlBlock {
    /// Create a new `TokenizerControlBlock`.
    ///
    /// All flags start cleared and no compressed script is collected
    /// (`compressed` is `None`).  This is the `const` equivalent of the
    /// derived [`Default`] implementation.
    #[inline]
    #[must_use]
    pub const fn new() -> Self {
        Self {
            is_within_text: false,
            #[cfg(not(feature = "no_custom_syntax"))]
            in_char_mode: false,
            #[cfg(feature = "metadata")]
            global_comments: String::new(),
            compressed: None,
        }
    }
}
53
/// _(internals)_ A shared object that allows control of the tokenizer from outside.
pub type TokenizerControl = Rc<RefCell<TokenizerControlBlock>>;

/// Short alias for the lexer error type, for brevity within this module.
type LERR = LexError;

/// Separator character allowed inside numbers (e.g. `123_456`); skipped during parsing.
const NUMBER_SEPARATOR: char = '_';

/// A stream of tokens.
pub type TokenStream<'a> = Peekable<TokenIterator<'a>>;
64
/// _(internals)_ A Rhai language token.
/// Exported under the `internals` feature only.
#[derive(Debug, PartialEq, Clone, Hash)]
#[non_exhaustive]
pub enum Token {
    /// An `INT` constant.
    IntegerConstant(INT),
    /// A `FLOAT` constant, including its text representation.
    ///
    /// Reserved under the `no_float` feature.
    #[cfg(not(feature = "no_float"))]
    FloatConstant(Box<(crate::types::FloatWrapper<crate::FLOAT>, Identifier)>),
    /// A [`Decimal`][rust_decimal::Decimal] constant.
    ///
    /// Requires the `decimal` feature, including its text representation.
    #[cfg(feature = "decimal")]
    DecimalConstant(Box<(rust_decimal::Decimal, Identifier)>),
    /// An identifier.
    Identifier(Box<Identifier>),
    /// A character constant.
    CharConstant(char),
    /// A string constant.
    StringConstant(Box<SmartString>),
    /// An interpolated string.
    InterpolatedString(Box<SmartString>),
    /// `{`
    LeftBrace,
    /// `}`
    RightBrace,
    /// `(`
    LeftParen,
    /// `)`
    RightParen,
    /// `[`
    LeftBracket,
    /// `]`
    RightBracket,
    /// `()`
    Unit,
    /// `+`
    Plus,
    /// `+` (unary)
    UnaryPlus,
    /// `-`
    Minus,
    /// `-` (unary)
    UnaryMinus,
    /// `*`
    Multiply,
    /// `/`
    Divide,
    /// `%`
    Modulo,
    /// `**`
    PowerOf,
    /// `<<`
    LeftShift,
    /// `>>`
    RightShift,
    /// `;`
    SemiColon,
    /// `:`
    Colon,
    /// `::`
    DoubleColon,
    /// `=>`
    DoubleArrow,
    /// `_`
    Underscore,
    /// `,`
    Comma,
    /// `.`
    Period,
    /// `?.`
    ///
    /// Reserved under the `no_object` feature.
    #[cfg(not(feature = "no_object"))]
    Elvis,
    /// `??`
    DoubleQuestion,
    /// `?[`
    ///
    /// Reserved under the `no_index` feature.
    #[cfg(not(feature = "no_index"))]
    QuestionBracket,
    /// `..`
    ExclusiveRange,
    /// `..=`
    InclusiveRange,
    /// `#{`
    MapStart,
    /// `=`
    Equals,
    /// `true`
    True,
    /// `false`
    False,
    /// `let`
    Let,
    /// `const`
    Const,
    /// `if`
    If,
    /// `else`
    Else,
    /// `switch`
    Switch,
    /// `do`
    Do,
    /// `while`
    While,
    /// `until`
    Until,
    /// `loop`
    Loop,
    /// `for`
    For,
    /// `in`
    In,
    /// `!in`
    NotIn,
    /// `<`
    LessThan,
    /// `>`
    GreaterThan,
    /// `<=`
    LessThanEqualsTo,
    /// `>=`
    GreaterThanEqualsTo,
    /// `==`
    EqualsTo,
    /// `!=`
    NotEqualsTo,
    /// `!`
    Bang,
    /// `|`
    Pipe,
    /// `||`
    Or,
    /// `^`
    XOr,
    /// `&`
    Ampersand,
    /// `&&`
    And,
    /// `fn`
    ///
    /// Reserved under the `no_function` feature.
    #[cfg(not(feature = "no_function"))]
    Fn,
    /// `continue`
    Continue,
    /// `break`
    Break,
    /// `return`
    Return,
    /// `throw`
    Throw,
    /// `try`
    Try,
    /// `catch`
    Catch,
    /// `+=`
    PlusAssign,
    /// `-=`
    MinusAssign,
    /// `*=`
    MultiplyAssign,
    /// `/=`
    DivideAssign,
    /// `<<=`
    LeftShiftAssign,
    /// `>>=`
    RightShiftAssign,
    /// `&=`
    AndAssign,
    /// `|=`
    OrAssign,
    /// `^=`
    XOrAssign,
    /// `%=`
    ModuloAssign,
    /// `**=`
    PowerOfAssign,
    /// `private`
    ///
    /// Reserved under the `no_function` feature.
    #[cfg(not(feature = "no_function"))]
    Private,
    /// `import`
    ///
    /// Reserved under the `no_module` feature.
    #[cfg(not(feature = "no_module"))]
    Import,
    /// `export`
    ///
    /// Reserved under the `no_module` feature.
    #[cfg(not(feature = "no_module"))]
    Export,
    /// `as`
    ///
    /// Reserved under the `no_module` feature.
    #[cfg(not(feature = "no_module"))]
    As,
    /// A lexer error.
    LexError(Box<LexError>),
    /// A comment block.
    Comment(Box<String>),
    /// A reserved symbol.
    Reserved(Box<Identifier>),
    /// A custom keyword.
    ///
    /// Not available under `no_custom_syntax`.
    #[cfg(not(feature = "no_custom_syntax"))]
    Custom(Box<Identifier>),
    /// A single character from the input stream, unprocessed.
    ///
    /// Not available under `no_custom_syntax`.
    #[cfg(not(feature = "no_custom_syntax"))]
    UnprocessedRawChar(char),
    /// End of the input stream.
    /// Used as a placeholder for the end of input.
    EOF,
}
289
290impl fmt::Display for Token {
291    #[inline(always)]
292    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
293        #[allow(clippy::enum_glob_use)]
294        use Token::*;
295
296        match self {
297            IntegerConstant(i) => write!(f, "{i}"),
298            #[cfg(not(feature = "no_float"))]
299            FloatConstant(v) => write!(f, "{}", v.0),
300            #[cfg(feature = "decimal")]
301            DecimalConstant(d) => write!(f, "{}", d.0),
302            StringConstant(s) => write!(f, r#""{s}""#),
303            InterpolatedString(..) => f.write_str("string"),
304            CharConstant(c) => write!(f, "{c}"),
305            Identifier(s) => f.write_str(s),
306            Reserved(s) => f.write_str(s),
307            #[cfg(not(feature = "no_custom_syntax"))]
308            Custom(s) => f.write_str(s),
309            #[cfg(not(feature = "no_custom_syntax"))]
310            UnprocessedRawChar(c) => f.write_char(*c),
311            LexError(err) => write!(f, "{err}"),
312            Comment(s) => f.write_str(s),
313
314            EOF => f.write_str("{EOF}"),
315
316            token => f.write_str(token.literal_syntax()),
317        }
318    }
319}
320
// Table-driven keyword recognizer generated by GNU `gperf` on the file `tools/keywords.txt`.
//
// When adding new keywords, make sure to update `tools/keywords.txt` and re-generate this.

/// Shortest keyword/symbol recognized by the `gperf`-generated table.
const MIN_KEYWORD_LEN: usize = 1;
/// Longest keyword/symbol recognized by the `gperf`-generated table.
const MAX_KEYWORD_LEN: usize = 8;
/// Smallest hash value produced for a valid keyword.
const MIN_KEYWORD_HASH_VALUE: usize = 1;
/// Largest hash value produced for a valid keyword.
const MAX_KEYWORD_HASH_VALUE: usize = 152;

/// `gperf`-generated character association table used to hash keywords.
/// Entries of 153 (greater than `MAX_KEYWORD_HASH_VALUE`) push the computed
/// hash out of the valid range, rejecting characters that appear in no keyword.
static KEYWORD_ASSOC_VALUES: [u8; 257] = [
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 115, 153, 100, 153, 110,
    105, 40, 80, 2, 20, 25, 125, 95, 15, 40, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 55,
    35, 10, 5, 0, 30, 110, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 120, 105, 100, 85, 90, 153, 125, 5,
    0, 125, 35, 10, 100, 153, 20, 0, 153, 10, 0, 45, 55, 0, 153, 50, 55, 5, 0, 153, 0, 0, 35, 153,
    45, 50, 30, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
    153,
];
/// `gperf`-generated perfect-hash table mapping hash values to keyword strings
/// and their corresponding [`Token`]s.
///
/// Entries of `("", Token::EOF)` are unused hash slots. Feature-gated keywords
/// are replaced by an empty slot when the corresponding feature disables them,
/// keeping the table length (and all hash values) stable.
static KEYWORDS_LIST: [(&str, Token); 153] = [
    ("", Token::EOF),
    (">", Token::GreaterThan),
    (">=", Token::GreaterThanEqualsTo),
    (")", Token::RightParen),
    ("", Token::EOF),
    ("const", Token::Const),
    ("=", Token::Equals),
    ("==", Token::EqualsTo),
    ("continue", Token::Continue),
    ("", Token::EOF),
    ("catch", Token::Catch),
    ("<", Token::LessThan),
    ("<=", Token::LessThanEqualsTo),
    ("for", Token::For),
    ("loop", Token::Loop),
    ("", Token::EOF),
    (".", Token::Period),
    ("<<", Token::LeftShift),
    ("<<=", Token::LeftShiftAssign),
    ("", Token::EOF),
    ("false", Token::False),
    ("*", Token::Multiply),
    ("*=", Token::MultiplyAssign),
    ("let", Token::Let),
    ("", Token::EOF),
    ("while", Token::While),
    ("+", Token::Plus),
    ("+=", Token::PlusAssign),
    ("", Token::EOF),
    ("", Token::EOF),
    ("throw", Token::Throw),
    ("}", Token::RightBrace),
    (">>", Token::RightShift),
    (">>=", Token::RightShiftAssign),
    ("", Token::EOF),
    ("", Token::EOF),
    (";", Token::SemiColon),
    ("=>", Token::DoubleArrow),
    ("", Token::EOF),
    ("else", Token::Else),
    ("", Token::EOF),
    ("/", Token::Divide),
    ("/=", Token::DivideAssign),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("{", Token::LeftBrace),
    ("**", Token::PowerOf),
    ("**=", Token::PowerOfAssign),
    ("", Token::EOF),
    ("", Token::EOF),
    ("|", Token::Pipe),
    ("|=", Token::OrAssign),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    (":", Token::Colon),
    ("..", Token::ExclusiveRange),
    ("..=", Token::InclusiveRange),
    ("", Token::EOF),
    ("until", Token::Until),
    ("switch", Token::Switch),
    #[cfg(not(feature = "no_function"))]
    ("private", Token::Private),
    #[cfg(feature = "no_function")]
    ("", Token::EOF),
    ("try", Token::Try),
    ("true", Token::True),
    ("break", Token::Break),
    ("return", Token::Return),
    #[cfg(not(feature = "no_function"))]
    ("fn", Token::Fn),
    #[cfg(feature = "no_function")]
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    #[cfg(not(feature = "no_module"))]
    ("import", Token::Import),
    #[cfg(feature = "no_module")]
    ("", Token::EOF),
    #[cfg(not(feature = "no_object"))]
    ("?.", Token::Elvis),
    #[cfg(feature = "no_object")]
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    #[cfg(not(feature = "no_module"))]
    ("export", Token::Export),
    #[cfg(feature = "no_module")]
    ("", Token::EOF),
    ("in", Token::In),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("(", Token::LeftParen),
    ("||", Token::Or),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("^", Token::XOr),
    ("^=", Token::XOrAssign),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("_", Token::Underscore),
    ("::", Token::DoubleColon),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("-", Token::Minus),
    ("-=", Token::MinusAssign),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("]", Token::RightBracket),
    ("()", Token::Unit),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("&", Token::Ampersand),
    ("&=", Token::AndAssign),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("%", Token::Modulo),
    ("%=", Token::ModuloAssign),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("!", Token::Bang),
    ("!=", Token::NotEqualsTo),
    ("!in", Token::NotIn),
    ("", Token::EOF),
    ("", Token::EOF),
    ("[", Token::LeftBracket),
    ("if", Token::If),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    (",", Token::Comma),
    ("do", Token::Do),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    #[cfg(not(feature = "no_module"))]
    ("as", Token::As),
    #[cfg(feature = "no_module")]
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    #[cfg(not(feature = "no_index"))]
    ("?[", Token::QuestionBracket),
    #[cfg(feature = "no_index")]
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("??", Token::DoubleQuestion),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("&&", Token::And),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("", Token::EOF),
    ("#{", Token::MapStart),
];
522
// Table-driven reserved symbol recognizer generated by GNU `gperf` on the file `tools/reserved.txt`.
//
// When adding new reserved symbols, make sure to update `tools/reserved.txt` and re-generate this.

/// Shortest reserved symbol recognized by the `gperf`-generated table.
const MIN_RESERVED_LEN: usize = 1;
/// Longest reserved symbol recognized by the `gperf`-generated table.
const MAX_RESERVED_LEN: usize = 10;
/// Smallest hash value produced for a valid reserved symbol.
const MIN_RESERVED_HASH_VALUE: usize = 1;
/// Largest hash value produced for a valid reserved symbol.
const MAX_RESERVED_HASH_VALUE: usize = 149;

/// `gperf`-generated character association table used to hash reserved symbols.
/// Entries of 150 (greater than `MAX_RESERVED_HASH_VALUE`) push the computed
/// hash out of the valid range, rejecting characters that appear in no reserved symbol.
static RESERVED_ASSOC_VALUES: [u8; 256] = [
    150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
    150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 10, 150, 5, 35, 150, 150,
    150, 45, 35, 30, 30, 150, 20, 15, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 35,
    30, 15, 5, 25, 0, 25, 150, 150, 150, 150, 150, 65, 150, 150, 150, 150, 150, 150, 150, 150, 150,
    150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 40, 150, 150, 150, 150, 150, 0, 150, 0,
    0, 0, 15, 45, 10, 15, 150, 150, 35, 25, 10, 50, 0, 150, 5, 0, 15, 0, 5, 25, 45, 15, 150, 150,
    25, 150, 20, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
    150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
    150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
    150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
    150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
    150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
    150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
];
/// `gperf`-generated perfect-hash table of reserved symbols.
///
/// Each entry is `(symbol, flag, flag, flag)`. Entries with an empty symbol
/// are unused hash slots. From usage within this table, the first flag marks
/// whether the word is actually reserved under the current feature set (note
/// the `cfg!` gates on feature-dependent words such as `fn` and `import`).
/// NOTE(review): the meaning of the last two flags is defined by the lookup
/// code that consumes this table (not visible here) — confirm against the
/// consumer before relying on them.
static RESERVED_LIST: [(&str, bool, bool, bool); 150] = [
    ("", false, false, false),
    ("?", true, false, false),
    ("as", cfg!(feature = "no_module"), false, false),
    ("use", true, false, false),
    ("case", true, false, false),
    ("async", true, false, false),
    ("public", true, false, false),
    ("package", true, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("super", true, false, false),
    ("#", true, false, false),
    ("private", cfg!(feature = "no_function"), false, false),
    ("var", true, false, false),
    ("protected", true, false, false),
    ("spawn", true, false, false),
    ("shared", true, false, false),
    ("is", true, false, false),
    ("===", true, false, false),
    ("sync", true, false, false),
    ("curry", true, true, true),
    ("static", true, false, false),
    ("default", true, false, false),
    ("!==", true, false, false),
    ("is_shared", cfg!(not(feature = "no_closure")), true, true),
    ("print", true, true, false),
    ("", false, false, false),
    ("#!", true, false, false),
    ("", false, false, false),
    ("this", true, false, false),
    ("is_def_var", true, true, false),
    ("thread", true, false, false),
    ("?.", cfg!(feature = "no_object"), false, false),
    ("", false, false, false),
    ("is_def_fn", cfg!(not(feature = "no_function")), true, false),
    ("yield", true, false, false),
    ("", false, false, false),
    ("fn", cfg!(feature = "no_function"), false, false),
    ("new", true, false, false),
    ("call", true, true, true),
    ("match", true, false, false),
    ("~", true, false, false),
    ("!.", true, false, false),
    ("", false, false, false),
    ("eval", true, true, false),
    ("await", true, false, false),
    ("", false, false, false),
    (":=", true, false, false),
    ("...", true, false, false),
    ("null", true, false, false),
    ("debug", true, true, false),
    ("@", true, false, false),
    ("type_of", true, true, true),
    ("", false, false, false),
    ("with", true, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("<-", true, false, false),
    ("", false, false, false),
    ("void", true, false, false),
    ("", false, false, false),
    ("import", cfg!(feature = "no_module"), false, false),
    ("--", true, false, false),
    ("nil", true, false, false),
    ("exit", false, false, false),
    ("", false, false, false),
    ("export", cfg!(feature = "no_module"), false, false),
    ("<|", true, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("$", true, false, false),
    ("->", true, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("|>", true, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("module", true, false, false),
    ("?[", cfg!(feature = "no_index"), false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("Fn", true, true, false),
    ("::<", true, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("++", true, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    (":;", true, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("*)", true, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("(*", true, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("", false, false, false),
    ("go", true, false, false),
    ("", false, false, false),
    ("goto", true, false, false),
];
699
700impl Token {
701    /// Is the token a literal symbol?
702    #[must_use]
703    pub const fn is_literal(&self) -> bool {
704        #[allow(clippy::enum_glob_use)]
705        use Token::*;
706
707        match self {
708            IntegerConstant(..) => false,
709            #[cfg(not(feature = "no_float"))]
710            FloatConstant(..) => false,
711            #[cfg(feature = "decimal")]
712            DecimalConstant(..) => false,
713            StringConstant(..)
714            | InterpolatedString(..)
715            | CharConstant(..)
716            | Identifier(..)
717            | Reserved(..) => false,
718            #[cfg(not(feature = "no_custom_syntax"))]
719            Custom(..) => false,
720            LexError(..) | Comment(..) => false,
721
722            EOF => false,
723
724            _ => true,
725        }
726    }
    /// Get the literal syntax of the token.
    ///
    /// # Panics
    ///
    /// Panics if the token is not a literal symbol.
    #[must_use]
    pub const fn literal_syntax(&self) -> &'static str {
        #[allow(clippy::enum_glob_use)]
        use Token::*;

        match self {
            // Brackets and punctuation
            LeftBrace => "{",
            RightBrace => "}",
            LeftParen => "(",
            RightParen => ")",
            LeftBracket => "[",
            RightBracket => "]",
            Unit => "()",
            // Arithmetic operators
            Plus => "+",
            UnaryPlus => "+",
            Minus => "-",
            UnaryMinus => "-",
            Multiply => "*",
            Divide => "/",
            SemiColon => ";",
            Colon => ":",
            DoubleColon => "::",
            DoubleArrow => "=>",
            Underscore => "_",
            Comma => ",",
            Period => ".",
            #[cfg(not(feature = "no_object"))]
            Elvis => "?.",
            DoubleQuestion => "??",
            #[cfg(not(feature = "no_index"))]
            QuestionBracket => "?[",
            ExclusiveRange => "..",
            InclusiveRange => "..=",
            MapStart => "#{",
            Equals => "=",
            // Keywords
            True => "true",
            False => "false",
            Let => "let",
            Const => "const",
            If => "if",
            Else => "else",
            Switch => "switch",
            Do => "do",
            While => "while",
            Until => "until",
            Loop => "loop",
            For => "for",
            In => "in",
            NotIn => "!in",
            // Comparison and logical operators
            LessThan => "<",
            GreaterThan => ">",
            Bang => "!",
            LessThanEqualsTo => "<=",
            GreaterThanEqualsTo => ">=",
            EqualsTo => "==",
            NotEqualsTo => "!=",
            Pipe => "|",
            Or => "||",
            Ampersand => "&",
            And => "&&",
            // Flow-control keywords
            Continue => "continue",
            Break => "break",
            Return => "return",
            Throw => "throw",
            Try => "try",
            Catch => "catch",
            // Op-assignment operators
            PlusAssign => "+=",
            MinusAssign => "-=",
            MultiplyAssign => "*=",
            DivideAssign => "/=",
            LeftShiftAssign => "<<=",
            RightShiftAssign => ">>=",
            AndAssign => "&=",
            OrAssign => "|=",
            XOrAssign => "^=",
            // Bit-wise and power operators
            LeftShift => "<<",
            RightShift => ">>",
            XOr => "^",
            Modulo => "%",
            ModuloAssign => "%=",
            PowerOf => "**",
            PowerOfAssign => "**=",

            // Feature-gated keywords
            #[cfg(not(feature = "no_function"))]
            Fn => "fn",
            #[cfg(not(feature = "no_function"))]
            Private => "private",

            #[cfg(not(feature = "no_module"))]
            Import => "import",
            #[cfg(not(feature = "no_module"))]
            Export => "export",
            #[cfg(not(feature = "no_module"))]
            As => "as",

            // Data-carrying tokens (constants, identifiers, errors etc.)
            // have no fixed literal syntax.
            _ => panic!("token is not a literal symbol"),
        }
    }
830
831    /// Is this token an op-assignment operator?
832    #[inline]
833    #[must_use]
834    pub const fn is_op_assignment(&self) -> bool {
835        #[allow(clippy::enum_glob_use)]
836        use Token::*;
837
838        matches!(
839            self,
840            PlusAssign
841                | MinusAssign
842                | MultiplyAssign
843                | DivideAssign
844                | LeftShiftAssign
845                | RightShiftAssign
846                | ModuloAssign
847                | PowerOfAssign
848                | AndAssign
849                | OrAssign
850                | XOrAssign
851        )
852    }
853
854    /// Get the corresponding operator of the token if it is an op-assignment operator.
855    #[must_use]
856    pub const fn get_base_op_from_assignment(&self) -> Option<Self> {
857        #[allow(clippy::enum_glob_use)]
858        use Token::*;
859
860        Some(match self {
861            PlusAssign => Plus,
862            MinusAssign => Minus,
863            MultiplyAssign => Multiply,
864            DivideAssign => Divide,
865            LeftShiftAssign => LeftShift,
866            RightShiftAssign => RightShift,
867            ModuloAssign => Modulo,
868            PowerOfAssign => PowerOf,
869            AndAssign => Ampersand,
870            OrAssign => Pipe,
871            XOrAssign => XOr,
872            _ => return None,
873        })
874    }
875
876    /// Has this token a corresponding op-assignment operator?
877    #[inline]
878    #[must_use]
879    pub const fn has_op_assignment(&self) -> bool {
880        #[allow(clippy::enum_glob_use)]
881        use Token::*;
882
883        matches!(
884            self,
885            Plus | Minus
886                | Multiply
887                | Divide
888                | LeftShift
889                | RightShift
890                | Modulo
891                | PowerOf
892                | Ampersand
893                | Pipe
894                | XOr
895        )
896    }
897
898    /// Get the corresponding op-assignment operator of the token.
899    #[must_use]
900    pub const fn convert_to_op_assignment(&self) -> Option<Self> {
901        #[allow(clippy::enum_glob_use)]
902        use Token::*;
903
904        Some(match self {
905            Plus => PlusAssign,
906            Minus => MinusAssign,
907            Multiply => MultiplyAssign,
908            Divide => DivideAssign,
909            LeftShift => LeftShiftAssign,
910            RightShift => RightShiftAssign,
911            Modulo => ModuloAssign,
912            PowerOf => PowerOfAssign,
913            Ampersand => AndAssign,
914            Pipe => OrAssign,
915            XOr => XOrAssign,
916            _ => return None,
917        })
918    }
919
    /// Reverse lookup a symbol token from a piece of syntax.
    ///
    /// Returns `None` if the syntax is not a built-in keyword or symbol.
    #[inline]
    #[must_use]
    pub fn lookup_symbol_from_syntax(syntax: &str) -> Option<Self> {
        // This implementation is based upon a pre-calculated table generated
        // by GNU `gperf` on the list of keywords.
        let utf8 = syntax.as_bytes();
        let len = utf8.len();

        // No keyword is shorter or longer than these bounds.
        if !(MIN_KEYWORD_LEN..=MAX_KEYWORD_LEN).contains(&len) {
            return None;
        }

        // The `gperf` hash is the length plus the association values of the
        // first character and (for multi-character syntax) the second one.
        let mut hash_val = len;

        match len {
            1 => (),
            _ => hash_val += KEYWORD_ASSOC_VALUES[(utf8[1] as usize) + 1] as usize,
        }
        hash_val += KEYWORD_ASSOC_VALUES[utf8[0] as usize] as usize;

        // Out-of-range hashes (e.g. from characters mapped to 153) cannot be keywords.
        if !(MIN_KEYWORD_HASH_VALUE..=MAX_KEYWORD_HASH_VALUE).contains(&hash_val) {
            return None;
        }

        // The hash is perfect, so at most one entry can match; still verify
        // the candidate because non-keyword strings may hash into range.
        match KEYWORDS_LIST[hash_val] {
            (_, Self::EOF) => None,
            // Fail early to avoid calling memcmp().
            // Since we are already working with bytes, mind as well check the first one.
            (s, ref t) if s.len() == len && s.as_bytes()[0] == utf8[0] && s == syntax => {
                Some(t.clone())
            }
            _ => None,
        }
    }
955
    /// If another operator is after these, it's probably a unary operator
    /// (not sure about `fn` name).
    ///
    /// In other words: returns `true` if an operator token that immediately
    /// follows this token should be parsed as a prefix (unary) operator
    /// rather than a binary one.
    #[must_use]
    pub const fn is_next_unary(&self) -> bool {
        #[allow(clippy::enum_glob_use)]
        use Token::*;

        match self {
            SemiColon        | // ; - is unary
            Colon            | // #{ foo: - is unary
            Comma            | // ( ... , -expr ) - is unary
            //Period         |
            //Elvis          |
            DoubleQuestion   | // ?? - is unary
            ExclusiveRange   | // .. - is unary
            InclusiveRange   | // ..= - is unary
            LeftBrace        | // { -expr } - is unary
            // RightBrace    | // { expr } - expr not unary & is closing
            LeftParen        | // ( -expr ) - is unary
            // RightParen    | // ( expr ) - expr not unary & is closing
            LeftBracket      | // [ -expr ] - is unary
            // RightBracket  | // [ expr ] - expr not unary & is closing
            Plus             |
            PlusAssign       |
            UnaryPlus        |
            Minus            |
            MinusAssign      |
            UnaryMinus       |
            Multiply         |
            MultiplyAssign   |
            Divide           |
            DivideAssign     |
            Modulo           |
            ModuloAssign     |
            PowerOf          |
            PowerOfAssign    |
            LeftShift        |
            LeftShiftAssign  |
            RightShift       |
            RightShiftAssign |
            Equals           |
            EqualsTo         |
            NotEqualsTo      |
            LessThan         |
            GreaterThan      |
            Bang             |
            LessThanEqualsTo |
            GreaterThanEqualsTo |
            Pipe             |
            Ampersand        |
            If               |
            //Do             |
            While            |
            Until            |
            In               |
            NotIn            |
            And              |
            AndAssign        |
            Or               |
            OrAssign         |
            XOr              |
            XOrAssign        |
            Return           |
            Throw               => true,

            #[cfg(not(feature = "no_index"))]
            QuestionBracket     => true,    // ?[ - is unary

            LexError(..)        => true,

            _                   => false,
        }
    }
1029
1030    /// Get the precedence number of the token.
1031    #[must_use]
1032    pub const fn precedence(&self) -> Option<Precedence> {
1033        #[allow(clippy::enum_glob_use)]
1034        use Token::*;
1035
1036        Precedence::new(match self {
1037            Or | XOr | Pipe => 30,
1038
1039            And | Ampersand => 60,
1040
1041            EqualsTo | NotEqualsTo => 90,
1042
1043            In | NotIn => 110,
1044
1045            LessThan | LessThanEqualsTo | GreaterThan | GreaterThanEqualsTo => 130,
1046
1047            DoubleQuestion => 135,
1048
1049            ExclusiveRange | InclusiveRange => 140,
1050
1051            Plus | Minus => 150,
1052
1053            Divide | Multiply | Modulo => 180,
1054
1055            PowerOf => 190,
1056
1057            LeftShift | RightShift => 210,
1058
1059            _ => 0,
1060        })
1061    }
1062
1063    /// Does an expression bind to the right (instead of left)?
1064    #[must_use]
1065    pub const fn is_bind_right(&self) -> bool {
1066        #[allow(clippy::enum_glob_use)]
1067        use Token::*;
1068
1069        match self {
1070            // Exponentiation binds to the right
1071            PowerOf => true,
1072
1073            _ => false,
1074        }
1075    }
1076
1077    /// Is this token a standard symbol used in the language?
1078    #[must_use]
1079    pub const fn is_standard_symbol(&self) -> bool {
1080        #[allow(clippy::enum_glob_use)]
1081        use Token::*;
1082
1083        match self {
1084            LeftBrace | RightBrace | LeftParen | RightParen | LeftBracket | RightBracket | Plus
1085            | UnaryPlus | Minus | UnaryMinus | Multiply | Divide | Modulo | PowerOf | LeftShift
1086            | RightShift | SemiColon | Colon | DoubleColon | Comma | Period | DoubleQuestion
1087            | ExclusiveRange | InclusiveRange | MapStart | Equals | LessThan | GreaterThan
1088            | LessThanEqualsTo | GreaterThanEqualsTo | EqualsTo | NotEqualsTo | Bang | Pipe
1089            | Or | XOr | Ampersand | And | PlusAssign | MinusAssign | MultiplyAssign
1090            | DivideAssign | LeftShiftAssign | RightShiftAssign | AndAssign | OrAssign
1091            | XOrAssign | ModuloAssign | PowerOfAssign => true,
1092
1093            #[cfg(not(feature = "no_object"))]
1094            Elvis => true,
1095
1096            #[cfg(not(feature = "no_index"))]
1097            QuestionBracket => true,
1098
1099            _ => false,
1100        }
1101    }
1102
1103    /// Is this token a standard keyword?
1104    #[inline]
1105    #[must_use]
1106    pub const fn is_standard_keyword(&self) -> bool {
1107        #[allow(clippy::enum_glob_use)]
1108        use Token::*;
1109
1110        match self {
1111            #[cfg(not(feature = "no_function"))]
1112            Fn | Private => true,
1113
1114            #[cfg(not(feature = "no_module"))]
1115            Import | Export | As => true,
1116
1117            True | False | Let | Const | If | Else | Do | While | Until | Loop | For | In
1118            | Continue | Break | Return | Throw | Try | Catch => true,
1119
1120            _ => false,
1121        }
1122    }
1123
    /// Is this token a reserved keyword or symbol?
    ///
    /// i.e. is this token wrapped in [`Token::Reserved`]?
    #[inline(always)]
    #[must_use]
    pub const fn is_reserved(&self) -> bool {
        matches!(self, Self::Reserved(..))
    }
1130
    /// Is this token a custom keyword?
    ///
    /// i.e. is this token wrapped in [`Token::Custom`]?
    ///
    /// Not available under `no_custom_syntax`.
    #[cfg(not(feature = "no_custom_syntax"))]
    #[inline(always)]
    #[must_use]
    pub const fn is_custom(&self) -> bool {
        matches!(self, Self::Custom(..))
    }
1138}
1139
1140impl From<Token> for String {
1141    #[inline(always)]
1142    fn from(token: Token) -> Self {
1143        (&token).into()
1144    }
1145}
1146
impl From<&Token> for String {
    /// Convert a borrowed [`Token`] into a [`String`] via its `Display` implementation.
    #[inline(always)]
    fn from(token: &Token) -> Self {
        token.to_string()
    }
}
1153
1154impl From<Token> for SmartString {
1155    #[inline(always)]
1156    fn from(token: Token) -> Self {
1157        (&token).into()
1158    }
1159}
1160
1161impl From<&Token> for SmartString {
1162    #[inline(always)]
1163    fn from(token: &Token) -> Self {
1164        let mut buf = Self::new_const();
1165        write!(&mut buf, "{token}").unwrap();
1166        buf
1167    }
1168}
1169
/// _(internals)_ State of the tokenizer.
/// Exported under the `internals` feature only.
#[derive(Debug, Clone, Eq, PartialEq, Default)]
pub struct TokenizeState {
    /// Maximum length of a string.
    ///
    /// Not available under `unchecked`.
    #[cfg(not(feature = "unchecked"))]
    pub max_string_len: Option<std::num::NonZeroUsize>,
    /// Can the next token be a unary operator?
    ///
    /// Updated after each token is produced, based on [`Token::is_next_unary`].
    pub next_token_cannot_be_unary: bool,
    /// Shared object to allow controlling the tokenizer externally.
    pub tokenizer_control: TokenizerControl,
    /// Is the tokenizer currently inside a block comment?
    ///
    /// Holds the current comment nesting level; zero means not inside a comment.
    pub comment_level: usize,
    /// Include comments?
    pub include_comments: bool,
    /// Is the current tokenizer position within the text stream of an interpolated string?
    ///
    /// Holds the termination character (e.g. `"` or `` ` ``), or the run of `#`
    /// characters for a raw string, while inside a string literal.
    pub is_within_text_terminated_by: Option<SmartString>,
    /// Textual syntax of the current token, if any.
    ///
    /// Set to `Some` to begin tracking this information.
    pub last_token: Option<SmartString>,
}
1194
/// _(internals)_ Trait that encapsulates a peekable character input stream.
/// Exported under the `internals` feature only.
pub trait InputStream {
    /// Un-get a character back into the `InputStream`.
    /// The next [`get_next`][InputStream::get_next] or [`peek_next`][InputStream::peek_next]
    /// will return this character instead.
    fn unget(&mut self, ch: char);
    /// Get the next character from the `InputStream`.
    fn get_next(&mut self) -> Option<char>;
    /// Peek the next character in the `InputStream`.
    #[must_use]
    fn peek_next(&mut self) -> Option<char>;

    /// Consume the next character.
    ///
    /// Note: the position is advanced _before_ the character is fetched, so
    /// `pos` moves forward even when the stream is already exhausted.
    #[inline(always)]
    fn eat_next_and_advance(&mut self, pos: &mut Position) -> Option<char> {
        pos.advance();
        self.get_next()
    }
}
1215
1216/// _(internals)_ Parse a raw string literal. Exported under the `internals` feature only.
1217///
1218/// Raw string literals do not process any escapes. They start with the character `#` (`U+0023`)
1219/// repeated any number of times, then finally a `"` (`U+0022`, double-quote).
1220///
1221/// The raw string _body_ can contain any sequence of Unicode characters. It is terminated only by
1222/// another `"` (`U+0022`, double-quote) character, followed by the same number of `#` (`U+0023`)
1223/// characters.
1224///
1225/// All Unicode characters contained in the raw string body represent themselves, including the
1226/// characters `"` (`U+0022`, double-quote), except when followed by at least as many `#` (`U+0023`)
1227/// characters as were used to start the raw string literal, `\` (`U+005C`) etc., and do not have
1228/// any special meaning.
1229///
1230/// Returns the parsed string.
1231///
1232/// # Returns
1233///
1234/// | Type                      | Return Value                                                 |`state.is_within_text_terminated_by`  |
1235/// |---------------------------|:------------------------------------------------------------:|:------------------------------------:|
1236/// |`#"hello"#`                |[`StringConstant("hello")`][Token::StringConstant]            |`None`                                |
1237/// |`#"hello`_{EOF}_           |[`StringConstant("hello")`][Token::StringConstant]            |`Some("#")`                           |
1238/// |`####"hello`_{EOF}_        |[`StringConstant("hello")`][Token::StringConstant]            |`Some("####")`                        |
1239/// |`#" "hello" "`_{EOF}_      |[`LexError`]                                                  |`None`                                |
1240/// |`#""hello""#`              |[`StringConstant("\"hello\"")`][Token::StringConstant]        |`None`                                |
1241/// |`##"hello #"# world"##`    |[`StringConstant("hello #\"# world")`][Token::StringConstant] |`None`                                |
1242/// |`#"R"#`                    |[`StringConstant("R")`][Token::StringConstant]                |`None`                                |
1243/// |`#"\x52"#`                 |[`StringConstant("\\x52")`][Token::StringConstant]            |`None`                                |
1244///
1245/// This function does _not_ throw a [`LexError`] for an unterminated raw string at _{EOF}_
1246///
1247/// This is to facilitate using this function to parse a script line-by-line, where the end of the
1248/// line (i.e. _{EOF}_) is not necessarily the end of the script.
1249///
1250/// Any time a [`StringConstant`][Token::StringConstant] is returned with
1251/// `state.is_within_text_terminated_by` set to `Some(_)` is one of the above conditions.
1252pub fn parse_raw_string_literal(
1253    stream: &mut (impl InputStream + ?Sized),
1254    state: &mut TokenizeState,
1255    pos: &mut Position,
1256    mut hash_count: usize,
1257) -> Result<(SmartString, Position), (LexError, Position)> {
1258    let start = *pos;
1259    let mut first_char = Position::NONE;
1260
1261    if hash_count == 0 {
1262        // Count the number of '#'s
1263        // Start with 1 because the first '#' is already consumed
1264        hash_count = 1;
1265
1266        while let Some('#') = stream.peek_next() {
1267            stream.eat_next_and_advance(pos);
1268            hash_count += 1;
1269        }
1270
1271        // Match '"'
1272        match stream.get_next() {
1273            Some('"') => pos.advance(),
1274            Some(c) => return Err((LERR::UnexpectedInput(c.to_string()), start)),
1275            None => return Err((LERR::UnterminatedString, start)),
1276        }
1277    }
1278
1279    let collect: SmartString = repeat('#').take(hash_count).collect();
1280    if let Some(ref mut last) = state.last_token {
1281        last.clear();
1282        last.push_str(&collect);
1283        last.push('"');
1284    }
1285    state.is_within_text_terminated_by = Some(collect);
1286
1287    // Match everything until the same number of '#'s are seen, prepended by a '"'
1288
1289    // Counts the number of '#' characters seen after a quotation mark.
1290    // Becomes Some(0) after a quote is seen, but resets to None if a hash doesn't follow.
1291    let mut seen_hashes: Option<usize> = None;
1292    let mut result = SmartString::new_const();
1293
1294    while let Some(next_char) = stream.get_next() {
1295        pos.advance();
1296
1297        match (next_char, &mut seen_hashes) {
1298            // Begin attempt to close string
1299            ('"', None) => seen_hashes = Some(0),
1300            // Restart attempt to close string
1301            ('"', Some(count)) => {
1302                // result.reserve(*count as usize+c.len());
1303                result.push('"');
1304                result.extend(repeat('#').take(*count));
1305                seen_hashes = Some(0);
1306            }
1307            // Continue attempt to close string
1308            ('#', Some(count)) => {
1309                *count += 1;
1310                if *count == hash_count {
1311                    state.is_within_text_terminated_by = None;
1312                    break;
1313                }
1314            }
1315            // Fail to close the string - add previous quote and hashes
1316            (c, Some(count)) => {
1317                // result.reserve(*count as usize +1+c.len());
1318                result.push('"');
1319                result.extend(repeat('#').take(*count));
1320                result.push(c);
1321                seen_hashes = None;
1322            }
1323            // New line
1324            ('\n', _) => {
1325                result.push('\n');
1326                pos.new_line();
1327            }
1328            // Normal new character seen
1329            (c, None) => result.push(c),
1330        }
1331
1332        // Check string length
1333        #[cfg(not(feature = "unchecked"))]
1334        if let Some(max) = state.max_string_len {
1335            if result.len() > max.get() {
1336                return Err((LexError::StringTooLong(max.get()), start));
1337            }
1338        }
1339
1340        if first_char.is_none() {
1341            first_char = *pos;
1342        }
1343    }
1344
1345    Ok((result, first_char))
1346}
1347
/// _(internals)_ Parse a string literal ended by a specified termination character.
/// Exported under the `internals` feature only.
///
/// Returns the parsed string and a boolean indicating whether the string is
/// terminated by an interpolation `${`.
///
/// # Returns
///
/// | Type                            | Return Value                                        |`state.is_within_text_terminated_by`|
/// |---------------------------------|:---------------------------------------------------:|:----------------------------------:|
/// |`"hello"`                        |[`StringConstant("hello")`][Token::StringConstant]   |`None`                              |
/// |`"hello`_{LF}_ or _{EOF}_        |[`LexError`]                                         |`None`                              |
/// |`"hello\`_{EOF}_ or _{LF}{EOF}_  |[`StringConstant("hello")`][Token::StringConstant]   |`Some('"')`                         |
/// |`` `hello``_{EOF}_               |[`StringConstant("hello")`][Token::StringConstant]   |``Some('`')``                       |
/// |`` `hello``_{LF}{EOF}_           |[`StringConstant("hello\n")`][Token::StringConstant] |``Some('`')``                       |
/// |`` `hello ${``                   |[`InterpolatedString("hello ")`][Token::InterpolatedString]<br/>next token is `{`|`None`  |
/// |`` } hello` ``                   |[`StringConstant(" hello")`][Token::StringConstant]  |`None`                              |
/// |`} hello`_{EOF}_                 |[`StringConstant(" hello")`][Token::StringConstant]  |``Some('`')``                       |
///
/// This function does not throw a [`LexError`] for the following conditions:
///
/// * Unterminated literal string at _{EOF}_
///
/// * Unterminated normal string with continuation at _{EOF}_
///
/// This is to facilitate using this function to parse a script line-by-line, where the end of the
/// line (i.e. _{EOF}_) is not necessarily the end of the script.
///
/// Any time a [`StringConstant`][Token::StringConstant] is returned with
/// `state.is_within_text_terminated_by` set to `Some(_)` is one of the above conditions.
pub fn parse_string_literal(
    stream: &mut (impl InputStream + ?Sized),
    state: &mut TokenizeState,
    pos: &mut Position,
    termination_char: char,
    verbatim: bool,
    allow_line_continuation: bool,
    allow_interpolation: bool,
) -> Result<(SmartString, bool, Position), (LexError, Position)> {
    // Accumulates the parsed string content.
    let mut result = SmartString::new_const();
    // Holds a pending escape sequence (the `\` until it is resolved).
    let mut escape = SmartString::new_const();

    let start = *pos;
    let mut first_char = Position::NONE;
    let mut interpolated = false;
    // After a line continuation, leading whitespace up to this column is skipped.
    #[cfg(not(feature = "no_position"))]
    let mut skip_space_until = 0;

    // Mark the tokenizer as being inside a string, in case of premature EOF.
    state.is_within_text_terminated_by = Some(termination_char.to_string().into());
    if let Some(ref mut last) = state.last_token {
        last.clear();
        last.push(termination_char);
    }

    loop {
        debug_assert!(
            !verbatim || escape.is_empty(),
            "verbatim strings should not have any escapes"
        );

        let next_char = match stream.get_next() {
            Some(ch) => {
                pos.advance();
                ch
            }
            // EOF inside a verbatim string - not an error (see doc table above).
            None if verbatim => {
                debug_assert_eq!(escape, "", "verbatim strings should not have any escapes");
                pos.advance();
                break;
            }
            // EOF right after a line-continuation backslash - not an error.
            None if allow_line_continuation && !escape.is_empty() => {
                debug_assert_eq!(escape, "\\", "unexpected escape {escape} at end of line");
                pos.advance();
                break;
            }
            None => {
                pos.advance();
                state.is_within_text_terminated_by = None;
                return Err((LERR::UnterminatedString, start));
            }
        };

        if let Some(ref mut last) = state.last_token {
            last.push(next_char);
        }

        // String interpolation?
        if allow_interpolation
            && next_char == '$'
            && escape.is_empty()
            && stream.peek_next() == Some('{')
        {
            interpolated = true;
            state.is_within_text_terminated_by = None;
            break;
        }

        // Check string length
        #[cfg(not(feature = "unchecked"))]
        if let Some(max) = state.max_string_len {
            if result.len() > max.get() {
                return Err((LexError::StringTooLong(max.get()), start));
            }
        }

        // Close wrapper
        if termination_char == next_char && escape.is_empty() {
            // Double wrapper - two consecutive terminators stand for one literal terminator.
            if stream.peek_next() == Some(termination_char) {
                stream.eat_next_and_advance(pos);
                if let Some(ref mut last) = state.last_token {
                    last.push(termination_char);
                }
            } else {
                state.is_within_text_terminated_by = None;
                break;
            }
        }

        // Record the position of the first character of the string body.
        if first_char.is_none() {
            first_char = *pos;
        }

        match next_char {
            // \r - ignore if followed by \n
            '\r' if stream.peek_next() == Some('\n') => (),
            // \r
            'r' if !escape.is_empty() => {
                escape.clear();
                result.push_str("\r");
            }
            // \n
            'n' if !escape.is_empty() => {
                escape.clear();
                result.push_str("\n");
            }
            // \... - start of an escape sequence (never inside verbatim strings)
            '\\' if !verbatim && escape.is_empty() => {
                escape.push_str("\\");
            }
            // \\
            '\\' if !escape.is_empty() => {
                escape.clear();
                result.push_str("\\");
            }
            // \t
            't' if !escape.is_empty() => {
                escape.clear();
                result.push_str("\t");
            }
            // \x??, \u????, \U???????? - fixed-width hex Unicode escapes
            ch @ ('x' | 'u' | 'U') if !escape.is_empty() => {
                // `seq` keeps the raw escape text for error reporting.
                let mut seq = escape.clone();
                escape.clear();
                seq.push(ch);

                let mut out_val: u32 = 0;
                let len = match ch {
                    'x' => 2,
                    'u' => 4,
                    'U' => 8,
                    c => unreachable!("x or u or U expected but gets '{}'", c),
                };

                // Accumulate exactly `len` hex digits into the code-point value.
                for _ in 0..len {
                    let c = stream
                        .get_next()
                        .ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?;

                    pos.advance();
                    seq.push(c);
                    if let Some(ref mut last) = state.last_token {
                        last.push(c);
                    }

                    out_val *= 16;
                    out_val += c
                        .to_digit(16)
                        .ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?;
                }

                // Reject values that are not valid Unicode scalar values.
                result.push(
                    char::from_u32(out_val)
                        .ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?,
                );
            }

            // LF - Verbatim
            '\n' if verbatim => {
                debug_assert_eq!(escape, "", "verbatim strings should not have any escapes");
                pos.new_line();
                result.push_str("\n");
            }

            // LF - Line continuation
            '\n' if allow_line_continuation && !escape.is_empty() => {
                debug_assert_eq!(escape, "\\", "unexpected escape {escape} at end of line");
                escape.clear();
                pos.new_line();

                // Skip whitespace on the next line up to the string's starting column.
                #[cfg(not(feature = "no_position"))]
                {
                    let start_position = start.position().unwrap();
                    skip_space_until = start_position + 1;
                }
            }

            // LF - Unterminated string
            '\n' => {
                pos.rewind();
                state.is_within_text_terminated_by = None;
                return Err((LERR::UnterminatedString, start));
            }

            // \{termination_char} - escaped termination character
            ch if termination_char == ch && !escape.is_empty() => {
                escape.clear();
                result.push(termination_char);
            }

            // Unknown escape sequence
            ch if !escape.is_empty() => {
                escape.push(ch);

                return Err((LERR::MalformedEscapeSequence(escape.to_string()), *pos));
            }

            // Whitespace to skip (indentation following a line continuation)
            #[cfg(not(feature = "no_position"))]
            ch if ch.is_whitespace() && pos.position().unwrap() < skip_space_until => (),

            // All other characters
            ch => {
                escape.clear();
                result.push(ch);

                // A non-whitespace character ends any pending indentation skip.
                #[cfg(not(feature = "no_position"))]
                {
                    skip_space_until = 0;
                }
            }
        }
    }

    // Check string length
    #[cfg(not(feature = "unchecked"))]
    if let Some(max) = state.max_string_len {
        if result.len() > max.get() {
            return Err((LexError::StringTooLong(max.get()), start));
        }
    }

    Ok((result, interpolated, first_char))
}
1602
1603/// Scan for a block comment until the end.
1604fn scan_block_comment(
1605    stream: &mut (impl InputStream + ?Sized),
1606    level: usize,
1607    pos: &mut Position,
1608    comment: Option<&mut String>,
1609) -> usize {
1610    let mut level = level;
1611    let mut comment = comment;
1612
1613    while let Some(c) = stream.get_next() {
1614        pos.advance();
1615
1616        if let Some(comment) = comment.as_mut() {
1617            comment.push(c);
1618        }
1619
1620        match c {
1621            '/' => {
1622                if let Some(c2) = stream.peek_next().filter(|&ch| ch == '*') {
1623                    stream.eat_next_and_advance(pos);
1624                    if let Some(comment) = comment.as_mut() {
1625                        comment.push(c2);
1626                    }
1627                    level += 1;
1628                }
1629            }
1630            '*' => {
1631                if let Some(c2) = stream.peek_next().filter(|&ch| ch == '/') {
1632                    stream.eat_next_and_advance(pos);
1633                    if let Some(comment) = comment.as_mut() {
1634                        comment.push(c2);
1635                    }
1636                    level -= 1;
1637                }
1638            }
1639            '\n' => pos.new_line(),
1640            _ => (),
1641        }
1642
1643        if level == 0 {
1644            break;
1645        }
1646    }
1647
1648    level
1649}
1650
/// Test if the given character is a hex character.
#[inline(always)]
const fn is_hex_digit(c: char) -> bool {
    matches!(c, '0'..='9' | 'a'..='f' | 'A'..='F')
}
1656
/// Test if the given character is a numeric digit (i.e. 0-9).
#[inline(always)]
const fn is_numeric_digit(c: char) -> bool {
    matches!(c, '0'..='9')
}
1662
/// Test if the given character is an octal digit (i.e. 0-7).
#[inline(always)]
const fn is_octal_digit(c: char) -> bool {
    c >= '0' && c <= '7'
}
1668
/// Test if the given character is a binary digit (i.e. 0 or 1).
#[inline(always)]
const fn is_binary_digit(c: char) -> bool {
    matches!(c, '0' | '1')
}
1674
/// Test if the comment block is a doc-comment.
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
#[inline]
#[must_use]
pub fn is_doc_comment(comment: &str) -> bool {
    // `///` (but not `////`) starts a line doc-comment;
    // `/**` (but not `/***`) starts a block doc-comment.
    let line_doc = comment.starts_with("///") && !comment.starts_with("////");
    let block_doc = comment.starts_with("/**") && !comment.starts_with("/***");
    line_doc || block_doc
}
1684
1685/// _(internals)_ Get the next token from the input stream.
1686/// Exported under the `internals` feature only.
1687#[inline(always)]
1688#[must_use]
1689pub fn get_next_token(
1690    stream: &mut (impl InputStream + ?Sized),
1691    state: &mut TokenizeState,
1692    pos: &mut Position,
1693) -> (Token, Position) {
1694    let result = get_next_token_inner(stream, state, pos);
1695
1696    // Save the last token's state
1697    state.next_token_cannot_be_unary = !result.0.is_next_unary();
1698
1699    result
1700}
1701
1702/// Get the next token.
1703#[must_use]
1704fn get_next_token_inner(
1705    stream: &mut (impl InputStream + ?Sized),
1706    state: &mut TokenizeState,
1707    pos: &mut Position,
1708) -> (Token, Position) {
1709    state.last_token.as_mut().map(SmartString::clear);
1710
1711    // Still inside a comment?
1712    if state.comment_level > 0 {
1713        let start_pos = *pos;
1714        let mut comment = String::new();
1715        let comment_buf = state.include_comments.then_some(&mut comment);
1716
1717        state.comment_level = scan_block_comment(stream, state.comment_level, pos, comment_buf);
1718
1719        let return_comment = state.include_comments;
1720
1721        #[cfg(not(feature = "no_function"))]
1722        #[cfg(feature = "metadata")]
1723        let return_comment = return_comment || is_doc_comment(&comment);
1724
1725        if return_comment {
1726            return (Token::Comment(comment.into()), start_pos);
1727        }
1728
1729        // Reached EOF without ending comment block?
1730        if state.comment_level > 0 {
1731            return (Token::EOF, *pos);
1732        }
1733    }
1734
1735    // Within text?
1736    match state.is_within_text_terminated_by.take() {
1737        Some(ch) if ch.starts_with('#') => {
1738            return parse_raw_string_literal(stream, state, pos, ch.len()).map_or_else(
1739                |(err, err_pos)| (Token::LexError(err.into()), err_pos),
1740                |(result, start_pos)| (Token::StringConstant(result.into()), start_pos),
1741            )
1742        }
1743        Some(ch) => {
1744            let c = ch.chars().next().unwrap();
1745
1746            return parse_string_literal(stream, state, pos, c, true, false, true).map_or_else(
1747                |(err, err_pos)| (Token::LexError(err.into()), err_pos),
1748                |(result, interpolated, start_pos)| {
1749                    if interpolated {
1750                        (Token::InterpolatedString(result.into()), start_pos)
1751                    } else {
1752                        (Token::StringConstant(result.into()), start_pos)
1753                    }
1754                },
1755            );
1756        }
1757        None => (),
1758    }
1759
1760    let mut negated: Option<Position> = None;
1761
1762    while let Some(c) = stream.get_next() {
1763        pos.advance();
1764
1765        let start_pos = *pos;
1766        let cc = stream.peek_next().unwrap_or('\0');
1767
1768        // Identifiers and strings that can have non-ASCII characters
1769        match (c, cc) {
1770            // digit ...
1771            ('0'..='9', ..) => {
1772                let mut result = SmartString::new_const();
1773                let mut radix_base: Option<u32> = None;
1774                let mut valid: fn(char) -> bool = is_numeric_digit;
1775                let mut _has_period = false;
1776                let mut _has_e = false;
1777
1778                result.push(c);
1779
1780                while let Some(next_char) = stream.peek_next() {
1781                    match next_char {
1782                        NUMBER_SEPARATOR => {
1783                            stream.eat_next_and_advance(pos);
1784                        }
1785                        ch if valid(ch) => {
1786                            result.push(ch);
1787                            stream.eat_next_and_advance(pos);
1788                        }
1789                        #[cfg(any(not(feature = "no_float"), feature = "decimal"))]
1790                        '.' if !_has_period && radix_base.is_none() => {
1791                            stream.get_next().unwrap();
1792
1793                            // Check if followed by digits or something that cannot start a property name
1794                            match stream.peek_next() {
1795                                // digits after period - accept the period
1796                                Some('0'..='9') => {
1797                                    result.push_str(".");
1798                                    pos.advance();
1799                                    _has_period = true;
1800                                }
1801                                // _ - cannot follow a decimal point
1802                                Some(NUMBER_SEPARATOR) => {
1803                                    stream.unget('.');
1804                                    break;
1805                                }
1806                                // .. - reserved symbol, not a floating-point number
1807                                Some('.') => {
1808                                    stream.unget('.');
1809                                    break;
1810                                }
1811                                // symbol after period - probably a float
1812                                Some(ch) if !is_id_first_alphabetic(ch) => {
1813                                    result.push_str(".");
1814                                    pos.advance();
1815                                    result.push_str("0");
1816                                    _has_period = true;
1817                                }
1818                                // Not a floating-point number
1819                                _ => {
1820                                    stream.unget('.');
1821                                    break;
1822                                }
1823                            }
1824                        }
1825                        #[cfg(not(feature = "no_float"))]
1826                        ch @ ('e' | 'E') if !_has_e && radix_base.is_none() => {
1827                            stream.get_next().unwrap();
1828
1829                            // Check if followed by digits or +/-
1830                            match stream.peek_next() {
1831                                // digits after e/E - accept as 'e' (no decimal points allowed)
1832                                Some('0'..='9') => {
1833                                    result.push('e');
1834                                    pos.advance();
1835                                    _has_e = true;
1836                                    _has_period = true;
1837                                }
1838                                // +/- after e/E - accept as 'e' and the sign (no decimal points allowed)
1839                                Some('+' | '-') => {
1840                                    result.push('e');
1841                                    pos.advance();
1842                                    result.push(stream.get_next().unwrap());
1843                                    pos.advance();
1844                                    _has_e = true;
1845                                    _has_period = true;
1846                                }
1847                                // Not a floating-point number
1848                                _ => {
1849                                    stream.unget(ch);
1850                                    break;
1851                                }
1852                            }
1853                        }
1854                        // 0x????, 0o????, 0b???? at beginning
1855                        ch @ ('x' | 'o' | 'b' | 'X' | 'O' | 'B')
1856                            if c == '0' && result.len() <= 1 =>
1857                        {
1858                            result.push(ch);
1859                            stream.eat_next_and_advance(pos);
1860
1861                            valid = match ch {
1862                                'x' | 'X' => is_hex_digit,
1863                                'o' | 'O' => is_octal_digit,
1864                                'b' | 'B' => is_binary_digit,
1865                                c => unreachable!("x/X or o/O or b/B expected but gets '{}'", c),
1866                            };
1867
1868                            radix_base = Some(match ch {
1869                                'x' | 'X' => 16,
1870                                'o' | 'O' => 8,
1871                                'b' | 'B' => 2,
1872                                c => unreachable!("x/X or o/O or b/B expected but gets '{}'", c),
1873                            });
1874                        }
1875
1876                        _ => break,
1877                    }
1878                }
1879
1880                let num_pos = negated.map_or(start_pos, |negated_pos| {
1881                    result.insert(0, '-');
1882                    negated_pos
1883                });
1884
1885                if let Some(ref mut last) = state.last_token {
1886                    *last = result.clone();
1887                }
1888
1889                // Parse number
1890                let token = if let Some(radix) = radix_base {
1891                    let result = &result[2..];
1892
1893                    UNSIGNED_INT::from_str_radix(result, radix)
1894                        .map(|v| v as INT)
1895                        .map_or_else(
1896                            |_| Token::LexError(LERR::MalformedNumber(result.to_string()).into()),
1897                            Token::IntegerConstant,
1898                        )
1899                } else {
1900                    (|| {
1901                        let num = INT::from_str(&result).map(Token::IntegerConstant);
1902
1903                        // If integer parsing is unnecessary, try float instead
1904                        #[cfg(not(feature = "no_float"))]
1905                        if num.is_err() {
1906                            if let Ok(v) = crate::types::FloatWrapper::from_str(&result) {
1907                                return Token::FloatConstant((v, result).into());
1908                            }
1909                        }
1910
1911                        // Then try decimal
1912                        #[cfg(feature = "decimal")]
1913                        if num.is_err() {
1914                            if let Ok(v) = rust_decimal::Decimal::from_str(&result) {
1915                                return Token::DecimalConstant((v, result).into());
1916                            }
1917                        }
1918
1919                        // Then try decimal in scientific notation
1920                        #[cfg(feature = "decimal")]
1921                        if num.is_err() {
1922                            if let Ok(v) = rust_decimal::Decimal::from_scientific(&result) {
1923                                return Token::DecimalConstant((v, result).into());
1924                            }
1925                        }
1926
1927                        num.unwrap_or_else(|_| {
1928                            Token::LexError(LERR::MalformedNumber(result.to_string()).into())
1929                        })
1930                    })()
1931                };
1932
1933                return (token, num_pos);
1934            }
1935
1936            // " - string literal
1937            ('"', ..) => {
1938                return parse_string_literal(stream, state, pos, c, false, true, false)
1939                    .map_or_else(
1940                        |(err, err_pos)| (Token::LexError(err.into()), err_pos),
1941                        |(result, ..)| (Token::StringConstant(result.into()), start_pos),
1942                    );
1943            }
1944            // ` - string literal
1945            ('`', ..) => {
1946                // Start from the next line if at the end of line
1947                match stream.peek_next() {
1948                    // `\r - start from next line
1949                    Some('\r') => {
1950                        stream.eat_next_and_advance(pos);
1951                        // `\r\n
1952                        if stream.peek_next() == Some('\n') {
1953                            stream.eat_next_and_advance(pos);
1954                        }
1955                        pos.new_line();
1956                    }
1957                    // `\n - start from next line
1958                    Some('\n') => {
1959                        stream.eat_next_and_advance(pos);
1960                        pos.new_line();
1961                    }
1962                    _ => (),
1963                }
1964
1965                return parse_string_literal(stream, state, pos, c, true, false, true).map_or_else(
1966                    |(err, err_pos)| (Token::LexError(err.into()), err_pos),
1967                    |(result, interpolated, ..)| {
1968                        if interpolated {
1969                            (Token::InterpolatedString(result.into()), start_pos)
1970                        } else {
1971                            (Token::StringConstant(result.into()), start_pos)
1972                        }
1973                    },
1974                );
1975            }
1976
1977            // r - raw string literal
1978            ('#', '"' | '#') => {
1979                return parse_raw_string_literal(stream, state, pos, 0).map_or_else(
1980                    |(err, err_pos)| (Token::LexError(err.into()), err_pos),
1981                    |(result, ..)| (Token::StringConstant(result.into()), start_pos),
1982                );
1983            }
1984
1985            // ' - character literal
1986            ('\'', '\'') => {
1987                return (
1988                    Token::LexError(LERR::MalformedChar(String::new()).into()),
1989                    start_pos,
1990                )
1991            }
1992            ('\'', ..) => {
1993                return parse_string_literal(stream, state, pos, c, false, false, false)
1994                    .map_or_else(
1995                        |(err, err_pos)| (Token::LexError(err.into()), err_pos),
1996                        |(result, ..)| {
1997                            let mut chars = result.chars();
1998                            let first = chars.next().unwrap();
1999
2000                            if chars.next().is_some() {
2001                                (
2002                                    Token::LexError(LERR::MalformedChar(result.to_string()).into()),
2003                                    start_pos,
2004                                )
2005                            } else {
2006                                (Token::CharConstant(first), start_pos)
2007                            }
2008                        },
2009                    )
2010            }
2011
2012            // Braces
2013            ('{', ..) => return (Token::LeftBrace, start_pos),
2014            ('}', ..) => return (Token::RightBrace, start_pos),
2015
2016            // Unit
2017            ('(', ')') => {
2018                stream.eat_next_and_advance(pos);
2019                return (Token::Unit, start_pos);
2020            }
2021
2022            // Parentheses
2023            ('(', '*') => {
2024                stream.eat_next_and_advance(pos);
2025                return (Token::Reserved(Box::new("(*".into())), start_pos);
2026            }
2027            ('(', ..) => return (Token::LeftParen, start_pos),
2028            (')', ..) => return (Token::RightParen, start_pos),
2029
2030            // Indexing
2031            ('[', ..) => return (Token::LeftBracket, start_pos),
2032            (']', ..) => return (Token::RightBracket, start_pos),
2033
2034            // Map literal
2035            #[cfg(not(feature = "no_object"))]
2036            ('#', '{') => {
2037                stream.eat_next_and_advance(pos);
2038                return (Token::MapStart, start_pos);
2039            }
2040            // Shebang
2041            ('#', '!') => return (Token::Reserved(Box::new("#!".into())), start_pos),
2042
2043            ('#', ' ') => {
2044                stream.eat_next_and_advance(pos);
2045                let token = if stream.peek_next() == Some('{') {
2046                    stream.eat_next_and_advance(pos);
2047                    "# {"
2048                } else {
2049                    "#"
2050                };
2051                return (Token::Reserved(Box::new(token.into())), start_pos);
2052            }
2053
2054            ('#', ..) => return (Token::Reserved(Box::new("#".into())), start_pos),
2055
2056            // Operators
2057            ('+', '=') => {
2058                stream.eat_next_and_advance(pos);
2059                return (Token::PlusAssign, start_pos);
2060            }
2061            ('+', '+') => {
2062                stream.eat_next_and_advance(pos);
2063                return (Token::Reserved(Box::new("++".into())), start_pos);
2064            }
2065            ('+', ..) if !state.next_token_cannot_be_unary => return (Token::UnaryPlus, start_pos),
2066            ('+', ..) => return (Token::Plus, start_pos),
2067
2068            ('-', '0'..='9') if !state.next_token_cannot_be_unary => negated = Some(start_pos),
2069            ('-', '0'..='9') => return (Token::Minus, start_pos),
2070            ('-', '=') => {
2071                stream.eat_next_and_advance(pos);
2072                return (Token::MinusAssign, start_pos);
2073            }
2074            ('-', '>') => {
2075                stream.eat_next_and_advance(pos);
2076                return (Token::Reserved(Box::new("->".into())), start_pos);
2077            }
2078            ('-', '-') => {
2079                stream.eat_next_and_advance(pos);
2080                return (Token::Reserved(Box::new("--".into())), start_pos);
2081            }
2082            ('-', ..) if !state.next_token_cannot_be_unary => {
2083                return (Token::UnaryMinus, start_pos)
2084            }
2085            ('-', ..) => return (Token::Minus, start_pos),
2086
2087            ('*', ')') => {
2088                stream.eat_next_and_advance(pos);
2089                return (Token::Reserved(Box::new("*)".into())), start_pos);
2090            }
2091            ('*', '=') => {
2092                stream.eat_next_and_advance(pos);
2093                return (Token::MultiplyAssign, start_pos);
2094            }
2095            ('*', '*') => {
2096                stream.eat_next_and_advance(pos);
2097
2098                return (
2099                    if stream.peek_next() == Some('=') {
2100                        stream.eat_next_and_advance(pos);
2101                        Token::PowerOfAssign
2102                    } else {
2103                        Token::PowerOf
2104                    },
2105                    start_pos,
2106                );
2107            }
2108            ('*', ..) => return (Token::Multiply, start_pos),
2109
2110            // Comments
2111            ('/', '/') => {
2112                stream.eat_next_and_advance(pos);
2113
2114                let mut comment: Option<String> = match stream.peek_next() {
2115                    #[cfg(not(feature = "no_function"))]
2116                    #[cfg(feature = "metadata")]
2117                    Some('/') => {
2118                        stream.eat_next_and_advance(pos);
2119
2120                        // Long streams of `///...` are not doc-comments
2121                        match stream.peek_next() {
2122                            Some('/') => None,
2123                            _ => Some("///".into()),
2124                        }
2125                    }
2126                    #[cfg(feature = "metadata")]
2127                    Some('!') => {
2128                        stream.eat_next_and_advance(pos);
2129                        Some("//!".into())
2130                    }
2131                    _ if state.include_comments => Some("//".into()),
2132                    _ => None,
2133                };
2134
2135                while let Some(c) = stream.get_next() {
2136                    if c == '\r' {
2137                        // \r\n
2138                        if stream.peek_next() == Some('\n') {
2139                            stream.eat_next_and_advance(pos);
2140                        }
2141                        pos.new_line();
2142                        break;
2143                    }
2144                    if c == '\n' {
2145                        pos.new_line();
2146                        break;
2147                    }
2148                    if let Some(comment) = comment.as_mut() {
2149                        comment.push(c);
2150                    }
2151                    pos.advance();
2152                }
2153
2154                match comment {
2155                    #[cfg(feature = "metadata")]
2156                    Some(comment) if comment.starts_with("//!") => {
2157                        let g = &mut state.tokenizer_control.borrow_mut().global_comments;
2158                        if !g.is_empty() {
2159                            *g += "\n";
2160                        }
2161                        *g += &comment;
2162                    }
2163                    Some(comment) => return (Token::Comment(comment.into()), start_pos),
2164                    None => (),
2165                }
2166            }
2167            ('/', '*') => {
2168                state.comment_level += 1;
2169                stream.eat_next_and_advance(pos);
2170
2171                let mut comment: Option<String> = match stream.peek_next() {
2172                    #[cfg(not(feature = "no_function"))]
2173                    #[cfg(feature = "metadata")]
2174                    Some('*') => {
2175                        stream.eat_next_and_advance(pos);
2176
2177                        // Long streams of `/****...` are not doc-comments
2178                        match stream.peek_next() {
2179                            Some('*') => None,
2180                            _ => Some("/**".into()),
2181                        }
2182                    }
2183                    _ if state.include_comments => Some("/*".into()),
2184                    _ => None,
2185                };
2186
2187                state.comment_level =
2188                    scan_block_comment(stream, state.comment_level, pos, comment.as_mut());
2189
2190                if let Some(comment) = comment {
2191                    return (Token::Comment(comment.into()), start_pos);
2192                }
2193            }
2194
2195            ('/', '=') => {
2196                stream.eat_next_and_advance(pos);
2197                return (Token::DivideAssign, start_pos);
2198            }
2199            ('/', ..) => return (Token::Divide, start_pos),
2200
2201            (';', ..) => return (Token::SemiColon, start_pos),
2202            (',', ..) => return (Token::Comma, start_pos),
2203
2204            ('.', '.') => {
2205                stream.eat_next_and_advance(pos);
2206                return (
2207                    match stream.peek_next() {
2208                        Some('.') => {
2209                            stream.eat_next_and_advance(pos);
2210                            Token::Reserved(Box::new("...".into()))
2211                        }
2212                        Some('=') => {
2213                            stream.eat_next_and_advance(pos);
2214                            Token::InclusiveRange
2215                        }
2216                        _ => Token::ExclusiveRange,
2217                    },
2218                    start_pos,
2219                );
2220            }
2221            ('.', ..) => return (Token::Period, start_pos),
2222
2223            ('=', '=') => {
2224                stream.eat_next_and_advance(pos);
2225
2226                if stream.peek_next() == Some('=') {
2227                    stream.eat_next_and_advance(pos);
2228                    return (Token::Reserved(Box::new("===".into())), start_pos);
2229                }
2230
2231                return (Token::EqualsTo, start_pos);
2232            }
2233            ('=', '>') => {
2234                stream.eat_next_and_advance(pos);
2235                return (Token::DoubleArrow, start_pos);
2236            }
2237            ('=', ..) => return (Token::Equals, start_pos),
2238
2239            #[cfg(not(feature = "no_module"))]
2240            (':', ':') => {
2241                stream.eat_next_and_advance(pos);
2242
2243                if stream.peek_next() == Some('<') {
2244                    stream.eat_next_and_advance(pos);
2245                    return (Token::Reserved(Box::new("::<".into())), start_pos);
2246                }
2247
2248                return (Token::DoubleColon, start_pos);
2249            }
2250            (':', '=') => {
2251                stream.eat_next_and_advance(pos);
2252                return (Token::Reserved(Box::new(":=".into())), start_pos);
2253            }
2254            (':', ';') => {
2255                stream.eat_next_and_advance(pos);
2256                return (Token::Reserved(Box::new(":;".into())), start_pos);
2257            }
2258            (':', ..) => return (Token::Colon, start_pos),
2259
2260            ('<', '=') => {
2261                stream.eat_next_and_advance(pos);
2262                return (Token::LessThanEqualsTo, start_pos);
2263            }
2264            ('<', '-') => {
2265                stream.eat_next_and_advance(pos);
2266                return (Token::Reserved(Box::new("<-".into())), start_pos);
2267            }
2268            ('<', '<') => {
2269                stream.eat_next_and_advance(pos);
2270
2271                return (
2272                    if stream.peek_next() == Some('=') {
2273                        stream.eat_next_and_advance(pos);
2274                        Token::LeftShiftAssign
2275                    } else {
2276                        Token::LeftShift
2277                    },
2278                    start_pos,
2279                );
2280            }
2281            ('<', '|') => {
2282                stream.eat_next_and_advance(pos);
2283                return (Token::Reserved(Box::new("<|".into())), start_pos);
2284            }
2285            ('<', ..) => return (Token::LessThan, start_pos),
2286
2287            ('>', '=') => {
2288                stream.eat_next_and_advance(pos);
2289                return (Token::GreaterThanEqualsTo, start_pos);
2290            }
2291            ('>', '>') => {
2292                stream.eat_next_and_advance(pos);
2293
2294                return (
2295                    if stream.peek_next() == Some('=') {
2296                        stream.eat_next_and_advance(pos);
2297                        Token::RightShiftAssign
2298                    } else {
2299                        Token::RightShift
2300                    },
2301                    start_pos,
2302                );
2303            }
2304            ('>', ..) => return (Token::GreaterThan, start_pos),
2305
2306            ('!', 'i') => {
2307                stream.get_next().unwrap();
2308                if stream.peek_next() == Some('n') {
2309                    stream.get_next().unwrap();
2310                    match stream.peek_next() {
2311                        Some(c) if is_id_continue(c) => {
2312                            stream.unget('n');
2313                            stream.unget('i');
2314                            return (Token::Bang, start_pos);
2315                        }
2316                        _ => {
2317                            pos.advance();
2318                            pos.advance();
2319                            return (Token::NotIn, start_pos);
2320                        }
2321                    }
2322                }
2323
2324                stream.unget('i');
2325                return (Token::Bang, start_pos);
2326            }
2327            ('!', '=') => {
2328                stream.eat_next_and_advance(pos);
2329
2330                if stream.peek_next() == Some('=') {
2331                    stream.eat_next_and_advance(pos);
2332                    return (Token::Reserved(Box::new("!==".into())), start_pos);
2333                }
2334
2335                return (Token::NotEqualsTo, start_pos);
2336            }
2337            ('!', '.') => {
2338                stream.eat_next_and_advance(pos);
2339                return (Token::Reserved(Box::new("!.".into())), start_pos);
2340            }
2341            ('!', ..) => return (Token::Bang, start_pos),
2342
2343            ('|', '|') => {
2344                stream.eat_next_and_advance(pos);
2345                return (Token::Or, start_pos);
2346            }
2347            ('|', '=') => {
2348                stream.eat_next_and_advance(pos);
2349                return (Token::OrAssign, start_pos);
2350            }
2351            ('|', '>') => {
2352                stream.eat_next_and_advance(pos);
2353                return (Token::Reserved(Box::new("|>".into())), start_pos);
2354            }
2355            ('|', ..) => return (Token::Pipe, start_pos),
2356
2357            ('&', '&') => {
2358                stream.eat_next_and_advance(pos);
2359                return (Token::And, start_pos);
2360            }
2361            ('&', '=') => {
2362                stream.eat_next_and_advance(pos);
2363                return (Token::AndAssign, start_pos);
2364            }
2365            ('&', ..) => return (Token::Ampersand, start_pos),
2366
2367            ('^', '=') => {
2368                stream.eat_next_and_advance(pos);
2369                return (Token::XOrAssign, start_pos);
2370            }
2371            ('^', ..) => return (Token::XOr, start_pos),
2372
2373            ('~', ..) => return (Token::Reserved(Box::new("~".into())), start_pos),
2374
2375            ('%', '=') => {
2376                stream.eat_next_and_advance(pos);
2377                return (Token::ModuloAssign, start_pos);
2378            }
2379            ('%', ..) => return (Token::Modulo, start_pos),
2380
2381            ('@', ..) => return (Token::Reserved(Box::new("@".into())), start_pos),
2382
2383            ('$', ..) => return (Token::Reserved(Box::new("$".into())), start_pos),
2384
2385            ('?', '.') => {
2386                stream.eat_next_and_advance(pos);
2387                return (
2388                    #[cfg(not(feature = "no_object"))]
2389                    Token::Elvis,
2390                    #[cfg(feature = "no_object")]
2391                    Token::Reserved(Box::new("?.".into())),
2392                    start_pos,
2393                );
2394            }
2395            ('?', '?') => {
2396                stream.eat_next_and_advance(pos);
2397                return (Token::DoubleQuestion, start_pos);
2398            }
2399            ('?', '[') => {
2400                stream.eat_next_and_advance(pos);
2401                return (
2402                    #[cfg(not(feature = "no_index"))]
2403                    Token::QuestionBracket,
2404                    #[cfg(feature = "no_index")]
2405                    Token::Reserved(Box::new("?[".into())),
2406                    start_pos,
2407                );
2408            }
2409            ('?', ..) => return (Token::Reserved(Box::new("?".into())), start_pos),
2410
2411            // letter or underscore ...
2412            _ if is_id_first_alphabetic(c) || c == '_' => {
2413                return parse_identifier_token(stream, state, pos, start_pos, c);
2414            }
2415
2416            // \n
2417            ('\n', ..) => pos.new_line(),
2418
2419            // Whitespace - follows Rust's SPACE, TAB, CR, LF, FF which is the same as WhatWG.
2420            (ch, ..) if ch.is_ascii_whitespace() => (),
2421
2422            _ => {
2423                return (
2424                    Token::LexError(LERR::UnexpectedInput(c.to_string()).into()),
2425                    start_pos,
2426                )
2427            }
2428        }
2429    }
2430
2431    pos.advance();
2432
2433    (Token::EOF, *pos)
2434}
2435
2436/// Get the next token, parsing it as an identifier.
2437fn parse_identifier_token(
2438    stream: &mut (impl InputStream + ?Sized),
2439    state: &mut TokenizeState,
2440    pos: &mut Position,
2441    start_pos: Position,
2442    first_char: char,
2443) -> (Token, Position) {
2444    let mut identifier = SmartString::new_const();
2445    identifier.push(first_char);
2446    if let Some(ref mut last) = state.last_token {
2447        last.clear();
2448        last.push(first_char);
2449    }
2450
2451    while let Some(next_char) = stream.peek_next() {
2452        match next_char {
2453            x if is_id_continue(x) => {
2454                stream.eat_next_and_advance(pos);
2455                identifier.push(x);
2456                if let Some(ref mut last) = state.last_token {
2457                    last.push(x);
2458                }
2459            }
2460            _ => break,
2461        }
2462    }
2463
2464    if let Some(token) = Token::lookup_symbol_from_syntax(&identifier) {
2465        return (token, start_pos);
2466    }
2467
2468    if is_reserved_keyword_or_symbol(&identifier).0 {
2469        return (Token::Reserved(Box::new(identifier)), start_pos);
2470    }
2471
2472    if !is_valid_identifier(&identifier) {
2473        return (
2474            Token::LexError(LERR::MalformedIdentifier(identifier.to_string()).into()),
2475            start_pos,
2476        );
2477    }
2478
2479    (Token::Identifier(identifier.into()), start_pos)
2480}
2481
2482/// _(internals)_ Is a text string a valid identifier?
2483/// Exported under the `internals` feature only.
2484#[must_use]
2485pub fn is_valid_identifier(name: &str) -> bool {
2486    let mut first_alphabetic = false;
2487
2488    for ch in name.chars() {
2489        match ch {
2490            '_' => (),
2491            _ if is_id_first_alphabetic(ch) => first_alphabetic = true,
2492            _ if !first_alphabetic => return false,
2493            _ if char::is_ascii_alphanumeric(&ch) => (),
2494            _ => return false,
2495        }
2496    }
2497
2498    first_alphabetic
2499}
2500
2501/// _(internals)_ Is a text string a valid script-defined function name?
2502/// Exported under the `internals` feature only.
2503#[inline(always)]
2504#[must_use]
2505pub fn is_valid_function_name(name: &str) -> bool {
2506    is_valid_identifier(name)
2507        && !is_reserved_keyword_or_symbol(name).0
2508        && Token::lookup_symbol_from_syntax(name).is_none()
2509}
2510
/// Is a character valid to start an identifier?
///
/// With the `unicode-xid-ident` feature, any Unicode XID_Start character
/// qualifies; otherwise only ASCII alphabetic characters do.
#[inline(always)]
#[must_use]
#[allow(clippy::missing_const_for_fn)]
pub fn is_id_first_alphabetic(x: char) -> bool {
    #[cfg(feature = "unicode-xid-ident")]
    return unicode_xid::UnicodeXID::is_xid_start(x);
    #[cfg(not(feature = "unicode-xid-ident"))]
    return x.is_ascii_alphabetic();
}
2521
/// Is a character valid for an identifier?
///
/// With the `unicode-xid-ident` feature, any Unicode XID_Continue character
/// qualifies; otherwise only ASCII alphanumerics and `_` do.
#[inline(always)]
#[must_use]
#[allow(clippy::missing_const_for_fn)]
pub fn is_id_continue(x: char) -> bool {
    #[cfg(feature = "unicode-xid-ident")]
    return unicode_xid::UnicodeXID::is_xid_continue(x);
    #[cfg(not(feature = "unicode-xid-ident"))]
    return x.is_ascii_alphanumeric() || x == '_';
}
2532
/// Is a piece of syntax a reserved keyword or reserved symbol?
///
/// # Return values
///
/// The first `bool` indicates whether it is a reserved keyword or symbol.
///
/// The second `bool` indicates whether the keyword can be called normally as a function.
/// `false` if it is not a reserved keyword.
///
/// The third `bool` indicates whether the keyword can be called in method-call style.
/// `false` if it is not a reserved keyword or it cannot be called as a function.
#[inline]
#[must_use]
pub fn is_reserved_keyword_or_symbol(syntax: &str) -> (bool, bool, bool) {
    // This implementation is based upon a pre-calculated perfect-hash table
    // generated by GNU `gperf` on the list of reserved keywords/symbols.
    let utf8 = syntax.as_bytes();
    let len = utf8.len();

    // Quick reject: no reserved keyword/symbol has a length outside this range.
    if !(MIN_RESERVED_LEN..=MAX_RESERVED_LEN).contains(&len) {
        return (false, false, false);
    }

    // `gperf`-style hash: length plus association values of the second byte
    // (when present), the first byte, and the last byte.
    let mut hash_val = len;

    match len {
        1 => (),
        _ => hash_val += RESERVED_ASSOC_VALUES[utf8[1] as usize] as usize,
    }
    hash_val += RESERVED_ASSOC_VALUES[utf8[0] as usize] as usize;
    hash_val += RESERVED_ASSOC_VALUES[utf8[len - 1] as usize] as usize;

    // Quick reject: hash falls outside the occupied range of the table.
    if !(MIN_RESERVED_HASH_VALUE..=MAX_RESERVED_HASH_VALUE).contains(&hash_val) {
        return (false, false, false);
    }

    match RESERVED_LIST[hash_val] {
        // Empty slot in the hash table - no match.
        ("", ..) => (false, false, false),
        (s, true, a, b) => {
            // Fail early to avoid calling memcmp().
            // Since we are already working with bytes, might as well check the first one.
            let is_reserved = s.len() == len && s.as_bytes()[0] == utf8[0] && s == syntax;
            (is_reserved, is_reserved && a, is_reserved && a && b)
        }
        _ => (false, false, false),
    }
}
2580
/// _(internals)_ A type that implements the [`InputStream`] trait.
/// Exported under the `internals` feature only.
///
/// Multiple character streams are joined together to form one single stream.
pub struct MultiInputsStream<'a> {
    /// Buffered characters, if any.
    ///
    /// Holds up to two "ungotten" characters: slot 0 is filled first on
    /// `unget`, and the most recently buffered character is served first.
    pub buf: [Option<char>; 2],
    /// The current stream index.
    ///
    /// Streams with a lower index are already exhausted.
    pub index: usize,
    /// Input character streams.
    pub streams: StaticVec<Peekable<Chars<'a>>>,
}
2593
2594impl InputStream for MultiInputsStream<'_> {
2595    #[inline]
2596    fn unget(&mut self, ch: char) {
2597        match self.buf {
2598            [None, ..] => self.buf[0] = Some(ch),
2599            [_, None] => self.buf[1] = Some(ch),
2600            _ => unreachable!("cannot unget more than 2 characters!"),
2601        }
2602    }
2603    fn get_next(&mut self) -> Option<char> {
2604        match self.buf {
2605            [None, ..] => (),
2606            [ch @ Some(_), None] => {
2607                self.buf[0] = None;
2608                return ch;
2609            }
2610            [_, ch @ Some(_)] => {
2611                self.buf[1] = None;
2612                return ch;
2613            }
2614        }
2615
2616        loop {
2617            if self.index >= self.streams.len() {
2618                // No more streams
2619                return None;
2620            }
2621            if let Some(ch) = self.streams[self.index].next() {
2622                // Next character in main stream
2623                return Some(ch);
2624            }
2625            // Jump to the next stream
2626            self.index += 1;
2627        }
2628    }
2629    fn peek_next(&mut self) -> Option<char> {
2630        match self.buf {
2631            [None, ..] => (),
2632            [ch @ Some(_), None] => return ch,
2633            [_, ch @ Some(_)] => return ch,
2634        }
2635
2636        loop {
2637            if self.index >= self.streams.len() {
2638                // No more streams
2639                return None;
2640            }
2641            if let Some(&ch) = self.streams[self.index].peek() {
2642                // Next character in main stream
2643                return Some(ch);
2644            }
2645            // Jump to the next stream
2646            self.index += 1;
2647        }
2648    }
2649}
2650
/// _(internals)_ An iterator on a [`Token`] stream.
/// Exported under the `internals` feature only.
pub struct TokenIterator<'a> {
    /// Reference to the scripting `Engine`.
    pub engine: &'a Engine,
    /// Current tokenizing state.
    pub state: TokenizeState,
    /// Current position in the input.
    pub pos: Position,
    /// Input character stream (multiple inputs joined together).
    pub stream: MultiInputsStream<'a>,
    /// An optional processor function that maps a token to another.
    pub token_mapper: Option<&'a OnParseTokenCallback>,
}
2665
impl<'a> Iterator for TokenIterator<'a> {
    type Item = (Token, Position);

    fn next(&mut self) -> Option<Self::Item> {
        // Consult the shared control block inside a tight scope so the
        // `RefCell` borrow is released before tokenizing proceeds.
        let (within_interpolated, _char_mode, compress_script) = {
            let control = &mut *self.state.tokenizer_control.borrow_mut();

            if control.is_within_text {
                // Switch to text mode terminated by back-tick
                self.state.is_within_text_terminated_by = Some("`".to_string().into());
                // Reset it
                control.is_within_text = false;
            }

            // Check if in single-character mode (a one-shot flag: taking it resets it)
            #[cfg(not(feature = "no_custom_syntax"))]
            let in_char_mode = std::mem::take(&mut control.in_char_mode);

            (
                self.state.is_within_text_terminated_by.is_some(),
                #[cfg(not(feature = "no_custom_syntax"))]
                in_char_mode,
                #[cfg(feature = "no_custom_syntax")]
                false,
                control.compressed.is_some(),
            )
        };

        // In single-character mode, bypass the tokenizer entirely and yield
        // the next raw character (updating the position accordingly).
        #[cfg(not(feature = "no_custom_syntax"))]
        if _char_mode {
            if let Some(ch) = self.stream.get_next() {
                let pos = self.pos;
                match ch {
                    '\n' => self.pos.new_line(),
                    _ => self.pos.advance(),
                }
                return Some((Token::UnprocessedRawChar(ch), pos));
            }
        }

        let (token, pos) = match get_next_token(&mut self.stream, &mut self.state, &mut self.pos) {
            // {EOF}
            r @ (Token::EOF, _) => return Some(r),
            // {EOF} after unterminated string.
            // The only case where `TokenizeState.is_within_text_terminated_by` is set is when
            // a verbatim string or a string with continuation encounters {EOF}.
            // This is necessary to handle such cases for line-by-line parsing, but for an entire
            // script it is a syntax error.
            (Token::StringConstant(..), pos) if self.state.is_within_text_terminated_by.is_some() => {
                self.state.is_within_text_terminated_by = None;
                return Some((Token::LexError(LERR::UnterminatedString.into()), pos));
            }
            // Reserved keyword/symbol - re-map well-known mistakes from other
            // languages into helpful error messages, unless overridden as a
            // custom keyword.
            (Token::Reserved(s), pos) => (match
                (s.as_str(),
                    #[cfg(not(feature = "no_custom_syntax"))]
                    self.engine.custom_keywords.contains_key(&*s),
                    #[cfg(feature = "no_custom_syntax")]
                    false
                )
            {
                ("===", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                    "'===' is not a valid operator. This is not JavaScript! Should it be '=='?".to_string(),
                ).into()),
                ("!==", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                    "'!==' is not a valid operator. This is not JavaScript! Should it be '!='?".to_string(),
                ).into()),
                ("->", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                    "'->' is not a valid symbol. This is not C or C++!".to_string()).into()),
                ("<-", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                    "'<-' is not a valid symbol. This is not Go! Should it be '<='?".to_string(),
                ).into()),
                (":=", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                    "':=' is not a valid assignment operator. This is not Go or Pascal! Should it be simply '='?".to_string(),
                ).into()),
                (":;", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                    "':;' is not a valid symbol. Should it be '::'?".to_string(),
                ).into()),
                ("::<", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                    "'::<>' is not a valid symbol. This is not Rust! Should it be '::'?".to_string(),
                ).into()),
                ("(*" | "*)", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                    "'(* .. *)' is not a valid comment format. This is not Pascal! Should it be '/* .. */'?".to_string(),
                ).into()),
                ("# {", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
                    "'#' is not a valid symbol. Should it be '#{'?".to_string(),
                ).into()),
                // Reserved keyword/operator that is custom.
                #[cfg(not(feature = "no_custom_syntax"))]
                (.., true) => Token::Custom(s),
                #[cfg(feature = "no_custom_syntax")]
                (.., true) => unreachable!("no custom operators"),
                // Reserved keyword that is not custom and disabled.
                (token, false) if self.engine.is_symbol_disabled(token) => {
                    let msg = format!("reserved {} '{token}' is disabled", if is_valid_identifier(token) { "keyword"} else {"symbol"});
                    Token::LexError(LERR::ImproperSymbol(s.to_string(), msg).into())
                },
                // Reserved keyword/operator that is not custom.
                (.., false) => Token::Reserved(s),
            }, pos),
            // Custom keyword
            #[cfg(not(feature = "no_custom_syntax"))]
            (Token::Identifier(s), pos) if self.engine.custom_keywords.contains_key(&*s) => {
                (Token::Custom(s), pos)
            }
            // Custom keyword/symbol - must be disabled
            #[cfg(not(feature = "no_custom_syntax"))]
            (token, pos) if token.is_literal() && self.engine.custom_keywords.contains_key(token.literal_syntax()) => {
                // Active standard keyword should never be a custom keyword!
                debug_assert!(self.engine.is_symbol_disabled(token.literal_syntax()), "{:?} is an active keyword", token);

                (Token::Custom(Box::new(token.literal_syntax().into())), pos)
            }
            // Disabled symbol
            (token, pos) if token.is_literal() && self.engine.is_symbol_disabled(token.literal_syntax()) => {
                (Token::Reserved(Box::new(token.literal_syntax().into())), pos)
            }
            // Normal symbol
            r => r,
        };

        // Run the mapper, if any
        let token = match self.token_mapper {
            Some(func) => func(token, pos, &self.state),
            None => token,
        };

        // Collect the compressed script, if needed
        if compress_script {
            let control = &mut *self.state.tokenizer_control.borrow_mut();

            if token != Token::EOF {
                if let Some(ref mut compressed) = control.compressed {
                    use std::fmt::Write;

                    let last_token = self.state.last_token.as_ref().unwrap();
                    let mut buf = SmartString::new_const();

                    if last_token.is_empty() {
                        write!(buf, "{token}").unwrap();
                    } else if within_interpolated
                        && matches!(
                            token,
                            Token::StringConstant(..) | Token::InterpolatedString(..)
                        )
                    {
                        // NOTE(review): skips the first character of the raw token text -
                        // presumably the opening back-tick/brace of the interpolated
                        // segment; confirm against `get_next_token`'s `last_token` format.
                        *compressed += &last_token[1..];
                    } else {
                        buf = last_token.clone();
                    }

                    // Insert a separating space when two identifier-like tokens
                    // would otherwise run together (e.g. `let` + `x`).
                    if !buf.is_empty() && !compressed.is_empty() {
                        let cur = buf.chars().next().unwrap();

                        if cur == '_' || is_id_first_alphabetic(cur) || is_id_continue(cur) {
                            let prev = compressed.chars().last().unwrap();

                            if prev == '_' || is_id_first_alphabetic(prev) || is_id_continue(prev) {
                                *compressed += " ";
                            }
                        }
                    }

                    *compressed += &buf;
                }
            }
        }

        Some((token, pos))
    }
}
2837
// As written above, `next` always returns `Some` (even {EOF} is yielded as a
// token), so the `FusedIterator` contract is satisfied trivially.
impl FusedIterator for TokenIterator<'_> {}
2839
2840impl Engine {
2841    /// _(internals)_ Tokenize an input text stream.
2842    /// Exported under the `internals` feature only.
2843    #[expose_under_internals]
2844    #[inline(always)]
2845    #[must_use]
2846    fn lex<'a>(
2847        &'a self,
2848        inputs: impl IntoIterator<Item = &'a (impl AsRef<str> + 'a)>,
2849    ) -> (TokenIterator<'a>, TokenizerControl) {
2850        self.lex_raw(inputs, self.token_mapper.as_deref())
2851    }
2852    /// _(internals)_ Tokenize an input text stream with a mapping function.
2853    /// Exported under the `internals` feature only.
2854    #[expose_under_internals]
2855    #[inline(always)]
2856    #[must_use]
2857    fn lex_with_map<'a>(
2858        &'a self,
2859        inputs: impl IntoIterator<Item = &'a (impl AsRef<str> + 'a)>,
2860        token_mapper: &'a OnParseTokenCallback,
2861    ) -> (TokenIterator<'a>, TokenizerControl) {
2862        self.lex_raw(inputs, Some(token_mapper))
2863    }
2864    /// Tokenize an input text stream with an optional mapping function.
2865    #[inline]
2866    #[must_use]
2867    pub(crate) fn lex_raw<'a>(
2868        &'a self,
2869        inputs: impl IntoIterator<Item = &'a (impl AsRef<str> + 'a)>,
2870        token_mapper: Option<&'a OnParseTokenCallback>,
2871    ) -> (TokenIterator<'a>, TokenizerControl) {
2872        let buffer: TokenizerControl = RefCell::new(TokenizerControlBlock::new()).into();
2873        let buffer2 = buffer.clone();
2874
2875        (
2876            TokenIterator {
2877                engine: self,
2878                state: TokenizeState {
2879                    #[cfg(not(feature = "unchecked"))]
2880                    max_string_len: std::num::NonZeroUsize::new(self.max_string_size()),
2881                    next_token_cannot_be_unary: false,
2882                    tokenizer_control: buffer,
2883                    comment_level: 0,
2884                    include_comments: false,
2885                    is_within_text_terminated_by: None,
2886                    last_token: None,
2887                },
2888                pos: Position::new(1, 0),
2889                stream: MultiInputsStream {
2890                    buf: [None, None],
2891                    streams: inputs
2892                        .into_iter()
2893                        .map(|s| s.as_ref().chars().peekable())
2894                        .collect(),
2895                    index: 0,
2896                },
2897                token_mapper,
2898            },
2899            buffer2,
2900        )
2901    }
2902}