spo_rhai/
tokenizer.rs

1//! Main module defining the lexer and parser.
2
3use crate::engine::Precedence;
4use crate::func::native::OnParseTokenCallback;
5use crate::{Engine, Identifier, LexError, Position, SmartString, StaticVec, INT, UNSIGNED_INT};
6#[cfg(feature = "no_std")]
7use std::prelude::v1::*;
8use std::{
9    cell::RefCell,
10    char, fmt,
11    iter::{FusedIterator, Peekable},
12    rc::Rc,
13    str::{Chars, FromStr},
14};
15
16/// _(internals)_ A type containing commands to control the tokenizer.
17#[derive(Debug, Clone, Eq, PartialEq, Default, Hash)]
18pub struct TokenizerControlBlock {
19    /// Is the current tokenizer position within an interpolated text string?
20    ///
21    /// This flag allows switching the tokenizer back to _text_ parsing after an interpolation stream.
22    pub is_within_text: bool,
23    /// Global comments.
24    #[cfg(feature = "metadata")]
25    pub global_comments: String,
26    /// Whitespace-compressed version of the script (if any).
27    ///
28    /// Set to `Some` in order to collect a compressed script.
29    pub compressed: Option<String>,
30}
31
32impl TokenizerControlBlock {
33    /// Create a new `TokenizerControlBlock`.
34    #[inline]
35    #[must_use]
36    pub const fn new() -> Self {
37        Self {
38            is_within_text: false,
39            #[cfg(feature = "metadata")]
40            global_comments: String::new(),
41            compressed: None,
42        }
43    }
44}
45
46/// _(internals)_ A shared object that allows control of the tokenizer from outside.
47pub type TokenizerControl = Rc<RefCell<TokenizerControlBlock>>;
48
49type LERR = LexError;
50
51/// Separator character for numbers.
52const NUMBER_SEPARATOR: char = '_';
53
54/// A stream of tokens.
55pub type TokenStream<'a> = Peekable<TokenIterator<'a>>;
56
57/// _(internals)_ A Rhai language token.
58/// Exported under the `internals` feature only.
59#[derive(Debug, PartialEq, Clone, Hash)]
60#[non_exhaustive]
61pub enum Token {
62    /// An `INT` constant.
63    IntegerConstant(INT),
64    /// A `FLOAT` constant, including its text representation.
65    ///
66    /// Reserved under the `no_float` feature.
67    #[cfg(not(feature = "no_float"))]
68    FloatConstant(Box<(crate::types::FloatWrapper<crate::FLOAT>, Identifier)>),
69    /// A [`Decimal`][rust_decimal::Decimal] constant.
70    ///
71    /// Requires the `decimal` feature, including its text representation.
72    #[cfg(feature = "decimal")]
73    DecimalConstant(Box<(rust_decimal::Decimal, Identifier)>),
74    /// An identifier.
75    Identifier(Box<Identifier>),
76    /// A character constant.
77    CharConstant(char),
78    /// A string constant.
79    StringConstant(Box<SmartString>),
80    /// An interpolated string.
81    InterpolatedString(Box<SmartString>),
82    /// `{`
83    LeftBrace,
84    /// `}`
85    RightBrace,
86    /// `(`
87    LeftParen,
88    /// `)`
89    RightParen,
90    /// `[`
91    LeftBracket,
92    /// `]`
93    RightBracket,
94    /// `()`
95    Unit,
96    /// `+`
97    Plus,
98    /// `+` (unary)
99    UnaryPlus,
100    /// `-`
101    Minus,
102    /// `-` (unary)
103    UnaryMinus,
104    /// `*`
105    Multiply,
106    /// `/`
107    Divide,
108    /// `%`
109    Modulo,
110    /// `**`
111    PowerOf,
112    /// `<<`
113    LeftShift,
114    /// `>>`
115    RightShift,
116    /// `;`
117    SemiColon,
118    /// `:`
119    Colon,
120    /// `::`
121    DoubleColon,
122    /// `=>`
123    DoubleArrow,
124    /// `_`
125    Underscore,
126    /// `,`
127    Comma,
128    /// `.`
129    Period,
130    /// `?.`
131    ///
132    /// Reserved under the `no_object` feature.
133    #[cfg(not(feature = "no_object"))]
134    Elvis,
135    /// `??`
136    DoubleQuestion,
137    /// `?[`
138    ///
139    /// Reserved under the `no_object` feature.
140    #[cfg(not(feature = "no_index"))]
141    QuestionBracket,
142    /// `..`
143    ExclusiveRange,
144    /// `..=`
145    InclusiveRange,
146    /// `#{`
147    MapStart,
148    /// `=`
149    Equals,
150    /// `true`
151    True,
152    /// `false`
153    False,
154    /// `let`
155    Let,
156    /// `const`
157    Const,
158    /// `if`
159    If,
160    /// `else`
161    Else,
162    /// `switch`
163    Switch,
164    /// `do`
165    Do,
166    /// `while`
167    While,
168    /// `until`
169    Until,
170    /// `loop`
171    Loop,
172    /// `for`
173    For,
174    /// `in`
175    In,
176    /// `!in`
177    NotIn,
178    /// `<`
179    LessThan,
180    /// `>`
181    GreaterThan,
182    /// `<=`
183    LessThanEqualsTo,
184    /// `>=`
185    GreaterThanEqualsTo,
186    /// `==`
187    EqualsTo,
188    /// `!=`
189    NotEqualsTo,
190    /// `!`
191    Bang,
192    /// `|`
193    Pipe,
194    /// `||`
195    Or,
196    /// `^`
197    XOr,
198    /// `&`
199    Ampersand,
200    /// `&&`
201    And,
202    /// `fn`
203    ///
204    /// Reserved under the `no_function` feature.
205    #[cfg(not(feature = "no_function"))]
206    Fn,
207    /// `continue`
208    Continue,
209    /// `break`
210    Break,
211    /// `return`
212    Return,
213    /// `throw`
214    Throw,
215    /// `try`
216    Try,
217    /// `catch`
218    Catch,
219    /// `+=`
220    PlusAssign,
221    /// `-=`
222    MinusAssign,
223    /// `*=`
224    MultiplyAssign,
225    /// `/=`
226    DivideAssign,
227    /// `<<=`
228    LeftShiftAssign,
229    /// `>>=`
230    RightShiftAssign,
231    /// `&=`
232    AndAssign,
233    /// `|=`
234    OrAssign,
235    /// `^=`
236    XOrAssign,
237    /// `%=`
238    ModuloAssign,
239    /// `**=`
240    PowerOfAssign,
241    /// `private`
242    ///
243    /// Reserved under the `no_function` feature.
244    #[cfg(not(feature = "no_function"))]
245    Private,
246    /// `import`
247    ///
248    /// Reserved under the `no_module` feature.
249    #[cfg(not(feature = "no_module"))]
250    Import,
251    /// `export`
252    ///
253    /// Reserved under the `no_module` feature.
254    #[cfg(not(feature = "no_module"))]
255    Export,
256    /// `as`
257    ///
258    /// Reserved under the `no_module` feature.
259    #[cfg(not(feature = "no_module"))]
260    As,
261    /// A lexer error.
262    LexError(Box<LexError>),
263    /// A comment block.
264    Comment(Box<String>),
265    /// A reserved symbol.
266    Reserved(Box<Identifier>),
267    /// A custom keyword.
268    ///
269    /// Not available under `no_custom_syntax`.
270    #[cfg(not(feature = "no_custom_syntax"))]
271    Custom(Box<Identifier>),
272    /// End of the input stream.
273    /// Used as a placeholder for the end of input.
274    EOF,
275}
276
277impl fmt::Display for Token {
278    #[inline(always)]
279    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
280        #[allow(clippy::enum_glob_use)]
281        use Token::*;
282
283        match self {
284            IntegerConstant(i) => write!(f, "{i}"),
285            #[cfg(not(feature = "no_float"))]
286            FloatConstant(v) => write!(f, "{}", v.0),
287            #[cfg(feature = "decimal")]
288            DecimalConstant(d) => write!(f, "{}", d.0),
289            StringConstant(s) => write!(f, r#""{s}""#),
290            InterpolatedString(..) => f.write_str("string"),
291            CharConstant(c) => write!(f, "{c}"),
292            Identifier(s) => f.write_str(s),
293            Reserved(s) => f.write_str(s),
294            #[cfg(not(feature = "no_custom_syntax"))]
295            Custom(s) => f.write_str(s),
296            LexError(err) => write!(f, "{err}"),
297            Comment(s) => f.write_str(s),
298
299            EOF => f.write_str("{EOF}"),
300
301            token => f.write_str(token.literal_syntax()),
302        }
303    }
304}
305
306// Table-driven keyword recognizer generated by GNU `gperf` on the file `tools/keywords.txt`.
307//
308// When adding new keywords, make sure to update `tools/keywords.txt` and re-generate this.
309
310const MIN_KEYWORD_LEN: usize = 1;
311const MAX_KEYWORD_LEN: usize = 8;
312const MIN_KEYWORD_HASH_VALUE: usize = 1;
313const MAX_KEYWORD_HASH_VALUE: usize = 152;
314
315static KEYWORD_ASSOC_VALUES: [u8; 257] = [
316    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
317    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 115, 153, 100, 153, 110,
318    105, 40, 80, 2, 20, 25, 125, 95, 15, 40, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 55,
319    35, 10, 5, 0, 30, 110, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
320    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 120, 105, 100, 85, 90, 153, 125, 5,
321    0, 125, 35, 10, 100, 153, 20, 0, 153, 10, 0, 45, 55, 0, 153, 50, 55, 5, 0, 153, 0, 0, 35, 153,
322    45, 50, 30, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
323    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
324    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
325    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
326    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
327    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
328    153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
329    153,
330];
331static KEYWORDS_LIST: [(&str, Token); 153] = [
332    ("", Token::EOF),
333    (">", Token::GreaterThan),
334    (">=", Token::GreaterThanEqualsTo),
335    (")", Token::RightParen),
336    ("", Token::EOF),
337    ("const", Token::Const),
338    ("=", Token::Equals),
339    ("==", Token::EqualsTo),
340    ("continue", Token::Continue),
341    ("", Token::EOF),
342    ("catch", Token::Catch),
343    ("<", Token::LessThan),
344    ("<=", Token::LessThanEqualsTo),
345    ("for", Token::For),
346    ("loop", Token::Loop),
347    ("", Token::EOF),
348    (".", Token::Period),
349    ("<<", Token::LeftShift),
350    ("<<=", Token::LeftShiftAssign),
351    ("", Token::EOF),
352    ("false", Token::False),
353    ("*", Token::Multiply),
354    ("*=", Token::MultiplyAssign),
355    ("let", Token::Let),
356    ("", Token::EOF),
357    ("while", Token::While),
358    ("+", Token::Plus),
359    ("+=", Token::PlusAssign),
360    ("", Token::EOF),
361    ("", Token::EOF),
362    ("throw", Token::Throw),
363    ("}", Token::RightBrace),
364    (">>", Token::RightShift),
365    (">>=", Token::RightShiftAssign),
366    ("", Token::EOF),
367    ("", Token::EOF),
368    (";", Token::SemiColon),
369    ("=>", Token::DoubleArrow),
370    ("", Token::EOF),
371    ("else", Token::Else),
372    ("", Token::EOF),
373    ("/", Token::Divide),
374    ("/=", Token::DivideAssign),
375    ("", Token::EOF),
376    ("", Token::EOF),
377    ("", Token::EOF),
378    ("{", Token::LeftBrace),
379    ("**", Token::PowerOf),
380    ("**=", Token::PowerOfAssign),
381    ("", Token::EOF),
382    ("", Token::EOF),
383    ("|", Token::Pipe),
384    ("|=", Token::OrAssign),
385    ("", Token::EOF),
386    ("", Token::EOF),
387    ("", Token::EOF),
388    (":", Token::Colon),
389    ("..", Token::ExclusiveRange),
390    ("..=", Token::InclusiveRange),
391    ("", Token::EOF),
392    ("until", Token::Until),
393    ("switch", Token::Switch),
394    #[cfg(not(feature = "no_function"))]
395    ("private", Token::Private),
396    #[cfg(feature = "no_function")]
397    ("", Token::EOF),
398    ("try", Token::Try),
399    ("true", Token::True),
400    ("break", Token::Break),
401    ("return", Token::Return),
402    #[cfg(not(feature = "no_function"))]
403    ("fn", Token::Fn),
404    #[cfg(feature = "no_function")]
405    ("", Token::EOF),
406    ("", Token::EOF),
407    ("", Token::EOF),
408    ("", Token::EOF),
409    #[cfg(not(feature = "no_module"))]
410    ("import", Token::Import),
411    #[cfg(feature = "no_module")]
412    ("", Token::EOF),
413    #[cfg(not(feature = "no_object"))]
414    ("?.", Token::Elvis),
415    #[cfg(feature = "no_object")]
416    ("", Token::EOF),
417    ("", Token::EOF),
418    ("", Token::EOF),
419    ("", Token::EOF),
420    #[cfg(not(feature = "no_module"))]
421    ("export", Token::Export),
422    #[cfg(feature = "no_module")]
423    ("", Token::EOF),
424    ("in", Token::In),
425    ("", Token::EOF),
426    ("", Token::EOF),
427    ("", Token::EOF),
428    ("(", Token::LeftParen),
429    ("||", Token::Or),
430    ("", Token::EOF),
431    ("", Token::EOF),
432    ("", Token::EOF),
433    ("^", Token::XOr),
434    ("^=", Token::XOrAssign),
435    ("", Token::EOF),
436    ("", Token::EOF),
437    ("", Token::EOF),
438    ("_", Token::Underscore),
439    ("::", Token::DoubleColon),
440    ("", Token::EOF),
441    ("", Token::EOF),
442    ("", Token::EOF),
443    ("-", Token::Minus),
444    ("-=", Token::MinusAssign),
445    ("", Token::EOF),
446    ("", Token::EOF),
447    ("", Token::EOF),
448    ("]", Token::RightBracket),
449    ("()", Token::Unit),
450    ("", Token::EOF),
451    ("", Token::EOF),
452    ("", Token::EOF),
453    ("&", Token::Ampersand),
454    ("&=", Token::AndAssign),
455    ("", Token::EOF),
456    ("", Token::EOF),
457    ("", Token::EOF),
458    ("%", Token::Modulo),
459    ("%=", Token::ModuloAssign),
460    ("", Token::EOF),
461    ("", Token::EOF),
462    ("", Token::EOF),
463    ("!", Token::Bang),
464    ("!=", Token::NotEqualsTo),
465    ("!in", Token::NotIn),
466    ("", Token::EOF),
467    ("", Token::EOF),
468    ("[", Token::LeftBracket),
469    ("if", Token::If),
470    ("", Token::EOF),
471    ("", Token::EOF),
472    ("", Token::EOF),
473    (",", Token::Comma),
474    ("do", Token::Do),
475    ("", Token::EOF),
476    ("", Token::EOF),
477    ("", Token::EOF),
478    ("", Token::EOF),
479    #[cfg(not(feature = "no_module"))]
480    ("as", Token::As),
481    #[cfg(feature = "no_module")]
482    ("", Token::EOF),
483    ("", Token::EOF),
484    ("", Token::EOF),
485    ("", Token::EOF),
486    ("", Token::EOF),
487    #[cfg(not(feature = "no_index"))]
488    ("?[", Token::QuestionBracket),
489    #[cfg(feature = "no_index")]
490    ("", Token::EOF),
491    ("", Token::EOF),
492    ("", Token::EOF),
493    ("", Token::EOF),
494    ("", Token::EOF),
495    ("??", Token::DoubleQuestion),
496    ("", Token::EOF),
497    ("", Token::EOF),
498    ("", Token::EOF),
499    ("", Token::EOF),
500    ("&&", Token::And),
501    ("", Token::EOF),
502    ("", Token::EOF),
503    ("", Token::EOF),
504    ("", Token::EOF),
505    ("#{", Token::MapStart),
506];
507
508// Table-driven reserved symbol recognizer generated by GNU `gperf` on the file `tools/reserved.txt`.
509//
510// When adding new reserved symbols, make sure to update `tools/reserved.txt` and re-generate this.
511
512const MIN_RESERVED_LEN: usize = 1;
513const MAX_RESERVED_LEN: usize = 10;
514const MIN_RESERVED_HASH_VALUE: usize = 1;
515const MAX_RESERVED_HASH_VALUE: usize = 149;
516
517static RESERVED_ASSOC_VALUES: [u8; 256] = [
518    150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
519    150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 10, 150, 5, 35, 150, 150,
520    150, 45, 35, 30, 30, 150, 20, 15, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 35,
521    30, 15, 5, 25, 0, 25, 150, 150, 150, 150, 150, 65, 150, 150, 150, 150, 150, 150, 150, 150, 150,
522    150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 40, 150, 150, 150, 150, 150, 0, 150, 0,
523    0, 0, 15, 45, 10, 15, 150, 150, 35, 25, 10, 50, 0, 150, 5, 0, 15, 0, 5, 25, 45, 15, 150, 150,
524    25, 150, 20, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
525    150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
526    150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
527    150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
528    150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
529    150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
530    150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
531];
532static RESERVED_LIST: [(&str, bool, bool, bool); 150] = [
533    ("", false, false, false),
534    ("?", true, false, false),
535    ("as", cfg!(feature = "no_module"), false, false),
536    ("use", true, false, false),
537    ("case", true, false, false),
538    ("async", true, false, false),
539    ("public", true, false, false),
540    ("package", true, false, false),
541    ("", false, false, false),
542    ("", false, false, false),
543    ("super", true, false, false),
544    ("#", true, false, false),
545    ("private", cfg!(feature = "no_function"), false, false),
546    ("var", true, false, false),
547    ("protected", true, false, false),
548    ("spawn", true, false, false),
549    ("shared", true, false, false),
550    ("is", true, false, false),
551    ("===", true, false, false),
552    ("sync", true, false, false),
553    ("curry", true, true, true),
554    ("static", true, false, false),
555    ("default", true, false, false),
556    ("!==", true, false, false),
557    ("is_shared", cfg!(not(feature = "no_closure")), true, true),
558    ("print", true, true, false),
559    ("", false, false, false),
560    ("#!", true, false, false),
561    ("", false, false, false),
562    ("this", true, false, false),
563    ("is_def_var", true, true, false),
564    ("thread", true, false, false),
565    ("?.", cfg!(feature = "no_object"), false, false),
566    ("", false, false, false),
567    ("is_def_fn", cfg!(not(feature = "no_function")), true, false),
568    ("yield", true, false, false),
569    ("", false, false, false),
570    ("fn", cfg!(feature = "no_function"), false, false),
571    ("new", true, false, false),
572    ("call", true, true, true),
573    ("match", true, false, false),
574    ("~", true, false, false),
575    ("!.", true, false, false),
576    ("", false, false, false),
577    ("eval", true, true, false),
578    ("await", true, false, false),
579    ("", false, false, false),
580    (":=", true, false, false),
581    ("...", true, false, false),
582    ("null", true, false, false),
583    ("debug", true, true, false),
584    ("@", true, false, false),
585    ("type_of", true, true, true),
586    ("", false, false, false),
587    ("with", true, false, false),
588    ("", false, false, false),
589    ("", false, false, false),
590    ("<-", true, false, false),
591    ("", false, false, false),
592    ("void", true, false, false),
593    ("", false, false, false),
594    ("import", cfg!(feature = "no_module"), false, false),
595    ("--", true, false, false),
596    ("nil", true, false, false),
597    ("exit", false, false, false),
598    ("", false, false, false),
599    ("export", cfg!(feature = "no_module"), false, false),
600    ("<|", true, false, false),
601    ("", false, false, false),
602    ("", false, false, false),
603    ("", false, false, false),
604    ("$", true, false, false),
605    ("->", true, false, false),
606    ("", false, false, false),
607    ("", false, false, false),
608    ("", false, false, false),
609    ("", false, false, false),
610    ("|>", true, false, false),
611    ("", false, false, false),
612    ("", false, false, false),
613    ("", false, false, false),
614    ("module", true, false, false),
615    ("?[", cfg!(feature = "no_index"), false, false),
616    ("", false, false, false),
617    ("", false, false, false),
618    ("", false, false, false),
619    ("", false, false, false),
620    ("Fn", true, true, false),
621    ("::<", true, false, false),
622    ("", false, false, false),
623    ("", false, false, false),
624    ("", false, false, false),
625    ("++", true, false, false),
626    ("", false, false, false),
627    ("", false, false, false),
628    ("", false, false, false),
629    ("", false, false, false),
630    (":;", true, false, false),
631    ("", false, false, false),
632    ("", false, false, false),
633    ("", false, false, false),
634    ("", false, false, false),
635    ("*)", true, false, false),
636    ("", false, false, false),
637    ("", false, false, false),
638    ("", false, false, false),
639    ("", false, false, false),
640    ("(*", true, false, false),
641    ("", false, false, false),
642    ("", false, false, false),
643    ("", false, false, false),
644    ("", false, false, false),
645    ("", false, false, false),
646    ("", false, false, false),
647    ("", false, false, false),
648    ("", false, false, false),
649    ("", false, false, false),
650    ("", false, false, false),
651    ("", false, false, false),
652    ("", false, false, false),
653    ("", false, false, false),
654    ("", false, false, false),
655    ("", false, false, false),
656    ("", false, false, false),
657    ("", false, false, false),
658    ("", false, false, false),
659    ("", false, false, false),
660    ("", false, false, false),
661    ("", false, false, false),
662    ("", false, false, false),
663    ("", false, false, false),
664    ("", false, false, false),
665    ("", false, false, false),
666    ("", false, false, false),
667    ("", false, false, false),
668    ("", false, false, false),
669    ("", false, false, false),
670    ("", false, false, false),
671    ("", false, false, false),
672    ("", false, false, false),
673    ("", false, false, false),
674    ("", false, false, false),
675    ("", false, false, false),
676    ("", false, false, false),
677    ("", false, false, false),
678    ("", false, false, false),
679    ("", false, false, false),
680    ("go", true, false, false),
681    ("", false, false, false),
682    ("goto", true, false, false),
683];
684
685impl Token {
686    /// Is the token a literal symbol?
687    #[must_use]
688    pub const fn is_literal(&self) -> bool {
689        #[allow(clippy::enum_glob_use)]
690        use Token::*;
691
692        match self {
693            IntegerConstant(..) => false,
694            #[cfg(not(feature = "no_float"))]
695            FloatConstant(..) => false,
696            #[cfg(feature = "decimal")]
697            DecimalConstant(..) => false,
698            StringConstant(..)
699            | InterpolatedString(..)
700            | CharConstant(..)
701            | Identifier(..)
702            | Reserved(..) => false,
703            #[cfg(not(feature = "no_custom_syntax"))]
704            Custom(..) => false,
705            LexError(..) | Comment(..) => false,
706
707            EOF => false,
708
709            _ => true,
710        }
711    }
712    /// Get the literal syntax of the token.
713    ///
714    /// # Panics
715    ///
716    /// Panics if the token is not a literal symbol.
717    #[must_use]
718    pub const fn literal_syntax(&self) -> &'static str {
719        #[allow(clippy::enum_glob_use)]
720        use Token::*;
721
722        match self {
723            LeftBrace => "{",
724            RightBrace => "}",
725            LeftParen => "(",
726            RightParen => ")",
727            LeftBracket => "[",
728            RightBracket => "]",
729            Unit => "()",
730            Plus => "+",
731            UnaryPlus => "+",
732            Minus => "-",
733            UnaryMinus => "-",
734            Multiply => "*",
735            Divide => "/",
736            SemiColon => ";",
737            Colon => ":",
738            DoubleColon => "::",
739            DoubleArrow => "=>",
740            Underscore => "_",
741            Comma => ",",
742            Period => ".",
743            #[cfg(not(feature = "no_object"))]
744            Elvis => "?.",
745            DoubleQuestion => "??",
746            #[cfg(not(feature = "no_index"))]
747            QuestionBracket => "?[",
748            ExclusiveRange => "..",
749            InclusiveRange => "..=",
750            MapStart => "#{",
751            Equals => "=",
752            True => "true",
753            False => "false",
754            Let => "let",
755            Const => "const",
756            If => "if",
757            Else => "else",
758            Switch => "switch",
759            Do => "do",
760            While => "while",
761            Until => "until",
762            Loop => "loop",
763            For => "for",
764            In => "in",
765            NotIn => "!in",
766            LessThan => "<",
767            GreaterThan => ">",
768            Bang => "!",
769            LessThanEqualsTo => "<=",
770            GreaterThanEqualsTo => ">=",
771            EqualsTo => "==",
772            NotEqualsTo => "!=",
773            Pipe => "|",
774            Or => "||",
775            Ampersand => "&",
776            And => "&&",
777            Continue => "continue",
778            Break => "break",
779            Return => "return",
780            Throw => "throw",
781            Try => "try",
782            Catch => "catch",
783            PlusAssign => "+=",
784            MinusAssign => "-=",
785            MultiplyAssign => "*=",
786            DivideAssign => "/=",
787            LeftShiftAssign => "<<=",
788            RightShiftAssign => ">>=",
789            AndAssign => "&=",
790            OrAssign => "|=",
791            XOrAssign => "^=",
792            LeftShift => "<<",
793            RightShift => ">>",
794            XOr => "^",
795            Modulo => "%",
796            ModuloAssign => "%=",
797            PowerOf => "**",
798            PowerOfAssign => "**=",
799
800            #[cfg(not(feature = "no_function"))]
801            Fn => "fn",
802            #[cfg(not(feature = "no_function"))]
803            Private => "private",
804
805            #[cfg(not(feature = "no_module"))]
806            Import => "import",
807            #[cfg(not(feature = "no_module"))]
808            Export => "export",
809            #[cfg(not(feature = "no_module"))]
810            As => "as",
811
812            _ => panic!("token is not a literal symbol"),
813        }
814    }
815
816    /// Is this token an op-assignment operator?
817    #[inline]
818    #[must_use]
819    pub const fn is_op_assignment(&self) -> bool {
820        #[allow(clippy::enum_glob_use)]
821        use Token::*;
822
823        matches!(
824            self,
825            PlusAssign
826                | MinusAssign
827                | MultiplyAssign
828                | DivideAssign
829                | LeftShiftAssign
830                | RightShiftAssign
831                | ModuloAssign
832                | PowerOfAssign
833                | AndAssign
834                | OrAssign
835                | XOrAssign
836        )
837    }
838
839    /// Get the corresponding operator of the token if it is an op-assignment operator.
840    #[must_use]
841    pub const fn get_base_op_from_assignment(&self) -> Option<Self> {
842        #[allow(clippy::enum_glob_use)]
843        use Token::*;
844
845        Some(match self {
846            PlusAssign => Plus,
847            MinusAssign => Minus,
848            MultiplyAssign => Multiply,
849            DivideAssign => Divide,
850            LeftShiftAssign => LeftShift,
851            RightShiftAssign => RightShift,
852            ModuloAssign => Modulo,
853            PowerOfAssign => PowerOf,
854            AndAssign => Ampersand,
855            OrAssign => Pipe,
856            XOrAssign => XOr,
857            _ => return None,
858        })
859    }
860
861    /// Has this token a corresponding op-assignment operator?
862    #[inline]
863    #[must_use]
864    pub const fn has_op_assignment(&self) -> bool {
865        #[allow(clippy::enum_glob_use)]
866        use Token::*;
867
868        matches!(
869            self,
870            Plus | Minus
871                | Multiply
872                | Divide
873                | LeftShift
874                | RightShift
875                | Modulo
876                | PowerOf
877                | Ampersand
878                | Pipe
879                | XOr
880        )
881    }
882
883    /// Get the corresponding op-assignment operator of the token.
884    #[must_use]
885    pub const fn convert_to_op_assignment(&self) -> Option<Self> {
886        #[allow(clippy::enum_glob_use)]
887        use Token::*;
888
889        Some(match self {
890            Plus => PlusAssign,
891            Minus => MinusAssign,
892            Multiply => MultiplyAssign,
893            Divide => DivideAssign,
894            LeftShift => LeftShiftAssign,
895            RightShift => RightShiftAssign,
896            Modulo => ModuloAssign,
897            PowerOf => PowerOfAssign,
898            Ampersand => AndAssign,
899            Pipe => OrAssign,
900            XOr => XOrAssign,
901            _ => return None,
902        })
903    }
904
905    /// Reverse lookup a symbol token from a piece of syntax.
906    #[inline]
907    #[must_use]
908    pub fn lookup_symbol_from_syntax(syntax: &str) -> Option<Self> {
909        // This implementation is based upon a pre-calculated table generated
910        // by GNU `gperf` on the list of keywords.
911        let utf8 = syntax.as_bytes();
912        let len = utf8.len();
913
914        if !(MIN_KEYWORD_LEN..=MAX_KEYWORD_LEN).contains(&len) {
915            return None;
916        }
917
918        let mut hash_val = len;
919
920        match len {
921            1 => (),
922            _ => hash_val += KEYWORD_ASSOC_VALUES[(utf8[1] as usize) + 1] as usize,
923        }
924        hash_val += KEYWORD_ASSOC_VALUES[utf8[0] as usize] as usize;
925
926        if !(MIN_KEYWORD_HASH_VALUE..=MAX_KEYWORD_HASH_VALUE).contains(&hash_val) {
927            return None;
928        }
929
930        match KEYWORDS_LIST[hash_val] {
931            (_, Self::EOF) => None,
932            // Fail early to avoid calling memcmp().
933            // Since we are already working with bytes, mind as well check the first one.
934            (s, ref t) if s.len() == len && s.as_bytes()[0] == utf8[0] && s == syntax => {
935                Some(t.clone())
936            }
937            _ => None,
938        }
939    }
940
941    /// If another operator is after these, it's probably a unary operator
942    /// (not sure about `fn` name).
943    #[must_use]
944    pub const fn is_next_unary(&self) -> bool {
945        #[allow(clippy::enum_glob_use)]
946        use Token::*;
947
948        match self {
949            SemiColon        | // ; - is unary
950            Colon            | // #{ foo: - is unary
951            Comma            | // ( ... , -expr ) - is unary
952            //Period         |
953            //Elvis          |
954            DoubleQuestion   | // ?? - is unary
955            ExclusiveRange   | // .. - is unary
956            InclusiveRange   | // ..= - is unary
957            LeftBrace        | // { -expr } - is unary
958            // RightBrace    | // { expr } - expr not unary & is closing
959            LeftParen        | // ( -expr ) - is unary
960            // RightParen    | // ( expr ) - expr not unary & is closing
961            LeftBracket      | // [ -expr ] - is unary
962            // RightBracket  | // [ expr ] - expr not unary & is closing
963            Plus             |
964            PlusAssign       |
965            UnaryPlus        |
966            Minus            |
967            MinusAssign      |
968            UnaryMinus       |
969            Multiply         |
970            MultiplyAssign   |
971            Divide           |
972            DivideAssign     |
973            Modulo           |
974            ModuloAssign     |
975            PowerOf          |
976            PowerOfAssign    |
977            LeftShift        |
978            LeftShiftAssign  |
979            RightShift       |
980            RightShiftAssign |
981            Equals           |
982            EqualsTo         |
983            NotEqualsTo      |
984            LessThan         |
985            GreaterThan      |
986            Bang             |
987            LessThanEqualsTo |
988            GreaterThanEqualsTo |
989            Pipe             |
990            Ampersand        |
991            If               |
992            //Do             |
993            While            |
994            Until            |
995            In               |
996            NotIn            |
997            And              |
998            AndAssign        |
999            Or               |
1000            OrAssign         |
1001            XOr              |
1002            XOrAssign        |
1003            Return           |
1004            Throw               => true,
1005
1006            #[cfg(not(feature = "no_index"))]
1007            QuestionBracket     => true,    // ?[ - is unary
1008
1009            LexError(..)        => true,
1010
1011            _                   => false,
1012        }
1013    }
1014
1015    /// Get the precedence number of the token.
1016    #[must_use]
1017    pub const fn precedence(&self) -> Option<Precedence> {
1018        #[allow(clippy::enum_glob_use)]
1019        use Token::*;
1020
1021        Precedence::new(match self {
1022            Or | XOr | Pipe => 30,
1023
1024            And | Ampersand => 60,
1025
1026            EqualsTo | NotEqualsTo => 90,
1027
1028            In | NotIn => 110,
1029
1030            LessThan | LessThanEqualsTo | GreaterThan | GreaterThanEqualsTo => 130,
1031
1032            DoubleQuestion => 135,
1033
1034            ExclusiveRange | InclusiveRange => 140,
1035
1036            Plus | Minus => 150,
1037
1038            Divide | Multiply | Modulo => 180,
1039
1040            PowerOf => 190,
1041
1042            LeftShift | RightShift => 210,
1043
1044            _ => 0,
1045        })
1046    }
1047
1048    /// Does an expression bind to the right (instead of left)?
1049    #[must_use]
1050    pub const fn is_bind_right(&self) -> bool {
1051        #[allow(clippy::enum_glob_use)]
1052        use Token::*;
1053
1054        match self {
1055            // Exponentiation binds to the right
1056            PowerOf => true,
1057
1058            _ => false,
1059        }
1060    }
1061
1062    /// Is this token a standard symbol used in the language?
1063    #[must_use]
1064    pub const fn is_standard_symbol(&self) -> bool {
1065        #[allow(clippy::enum_glob_use)]
1066        use Token::*;
1067
1068        match self {
1069            LeftBrace | RightBrace | LeftParen | RightParen | LeftBracket | RightBracket | Plus
1070            | UnaryPlus | Minus | UnaryMinus | Multiply | Divide | Modulo | PowerOf | LeftShift
1071            | RightShift | SemiColon | Colon | DoubleColon | Comma | Period | DoubleQuestion
1072            | ExclusiveRange | InclusiveRange | MapStart | Equals | LessThan | GreaterThan
1073            | LessThanEqualsTo | GreaterThanEqualsTo | EqualsTo | NotEqualsTo | Bang | Pipe
1074            | Or | XOr | Ampersand | And | PlusAssign | MinusAssign | MultiplyAssign
1075            | DivideAssign | LeftShiftAssign | RightShiftAssign | AndAssign | OrAssign
1076            | XOrAssign | ModuloAssign | PowerOfAssign => true,
1077
1078            #[cfg(not(feature = "no_object"))]
1079            Elvis => true,
1080
1081            #[cfg(not(feature = "no_index"))]
1082            QuestionBracket => true,
1083
1084            _ => false,
1085        }
1086    }
1087
1088    /// Is this token a standard keyword?
1089    #[inline]
1090    #[must_use]
1091    pub const fn is_standard_keyword(&self) -> bool {
1092        #[allow(clippy::enum_glob_use)]
1093        use Token::*;
1094
1095        match self {
1096            #[cfg(not(feature = "no_function"))]
1097            Fn | Private => true,
1098
1099            #[cfg(not(feature = "no_module"))]
1100            Import | Export | As => true,
1101
1102            True | False | Let | Const | If | Else | Do | While | Until | Loop | For | In
1103            | Continue | Break | Return | Throw | Try | Catch => true,
1104
1105            _ => false,
1106        }
1107    }
1108
1109    /// Is this token a reserved keyword or symbol?
1110    #[inline(always)]
1111    #[must_use]
1112    pub const fn is_reserved(&self) -> bool {
1113        matches!(self, Self::Reserved(..))
1114    }
1115
1116    /// Is this token a custom keyword?
1117    #[cfg(not(feature = "no_custom_syntax"))]
1118    #[inline(always)]
1119    #[must_use]
1120    pub const fn is_custom(&self) -> bool {
1121        matches!(self, Self::Custom(..))
1122    }
1123}
1124
1125impl From<Token> for String {
1126    #[inline(always)]
1127    fn from(token: Token) -> Self {
1128        token.to_string()
1129    }
1130}
1131
1132/// _(internals)_ State of the tokenizer.
1133/// Exported under the `internals` feature only.
1134#[derive(Debug, Clone, Eq, PartialEq, Default)]
1135pub struct TokenizeState {
1136    /// Maximum length of a string.
1137    ///
1138    /// Not available under `unchecked`.
1139    #[cfg(not(feature = "unchecked"))]
1140    pub max_string_len: Option<std::num::NonZeroUsize>,
1141    /// Can the next token be a unary operator?
1142    pub next_token_cannot_be_unary: bool,
1143    /// Shared object to allow controlling the tokenizer externally.
1144    pub tokenizer_control: TokenizerControl,
1145    /// Is the tokenizer currently inside a block comment?
1146    pub comment_level: usize,
1147    /// Include comments?
1148    pub include_comments: bool,
1149    /// Is the current tokenizer position within the text stream of an interpolated string?
1150    pub is_within_text_terminated_by: Option<char>,
1151    /// Textual syntax of the current token, if any.
1152    ///
1153    /// Set to `Some` to begin tracking this information.
1154    pub last_token: Option<SmartString>,
1155}
1156
1157/// _(internals)_ Trait that encapsulates a peekable character input stream.
1158/// Exported under the `internals` feature only.
1159pub trait InputStream {
1160    /// Un-get a character back into the `InputStream`.
1161    /// The next [`get_next`][InputStream::get_next] or [`peek_next`][InputStream::peek_next]
1162    /// will return this character instead.
1163    fn unget(&mut self, ch: char);
1164    /// Get the next character from the `InputStream`.
1165    fn get_next(&mut self) -> Option<char>;
1166    /// Peek the next character in the `InputStream`.
1167    #[must_use]
1168    fn peek_next(&mut self) -> Option<char>;
1169
1170    /// Consume the next character.
1171    #[inline(always)]
1172    fn eat_next_and_advance(&mut self, pos: &mut Position) -> Option<char> {
1173        pos.advance();
1174        self.get_next()
1175    }
1176}
1177
1178/// _(internals)_ Parse a string literal ended by a specified termination character.
1179/// Exported under the `internals` feature only.
1180///
1181/// Returns the parsed string and a boolean indicating whether the string is
1182/// terminated by an interpolation `${`.
1183///
1184/// # Returns
1185///
1186/// | Type                            | Return Value               |`state.is_within_text_terminated_by`|
1187/// |---------------------------------|:--------------------------:|:----------------------------------:|
1188/// |`"hello"`                        |`StringConstant("hello")`   |`None`                              |
1189/// |`"hello`_{LF}_ or _{EOF}_        |`LexError`                  |`None`                              |
1190/// |`"hello\`_{EOF}_ or _{LF}{EOF}_  |`StringConstant("hello")`   |`Some('"')`                         |
1191/// |`` `hello``_{EOF}_               |`StringConstant("hello")`   |``Some('`')``                       |
1192/// |`` `hello``_{LF}{EOF}_           |`StringConstant("hello\n")` |``Some('`')``                       |
1193/// |`` `hello ${``                   |`InterpolatedString("hello ")`<br/>next token is `{`|`None`      |
1194/// |`` } hello` ``                   |`StringConstant(" hello")`  |`None`                              |
1195/// |`} hello`_{EOF}_                 |`StringConstant(" hello")`  |``Some('`')``                       |
1196///
1197/// This function does not throw a `LexError` for the following conditions:
1198///
1199/// * Unterminated literal string at _{EOF}_
1200///
1201/// * Unterminated normal string with continuation at _{EOF}_
1202///
1203/// This is to facilitate using this function to parse a script line-by-line, where the end of the
1204/// line (i.e. _{EOF}_) is not necessarily the end of the script.
1205///
1206/// Any time a [`StringConstant`][`Token::StringConstant`] is returned with
1207/// `state.is_within_text_terminated_by` set to `Some(_)` is one of the above conditions.
1208pub fn parse_string_literal(
1209    stream: &mut (impl InputStream + ?Sized),
1210    state: &mut TokenizeState,
1211    pos: &mut Position,
1212    termination_char: char,
1213    verbatim: bool,
1214    allow_line_continuation: bool,
1215    allow_interpolation: bool,
1216) -> Result<(SmartString, bool, Position), (LexError, Position)> {
1217    let mut result = SmartString::new_const();
1218    let mut escape = SmartString::new_const();
1219
1220    let start = *pos;
1221    let mut first_char = Position::NONE;
1222    let mut interpolated = false;
1223    #[cfg(not(feature = "no_position"))]
1224    let mut skip_space_until = 0;
1225
1226    state.is_within_text_terminated_by = Some(termination_char);
1227    if let Some(ref mut last) = state.last_token {
1228        last.clear();
1229        last.push(termination_char);
1230    }
1231
1232    loop {
1233        debug_assert!(
1234            !verbatim || escape.is_empty(),
1235            "verbatim strings should not have any escapes"
1236        );
1237
1238        let next_char = match stream.get_next() {
1239            Some(ch) => {
1240                pos.advance();
1241                ch
1242            }
1243            None if verbatim => {
1244                debug_assert_eq!(escape, "", "verbatim strings should not have any escapes");
1245                pos.advance();
1246                break;
1247            }
1248            None if allow_line_continuation && !escape.is_empty() => {
1249                debug_assert_eq!(escape, "\\", "unexpected escape {escape} at end of line");
1250                pos.advance();
1251                break;
1252            }
1253            None => {
1254                pos.advance();
1255                state.is_within_text_terminated_by = None;
1256                return Err((LERR::UnterminatedString, start));
1257            }
1258        };
1259
1260        if let Some(ref mut last) = state.last_token {
1261            last.push(next_char);
1262        }
1263
1264        // String interpolation?
1265        if allow_interpolation
1266            && next_char == '$'
1267            && escape.is_empty()
1268            && stream.peek_next().map_or(false, |ch| ch == '{')
1269        {
1270            interpolated = true;
1271            state.is_within_text_terminated_by = None;
1272            break;
1273        }
1274
1275        // Check string length
1276        #[cfg(not(feature = "unchecked"))]
1277        if let Some(max) = state.max_string_len {
1278            if result.len() > max.get() {
1279                return Err((LexError::StringTooLong(max.get()), start));
1280            }
1281        }
1282
1283        // Close wrapper
1284        if termination_char == next_char && escape.is_empty() {
1285            // Double wrapper
1286            if stream.peek_next().map_or(false, |c| c == termination_char) {
1287                stream.eat_next_and_advance(pos);
1288                if let Some(ref mut last) = state.last_token {
1289                    last.push(termination_char);
1290                }
1291            } else {
1292                state.is_within_text_terminated_by = None;
1293                break;
1294            }
1295        }
1296
1297        if first_char.is_none() {
1298            first_char = *pos;
1299        }
1300
1301        match next_char {
1302            // \r - ignore if followed by \n
1303            '\r' if stream.peek_next().map_or(false, |ch| ch == '\n') => (),
1304            // \r
1305            'r' if !escape.is_empty() => {
1306                escape.clear();
1307                result.push('\r');
1308            }
1309            // \n
1310            'n' if !escape.is_empty() => {
1311                escape.clear();
1312                result.push('\n');
1313            }
1314            // \...
1315            '\\' if !verbatim && escape.is_empty() => {
1316                escape.push('\\');
1317            }
1318            // \\
1319            '\\' if !escape.is_empty() => {
1320                escape.clear();
1321                result.push('\\');
1322            }
1323            // \t
1324            't' if !escape.is_empty() => {
1325                escape.clear();
1326                result.push('\t');
1327            }
1328            // \x??, \u????, \U????????
1329            ch @ ('x' | 'u' | 'U') if !escape.is_empty() => {
1330                let mut seq = escape.clone();
1331                escape.clear();
1332                seq.push(ch);
1333
1334                let mut out_val: u32 = 0;
1335                let len = match ch {
1336                    'x' => 2,
1337                    'u' => 4,
1338                    'U' => 8,
1339                    c => unreachable!("x or u or U expected but gets '{}'", c),
1340                };
1341
1342                for _ in 0..len {
1343                    let c = stream
1344                        .get_next()
1345                        .ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?;
1346
1347                    pos.advance();
1348                    seq.push(c);
1349                    if let Some(ref mut last) = state.last_token {
1350                        last.push(c);
1351                    }
1352
1353                    out_val *= 16;
1354                    out_val += c
1355                        .to_digit(16)
1356                        .ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?;
1357                }
1358
1359                result.push(
1360                    char::from_u32(out_val)
1361                        .ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?,
1362                );
1363            }
1364
1365            // LF - Verbatim
1366            '\n' if verbatim => {
1367                debug_assert_eq!(escape, "", "verbatim strings should not have any escapes");
1368                pos.new_line();
1369                result.push('\n');
1370            }
1371
1372            // LF - Line continuation
1373            '\n' if allow_line_continuation && !escape.is_empty() => {
1374                debug_assert_eq!(escape, "\\", "unexpected escape {escape} at end of line");
1375                escape.clear();
1376                pos.new_line();
1377
1378                #[cfg(not(feature = "no_position"))]
1379                {
1380                    let start_position = start.position().unwrap();
1381                    skip_space_until = start_position + 1;
1382                }
1383            }
1384
1385            // LF - Unterminated string
1386            '\n' => {
1387                pos.rewind();
1388                state.is_within_text_terminated_by = None;
1389                return Err((LERR::UnterminatedString, start));
1390            }
1391
1392            // \{termination_char} - escaped termination character
1393            ch if termination_char == ch && !escape.is_empty() => {
1394                escape.clear();
1395                result.push(termination_char);
1396            }
1397
1398            // Unknown escape sequence
1399            ch if !escape.is_empty() => {
1400                escape.push(ch);
1401
1402                return Err((LERR::MalformedEscapeSequence(escape.to_string()), *pos));
1403            }
1404
1405            // Whitespace to skip
1406            #[cfg(not(feature = "no_position"))]
1407            ch if ch.is_whitespace() && pos.position().unwrap() < skip_space_until => (),
1408
1409            // All other characters
1410            ch => {
1411                escape.clear();
1412                result.push(ch);
1413
1414                #[cfg(not(feature = "no_position"))]
1415                {
1416                    skip_space_until = 0;
1417                }
1418            }
1419        }
1420    }
1421
1422    // Check string length
1423    #[cfg(not(feature = "unchecked"))]
1424    if let Some(max) = state.max_string_len {
1425        if result.len() > max.get() {
1426            return Err((LexError::StringTooLong(max.get()), start));
1427        }
1428    }
1429
1430    Ok((result, interpolated, first_char))
1431}
1432
1433/// Scan for a block comment until the end.
1434fn scan_block_comment(
1435    stream: &mut (impl InputStream + ?Sized),
1436    level: usize,
1437    pos: &mut Position,
1438    comment: Option<&mut String>,
1439) -> usize {
1440    let mut level = level;
1441    let mut comment = comment;
1442
1443    while let Some(c) = stream.get_next() {
1444        pos.advance();
1445
1446        if let Some(comment) = comment.as_mut() {
1447            comment.push(c);
1448        }
1449
1450        match c {
1451            '/' => {
1452                if let Some(c2) = stream.peek_next().filter(|&ch| ch == '*') {
1453                    stream.eat_next_and_advance(pos);
1454                    if let Some(comment) = comment.as_mut() {
1455                        comment.push(c2);
1456                    }
1457                    level += 1;
1458                }
1459            }
1460            '*' => {
1461                if let Some(c2) = stream.peek_next().filter(|&ch| ch == '/') {
1462                    stream.eat_next_and_advance(pos);
1463                    if let Some(comment) = comment.as_mut() {
1464                        comment.push(c2);
1465                    }
1466                    level -= 1;
1467                }
1468            }
1469            '\n' => pos.new_line(),
1470            _ => (),
1471        }
1472
1473        if level == 0 {
1474            break;
1475        }
1476    }
1477
1478    level
1479}
1480
1481/// Test if the given character is a hex character.
1482#[inline(always)]
1483const fn is_hex_digit(c: char) -> bool {
1484    c.is_ascii_hexdigit()
1485}
1486
1487/// Test if the given character is a numeric digit (i.e. 0-9).
1488#[inline(always)]
1489const fn is_numeric_digit(c: char) -> bool {
1490    c.is_ascii_digit()
1491}
1492
1493/// Test if the given character is an octal digit (i.e. 0-7).
1494#[inline(always)]
1495const fn is_octal_digit(c: char) -> bool {
1496    matches!(c, '0'..='7')
1497}
1498
1499/// Test if the given character is a binary digit (i.e. 0 or 1).
1500#[inline(always)]
1501const fn is_binary_digit(c: char) -> bool {
1502    c == '0' || c == '1'
1503}
1504
1505/// Test if the comment block is a doc-comment.
1506#[cfg(not(feature = "no_function"))]
1507#[cfg(feature = "metadata")]
1508#[inline]
1509#[must_use]
1510pub fn is_doc_comment(comment: &str) -> bool {
1511    (comment.starts_with("///") && !comment.starts_with("////"))
1512        || (comment.starts_with("/**") && !comment.starts_with("/***"))
1513}
1514
1515/// _(internals)_ Get the next token from the input stream.
1516/// Exported under the `internals` feature only.
1517#[inline(always)]
1518#[must_use]
1519pub fn get_next_token(
1520    stream: &mut (impl InputStream + ?Sized),
1521    state: &mut TokenizeState,
1522    pos: &mut Position,
1523) -> (Token, Position) {
1524    let result = get_next_token_inner(stream, state, pos);
1525
1526    // Save the last token's state
1527    state.next_token_cannot_be_unary = !result.0.is_next_unary();
1528
1529    result
1530}
1531
1532/// Get the next token.
1533#[must_use]
1534fn get_next_token_inner(
1535    stream: &mut (impl InputStream + ?Sized),
1536    state: &mut TokenizeState,
1537    pos: &mut Position,
1538) -> (Token, Position) {
1539    state.last_token.as_mut().map(SmartString::clear);
1540
1541    // Still inside a comment?
1542    if state.comment_level > 0 {
1543        let start_pos = *pos;
1544        let mut comment = String::new();
1545        let comment_buf = state.include_comments.then_some(&mut comment);
1546
1547        state.comment_level = scan_block_comment(stream, state.comment_level, pos, comment_buf);
1548
1549        let return_comment = state.include_comments;
1550
1551        #[cfg(not(feature = "no_function"))]
1552        #[cfg(feature = "metadata")]
1553        let return_comment = return_comment || is_doc_comment(&comment);
1554
1555        if return_comment {
1556            return (Token::Comment(comment.into()), start_pos);
1557        }
1558
1559        // Reached EOF without ending comment block?
1560        if state.comment_level > 0 {
1561            return (Token::EOF, *pos);
1562        }
1563    }
1564
1565    // Within text?
1566    if let Some(ch) = state.is_within_text_terminated_by.take() {
1567        return parse_string_literal(stream, state, pos, ch, true, false, true).map_or_else(
1568            |(err, err_pos)| (Token::LexError(err.into()), err_pos),
1569            |(result, interpolated, start_pos)| {
1570                if interpolated {
1571                    (Token::InterpolatedString(result.into()), start_pos)
1572                } else {
1573                    (Token::StringConstant(result.into()), start_pos)
1574                }
1575            },
1576        );
1577    }
1578
1579    let mut negated: Option<Position> = None;
1580
1581    while let Some(c) = stream.get_next() {
1582        pos.advance();
1583
1584        let start_pos = *pos;
1585        let cc = stream.peek_next().unwrap_or('\0');
1586
1587        // Identifiers and strings that can have non-ASCII characters
1588        match (c, cc) {
1589            // digit ...
1590            ('0'..='9', ..) => {
1591                let mut result = SmartString::new_const();
1592                let mut radix_base: Option<u32> = None;
1593                let mut valid: fn(char) -> bool = is_numeric_digit;
1594                let mut _has_period = false;
1595                let mut _has_e = false;
1596
1597                result.push(c);
1598
1599                while let Some(next_char) = stream.peek_next() {
1600                    match next_char {
1601                        NUMBER_SEPARATOR => {
1602                            stream.eat_next_and_advance(pos);
1603                        }
1604                        ch if valid(ch) => {
1605                            result.push(ch);
1606                            stream.eat_next_and_advance(pos);
1607                        }
1608                        #[cfg(any(not(feature = "no_float"), feature = "decimal"))]
1609                        '.' if !_has_period && radix_base.is_none() => {
1610                            stream.get_next().unwrap();
1611
1612                            // Check if followed by digits or something that cannot start a property name
1613                            match stream.peek_next() {
1614                                // digits after period - accept the period
1615                                Some('0'..='9') => {
1616                                    result.push('.');
1617                                    pos.advance();
1618                                    _has_period = true;
1619                                }
1620                                // _ - cannot follow a decimal point
1621                                Some(NUMBER_SEPARATOR) => {
1622                                    stream.unget('.');
1623                                    break;
1624                                }
1625                                // .. - reserved symbol, not a floating-point number
1626                                Some('.') => {
1627                                    stream.unget('.');
1628                                    break;
1629                                }
1630                                // symbol after period - probably a float
1631                                Some(ch) if !is_id_first_alphabetic(ch) => {
1632                                    result.push('.');
1633                                    pos.advance();
1634                                    result.push('0');
1635                                    _has_period = true;
1636                                }
1637                                // Not a floating-point number
1638                                _ => {
1639                                    stream.unget('.');
1640                                    break;
1641                                }
1642                            }
1643                        }
1644                        #[cfg(not(feature = "no_float"))]
1645                        'e' if !_has_e && radix_base.is_none() => {
1646                            stream.get_next().unwrap();
1647
1648                            // Check if followed by digits or +/-
1649                            match stream.peek_next() {
1650                                // digits after e - accept the e (no decimal points allowed)
1651                                Some('0'..='9') => {
1652                                    result.push('e');
1653                                    pos.advance();
1654                                    _has_e = true;
1655                                    _has_period = true;
1656                                }
1657                                // +/- after e - accept the e and the sign (no decimal points allowed)
1658                                Some('+' | '-') => {
1659                                    result.push('e');
1660                                    pos.advance();
1661                                    result.push(stream.get_next().unwrap());
1662                                    pos.advance();
1663                                    _has_e = true;
1664                                    _has_period = true;
1665                                }
1666                                // Not a floating-point number
1667                                _ => {
1668                                    stream.unget('e');
1669                                    break;
1670                                }
1671                            }
1672                        }
1673                        // 0x????, 0o????, 0b???? at beginning
1674                        ch @ ('x' | 'o' | 'b' | 'X' | 'O' | 'B')
1675                            if c == '0' && result.len() <= 1 =>
1676                        {
1677                            result.push(ch);
1678                            stream.eat_next_and_advance(pos);
1679
1680                            valid = match ch {
1681                                'x' | 'X' => is_hex_digit,
1682                                'o' | 'O' => is_octal_digit,
1683                                'b' | 'B' => is_binary_digit,
1684                                c => unreachable!("x/X or o/O or b/B expected but got '{}'", c),
1685                            };
1686
1687                            radix_base = Some(match ch {
1688                                'x' | 'X' => 16,
1689                                'o' | 'O' => 8,
1690                                'b' | 'B' => 2,
1691                                c => unreachable!("x/X or o/O or b/B expected but got '{}'", c),
1692                            });
1693                        }
1694
1695                        _ => break,
1696                    }
1697                }
1698
1699                let num_pos = negated.map_or(start_pos, |negated_pos| {
1700                    result.insert(0, '-');
1701                    negated_pos
1702                });
1703
1704                if let Some(ref mut last) = state.last_token {
1705                    *last = result.clone();
1706                }
1707
1708                // Parse number
1709                let token = if let Some(radix) = radix_base {
1710                    let result = &result[2..];
1711
1712                    UNSIGNED_INT::from_str_radix(result, radix)
1713                        .map(|v| v as INT)
1714                        .map_or_else(
1715                            |_| Token::LexError(LERR::MalformedNumber(result.to_string()).into()),
1716                            Token::IntegerConstant,
1717                        )
1718                } else {
1719                    (|| {
1720                        let num = INT::from_str(&result).map(Token::IntegerConstant);
1721
1722                        // If integer parsing fails, try float instead
1723                        #[cfg(not(feature = "no_float"))]
1724                        if num.is_err() {
1725                            if let Ok(v) = crate::types::FloatWrapper::from_str(&result) {
1726                                return Token::FloatConstant((v, result).into());
1727                            }
1728                        }
1729
1730                        // Then try decimal
1731                        #[cfg(feature = "decimal")]
1732                        if num.is_err() {
1733                            if let Ok(v) = rust_decimal::Decimal::from_str(&result) {
1734                                return Token::DecimalConstant((v, result).into());
1735                            }
1736                        }
1737
1738                        // Then try decimal in scientific notation
1739                        #[cfg(feature = "decimal")]
1740                        if num.is_err() {
1741                            if let Ok(v) = rust_decimal::Decimal::from_scientific(&result) {
1742                                return Token::DecimalConstant((v, result).into());
1743                            }
1744                        }
1745
1746                        num.unwrap_or_else(|_| {
1747                            Token::LexError(LERR::MalformedNumber(result.to_string()).into())
1748                        })
1749                    })()
1750                };
1751
1752                return (token, num_pos);
1753            }
1754
1755            // " - string literal
1756            ('"', ..) => {
1757                return parse_string_literal(stream, state, pos, c, false, true, false)
1758                    .map_or_else(
1759                        |(err, err_pos)| (Token::LexError(err.into()), err_pos),
1760                        |(result, ..)| (Token::StringConstant(result.into()), start_pos),
1761                    );
1762            }
1763            // ` - string literal
1764            ('`', ..) => {
1765                // Start from the next line if at the end of line
1766                match stream.peek_next() {
1767                    // `\r - start from next line
1768                    Some('\r') => {
1769                        stream.eat_next_and_advance(pos);
1770                        // `\r\n
1771                        if stream.peek_next() == Some('\n') {
1772                            stream.eat_next_and_advance(pos);
1773                        }
1774                        pos.new_line();
1775                    }
1776                    // `\n - start from next line
1777                    Some('\n') => {
1778                        stream.eat_next_and_advance(pos);
1779                        pos.new_line();
1780                    }
1781                    _ => (),
1782                }
1783
1784                return parse_string_literal(stream, state, pos, c, true, false, true).map_or_else(
1785                    |(err, err_pos)| (Token::LexError(err.into()), err_pos),
1786                    |(result, interpolated, ..)| {
1787                        if interpolated {
1788                            (Token::InterpolatedString(result.into()), start_pos)
1789                        } else {
1790                            (Token::StringConstant(result.into()), start_pos)
1791                        }
1792                    },
1793                );
1794            }
1795
1796            // ' - character literal
1797            ('\'', '\'') => {
1798                return (
1799                    Token::LexError(LERR::MalformedChar(String::new()).into()),
1800                    start_pos,
1801                )
1802            }
1803            ('\'', ..) => {
1804                return parse_string_literal(stream, state, pos, c, false, false, false)
1805                    .map_or_else(
1806                        |(err, err_pos)| (Token::LexError(err.into()), err_pos),
1807                        |(result, ..)| {
1808                            let mut chars = result.chars();
1809                            let first = chars.next().unwrap();
1810
1811                            if chars.next().is_some() {
1812                                (
1813                                    Token::LexError(LERR::MalformedChar(result.to_string()).into()),
1814                                    start_pos,
1815                                )
1816                            } else {
1817                                (Token::CharConstant(first), start_pos)
1818                            }
1819                        },
1820                    )
1821            }
1822
1823            // Braces
1824            ('{', ..) => return (Token::LeftBrace, start_pos),
1825            ('}', ..) => return (Token::RightBrace, start_pos),
1826
1827            // Unit
1828            ('(', ')') => {
1829                stream.eat_next_and_advance(pos);
1830                return (Token::Unit, start_pos);
1831            }
1832
1833            // Parentheses
1834            ('(', '*') => {
1835                stream.eat_next_and_advance(pos);
1836                return (Token::Reserved(Box::new("(*".into())), start_pos);
1837            }
1838            ('(', ..) => return (Token::LeftParen, start_pos),
1839            (')', ..) => return (Token::RightParen, start_pos),
1840
1841            // Indexing
1842            ('[', ..) => return (Token::LeftBracket, start_pos),
1843            (']', ..) => return (Token::RightBracket, start_pos),
1844
1845            // Map literal
1846            #[cfg(not(feature = "no_object"))]
1847            ('#', '{') => {
1848                stream.eat_next_and_advance(pos);
1849                return (Token::MapStart, start_pos);
1850            }
1851            // Shebang
1852            ('#', '!') => return (Token::Reserved(Box::new("#!".into())), start_pos),
1853
1854            ('#', ' ') => {
1855                stream.eat_next_and_advance(pos);
1856                let token = if stream.peek_next() == Some('{') {
1857                    stream.eat_next_and_advance(pos);
1858                    "# {"
1859                } else {
1860                    "#"
1861                };
1862                return (Token::Reserved(Box::new(token.into())), start_pos);
1863            }
1864
1865            ('#', ..) => return (Token::Reserved(Box::new("#".into())), start_pos),
1866
1867            // Operators
1868            ('+', '=') => {
1869                stream.eat_next_and_advance(pos);
1870                return (Token::PlusAssign, start_pos);
1871            }
1872            ('+', '+') => {
1873                stream.eat_next_and_advance(pos);
1874                return (Token::Reserved(Box::new("++".into())), start_pos);
1875            }
1876            ('+', ..) if !state.next_token_cannot_be_unary => return (Token::UnaryPlus, start_pos),
1877            ('+', ..) => return (Token::Plus, start_pos),
1878
1879            ('-', '0'..='9') if !state.next_token_cannot_be_unary => negated = Some(start_pos),
1880            ('-', '0'..='9') => return (Token::Minus, start_pos),
1881            ('-', '=') => {
1882                stream.eat_next_and_advance(pos);
1883                return (Token::MinusAssign, start_pos);
1884            }
1885            ('-', '>') => {
1886                stream.eat_next_and_advance(pos);
1887                return (Token::Reserved(Box::new("->".into())), start_pos);
1888            }
1889            ('-', '-') => {
1890                stream.eat_next_and_advance(pos);
1891                return (Token::Reserved(Box::new("--".into())), start_pos);
1892            }
1893            ('-', ..) if !state.next_token_cannot_be_unary => {
1894                return (Token::UnaryMinus, start_pos)
1895            }
1896            ('-', ..) => return (Token::Minus, start_pos),
1897
1898            ('*', ')') => {
1899                stream.eat_next_and_advance(pos);
1900                return (Token::Reserved(Box::new("*)".into())), start_pos);
1901            }
1902            ('*', '=') => {
1903                stream.eat_next_and_advance(pos);
1904                return (Token::MultiplyAssign, start_pos);
1905            }
1906            ('*', '*') => {
1907                stream.eat_next_and_advance(pos);
1908
1909                return (
1910                    if stream.peek_next() == Some('=') {
1911                        stream.eat_next_and_advance(pos);
1912                        Token::PowerOfAssign
1913                    } else {
1914                        Token::PowerOf
1915                    },
1916                    start_pos,
1917                );
1918            }
1919            ('*', ..) => return (Token::Multiply, start_pos),
1920
1921            // Comments
1922            ('/', '/') => {
1923                stream.eat_next_and_advance(pos);
1924
1925                let mut comment: Option<String> = match stream.peek_next() {
1926                    #[cfg(not(feature = "no_function"))]
1927                    #[cfg(feature = "metadata")]
1928                    Some('/') => {
1929                        stream.eat_next_and_advance(pos);
1930
1931                        // Long streams of `///...` are not doc-comments
1932                        match stream.peek_next() {
1933                            Some('/') => None,
1934                            _ => Some("///".into()),
1935                        }
1936                    }
1937                    #[cfg(feature = "metadata")]
1938                    Some('!') => {
1939                        stream.eat_next_and_advance(pos);
1940                        Some("//!".into())
1941                    }
1942                    _ if state.include_comments => Some("//".into()),
1943                    _ => None,
1944                };
1945
1946                while let Some(c) = stream.get_next() {
1947                    if c == '\r' {
1948                        // \r\n
1949                        if stream.peek_next() == Some('\n') {
1950                            stream.eat_next_and_advance(pos);
1951                        }
1952                        pos.new_line();
1953                        break;
1954                    }
1955                    if c == '\n' {
1956                        pos.new_line();
1957                        break;
1958                    }
1959                    if let Some(comment) = comment.as_mut() {
1960                        comment.push(c);
1961                    }
1962                    pos.advance();
1963                }
1964
1965                match comment {
1966                    #[cfg(feature = "metadata")]
1967                    Some(comment) if comment.starts_with("//!") => {
1968                        let g = &mut state.tokenizer_control.borrow_mut().global_comments;
1969                        if !g.is_empty() {
1970                            g.push('\n');
1971                        }
1972                        g.push_str(&comment);
1973                    }
1974                    Some(comment) => return (Token::Comment(comment.into()), start_pos),
1975                    None => (),
1976                }
1977            }
1978            ('/', '*') => {
1979                state.comment_level += 1;
1980                stream.eat_next_and_advance(pos);
1981
1982                let mut comment: Option<String> = match stream.peek_next() {
1983                    #[cfg(not(feature = "no_function"))]
1984                    #[cfg(feature = "metadata")]
1985                    Some('*') => {
1986                        stream.eat_next_and_advance(pos);
1987
1988                        // Long streams of `/****...` are not doc-comments
1989                        match stream.peek_next() {
1990                            Some('*') => None,
1991                            _ => Some("/**".into()),
1992                        }
1993                    }
1994                    _ if state.include_comments => Some("/*".into()),
1995                    _ => None,
1996                };
1997
1998                state.comment_level =
1999                    scan_block_comment(stream, state.comment_level, pos, comment.as_mut());
2000
2001                if let Some(comment) = comment {
2002                    return (Token::Comment(comment.into()), start_pos);
2003                }
2004            }
2005
2006            ('/', '=') => {
2007                stream.eat_next_and_advance(pos);
2008                return (Token::DivideAssign, start_pos);
2009            }
2010            ('/', ..) => return (Token::Divide, start_pos),
2011
2012            (';', ..) => return (Token::SemiColon, start_pos),
2013            (',', ..) => return (Token::Comma, start_pos),
2014
2015            ('.', '.') => {
2016                stream.eat_next_and_advance(pos);
2017                return (
2018                    match stream.peek_next() {
2019                        Some('.') => {
2020                            stream.eat_next_and_advance(pos);
2021                            Token::Reserved(Box::new("...".into()))
2022                        }
2023                        Some('=') => {
2024                            stream.eat_next_and_advance(pos);
2025                            Token::InclusiveRange
2026                        }
2027                        _ => Token::ExclusiveRange,
2028                    },
2029                    start_pos,
2030                );
2031            }
2032            ('.', ..) => return (Token::Period, start_pos),
2033
2034            ('=', '=') => {
2035                stream.eat_next_and_advance(pos);
2036
2037                if stream.peek_next() == Some('=') {
2038                    stream.eat_next_and_advance(pos);
2039                    return (Token::Reserved(Box::new("===".into())), start_pos);
2040                }
2041
2042                return (Token::EqualsTo, start_pos);
2043            }
2044            ('=', '>') => {
2045                stream.eat_next_and_advance(pos);
2046                return (Token::DoubleArrow, start_pos);
2047            }
2048            ('=', ..) => return (Token::Equals, start_pos),
2049
2050            #[cfg(not(feature = "no_module"))]
2051            (':', ':') => {
2052                stream.eat_next_and_advance(pos);
2053
2054                if stream.peek_next() == Some('<') {
2055                    stream.eat_next_and_advance(pos);
2056                    return (Token::Reserved(Box::new("::<".into())), start_pos);
2057                }
2058
2059                return (Token::DoubleColon, start_pos);
2060            }
2061            (':', '=') => {
2062                stream.eat_next_and_advance(pos);
2063                return (Token::Reserved(Box::new(":=".into())), start_pos);
2064            }
2065            (':', ';') => {
2066                stream.eat_next_and_advance(pos);
2067                return (Token::Reserved(Box::new(":;".into())), start_pos);
2068            }
2069            (':', ..) => return (Token::Colon, start_pos),
2070
2071            ('<', '=') => {
2072                stream.eat_next_and_advance(pos);
2073                return (Token::LessThanEqualsTo, start_pos);
2074            }
2075            ('<', '-') => {
2076                stream.eat_next_and_advance(pos);
2077                return (Token::Reserved(Box::new("<-".into())), start_pos);
2078            }
2079            ('<', '<') => {
2080                stream.eat_next_and_advance(pos);
2081
2082                return (
2083                    if stream.peek_next() == Some('=') {
2084                        stream.eat_next_and_advance(pos);
2085                        Token::LeftShiftAssign
2086                    } else {
2087                        Token::LeftShift
2088                    },
2089                    start_pos,
2090                );
2091            }
2092            ('<', '|') => {
2093                stream.eat_next_and_advance(pos);
2094                return (Token::Reserved(Box::new("<|".into())), start_pos);
2095            }
2096            ('<', ..) => return (Token::LessThan, start_pos),
2097
2098            ('>', '=') => {
2099                stream.eat_next_and_advance(pos);
2100                return (Token::GreaterThanEqualsTo, start_pos);
2101            }
2102            ('>', '>') => {
2103                stream.eat_next_and_advance(pos);
2104
2105                return (
2106                    if stream.peek_next() == Some('=') {
2107                        stream.eat_next_and_advance(pos);
2108                        Token::RightShiftAssign
2109                    } else {
2110                        Token::RightShift
2111                    },
2112                    start_pos,
2113                );
2114            }
2115            ('>', ..) => return (Token::GreaterThan, start_pos),
2116
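            // `!in` - the "not in" operator, unless `in` is only the start of a longer
            // identifier (e.g. `!inside`), in which case the characters are pushed back.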
2117            ('!', 'i') => {
2118                stream.get_next().unwrap();
2119                if stream.peek_next() == Some('n') {
2120                    stream.get_next().unwrap();
2121                    match stream.peek_next() {
2122                        Some(c) if is_id_continue(c) => {
2123                            stream.unget('n');
2124                            stream.unget('i');
2125                            return (Token::Bang, start_pos);
2126                        }
2127                        _ => {
2128                            pos.advance();
2129                            pos.advance();
2130                            return (Token::NotIn, start_pos);
2131                        }
2132                    }
2133                }
2134
2135                stream.unget('i');
2136                return (Token::Bang, start_pos);
2137            }
2138            ('!', '=') => {
2139                stream.eat_next_and_advance(pos);
2140
2141                if stream.peek_next() == Some('=') {
2142                    stream.eat_next_and_advance(pos);
2143                    return (Token::Reserved(Box::new("!==".into())), start_pos);
2144                }
2145
2146                return (Token::NotEqualsTo, start_pos);
2147            }
2148            ('!', '.') => {
2149                stream.eat_next_and_advance(pos);
2150                return (Token::Reserved(Box::new("!.".into())), start_pos);
2151            }
2152            ('!', ..) => return (Token::Bang, start_pos),
2153
2154            ('|', '|') => {
2155                stream.eat_next_and_advance(pos);
2156                return (Token::Or, start_pos);
2157            }
2158            ('|', '=') => {
2159                stream.eat_next_and_advance(pos);
2160                return (Token::OrAssign, start_pos);
2161            }
2162            ('|', '>') => {
2163                stream.eat_next_and_advance(pos);
2164                return (Token::Reserved(Box::new("|>".into())), start_pos);
2165            }
2166            ('|', ..) => return (Token::Pipe, start_pos),
2167
2168            ('&', '&') => {
2169                stream.eat_next_and_advance(pos);
2170                return (Token::And, start_pos);
2171            }
2172            ('&', '=') => {
2173                stream.eat_next_and_advance(pos);
2174                return (Token::AndAssign, start_pos);
2175            }
2176            ('&', ..) => return (Token::Ampersand, start_pos),
2177
2178            ('^', '=') => {
2179                stream.eat_next_and_advance(pos);
2180                return (Token::XOrAssign, start_pos);
2181            }
2182            ('^', ..) => return (Token::XOr, start_pos),
2183
2184            ('~', ..) => return (Token::Reserved(Box::new("~".into())), start_pos),
2185
2186            ('%', '=') => {
2187                stream.eat_next_and_advance(pos);
2188                return (Token::ModuloAssign, start_pos);
2189            }
2190            ('%', ..) => return (Token::Modulo, start_pos),
2191
2192            ('@', ..) => return (Token::Reserved(Box::new("@".into())), start_pos),
2193
2194            ('$', ..) => return (Token::Reserved(Box::new("$".into())), start_pos),
2195
2196            ('?', '.') => {
2197                stream.eat_next_and_advance(pos);
2198                return (
2199                    #[cfg(not(feature = "no_object"))]
2200                    Token::Elvis,
2201                    #[cfg(feature = "no_object")]
2202                    Token::Reserved(Box::new("?.".into())),
2203                    start_pos,
2204                );
2205            }
2206            ('?', '?') => {
2207                stream.eat_next_and_advance(pos);
2208                return (Token::DoubleQuestion, start_pos);
2209            }
2210            ('?', '[') => {
2211                stream.eat_next_and_advance(pos);
2212                return (
2213                    #[cfg(not(feature = "no_index"))]
2214                    Token::QuestionBracket,
2215                    #[cfg(feature = "no_index")]
2216                    Token::Reserved(Box::new("?[".into())),
2217                    start_pos,
2218                );
2219            }
2220            ('?', ..) => return (Token::Reserved(Box::new("?".into())), start_pos),
2221
2222            // letter or underscore ...
2223            _ if is_id_first_alphabetic(c) || c == '_' => {
2224                return parse_identifier_token(stream, state, pos, start_pos, c);
2225            }
2226
2227            // \n
2228            ('\n', ..) => pos.new_line(),
2229
2230            // Whitespace - follows Rust's SPACE, TAB, CR, LF, FF which is the same as WhatWG.
2231            (ch, ..) if ch.is_ascii_whitespace() => (),
2232
2233            _ => {
2234                return (
2235                    Token::LexError(LERR::UnexpectedInput(c.to_string()).into()),
2236                    start_pos,
2237                )
2238            }
2239        }
2240    }
2241
2242    pos.advance();
2243
2244    (Token::EOF, *pos)
2245}
2246
2247/// Get the next token, parsing it as an identifier.
2248fn parse_identifier_token(
2249    stream: &mut (impl InputStream + ?Sized),
2250    state: &mut TokenizeState,
2251    pos: &mut Position,
2252    start_pos: Position,
2253    first_char: char,
2254) -> (Token, Position) {
2255    let mut identifier = SmartString::new_const();
2256    identifier.push(first_char);
2257    if let Some(ref mut last) = state.last_token {
2258        last.clear();
2259        last.push(first_char);
2260    }
2261
2262    while let Some(next_char) = stream.peek_next() {
2263        match next_char {
2264            x if is_id_continue(x) => {
2265                stream.eat_next_and_advance(pos);
2266                identifier.push(x);
2267                if let Some(ref mut last) = state.last_token {
2268                    last.push(x);
2269                }
2270            }
2271            _ => break,
2272        }
2273    }
2274
2275    if let Some(token) = Token::lookup_symbol_from_syntax(&identifier) {
2276        return (token, start_pos);
2277    }
2278
2279    if is_reserved_keyword_or_symbol(&identifier).0 {
2280        return (Token::Reserved(Box::new(identifier)), start_pos);
2281    }
2282
2283    if !is_valid_identifier(&identifier) {
2284        return (
2285            Token::LexError(LERR::MalformedIdentifier(identifier.to_string()).into()),
2286            start_pos,
2287        );
2288    }
2289
2290    (Token::Identifier(identifier.into()), start_pos)
2291}
2292
2293/// _(internals)_ Is a text string a valid identifier?
2294/// Exported under the `internals` feature only.
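///
/// # Example
///
/// An illustrative sketch of the rule implemented below (not compiled as a doc-test):
///
/// ```ignore
/// assert!(is_valid_identifier("abc"));
/// assert!(is_valid_identifier("_abc_123"));   // leading underscores are allowed
/// assert!(!is_valid_identifier("123abc"));    // digits cannot precede the first alphabetic character
/// assert!(!is_valid_identifier("_"));         // underscores alone do not qualify
/// assert!(!is_valid_identifier(""));          // the empty string does not qualify
/// ```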
2295#[must_use]
2296pub fn is_valid_identifier(name: &str) -> bool {
2297    let mut first_alphabetic = false;
2298
2299    for ch in name.chars() {
2300        match ch {
2301            '_' => (),
2302            _ if is_id_first_alphabetic(ch) => first_alphabetic = true,
2303            _ if !first_alphabetic => return false,
2304            _ if char::is_ascii_alphanumeric(&ch) => (),
2305            _ => return false,
2306        }
2307    }
2308
2309    first_alphabetic
2310}
2311
2312/// _(internals)_ Is a text string a valid script-defined function name?
2313/// Exported under the `internals` feature only.
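///
/// # Example
///
/// An illustrative sketch assuming the default keyword set (not compiled as a doc-test):
///
/// ```ignore
/// assert!(is_valid_function_name("foo"));
/// assert!(!is_valid_function_name("while"));   // active keyword
/// assert!(!is_valid_function_name("+"));       // not a valid identifier
/// ```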
2314#[inline(always)]
2315#[must_use]
2316pub fn is_valid_function_name(name: &str) -> bool {
2317    is_valid_identifier(name)
2318        && !is_reserved_keyword_or_symbol(name).0
2319        && Token::lookup_symbol_from_syntax(name).is_none()
2320}
2321
2322/// Is a character valid to start an identifier?
2323#[inline(always)]
2324#[must_use]
2325#[allow(clippy::missing_const_for_fn)]
2326pub fn is_id_first_alphabetic(x: char) -> bool {
2327    #[cfg(feature = "unicode-xid-ident")]
2328    return unicode_xid::UnicodeXID::is_xid_start(x);
2329    #[cfg(not(feature = "unicode-xid-ident"))]
2330    return x.is_ascii_alphabetic();
2331}
2332
2333/// Is a character valid for an identifier?
2334#[inline(always)]
2335#[must_use]
2336#[allow(clippy::missing_const_for_fn)]
2337pub fn is_id_continue(x: char) -> bool {
2338    #[cfg(feature = "unicode-xid-ident")]
2339    return unicode_xid::UnicodeXID::is_xid_continue(x);
2340    #[cfg(not(feature = "unicode-xid-ident"))]
2341    return x.is_ascii_alphanumeric() || x == '_';
2342}
2343
2344/// Is a piece of syntax a reserved keyword or reserved symbol?
2345///
2346/// # Return values
2347///
2348/// The first `bool` indicates whether it is a reserved keyword or symbol.
2349///
2350/// The second `bool` indicates whether the keyword can be called normally as a function.
2351/// `false` if it is not a reserved keyword.
2352///
2353/// The third `bool` indicates whether the keyword can be called in method-call style.
2354/// `false` if it is not a reserved keyword or it cannot be called as a function.
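///
/// # Example
///
/// A usage sketch (illustrative only; the actual flags depend on the reserved keyword list):
///
/// ```ignore
/// let (is_reserved, callable_as_fn, callable_as_method) = is_reserved_keyword_or_symbol("this");
///
/// if is_reserved && !callable_as_fn {
///     // e.g. reject the word as a function name
/// }
/// ```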
2355#[inline]
2356#[must_use]
2357pub fn is_reserved_keyword_or_symbol(syntax: &str) -> (bool, bool, bool) {
2358    // This implementation is based upon a pre-calculated table generated
2359    // by GNU `gperf` on the list of keywords.
2360    let utf8 = syntax.as_bytes();
2361    let len = utf8.len();
2362
2363    if !(MIN_RESERVED_LEN..=MAX_RESERVED_LEN).contains(&len) {
2364        return (false, false, false);
2365    }
2366
2367    let mut hash_val = len;
2368
2369    match len {
2370        1 => (),
2371        _ => hash_val += RESERVED_ASSOC_VALUES[utf8[1] as usize] as usize,
2372    }
2373    hash_val += RESERVED_ASSOC_VALUES[utf8[0] as usize] as usize;
2374    hash_val += RESERVED_ASSOC_VALUES[utf8[len - 1] as usize] as usize;
2375
2376    if !(MIN_RESERVED_HASH_VALUE..=MAX_RESERVED_HASH_VALUE).contains(&hash_val) {
2377        return (false, false, false);
2378    }
2379
2380    match RESERVED_LIST[hash_val] {
2381        ("", ..) => (false, false, false),
2382        (s, true, a, b) => {
2383            // Fail early to avoid calling memcmp().
2384            // Since we are already working with bytes, we might as well check the first one.
2385            let is_reserved = s.len() == len && s.as_bytes()[0] == utf8[0] && s == syntax;
2386            (is_reserved, is_reserved && a, is_reserved && a && b)
2387        }
2388        _ => (false, false, false),
2389    }
2390}
2391
2392/// _(internals)_ A type that implements the [`InputStream`] trait.
2393/// Exported under the `internals` feature only.
2394///
2395/// Multiple character streams are joined together to form a single stream.
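///
/// # Example
///
/// A construction sketch (illustrative only):
///
/// ```ignore
/// let mut stream = MultiInputsStream {
///     buf: [None, None],
///     index: 0,
///     streams: ["let x", " = 42;"].iter().map(|s| s.chars().peekable()).collect(),
/// };
///
/// // Characters are drawn from the first stream, then the second, as one sequence.
/// assert_eq!(stream.get_next(), Some('l'));
/// ```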
2396pub struct MultiInputsStream<'a> {
2397    /// Buffered characters, if any.
2398    pub buf: [Option<char>; 2],
2399    /// The current stream index.
2400    pub index: usize,
2401    /// Input character streams.
2402    pub streams: StaticVec<Peekable<Chars<'a>>>,
2403}
2404
2405impl InputStream for MultiInputsStream<'_> {
2406    #[inline]
2407    fn unget(&mut self, ch: char) {
2408        match self.buf {
2409            [None, ..] => self.buf[0] = Some(ch),
2410            [_, None] => self.buf[1] = Some(ch),
2411            _ => unreachable!("cannot unget more than 2 characters!"),
2412        }
2413    }
2414    fn get_next(&mut self) -> Option<char> {
2415        match self.buf {
2416            [None, ..] => (),
2417            [ch @ Some(_), None] => {
2418                self.buf[0] = None;
2419                return ch;
2420            }
2421            [_, ch @ Some(_)] => {
2422                self.buf[1] = None;
2423                return ch;
2424            }
2425        }
2426
2427        loop {
2428            if self.index >= self.streams.len() {
2429                // No more streams
2430                return None;
2431            }
2432            if let Some(ch) = self.streams[self.index].next() {
2433                // Next character in main stream
2434                return Some(ch);
2435            }
2436            // Jump to the next stream
2437            self.index += 1;
2438        }
2439    }
2440    fn peek_next(&mut self) -> Option<char> {
2441        match self.buf {
2442            [None, ..] => (),
2443            [ch @ Some(_), None] => return ch,
2444            [_, ch @ Some(_)] => return ch,
2445        }
2446
2447        loop {
2448            if self.index >= self.streams.len() {
2449                // No more streams
2450                return None;
2451            }
2452            if let Some(&ch) = self.streams[self.index].peek() {
2453                // Next character in main stream
2454                return Some(ch);
2455            }
2456            // Jump to the next stream
2457            self.index += 1;
2458        }
2459    }
2460}
2461
2462/// _(internals)_ An iterator on a [`Token`] stream.
2463/// Exported under the `internals` feature only.
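///
/// Normally obtained from [`lex_raw`] (or [`Engine::lex`] under the `internals` feature)
/// rather than constructed directly.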
2464pub struct TokenIterator<'a> {
2465    /// Reference to the scripting `Engine`.
2466    pub engine: &'a Engine,
2467    /// Current state.
2468    pub state: TokenizeState,
2469    /// Current position.
2470    pub pos: Position,
2471    /// Input character stream.
2472    pub stream: MultiInputsStream<'a>,
2473    /// A processor function that maps a token to another.
2474    pub token_mapper: Option<&'a OnParseTokenCallback>,
2475}
2476
2477impl<'a> Iterator for TokenIterator<'a> {
2478    type Item = (Token, Position);
2479
2480    fn next(&mut self) -> Option<Self::Item> {
2481        let (within_interpolated, compress_script) = {
2482            let control = &mut *self.state.tokenizer_control.borrow_mut();
2483
2484            if control.is_within_text {
2485                // Switch to text mode terminated by back-tick
2486                self.state.is_within_text_terminated_by = Some('`');
2487                // Reset it
2488                control.is_within_text = false;
2489            }
2490
2491            (
2492                self.state.is_within_text_terminated_by.is_some(),
2493                control.compressed.is_some(),
2494            )
2495        };
2496
2497        let (token, pos) = match get_next_token(&mut self.stream, &mut self.state, &mut self.pos) {
2498            // {EOF}
2499            r @ (Token::EOF, _) => return Some(r),
2500            // {EOF} after unterminated string.
2501            // The only case where `TokenizeState.is_within_text_terminated_by` is set is when
2502            // a verbatim string or a string with continuation encounters {EOF}.
2503            // This is necessary to handle such cases for line-by-line parsing, but for an entire
2504            // script it is a syntax error.
2505            (Token::StringConstant(..), pos) if self.state.is_within_text_terminated_by.is_some() => {
2506                self.state.is_within_text_terminated_by = None;
2507                return Some((Token::LexError(LERR::UnterminatedString.into()), pos));
2508            }
2509            // Reserved keyword/symbol
2510            (Token::Reserved(s), pos) => (match
2511                (s.as_str(),
2512                    #[cfg(not(feature = "no_custom_syntax"))]
2513                    self.engine.custom_keywords.contains_key(&*s),
2514                    #[cfg(feature = "no_custom_syntax")]
2515                    false
2516                )
2517            {
2518                ("===", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
2519                    "'===' is not a valid operator. This is not JavaScript! Should it be '=='?".to_string(),
2520                ).into()),
2521                ("!==", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
2522                    "'!==' is not a valid operator. This is not JavaScript! Should it be '!='?".to_string(),
2523                ).into()),
2524                ("->", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
2525                    "'->' is not a valid symbol. This is not C or C++!".to_string()).into()),
2526                ("<-", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
2527                    "'<-' is not a valid symbol. This is not Go! Should it be '<='?".to_string(),
2528                ).into()),
2529                (":=", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
2530                    "':=' is not a valid assignment operator. This is not Go or Pascal! Should it be simply '='?".to_string(),
2531                ).into()),
2532                (":;", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
2533                    "':;' is not a valid symbol. Should it be '::'?".to_string(),
2534                ).into()),
2535                ("::<", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
2536                    "'::<>' is not a valid symbol. This is not Rust! Should it be '::'?".to_string(),
2537                ).into()),
2538                ("(*" | "*)", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
2539                    "'(* .. *)' is not a valid comment format. This is not Pascal! Should it be '/* .. */'?".to_string(),
2540                ).into()),
2541                ("# {", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
2542                    "'#' is not a valid symbol. Should it be '#{'?".to_string(),
2543                ).into()),
2544                // Reserved keyword/operator that is custom.
2545                #[cfg(not(feature = "no_custom_syntax"))]
2546                (.., true) => Token::Custom(s),
2547                #[cfg(feature = "no_custom_syntax")]
2548                (.., true) => unreachable!("no custom operators"),
2549                // Reserved keyword that is not custom but is disabled.
2550                (token, false) if self.engine.is_symbol_disabled(token) => {
2551                    let msg = format!("reserved {} '{token}' is disabled", if is_valid_identifier(token) { "keyword" } else { "symbol" });
2552                    Token::LexError(LERR::ImproperSymbol(s.to_string(), msg).into())
2553                },
2554                // Reserved keyword/operator that is not custom.
2555                (.., false) => Token::Reserved(s),
2556            }, pos),
2557            // Custom keyword
2558            #[cfg(not(feature = "no_custom_syntax"))]
2559            (Token::Identifier(s), pos) if self.engine.custom_keywords.contains_key(&*s) => {
2560                (Token::Custom(s), pos)
2561            }
2562            // Custom keyword/symbol - must be disabled
2563            #[cfg(not(feature = "no_custom_syntax"))]
2564            (token, pos) if token.is_literal() && self.engine.custom_keywords.contains_key(token.literal_syntax()) => {
2565                // Active standard keyword should never be a custom keyword!
2566                debug_assert!(self.engine.is_symbol_disabled(token.literal_syntax()), "{:?} is an active keyword", token);
2567
2568                (Token::Custom(Box::new(token.literal_syntax().into())), pos)
2569            }
2570            // Disabled symbol
2571            (token, pos) if token.is_literal() && self.engine.is_symbol_disabled(token.literal_syntax()) => {
2572                (Token::Reserved(Box::new(token.literal_syntax().into())), pos)
2573            }
2574            // Normal symbol
2575            r => r,
2576        };
2577
2578        // Run the mapper, if any
2579        let token = match self.token_mapper {
2580            Some(func) => func(token, pos, &self.state),
2581            None => token,
2582        };
2583
2584        // Collect the compressed script, if needed
2585        if compress_script {
2586            let control = &mut *self.state.tokenizer_control.borrow_mut();
2587
2588            if token != Token::EOF {
2589                if let Some(ref mut compressed) = control.compressed {
2590                    use std::fmt::Write;
2591
2592                    let last_token = self.state.last_token.as_ref().unwrap();
2593                    let mut buf = SmartString::new_const();
2594
2595                    if last_token.is_empty() {
2596                        write!(buf, "{token}").unwrap();
2597                    } else if within_interpolated
2598                        && matches!(
2599                            token,
2600                            Token::StringConstant(..) | Token::InterpolatedString(..)
2601                        )
2602                    {
2603                        compressed.push_str(&last_token[1..]);
2604                    } else {
2605                        buf = last_token.clone();
2606                    }
2607
2608                    if !buf.is_empty() && !compressed.is_empty() {
2609                        let cur = buf.chars().next().unwrap();
2610
2611                        if cur == '_' || is_id_first_alphabetic(cur) || is_id_continue(cur) {
2612                            let prev = compressed.chars().last().unwrap();
2613
2614                            if prev == '_' || is_id_first_alphabetic(prev) || is_id_continue(prev) {
2615                                compressed.push(' ');
2616                            }
2617                        }
2618                    }
2619
2620                    compressed.push_str(&buf);
2621                }
2622            }
2623        }
2624
2625        Some((token, pos))
2626    }
2627}
2628
2629impl FusedIterator for TokenIterator<'_> {}
2630
2631impl Engine {
2632    /// _(internals)_ Tokenize an input text stream.
2633    /// Exported under the `internals` feature only.
2634    ///
2635    /// # Panics
2636    ///
2637    /// Panics if there are no input streams.
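    ///
    /// # Example
    ///
    /// An illustrative sketch (requires the `internals` feature; not compiled as a doc-test):
    ///
    /// ```ignore
    /// let engine = Engine::new();
    /// let (tokens, _control) = engine.lex(&["let x = 40 + 2;"]);
    ///
    /// for (token, pos) in tokens {
    ///     println!("{pos:?}: {token:?}");
    /// }
    /// ```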
2638    #[cfg(feature = "internals")]
2639    #[inline(always)]
2640    #[must_use]
2641    pub fn lex<'a>(
2642        &'a self,
2643        inputs: impl IntoIterator<Item = &'a (impl AsRef<str> + 'a)>,
2644    ) -> (TokenIterator<'a>, TokenizerControl) {
2645        lex_raw(self, inputs, None)
2646    }
2647    /// _(internals)_ Tokenize an input text stream with a mapping function.
2648    /// Exported under the `internals` feature only.
2649    ///
2650    /// # Panics
2651    ///
2652    /// Panics if there are no input streams.
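    ///
    /// # Example
    ///
    /// An illustrative sketch with an identity mapper (requires the `internals` feature;
    /// not compiled as a doc-test):
    ///
    /// ```ignore
    /// let engine = Engine::new();
    /// let (tokens, _control) = engine.lex_with_map(
    ///     &["let x = 42;"],
    ///     &|token, _pos, _state| token,   // pass every token through unchanged
    /// );
    /// ```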
2653    #[cfg(feature = "internals")]
2654    #[inline(always)]
2655    #[must_use]
2656    pub fn lex_with_map<'a>(
2657        &'a self,
2658        inputs: impl IntoIterator<Item = &'a (impl AsRef<str> + 'a)>,
2659        token_mapper: &'a OnParseTokenCallback,
2660    ) -> (TokenIterator<'a>, TokenizerControl) {
2661        lex_raw(self, inputs, Some(token_mapper))
2662    }
2663}
2664
2665/// Tokenize an input text stream with an optional mapping function.
2666///
2667/// # Panics
2668///
2669/// Panics if there are no input streams.
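///
/// The returned [`TokenizerControl`] handle can be used to influence tokenization while the
/// iterator is consumed; a sketch (illustrative only):
///
/// ```ignore
/// let engine = Engine::new();
/// let (tokens, control) = lex_raw(&engine, &["let x = 1 + 2;"], None);
///
/// // Ask the tokenizer to collect a whitespace-compressed copy of the script.
/// control.borrow_mut().compressed = Some(String::new());
///
/// let _all: Vec<_> = tokens.collect();
/// let compressed = control.borrow_mut().compressed.take();
/// ```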
2670#[inline]
2671#[must_use]
2672pub fn lex_raw<'a>(
2673    engine: &'a Engine,
2674    inputs: impl IntoIterator<Item = &'a (impl AsRef<str> + 'a)>,
2675    token_mapper: Option<&'a OnParseTokenCallback>,
2676) -> (TokenIterator<'a>, TokenizerControl) {
2677    let buffer: TokenizerControl = RefCell::new(TokenizerControlBlock::new()).into();
2678    let buffer2 = buffer.clone();
2679
2680    (
2681        TokenIterator {
2682            engine,
2683            state: TokenizeState {
2684                #[cfg(not(feature = "unchecked"))]
2685                max_string_len: std::num::NonZeroUsize::new(engine.max_string_size()),
2686                next_token_cannot_be_unary: false,
2687                tokenizer_control: buffer,
2688                comment_level: 0,
2689                include_comments: false,
2690                is_within_text_terminated_by: None,
2691                last_token: None,
2692            },
2693            pos: Position::new(1, 0),
2694            stream: MultiInputsStream {
2695                buf: [None, None],
2696                streams: inputs
2697                    .into_iter()
2698                    .map(|s| s.as_ref().chars().peekable())
2699                    .collect(),
2700                index: 0,
2701            },
2702            token_mapper,
2703        },
2704        buffer2,
2705    )
2706}