use crate::engine::Precedence;
use crate::func::native::OnParseTokenCallback;
use crate::{Engine, Identifier, LexError, Position, SmartString, INT, UNSIGNED_INT};
#[cfg(feature = "no_std")]
use std::prelude::v1::*;
use std::{
cell::RefCell,
char, fmt,
iter::{FusedIterator, Peekable},
num::NonZeroUsize,
rc::Rc,
str::{Chars, FromStr},
};
/// Control data for the tokenizer.
///
/// It is normally held behind the [`TokenizerControl`] alias
/// (`Rc<RefCell<TokenizerControlBlock>>`) so that several owners can read and update it.
#[derive(Debug, Clone, Eq, PartialEq, Default, Hash)]
pub struct TokenizerControlBlock {
/// Flag that starts out `false`.
/// NOTE(review): presumably set while the tokenizer is positioned inside the text portion of
/// an interpolated string — confirm against the code that reads it (not visible here).
pub is_within_text: bool,
/// Text of global comments; this field only exists when the `metadata` feature is enabled.
#[cfg(feature = "metadata")]
pub global_comments: String,
/// Optional string that starts out as `None`.
/// NOTE(review): presumably receives a whitespace-compressed copy of the script when set to
/// `Some` — confirm against the code that writes it (not visible here).
pub compressed: Option<String>,
}
impl TokenizerControlBlock {
    /// Build a control block in its initial state: nothing collected, no flag raised.
    ///
    /// Usable in `const` contexts.
    #[inline]
    #[must_use]
    pub const fn new() -> Self {
        Self {
            compressed: None,
            #[cfg(feature = "metadata")]
            global_comments: String::new(),
            is_within_text: false,
        }
    }
}
/// Shared, interior-mutable handle to a [`TokenizerControlBlock`].
pub type TokenizerControl = Rc<RefCell<TokenizerControlBlock>>;
/// Short local alias for [`LexError`], used when constructing lexer errors.
type LERR = LexError;
/// Character that may appear between the digits of a number literal.
/// The number scanner consumes it without adding it to the number's text.
const NUMBER_SEPARATOR: char = '_';
/// A peekable stream of tokens produced by a `TokenIterator`.
pub type TokenStream<'a> = Peekable<TokenIterator<'a>>;
/// A single lexical token.
///
/// Variants that carry data hold the parsed value or the token's text; all remaining variants
/// stand for one fixed piece of syntax, which `Token::literal_syntax` returns.
/// Several variants only exist when the corresponding feature has not been switched off.
#[derive(Debug, PartialEq, Clone, Hash)]
#[non_exhaustive]
pub enum Token {
/// An integer constant.
IntegerConstant(INT),
/// A floating-point constant (absent under `no_float`).
#[cfg(not(feature = "no_float"))]
FloatConstant(crate::types::FloatWrapper<crate::FLOAT>),
/// A decimal constant (only with the `decimal` feature).
#[cfg(feature = "decimal")]
DecimalConstant(Box<rust_decimal::Decimal>),
/// An identifier.
Identifier(Box<Identifier>),
/// A character constant.
CharConstant(char),
/// A string constant.
StringConstant(Box<SmartString>),
/// A segment of a string that was cut short by the start of an interpolation (`${`).
InterpolatedString(Box<SmartString>),
/// `{`
LeftBrace,
/// `}`
RightBrace,
/// `(`
LeftParen,
/// `)`
RightParen,
/// `[`
LeftBracket,
/// `]`
RightBracket,
/// `()`
Unit,
/// `+`
Plus,
/// `+` (unary)
UnaryPlus,
/// `-`
Minus,
/// `-` (unary)
UnaryMinus,
/// `*`
Multiply,
/// `/`
Divide,
/// `%`
Modulo,
/// `**`
PowerOf,
/// `<<`
LeftShift,
/// `>>`
RightShift,
/// `;`
SemiColon,
/// `:`
Colon,
/// `::`
DoubleColon,
/// `=>`
DoubleArrow,
/// `_`
Underscore,
/// `,`
Comma,
/// `.`
Period,
/// `?.` (absent under `no_object`)
#[cfg(not(feature = "no_object"))]
Elvis,
/// `??`
DoubleQuestion,
/// `?[` (absent under `no_index`)
#[cfg(not(feature = "no_index"))]
QuestionBracket,
/// `..`
ExclusiveRange,
/// `..=`
InclusiveRange,
/// `#{`
MapStart,
/// `=`
Equals,
/// `true`
True,
/// `false`
False,
/// `let`
Let,
/// `const`
Const,
/// `if`
If,
/// `else`
Else,
/// `switch`
Switch,
/// `do`
Do,
/// `while`
While,
/// `until`
Until,
/// `loop`
Loop,
/// `for`
For,
/// `in`
In,
/// `!in`
NotIn,
/// `<`
LessThan,
/// `>`
GreaterThan,
/// `<=`
LessThanEqualsTo,
/// `>=`
GreaterThanEqualsTo,
/// `==`
EqualsTo,
/// `!=`
NotEqualsTo,
/// `!`
Bang,
/// `|`
Pipe,
/// `||`
Or,
/// `^`
XOr,
/// `&`
Ampersand,
/// `&&`
And,
/// `fn` (absent under `no_function`)
#[cfg(not(feature = "no_function"))]
Fn,
/// `continue`
Continue,
/// `break`
Break,
/// `return`
Return,
/// `throw`
Throw,
/// `try`
Try,
/// `catch`
Catch,
/// `+=`
PlusAssign,
/// `-=`
MinusAssign,
/// `*=`
MultiplyAssign,
/// `/=`
DivideAssign,
/// `<<=`
LeftShiftAssign,
/// `>>=`
RightShiftAssign,
/// `&=`
AndAssign,
/// `|=`
OrAssign,
/// `^=`
XOrAssign,
/// `%=`
ModuloAssign,
/// `**=`
PowerOfAssign,
/// `private` (absent under `no_function`)
#[cfg(not(feature = "no_function"))]
Private,
/// `import` (absent under `no_module`)
#[cfg(not(feature = "no_module"))]
Import,
/// `export` (absent under `no_module`)
#[cfg(not(feature = "no_module"))]
Export,
/// `as` (absent under `no_module`)
#[cfg(not(feature = "no_module"))]
As,
/// A lexer error, delivered in-band as a token.
LexError(Box<LexError>),
/// A comment, carrying its text.
Comment(Box<String>),
/// A reserved keyword or symbol, carrying its text.
Reserved(Box<SmartString>),
/// A custom keyword or symbol, carrying its text (absent under `no_custom_syntax`).
#[cfg(not(feature = "no_custom_syntax"))]
Custom(Box<SmartString>),
/// End of input. Also used as the "empty slot" marker inside `KEYWORDS_LIST`.
EOF,
}
impl fmt::Display for Token {
#[inline(always)]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
#[allow(clippy::enum_glob_use)]
use Token::*;
match self {
IntegerConstant(i) => write!(f, "{i}"),
#[cfg(not(feature = "no_float"))]
FloatConstant(v) => write!(f, "{v}"),
#[cfg(feature = "decimal")]
DecimalConstant(d) => write!(f, "{d}"),
StringConstant(s) => write!(f, r#""{s}""#),
InterpolatedString(..) => f.write_str("string"),
CharConstant(c) => write!(f, "{c}"),
Identifier(s) => f.write_str(s),
Reserved(s) => f.write_str(s),
#[cfg(not(feature = "no_custom_syntax"))]
Custom(s) => f.write_str(s),
LexError(err) => write!(f, "{err}"),
Comment(s) => f.write_str(s),
EOF => f.write_str("{EOF}"),
token => f.write_str(token.literal_syntax()),
}
}
}
// Parameters of the keyword/symbol hash lookup performed by `Token::lookup_symbol_from_syntax`.
// NOTE(review): these tables look machine-generated by a perfect-hash generator; they should
// presumably be regenerated rather than edited by hand — confirm the tool used.
/// Shortest syntax, in bytes, that the lookup will consider.
const MIN_KEYWORD_LEN: usize = 1;
/// Longest syntax, in bytes, that the lookup will consider.
const MAX_KEYWORD_LEN: usize = 8;
/// Smallest hash value accepted as an index into `KEYWORDS_LIST`.
const MIN_KEYWORD_HASH_VALUE: usize = 1;
/// Largest hash value accepted as an index into `KEYWORDS_LIST`.
const MAX_KEYWORD_HASH_VALUE: usize = 152;
/// Per-byte weights for the keyword hash.
///
/// The hash is `len + weight[first byte] + weight[second byte + 1]` (the last term only when
/// the syntax has more than one byte). The `+ 1` offset is why the table has 257 entries
/// rather than 256. The filler value 153 is larger than `MAX_KEYWORD_HASH_VALUE`, so any
/// byte carrying it pushes the hash out of the accepted range.
static KEYWORD_ASSOC_VALUES: [u8; 257] = [
153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 115, 153, 100, 153, 110,
105, 40, 80, 2, 20, 25, 125, 95, 15, 40, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 55,
35, 10, 5, 0, 30, 110, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 120, 105, 100, 85, 90, 153, 125, 5,
0, 125, 35, 10, 100, 153, 20, 0, 153, 10, 0, 45, 55, 0, 153, 50, 55, 5, 0, 153, 0, 0, 35, 153,
45, 50, 30, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153, 153,
153,
];
/// Keyword and symbol table indexed by hash value.
///
/// Slot `i` holds the syntax whose hash (see `KEYWORD_ASSOC_VALUES`) equals `i`, paired with
/// its token. `("", Token::EOF)` marks an unused slot. A keyword whose token is compiled out
/// by a feature flag is replaced by an unused slot, so the remaining entries keep their
/// positions. The order of entries is therefore significant and must not be changed by hand.
static KEYWORDS_LIST: [(&str, Token); 153] = [
("", Token::EOF),
(">", Token::GreaterThan),
(">=", Token::GreaterThanEqualsTo),
(")", Token::RightParen),
("", Token::EOF),
("const", Token::Const),
("=", Token::Equals),
("==", Token::EqualsTo),
("continue", Token::Continue),
("", Token::EOF),
("catch", Token::Catch),
("<", Token::LessThan),
("<=", Token::LessThanEqualsTo),
("for", Token::For),
("loop", Token::Loop),
("", Token::EOF),
(".", Token::Period),
("<<", Token::LeftShift),
("<<=", Token::LeftShiftAssign),
("", Token::EOF),
("false", Token::False),
("*", Token::Multiply),
("*=", Token::MultiplyAssign),
("let", Token::Let),
("", Token::EOF),
("while", Token::While),
("+", Token::Plus),
("+=", Token::PlusAssign),
("", Token::EOF),
("", Token::EOF),
("throw", Token::Throw),
("}", Token::RightBrace),
(">>", Token::RightShift),
(">>=", Token::RightShiftAssign),
("", Token::EOF),
("", Token::EOF),
(";", Token::SemiColon),
("=>", Token::DoubleArrow),
("", Token::EOF),
("else", Token::Else),
("", Token::EOF),
("/", Token::Divide),
("/=", Token::DivideAssign),
("", Token::EOF),
("", Token::EOF),
("", Token::EOF),
("{", Token::LeftBrace),
("**", Token::PowerOf),
("**=", Token::PowerOfAssign),
("", Token::EOF),
("", Token::EOF),
("|", Token::Pipe),
("|=", Token::OrAssign),
("", Token::EOF),
("", Token::EOF),
("", Token::EOF),
(":", Token::Colon),
("..", Token::ExclusiveRange),
("..=", Token::InclusiveRange),
("", Token::EOF),
("until", Token::Until),
("switch", Token::Switch),
#[cfg(not(feature = "no_function"))]
("private", Token::Private),
#[cfg(feature = "no_function")]
("", Token::EOF),
("try", Token::Try),
("true", Token::True),
("break", Token::Break),
("return", Token::Return),
#[cfg(not(feature = "no_function"))]
("fn", Token::Fn),
#[cfg(feature = "no_function")]
("", Token::EOF),
("", Token::EOF),
("", Token::EOF),
("", Token::EOF),
#[cfg(not(feature = "no_module"))]
("import", Token::Import),
#[cfg(feature = "no_module")]
("", Token::EOF),
#[cfg(not(feature = "no_object"))]
("?.", Token::Elvis),
#[cfg(feature = "no_object")]
("", Token::EOF),
("", Token::EOF),
("", Token::EOF),
("", Token::EOF),
#[cfg(not(feature = "no_module"))]
("export", Token::Export),
#[cfg(feature = "no_module")]
("", Token::EOF),
("in", Token::In),
("", Token::EOF),
("", Token::EOF),
("", Token::EOF),
("(", Token::LeftParen),
("||", Token::Or),
("", Token::EOF),
("", Token::EOF),
("", Token::EOF),
("^", Token::XOr),
("^=", Token::XOrAssign),
("", Token::EOF),
("", Token::EOF),
("", Token::EOF),
("_", Token::Underscore),
("::", Token::DoubleColon),
("", Token::EOF),
("", Token::EOF),
("", Token::EOF),
("-", Token::Minus),
("-=", Token::MinusAssign),
("", Token::EOF),
("", Token::EOF),
("", Token::EOF),
("]", Token::RightBracket),
("()", Token::Unit),
("", Token::EOF),
("", Token::EOF),
("", Token::EOF),
("&", Token::Ampersand),
("&=", Token::AndAssign),
("", Token::EOF),
("", Token::EOF),
("", Token::EOF),
("%", Token::Modulo),
("%=", Token::ModuloAssign),
("", Token::EOF),
("", Token::EOF),
("", Token::EOF),
("!", Token::Bang),
("!=", Token::NotEqualsTo),
("!in", Token::NotIn),
("", Token::EOF),
("", Token::EOF),
("[", Token::LeftBracket),
("if", Token::If),
("", Token::EOF),
("", Token::EOF),
("", Token::EOF),
(",", Token::Comma),
("do", Token::Do),
("", Token::EOF),
("", Token::EOF),
("", Token::EOF),
("", Token::EOF),
#[cfg(not(feature = "no_module"))]
("as", Token::As),
#[cfg(feature = "no_module")]
("", Token::EOF),
("", Token::EOF),
("", Token::EOF),
("", Token::EOF),
("", Token::EOF),
#[cfg(not(feature = "no_index"))]
("?[", Token::QuestionBracket),
#[cfg(feature = "no_index")]
("", Token::EOF),
("", Token::EOF),
("", Token::EOF),
("", Token::EOF),
("", Token::EOF),
("??", Token::DoubleQuestion),
("", Token::EOF),
("", Token::EOF),
("", Token::EOF),
("", Token::EOF),
("&&", Token::And),
("", Token::EOF),
("", Token::EOF),
("", Token::EOF),
("", Token::EOF),
("#{", Token::MapStart),
];
// Parameters of the reserved-word hash lookup. The routine that consumes them is not part of
// the code visible here.
// NOTE(review): by analogy with the keyword tables these presumably bound the accepted
// syntax length and hash value — confirm against the lookup routine.
/// Lower bound on the byte length of a reserved syntax.
const MIN_RESERVED_LEN: usize = 1;
/// Upper bound on the byte length of a reserved syntax.
const MAX_RESERVED_LEN: usize = 10;
/// Lower bound on a reserved-word hash value.
const MIN_RESERVED_HASH_VALUE: usize = 1;
/// Upper bound on a reserved-word hash value (`RESERVED_LIST` has 150 slots, indices 0..=149).
const MAX_RESERVED_HASH_VALUE: usize = 149;
/// Per-byte weights for the reserved-word hash. The filler value 150 is larger than
/// `MAX_RESERVED_HASH_VALUE`.
/// NOTE(review): which bytes of the syntax are fed through this table is decided by the
/// lookup routine, which is not visible here.
static RESERVED_ASSOC_VALUES: [u8; 256] = [
150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 10, 150, 5, 35, 150, 150,
150, 45, 35, 30, 30, 150, 20, 15, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 35,
30, 15, 5, 25, 0, 25, 150, 150, 150, 150, 150, 65, 150, 150, 150, 150, 150, 150, 150, 150, 150,
150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 40, 150, 150, 150, 150, 150, 0, 150, 0,
0, 0, 15, 45, 10, 15, 150, 150, 35, 25, 10, 50, 0, 150, 5, 0, 15, 0, 5, 25, 45, 15, 150, 150,
25, 150, 20, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150, 150,
];
/// Reserved-word table indexed by hash value; an empty string marks an unused slot.
///
/// Each entry is a syntax string followed by three flags.
/// NOTE(review): the flags presumably mean, in order, "is reserved", "may be called as a
/// function" and "may be called in method style" — confirm against the lookup routine, which
/// is not visible here.
///
/// Entries whose first flag is a `cfg!(...)` expression take that flag's value from the build's
/// features. For example, `fn` has it set only when `no_function` is enabled, which is exactly
/// when `Token::Fn` is compiled out of `KEYWORDS_LIST`.
///
/// The order of entries is significant (slot index equals hash value).
static RESERVED_LIST: [(&str, bool, bool, bool); 150] = [
("", false, false, false),
("?", true, false, false),
("as", cfg!(feature = "no_module"), false, false),
("use", true, false, false),
("case", true, false, false),
("async", true, false, false),
("public", true, false, false),
("package", true, false, false),
("", false, false, false),
("", false, false, false),
("super", true, false, false),
("#", true, false, false),
("private", cfg!(feature = "no_function"), false, false),
("var", true, false, false),
("protected", true, false, false),
("spawn", true, false, false),
("shared", true, false, false),
("is", true, false, false),
("===", true, false, false),
("sync", true, false, false),
("curry", true, true, true),
("static", true, false, false),
("default", true, false, false),
("!==", true, false, false),
("is_shared", cfg!(not(feature = "no_closure")), true, true),
("print", true, true, false),
("", false, false, false),
("#!", true, false, false),
("", false, false, false),
("this", true, false, false),
("is_def_var", true, true, false),
("thread", true, false, false),
("?.", cfg!(feature = "no_object"), false, false),
("", false, false, false),
("is_def_fn", cfg!(not(feature = "no_function")), true, false),
("yield", true, false, false),
("", false, false, false),
("fn", cfg!(feature = "no_function"), false, false),
("new", true, false, false),
("call", true, true, true),
("match", true, false, false),
("~", true, false, false),
("!.", true, false, false),
("", false, false, false),
("eval", true, true, false),
("await", true, false, false),
("", false, false, false),
(":=", true, false, false),
("...", true, false, false),
("null", true, false, false),
("debug", true, true, false),
("@", true, false, false),
("type_of", true, true, true),
("", false, false, false),
("with", true, false, false),
("", false, false, false),
("", false, false, false),
("<-", true, false, false),
("", false, false, false),
("void", true, false, false),
("", false, false, false),
("import", cfg!(feature = "no_module"), false, false),
("--", true, false, false),
("nil", true, false, false),
// NOTE(review): `exit` is the only non-empty entry with all three flags `false` — confirm
// this is intentional.
("exit", false, false, false),
("", false, false, false),
("export", cfg!(feature = "no_module"), false, false),
("<|", true, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("$", true, false, false),
("->", true, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("|>", true, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("module", true, false, false),
("?[", cfg!(feature = "no_index"), false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("Fn", true, true, false),
("::<", true, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("++", true, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
(":;", true, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("*)", true, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("(*", true, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("go", true, false, false),
("", false, false, false),
("goto", true, false, false),
];
impl Token {
/// Does this token have one fixed spelling?
///
/// Returns `false` for every value-carrying variant (constants, strings, identifiers, reserved
/// and custom words, errors, comments) and for `EOF`; `true` for everything else.
#[must_use]
pub const fn is_literal(&self) -> bool {
#[allow(clippy::enum_glob_use)]
use Token::*;
match self {
IntegerConstant(..) => false,
#[cfg(not(feature = "no_float"))]
FloatConstant(..) => false,
#[cfg(feature = "decimal")]
DecimalConstant(..) => false,
StringConstant(..)
| InterpolatedString(..)
| CharConstant(..)
| Identifier(..)
| Reserved(..) => false,
#[cfg(not(feature = "no_custom_syntax"))]
Custom(..) => false,
LexError(..) | Comment(..) => false,
EOF => false,
_ => true,
}
}
/// The fixed spelling of a keyword or symbol token.
///
/// The unary and binary forms of `+` and `-` share the same spelling.
///
/// # Panics
///
/// Panics with "token is not a literal symbol" when called on a token that has no arm below
/// (the value-carrying variants and `EOF`).
#[must_use]
pub const fn literal_syntax(&self) -> &'static str {
#[allow(clippy::enum_glob_use)]
use Token::*;
match self {
LeftBrace => "{",
RightBrace => "}",
LeftParen => "(",
RightParen => ")",
LeftBracket => "[",
RightBracket => "]",
Unit => "()",
Plus => "+",
UnaryPlus => "+",
Minus => "-",
UnaryMinus => "-",
Multiply => "*",
Divide => "/",
SemiColon => ";",
Colon => ":",
DoubleColon => "::",
DoubleArrow => "=>",
Underscore => "_",
Comma => ",",
Period => ".",
#[cfg(not(feature = "no_object"))]
Elvis => "?.",
DoubleQuestion => "??",
#[cfg(not(feature = "no_index"))]
QuestionBracket => "?[",
ExclusiveRange => "..",
InclusiveRange => "..=",
MapStart => "#{",
Equals => "=",
True => "true",
False => "false",
Let => "let",
Const => "const",
If => "if",
Else => "else",
Switch => "switch",
Do => "do",
While => "while",
Until => "until",
Loop => "loop",
For => "for",
In => "in",
NotIn => "!in",
LessThan => "<",
GreaterThan => ">",
Bang => "!",
LessThanEqualsTo => "<=",
GreaterThanEqualsTo => ">=",
EqualsTo => "==",
NotEqualsTo => "!=",
Pipe => "|",
Or => "||",
Ampersand => "&",
And => "&&",
Continue => "continue",
Break => "break",
Return => "return",
Throw => "throw",
Try => "try",
Catch => "catch",
PlusAssign => "+=",
MinusAssign => "-=",
MultiplyAssign => "*=",
DivideAssign => "/=",
LeftShiftAssign => "<<=",
RightShiftAssign => ">>=",
AndAssign => "&=",
OrAssign => "|=",
XOrAssign => "^=",
LeftShift => "<<",
RightShift => ">>",
XOr => "^",
Modulo => "%",
ModuloAssign => "%=",
PowerOf => "**",
PowerOfAssign => "**=",
#[cfg(not(feature = "no_function"))]
Fn => "fn",
#[cfg(not(feature = "no_function"))]
Private => "private",
#[cfg(not(feature = "no_module"))]
Import => "import",
#[cfg(not(feature = "no_module"))]
Export => "export",
#[cfg(not(feature = "no_module"))]
As => "as",
_ => panic!("token is not a literal symbol"),
}
}
/// Is this token a compound assignment operator (`+=`, `-=`, `<<=`, ...)?
///
/// Plain `=` is not included.
#[inline]
#[must_use]
pub const fn is_op_assignment(&self) -> bool {
#[allow(clippy::enum_glob_use)]
use Token::*;
matches!(
self,
PlusAssign
| MinusAssign
| MultiplyAssign
| DivideAssign
| LeftShiftAssign
| RightShiftAssign
| ModuloAssign
| PowerOfAssign
| AndAssign
| OrAssign
| XOrAssign
)
}
/// Map a compound assignment operator to its underlying binary operator
/// (e.g. `+=` to `+`, `&=` to `&`).
///
/// Returns `None` for any token that is not a compound assignment operator.
#[must_use]
pub const fn get_base_op_from_assignment(&self) -> Option<Self> {
#[allow(clippy::enum_glob_use)]
use Token::*;
Some(match self {
PlusAssign => Plus,
MinusAssign => Minus,
MultiplyAssign => Multiply,
DivideAssign => Divide,
LeftShiftAssign => LeftShift,
RightShiftAssign => RightShift,
ModuloAssign => Modulo,
PowerOfAssign => PowerOf,
AndAssign => Ampersand,
OrAssign => Pipe,
XOrAssign => XOr,
_ => return None,
})
}
/// Is this token a binary operator that has a compound assignment form?
///
/// This is the exact set of tokens for which `convert_to_op_assignment` returns `Some`.
#[inline]
#[must_use]
pub const fn has_op_assignment(&self) -> bool {
#[allow(clippy::enum_glob_use)]
use Token::*;
matches!(
self,
Plus | Minus
| Multiply
| Divide
| LeftShift
| RightShift
| Modulo
| PowerOf
| Ampersand
| Pipe
| XOr
)
}
/// Map a binary operator to its compound assignment form (e.g. `+` to `+=`, `|` to `|=`).
///
/// Returns `None` for any token without such a form. This is the inverse of
/// `get_base_op_from_assignment`.
#[must_use]
pub const fn convert_to_op_assignment(&self) -> Option<Self> {
#[allow(clippy::enum_glob_use)]
use Token::*;
Some(match self {
Plus => PlusAssign,
Minus => MinusAssign,
Multiply => MultiplyAssign,
Divide => DivideAssign,
LeftShift => LeftShiftAssign,
RightShift => RightShiftAssign,
Modulo => ModuloAssign,
PowerOf => PowerOfAssign,
Ampersand => AndAssign,
Pipe => OrAssign,
XOr => XOrAssign,
_ => return None,
})
}
/// Look up the keyword or symbol token spelled exactly `syntax`.
///
/// Returns `None` when `syntax` is not in `KEYWORDS_LIST`, including keywords whose token is
/// compiled out by a feature flag.
///
/// The lookup hashes the length and the first one or two bytes of `syntax` through
/// `KEYWORD_ASSOC_VALUES` and probes a single slot of `KEYWORDS_LIST`.
#[inline]
#[must_use]
pub fn lookup_symbol_from_syntax(syntax: &str) -> Option<Self> {
let utf8 = syntax.as_bytes();
let len = utf8.len();
// Reject by length first; this also guarantees `utf8[0]` exists below.
if !(MIN_KEYWORD_LEN..=MAX_KEYWORD_LEN).contains(&len) {
return None;
}
// hash = len + weight[first byte] + weight[second byte + 1] (second term only if len > 1).
let mut hash_val = len;
match len {
1 => (),
_ => hash_val += KEYWORD_ASSOC_VALUES[(utf8[1] as usize) + 1] as usize,
}
hash_val += KEYWORD_ASSOC_VALUES[utf8[0] as usize] as usize;
// Out-of-range hashes cannot be keywords; this also keeps the index below in bounds.
if !(MIN_KEYWORD_HASH_VALUE..=MAX_KEYWORD_HASH_VALUE).contains(&hash_val) {
return None;
}
match KEYWORDS_LIST[hash_val] {
// Unused slot.
(_, Self::EOF) => None,
// Cheap checks (length, first byte) come before the full string comparison.
(s, ref t) if s.len() == len && s.as_bytes()[0] == utf8[0] && s == syntax => {
Some(t.clone())
}
// The slot belongs to a different keyword.
_ => None,
}
}
/// Classifies this token for the tokenizer's unary-operator tracking.
///
/// `get_next_token` stores the negation of this value in
/// `TokenizeState::next_token_cannot_be_unary` after every token.
/// NOTE(review): presumably `true` means an operator such as `+`/`-` directly following this
/// token is to be read as unary — confirm against the code that reads the flag.
#[must_use]
pub const fn is_next_unary(&self) -> bool {
#[allow(clippy::enum_glob_use)]
use Token::*;
match self {
// Separators, opening brackets, operators, assignments and a handful of keywords.
// Closing brackets, `Period` and value-carrying tokens (except `LexError`) are absent.
SemiColon | Colon | Comma | DoubleQuestion | ExclusiveRange | InclusiveRange | LeftBrace | LeftParen | LeftBracket | Plus |
PlusAssign |
UnaryPlus |
Minus |
MinusAssign |
UnaryMinus |
Multiply |
MultiplyAssign |
Divide |
DivideAssign |
Modulo |
ModuloAssign |
PowerOf |
PowerOfAssign |
LeftShift |
LeftShiftAssign |
RightShift |
RightShiftAssign |
Equals |
EqualsTo |
NotEqualsTo |
LessThan |
GreaterThan |
Bang |
LessThanEqualsTo |
GreaterThanEqualsTo |
Pipe |
Ampersand |
If |
While |
Until |
In |
NotIn |
And |
AndAssign |
Or |
OrAssign |
XOr |
XOrAssign |
Return |
Throw => true,
#[cfg(not(feature = "no_index"))]
QuestionBracket => true, LexError(..) => true,
_ => false,
}
}
/// Binding strength of this token when used as a binary operator.
///
/// The result is whatever `Precedence::new` yields for the number chosen below; tokens that
/// are not listed pass `0`.
/// NOTE(review): `Precedence::new(0)` presumably yields `None` — confirm in `Precedence`.
#[must_use]
pub const fn precedence(&self) -> Option<Precedence> {
#[allow(clippy::enum_glob_use)]
use Token::*;
Precedence::new(match self {
Or | XOr | Pipe => 30,
And | Ampersand => 60,
EqualsTo | NotEqualsTo => 90,
In | NotIn => 110,
LessThan | LessThanEqualsTo | GreaterThan | GreaterThanEqualsTo => 130,
DoubleQuestion => 135,
ExclusiveRange | InclusiveRange => 140,
Plus | Minus => 150,
Divide | Multiply | Modulo => 180,
PowerOf => 190,
LeftShift | RightShift => 210,
_ => 0,
})
}
/// Does this operator bind to the right? Only `**` (`PowerOf`) does.
#[must_use]
pub const fn is_bind_right(&self) -> bool {
#[allow(clippy::enum_glob_use)]
use Token::*;
match self {
PowerOf => true,
_ => false,
}
}
/// Is this token one of the standard (non-alphabetic) symbols listed below?
///
/// NOTE(review): `DoubleArrow`, `Underscore` and `Unit` have a literal syntax but are not
/// listed here — confirm whether that is intentional.
#[must_use]
pub const fn is_standard_symbol(&self) -> bool {
#[allow(clippy::enum_glob_use)]
use Token::*;
match self {
LeftBrace | RightBrace | LeftParen | RightParen | LeftBracket | RightBracket | Plus
| UnaryPlus | Minus | UnaryMinus | Multiply | Divide | Modulo | PowerOf | LeftShift
| RightShift | SemiColon | Colon | DoubleColon | Comma | Period | DoubleQuestion
| ExclusiveRange | InclusiveRange | MapStart | Equals | LessThan | GreaterThan
| LessThanEqualsTo | GreaterThanEqualsTo | EqualsTo | NotEqualsTo | Bang | Pipe
| Or | XOr | Ampersand | And | PlusAssign | MinusAssign | MultiplyAssign
| DivideAssign | LeftShiftAssign | RightShiftAssign | AndAssign | OrAssign
| XOrAssign | ModuloAssign | PowerOfAssign => true,
#[cfg(not(feature = "no_object"))]
Elvis => true,
#[cfg(not(feature = "no_index"))]
QuestionBracket => true,
_ => false,
}
}
/// Is this token one of the standard keywords listed below?
///
/// NOTE(review): `Switch` (and `NotIn`) are not listed although `switch` appears in
/// `KEYWORDS_LIST` — confirm whether the omission is intentional.
#[inline]
#[must_use]
pub const fn is_standard_keyword(&self) -> bool {
#[allow(clippy::enum_glob_use)]
use Token::*;
match self {
#[cfg(not(feature = "no_function"))]
Fn | Private => true,
#[cfg(not(feature = "no_module"))]
Import | Export | As => true,
True | False | Let | Const | If | Else | Do | While | Until | Loop | For | In
| Continue | Break | Return | Throw | Try | Catch => true,
_ => false,
}
}
/// Is this a `Reserved` token?
#[inline(always)]
#[must_use]
pub const fn is_reserved(&self) -> bool {
matches!(self, Self::Reserved(..))
}
/// Consume the token and extract a function name from it.
///
/// Succeeds for `Identifier` tokens (and `Custom` tokens, unless `no_custom_syntax` is set)
/// whose text passes `is_valid_function_name`; otherwise the token is handed back unchanged
/// in `Err`.
#[cfg(not(feature = "no_function"))]
#[inline]
pub(crate) fn into_function_name_for_override(self) -> Result<SmartString, Self> {
match self {
#[cfg(not(feature = "no_custom_syntax"))]
Self::Custom(s) if is_valid_function_name(&s) => Ok(*s),
Self::Identifier(s) if is_valid_function_name(&s) => Ok(*s),
_ => Err(self),
}
}
/// Is this a `Custom` token?
#[cfg(not(feature = "no_custom_syntax"))]
#[inline(always)]
#[must_use]
pub const fn is_custom(&self) -> bool {
matches!(self, Self::Custom(..))
}
}
impl From<Token> for String {
    /// Convert a token into the text produced by its `Display` implementation.
    #[inline(always)]
    fn from(token: Token) -> Self {
        ToString::to_string(&token)
    }
}
/// Mutable state carried by the tokenizer from one token to the next.
#[derive(Debug, Clone, Eq, PartialEq, Default)]
pub struct TokenizeState {
/// Maximum length of a string literal in bytes; `None` means no limit is enforced.
pub max_string_len: Option<NonZeroUsize>,
/// Updated by `get_next_token` after every token to `!token.is_next_unary()`.
pub next_token_cannot_be_unary: bool,
/// Shared tokenizer control block.
pub tokenizer_control: TokenizerControl,
/// Current block-comment nesting depth. When non-zero at the start of a token, the tokenizer
/// resumes scanning the block comment first.
pub comment_level: usize,
/// When `true`, comment text is collected and returned as `Token::Comment`.
pub include_comments: bool,
/// `Some(ch)` while the tokenizer is inside a string literal terminated by `ch`. The next
/// token request then resumes parsing that string.
pub is_within_text_terminated_by: Option<char>,
/// When `Some`, receives the raw text of the token being scanned. It is cleared at the start
/// of every token; leave as `None` to skip this tracking.
pub last_token: Option<SmartString>,
}
/// Character source consumed by the tokenizer.
pub trait InputStream {
/// Push `ch` back onto the stream.
/// NOTE(review): presumably the next `get_next`/`peek_next` then yields `ch` — confirm with
/// the implementors, which are not visible here.
fn unget(&mut self, ch: char);
/// Consume and return the next character, or `None` when the stream is exhausted.
fn get_next(&mut self) -> Option<char>;
/// Return the next character without consuming it, or `None` when the stream is exhausted.
#[must_use]
fn peek_next(&mut self) -> Option<char>;
}
/// Check `value` against an optional maximum length.
///
/// The length is measured in bytes (`str::len`). With `max == None` every string passes.
///
/// # Errors
///
/// Returns `LexError::StringTooLong(limit)` when `value` is longer than the limit.
#[inline]
const fn ensure_string_len_within_limit(
    max: Option<NonZeroUsize>,
    value: &str,
) -> Result<(), LexError> {
    if let Some(limit) = max {
        let limit = limit.get();
        if value.len() > limit {
            return Err(LexError::StringTooLong(limit));
        }
    }
    Ok(())
}
/// Parse the body of a string literal from `stream`.
///
/// The visible call sites consume the opening delimiter before calling, or call this to resume
/// a string after an interpolation; parsing therefore starts at the first character of the
/// text.
///
/// # Parameters
///
/// * `stream` - character source.
/// * `state` - tokenizer state. `is_within_text_terminated_by` is set to
///   `Some(termination_char)` on entry, and reset to `None` when the string is closed, when an
///   interpolation starts, or when the string is reported as unterminated. If `last_token` is
///   `Some`, it is reset to the termination character and then receives the characters consumed.
/// * `pos` - current position, advanced for every character consumed.
/// * `termination_char` - the closing delimiter. Two delimiters in a row stand for one literal
///   delimiter character inside the text.
/// * `verbatim` - when `true`, a backslash does not start an escape, newlines are kept in the
///   text, and reaching the end of input ends the string without an error.
/// * `allow_line_continuation` - when `true`, a backslash directly before a newline continues
///   the string on the next line; end of input directly after a backslash also ends the string
///   without an error.
/// * `allow_interpolation` - when `true`, an unescaped `$` followed by `{` ends this segment
///   (the `$` is consumed, the `{` is left in the stream).
///
/// # Returns
///
/// `(text, interpolated, first_char)` where `interpolated` tells whether the segment was ended
/// by `${`, and `first_char` is the position recorded at the first character of the text
/// (`Position::NONE` when the string was closed or interpolated before any character).
///
/// # Errors
///
/// * `LexError::UnterminatedString` (at the start position) on a newline or end of input that
///   the flags above do not permit.
/// * `LexError::MalformedEscapeSequence` (at the current position) on an unknown escape, or a
///   `\x`/`\u`/`\U` escape with missing or non-hex digits or an invalid code point.
/// * `LexError::StringTooLong` (at the start position) when the text exceeds
///   `state.max_string_len`.
pub fn parse_string_literal(
stream: &mut impl InputStream,
state: &mut TokenizeState,
pos: &mut Position,
termination_char: char,
verbatim: bool,
allow_line_continuation: bool,
allow_interpolation: bool,
) -> Result<(SmartString, bool, Position), (LexError, Position)> {
let mut result = SmartString::new_const();
// Holds a pending backslash; non-empty means "the previous character started an escape".
let mut escape = SmartString::new_const();
let start = *pos;
let mut first_char = Position::NONE;
let mut interpolated = false;
// After a line continuation, leading whitespace is skipped while the position is below this.
#[cfg(not(feature = "no_position"))]
let mut skip_whitespace_until = 0;
state.is_within_text_terminated_by = Some(termination_char);
if let Some(ref mut last) = state.last_token {
last.clear();
last.push(termination_char);
}
loop {
debug_assert!(
!verbatim || escape.is_empty(),
"verbatim strings should not have any escapes"
);
let next_char = match stream.get_next() {
Some(ch) => {
pos.advance();
ch
}
// End of input inside a verbatim string: accept what has been read so far.
None if verbatim => {
debug_assert_eq!(escape, "", "verbatim strings should not have any escapes");
pos.advance();
break;
}
// End of input right after a backslash, with continuation allowed: accept as well.
None if allow_line_continuation && !escape.is_empty() => {
debug_assert_eq!(escape, "\\", "unexpected escape {escape} at end of line");
pos.advance();
break;
}
None => {
pos.advance();
state.is_within_text_terminated_by = None;
return Err((LERR::UnterminatedString, start));
}
};
if let Some(ref mut last) = state.last_token {
last.push(next_char);
}
// Start of an interpolation: `$` is consumed, `{` stays in the stream.
if allow_interpolation
&& next_char == '$'
&& escape.is_empty()
&& stream.peek_next().map_or(false, |ch| ch == '{')
{
interpolated = true;
state.is_within_text_terminated_by = None;
break;
}
// Checked on every character so that an over-long string fails early.
ensure_string_len_within_limit(state.max_string_len, &result)
.map_err(|err| (err, start))?;
if termination_char == next_char && escape.is_empty() {
if stream.peek_next().map_or(false, |c| c == termination_char) {
// Doubled delimiter: swallow the second one; the `match` below then pushes a single
// delimiter character into the text through its catch-all arm.
eat_next_and_advance(stream, pos);
if let Some(ref mut last) = state.last_token {
last.push(termination_char);
}
} else {
// Closing delimiter.
state.is_within_text_terminated_by = None;
break;
}
}
if first_char.is_none() {
first_char = *pos;
}
// Arm order matters: the guards are evaluated top to bottom.
match next_char {
// `\r` directly before `\n` is dropped.
'\r' if stream.peek_next().map_or(false, |ch| ch == '\n') => (),
// Backslash starting an escape (never in verbatim strings).
'\\' if !verbatim && escape.is_empty() => {
escape.push('\\');
}
// `\\`
'\\' if !escape.is_empty() => {
escape.clear();
result.push('\\');
}
// `\t`
't' if !escape.is_empty() => {
escape.clear();
result.push('\t');
}
// `\n`
'n' if !escape.is_empty() => {
escape.clear();
result.push('\n');
}
// `\r`
'r' if !escape.is_empty() => {
escape.clear();
result.push('\r');
}
// `\xHH`, `\uHHHH`, `\UHHHHHHHH`: a fixed number of hex digits forming a code point.
ch @ ('x' | 'u' | 'U') if !escape.is_empty() => {
// `seq` accumulates the raw escape text for error messages.
let mut seq = escape.clone();
escape.clear();
seq.push(ch);
let mut out_val: u32 = 0;
let len = match ch {
'x' => 2,
'u' => 4,
'U' => 8,
c => unreachable!("x or u or U expected but gets '{}'", c),
};
for _ in 0..len {
let c = stream
.get_next()
.ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?;
pos.advance();
seq.push(c);
if let Some(ref mut last) = state.last_token {
last.push(c);
}
out_val *= 16;
out_val += c
.to_digit(16)
.ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?;
}
result.push(
char::from_u32(out_val)
.ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?,
);
}
// Escaped delimiter.
_ if termination_char == next_char && !escape.is_empty() => {
escape.clear();
result.push(next_char);
}
// Newline inside a verbatim string is part of the text.
'\n' if verbatim => {
debug_assert_eq!(escape, "", "verbatim strings should not have any escapes");
pos.new_line();
result.push(next_char);
}
// Backslash-newline: line continuation, nothing is added to the text.
'\n' if allow_line_continuation && !escape.is_empty() => {
debug_assert_eq!(escape, "\\", "unexpected escape {escape} at end of line");
escape.clear();
pos.new_line();
#[cfg(not(feature = "no_position"))]
{
// Skip whitespace on the continued line up to one past where the string started.
// NOTE(review): `position()` presumably returns the column — confirm in `Position`.
let start_position = start.position().unwrap();
skip_whitespace_until = start_position + 1;
}
}
// Any other newline ends the line before the string was closed.
'\n' => {
pos.rewind();
state.is_within_text_terminated_by = None;
return Err((LERR::UnterminatedString, start));
}
// Backslash followed by a character that is not a known escape.
_ if !escape.is_empty() => {
escape.push(next_char);
return Err((LERR::MalformedEscapeSequence(escape.to_string()), *pos));
}
// Leading whitespace after a line continuation is skipped.
#[cfg(not(feature = "no_position"))]
_ if next_char.is_whitespace() && pos.position().unwrap() < skip_whitespace_until => (),
// Ordinary character.
_ => {
escape.clear();
result.push(next_char);
#[cfg(not(feature = "no_position"))]
{
// The first ordinary character ends the whitespace-skipping mode.
skip_whitespace_until = 0;
}
}
}
}
// Final check covers the last character pushed before the loop ended.
ensure_string_len_within_limit(state.max_string_len, &result).map_err(|err| (err, start))?;
Ok((result, interpolated, first_char))
}
#[inline(always)]
fn eat_next_and_advance(stream: &mut impl InputStream, pos: &mut Position) -> Option<char> {
pos.advance();
stream.get_next()
}
/// Scan the inside of a (possibly nested) block comment.
///
/// Characters are consumed from `stream` until the nesting depth, which starts at `level`,
/// drops to zero or the input ends. Every `/*` raises the depth by one and every `*/` lowers it
/// by one. `pos` is advanced per character and moved to a new line on `'\n'`. When `comment`
/// is `Some`, every consumed character (delimiters included) is appended to it.
///
/// Returns the depth reached when scanning stopped: `0` if the comment was closed, otherwise
/// the number of levels still open at end of input.
fn scan_block_comment(
    stream: &mut impl InputStream,
    level: usize,
    pos: &mut Position,
    comment: Option<&mut String>,
) -> usize {
    let mut depth = level;
    let mut sink = comment;

    loop {
        let ch = match stream.get_next() {
            Some(ch) => ch,
            None => break,
        };
        pos.advance();
        if let Some(ref mut buf) = sink {
            buf.push(ch);
        }

        match ch {
            '/' | '*' => {
                // `/` opens a level when followed by `*`; `*` closes one when followed by `/`.
                let opening = ch == '/';
                let partner = if opening { '*' } else { '/' };

                if stream.peek_next() == Some(partner) {
                    eat_next_and_advance(stream, pos);
                    if let Some(ref mut buf) = sink {
                        buf.push(partner);
                    }
                    if opening {
                        depth += 1;
                    } else {
                        depth -= 1;
                    }
                }
            }
            '\n' => pos.new_line(),
            _ => {}
        }

        if depth == 0 {
            break;
        }
    }

    depth
}
/// Fetch the next token together with its position.
///
/// Delegates to `get_next_token_inner`. Whenever a token is produced, it also records in
/// `state.next_token_cannot_be_unary` the negation of that token's `is_next_unary()`.
/// Returns `None` (leaving that flag untouched) when no token is produced.
#[inline]
#[must_use]
pub fn get_next_token(
    stream: &mut impl InputStream,
    state: &mut TokenizeState,
    pos: &mut Position,
) -> Option<(Token, Position)> {
    let (token, token_pos) = get_next_token_inner(stream, state, pos)?;
    state.next_token_cannot_be_unary = !token.is_next_unary();
    Some((token, token_pos))
}
/// Is `c` an ASCII hexadecimal digit (`0-9`, `a-f` or `A-F`)?
#[inline(always)]
const fn is_hex_digit(c: char) -> bool {
    c.is_ascii_hexdigit()
}
/// Is `c` an ASCII decimal digit (`0-9`)?
#[inline(always)]
const fn is_numeric_digit(c: char) -> bool {
    matches!(c, '0'..='9')
}
/// Is `comment` a doc-comment?
///
/// A doc-comment starts with exactly three slashes (`///` but not `////`) or with a slash
/// followed by exactly two asterisks (`/**` but not `/***`).
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
#[inline]
#[must_use]
pub fn is_doc_comment(comment: &str) -> bool {
    let line_doc = comment
        .strip_prefix("///")
        .map_or(false, |rest| !rest.starts_with('/'));
    let block_doc = comment
        .strip_prefix("/**")
        .map_or(false, |rest| !rest.starts_with('*'));

    line_doc || block_doc
}
#[must_use]
fn get_next_token_inner(
stream: &mut impl InputStream,
state: &mut TokenizeState,
pos: &mut Position,
) -> Option<(Token, Position)> {
state.last_token.as_mut().map(SmartString::clear);
if state.comment_level > 0 {
let start_pos = *pos;
let mut comment = state.include_comments.then(String::new);
state.comment_level =
scan_block_comment(stream, state.comment_level, pos, comment.as_mut());
let return_comment = state.include_comments;
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
let return_comment = return_comment || is_doc_comment(comment.as_ref().expect("`Some`"));
if return_comment {
return Some((Token::Comment(comment.expect("`Some`").into()), start_pos));
}
if state.comment_level > 0 {
return None;
}
}
if let Some(ch) = state.is_within_text_terminated_by.take() {
return parse_string_literal(stream, state, pos, ch, true, false, true).map_or_else(
|(err, err_pos)| Some((Token::LexError(err.into()), err_pos)),
|(result, interpolated, start_pos)| {
if interpolated {
Some((Token::InterpolatedString(result.into()), start_pos))
} else {
Some((Token::StringConstant(result.into()), start_pos))
}
},
);
}
let mut negated: Option<Position> = None;
while let Some(c) = stream.get_next() {
pos.advance();
let start_pos = *pos;
let cc = stream.peek_next().unwrap_or('\0');
match (c, cc) {
('\n', ..) => pos.new_line(),
('0'..='9', ..) => {
let mut result = SmartString::new_const();
let mut radix_base: Option<u32> = None;
let mut valid: fn(char) -> bool = is_numeric_digit;
result.push(c);
while let Some(next_char) = stream.peek_next() {
match next_char {
NUMBER_SEPARATOR => {
eat_next_and_advance(stream, pos);
}
ch if valid(ch) => {
result.push(next_char);
eat_next_and_advance(stream, pos);
}
#[cfg(any(not(feature = "no_float"), feature = "decimal"))]
'.' => {
stream.get_next().unwrap();
match stream.peek_next() {
Some('0'..='9') => {
result.push(next_char);
pos.advance();
}
Some(NUMBER_SEPARATOR) => {
stream.unget(next_char);
break;
}
Some('.') => {
stream.unget(next_char);
break;
}
Some(ch) if !is_id_first_alphabetic(ch) => {
result.push(next_char);
pos.advance();
result.push('0');
}
_ => {
stream.unget(next_char);
break;
}
}
}
#[cfg(not(feature = "no_float"))]
'e' => {
stream.get_next().expect("`e`");
match stream.peek_next() {
Some('0'..='9') => {
result.push(next_char);
pos.advance();
}
Some('+' | '-') => {
result.push(next_char);
pos.advance();
result.push(stream.get_next().unwrap());
pos.advance();
}
_ => {
stream.unget(next_char);
break;
}
}
}
ch @ ('x' | 'o' | 'b' | 'X' | 'O' | 'B')
if c == '0' && result.len() <= 1 =>
{
result.push(next_char);
eat_next_and_advance(stream, pos);
valid = match ch {
'x' | 'X' => is_hex_digit,
'o' | 'O' => is_numeric_digit,
'b' | 'B' => is_numeric_digit,
c => unreachable!("x/X or o/O or b/B expected but gets '{}'", c),
};
radix_base = Some(match ch {
'x' | 'X' => 16,
'o' | 'O' => 8,
'b' | 'B' => 2,
c => unreachable!("x/X or o/O or b/B expected but gets '{}'", c),
});
}
_ => break,
}
}
let num_pos = negated.map_or(start_pos, |negated_pos| {
result.insert(0, '-');
negated_pos
});
if let Some(ref mut last) = state.last_token {
*last = result.clone();
}
let token = radix_base.map_or_else(
|| {
let num = INT::from_str(&result).map(Token::IntegerConstant);
#[cfg(not(feature = "no_float"))]
let num = num.or_else(|_| {
crate::types::FloatWrapper::from_str(&result).map(Token::FloatConstant)
});
#[cfg(feature = "decimal")]
let num = num.or_else(|_| {
rust_decimal::Decimal::from_str(&result)
.map(Box::new)
.map(Token::DecimalConstant)
});
#[cfg(feature = "decimal")]
let num = num.or_else(|_| {
rust_decimal::Decimal::from_scientific(&result)
.map(Box::new)
.map(Token::DecimalConstant)
});
num.unwrap_or_else(|_| {
Token::LexError(LERR::MalformedNumber(result.to_string()).into())
})
},
|radix| {
let result = &result[2..];
UNSIGNED_INT::from_str_radix(result, radix)
.map(|v| v as INT)
.map_or_else(
|_| {
Token::LexError(
LERR::MalformedNumber(result.to_string()).into(),
)
},
Token::IntegerConstant,
)
},
);
return Some((token, num_pos));
}
('"', ..) => {
return parse_string_literal(stream, state, pos, c, false, true, false)
.map_or_else(
|(err, err_pos)| Some((Token::LexError(err.into()), err_pos)),
|(result, ..)| Some((Token::StringConstant(result.into()), start_pos)),
);
}
('`', ..) => {
match stream.peek_next() {
Some('\r') => {
eat_next_and_advance(stream, pos);
if stream.peek_next() == Some('\n') {
eat_next_and_advance(stream, pos);
}
pos.new_line();
}
Some('\n') => {
eat_next_and_advance(stream, pos);
pos.new_line();
}
_ => (),
}
return parse_string_literal(stream, state, pos, c, true, false, true).map_or_else(
|(err, err_pos)| Some((Token::LexError(err.into()), err_pos)),
|(result, interpolated, ..)| {
if interpolated {
Some((Token::InterpolatedString(result.into()), start_pos))
} else {
Some((Token::StringConstant(result.into()), start_pos))
}
},
);
}
('\'', '\'') => {
return Some((
Token::LexError(LERR::MalformedChar(String::new()).into()),
start_pos,
))
}
('\'', ..) => {
return Some(
parse_string_literal(stream, state, pos, c, false, false, false).map_or_else(
|(err, err_pos)| (Token::LexError(err.into()), err_pos),
|(result, ..)| {
let mut chars = result.chars();
let first = chars.next().unwrap();
if chars.next().is_some() {
(
Token::LexError(LERR::MalformedChar(result.to_string()).into()),
start_pos,
)
} else {
(Token::CharConstant(first), start_pos)
}
},
),
)
}
('{', ..) => return Some((Token::LeftBrace, start_pos)),
('}', ..) => return Some((Token::RightBrace, start_pos)),
('(', ')') => {
eat_next_and_advance(stream, pos);
return Some((Token::Unit, start_pos));
}
('(', '*') => {
eat_next_and_advance(stream, pos);
return Some((Token::Reserved(Box::new("(*".into())), start_pos));
}
('(', ..) => return Some((Token::LeftParen, start_pos)),
(')', ..) => return Some((Token::RightParen, start_pos)),
('[', ..) => return Some((Token::LeftBracket, start_pos)),
(']', ..) => return Some((Token::RightBracket, start_pos)),
#[cfg(not(feature = "no_object"))]
('#', '{') => {
eat_next_and_advance(stream, pos);
return Some((Token::MapStart, start_pos));
}
('#', '!') => return Some((Token::Reserved(Box::new("#!".into())), start_pos)),
('#', ' ') => {
eat_next_and_advance(stream, pos);
let token = if stream.peek_next() == Some('{') {
eat_next_and_advance(stream, pos);
"# {"
} else {
"#"
};
return Some((Token::Reserved(Box::new(token.into())), start_pos));
}
('#', ..) => return Some((Token::Reserved(Box::new("#".into())), start_pos)),
('+', '=') => {
eat_next_and_advance(stream, pos);
return Some((Token::PlusAssign, start_pos));
}
('+', '+') => {
eat_next_and_advance(stream, pos);
return Some((Token::Reserved(Box::new("++".into())), start_pos));
}
('+', ..) if !state.next_token_cannot_be_unary => {
return Some((Token::UnaryPlus, start_pos))
}
('+', ..) => return Some((Token::Plus, start_pos)),
('-', '0'..='9') if !state.next_token_cannot_be_unary => negated = Some(start_pos),
('-', '0'..='9') => return Some((Token::Minus, start_pos)),
('-', '=') => {
eat_next_and_advance(stream, pos);
return Some((Token::MinusAssign, start_pos));
}
('-', '>') => {
eat_next_and_advance(stream, pos);
return Some((Token::Reserved(Box::new("->".into())), start_pos));
}
('-', '-') => {
eat_next_and_advance(stream, pos);
return Some((Token::Reserved(Box::new("--".into())), start_pos));
}
('-', ..) if !state.next_token_cannot_be_unary => {
return Some((Token::UnaryMinus, start_pos))
}
('-', ..) => return Some((Token::Minus, start_pos)),
('*', ')') => {
eat_next_and_advance(stream, pos);
return Some((Token::Reserved(Box::new("*)".into())), start_pos));
}
('*', '=') => {
eat_next_and_advance(stream, pos);
return Some((Token::MultiplyAssign, start_pos));
}
('*', '*') => {
eat_next_and_advance(stream, pos);
return Some((
if stream.peek_next() == Some('=') {
eat_next_and_advance(stream, pos);
Token::PowerOfAssign
} else {
Token::PowerOf
},
start_pos,
));
}
('*', ..) => return Some((Token::Multiply, start_pos)),
('/', '/') => {
eat_next_and_advance(stream, pos);
let mut comment: Option<String> = match stream.peek_next() {
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
Some('/') => {
eat_next_and_advance(stream, pos);
match stream.peek_next() {
Some('/') => None,
_ => Some("///".into()),
}
}
#[cfg(feature = "metadata")]
Some('!') => {
eat_next_and_advance(stream, pos);
Some("//!".into())
}
_ if state.include_comments => Some("//".into()),
_ => None,
};
while let Some(c) = stream.get_next() {
if c == '\r' {
if stream.peek_next() == Some('\n') {
eat_next_and_advance(stream, pos);
}
pos.new_line();
break;
}
if c == '\n' {
pos.new_line();
break;
}
if let Some(comment) = comment.as_mut() {
comment.push(c);
}
pos.advance();
}
match comment {
#[cfg(feature = "metadata")]
Some(comment) if comment.starts_with("//!") => {
let g = &mut state.tokenizer_control.borrow_mut().global_comments;
if !g.is_empty() {
g.push('\n');
}
g.push_str(&comment);
}
Some(comment) => return Some((Token::Comment(comment.into()), start_pos)),
None => (),
}
}
('/', '*') => {
state.comment_level = 1;
eat_next_and_advance(stream, pos);
let mut comment: Option<String> = match stream.peek_next() {
#[cfg(not(feature = "no_function"))]
#[cfg(feature = "metadata")]
Some('*') => {
eat_next_and_advance(stream, pos);
match stream.peek_next() {
Some('*') => None,
_ => Some("/**".into()),
}
}
_ if state.include_comments => Some("/*".into()),
_ => None,
};
state.comment_level =
scan_block_comment(stream, state.comment_level, pos, comment.as_mut());
if let Some(comment) = comment {
return Some((Token::Comment(comment.into()), start_pos));
}
}
('/', '=') => {
eat_next_and_advance(stream, pos);
return Some((Token::DivideAssign, start_pos));
}
('/', ..) => return Some((Token::Divide, start_pos)),
(';', ..) => return Some((Token::SemiColon, start_pos)),
(',', ..) => return Some((Token::Comma, start_pos)),
('.', '.') => {
eat_next_and_advance(stream, pos);
return Some((
match stream.peek_next() {
Some('.') => {
eat_next_and_advance(stream, pos);
Token::Reserved(Box::new("...".into()))
}
Some('=') => {
eat_next_and_advance(stream, pos);
Token::InclusiveRange
}
_ => Token::ExclusiveRange,
},
start_pos,
));
}
('.', ..) => return Some((Token::Period, start_pos)),
('=', '=') => {
eat_next_and_advance(stream, pos);
if stream.peek_next() == Some('=') {
eat_next_and_advance(stream, pos);
return Some((Token::Reserved(Box::new("===".into())), start_pos));
}
return Some((Token::EqualsTo, start_pos));
}
('=', '>') => {
eat_next_and_advance(stream, pos);
return Some((Token::DoubleArrow, start_pos));
}
('=', ..) => return Some((Token::Equals, start_pos)),
#[cfg(not(feature = "no_module"))]
(':', ':') => {
eat_next_and_advance(stream, pos);
if stream.peek_next() == Some('<') {
eat_next_and_advance(stream, pos);
return Some((Token::Reserved(Box::new("::<".into())), start_pos));
}
return Some((Token::DoubleColon, start_pos));
}
(':', '=') => {
eat_next_and_advance(stream, pos);
return Some((Token::Reserved(Box::new(":=".into())), start_pos));
}
(':', ';') => {
eat_next_and_advance(stream, pos);
return Some((Token::Reserved(Box::new(":;".into())), start_pos));
}
(':', ..) => return Some((Token::Colon, start_pos)),
('<', '=') => {
eat_next_and_advance(stream, pos);
return Some((Token::LessThanEqualsTo, start_pos));
}
('<', '-') => {
eat_next_and_advance(stream, pos);
return Some((Token::Reserved(Box::new("<-".into())), start_pos));
}
('<', '<') => {
eat_next_and_advance(stream, pos);
return Some((
if stream.peek_next() == Some('=') {
eat_next_and_advance(stream, pos);
Token::LeftShiftAssign
} else {
Token::LeftShift
},
start_pos,
));
}
('<', '|') => {
eat_next_and_advance(stream, pos);
return Some((Token::Reserved(Box::new("<|".into())), start_pos));
}
('<', ..) => return Some((Token::LessThan, start_pos)),
('>', '=') => {
eat_next_and_advance(stream, pos);
return Some((Token::GreaterThanEqualsTo, start_pos));
}
('>', '>') => {
eat_next_and_advance(stream, pos);
return Some((
if stream.peek_next() == Some('=') {
eat_next_and_advance(stream, pos);
Token::RightShiftAssign
} else {
Token::RightShift
},
start_pos,
));
}
('>', ..) => return Some((Token::GreaterThan, start_pos)),
('!', 'i') => {
stream.get_next().unwrap();
if stream.peek_next() == Some('n') {
stream.get_next().unwrap();
match stream.peek_next() {
Some(c) if is_id_continue(c) => {
stream.unget('n');
stream.unget('i');
return Some((Token::Bang, start_pos));
}
_ => {
pos.advance();
pos.advance();
return Some((Token::NotIn, start_pos));
}
}
}
stream.unget('i');
return Some((Token::Bang, start_pos));
}
('!', '=') => {
eat_next_and_advance(stream, pos);
if stream.peek_next() == Some('=') {
eat_next_and_advance(stream, pos);
return Some((Token::Reserved(Box::new("!==".into())), start_pos));
}
return Some((Token::NotEqualsTo, start_pos));
}
('!', '.') => {
eat_next_and_advance(stream, pos);
return Some((Token::Reserved(Box::new("!.".into())), start_pos));
}
('!', ..) => return Some((Token::Bang, start_pos)),
('|', '|') => {
eat_next_and_advance(stream, pos);
return Some((Token::Or, start_pos));
}
('|', '=') => {
eat_next_and_advance(stream, pos);
return Some((Token::OrAssign, start_pos));
}
('|', '>') => {
eat_next_and_advance(stream, pos);
return Some((Token::Reserved(Box::new("|>".into())), start_pos));
}
('|', ..) => return Some((Token::Pipe, start_pos)),
('&', '&') => {
eat_next_and_advance(stream, pos);
return Some((Token::And, start_pos));
}
('&', '=') => {
eat_next_and_advance(stream, pos);
return Some((Token::AndAssign, start_pos));
}
('&', ..) => return Some((Token::Ampersand, start_pos)),
('^', '=') => {
eat_next_and_advance(stream, pos);
return Some((Token::XOrAssign, start_pos));
}
('^', ..) => return Some((Token::XOr, start_pos)),
('~', ..) => return Some((Token::Reserved(Box::new("~".into())), start_pos)),
('%', '=') => {
eat_next_and_advance(stream, pos);
return Some((Token::ModuloAssign, start_pos));
}
('%', ..) => return Some((Token::Modulo, start_pos)),
('@', ..) => return Some((Token::Reserved(Box::new("@".into())), start_pos)),
('$', ..) => return Some((Token::Reserved(Box::new("$".into())), start_pos)),
('?', '.') => {
eat_next_and_advance(stream, pos);
return Some((
#[cfg(not(feature = "no_object"))]
Token::Elvis,
#[cfg(feature = "no_object")]
Token::Reserved(Box::new("?.".into())),
start_pos,
));
}
('?', '?') => {
eat_next_and_advance(stream, pos);
return Some((Token::DoubleQuestion, start_pos));
}
('?', '[') => {
eat_next_and_advance(stream, pos);
return Some((
#[cfg(not(feature = "no_index"))]
Token::QuestionBracket,
#[cfg(feature = "no_index")]
Token::Reserved(Box::new("?[".into())),
start_pos,
));
}
('?', ..) => return Some((Token::Reserved(Box::new("?".into())), start_pos)),
_ if is_id_first_alphabetic(c) || c == '_' => {
return Some(parse_identifier_token(stream, state, pos, start_pos, c));
}
_ if c.is_whitespace() => (),
_ => {
return Some((
Token::LexError(LERR::UnexpectedInput(c.to_string()).into()),
start_pos,
))
}
}
}
pos.advance();
Some((Token::EOF, *pos))
}
/// Read the rest of an identifier-like word whose first character has already
/// been consumed, and classify it.
///
/// Characters are taken from `stream` for as long as [`is_id_continue`] accepts
/// them. If `state.last_token` is being tracked, it is reset to hold exactly the
/// text of this word.
///
/// The finished word is classified in this order:
/// 1. a token known to `Token::lookup_symbol_from_syntax`,
/// 2. a reserved keyword/symbol (`Token::Reserved`),
/// 3. a valid identifier (`Token::Identifier`),
/// 4. otherwise a `MalformedIdentifier` lex error.
///
/// The returned position is always `start_pos`.
fn parse_identifier_token(
    stream: &mut impl InputStream,
    state: &mut TokenizeState,
    pos: &mut Position,
    start_pos: Position,
    first_char: char,
) -> (Token, Position) {
    let mut word = SmartString::new_const();
    word.push(first_char);

    // Restart the raw-text record of the last token with this word.
    if let Some(raw) = state.last_token.as_mut() {
        raw.clear();
        raw.push(first_char);
    }

    // Only consume a character once it is known to belong to the word.
    loop {
        let ch = match stream.peek_next() {
            Some(ch) if is_id_continue(ch) => ch,
            _ => break,
        };

        eat_next_and_advance(stream, pos);
        word.push(ch);

        if let Some(raw) = state.last_token.as_mut() {
            raw.push(ch);
        }
    }

    let token = if let Some(known) = Token::lookup_symbol_from_syntax(&word) {
        known
    } else if is_reserved_keyword_or_symbol(&word).0 {
        Token::Reserved(Box::new(word))
    } else if is_valid_identifier(&word) {
        Token::Identifier(word.into())
    } else {
        Token::LexError(LERR::MalformedIdentifier(word.to_string()).into())
    };

    (token, start_pos)
}
/// Is `name` a valid identifier?
///
/// Rules, as applied character by character:
/// * `_` is accepted anywhere;
/// * any character accepted by [`is_id_first_alphabetic`] is accepted anywhere;
/// * any other character must be ASCII alphanumeric **and** must come after at
///   least one character accepted by [`is_id_first_alphabetic`].
///
/// A name with no such "first alphabetic" character at all (for example the
/// empty string or `"___"`) is rejected.
#[must_use]
pub fn is_valid_identifier(name: &str) -> bool {
    let mut seen_letter = false;

    for ch in name.chars() {
        if ch == '_' {
            continue;
        }
        if is_id_first_alphabetic(ch) {
            seen_letter = true;
            continue;
        }
        // Anything else (e.g. a digit) is only legal after a leading letter.
        if !seen_letter || !ch.is_ascii_alphanumeric() {
            return false;
        }
    }

    seen_letter
}
/// Is `name` usable as a function name?
///
/// It must be a valid identifier (see [`is_valid_identifier`]), must not be a
/// reserved keyword/symbol, and must not map to an existing token via
/// `Token::lookup_symbol_from_syntax`.
#[inline(always)]
#[must_use]
pub fn is_valid_function_name(name: &str) -> bool {
    if !is_valid_identifier(name) {
        return false;
    }
    if is_reserved_keyword_or_symbol(name).0 {
        return false;
    }
    Token::lookup_symbol_from_syntax(name).is_none()
}
/// Can `x` be the first alphabetic character of an identifier?
///
/// With the `unicode-xid-ident` feature this defers to
/// `unicode_xid::UnicodeXID::is_xid_start`; without it, only ASCII letters
/// (`a`-`z`, `A`-`Z`) qualify.
#[inline(always)]
#[must_use]
#[allow(clippy::missing_const_for_fn)]
pub fn is_id_first_alphabetic(x: char) -> bool {
    #[cfg(feature = "unicode-xid-ident")]
    let starts_identifier = unicode_xid::UnicodeXID::is_xid_start(x);

    #[cfg(not(feature = "unicode-xid-ident"))]
    let starts_identifier = matches!(x, 'a'..='z' | 'A'..='Z');

    starts_identifier
}
/// Can `x` appear inside an identifier after its first character?
///
/// With the `unicode-xid-ident` feature this defers to
/// `unicode_xid::UnicodeXID::is_xid_continue`; without it, ASCII letters,
/// ASCII digits and `_` qualify.
#[inline(always)]
#[must_use]
#[allow(clippy::missing_const_for_fn)]
pub fn is_id_continue(x: char) -> bool {
    #[cfg(feature = "unicode-xid-ident")]
    let continues_identifier = unicode_xid::UnicodeXID::is_xid_continue(x);

    #[cfg(not(feature = "unicode-xid-ident"))]
    let continues_identifier = matches!(x, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_');

    continues_identifier
}
/// Look `syntax` up in the table of reserved keywords and symbols.
///
/// The table slot is computed from the byte length of `syntax` plus the
/// `RESERVED_ASSOC_VALUES` entries for its first byte, its last byte and (when
/// the length is not 1) its second byte. Lengths or slots outside the
/// `MIN_*..=MAX_*` bounds are rejected without touching `RESERVED_LIST`.
///
/// Returns `(is_reserved, is_reserved && a, is_reserved && a && b)`, where `a`
/// and `b` are the third and fourth fields of the matching `RESERVED_LIST`
/// entry. An entry only matches when its second field is `true` and its text
/// equals `syntax` exactly.
// NOTE(review): the meaning of the two extra flags is defined by the
// `RESERVED_LIST` table, which is outside this block — confirm there.
#[inline]
#[must_use]
pub fn is_reserved_keyword_or_symbol(syntax: &str) -> (bool, bool, bool) {
    const NO_MATCH: (bool, bool, bool) = (false, false, false);

    let bytes = syntax.as_bytes();
    let len = bytes.len();
    if !(MIN_RESERVED_LEN..=MAX_RESERVED_LEN).contains(&len) {
        return NO_MATCH;
    }

    let assoc = |byte: u8| RESERVED_ASSOC_VALUES[byte as usize] as usize;

    let mut slot = len;
    if len != 1 {
        slot += assoc(bytes[1]);
    }
    slot += assoc(bytes[0]);
    slot += assoc(bytes[len - 1]);
    if !(MIN_RESERVED_HASH_VALUE..=MAX_RESERVED_HASH_VALUE).contains(&slot) {
        return NO_MATCH;
    }

    let (word, enabled, flag_a, flag_b) = RESERVED_LIST[slot];
    if word.is_empty() || !enabled {
        return NO_MATCH;
    }

    // Cheap length and first-byte checks run before the full string comparison.
    let hit = word.len() == len && word.as_bytes()[0] == bytes[0] && word == syntax;
    (hit, hit && flag_a, hit && flag_a && flag_b)
}
/// A character stream that reads one primary input followed by any number of
/// extra inputs, with a small push-back buffer.
pub struct MultiInputsStream<'a> {
/// Push-back buffer: holds up to two characters returned to the stream via `unget`.
pub buf: [Option<char>; 2],
/// Index of the input currently being read: `0` is `stream`, and `n > 0` is
/// `extra_streams[n - 1]`. A value greater than `extra_streams.len()` means
/// every input is exhausted.
pub index: usize,
/// The first input.
pub stream: Peekable<Chars<'a>>,
/// Further inputs, read in order once `stream` is exhausted.
pub extra_streams: Box<[Peekable<Chars<'a>>]>,
}
impl InputStream for MultiInputsStream<'_> {
    /// Push `ch` back onto the stream so that it is the next character read.
    ///
    /// At most two characters may be pending at once; a third is treated as
    /// unreachable.
    #[inline]
    fn unget(&mut self, ch: char) {
        let slot = if self.buf[0].is_none() {
            0
        } else if self.buf[1].is_none() {
            1
        } else {
            unreachable!("cannot unget more than 2 characters!")
        };
        self.buf[slot] = Some(ch);
    }

    /// Consume and return the next character, or `None` once every input is
    /// exhausted.
    ///
    /// Pushed-back characters are returned first, most recent first.
    fn get_next(&mut self) -> Option<char> {
        if self.buf[0].is_some() {
            // The second slot, when filled, holds the most recent push-back.
            let slot = usize::from(self.buf[1].is_some());
            return self.buf[slot].take();
        }

        while self.index <= self.extra_streams.len() {
            let next = match self.index {
                0 => self.stream.next(),
                n => self.extra_streams[n - 1].next(),
            };
            if next.is_some() {
                return next;
            }
            // Current input is exhausted: move on to the following one.
            self.index += 1;
        }

        None
    }

    /// Return the next character without consuming it, or `None` once every
    /// input is exhausted.
    ///
    /// This may advance `index` past inputs that are already exhausted.
    fn peek_next(&mut self) -> Option<char> {
        if self.buf[0].is_some() {
            return self.buf[1].or(self.buf[0]);
        }

        while self.index <= self.extra_streams.len() {
            let peeked = match self.index {
                0 => self.stream.peek().copied(),
                n => self.extra_streams[n - 1].peek().copied(),
            };
            if peeked.is_some() {
                return peeked;
            }
            // Current input is exhausted: move on to the following one.
            self.index += 1;
        }

        None
    }
}
/// An iterator over the `(Token, Position)` pairs of one or more input streams.
pub struct TokenIterator<'a> {
/// The engine, consulted for custom keywords and disabled symbols.
pub engine: &'a Engine,
/// Tokenizer state, threaded through every call to `get_next_token`.
pub state: TokenizeState,
/// Current position in the input, updated by `get_next_token`.
pub pos: Position,
/// The characters being tokenized.
pub stream: MultiInputsStream<'a>,
/// Optional callback applied to every token just before it is yielded.
pub token_mapper: Option<&'a OnParseTokenCallback>,
}
/// Yields `(Token, Position)` pairs: each raw token from `get_next_token` is
/// post-processed (reserved/custom/disabled symbols), passed through the optional
/// token mapper, and optionally appended to the compressed script.
impl<'a> Iterator for TokenIterator<'a> {
type Item = (Token, Position);
fn next(&mut self) -> Option<Self::Item> {
// Read the shared control block once up-front; the borrow ends with this block.
let (within_interpolated, compress_script) = {
let control = &mut *self.state.tokenizer_control.borrow_mut();
// A request for text mode switches the state to "text terminated by a back-tick";
// the request flag is then reset.
if control.is_within_text {
self.state.is_within_text_terminated_by = Some('`');
control.is_within_text = false;
}
(
self.state.is_within_text_terminated_by.is_some(),
control.compressed.is_some(),
)
};
let (token, pos) = match get_next_token(&mut self.stream, &mut self.state, &mut self.pos) {
// Nothing more to yield.
None => return None,
// A string constant returned while the state is still inside text is reported
// as an unterminated string; the text-mode marker is cleared.
Some((Token::StringConstant(..), pos)) if self.state.is_within_text_terminated_by.is_some() => {
self.state.is_within_text_terminated_by = None;
return Some((Token::LexError(LERR::UnterminatedString.into()), pos));
}
// Reserved keyword/symbol: matched together with whether the engine has it
// registered as a custom keyword (always `false` under `no_custom_syntax`).
Some((Token::Reserved(s), pos)) => (match
(s.as_str(),
#[cfg(not(feature = "no_custom_syntax"))]
self.engine.is_custom_keyword(&s),
#[cfg(feature = "no_custom_syntax")]
false
)
{
// Symbols borrowed from other languages get a dedicated error message,
// unless they are registered as custom keywords.
("===", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
"'===' is not a valid operator. This is not JavaScript! Should it be '=='?".to_string(),
).into()),
("!==", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
"'!==' is not a valid operator. This is not JavaScript! Should it be '!='?".to_string(),
).into()),
("->", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
"'->' is not a valid symbol. This is not C or C++!".to_string()).into()),
("<-", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
"'<-' is not a valid symbol. This is not Go! Should it be '<='?".to_string(),
).into()),
(":=", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
"':=' is not a valid assignment operator. This is not Go or Pascal! Should it be simply '='?".to_string(),
).into()),
(":;", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
"':;' is not a valid symbol. Should it be '::'?".to_string(),
).into()),
("::<", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
"'::<>' is not a valid symbol. This is not Rust! Should it be '::'?".to_string(),
).into()),
("(*" | "*)", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
"'(* .. *)' is not a valid comment format. This is not Pascal! Should it be '/* .. */'?".to_string(),
).into()),
("# {", false) => Token::LexError(LERR::ImproperSymbol(s.to_string(),
"'#' is not a valid symbol. Should it be '#{'?".to_string(),
).into()),
// Reserved keyword/symbol that is registered as custom.
#[cfg(not(feature = "no_custom_syntax"))]
(.., true) => Token::Custom(s),
#[cfg(feature = "no_custom_syntax")]
(.., true) => unreachable!("no custom operators"),
// Reserved keyword/symbol that is not custom and has been disabled.
(token, false) if self.engine.is_symbol_disabled(token) => {
let msg = format!("reserved {} '{token}' is disabled", if is_valid_identifier(token) { "keyword"} else {"symbol"});
Token::LexError(LERR::ImproperSymbol(s.to_string(), msg).into())
},
// Any other reserved keyword/symbol is passed through unchanged.
(.., false) => Token::Reserved(s),
}, pos),
// Identifier that is registered as a custom keyword.
#[cfg(not(feature = "no_custom_syntax"))]
Some((Token::Identifier(s), pos)) if self.engine.is_custom_keyword(&s) => {
(Token::Custom(s), pos)
}
// Literal keyword/symbol that is also registered as a custom keyword:
// this is only expected when the standard one has been disabled.
#[cfg(not(feature = "no_custom_syntax"))]
Some((token, pos)) if token.is_literal() && self.engine.is_custom_keyword(token.literal_syntax()) => {
if self.engine.is_symbol_disabled(token.literal_syntax()) {
(Token::Custom(Box::new(token.literal_syntax().into())), pos)
} else {
unreachable!("{:?} is an active keyword", token)
}
}
// Disabled literal keyword/symbol: downgraded to a reserved token.
Some((token, pos)) if token.is_literal() && self.engine.is_symbol_disabled(token.literal_syntax()) => {
(Token::Reserved(Box::new(token.literal_syntax().into())), pos)
}
// Everything else is passed through unchanged.
Some(r) => r,
};
// Give the optional mapper a chance to replace the token.
let token = match self.token_mapper {
Some(func) => func(token, pos, &self.state),
None => token,
};
// Append this token to the compressed script, if one is being collected.
if compress_script {
let control = &mut *self.state.tokenizer_control.borrow_mut();
if token != Token::EOF {
if let Some(ref mut compressed) = control.compressed {
use std::fmt::Write;
// NOTE(review): this unwrap panics if `last_token` is `None` while `compressed`
// is `Some`; presumably callers always enable both together — confirm.
let last_token = self.state.last_token.as_ref().unwrap();
let mut buf = SmartString::new_const();
if last_token.is_empty() {
// No raw text was recorded: fall back to the token's `Display` form.
write!(buf, "{token}").unwrap();
} else if within_interpolated
&& matches!(
token,
Token::StringConstant(..) | Token::InterpolatedString(..)
)
{
// String segment read while inside text: the raw text is appended directly,
// minus its first byte, and no separator handling is applied.
compressed.push_str(&last_token[1..]);
} else {
buf = last_token.clone();
}
// Insert a space when two identifier-like characters would otherwise touch.
if !buf.is_empty() && !compressed.is_empty() {
let cur = buf.chars().next().unwrap();
if cur == '_' || is_id_first_alphabetic(cur) || is_id_continue(cur) {
let prev = compressed.chars().last().unwrap();
if prev == '_' || is_id_first_alphabetic(prev) || is_id_continue(prev) {
compressed.push(' ');
}
}
}
compressed.push_str(&buf);
}
}
}
Some((token, pos))
}
}
// Marker impl: declares `TokenIterator` to be a fused iterator.
impl FusedIterator for TokenIterator<'_> {}
impl Engine {
    /// Tokenize the given input text streams.
    ///
    /// Returns the token iterator together with a handle to the
    /// [`TokenizerControlBlock`] it shares.
    ///
    /// Only available under the `internals` feature.
    #[cfg(feature = "internals")]
    #[inline(always)]
    #[must_use]
    pub fn lex<'a>(
        &'a self,
        inputs: impl IntoIterator<Item = &'a (impl AsRef<str> + 'a)>,
    ) -> (TokenIterator<'a>, TokenizerControl) {
        self.lex_raw(inputs, None)
    }
    /// Tokenize the given input text streams, passing every token through
    /// `token_mapper` before it is yielded.
    ///
    /// Returns the token iterator together with a handle to the
    /// [`TokenizerControlBlock`] it shares.
    ///
    /// Only available under the `internals` feature.
    #[cfg(feature = "internals")]
    #[inline(always)]
    #[must_use]
    pub fn lex_with_map<'a>(
        &'a self,
        inputs: impl IntoIterator<Item = &'a (impl AsRef<str> + 'a)>,
        token_mapper: &'a OnParseTokenCallback,
    ) -> (TokenIterator<'a>, TokenizerControl) {
        self.lex_raw(inputs, Some(token_mapper))
    }
    /// Tokenize the given input text streams with an optional token mapper.
    ///
    /// The first input becomes the primary stream and the rest are read in
    /// order after it. An empty `inputs` is treated as a single empty script
    /// instead of panicking.
    ///
    /// The returned [`TokenizerControl`] is a second handle to the same control
    /// block that the iterator's state holds.
    #[inline]
    #[must_use]
    pub(crate) fn lex_raw<'a>(
        &'a self,
        inputs: impl IntoIterator<Item = &'a (impl AsRef<str> + 'a)>,
        token_mapper: Option<&'a OnParseTokenCallback>,
    ) -> (TokenIterator<'a>, TokenizerControl) {
        let buffer: TokenizerControl = RefCell::new(TokenizerControlBlock::new()).into();
        let buffer2 = buffer.clone();

        let mut input_streams = inputs.into_iter().map(|s| s.as_ref().chars().peekable());

        // With no inputs at all, fall back to an empty stream so that the
        // tokenizer simply reaches the end of input.
        let stream = input_streams
            .next()
            .unwrap_or_else(|| "".chars().peekable());
        let extra_streams = input_streams.collect();

        (
            TokenIterator {
                engine: self,
                state: TokenizeState {
                    #[cfg(not(feature = "unchecked"))]
                    max_string_len: NonZeroUsize::new(self.max_string_size()),
                    #[cfg(feature = "unchecked")]
                    max_string_len: None,
                    next_token_cannot_be_unary: false,
                    tokenizer_control: buffer,
                    comment_level: 0,
                    include_comments: false,
                    is_within_text_terminated_by: None,
                    last_token: None,
                },
                pos: Position::new(1, 0),
                stream: MultiInputsStream {
                    buf: [None, None],
                    stream,
                    extra_streams,
                    index: 0,
                },
                token_mapper,
            },
            buffer2,
        )
    }
}