use crate::{error::ParseError, span::Span};
use logos::Logos;
use std::{fmt, ops::Range, sync::Arc};
#[cfg(test)]
mod tests;
/// Every lexeme recognised by the lexer.
///
/// Most variants map one-to-one onto a `#[token]` literal or a `#[regex]`
/// pattern. The payload-carrying `MacroSplice`, `MacroBody`, `MacroCallArgs`
/// and `MacroTag` variants have no logos attribute: they are synthesised by
/// the `Lexer` wrapper below, never produced by logos itself.
#[derive(Clone, Debug, Eq, Hash, Logos, PartialEq, Ord, PartialOrd)]
#[logos(skip r"[ \t\n\r\f]+")]
#[logos(error = ParseError)]
pub enum Token {
    // Punctuation and operators.
    #[token(":")]
    Colon,
    #[token("::")]
    DoubleColon,
    #[token("!")]
    Bang,
    #[token("|")]
    Pipe,
    #[token("+")]
    Plus,
    #[token("++")]
    PlusPlus,
    #[token("-")]
    Minus,
    #[token("/")]
    Div,
    #[token("%")]
    Mod,
    #[token("=")]
    Eq,
    #[token(">")]
    Gt,
    #[token("<")]
    Lt,
    #[token("<=")]
    LtEq,
    #[token(">=")]
    GtEq,
    #[token("==")]
    EqEq,
    #[token("!=")]
    NotEq,
    #[token("&&")]
    DoubleAmpersand,
    #[token("||")]
    DoublePipe,
    #[token("'")]
    SingleQuote,
    #[token("?")]
    QuestionMark,
    #[token(":=")]
    ColonEq,
    #[token(";")]
    Semi,
    #[token(",")]
    Comma,
    #[token("*")]
    Star,
    #[token("{")]
    BraceOpen,
    #[token("}")]
    BraceClose,
    #[token("(")]
    ParenOpen,
    #[token(")")]
    ParenClose,
    #[token("[")]
    BracketOpen,
    #[token("]")]
    BracketClose,
    #[token("->")]
    Arrow,
    #[token("=>")]
    HeavyArrow,
    #[token(".")]
    Dot,
    #[token("..")]
    TwoDots,
    #[token("~")]
    Tilde,
    #[token("@")]
    At,

    // Primitive-type and literal keywords.
    #[token("real")]
    Real,
    #[token("int")]
    Int,
    #[token("bool")]
    Bool,
    #[token("true")]
    True,
    #[token("false")]
    False,
    #[token("string")]
    String,
    #[token("b256")]
    B256,

    // Macro tokens. `@name` wins over the bare `@` token because logos
    // prefers the longest match.
    #[token("macro")]
    Macro,
    #[regex(r"@[A-Za-z_][A-Za-z_0-9]*", |lex| lex.slice().to_string())]
    MacroName(String),
    #[regex(r"\$[A-Za-z_0-9]+", |lex| lex.slice().to_string())]
    MacroParam(String),
    #[regex(r"&[A-Za-z_0-9]+", |lex| lex.slice().to_string())]
    MacroParamPack(String),
    // Synthesised by `Lexer::gather_macro_call_args` from a `~ident` pair.
    MacroSplice(String),
    // A whole `{ ... }` macro body gathered into one token by
    // `Lexer::gather_macro_body`.
    MacroBody(MacroBody),
    // The `;`-separated argument lists of a macro call, gathered by
    // `Lexer::gather_macro_call_args`.
    MacroCallArgs(MacroCallArgs),
    // Tag attached after a macro name; emitted as `MacroTag(None)` here and
    // presumably numbered later during macro expansion — TODO confirm.
    MacroTag(Option<usize>),

    // Keywords.
    #[token("if")]
    If,
    #[token("else")]
    Else,
    #[token("cond")]
    Cond,
    #[token("match")]
    Match,
    #[token("asm")]
    Asm,
    #[token("let")]
    Let,
    #[token("const")]
    Const,
    #[token("storage")]
    Storage,
    #[token("interface")]
    Interface,
    #[token("type")]
    Type,
    #[token("union")]
    Union,
    #[token("constraint")]
    Constraint,
    #[token("use")]
    Use,
    #[token("self")]
    SelfTok,
    #[token("as")]
    As,
    #[token("predicate")]
    Predicate,
    #[token("in")]
    In,
    #[token("forall")]
    ForAll,
    #[token("exists")]
    Exists,
    #[token("map")]
    Map,
    #[token("fold")]
    Fold,
    #[token("filter")]
    Filter,
    #[token("where")]
    Where,
    #[token("nil")]
    Nil,

    // Identifiers and literals. The `bool` in `Ident` is always `false`
    // when freshly lexed; its meaning is assigned elsewhere — TODO confirm
    // against the parser.
    #[regex(r"[A-Za-z_][A-Za-z_0-9]*", |lex| {(lex.slice().to_string(), false)})]
    Ident((String, bool)),
    #[regex(r"__[A-Za-z_][A-Za-z_0-9]*", |lex| lex.slice().to_string())]
    IntrinsicName(String),
    // Reals require a fractional part and/or an exponent; `_` is allowed as
    // a digit separator.
    #[regex(r"([0-9](_?[0-9])*)+\.([0-9]_?)+([Ee][-+]?([0-9](_?[0-9])*)+)?|([0-9](_?[0-9])*)+_?[Ee][-+]?([0-9](_?[0-9])*)+", |lex| lex.slice().to_string())]
    RealLiteral(String),
    // Hex (`0x…`), binary (`0b…`) or decimal, with `_` separators.
    #[regex(r"(0x([0-9A-Fa-f](_[0-9A-Fa-f])*)+|0b([0-1](_[0-1])*)+|([0-9](_[0-9])*)+)", |lex| lex.slice().to_string())]
    IntLiteral(String),
    // The matched literal (including its quotes) is re-lexed character by
    // character with `StringLiteralChar` so escapes are decoded into the
    // payload.
    #[regex(
        r#""([^"\\]|\\(x[0-9a-fA-F]{2}|[nt"]|\\|\n))*""#,
        |lex| {
            StringLiteralChar::lexer(lex.slice())
                .map(|c| c.map(char::from))
                .collect::<Result<String, _>>()
                .unwrap()
        }
    )]
    StringLiteral(String),
    // Line comments are skipped entirely.
    #[regex(r"//[^\n\r]*", logos::skip)]
    Comment,

    // Markers used only by the test harness to select what to parse.
    #[token("###expr###")]
    TestMarkerExpr,
    #[token("###range###")]
    TestMarkerRange,
    #[token("###type###")]
    TestMarkerType,
    #[token("###svtype###")]
    TestMarkerSVType,
    #[token("###ident###")]
    TestMarkerIdent,
    #[token("###intrinsic###")]
    TestMarkerIntrinsic,
    #[token("###usetree###")]
    TestMarkerUseTree,
}
/// The argument lists of a macro call: one inner `Vec` of spanned
/// `(start, token, end)` triples per argument.
pub type MacroCallArgs = Vec<Vec<(usize, Token, usize)>>;
/// The token stream of a macro body, as spanned `(start, token, end)`
/// triples.
pub type MacroBody = Vec<(usize, Token, usize)>;
// Test-only inventory of every keyword token, for the lexer tests.
// Mostly alphabetical; `Asm` and `Nil` sit out of order (presumably
// appended later).
#[cfg(test)]
pub(super) static KEYWORDS: &[Token] = &[
    Token::As,
    Token::B256,
    Token::Bool,
    Token::Cond,
    Token::Const,
    Token::Constraint,
    Token::Else,
    Token::Exists,
    Token::False,
    Token::Filter,
    Token::Fold,
    Token::ForAll,
    Token::If,
    Token::In,
    Token::Int,
    Token::Interface,
    Token::Let,
    Token::Macro,
    Token::Map,
    Token::Match,
    Token::Asm,
    Token::Predicate,
    Token::Real,
    Token::SelfTok,
    Token::Storage,
    Token::String,
    Token::True,
    Token::Type,
    Token::Union,
    Token::Use,
    Token::Where,
    Token::Nil,
];
impl fmt::Display for Token {
    /// Renders the token as it would appear in source text.
    ///
    /// Compound macro tokens are pretty-printed: a `MacroBody` as
    /// `{ tok tok ... }`, a `MacroCallArgs` as `;`-separated argument lists
    /// (each argument's tokens prefixed by a single space), and a
    /// `MacroTag(Some(n))` as `<n>` (a `None` tag prints nothing). Test
    /// marker tokens print a human-readable description rather than their
    /// source spelling.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Token::Colon => write!(f, ":"),
            Token::DoubleColon => write!(f, "::"),
            Token::Bang => write!(f, "!"),
            Token::Pipe => write!(f, "|"),
            Token::Plus => write!(f, "+"),
            Token::PlusPlus => write!(f, "++"),
            Token::Minus => write!(f, "-"),
            Token::Div => write!(f, "/"),
            Token::Mod => write!(f, "%"),
            Token::Eq => write!(f, "="),
            Token::Gt => write!(f, ">"),
            Token::Lt => write!(f, "<"),
            Token::LtEq => write!(f, "<="),
            Token::GtEq => write!(f, ">="),
            Token::EqEq => write!(f, "=="),
            Token::NotEq => write!(f, "!="),
            Token::DoubleAmpersand => write!(f, "&&"),
            Token::DoublePipe => write!(f, "||"),
            Token::SingleQuote => write!(f, "'"),
            Token::QuestionMark => write!(f, "?"),
            Token::ColonEq => write!(f, ":="),
            Token::Semi => write!(f, ";"),
            Token::Comma => write!(f, ","),
            Token::Star => write!(f, "*"),
            Token::BraceOpen => write!(f, "{{"),
            Token::BraceClose => write!(f, "}}"),
            Token::ParenOpen => write!(f, "("),
            Token::ParenClose => write!(f, ")"),
            Token::BracketOpen => write!(f, "["),
            Token::BracketClose => write!(f, "]"),
            Token::Arrow => write!(f, "->"),
            Token::HeavyArrow => write!(f, "=>"),
            Token::Dot => write!(f, "."),
            Token::TwoDots => write!(f, ".."),
            Token::Tilde => write!(f, "~"),
            Token::At => write!(f, "@"),
            Token::Real => write!(f, "real"),
            Token::Int => write!(f, "int"),
            Token::Bool => write!(f, "bool"),
            Token::True => write!(f, "true"),
            Token::False => write!(f, "false"),
            Token::String => write!(f, "string"),
            Token::B256 => write!(f, "b256"),
            Token::Macro => write!(f, "macro"),
            // `name` already includes the leading `@`.
            Token::MacroName(name) => write!(f, "{name}"),
            Token::MacroParam(arg) | Token::MacroParamPack(arg) | Token::MacroSplice(arg) => {
                write!(f, "{arg}")
            }
            Token::MacroBody(body) => write!(
                f,
                "{{ {} }}",
                body.iter()
                    .map(|(_, tok, _)| tok.to_string())
                    .collect::<Vec<_>>()
                    .join(" ")
            ),
            Token::MacroCallArgs(params) => write!(
                f,
                "{}",
                params
                    .iter()
                    .map(|param| {
                        // Append into one reused buffer instead of
                        // re-allocating a fresh `String` per token —
                        // `format!("{s} {tok}")` in a fold is accidentally
                        // quadratic. The historical leading space before
                        // each token is preserved.
                        param.iter().fold(String::new(), |mut s, (_, tok, _)| {
                            use std::fmt::Write as _;
                            let _ = write!(s, " {tok}");
                            s
                        })
                    })
                    .collect::<Vec<_>>()
                    .join("; ")
            ),
            Token::MacroTag(tag) => {
                if let Some(tag) = tag {
                    write!(f, "<{tag}>")
                } else {
                    // An unset tag renders as nothing.
                    Ok(())
                }
            }
            Token::If => write!(f, "if"),
            Token::Else => write!(f, "else"),
            Token::Cond => write!(f, "cond"),
            Token::Match => write!(f, "match"),
            Token::Asm => write!(f, "asm"),
            Token::Let => write!(f, "let"),
            Token::Const => write!(f, "const"),
            Token::Storage => write!(f, "storage"),
            Token::Interface => write!(f, "interface"),
            Token::Type => write!(f, "type"),
            Token::Union => write!(f, "union"),
            Token::Constraint => write!(f, "constraint"),
            Token::Use => write!(f, "use"),
            Token::SelfTok => write!(f, "self"),
            Token::As => write!(f, "as"),
            Token::Predicate => write!(f, "predicate"),
            Token::In => write!(f, "in"),
            Token::ForAll => write!(f, "forall"),
            Token::Exists => write!(f, "exists"),
            Token::Map => write!(f, "map"),
            Token::Fold => write!(f, "fold"),
            Token::Filter => write!(f, "filter"),
            Token::Where => write!(f, "where"),
            Token::Nil => write!(f, "nil"),
            Token::Ident((ident, _)) => write!(f, "{ident}"),
            Token::IntrinsicName(ident) => write!(f, "{ident}"),
            Token::RealLiteral(ident) => write!(f, "{ident}"),
            Token::IntLiteral(ident) => write!(f, "{ident}"),
            Token::StringLiteral(contents) => write!(f, "{contents}"),
            Token::Comment => write!(f, "comment"),
            Token::TestMarkerExpr => write!(f, "### expr test ###"),
            Token::TestMarkerRange => write!(f, "### range test ###"),
            Token::TestMarkerType => write!(f, "### type test ###"),
            Token::TestMarkerSVType => write!(f, "### storage var type test ###"),
            Token::TestMarkerIdent => write!(f, "### ident test ###"),
            Token::TestMarkerIntrinsic => write!(f, "### intrinsic ident test ###"),
            Token::TestMarkerUseTree => write!(f, "### use tree test ###"),
        }
    }
}
/// Iterator adapter over a raw token source that recognises macro
/// declarations and calls, folding their bodies and argument lists into
/// single compound tokens (`MacroBody`, `MacroCallArgs`).
pub(super) struct Lexer<'a> {
    /// Underlying tokens: a live logos lexer or a replayed buffer.
    token_stream: TokenSource<'a>,
    /// Source path, used only for building error spans.
    filepath: Arc<std::path::Path>,
    /// Module path used to absolutise macro names found in macro bodies.
    mod_path: &'a [String],
    /// Current position in the macro-recognition state machine.
    state: LexerState,
}
impl<'sc> Lexer<'sc> {
    /// Creates a lexer that tokenises `src` text with logos.
    ///
    /// `filepath` is used only to build error spans; `mod_path` absolutises
    /// macro names inside macro bodies.
    pub(super) fn new(
        src: &'sc str,
        filepath: &Arc<std::path::Path>,
        mod_path: &'sc [String],
    ) -> Self {
        Self {
            token_stream: TokenSource::LogosLexer(Token::lexer(src)),
            filepath: filepath.clone(),
            mod_path,
            state: LexerState::default(),
        }
    }

    /// Creates a lexer that replays an already-lexed token buffer instead
    /// of scanning text (presumably for re-lexing macro-expanded token
    /// sequences — TODO confirm callers).
    pub(super) fn from_tokens(
        tokens: Vec<(usize, Token, usize)>,
        filepath: &Arc<std::path::Path>,
        mod_path: &'sc [String],
    ) -> Self {
        Self {
            token_stream: TokenSource::VecToken(VecTokenSourceState::new(tokens)),
            filepath: filepath.clone(),
            mod_path,
            state: LexerState::default(),
        }
    }

    /// Gathers everything from the opening brace of a macro declaration up
    /// to its matching close brace into a single `Token::MacroBody`.
    ///
    /// Lookahead happens on a clone of the token stream; the real stream is
    /// only advanced (via `nth`) once a matching `}` is found. If the
    /// stream runs out first, the bare `{` is returned unchanged so the
    /// parser reports the unterminated body.
    fn gather_macro_body(
        &mut self,
        obrace_tok: Token,
        obrace_span: &Range<usize>,
    ) -> Result<(usize, Token, usize), ParseError> {
        let mut body_token_stream = self.token_stream.clone();
        let mut parsed_tok_count = 0;
        // Distance from the most recent `::`; a macro name directly
        // preceded by `::` (an already-qualified path) is left untouched.
        let mut count_since_double_colon = 1;
        let mut body_toks = vec![(obrace_span.start, Token::BraceOpen, obrace_span.end)];
        let mut nest_depth = 0;
        loop {
            parsed_tok_count += 1;
            count_since_double_colon += 1;
            let next_tok = body_token_stream.next();
            let next_span = body_token_stream.span();
            // Push `$tok` into the gathered body with the current span.
            macro_rules! push_tok {
                ($tok: expr) => {
                    body_toks.push((next_span.start, $tok, next_span.end))
                };
            }
            match next_tok {
                // Ran out of tokens: not a valid body after all; hand back
                // the lone `{`.
                None => {
                    return Ok((obrace_span.start, obrace_tok, obrace_span.end));
                }
                Some(Ok(Token::BraceOpen)) => {
                    push_tok!(Token::BraceOpen);
                    nest_depth += 1;
                }
                Some(Ok(Token::BraceClose)) => {
                    push_tok!(Token::BraceClose);
                    if nest_depth > 0 {
                        nest_depth -= 1;
                    } else {
                        // Found the body's own `}`: advance the real stream
                        // past everything consumed during lookahead and
                        // return the gathered body.
                        let _ = self.token_stream.nth(parsed_tok_count - 1);
                        return Ok((
                            obrace_span.start,
                            Token::MacroBody(body_toks),
                            next_span.end,
                        ));
                    }
                }
                Some(Ok(Token::DoubleColon)) => {
                    push_tok!(Token::DoubleColon);
                    count_since_double_colon = 0;
                }
                Some(Ok(tok @ Token::MacroName(_))) => {
                    // A macro name NOT directly preceded by `::` is made
                    // absolute: `@foo` becomes `::<mod_path>::@foo`.
                    if count_since_double_colon > 1 {
                        push_tok!(Token::DoubleColon);
                        for path_el in self.mod_path {
                            push_tok!(Token::Ident((path_el.clone(), false)));
                            push_tok!(Token::DoubleColon);
                        }
                    }
                    push_tok!(tok);
                    // Placeholder tag; presumably filled in later during
                    // macro expansion — TODO confirm.
                    push_tok!(Token::MacroTag(None));
                }
                Some(Ok(tok)) => {
                    push_tok!(tok);
                }
                Some(Err(_)) => {
                    return Err(ParseError::InvalidToken);
                }
            }
        }
    }

    /// Gathers a macro call's parenthesised arguments into a single
    /// `Token::MacroCallArgs`, starting just after the opening paren.
    ///
    /// Arguments are separated by top-level `;`; nested parens are tracked
    /// so separators inside them are left alone. A `~ident` pair becomes a
    /// `Token::MacroSplice`; a `~` not followed immediately by an
    /// identifier is a `BadSplice` error. As with `gather_macro_body`,
    /// lookahead uses a clone of the stream and hitting EOF returns the
    /// bare `(` unchanged.
    fn gather_macro_call_args(
        &mut self,
        oparen_tok: Token,
        oparen_span: &Range<usize>,
    ) -> Result<(usize, Token, usize), ParseError> {
        let mut args_token_stream = self.token_stream.clone();
        let mut parsed_tok_count = 0;
        let mut nested_paren_count = 0;
        // Loop-count at which the last `~` was seen; used below to detect a
        // `~` that was not immediately followed by an identifier.
        let mut most_recent_tilde = 0;
        let mut tilde_tok_range: Option<Range<usize>> = None;
        let mut all_args: Vec<Vec<(usize, Token, usize)>> = vec![Vec::default()];
        // Push `$tok` onto the argument currently being gathered.
        macro_rules! push_tok {
            ($tok: expr) => {{
                let tok_span = args_token_stream.span();
                all_args
                    .last_mut()
                    .expect("Args vec is always valid.")
                    .push((tok_span.start, $tok, tok_span.end))
            }};
        }
        loop {
            parsed_tok_count += 1;
            match args_token_stream.next() {
                // Ran out of tokens: not a valid call after all.
                None => {
                    return Ok((oparen_span.start, oparen_tok, oparen_span.end));
                }
                // Top-level `;` starts the next argument.
                Some(Ok(Token::Semi)) if nested_paren_count == 0 => {
                    all_args.push(Vec::new());
                }
                // Hold the `~` back until we see what follows it.
                Some(Ok(Token::Tilde)) => {
                    tilde_tok_range = Some(args_token_stream.span());
                    most_recent_tilde = parsed_tok_count;
                }
                // `~ident` collapses into a single splice token.
                Some(Ok(Token::Ident(id))) if tilde_tok_range.is_some() => {
                    push_tok!(Token::MacroSplice(id.0.clone()));
                    tilde_tok_range = None;
                }
                Some(Ok(tok @ Token::ParenOpen)) => {
                    nested_paren_count += 1;
                    push_tok!(tok);
                }
                Some(Ok(tok @ Token::ParenClose)) if nested_paren_count > 0 => {
                    nested_paren_count -= 1;
                    push_tok!(tok);
                }
                // Top-level `)` ends the call.
                Some(Ok(Token::ParenClose)) => {
                    // Advance the real stream past everything consumed
                    // during lookahead.
                    let _ = self.token_stream.nth(parsed_tok_count - 1);
                    // Drop a trailing empty argument (e.g. from `()` or a
                    // trailing `;`).
                    if all_args
                        .last()
                        .expect("Args vec is always valid.")
                        .is_empty()
                    {
                        all_args.pop();
                    }
                    let cparen_span = args_token_stream.span();
                    return Ok((
                        oparen_span.start,
                        Token::MacroCallArgs(all_args),
                        cparen_span.end,
                    ));
                }
                Some(Ok(tok)) => {
                    push_tok!(tok)
                }
                Some(Err(_)) => {
                    return Err(ParseError::InvalidToken);
                }
            }
            // If a pending `~` was not consumed by the ident arm this very
            // iteration, it cannot form a valid splice.
            if most_recent_tilde != parsed_tok_count {
                if let Some(range) = tilde_tok_range {
                    return Err(ParseError::BadSplice(Span::new(
                        self.filepath.clone(),
                        range,
                    )));
                }
            }
        }
    }
}
/// Where the `Lexer` gets its raw tokens from: a live logos lexer over
/// source text, or an in-memory buffer of previously lexed tokens.
#[derive(Clone)]
enum TokenSource<'a> {
    LogosLexer(logos::Lexer<'a, Token>),
    VecToken(VecTokenSourceState),
}
/// Cursor state for replaying a pre-lexed token buffer.
#[derive(Clone)]
struct VecTokenSourceState {
    /// The `(start, token, end)` triples being replayed.
    toks: Vec<(usize, Token, usize)>,
    /// Index of the next token to yield.
    index: usize,
    /// Start byte offset of the most recently yielded token.
    start: usize,
    /// End byte offset of the most recently yielded token.
    end: usize,
}
impl VecTokenSourceState {
fn new(toks: Vec<(usize, Token, usize)>) -> Self {
VecTokenSourceState {
toks,
index: 0,
start: 0,
end: 0,
}
}
}
impl TokenSource<'_> {
    /// Advances by one token, mirroring `Iterator::next`.
    fn next(&mut self) -> Option<Result<Token, ParseError>> {
        match self {
            TokenSource::LogosLexer(lex) => lex.next(),
            TokenSource::VecToken(_) => self.nth(0),
        }
    }

    /// Skips `n` tokens and yields the one after them, recording its span
    /// for the vector-backed source. Returns `None` past the end.
    fn nth(&mut self, n: usize) -> Option<Result<Token, ParseError>> {
        match self {
            TokenSource::LogosLexer(lex) => lex.nth(n),
            TokenSource::VecToken(state) => {
                let (start, tok, end) = state.toks.get(state.index + n)?.clone();
                state.index += n + 1;
                state.start = start;
                state.end = end;
                Some(Ok(tok))
            }
        }
    }

    /// Byte range of the most recently produced token.
    fn span(&self) -> Range<usize> {
        match self {
            TokenSource::LogosLexer(lex) => lex.span(),
            TokenSource::VecToken(state) => state.start..state.end,
        }
    }
}
/// State machine tracking progress through a macro declaration or call, so
/// the `Lexer` iterator knows when to gather a body or an argument list.
#[derive(Clone, Copy, Debug, Default, PartialEq)]
enum LexerState {
    /// Not inside any macro-related construct.
    #[default]
    Normal,
    /// Just saw the `macro` keyword.
    Macro,
    /// Saw `macro` followed by a macro name; expecting `(`.
    MacroName,
    /// Inside a macro declaration's parameter list.
    MacroParams,
    /// Saw a macro name outside a declaration, i.e. a macro call.
    MacroCall,
}
impl Iterator for Lexer<'_> {
    type Item = Result<(usize, Token, usize), ParseError>;

    /// Yields the next `(start, token, end)` triple while driving the
    /// macro-recognition state machine.
    ///
    /// Most tokens pass through untouched; the two interesting transitions
    /// are the `{` after a macro declaration's parameter list (the whole
    /// body is gathered into one `MacroBody` token) and the `(` after a
    /// macro call name (the arguments are gathered into one
    /// `MacroCallArgs` token). Lex errors are wrapped in `ParseError::Lex`
    /// with the current span, except `BadSplice`, which already carries
    /// its own span.
    fn next(&mut self) -> Option<Self::Item> {
        self.token_stream.next().map(|res| {
            let span = self.token_stream.span();
            res.and_then(|tok| match tok {
                // `macro` begins a declaration.
                Token::Macro if self.state == LexerState::Normal => {
                    self.state = LexerState::Macro;
                    Ok((span.start, tok, span.end))
                }
                // The declared macro's name.
                Token::MacroName(_) if self.state == LexerState::Macro => {
                    self.state = LexerState::MacroName;
                    Ok((span.start, tok, span.end))
                }
                // Opening paren of the declaration's parameter list.
                Token::ParenOpen if self.state == LexerState::MacroName => {
                    self.state = LexerState::MacroParams;
                    Ok((span.start, tok, span.end))
                }
                // Tokens allowed inside the parameter list keep the state.
                Token::MacroParam(_)
                | Token::MacroParamPack(_)
                | Token::MacroSplice(_)
                | Token::Comma
                | Token::ParenClose
                    if self.state == LexerState::MacroParams =>
                {
                    Ok((span.start, tok, span.end))
                }
                // `{` after the parameter list: gather the whole body.
                Token::BraceOpen if self.state == LexerState::MacroParams => {
                    self.state = LexerState::Normal;
                    self.gather_macro_body(tok, &span)
                }
                // A macro name outside a declaration is a call.
                Token::MacroName(_) if self.state == LexerState::Normal => {
                    self.state = LexerState::MacroCall;
                    Ok((span.start, tok, span.end))
                }
                Token::MacroTag(_) if self.state == LexerState::MacroCall => {
                    Ok((span.start, tok, span.end))
                }
                // `(` after a call name: gather the argument lists.
                Token::ParenOpen if self.state == LexerState::MacroCall => {
                    self.state = LexerState::Normal;
                    self.gather_macro_call_args(tok, &span)
                }
                // Anything else resets the state machine.
                _ => {
                    self.state = LexerState::Normal;
                    Ok((span.start, tok, span.end))
                }
            })
            .map_err(|err| match err {
                // `BadSplice` already has an accurate span; keep it.
                ParseError::BadSplice(_) => err,
                _ => ParseError::Lex {
                    span: Span::new(self.filepath.clone(), span.start..span.end),
                },
            })
        })
    }
}
/// Character-level tokens used to decode the contents of a string literal
/// (quotes and escapes included) after the outer `StringLiteral` regex has
/// matched the whole thing.
#[derive(Clone, Debug, Eq, Hash, Logos, PartialEq, Ord, PartialOrd)]
#[logos(error = ParseError)]
enum StringLiteralChar {
    // `\xNN` hex escape decoded to its char. Two hex digits produce values
    // 0x00..=0xFF, all valid scalar values, so the `unwrap_or` fallback
    // should never actually fire.
    #[regex(r"\\x[0-9a-fA-F]{2}",
        |lex| {
            char::from_u32(
                lex.slice()
                    .chars()
                    .skip(2)
                    .fold(0, |n, c| n * 16 + c.to_digit(16).unwrap()),
            )
            .unwrap_or('\x00')
        }
    )]
    HexEscape(char),
    #[token(r"\n", |_| '\n')]
    Newline(char),
    #[token(r"\t", |_| '\t')]
    Tab(char),
    #[token(r#"\""#, |_| '\"')]
    DoubleQuote(char),
    #[token(r"\\", |_| '\\')]
    Backslash(char),
    // Line continuation: a backslash-newline plus any following
    // indentation is dropped from the decoded string.
    #[regex(r"\\\n[ \t]*", logos::skip)]
    JoinNewline,
    // The surrounding `"` delimiters are dropped.
    #[token(r#"""#, logos::skip)]
    Delimiter,
    // Any other single character stands for itself.
    #[regex(r#"[^"\\]"#, |lex| lex.slice().chars().next().unwrap())]
    Any(char),
}
impl From<StringLiteralChar> for char {
fn from(value: StringLiteralChar) -> Self {
match value {
StringLiteralChar::HexEscape(c)
| StringLiteralChar::Newline(c)
| StringLiteralChar::Tab(c)
| StringLiteralChar::DoubleQuote(c)
| StringLiteralChar::Backslash(c)
| StringLiteralChar::Any(c) => c,
StringLiteralChar::JoinNewline | StringLiteralChar::Delimiter => {
unreachable!("Should be skipped by the tokenizer.")
}
}
}
}
/// Maps a raw LALRPOP terminal name onto a human-friendly category for use
/// in error messages.
///
/// Terminal names for types, literals, booleans and identifiers collapse to
/// a generic description ("a type", "a literal", ...); any other terminal
/// name is echoed back verbatim. Returns `None` when no token was supplied.
pub fn get_token_error_category(lalrpop_token: &Option<String>) -> Option<String> {
    // `as_deref().map(...)` replaces the manual
    // `if let Some(..) { Some(..) } else { None }` pattern and lets the
    // match work on `&str` directly.
    lalrpop_token.as_deref().map(|token| match token {
        "int_ty" | "real_ty" | "bool_ty" | "string_ty" | "b256_ty" => "a type".to_owned(),
        "int_lit" | "real_lit" | "str_lit" | "nil" => "a literal".to_owned(),
        "true" | "false" => "a boolean".to_owned(),
        "ident" => "an identifier".to_owned(),
        other => other.to_owned(),
    })
}