#![forbid(unsafe_code)]
#![cfg_attr(not(test), deny(clippy::unwrap_used, clippy::expect_used))]
use std::fmt::{self, Display, Formatter};
use std::str::FromStr;
use std::sync::OnceLock;
use span::{Span, Spanned};
pub mod macros;
pub mod span;
/// Operator precedence level, stored as a single `u8`.
///
/// Values are ordered by the wrapped number: [`Precedence::lowest`] is `0`
/// and [`Precedence::highest`] is `255`.
#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
pub struct Precedence(u8);

impl Precedence {
    /// Wraps a raw precedence level.
    pub fn new(prec: u8) -> Precedence {
        Precedence(prec)
    }

    /// The smallest representable precedence (`0`).
    pub fn lowest() -> Precedence {
        Precedence(0)
    }

    /// The largest representable precedence (`255`).
    pub fn highest() -> Precedence {
        Precedence(u8::MAX)
    }

    /// Returns the precedence one level above `self`.
    ///
    /// The result saturates at [`Precedence::highest`]: a plain `+ 1` would
    /// panic in debug builds and wrap around to the lowest level in release
    /// builds when called on the highest precedence.
    pub fn next(&self) -> Precedence {
        Precedence(self.0.saturating_add(1))
    }
}
/// A lexical token produced by [`Token::tokenize`].
///
/// Every variant carries a [`Span`]; value-bearing variants store their
/// payload first and the span last. The `rename_all = "lowercase"` attribute
/// makes serde use the lowercased variant names.
#[derive(Clone, Debug, PartialEq, serde::Deserialize, serde::Serialize)]
#[serde(rename_all = "lowercase")]
pub enum Token {
    // --- Keywords (each matched as a whole word by the token regex) ---
    As(Span),
    Class(Span),
    Declare(Span),
    Else(Span),
    Fn(Span),
    For(Span),
    If(Span),
    Import(Span),
    Is(Span),
    Instance(Span),
    Match(Span),
    Pub(Span),
    Type(Span),
    When(Span),
    Then(Span),
    With(Span),
    Where(Span),
    // --- Operators ---
    /// `+`
    Add(Span),
    /// `&&`
    And(Span),
    /// `++`
    Concat(Span),
    /// `/`
    Div(Span),
    /// `.`
    Dot(Span),
    /// `==`
    Eq(Span),
    /// `!=`
    Ne(Span),
    /// `>=`
    Ge(Span),
    /// `>`
    Gt(Span),
    /// `<=`
    Le(Span),
    /// `<`
    Lt(Span),
    /// `%`
    Mod(Span),
    /// `*`
    Mul(Span),
    /// `||`
    Or(Span),
    /// `-`
    Sub(Span),
    // --- Punctuation and structural tokens ---
    /// `<-` or `←`
    ArrowL(Span),
    /// `->` or `→`
    ArrowR(Span),
    /// `=`
    Assign(Span),
    /// `\` or `λ`
    BackSlash(Span),
    /// `{`
    BraceL(Span),
    /// `}`
    BraceR(Span),
    /// `[`
    BracketL(Span),
    /// `]`
    BracketR(Span),
    /// `:`
    Colon(Span),
    /// `::`
    ColonColon(Span),
    /// `,`
    Comma(Span),
    /// `{-`
    CommentL(Span),
    /// `-}`
    CommentR(Span),
    /// `..`
    DotDot(Span),
    /// `#`
    HashTag(Span),
    /// The `in` keyword.
    In(Span),
    /// The `let` keyword.
    Let(Span),
    /// The `rec` keyword.
    Rec(Span),
    /// `(`
    ParenL(Span),
    /// `)`
    ParenR(Span),
    /// `|`
    Pipe(Span),
    /// `?`
    Question(Span),
    /// `;`
    SemiColon(Span),
    /// A single space or tab. `Token::tokenize` removes these from its result.
    Whitespace(Span),
    /// A single `\n` or `\r`. Unlike `Whitespace`, `Token::tokenize` keeps these.
    WhitespaceNewline(Span),
    // --- Literals and names ---
    /// `true` or `false`.
    Bool(bool, Span),
    /// Digits, a dot, then digits (e.g. `1.5`), parsed as `f64`.
    Float(f64, Span),
    /// A run of decimal digits parsed as `u64`.
    Int(u64, Span),
    /// The `null` literal.
    Null(Span),
    /// Contents of a single- or double-quoted string, without the quotes.
    /// Escape sequences are stored exactly as written (no unescaping).
    String(String, Span),
    /// Text starting with `https://` and running up to the next whitespace.
    HttpsUrl(String, Span),
    /// An ASCII letter or `_` followed by ASCII letters, digits, or `_`.
    Ident(String, Span),
    /// End of input. NOTE(review): `Token::tokenize` never emits this variant;
    /// presumably a consumer builds it from `Tokens::eof` — confirm.
    Eof(Span),
}
/// Errors reported while tokenizing source text.
#[derive(Debug, thiserror::Error)]
pub enum LexicalError {
    /// The input at this span matched none of the named token patterns.
    #[error("Unexpected token {0}")]
    UnexpectedToken(Span),
    /// A `bool`, `float` or `int` lexeme was recognized but could not be
    /// converted to its Rust value (for example an integer too large for `u64`).
    #[error("invalid {kind} literal `{text}`: {error}")]
    InvalidLiteral {
        /// Literal category: `"bool"`, `"float"` or `"int"`.
        kind: &'static str,
        /// The offending source text.
        text: String,
        /// Message of the underlying parse error.
        error: String,
        /// Location of the literal; not included in the rendered message.
        span: Span,
    },
    /// A failure inside the lexer itself, such as the token regex failing to
    /// compile or a match without capture group 0.
    #[error("internal lexer error: {0}")]
    Internal(String),
}
impl Token {
/// Splits `input` into a [`Tokens`] stream.
///
/// The shared token regex (see [`Token::regex`]) is applied repeatedly; each
/// match becomes one token whose [`Span`] records 1-based begin/end line and
/// column, with the end column exclusive. Columns count `char`s, not bytes.
/// `Token::Whitespace` tokens are dropped from the result, while
/// `Token::WhitespaceNewline` tokens are kept. `Tokens::eof` is a zero-width
/// span located just after the last lexeme.
///
/// # Errors
///
/// * [`LexicalError::UnexpectedToken`] if some text matches no named pattern.
/// * [`LexicalError::InvalidLiteral`] if a bool/float/int lexeme fails to parse.
/// * [`LexicalError::Internal`] if the regex cannot be compiled or a match
///   has no group 0.
pub fn tokenize(input: &str) -> Result<Tokens, LexicalError> {
    let mut line = 1;
    let mut column: usize = 1;
    let mut tokens = Vec::new();
    let re = Token::regex()?;
    for capture in re.captures_iter(input) {
        let Some(lexeme) = capture.get(0).map(|m| m.as_str()) else {
            return Err(LexicalError::Internal(
                "regex capture missing group 0".into(),
            ));
        };
        let begin_line = line;
        let begin_column = column;
        let span = if lexeme == "\n" {
            // A newline token ends on the line it started on; only the
            // position of the *next* token moves to the following line.
            column += 1;
            let span = Span::new(begin_line, begin_column, line, column);
            line += 1;
            column = 1;
            span
        } else {
            // String literals may contain raw newlines. Walk the lexeme so
            // that line/column stay correct for every token that follows
            // (previously only a lone "\n" lexeme advanced the line counter).
            for ch in lexeme.chars() {
                if ch == '\n' {
                    line += 1;
                    column = 1;
                } else {
                    column += 1;
                }
            }
            Span::new(begin_line, begin_column, line, column)
        };
        // Exactly one named alternative takes part in a match, so the order
        // of the checks below does not affect the result.
        let token = if capture.name("As").is_some() {
            Token::As(span)
        } else if capture.name("Class").is_some() {
            Token::Class(span)
        } else if capture.name("Declare").is_some() {
            Token::Declare(span)
        } else if capture.name("Else").is_some() {
            Token::Else(span)
        } else if capture.name("Fn").is_some() {
            Token::Fn(span)
        } else if capture.name("For").is_some() {
            Token::For(span)
        } else if capture.name("If").is_some() {
            Token::If(span)
        } else if capture.name("Import").is_some() {
            Token::Import(span)
        } else if capture.name("Is").is_some() {
            Token::Is(span)
        } else if capture.name("Instance").is_some() {
            Token::Instance(span)
        } else if capture.name("Match").is_some() {
            Token::Match(span)
        } else if capture.name("Pub").is_some() {
            Token::Pub(span)
        } else if capture.name("Type").is_some() {
            Token::Type(span)
        } else if capture.name("When").is_some() {
            Token::When(span)
        } else if capture.name("In").is_some() {
            Token::In(span)
        } else if capture.name("Then").is_some() {
            Token::Then(span)
        } else if capture.name("With").is_some() {
            Token::With(span)
        } else if capture.name("Where").is_some() {
            Token::Where(span)
        } else if capture.name("ArrowL").is_some() {
            Token::ArrowL(span)
        } else if capture.name("ArrowR").is_some() {
            Token::ArrowR(span)
        } else if capture.name("BackSlash").is_some() {
            Token::BackSlash(span)
        } else if capture.name("CommentL").is_some() {
            Token::CommentL(span)
        } else if capture.name("CommentR").is_some() {
            Token::CommentR(span)
        } else if capture.name("BraceL").is_some() {
            Token::BraceL(span)
        } else if capture.name("BraceR").is_some() {
            Token::BraceR(span)
        } else if capture.name("BracketL").is_some() {
            Token::BracketL(span)
        } else if capture.name("BracketR").is_some() {
            Token::BracketR(span)
        } else if capture.name("ColonColon").is_some() {
            Token::ColonColon(span)
        } else if capture.name("Colon").is_some() {
            Token::Colon(span)
        } else if capture.name("Comma").is_some() {
            Token::Comma(span)
        } else if capture.name("DotDot").is_some() {
            Token::DotDot(span)
        } else if capture.name("HashTag").is_some() {
            Token::HashTag(span)
        } else if capture.name("Let").is_some() {
            Token::Let(span)
        } else if capture.name("Rec").is_some() {
            Token::Rec(span)
        } else if capture.name("ParenL").is_some() {
            Token::ParenL(span)
        } else if capture.name("ParenR").is_some() {
            Token::ParenR(span)
        } else if capture.name("Pipe").is_some() {
            Token::Pipe(span)
        } else if capture.name("Question").is_some() {
            Token::Question(span)
        } else if capture.name("SemiColon").is_some() {
            Token::SemiColon(span)
        } else if capture.name("Whitespace").is_some() {
            Token::Whitespace(span)
        } else if capture.name("WhitespaceNewline").is_some() {
            Token::WhitespaceNewline(span)
        } else if capture.name("Concat").is_some() {
            Token::Concat(span)
        } else if capture.name("Add").is_some() {
            Token::Add(span)
        } else if capture.name("And").is_some() {
            Token::And(span)
        } else if capture.name("Div").is_some() {
            Token::Div(span)
        } else if capture.name("Dot").is_some() {
            Token::Dot(span)
        } else if capture.name("Equal").is_some() {
            Token::Eq(span)
        } else if capture.name("NotEqual").is_some() {
            Token::Ne(span)
        } else if capture.name("GreaterThanEq").is_some() {
            Token::Ge(span)
        } else if capture.name("GreaterThan").is_some() {
            Token::Gt(span)
        } else if capture.name("LessThanEq").is_some() {
            Token::Le(span)
        } else if capture.name("LessThan").is_some() {
            Token::Lt(span)
        } else if capture.name("Mod").is_some() {
            Token::Mod(span)
        } else if capture.name("Mul").is_some() {
            Token::Mul(span)
        } else if capture.name("Or").is_some() {
            Token::Or(span)
        } else if capture.name("Sub").is_some() {
            Token::Sub(span)
        } else if capture.name("Assign").is_some() {
            Token::Assign(span)
        } else if let Some(m) = capture.name("Bool") {
            let text = m.as_str();
            let v = bool::from_str(text).map_err(|e| LexicalError::InvalidLiteral {
                kind: "bool",
                text: text.to_string(),
                error: e.to_string(),
                span,
            })?;
            Token::Bool(v, span)
        } else if let Some(m) = capture.name("Float") {
            let text = m.as_str();
            let v = f64::from_str(text).map_err(|e| LexicalError::InvalidLiteral {
                kind: "float",
                text: text.to_string(),
                error: e.to_string(),
                span,
            })?;
            Token::Float(v, span)
        } else if let Some(m) = capture.name("Int") {
            let text = m.as_str();
            let v = u64::from_str(text).map_err(|e| LexicalError::InvalidLiteral {
                kind: "int",
                text: text.to_string(),
                error: e.to_string(),
                span,
            })?;
            Token::Int(v, span)
        } else if capture.name("Null").is_some() {
            Token::Null(span)
        } else if let Some(m) = capture.name("DoubleString") {
            // The named group excludes the surrounding quotes.
            Token::String(m.as_str().to_string(), span)
        } else if let Some(m) = capture.name("SingleString") {
            Token::String(m.as_str().to_string(), span)
        } else if let Some(m) = capture.name("HttpsUrl") {
            Token::HttpsUrl(m.as_str().to_string(), span)
        } else if let Some(m) = capture.name("Ident") {
            Token::Ident(m.as_str().to_string(), span)
        } else {
            // Only the anonymous catch-all alternative matched.
            return Err(LexicalError::UnexpectedToken(span));
        };
        // Spaces and tabs still advance the position above, but are not
        // part of the returned stream.
        if !matches!(token, Token::Whitespace(..)) {
            tokens.push(token);
        }
    }
    Ok(Tokens {
        items: tokens,
        eof: Span::new(line, column, line, column),
    })
}
/// Returns the process-wide token regex, compiling it on first use.
///
/// The pattern is one big alternation with a named group per token kind.
/// Alternatives are listed so that longer lexemes come before their
/// prefixes (`::` before `:`, `==` before `=`, `..` before `.`), and the
/// final anonymous `(.)` catches any character no token claims. The
/// `LambdaR` alternative can never take part in a match because `ArrowR`
/// appears earlier and already accepts `->`.
///
/// # Errors
///
/// Returns [`LexicalError::Internal`] if the pattern fails to compile. The
/// outcome of the first compilation is cached, so a failure is reported on
/// every call.
pub fn regex() -> Result<&'static regex::Regex, LexicalError> {
    const PATTERN: &str = concat!(
        r"(?P<As>\bas\b)|",
        r"(?P<Class>\bclass\b)|",
        r"(?P<Declare>\bdeclare\b)|",
        r"(?P<Else>\belse\b)|",
        r"(?P<Fn>\bfn\b)|",
        r"(?P<For>\bfor\b)|",
        r"(?P<If>\bif\b)|",
        r"(?P<Import>\bimport\b)|",
        r"(?P<Is>\bis\b)|",
        r"(?P<Instance>\binstance\b)|",
        r"(?P<Match>\bmatch\b)|",
        r"(?P<Pub>\bpub\b)|",
        r"(?P<Type>\btype\b)|",
        r"(?P<When>\bwhen\b)|",
        r"(?P<Then>\bthen\b)|",
        r"(?P<With>\bwith\b)|",
        r"(?P<Where>\bwhere\b)|",
        r"(?P<ArrowL><-|←)|",
        r"(?P<ArrowR>->|→)|",
        r"(?P<BackSlash>\\|λ)|",
        r"(?P<CommentL>\{-)|",
        r"(?P<CommentR>-\})|",
        r"(?P<BraceL>\{)|",
        r"(?P<BraceR>\})|",
        r"(?P<BracketL>\[)|",
        r"(?P<BracketR>\])|",
        r"(?P<ColonColon>::)|",
        r"(?P<Colon>:)|",
        r"(?P<Comma>,)|",
        r"(?P<DotDot>\.\.)|",
        r"(?P<HashTag>\#)|",
        r"(?P<In>\bin\b)|",
        r"(?P<Let>\blet\b)|",
        r"(?P<Rec>\brec\b)|",
        r"(?P<LambdaR>->)|",
        r"(?P<ParenL>\()|",
        r"(?P<ParenR>\))|",
        r"(?P<Question>\?)|",
        r"(?P<SemiColon>;)|",
        r"(?P<Whitespace>( |\t))|",
        r"(?P<WhitespaceNewline>(\n|\r))|",
        r"(?P<Concat>\+\+)|",
        r"(?P<Add>\+)|",
        r"(?P<And>&&)|",
        r"(?P<Div>/)|",
        r"(?P<Dot>\.)|",
        r"(?P<Equal>==)|",
        r"(?P<Assign>=)|",
        r"(?P<NotEqual>!=)|",
        r"(?P<LessThanEq><=)|",
        r"(?P<LessThan><)|",
        r"(?P<GreaterThanEq>>=)|",
        r"(?P<GreaterThan>>)|",
        r"(?P<Mod>%)|",
        r"(?P<Mul>\*)|",
        r"(?P<Or>\|\|)|",
        r"(?P<Pipe>\|)|",
        r"(?P<Sub>-)|",
        r"(?P<Bool>\b(true|false)\b)|",
        r"(?P<Float>[0-9]+\.[0-9]+)|",
        r"(?P<Int>[0-9]+)|",
        r"(?P<Null>\bnull\b)|",
        r#""(?P<DoubleString>(\\"|[^"])*)"|"#,
        r#"'(?P<SingleString>(\\'|[^'])*)'|"#,
        r"(?P<HttpsUrl>https://[^\s]+)|",
        r"(?P<Ident>[_a-zA-Z]([_a-zA-Z]|[0-9])*)|",
        r"(.)",
    );
    // The compile error is cached as a `String` so the cell's content stays
    // simple to share and the failure can be re-reported on later calls.
    static TOKEN_REGEX: OnceLock<Result<regex::Regex, String>> = OnceLock::new();

    TOKEN_REGEX
        .get_or_init(|| {
            PATTERN
                .parse::<regex::Regex>()
                .map_err(|err| err.to_string())
        })
        .as_ref()
        .map_err(|msg| {
            LexicalError::Internal(format!("failed to compile token regex: {msg}"))
        })
}
pub fn precedence(&self) -> Precedence {
use Token::*;
match self {
Or(..) => Precedence(1),
And(..) => Precedence(2),
Eq(..) | Ne(..) | Lt(..) | Le(..) | Gt(..) | Ge(..) => Precedence(3),
Add(..) | Sub(..) | Concat(..) => Precedence(4),
Mul(..) | Div(..) | Mod(..) => Precedence(5),
Ident(..) | HttpsUrl(..) => Precedence::highest(),
_ => Precedence::lowest(),
}
}
/// Reports whether this token is horizontal whitespace or a line break.
pub fn is_whitespace(&self) -> bool {
    let is_blank = matches!(self, Token::Whitespace(..));
    let is_line_break = matches!(self, Token::WhitespaceNewline(..));
    is_blank || is_line_break
}
}
/// Gives access to the [`Span`] stored in every [`Token`] variant.
impl Spanned for Token {
    /// Borrows the span carried by this token.
    fn span(&self) -> &Span {
        use Token::*;
        match self {
            // Variants whose only field is the span.
            As(span) | Class(span) | Declare(span) | Else(span) | Fn(span) | For(span)
            | If(span) | Import(span) | Is(span) | Instance(span) | Match(span) | Pub(span)
            | Type(span) | When(span) | Then(span) | With(span) | Where(span)
            | ArrowL(span) | ArrowR(span) | Assign(span) | BackSlash(span) | BraceL(span)
            | BraceR(span) | BracketL(span) | BracketR(span) | Colon(span)
            | ColonColon(span) | Comma(span) | CommentL(span) | CommentR(span) | Dot(span)
            | DotDot(span) | HashTag(span) | In(span) | Let(span) | Rec(span)
            | ParenL(span) | ParenR(span) | Pipe(span) | Question(span) | SemiColon(span)
            | Whitespace(span) | WhitespaceNewline(span)
            | Add(span) | And(span) | Concat(span) | Div(span) | Eq(span) | Ne(span)
            | Ge(span) | Gt(span) | Le(span) | Lt(span) | Mod(span) | Mul(span) | Or(span)
            | Sub(span) | Null(span) | Eof(span) => span,
            // Variants that store a payload ahead of the span.
            Bool(_, span) | Float(_, span) | Int(_, span) | String(_, span)
            | HttpsUrl(_, span) | Ident(_, span) => span,
        }
    }

    /// Mutably borrows the span carried by this token.
    fn span_mut(&mut self) -> &mut Span {
        use Token::*;
        match self {
            // Variants whose only field is the span.
            As(span) | Class(span) | Declare(span) | Else(span) | Fn(span) | For(span)
            | If(span) | Import(span) | Is(span) | Instance(span) | Match(span) | Pub(span)
            | Type(span) | When(span) | Then(span) | With(span) | Where(span)
            | ArrowL(span) | ArrowR(span) | Assign(span) | BackSlash(span) | BraceL(span)
            | BraceR(span) | BracketL(span) | BracketR(span) | Colon(span)
            | ColonColon(span) | Comma(span) | CommentL(span) | CommentR(span) | Dot(span)
            | DotDot(span) | HashTag(span) | In(span) | Let(span) | Rec(span)
            | ParenL(span) | ParenR(span) | Pipe(span) | Question(span) | SemiColon(span)
            | Whitespace(span) | WhitespaceNewline(span)
            | Add(span) | And(span) | Concat(span) | Div(span) | Eq(span) | Ne(span)
            | Ge(span) | Gt(span) | Le(span) | Lt(span) | Mod(span) | Mul(span) | Or(span)
            | Sub(span) | Null(span) | Eof(span) => span,
            // Variants that store a payload ahead of the span.
            Bool(_, span) | Float(_, span) | Int(_, span) | String(_, span)
            | HttpsUrl(_, span) | Ident(_, span) => span,
        }
    }
}
impl Display for Token {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
use Token::*;
match self {
As(..) => write!(f, "as"),
Class(..) => write!(f, "class"),
Declare(..) => write!(f, "declare"),
Else(..) => write!(f, "else"),
Fn(..) => write!(f, "fn"),
For(..) => write!(f, "for"),
If(..) => write!(f, "if"),
Import(..) => write!(f, "import"),
Is(..) => write!(f, "is"),
Instance(..) => write!(f, "instance"),
Match(..) => write!(f, "match"),
Pub(..) => write!(f, "pub"),
Type(..) => write!(f, "type"),
When(..) => write!(f, "when"),
Then(..) => write!(f, "then"),
With(..) => write!(f, "with"),
Where(..) => write!(f, "where"),
ArrowL(..) => write!(f, "<-"),
ArrowR(..) => write!(f, "->"),
Assign(..) => write!(f, "="),
BackSlash(..) => write!(f, "\\"),
BraceL(..) => write!(f, "{{"),
BraceR(..) => write!(f, "}}"),
BracketL(..) => write!(f, "["),
BracketR(..) => write!(f, "]"),
Colon(..) => write!(f, ":"),
ColonColon(..) => write!(f, "::"),
Comma(..) => write!(f, ","),
CommentL(..) => write!(f, "{{-"),
CommentR(..) => write!(f, "-}}"),
Dot(..) => write!(f, "."),
DotDot(..) => write!(f, ".."),
HashTag(..) => write!(f, "#"),
In(..) => write!(f, "in"),
Let(..) => write!(f, "let"),
Rec(..) => write!(f, "rec"),
ParenL(..) => write!(f, "("),
ParenR(..) => write!(f, ")"),
Pipe(..) => write!(f, "|"),
Question(..) => write!(f, "?"),
SemiColon(..) => write!(f, ";"),
Whitespace(..) => write!(f, " "),
WhitespaceNewline(..) => writeln!(f),
Add(..) => write!(f, "+"),
And(..) => write!(f, "&&"),
Concat(..) => write!(f, "++"),
Div(..) => write!(f, "/"),
Eq(..) => write!(f, "=="),
Ne(..) => write!(f, "!="),
Gt(..) => write!(f, ">"),
Ge(..) => write!(f, ">="),
Lt(..) => write!(f, "<"),
Le(..) => write!(f, "<="),
Mod(..) => write!(f, "%"),
Mul(..) => write!(f, "*"),
Or(..) => write!(f, "||"),
Sub(..) => write!(f, "-"),
Bool(x, ..) => write!(f, "{}", x),
Float(x, ..) => write!(f, "{}", x),
Int(x, ..) => write!(f, "{}", x),
Null(..) => write!(f, "null"),
String(x, ..) => write!(f, "{}", x),
HttpsUrl(url, ..) => write!(f, "{}", url),
Ident(ident, ..) => write!(f, "{}", ident),
Eof(..) => write!(f, "EOF"),
}
}
}
/// The result of [`Token::tokenize`]: the token list plus the end-of-input position.
#[derive(Clone, Debug, PartialEq, serde::Deserialize, serde::Serialize)]
pub struct Tokens {
    /// Tokens in source order, with `Token::Whitespace` entries removed
    /// (`Token::WhitespaceNewline` entries are retained).
    pub items: Vec<Token>,
    /// Zero-width span at the position just after the last lexeme.
    pub eof: Span,
}