mod callbacks;
pub use callbacks::parse_regex;
use callbacks::{
parse_doc_comment, parse_inner_doc_comment, parse_multiline_string, parse_number, parse_string,
skip_block_comment,
};
use logos::Logos;
/// Side-channel state carried by the lexer; it is attached to [`Token`] through
/// `#[logos(extras = LexerExtras)]`.
///
/// NOTE(review): the code that fills these lists is not visible in this part of the
/// file — presumably the callbacks in the `callbacks` module; confirm there.
#[derive(Default, Debug)]
pub struct LexerExtras {
/// Judging by the name, one entry per block comment that was never closed.
/// NOTE(review): the `(usize, usize)` pair looks like a source span (start, end) — confirm.
pub unterminated_block_comments: Vec<(usize, usize)>,
/// Judging by the name, one entry per rejected `\u` escape.
/// NOTE(review): presumably (start, end, offending text) — confirm against the producer.
pub invalid_unicode_escapes: Vec<(usize, usize, String)>,
}
/// Lexical tokens recognised by the `logos`-generated lexer.
///
/// Unit variants stand for fixed spellings (keywords, operators, delimiters); tuple
/// variants carry a payload produced by the callback named in their attribute.
/// The lexer state type is [`LexerExtras`].
#[expect(
clippy::exhaustive_enums,
reason = "token enum is matched exhaustively by the parser"
)]
#[derive(Logos, Debug, Clone, PartialEq)]
#[logos(extras = LexerExtras)]
// Runs of spaces, tabs, newlines and carriage returns are skipped.
#[logos(skip r"[ \t\n\r]+")]
// Line comments are skipped: `//` optionally followed by a character that is not `/`,
// `!` or a newline and then the rest of the line. The second alternative also matches
// `///` or `//!` when it is immediately followed by a newline.
#[logos(skip r"//([^/!\n][^\n]*)?|//[/!][\n]")]
pub enum Token {
/// Triggered by the `/*` opener; the `skip_block_comment` callback handles the rest.
/// NOTE(review): presumably the callback consumes up to the closing `*/` and skips the
/// comment, so this variant may never reach the parser — confirm in `callbacks`.
#[token("/*", skip_block_comment)]
BlockComment,
/// `///` followed by the rest of the line; payload comes from `parse_doc_comment`.
#[regex(r"///[^\n]*", parse_doc_comment)]
DocComment(String),
/// `//!` followed by the rest of the line; payload comes from `parse_inner_doc_comment`.
#[regex(r"//![^\n]*", parse_inner_doc_comment)]
InnerDocComment(String),
// --- Keywords ---
/// Keyword `trait`.
#[token("trait")]
Trait,
/// Keyword `struct`.
#[token("struct")]
Struct,
/// Keyword `impl`.
#[token("impl")]
Impl,
/// Keyword `enum`.
#[token("enum")]
Enum,
/// Keyword `mod`.
#[token("mod")]
Module,
/// Keyword `use`.
#[token("use")]
Use,
/// Keyword `pub`.
#[token("pub")]
Pub,
/// Keyword `let`.
#[token("let")]
Let,
/// Keyword `mut`.
#[token("mut")]
Mut,
/// Keyword `sink`.
#[token("sink")]
Sink,
/// Keyword `extern`.
#[token("extern")]
Extern,
/// Keyword `match`.
#[token("match")]
Match,
/// Keyword `for`.
#[token("for")]
For,
/// Keyword `in`.
#[token("in")]
In,
/// Keyword `if`.
#[token("if")]
If,
/// Keyword `else`.
#[token("else")]
Else,
/// Keyword `true`.
#[token("true")]
True,
/// Keyword `false`.
#[token("false")]
False,
/// Keyword `nil`.
#[token("nil")]
Nil,
/// Keyword `as`.
#[token("as")]
As,
/// Keyword `self`.
#[token("self")]
SelfKeyword,
/// Keyword `fn`.
#[token("fn")]
Fn,
/// Keyword `inline`.
#[token("inline")]
Inline,
/// Keyword `no_inline`.
#[token("no_inline")]
NoInline,
/// Keyword `cold`.
#[token("cold")]
Cold,
// --- Literals and names ---
/// String literal in one of two forms:
///
/// * single-line: `"…"` with no raw newline, allowing the escapes `\"`, `\\`, `\n`,
///   `\t`, `\r` and `\u` followed by exactly four hex digits (handled by `parse_string`);
/// * triple-quoted: `"""…"""`, which may additionally contain raw newlines, carriage
///   returns, tabs and one or two consecutive quotes (handled by `parse_multiline_string`).
#[regex(r#""([^"\\\n]|\\["\\ntr]|\\u[0-9a-fA-F]{4})*""#, parse_string)]
#[regex(
r#""""([^"\\\n\r\t]|\n|\r|\t|"[^"]|""[^"]|\\["\\ntr]|\\u[0-9a-fA-F]{4})*""""#,
parse_multiline_string
)]
String(String),
/// Numeric literal: a digit, then digits or `_`, an optional `.fraction`, an optional
/// exponent (`e`/`E`, optional sign, digits without `_`) and an optional type suffix
/// `I32`, `I64`, `F32` or `F64`. The matched text is converted by `parse_number`.
#[regex(
r"[0-9][0-9_]*(\.[0-9][0-9_]*)?([eE][+-]?[0-9]+)?(I32|I64|F32|F64)?",
|lex| parse_number(lex.slice())
)]
Number(crate::ast::NumberLiteral),
/// Regex literal `r/…/` with a non-empty body (backslash escapes any character) and
/// optional trailing flags drawn from `gimsuvy`. The payload is the entire matched
/// text, including the `r/` prefix, the closing `/` and the flags.
#[regex(r"r/([^/\\]|\\.)+/[gimsuvy]*", |lex| lex.slice().to_string())]
Regex(String),
/// Path literal: `/` followed by a letter, `.`, `_` or `~`, then further segments
/// separated by `/`. Whitespace, `,`, parentheses, braces and brackets end the path.
/// The payload is the matched text with the leading `/` removed.
#[regex(
r"/[a-zA-Z._~][^/\s\\,(){}\[\]]*(/([^/\s\\,(){}\[\]]|\\.)+)*",
|lex| lex.slice()[1..].to_string()
)]
Path(String),
/// Identifier: an ASCII letter followed by letters, digits or `_`, or a `_` followed by
/// at least one such character. A lone `_` is not covered by this pattern (see
/// `Underscore`).
#[regex(r"[a-zA-Z][a-zA-Z0-9_]*|_[a-zA-Z0-9_]+", |lex| lex.slice().to_string())]
Ident(String),
// --- Punctuation and operators ---
/// `.`
#[token(".")]
Dot,
/// `:`
#[token(":")]
Colon,
/// `::`
#[token("::")]
DoubleColon,
/// `,`
#[token(",")]
Comma,
/// `=`
#[token("=")]
Equals,
/// `+`
#[token("+")]
Plus,
/// `-`
#[token("-")]
Minus,
/// `*`
#[token("*")]
Star,
/// `/`
#[token("/")]
Slash,
/// `%`
#[token("%")]
Percent,
/// `==`
#[token("==")]
EqEq,
/// `!=`
#[token("!=")]
Ne,
/// `<`
#[token("<")]
Lt,
/// `>`
#[token(">")]
Gt,
/// `<=`
#[token("<=")]
Le,
/// `>=`
#[token(">=")]
Ge,
/// `&&`
#[token("&&")]
And,
/// `||`
#[token("||")]
Or,
/// `|`
#[token("|")]
Pipe,
/// `!`
#[token("!")]
Bang,
/// `?`
#[token("?")]
Question,
/// `->`
#[token("->")]
Arrow,
/// A lone `_`.
#[token("_")]
Underscore,
/// `..`
#[token("..")]
DotDot,
/// `...`
#[token("...")]
DotDotDot,
// --- Delimiters ---
/// `(`
#[token("(")]
LParen,
/// `)`
#[token(")")]
RParen,
/// `{`
#[token("{")]
LBrace,
/// `}`
#[token("}")]
RBrace,
/// `[`
#[token("[")]
LBracket,
/// `]`
#[token("]")]
RBracket,
}
impl Token {
    /// Reports whether this token is one of the reserved words.
    ///
    /// Every variant is classified explicitly, so adding a variant to the enum
    /// forces a decision here instead of silently defaulting.
    #[must_use]
    pub const fn is_keyword(&self) -> bool {
        match self {
            // Reserved words.
            Self::Trait
            | Self::Struct
            | Self::Impl
            | Self::Enum
            | Self::Module
            | Self::Use
            | Self::Pub
            | Self::Let
            | Self::Mut
            | Self::Sink
            | Self::Extern
            | Self::Match
            | Self::For
            | Self::In
            | Self::If
            | Self::Else
            | Self::True
            | Self::False
            | Self::Nil
            | Self::As
            | Self::SelfKeyword
            | Self::Fn
            | Self::Inline
            | Self::NoInline
            | Self::Cold => true,

            // Comments, literals, names, operators and delimiters.
            Self::BlockComment
            | Self::DocComment(_)
            | Self::InnerDocComment(_)
            | Self::String(_)
            | Self::Number(_)
            | Self::Regex(_)
            | Self::Path(_)
            | Self::Ident(_)
            | Self::Dot
            | Self::Colon
            | Self::DoubleColon
            | Self::Comma
            | Self::Equals
            | Self::Plus
            | Self::Minus
            | Self::Star
            | Self::Slash
            | Self::Percent
            | Self::EqEq
            | Self::Ne
            | Self::Lt
            | Self::Gt
            | Self::Le
            | Self::Ge
            | Self::And
            | Self::Or
            | Self::Pipe
            | Self::Bang
            | Self::Question
            | Self::Arrow
            | Self::Underscore
            | Self::DotDot
            | Self::DotDotDot
            | Self::LParen
            | Self::RParen
            | Self::LBrace
            | Self::RBrace
            | Self::LBracket
            | Self::RBracket => false,
        }
    }

    /// Returns the fixed source spelling of a keyword, operator or delimiter.
    ///
    /// Variants that carry a payload have no single spelling and yield the
    /// placeholder `"<complex token>"`; `BlockComment` yields `"<block comment>"`.
    #[must_use]
    pub const fn as_str(&self) -> &'static str {
        match self {
            // Payload-carrying variants: no fixed spelling.
            Self::DocComment(_)
            | Self::InnerDocComment(_)
            | Self::String(_)
            | Self::Number(_)
            | Self::Regex(_)
            | Self::Path(_)
            | Self::Ident(_) => "<complex token>",
            Self::BlockComment => "<block comment>",

            // Keywords.
            Self::Trait => "trait",
            Self::Struct => "struct",
            Self::Impl => "impl",
            Self::Enum => "enum",
            Self::Module => "mod",
            Self::Use => "use",
            Self::Pub => "pub",
            Self::Let => "let",
            Self::Mut => "mut",
            Self::Sink => "sink",
            Self::Extern => "extern",
            Self::Match => "match",
            Self::For => "for",
            Self::In => "in",
            Self::If => "if",
            Self::Else => "else",
            Self::True => "true",
            Self::False => "false",
            Self::Nil => "nil",
            Self::As => "as",
            Self::SelfKeyword => "self",
            Self::Fn => "fn",
            Self::Inline => "inline",
            Self::NoInline => "no_inline",
            Self::Cold => "cold",

            // Punctuation.
            Self::Dot => ".",
            Self::DotDot => "..",
            Self::DotDotDot => "...",
            Self::Colon => ":",
            Self::DoubleColon => "::",
            Self::Comma => ",",
            Self::Question => "?",
            Self::Arrow => "->",
            Self::Underscore => "_",

            // Assignment and arithmetic.
            Self::Equals => "=",
            Self::Plus => "+",
            Self::Minus => "-",
            Self::Star => "*",
            Self::Slash => "/",
            Self::Percent => "%",

            // Comparison.
            Self::EqEq => "==",
            Self::Ne => "!=",
            Self::Lt => "<",
            Self::Le => "<=",
            Self::Gt => ">",
            Self::Ge => ">=",

            // Logical and bitwise-looking operators.
            Self::And => "&&",
            Self::Or => "||",
            Self::Pipe => "|",
            Self::Bang => "!",

            // Delimiters.
            Self::LParen => "(",
            Self::RParen => ")",
            Self::LBrace => "{",
            Self::RBrace => "}",
            Self::LBracket => "[",
            Self::RBracket => "]",
        }
    }
}
impl std::fmt::Display for Token {
    /// Writes a short human-readable description of the token.
    ///
    /// Payload-carrying variants are described by category (`string`, `number`, …)
    /// without their contents; every other variant is written as its `as_str()`
    /// text wrapped in single quotes.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let category = match self {
            // Tokens without a payload: show the `as_str()` text in quotes.
            Self::BlockComment
            | Self::Trait
            | Self::Struct
            | Self::Impl
            | Self::Enum
            | Self::Module
            | Self::Use
            | Self::Pub
            | Self::Let
            | Self::Mut
            | Self::Sink
            | Self::Extern
            | Self::Match
            | Self::For
            | Self::In
            | Self::If
            | Self::Else
            | Self::True
            | Self::False
            | Self::Nil
            | Self::As
            | Self::SelfKeyword
            | Self::Fn
            | Self::Inline
            | Self::NoInline
            | Self::Cold
            | Self::Dot
            | Self::Colon
            | Self::DoubleColon
            | Self::Comma
            | Self::Equals
            | Self::Plus
            | Self::Minus
            | Self::Star
            | Self::Slash
            | Self::Percent
            | Self::EqEq
            | Self::Ne
            | Self::Lt
            | Self::Gt
            | Self::Le
            | Self::Ge
            | Self::And
            | Self::Or
            | Self::Pipe
            | Self::Bang
            | Self::Question
            | Self::Arrow
            | Self::Underscore
            | Self::DotDot
            | Self::DotDotDot
            | Self::LParen
            | Self::RParen
            | Self::LBrace
            | Self::RBrace
            | Self::LBracket
            | Self::RBracket => {
                return write!(f, "'{}'", self.as_str());
            }

            // Tokens with a payload: name the category only.
            Self::DocComment(_) => "doc comment",
            Self::InnerDocComment(_) => "inner doc comment",
            Self::String(_) => "string",
            Self::Number(_) => "number",
            Self::Regex(_) => "regex",
            Self::Path(_) => "path",
            Self::Ident(_) => "identifier",
        };
        f.write_str(category)
    }
}