use derive_new::new;
use logos::{Logos, Skip};
use std::ops::Range;
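/// Every lexical token in the language, as recognised by the `logos`-derived lexer.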
#[derive(Logos, Debug, PartialEq, Clone, Copy)]
pub enum Token {
#[regex("//.*")]
SinglelineComment,
#[regex("#.*")]
HashComment,
#[regex("/[\\*]([^\\*]|([\\*][^/]))*[\\*]+/")]
MultilineComment,
#[regex("#[\\*]([^\\*]|([\\*][^/]))*[\\*]+#")]
MultilineHashComment,
#[token("%%")]
ScriptBlock,
#[token("mod")]
Module,
#[token("internal")]
Internal,
#[token("pub")]
Pub,
#[token("export")]
Export,
#[token("use")]
Use,
#[token("data")]
Data,
#[token("class")]
Class,
#[token("fn")]
Function,
#[token("enum")]
Enum,
#[token("self")]
SelfKeyword,
#[token("super")]
Super,
#[token("outer")]
Outer,
#[token("let")]
Let,
#[token("mut")]
Mut,
#[token("const")]
Const,
#[token("static")]
Static,
#[token("new")]
New,
#[token("unsafe")]
Unsafe,
#[token("if")]
If,
#[token("else")]
Else,
#[token("return")]
Return,
#[token("break")]
Break,
#[token("while")]
While,
#[token("for")]
For,
#[token("match")]
Match,
#[token("continue")]
Continue,
#[token("loop")]
Loop,
#[token("yield")]
Yield,
#[token("true")]
True,
#[token("false")]
False,
#[token("and")]
And,
#[token("or")]
Or,
#[token("not")]
Not,
#[token("is")]
Is,
#[token("as")]
As,
#[token("in")]
In,
#[token("@")]
At,
#[token(",")]
Comma,
#[token("(")]
ParenLeft,
#[token(")")]
ParenRight,
#[token("{")]
CurlyBraceLeft,
#[token("}")]
CurlyBraceRight,
#[token("$")]
DollarSign,
#[token("->")]
FnReturn,
#[token("=>")]
ArrowFn,
#[token("...")]
TripleDot,
#[token("+")]
Plus,
#[token("-")]
Minus,
#[token("/")]
LeftSlash,
#[token("*")]
Star,
#[token("<")]
LeftArrow,
#[token(">")]
RightArrow,
#[token("=")]
Equals,
#[token("==")]
Identity,
#[token("!")]
Exclamation,
#[token("?")]
Question,
#[token(".")]
Dot,
#[token("..")]
DoubleDot,
#[token(":")]
Colon,
#[token("::")]
DoubleColon,
#[token("'")]
SingleQuote,
#[token("\"")]
DoubleQuote,
#[token("^")]
UpArrow,
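// NOTE: "\x26" is the ASCII ampersand ('&'); if a genuine end-of-input
// sentinel is intended for EOF, this pattern may need revisiting.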
#[regex("\x26")]
EOF,
#[regex(r"[_a-zA-Z]\w*")]
Identifier,
#[regex("[-][0-9]+", |lex| lex.slice().parse())]
Int(i64),
#[regex("-?[0-9]+\\.[0-9]+", |lex| lex.slice().parse())]
Float(f64),
#[regex("\"(?:[^\"]|\\.)*\"")]
DoubleQuotedString,
#[regex("'(?:[^\"]|\\.)*'")]
SingleQuotedString,
#[regex("`(?:[^\"]|\\.)*`")]
DashQuotedString,
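// Doubles as the `#[error]` variant that logos 0.12 requires; the regex
// callback skips runs of whitespace rather than emitting a token for them.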
#[error]
#[regex(r"[ \t\n\f]+", |_| Skip)]
Whitespace,
}
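/// Lex `file` into `(Token, byte span)` pairs, logging the source first.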
pub fn tokenise(file: &str) -> Vec<(Token, Range<usize>)> {
let tokens = Token::lexer(file);
log::info!("\n====SOURCE====\n{}\n==============", tokens.source());
tokens.spanned().collect()
}
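/// Convenience wrapper: lex `file` and wrap the tokens in a fresh `Parser`.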
pub fn tokenise_into_parser(file: &str) -> Parser {
let res = tokenise(file);
Parser::new(res, 0, file.to_string())
}
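/// Cursor over a lexed token stream, keeping the original source text so
/// token spans can be resolved back to strings; `#[derive(new)]` (from
/// `derive_new`) generates `Parser::new`.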
#[derive(Debug, new)]
pub struct Parser {
tokens: Vec<(Token, Range<usize>)>,
curr_index: usize,
input_string: String,
}
impl Parser {
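/// Advance the cursor to the next token.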
pub fn next_sym(&mut self) {
self.curr_index += 1;
}
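/// If the current token matches `token`, consume it and return its source
/// text; otherwise leave the cursor in place and return the token found.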
pub fn accept(&mut self, token: Token) -> Result<String, Token> {
// Report EOF instead of panicking once the token stream is exhausted.
let (found, span) = match self.tokens.get(self.curr_index) {
Some((t, r)) => (*t, r.clone()),
None => return Err(Token::EOF),
};
if token == found {
let res = Ok(self.input_string[span].to_owned());
self.next_sym();
return res;
}
Err(found)
}
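/// Non-failing form of `accept`: true if the token matched and was consumed.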
pub fn accept_ok(&mut self, token: Token) -> bool {
self.accept(token).is_ok()
}
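/// Like `accept`, but panics with a diagnostic when the token does not match.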
pub fn expect(&mut self, token: Token) -> String {
match self.accept(token) {
Ok(t) => t,
Err(e) => panic!("expected token {token:?}, but found {e:?}"),
}
}
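/// Log every token and its byte span via the `log` crate.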
pub fn log_tokens(&self) {
for token in &self.tokens {
log::info!("token = {:?}", token.0);
log::info!(" range = {:?}", token.1);
}
}
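/// Print every token and its byte span to stdout.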
pub fn print_tokens(&self) {
for token in &self.tokens {
println!("token = {:?}", token.0);
println!(" range = {:?}", token.1);
}
}
}
#[test]
fn test_parser() {
let example = "
mod root {
fn main() {
output(\"Hello, world!\")
}
}
";
let mut parser = tokenise_into_parser(example);
parser.print_tokens();
match parser.accept(Token::Module) {
Ok(ok) => println!("token = {ok}"),
Err(err) => println!("Didnt match, got {err:?} instead"),
}
match parser.accept(Token::Identifier) {
Ok(ok) => println!("token = {ok}"),
Err(err) => println!("Didnt match, got {err:?} instead"),
}
}
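// A minimal sketch exercising `expect` and `accept_ok` on a tiny input;
// the sample source and assertions here are illustrative, assuming the
// same grammar shapes as `test_parser` above.
#[test]
fn test_expect_and_accept_ok() {
let mut parser = tokenise_into_parser("mod root {}");
// `expect` panics on a mismatch, so it suits mandatory tokens.
assert_eq!(parser.expect(Token::Module), "mod");
// A mismatch leaves the cursor in place, so `accept_ok` returns false...
assert!(!parser.accept_ok(Token::Function));
// ...and the identifier is still the current token afterwards.
assert_eq!(parser.expect(Token::Identifier), "root");
assert!(parser.accept_ok(Token::CurlyBraceLeft));
assert!(parser.accept_ok(Token::CurlyBraceRight));
}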