use std::ops::Range;
use logos::Logos;
use crate::{
ast::{Expr, Ops, TopLevel},
error::ParseError,
};
pub type Result<T> = std::result::Result<T, ParseError>;
fn log(s: impl AsRef<str>) {
#[cfg(debug_assertions)]
println!("{}", s.as_ref());
}
macro_rules! dbg_debug {
($val:expr) => {
#[cfg(debug_assertions)]
{
dbg!($val)
}
};
}
#[derive(Clone)]
pub struct Lexer {
tokens: logos::Lexer<'static, Tokens>,
pos: (usize, usize),
brace_stack: Vec<Braces>,
close_until: Option<Braces>,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum Braces {
Paren,
Square,
}
impl std::fmt::Display for Braces {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let content = match self {
Braces::Paren => "Parenthesis",
Braces::Square => "Square Bracket",
};
f.write_str(content)
}
}
impl Lexer {
pub fn new<'a>(src: &'a str) -> Self {
let src = unsafe { std::mem::transmute::<&'a str, &'static str>(src) };
let tokens = logos::Lexer::new(src);
Self {
tokens,
pos: (0, 0),
brace_stack: Vec::new(),
close_until: None,
}
}
pub fn brace(&self) -> Option<Braces> {
self.brace_stack.last().map(|v| *v)
}
pub fn next(&mut self) -> Result<Token> {
let next = match self.tokens.next() {
Some(t) => t,
None => Tokens::Eof,
};
let span = self.tokens.span();
if next == Tokens::Whitespace {
self.pos.1 += span.len();
return self.next();
}
if next == Tokens::Newline {
self.pos.0 += span.len();
return self.next();
}
if next == Tokens::OpenParen {
self.brace_stack.push(Braces::Paren);
}
if next == Tokens::OpenSquare {
self.brace_stack.push(Braces::Square);
}
if next == Tokens::CloseParen {
match self.brace_stack.pop() {
None => {
return Err(ParseError::new(
format!(
"Tried to close {} with no matching opening {}.",
Braces::Paren,
Braces::Paren
),
self.pos,
))
}
Some(brace_ty) => {
if brace_ty != Braces::Paren {
return Err(ParseError::new(
format!(
"Tried to close {} but expected closing {brace_ty}.",
Braces::Paren
),
self.pos,
));
}
}
}
}
if next == Tokens::CloseSquare {
match self.brace_stack.pop() {
None => {
return Err(ParseError::new(
format!(
"Tried to close {} with no matching opening {}.",
Braces::Square,
Braces::Square
),
self.pos,
))
}
Some(brace_ty) => {
if brace_ty != Braces::Square {
return Err(ParseError::new(
format!(
"Tried to close {} but expected closing {brace_ty}.",
Braces::Square
),
self.pos,
));
}
}
}
}
self.pos.1 += span.len();
Ok(Token {
data: next,
span: span.into(),
})
}
pub fn expect(&mut self, t: Tokens) -> Result<Token> {
let e = self.next()?;
if e.data == t {
Ok(e)
} else {
Err(ParseError::new(
format!(
"Unexpected token input: Expected `{t:?}`, found {:?}.",
e.data
),
self.pos,
))
}
}
pub fn expect_peek(&mut self, t: Tokens) -> Result<Token> {
let e = self.peek()?;
if e.data == t {
Ok(e)
} else {
Err(ParseError::new(
format!(
"Unexpected token input: Expected `{t:?}`, found {:?}.",
e.data
),
self.pos,
))
}
}
pub fn peek(&self) -> Result<Token> {
let mut lexer = self.clone();
lexer.next()
}
pub fn parse(&mut self) -> Result<TopLevel> {
let expr = self.expr_bp(0, 0, None)?;
self.next()?;
Ok(TopLevel::new(expr))
}
pub fn expr_bp(
&mut self,
min_bp: u8,
call_depth: u8,
current_brace: Option<Braces>,
) -> Result<Expr> {
dbg_debug!(call_depth);
let next = self.next()?;
dbg_debug!(self.tokens.slice());
dbg_debug!(&next.data);
let mut lhs = self.parse_lhs(next, call_depth)?;
loop {
if self.should_close(current_brace) {
log(format!(
"<- breaking out due to matching brackets {call_depth}"
));
dbg_debug!(&lhs);
return Ok(lhs);
};
let op = {
let maybe_operator = self.peek()?;
dbg_debug!(&maybe_operator.data);
let ty = maybe_operator.ty();
if let TokenType::Op = ty {
maybe_operator.data
} else {
return self.resolve_not_op(maybe_operator, lhs, call_depth, ty);
}
};
if let Some((l_bp, ())) = postfix_binding_power(op.clone()) {
if l_bp < min_bp {
break;
}
self.next()?;
lhs = if op == Tokens::DropLowest {
let drop_lowest = Expr::DropLowest(Box::new(lhs));
log(format!("::: emitting {drop_lowest:?}"));
drop_lowest
} else if op == Tokens::DropHighest {
let drop_highest = Expr::DropHighest(Box::new(lhs));
log(format!("::: emitting {drop_highest:?}"));
drop_highest
} else {
panic!("unhandled token {op:?}");
};
continue;
}
if let Some((l_bp, r_bp)) = infix_binding_power(op.clone()) {
if l_bp < min_bp {
break;
}
self.next()?;
dbg_debug!(&self.tokens.slice());
lhs = {
log(format!("-> eval rhs of op {op:?}"));
let rhs = self.expr_bp(r_bp, call_depth + 1, current_brace.clone())?;
let mut operator = Ops::from_token(op).unwrap();
operator.set_lhs(lhs);
operator.set_rhs(rhs);
let op = Expr::Op(Box::new(operator));
log(format!("::: emitting op {op:?}"));
op
};
continue;
}
log(format!("<- end of loop {call_depth}"));
break;
}
Ok(lhs)
}
fn should_close(&mut self, current_br: Option<Braces>) -> bool {
match &self.close_until {
Some(close_brace) => {
log(format!("target brace is {close_brace}"));
match ¤t_br {
Some(c_brace) => {
log(format!("current brace is {}", c_brace));
if c_brace == close_brace {
return true;
}
}
None => {
panic!("current brace is None");
}
};
}
None => {}
}
false
}
fn parse_lhs(&mut self, next: Spanned<Tokens>, call_depth: u8) -> Result<Expr> {
Ok(match next.data {
Tokens::Number => {
let num = Expr::Number(self.tokens.slice().parse().unwrap());
num
}
Tokens::OpenParen => {
log("-> eval content of open_paren");
let lhs = self.expr_bp(0, call_depth + 1, Some(Braces::Paren))?;
self.close_until = None;
let parens = Expr::Parens(Box::new(lhs));
log(format!("::: emitting parens expr {parens:?}"));
parens
}
Tokens::OpenSquare => {
log("-> eval content of open_square");
let lhs = self.expr_bp(0, call_depth + 1, Some(Braces::Square))?;
self.close_until = None;
let sum = Expr::Sum(Box::new(lhs));
log(format!("::: emitting sum {sum:?}"));
sum
}
t => return Err(ParseError::new(
format!("Invalid token: {t:?}"),
self.pos,
)),
})
}
fn resolve_not_op(
&mut self,
maybe_operator: Token,
lhs: Expr,
call_depth: u8,
ty: TokenType,
) -> Result<Expr> {
if maybe_operator.data == Tokens::CloseParen {
self.close_until = Some(Braces::Paren);
self.expect(Tokens::CloseParen)?;
log(format!("lhs {lhs:?}"));
log(format!("<- close paren {call_depth}"));
return Ok(lhs);
} else if maybe_operator.data == Tokens::CloseSquare {
self.close_until = Some(Braces::Square);
self.expect(Tokens::CloseSquare)?;
log(format!("lhs {lhs:?}"));
log(format!("<- close square {call_depth}"));
return Ok(lhs);
} else if maybe_operator.data == Tokens::Eof {
if call_depth == 0 {
match self.brace_stack.pop() {
None => {}
Some(brace_type) => {
return Err(ParseError::new(
format!("Unclosed {brace_type}, but found EOF"),
self.pos,
));
}
}
}
log(format!("<- eof {call_depth}"));
return Ok(lhs);
} else {
return Err(ParseError::new(
format!("Unexpected token input: Expected `Op`, found {ty:?}."),
self.pos,
));
}
}
}
fn prefix_binding_power(op: Tokens) -> ((), u8) {
match op {
_ => panic!("bad op: {:?}", op),
}
}
fn postfix_binding_power(op: Tokens) -> Option<(u8, ())> {
let res = match op {
Tokens::DropLowest => (11, ()),
Tokens::DropHighest => (11, ()),
_ => return None,
};
Some(res)
}
fn infix_binding_power(op: Tokens) -> Option<(u8, u8)> {
let res = match op {
Tokens::Sub | Tokens::SubEq => (1, 2),
Tokens::Add | Tokens::AddEq => (3, 4),
Tokens::Mul | Tokens::FDiv | Tokens::RDiv => (5, 6),
Tokens::Max | Tokens::Min => (9, 10),
Tokens::Roll => (99, 100),
_ => return None,
};
Some(res)
}
#[derive(Debug, Clone, Copy)]
pub struct Span {
start: usize,
end: usize,
}
impl From<Range<usize>> for Span {
fn from(range: Range<usize>) -> Self {
Self {
start: range.start,
end: range.end,
}
}
}
#[derive(Debug)]
pub struct Spanned<T> {
data: T,
span: Span,
}
pub type Token = Spanned<Tokens>;
impl Token {
pub fn ty(&self) -> TokenType {
match self.data {
Tokens::OpenParen | Tokens::CloseParen | Tokens::OpenSquare | Tokens::CloseSquare => {
TokenType::Punct
}
Tokens::Add
| Tokens::Sub
| Tokens::Mul
| Tokens::RDiv
| Tokens::FDiv
| Tokens::AddEq
| Tokens::SubEq
| Tokens::Max
| Tokens::Min
| Tokens::Roll
| Tokens::DropLowest
| Tokens::DropHighest => TokenType::Op,
Tokens::Number => TokenType::Num,
Tokens::Unknown => TokenType::Unknown,
Tokens::Newline | Tokens::Whitespace | Tokens::Eof => {
TokenType::Afpulgdjawpf98g6jdh49awfdp654f6glndprkus74junldhfuhl
}
}
}
}
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum TokenType {
Op,
Num,
Punct,
Unknown,
Afpulgdjawpf98g6jdh49awfdp654f6glndprkus74junldhfuhl,
}
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Logos)]
pub enum Tokens {
#[token("(")]
OpenParen,
#[token(")")]
CloseParen,
#[token("[")]
OpenSquare,
#[token("]")]
CloseSquare,
#[token("+")]
Add,
#[token("-")]
Sub,
#[token("*")]
Mul,
#[token("/")]
RDiv,
#[token("//")]
FDiv,
#[token("+=")]
AddEq,
#[token("-=")]
SubEq,
#[token("^")]
Max,
#[token("_")]
Min,
#[token("d")]
Roll,
#[token("-L")]
DropLowest,
#[token("-D")]
DropHighest,
#[regex(r"\d+")]
Number,
#[regex(r"\s+")]
Whitespace,
#[regex(r"\n+")]
Newline,
#[error]
Unknown,
Eof,
}
#[cfg(test)]
mod test {
use super::*;
fn expect_syntax(src: &str, expect: Expr) {
println!("\n\nparsing '{src}'");
let mut lexer = Lexer::new(src);
let result = lexer.parse();
let expr = result.unwrap().expr;
dbg!(&expr);
assert_eq!(
expect, expr,
"expected: \n{expect:?} \nbut parsed: \n{expr:?}"
);
}
fn expect_parse_err(src: &str, expect: ParseError) {
println!("\n\nparsing '{src}'");
let mut lexer = Lexer::new(src);
let result = lexer.parse();
let err = result.unwrap_err();
dbg!(&err);
assert_eq!(
expect, err,
"expected: \n{expect:?} \nbut parsed: \n{err:?}"
);
}
fn parse_syntax(src: &str) -> Result<TopLevel> {
log(format!("\n\nparsing '{src}'"));
let mut lexer = Lexer::new(src);
let result = lexer.parse();
dbg_debug!(&result);
result
}
fn toplevel(expr: Expr) -> Result<TopLevel> {
Ok(TopLevel { expr })
}
fn add(l: Expr, r: Expr) -> Expr {
Ops::Add(l, r).to_expr()
}
fn sub(l: Expr, r: Expr) -> Expr {
Ops::Sub(l, r).to_expr()
}
fn mul(l: Expr, r: Expr) -> Expr {
Ops::Mul(l, r).to_expr()
}
fn rdiv(l: Expr, r: Expr) -> Expr {
Ops::RDiv(l, r).to_expr()
}
fn fdiv(l: Expr, r: Expr) -> Expr {
Ops::FDiv(l, r).to_expr()
}
fn add_eq(l: Expr, r: Expr) -> Expr {
Ops::AddEq(l, r).to_expr()
}
fn sub_eq(l: Expr, r: Expr) -> Expr {
Ops::SubEq(l, r).to_expr()
}
fn max(l: Expr, r: Expr) -> Expr {
Ops::Max(l, r).to_expr()
}
fn min(l: Expr, r: Expr) -> Expr {
Ops::Min(l, r).to_expr()
}
fn roll(l: Expr, r: Expr) -> Expr {
Ops::Roll(l, r).to_expr()
}
fn sum(expr: Expr) -> Expr {
Expr::Sum(Box::new(expr))
}
fn num(num: i128) -> Expr {
Expr::Number(num)
}
fn paren(expr: Expr) -> Expr {
Expr::Parens(Box::new(expr))
}
fn parse_err(msg: &str, pos: (usize, usize)) -> ParseError {
ParseError::new(msg, pos)
}
#[test]
fn parse_things() {
expect_syntax("1+2*3", add(num(1), mul(num(2), num(3))));
expect_syntax("2^3_4", min(max(num(2), num(3)), num(4)));
expect_syntax("2^3_4d5", min(max(num(2), num(3)), roll(num(4), num(5))));
expect_syntax("2d5+3", add(roll(num(2), num(5)), num(3)));
expect_syntax("(2d5)+3", add(paren(roll(num(2), num(5))), num(3)));
expect_syntax("((2d5)+3)", paren(add(paren(roll(num(2), num(5))), num(3))));
expect_syntax("(2d5)", paren(roll(num(2), num(5))));
expect_syntax(
"(2d5+3)*4",
mul(paren(add(roll(num(2), num(5)), num(3))), num(4)),
);
}
#[test]
fn parse_parenthesis() {
expect_syntax("(1)", paren(num(1)));
expect_parse_err(
"(1",
parse_err("Unclosed Parenthesis, but found EOF", (0, 2)),
);
expect_parse_err(
"1)",
parse_err(
"Tried to close Parenthesis with no matching opening Parenthesis.",
(0, 1),
),
);
}
#[test]
fn parse_brackets() {
expect_parse_err(
"([2)",
parse_err(
"Tried to close Parenthesis but expected closing Square Bracket.",
(0, 3),
),
);
expect_parse_err(
"(2])",
parse_err(
"Tried to close Square Bracket but expected closing Parenthesis.",
(0, 2),
),
);
expect_parse_err(
"[(2)",
parse_err("Unclosed Square Bracket, but found EOF", (0, 4)),
);
expect_parse_err(
"[2",
parse_err("Unclosed Square Bracket, but found EOF", (0, 2)),
);
expect_parse_err(
"3]",
parse_err(
"Tried to close Square Bracket with no matching opening Square Bracket.",
(0, 1),
),
);
expect_syntax(
"(1+2)+(3+4)",
add(paren(add(num(1), num(2))), paren(add(num(3), num(4)))),
);
expect_syntax("[2d5]", sum(roll(num(2), num(5))));
expect_syntax("[1]", sum(num(1)));
expect_syntax(
"[1d6]d[1d6]",
roll(sum(roll(num(1), num(6))), sum(roll(num(1), num(6)))),
);
expect_syntax("[1d6]d6", roll(sum(roll(num(1), num(6))), num(6)));
}
#[test]
fn parse_complicated_syntax() {
expect_syntax(
"([6d([1d6])]_96*[1d6])//7",
fdiv(
paren(mul(
min(sum(roll(num(6), paren(sum(roll(num(1), num(6)))))), num(96)),
sum(roll(num(1), num(6))),
)),
num(7),
),
);
}
#[test]
fn parse_invalid_syntax() {
expect_parse_err(
"10d6++2",
parse_err("Invalid token: Add", (0,6))
);
expect_parse_err(
"10d6+-2",
parse_err("Invalid token: Sub", (0,6))
);
}
#[test]
fn display() {
let toplevel = parse_syntax("([6d([1d6])]_96*[1d6]) // 7").unwrap();
assert_eq!(format!("{toplevel}"), "([6d([1d6])]_96*[1d6])//7")
}
}