shedron/
parser.rs

1use std::ops::Range;
2
3use logos::Logos;
4
5use crate::{
6    ast::{Expr, Ops, TopLevel},
7    error::ParseError,
8};
9
10/// Module level result for returning `T` or a [`ParseError`]
11///
12/// [`RunError`]: crate::error::ParseError
13pub type Result<T> = std::result::Result<T, ParseError>;
14
15fn log(s: impl AsRef<str>) {
16    #[cfg(debug_assertions)]
17    println!("{}", s.as_ref());
18}
19
20macro_rules! dbg_debug {
21    ($val:expr) => {
22        #[cfg(debug_assertions)]
23        {
24            dbg!($val)
25        }
26    };
27}
28
29#[derive(Clone)]
30/// Outer lexer struct to parse shedron
31pub struct Lexer {
32    /// Internal lexer provided by logos
33    tokens: logos::Lexer<'static, Tokens>,
34
35    /// Position of the cursor in the form `(line, column)`
36    pos: (usize, usize),
37
38    brace_stack: Vec<Braces>,
39    close_until: Option<Braces>,
40}
41
42#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
43pub enum Braces {
44    Paren,
45    Square,
46}
47
48impl std::fmt::Display for Braces {
49    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
50        let content = match self {
51            Braces::Paren => "Parenthesis",
52            Braces::Square => "Square Bracket",
53        };
54
55        f.write_str(content)
56    }
57}
58
59impl Lexer {
60    /// Instance a `Lexer`
61    ///
62    /// This function is very safe :)
63    pub fn new<'a>(src: &'a str) -> Self {
64        let src = unsafe { std::mem::transmute::<&'a str, &'static str>(src) };
65        let tokens = logos::Lexer::new(src);
66
67        Self {
68            tokens,
69            pos: (0, 0),
70            brace_stack: Vec::new(),
71            close_until: None,
72        }
73    }
74
75    pub fn brace(&self) -> Option<Braces> {
76        self.brace_stack.last().map(|v| *v)
77    }
78
79    /// Consume a single token and advance the lexer
80    ///
81    /// This function has possible side effects:
82    /// - Changes `self.pos`
83    /// - Changes `self.brace_stack`
84    pub fn next(&mut self) -> Result<Token> {
85        let next = match self.tokens.next() {
86            Some(t) => t,
87            None => Tokens::Eof,
88        };
89
90        let span = self.tokens.span();
91
92        if next == Tokens::Whitespace {
93            self.pos.1 += span.len();
94            return self.next();
95        }
96
97        if next == Tokens::Newline {
98            self.pos.0 += span.len();
99            return self.next();
100        }
101
102        if next == Tokens::OpenParen {
103            self.brace_stack.push(Braces::Paren);
104        }
105
106        if next == Tokens::OpenSquare {
107            self.brace_stack.push(Braces::Square);
108        }
109
110        if next == Tokens::CloseParen {
111            match self.brace_stack.pop() {
112                None => {
113                    return Err(ParseError::new(
114                        format!(
115                            "Tried to close {} with no matching opening {}.",
116                            Braces::Paren,
117                            Braces::Paren
118                        ),
119                        self.pos,
120                    ))
121                }
122                Some(brace_ty) => {
123                    if brace_ty != Braces::Paren {
124                        return Err(ParseError::new(
125                            format!(
126                                "Tried to close {} but expected closing {brace_ty}.",
127                                Braces::Paren
128                            ),
129                            self.pos,
130                        ));
131                    }
132                }
133            }
134        }
135
136        if next == Tokens::CloseSquare {
137            match self.brace_stack.pop() {
138                None => {
139                    return Err(ParseError::new(
140                        format!(
141                            "Tried to close {} with no matching opening {}.",
142                            Braces::Square,
143                            Braces::Square
144                        ),
145                        self.pos,
146                    ))
147                }
148                Some(brace_ty) => {
149                    if brace_ty != Braces::Square {
150                        return Err(ParseError::new(
151                            format!(
152                                "Tried to close {} but expected closing {brace_ty}.",
153                                Braces::Square
154                            ),
155                            self.pos,
156                        ));
157                    }
158                }
159            }
160        }
161
162        self.pos.1 += span.len();
163
164        Ok(Token {
165            data: next,
166            span: span.into(),
167        })
168    }
169
170    /// Consume a single token, and compare it to `t`
171    ///
172    /// Errors if `self.next()?.data` is not equal to `t`
173    pub fn expect(&mut self, t: Tokens) -> Result<Token> {
174        let e = self.next()?;
175
176        if e.data == t {
177            Ok(e)
178        } else {
179            Err(ParseError::new(
180                format!(
181                    "Unexpected token input: Expected `{t:?}`, found {:?}.",
182                    e.data
183                ),
184                self.pos,
185            ))
186        }
187    }
188
189    /// Peeks ahead a single token, and compare it to `t`
190    ///
191    /// Errors if `self.next()?.data` is not equal to `t`
192    pub fn expect_peek(&mut self, t: Tokens) -> Result<Token> {
193        let e = self.peek()?;
194
195        if e.data == t {
196            Ok(e)
197        } else {
198            Err(ParseError::new(
199                format!(
200                    "Unexpected token input: Expected `{t:?}`, found {:?}.",
201                    e.data
202                ),
203                self.pos,
204            ))
205        }
206    }
207
208    /// Peeks forward one token without consuming anything
209    ///
210    /// This function is expensive
211    pub fn peek(&self) -> Result<Token> {
212        let mut lexer = self.clone();
213        lexer.next()
214    }
215
216    /// Parse some source
217    pub fn parse(&mut self) -> Result<TopLevel> {
218        // let expr = self.parse_expr()?;
219        let expr = self.expr_bp(0, 0, None)?;
220
221        self.next()?;
222
223        Ok(TopLevel::new(expr))
224    }
225
226    /// Recursively parses a single expression, while observing operactor precedence
227    pub fn expr_bp(
228        &mut self,
229        min_bp: u8,
230        call_depth: u8,
231        current_brace: Option<Braces>,
232    ) -> Result<Expr> {
233        dbg_debug!(call_depth);
234        let next = self.next()?;
235        dbg_debug!(self.tokens.slice());
236        dbg_debug!(&next.data);
237        let mut lhs = self.parse_lhs(next, call_depth)?;
238
239        loop {
240            if self.should_close(current_brace) {
241                log(format!(
242                    "<- breaking out due to matching brackets {call_depth}"
243                ));
244                dbg_debug!(&lhs);
245                return Ok(lhs);
246            };
247
248            let op = {
249                let maybe_operator = self.peek()?;
250                dbg_debug!(&maybe_operator.data);
251                let ty = maybe_operator.ty();
252
253                if let TokenType::Op = ty {
254                    maybe_operator.data
255                } else {
256                    return self.resolve_not_op(maybe_operator, lhs, call_depth, ty);
257                }
258            };
259
260            if let Some((l_bp, ())) = postfix_binding_power(op.clone()) {
261                if l_bp < min_bp {
262                    break;
263                }
264                self.next()?;
265
266                lhs = if op == Tokens::DropLowest {
267                    let drop_lowest = Expr::DropLowest(Box::new(lhs));
268                    log(format!("::: emitting {drop_lowest:?}"));
269                    drop_lowest
270                } else if op == Tokens::DropHighest {
271                    let drop_highest = Expr::DropHighest(Box::new(lhs));
272                    log(format!("::: emitting {drop_highest:?}"));
273                    drop_highest
274                } else {
275                    panic!("unhandled token {op:?}");
276                    // log(format!("-> eval rhs of op {op:?}"));
277                    // let rhs = self.expr_bp(0, call_depth+1, current_brace)?;
278                    // let mut operator = Ops::from_token(op).unwrap();
279
280                    // operator.set_rhs(rhs);
281
282                    // let op = Expr::Op(Box::new(operator));
283
284                    // log(format!("::: emitting op {op:?}"));
285                    // op
286                };
287                continue;
288            }
289
290            if let Some((l_bp, r_bp)) = infix_binding_power(op.clone()) {
291                if l_bp < min_bp {
292                    break;
293                }
294                self.next()?;
295                dbg_debug!(&self.tokens.slice());
296
297                lhs = {
298                    log(format!("-> eval rhs of op {op:?}"));
299                    let rhs = self.expr_bp(r_bp, call_depth + 1, current_brace.clone())?;
300                    let mut operator = Ops::from_token(op).unwrap();
301
302                    operator.set_lhs(lhs);
303                    operator.set_rhs(rhs);
304
305                    let op = Expr::Op(Box::new(operator));
306                    log(format!("::: emitting op {op:?}"));
307                    op
308                };
309                continue;
310            }
311
312            log(format!("<- end of loop {call_depth}"));
313            break;
314        }
315
316        //dbg_debug!(&lhs);
317        Ok(lhs)
318    }
319
320    /// Recursively traverse up the recursion callstack, returning true if [`current_br`] is equal to [`self.close_until`]
321    /// 
322    /// [`current_br`]: std::option::Option<crate::parser::Braces>
323    /// [`self.close_until`]: std::option::Option<crate::parser::Braces>
324    fn should_close(&mut self, current_br: Option<Braces>) -> bool {
325        match &self.close_until {
326            Some(close_brace) => {
327                log(format!("target brace is {close_brace}"));
328                match &current_br {
329                    Some(c_brace) => {
330                        log(format!("current brace is {}", c_brace));
331                        if c_brace == close_brace {
332                            return true;
333                        }
334                    }
335                    None => {
336                        panic!("current brace is None");
337                    }
338                };
339            }
340            None => {}
341        }
342        false
343    }
344
345    /// Parse the left hand side of a expression
346    fn parse_lhs(&mut self, next: Spanned<Tokens>, call_depth: u8) -> Result<Expr> {
347        Ok(match next.data {
348            Tokens::Number => {
349                let num = Expr::Number(self.tokens.slice().parse().unwrap());
350                num
351            }
352            Tokens::OpenParen => {
353                log("-> eval content of open_paren");
354                let lhs = self.expr_bp(0, call_depth + 1, Some(Braces::Paren))?;
355                self.close_until = None;
356
357                let parens = Expr::Parens(Box::new(lhs));
358                log(format!("::: emitting parens expr {parens:?}"));
359                parens
360            }
361            Tokens::OpenSquare => {
362                log("-> eval content of open_square");
363                let lhs = self.expr_bp(0, call_depth + 1, Some(Braces::Square))?;
364                self.close_until = None;
365
366                let sum = Expr::Sum(Box::new(lhs));
367                log(format!("::: emitting sum {sum:?}"));
368                sum
369            }
370
371            // Token::Op(op) => {
372            //     let ((), r_bp) = prefix_binding_power(op);
373            //     let rhs = expr_bp(lexer, r_bp);
374            //     log("{} ", op);
375            //     S::Cons(op, vec![rhs])
376            // }
377            //t => panic!("bad token: {:?}", t),
378            t => return Err(ParseError::new(
379                format!("Invalid token: {t:?}"),
380                self.pos,
381            )),
382        })
383    }
384
385    /// Used if rhs is not an operator
386    fn resolve_not_op(
387        &mut self,
388        maybe_operator: Token,
389        lhs: Expr,
390        call_depth: u8,
391        ty: TokenType,
392    ) -> Result<Expr> {
393        if maybe_operator.data == Tokens::CloseParen {
394            self.close_until = Some(Braces::Paren);
395            self.expect(Tokens::CloseParen)?;
396            log(format!("lhs {lhs:?}"));
397            log(format!("<- close paren {call_depth}"));
398            return Ok(lhs);
399        } else if maybe_operator.data == Tokens::CloseSquare {
400            self.close_until = Some(Braces::Square);
401            self.expect(Tokens::CloseSquare)?;
402            log(format!("lhs {lhs:?}"));
403            log(format!("<- close square {call_depth}"));
404            return Ok(lhs);
405        } else if maybe_operator.data == Tokens::Eof {
406            if call_depth == 0 {
407                match self.brace_stack.pop() {
408                    None => {}
409                    Some(brace_type) => {
410                        return Err(ParseError::new(
411                            format!("Unclosed {brace_type}, but found EOF"),
412                            self.pos,
413                        ));
414                    }
415                }
416            }
417            log(format!("<- eof {call_depth}"));
418            return Ok(lhs);
419        } else {
420            return Err(ParseError::new(
421                format!("Unexpected token input: Expected `Op`, found {ty:?}."),
422                self.pos,
423            ));
424        }
425    }
426}
427
428/// Binding power of a prefixing operator
429fn prefix_binding_power(op: Tokens) -> ((), u8) {
430    match op {
431        //Tokens::Add => ((), 7),
432        _ => panic!("bad op: {:?}", op),
433    }
434}
435
436/// Binding power of a postfix operator
437fn postfix_binding_power(op: Tokens) -> Option<(u8, ())> {
438    let res = match op {
439        Tokens::DropLowest => (11, ()),
440        Tokens::DropHighest => (11, ()),
441        // '!' => (11, ()),
442        // '[' => (11, ()),
443        _ => return None,
444    };
445    Some(res)
446}
447
448/// Binding power of an infix operator
449fn infix_binding_power(op: Tokens) -> Option<(u8, u8)> {
450    let res = match op {
451        // '=' => (2, 1),
452        // '?' => (4, 3),
453        Tokens::Sub | Tokens::SubEq => (1, 2),
454        Tokens::Add | Tokens::AddEq => (3, 4),
455        Tokens::Mul | Tokens::FDiv | Tokens::RDiv => (5, 6),
456        Tokens::Max | Tokens::Min => (9, 10),
457        Tokens::Roll => (99, 100),
458        _ => return None,
459    };
460    Some(res)
461}
462
463#[derive(Debug, Clone, Copy)]
464pub struct Span {
465    start: usize,
466    end: usize,
467}
468
469impl From<Range<usize>> for Span {
470    fn from(range: Range<usize>) -> Self {
471        Self {
472            start: range.start,
473            end: range.end,
474        }
475    }
476}
477
478#[derive(Debug)]
479/// Represents a byte span of source code with some associated data
480pub struct Spanned<T> {
481    data: T,
482    span: Span,
483}
484
485/// Single token
486pub type Token = Spanned<Tokens>;
487
488impl Token {
489    pub fn ty(&self) -> TokenType {
490        match self.data {
491            Tokens::OpenParen | Tokens::CloseParen | Tokens::OpenSquare | Tokens::CloseSquare => {
492                TokenType::Punct
493            }
494            Tokens::Add
495            | Tokens::Sub
496            | Tokens::Mul
497            | Tokens::RDiv
498            | Tokens::FDiv
499            | Tokens::AddEq
500            | Tokens::SubEq
501            | Tokens::Max
502            | Tokens::Min
503            | Tokens::Roll
504            | Tokens::DropLowest
505            | Tokens::DropHighest => TokenType::Op,
506            Tokens::Number => TokenType::Num,
507            Tokens::Unknown => TokenType::Unknown,
508            Tokens::Newline | Tokens::Whitespace | Tokens::Eof => {
509                TokenType::Afpulgdjawpf98g6jdh49awfdp654f6glndprkus74junldhfuhl
510            }
511        }
512    }
513}
514
515#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
516/// Different possible types of any given token
517pub enum TokenType {
518    Op,
519    Num,
520    Punct,
521    Unknown,
522    Afpulgdjawpf98g6jdh49awfdp654f6glndprkus74junldhfuhl,
523}
524
525#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Logos)]
526pub enum Tokens {
527    // Punct
528    #[token("(")]
529    OpenParen,
530
531    #[token(")")]
532    CloseParen,
533
534    #[token("[")]
535    OpenSquare,
536
537    #[token("]")]
538    CloseSquare,
539
540    // Operators
541    #[token("+")]
542    Add,
543
544    #[token("-")]
545    Sub,
546
547    #[token("*")]
548    Mul,
549
550    #[token("/")]
551    RDiv,
552
553    #[token("//")]
554    FDiv,
555
556    #[token("+=")]
557    AddEq,
558
559    #[token("-=")]
560    SubEq,
561
562    #[token("^")]
563    Max,
564
565    #[token("_")]
566    Min,
567
568    #[token("d")]
569    Roll,
570
571    #[token("-L")]
572    DropLowest,
573
574    #[token("-D")]
575    DropHighest,
576
577    // Values
578    #[regex(r"\d+")]
579    Number,
580
581    // afpulgdj;awpf98g6jdh49awfd;p654f6glndprkus74junldhfuhl
582    #[regex(r"\s+")]
583    Whitespace,
584
585    #[regex(r"\n+")]
586    Newline,
587
588    #[error]
589    Unknown,
590
591    Eof,
592}
593
594#[cfg(test)]
595mod test {
596    use super::*;
597
598    fn expect_syntax(src: &str, expect: Expr) {
599        println!("\n\nparsing '{src}'");
600        let mut lexer = Lexer::new(src);
601        let result = lexer.parse();
602        let expr = result.unwrap().expr;
603        // println!("\n {:?}", &result);
604        dbg!(&expr);
605
606        assert_eq!(
607            expect, expr,
608            "expected: \n{expect:?} \nbut parsed: \n{expr:?}"
609        );
610    }
611
612    fn expect_parse_err(src: &str, expect: ParseError) {
613        println!("\n\nparsing '{src}'");
614        let mut lexer = Lexer::new(src);
615        let result = lexer.parse();
616        let err = result.unwrap_err();
617        // println!("\n {:?}", &result);
618        dbg!(&err);
619
620        assert_eq!(
621            expect, err,
622            "expected: \n{expect:?} \nbut parsed: \n{err:?}"
623        );
624    }
625    fn parse_syntax(src: &str) -> Result<TopLevel> {
626        log(format!("\n\nparsing '{src}'"));
627        let mut lexer = Lexer::new(src);
628        let result = lexer.parse();
629        dbg_debug!(&result);
630
631        result
632    }
633
634    fn toplevel(expr: Expr) -> Result<TopLevel> {
635        Ok(TopLevel { expr })
636    }
637    fn add(l: Expr, r: Expr) -> Expr {
638        Ops::Add(l, r).to_expr()
639    }
640    fn sub(l: Expr, r: Expr) -> Expr {
641        Ops::Sub(l, r).to_expr()
642    }
643    fn mul(l: Expr, r: Expr) -> Expr {
644        Ops::Mul(l, r).to_expr()
645    }
646    fn rdiv(l: Expr, r: Expr) -> Expr {
647        Ops::RDiv(l, r).to_expr()
648    }
649    fn fdiv(l: Expr, r: Expr) -> Expr {
650        Ops::FDiv(l, r).to_expr()
651    }
652    fn add_eq(l: Expr, r: Expr) -> Expr {
653        Ops::AddEq(l, r).to_expr()
654    }
655    fn sub_eq(l: Expr, r: Expr) -> Expr {
656        Ops::SubEq(l, r).to_expr()
657    }
658    fn max(l: Expr, r: Expr) -> Expr {
659        Ops::Max(l, r).to_expr()
660    }
661    fn min(l: Expr, r: Expr) -> Expr {
662        Ops::Min(l, r).to_expr()
663    }
664    fn roll(l: Expr, r: Expr) -> Expr {
665        Ops::Roll(l, r).to_expr()
666    }
667
668    fn sum(expr: Expr) -> Expr {
669        Expr::Sum(Box::new(expr))
670    }
671    fn num(num: i128) -> Expr {
672        Expr::Number(num)
673    }
674    fn paren(expr: Expr) -> Expr {
675        Expr::Parens(Box::new(expr))
676    }
677
678    fn parse_err(msg: &str, pos: (usize, usize)) -> ParseError {
679        ParseError::new(msg, pos)
680    }
681
682    #[test]
683    fn parse_things() {
684        expect_syntax("1+2*3", add(num(1), mul(num(2), num(3))));
685        expect_syntax("2^3_4", min(max(num(2), num(3)), num(4)));
686        expect_syntax("2^3_4d5", min(max(num(2), num(3)), roll(num(4), num(5))));
687        expect_syntax("2d5+3", add(roll(num(2), num(5)), num(3)));
688        expect_syntax("(2d5)+3", add(paren(roll(num(2), num(5))), num(3)));
689        expect_syntax("((2d5)+3)", paren(add(paren(roll(num(2), num(5))), num(3))));
690        expect_syntax("(2d5)", paren(roll(num(2), num(5))));
691        expect_syntax(
692            "(2d5+3)*4",
693            mul(paren(add(roll(num(2), num(5)), num(3))), num(4)),
694        );
695    }
696
697    #[test]
698    fn parse_parenthesis() {
699        expect_syntax("(1)", paren(num(1)));
700        expect_parse_err(
701            "(1",
702            parse_err("Unclosed Parenthesis, but found EOF", (0, 2)),
703        );
704        expect_parse_err(
705            "1)",
706            parse_err(
707                "Tried to close Parenthesis with no matching opening Parenthesis.",
708                (0, 1),
709            ),
710        );
711    }
712
713    #[test]
714    fn parse_brackets() {
715        expect_parse_err(
716            "([2)",
717            parse_err(
718                "Tried to close Parenthesis but expected closing Square Bracket.",
719                (0, 3),
720            ),
721        );
722        expect_parse_err(
723            "(2])",
724            parse_err(
725                "Tried to close Square Bracket but expected closing Parenthesis.",
726                (0, 2),
727            ),
728        );
729        expect_parse_err(
730            "[(2)",
731            parse_err("Unclosed Square Bracket, but found EOF", (0, 4)),
732        );
733        expect_parse_err(
734            "[2",
735            parse_err("Unclosed Square Bracket, but found EOF", (0, 2)),
736        );
737        expect_parse_err(
738            "3]",
739            parse_err(
740                "Tried to close Square Bracket with no matching opening Square Bracket.",
741                (0, 1),
742            ),
743        );
744
745        expect_syntax(
746            "(1+2)+(3+4)",
747            add(paren(add(num(1), num(2))), paren(add(num(3), num(4)))),
748        );
749
750        expect_syntax("[2d5]", sum(roll(num(2), num(5))));
751        expect_syntax("[1]", sum(num(1)));
752
753        expect_syntax(
754            "[1d6]d[1d6]",
755            roll(sum(roll(num(1), num(6))), sum(roll(num(1), num(6)))),
756        );
757
758        expect_syntax("[1d6]d6", roll(sum(roll(num(1), num(6))), num(6)));
759    }
760
761    #[test]
762    fn parse_complicated_syntax() {
763        expect_syntax(
764            "([6d([1d6])]_96*[1d6])//7",
765            fdiv(
766                paren(mul(
767                    min(sum(roll(num(6), paren(sum(roll(num(1), num(6)))))), num(96)),
768                    sum(roll(num(1), num(6))),
769                )),
770                num(7),
771            ),
772        );
773    }
774
775    #[test]
776    fn parse_invalid_syntax() {
777        expect_parse_err(
778            "10d6++2",
779            parse_err("Invalid token: Add", (0,6))
780        );
781        expect_parse_err(
782            "10d6+-2",
783            parse_err("Invalid token: Sub", (0,6))
784        );
785    }
786
787    #[test]
788    fn display() {
789        let toplevel = parse_syntax("([6d([1d6])]_96*[1d6]) // 7").unwrap();
790
791        assert_eq!(format!("{toplevel}"), "([6d([1d6])]_96*[1d6])//7")
792    }
793}