polymath_rs/cst/mod.rs

//!
//! # CST
//!
//! This module contains a parser for transforming a stream of AsciiMath tokens
//! into a Concrete Syntax Tree.
//!
//! ## AsciiMath grammar
//!
//! This grammar has been taken from the AsciiMath website.
//!
//! ```text
//! E ::= IE | I/I                                                      Expression
//! I ::= S_S | S^S | S_S^S | S                                         Intermediate expression
//! S ::= v | lEr | uS | bSS                                            Simple expression
//! v ::= [A-Za-z] | greek letters | numbers | other constant symbols
//! u ::= sqrt | text | bb | other unary symbols for font commands
//! b ::= frac | root | stackrel | other binary symbols
//! l ::= ( | [ | { | (: | {: | other left brackets
//! r ::= ) | ] | } | :) | :} | other right brackets
//! ```
//!
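//! For instance, `root(3)(x)/2` is produced by the `I/I` alternative of `E`:
//! the first `I` is the simple expression `bSS` (the binary symbol `root`
//! applied to the bracketed expressions `(3)` and `(x)`, each of which is an
//! `lEr`), and the second `I` is just the constant `2`.
//!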
//! There are two challenges in parsing this particular grammar: it is
//! ambiguous, and it is non-deterministic.
//! I am going to try a few different techniques to solve these problems,
//! so there will eventually be one parser that parses a modified version
//! of the grammar and one that uses lookahead to solve this.
//!
//! In both cases I want to circumvent backtracking entirely.
//!
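//! For example, in `a_b^c` the choice between the `S_S` and `S_S^S`
//! alternatives of `I` needs only a single token of lookahead: once `a_b` has
//! been parsed, peeking at the next token settles the rule without re-parsing
//! anything.
//!
//! ```text
//! a_b^c
//!    ^ peek here after parsing `a_b`:
//!      `^`           => I ::= S_S^S
//!      anything else => I ::= S_S
//! ```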

use std::cell::RefCell;

use crate::tokens::{types::TokenType, Token};

pub mod predictive;

/// A borrowed slice of AsciiMath tokens that the parsers consume.
pub type TokenStream<'a> = &'a [Token<'a>];

/// Keeps track of the current position inside a [`TokenStream`]. The position
/// is stored in a [`RefCell`] so the helper methods can advance it through a
/// shared `&self` reference.
#[derive(Debug)]
pub struct Cursor {
    pos: RefCell<usize>,
}

impl Cursor {
    /// Current position of the cursor in the token stream.
    fn get_pos(&self) -> usize {
        *self.pos.borrow()
    }

    /// Moves the cursor to `pos` and returns the new position.
    fn set_pos(&self, pos: usize) -> usize {
        *self.pos.borrow_mut() = pos;
        self.get_pos()
    }

    /// Consumes and returns the current token if it satisfies `func`;
    /// otherwise the cursor is left untouched.
    fn parse<'a, F: Fn(&'a TokenType) -> bool + 'static>(
        &self,
        tokens: TokenStream<'a>,
        func: F,
    ) -> Option<&'a Token<'a>> {
        if let Some(token) = self.peek(tokens, func) {
            self.advance(tokens);
            Some(token)
        } else {
            None
        }
    }

    /// Unconditionally moves the cursor one token forward and returns the
    /// token that was stepped over, if any.
    fn advance<'a>(&self, tokens: TokenStream<'a>) -> Option<&'a Token<'a>> {
        self.set_pos(self.get_pos() + 1);
        tokens.get(self.get_pos() - 1)
    }

    /// Returns a fresh cursor together with the sub-slice running from the
    /// current position up to (but not including) the next occurrence of
    /// `token`, or `None` if the token does not occur after the cursor.
    fn slice_to<'a>(
        &self,
        tokens: TokenStream<'a>,
        token: &Token<'a>,
    ) -> Option<(Cursor, TokenStream<'a>)> {
        // Search only from the current position onwards; a match before the
        // cursor would otherwise produce an invalid (reversed) range below.
        let token_pos = tokens
            .iter()
            .enumerate()
            .skip(self.get_pos())
            .find(|(_, tk)| *tk == token)
            .map(|(index, _)| index);

        token_pos.map(|token_pos| {
            (
                Cursor {
                    pos: RefCell::new(0),
                },
                &tokens[self.get_pos()..token_pos],
            )
        })
    }

    /// Looks at the current token without consuming it; returns it only if it
    /// satisfies `func`.
    fn peek<'a, F: Fn(&'a TokenType) -> bool>(
        &self,
        tokens: TokenStream<'a>,
        func: F,
    ) -> Option<&'a Token<'a>> {
        self.peek_n(tokens, 0, func)
    }

    /// Looks at the token `offset` positions ahead of the cursor (0 = current)
    /// without consuming anything; returns it only if it satisfies `func`.
    fn peek_n<'a, F: Fn(&'a TokenType) -> bool>(
        &self,
        tokens: TokenStream<'a>,
        offset: usize,
        func: F,
    ) -> Option<&'a Token<'a>> {
        tokens
            .get(self.get_pos() + offset)
            .filter(|token| func(&token.token_type))
    }

    /// Number of tokens that have not been consumed yet. `saturating_sub`
    /// guards against the cursor having been advanced past the end.
    fn tokens_left(&self, tokens: TokenStream) -> usize {
        tokens.len().saturating_sub(self.get_pos())
    }

    /// True once the cursor has reached (or passed) the end of the stream.
    fn eos(&self, tokens: TokenStream) -> bool {
        self.get_pos() >= tokens.len()
    }
}
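
// A minimal sketch (not used by any parser yet) of how `Cursor` is meant to
// support predictive parsing: peek at the upcoming token to pick a grammar
// alternative, and only then consume it, so no backtracking is ever needed.
// The predicates are supplied by the caller, so no concrete `TokenType`
// variants are assumed here; the function name and parameters are purely
// illustrative.
#[allow(dead_code)]
fn lookahead_sketch<'a, F, G>(
    cursor: &Cursor,
    tokens: TokenStream<'a>,
    is_sub: F,
    is_sup: G,
) -> Option<&'a Token<'a>>
where
    F: Fn(&'a TokenType) -> bool + 'static,
    G: Fn(&'a TokenType) -> bool + 'static,
{
    if cursor.peek(tokens, &is_sub).is_some() {
        // Commit to the `S_S` / `S_S^S` branch: consume the subscript marker.
        cursor.parse(tokens, is_sub)
    } else if cursor.peek(tokens, &is_sup).is_some() {
        // Commit to the `S^S` branch: consume the superscript marker.
        cursor.parse(tokens, is_sup)
    } else {
        // Plain `S`: the cursor never moved, so there is nothing to undo.
        None
    }
}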