polymath_rs/cst/
predictive.rs

1//!
2//! # Predictive
3//!
4//! This parser is going to utilise lookahead to predict the applicable
5//! production rule.
6//!
7//! One modification will still be made to the grammar: An Expression
8//! can be empty. Otherwise the parser would never halt since E is defined
9//! as:
10//!
11//! E ::= I/I | IE <- there has to always be at least one more character
12//!
13//! So E becomes:
14//!
15//! E ::= I/E | IE | ε
16//!
17//! `I/I` also became `I/E` otherwise the parser would stop consuming after `I`
18//! so that nothing further will be processed after a fraction. Yet, All `E`s
19//! coming after the next I can't be part of the fraction which they are due to
20//! this change. This will be corrected in CST to AST conversion though.
21//!
22
23use std::cell::RefCell;
24
25use crate::tokens::{types::TokenType, Span};
26
27use super::{Cursor, Token, TokenStream};
28
29#[derive(Debug, Clone, PartialOrd, PartialEq)]
30pub struct PredictiveCST<'a> {
31    pub expression: Expression<'a>,
32}
33
34#[derive(Debug, Clone, PartialOrd, PartialEq)]
35pub enum Expression<'a> {
36    // IE
37    IE(IntermediateExpression<'a>, Box<Expression<'a>>),
38    // I/I
39    II(IntermediateExpression<'a>, Token<'a>, Box<Expression<'a>>),
40    Unit,
41}
42
43#[derive(Debug, Clone, PartialOrd, PartialEq)]
44pub enum IntermediateExpression<'a> {
45    // S_S
46    SimpleSub(SimpleExpression<'a>, Token<'a>, SimpleExpression<'a>),
47    // S^S
48    SimpleSup(SimpleExpression<'a>, Token<'a>, SimpleExpression<'a>),
49    // S_S^S
50    SimpleSubSup(
51        SimpleExpression<'a>,
52        Token<'a>,
53        SimpleExpression<'a>,
54        Token<'a>,
55        SimpleExpression<'a>,
56    ),
57    // S
58    Simple(SimpleExpression<'a>),
59}
60
61#[derive(Debug, Clone, PartialOrd, PartialEq)]
62pub enum SimpleExpression<'a> {
63    // v
64    Symbol(Token<'a>),
65    // lEr
66    Group(Token<'a>, Box<Expression<'a>>, Token<'a>),
67    // uS
68    UnarySymbol(Token<'a>, Box<SimpleExpression<'a>>),
69    // bSS
70    BinarySymbol(
71        Token<'a>,
72        Box<SimpleExpression<'a>>,
73        Box<SimpleExpression<'a>>,
74    ),
75}
76
77///
78/// Parses a stream of tokes according to the grammar.
79///
80pub fn parse(tokens: TokenStream) -> PredictiveCST {
81    let cursor = Cursor {
82        pos: RefCell::new(0),
83    };
84    if cursor.eos(tokens) {
85        PredictiveCST {
86            expression: Expression::Unit,
87        }
88    } else {
89        PredictiveCST {
90            expression: parse_expression(tokens, &cursor),
91        }
92    }
93}
94
95fn parse_expression<'a>(tokens: TokenStream<'a>, cursor: &Cursor) -> Expression<'a> {
96    let intermediate_expression = parse_intermediate_expression(tokens, cursor);
97    // test for division
98    if let Some(division_token) = cursor.parse(tokens, |tt| {
99        matches!(tt, crate::tokens::types::TokenType::Division)
100    }) {
101        Expression::II(
102            intermediate_expression,
103            division_token.clone(),
104            Box::new(parse_expression(tokens, cursor)),
105        )
106    } else if cursor.eos(tokens) {
107        Expression::IE(intermediate_expression, Box::new(Expression::Unit))
108    } else {
109        Expression::IE(
110            intermediate_expression,
111            Box::new(parse_expression(tokens, cursor)),
112        )
113    }
114}
115
116fn parse_intermediate_expression<'a>(
117    tokens: TokenStream<'a>,
118    cursor: &Cursor,
119) -> IntermediateExpression<'a> {
120    let simple_expression = parse_simple_expression(tokens, cursor);
121
122    if let Some(sub) = cursor.parse(tokens, |tt| {
123        matches!(tt, crate::tokens::types::TokenType::Underscorce)
124    }) {
125        let simple_expression_2 = parse_simple_expression(tokens, cursor);
126
127        if let Some(sup) = cursor.parse(tokens, |tt| {
128            matches!(tt, crate::tokens::types::TokenType::Hat)
129        }) {
130            IntermediateExpression::SimpleSubSup(
131                simple_expression,
132                sub.clone(),
133                simple_expression_2,
134                sup.clone(),
135                parse_simple_expression(tokens, cursor),
136            )
137        } else {
138            IntermediateExpression::SimpleSub(simple_expression, sub.clone(), simple_expression_2)
139        }
140    } else if let Some(sup) = cursor.parse(tokens, |tt| {
141        matches!(tt, crate::tokens::types::TokenType::Hat)
142    }) {
143        IntermediateExpression::SimpleSup(
144            simple_expression,
145            sup.clone(),
146            parse_simple_expression(tokens, cursor),
147        )
148    } else {
149        IntermediateExpression::Simple(simple_expression)
150    }
151}
152
153fn parse_simple_expression<'a>(tokens: TokenStream<'a>, cursor: &Cursor) -> SimpleExpression<'a> {
154    if let Some(simple_expression) = parse_group(tokens, cursor) {
155        simple_expression
156    } else if let Some(unary_symbol) = cursor.parse(tokens, |tt| {
157        matches!(tt, crate::tokens::types::TokenType::UnaryOperator(_))
158    }) {
159        SimpleExpression::UnarySymbol(
160            unary_symbol.clone(),
161            Box::new(parse_simple_expression(tokens, cursor)),
162        )
163    } else if let Some(binary_symbol) = cursor.parse(tokens, |tt| {
164        matches!(tt, crate::tokens::types::TokenType::BinaryOperator(_))
165    }) {
166        SimpleExpression::BinarySymbol(
167            binary_symbol.clone(),
168            Box::new(parse_simple_expression(tokens, cursor)),
169            Box::new(parse_simple_expression(tokens, cursor)),
170        )
171    } else if let Some(token) = cursor.parse(tokens, |_| true) {
172        SimpleExpression::Symbol(token.clone())
173    } else {
174        SimpleExpression::Symbol(Token {
175            span: Span {
176                start: cursor.get_pos(),
177                end: cursor.get_pos(),
178                text: "",
179            },
180            token_type: TokenType::None,
181        })
182    }
183}
184
185fn parse_group<'a>(tokens: TokenStream<'a>, cursor: &Cursor) -> Option<SimpleExpression<'a>> {
186    if cursor
187        .peek(tokens, |tt| {
188            matches!(tt, crate::tokens::types::TokenType::LBrace(_))
189        })
190        .is_some()
191    {
192        let mut group_count = 1;
193        let mut closing_bracket = None;
194
195        for i in 1..cursor.tokens_left(tokens) {
196            let opening_parens = cursor.peek_n(tokens, i, |tt| {
197                matches!(tt, crate::tokens::types::TokenType::LBrace(_))
198            });
199            let closing_parens = cursor.peek_n(tokens, i, |tt| {
200                matches!(tt, crate::tokens::types::TokenType::RBrace(_))
201            });
202
203            match (opening_parens, closing_parens) {
204                (None, Some(_)) => group_count -= 1,
205                (Some(_), None) => group_count += 1,
206                _ => {}
207            }
208
209            if group_count == 0 {
210                closing_bracket = closing_parens;
211                break;
212            }
213        }
214
215        if let Some(closing_bracket) = closing_bracket {
216            let lbrace = cursor.advance(tokens).unwrap();
217            let (group_cursor, group_tokens) = cursor.slice_to(tokens, closing_bracket).unwrap();
218
219            cursor.set_pos(cursor.get_pos() + group_tokens.len());
220
221            Some(SimpleExpression::Group(
222                lbrace.clone(),
223                Box::new(parse_expression(group_tokens, &group_cursor)),
224                cursor.advance(tokens).unwrap().clone(),
225            ))
226        } else {
227            None
228        }
229    } else {
230        None
231    }
232}