rflex_lib/
parser.rs

1use crate::lexer::{TokenGiver, Token, TokenErr, Group, Op};
2use crate::ast::{Node, BinaryExprNode, UnaryExprNode, Match};
3use Token::*;
4use Group::*;
5use Op::*;
6
7#[derive(Debug)]
8pub enum ParseError {
9    Parse(String),
10    Token(TokenErr)
11}
12impl From<TokenErr> for ParseError {
13    fn from(err: TokenErr) -> ParseError {
14        return ParseError::Token(err);
15    }
16}
17
18pub struct Parser<T: TokenGiver> {
19    cur: Token,
20    lexer: T,
21}
22
23impl<T: TokenGiver> Parser<T> {
24    pub fn new(mut lexer: T) -> Result<Self, ParseError> {
25        return Ok(Parser { 
26            cur: lexer.next()?, 
27            lexer,
28        });
29    }
30
31    fn advance(&mut self) -> Result<Token, ParseError> {
32        let temp = self.cur;
33        self.cur = self.lexer.next()?;
34        return Ok(temp);
35    }
36
37    fn consume(&mut self, token: Token, caller: &str) -> Result<(), ParseError> {
38        if token == self.cur {
39            self.advance()?;
40            return Ok(())
41        } else {
42            return Err(ParseError::Parse(
43                format!("{}: Expected {:?} but got {:?}",
44                    caller, token, self.cur)
45            ));
46        }
47    }
48
49    pub fn parse(&mut self) -> Result<Vec<Match>, ParseError> {
50        let mut matches = Vec::new();
51        while let GROUP(DBQ) = self.cur {
52            self.consume(GROUP(DBQ), "Parse")?;
53            let root = self.expr()?;
54            self.consume(GROUP(DBQ), "Parse")?;
55            let name = self.name()?; 
56            matches.push(Match { root, name: name });
57        }
58        if self.cur != EOF { 
59            return Err(ParseError::Parse(
60                format!("Parse: Expected EOF but got {:?}", self.cur)
61            ));
62        }
63        return Ok(matches);
64    }
65
66    fn expr(&mut self) -> Result<Node, ParseError> {
67        let mut root = self.term()?;
68        while let OP(BAR) = self.cur {
69            self.advance()?;
70            let term = self.term()?;
71            let new_root = BinaryExprNode {
72                op: BAR,
73                left: Box::new(root),
74                right: Box::new(term)
75            };
76            root = Node::BinaryExpr(new_root);
77        }
78        return Ok(root);
79    }
80
81    fn term(&mut self) -> Result<Node, ParseError> {
82        let mut root = self.factor()?;
83        while let CHAR(_) | GROUP(LPR) | GROUP(LBR) = self.cur {
84            let node= self.factor()?;
85            let new_root = BinaryExprNode {
86                op: AND,
87                left: Box::new(root),
88                right: Box::new(node)
89            };
90            root = Node::BinaryExpr(new_root);
91        }
92        return Ok(root);
93    }
94
95    fn factor(&mut self) -> Result<Node, ParseError> {
96        let node = self.atom()?;
97        if let OP(op) = self.cur {
98            if let QUESTION | STAR | PLUS = op {
99                let root = UnaryExprNode { 
100                    op, child: Box::new(node)
101                };
102                self.consume(OP(op), "Factor")?;
103                return Ok(Node::UnaryExpr(root));
104            }
105        }
106        return Ok(node);
107    }
108
109    fn atom(&mut self) -> Result<Node, ParseError> {
110        match self.advance()? {
111            GROUP(LPR) => { 
112                let node = self.expr()?;
113                self.consume(GROUP(RPR), "Atom")?;
114                return Ok(node);
115            },
116            CHAR(c) => return Ok(Node::Char(c)),
117            GROUP(LBR) => return Ok(self.bracketed()?),
118            token => Err(ParseError::Parse(
119                format!("Atom: Expected CHAR, [, (, but found {:?}", token)
120            ))
121        }
122    }
123
124    fn bracketed(&mut self) -> Result<Node, ParseError> {
125        let mut root: Option<Node> = None;
126        loop { match self.cur {
127            CHAR(_) => {
128                let res = match self.lexer.peek()? {
129                    OP(DASH) => self.dash()?,
130                    CHAR(_) | GROUP(RBR) => self.atom()?,
131                    t => return Err(ParseError::Parse(format!(
132                        "Expected Char or Dash got {:?}", t)
133                    )),
134                };
135                root = match root {
136                    None => Some(res),
137                    Some(node) => Some(Node::BinaryExpr(
138                        BinaryExprNode { 
139                            left: Box::new(node), 
140                            right: Box::new(res), 
141                            op: BAR 
142                        }
143                    ))
144                }
145            },
146            GROUP(RBR) => break,
147            t => return Err(ParseError::Parse(format!(
148                "Expected ] or Char got {:?}", t
149            )))
150        }}
151        self.consume(GROUP(RBR), "Dashes")?;
152        match root {
153            None => return Err(ParseError::Parse(
154                format!("Invalid Bracketed Expression")
155            )),
156            Some(node) => return Ok(node)
157        }
158    }
159
160    fn dash(&mut self) -> Result<Node, ParseError> {
161        let root: Node;
162        let c = self.advance()?.char();
163        if c.is_digit(10) {
164            self.consume(OP(DASH), "Dash")?;
165            let d = self.advance()?.char();
166            if d.is_digit(10) { 
167                root = Node::BinaryExpr(BinaryExprNode {
168                    op: DASH,
169                    left: Box::new(Node::Char(c)),
170                    right: Box::new(Node::Char(d))
171                });
172            } else {
173                return Err(ParseError::Parse(
174                    format!("Dash: Expected Num-Num but got Num-{}", d)
175                ));
176            }
177        } else if c.is_alphabetic() {
178            self.consume(OP(DASH), "Dash")?;
179            let d = self.advance()?.char();
180            if d.is_alphabetic() {
181                root = Node::BinaryExpr(BinaryExprNode {
182                    op: DASH,
183                    left: Box::new(Node::Char(c)),
184                    right: Box::new(Node::Char(d))
185                });
186            } 
187            else {
188                return Err(ParseError::Parse(
189                    format!("Dash: Expected Alpha-Alpha but got Alpha-{}", d)
190                ));
191            }
192        } else {
193            return Err(ParseError::Parse(
194                format!("Dash: Expected Alphanumeric but got {}", c)
195            ));
196        }
197        return Ok(root);
198    }
199
200    fn name(&mut self) -> Result<String, ParseError> {
201        let mut name: String = String::new();
202        while let CHAR(c) = self.cur {
203            name.push(c);
204            self.advance()?;
205        }
206        self.consume(SEMI, "Name")?;
207        return Ok(name);
208    }
209}
210
211#[cfg(test)]
212mod tests {
213    use super::*;
214    use std::{fs, path::Path};
215    use crate::lexer::Lexer;
216
217    #[test]
218    fn ast() {
219        let mut i = 0;
220        while Path::new(&format!("tests/data/parser/input/AST-{i}.txt")).exists() {
221            let inpath = &format!("tests/data/parser/input/AST-{i}.txt");
222            let outpath = &format!("tests/data/parser/output/AST-{i}.txt");
223            let tr = Lexer::new(&inpath).expect("File Doesn't Exist");
224            let mut parser = Parser::new(tr).expect("Invalid Token Stream");
225            let matches = parser.parse().expect("Expression should be valid.");
226            for m in matches { 
227                let ans: String = fs::read_to_string(outpath).expect("File doesn't exist.");
228                assert!(ans.trim() == m.root.to_string().trim());
229            }
230            i += 1;
231        }
232    }
233}