skiff/parser/
parse.rs

1use std::borrow::Borrow;
2
3use crate::ast::{Ast, BinOp, Identifier, Program};
4use crate::lexer::lex::Token;
5use crate::parser::parselets::*;
6use crate::parser::util::ParseError;
7
8use super::types::parse::parse_type;
9
10pub fn get_binding_power(op: &Token) -> i64 {
11    match op {
12        Token::LOr => 10,
13        Token::LAnd => 20,
14        Token::DoubleEq => 30,
15        Token::Gt | Token::GtEq | Token::Lt | Token::LtEq => 40,
16        Token::Pipe => 50,
17        Token::BitXor => 60,
18        Token::BitAnd => 70,
19        Token::Plus | Token::Minus => 80,
20        Token::Times | Token::Divide | Token::Modulo => 90,
21        Token::Exp => 100,
22        Token::LParen => 110,
23        _ => panic!("Tried to get binding power of non-op token {:?}", op),
24    }
25}
26
27fn prefix_map(tok: &Token) -> Option<Box<dyn PrefixParselet>> {
28    match *tok {
29        Token::Number(_) => Some(Box::new(NumberParselet {})),
30        Token::Bool(_) => Some(Box::new(BoolParselet {})),
31        Token::Identifier(_) => Some(Box::new(IdentifierParselet {})),
32        Token::LParen => Some(Box::new(ParenthesisParselet {})),
33        Token::Lambda => Some(Box::new(LambdaParselet {})),
34        Token::If => Some(Box::new(IfParselet {})),
35        Token::Let => Some(Box::new(LetParselet {})),
36        Token::Def => Some(Box::new(FunctionParselet {})),
37        Token::Data => Some(Box::new(DataParselet {})),
38        Token::Match => Some(Box::new(MatchParselet {})),
39        _ => None,
40    }
41}
42
43fn infix_map(tok: &Token) -> Option<Box<dyn InfixParselet>> {
44    match *tok {
45        Token::Plus => Some(Box::new(OperatorParselet::new(BinOp::Plus, true))),
46        Token::Minus => Some(Box::new(OperatorParselet::new(BinOp::Minus, true))),
47        Token::Times => Some(Box::new(OperatorParselet::new(BinOp::Times, true))),
48        Token::Divide => Some(Box::new(OperatorParselet::new(BinOp::Divide, true))),
49        Token::Modulo => Some(Box::new(OperatorParselet::new(BinOp::Modulo, true))),
50        Token::Exp => Some(Box::new(OperatorParselet::new(BinOp::Exp, false))),
51        Token::DoubleEq => Some(Box::new(OperatorParselet::new(BinOp::Eq, true))),
52        Token::Gt => Some(Box::new(OperatorParselet::new(BinOp::Gt, true))),
53        Token::Lt => Some(Box::new(OperatorParselet::new(BinOp::Lt, true))),
54        Token::GtEq => Some(Box::new(OperatorParselet::new(BinOp::GtEq, true))),
55        Token::LtEq => Some(Box::new(OperatorParselet::new(BinOp::LtEq, true))),
56        Token::LAnd => Some(Box::new(OperatorParselet::new(BinOp::LAnd, true))),
57        Token::LOr => Some(Box::new(OperatorParselet::new(BinOp::LOr, true))),
58        Token::BitAnd => Some(Box::new(OperatorParselet::new(BinOp::BitAnd, true))),
59        // Token::Pipe => Some(Box::new(OperatorParselet::new(BinOp::BitOr, true))),
60        Token::BitXor => Some(Box::new(OperatorParselet::new(BinOp::BitXor, true))),
61        _ => None,
62    }
63}
64
65fn postfix_map(tok: &Token) -> Option<Box<dyn PostfixParselet>> {
66    match *tok {
67        Token::LParen => Some(Box::new(FunCallParselet {})),
68        _ => None,
69    }
70}
71
72pub fn parse_expr(
73    tokens: &mut Vec<(Token, std::ops::Range<usize>)>,
74    current_binding_power: i64,
75    is_top_level: bool,
76) -> Result<Ast, ParseError> {
77    // Pop the first token and find which parselet we should use
78    let initial_token = match tokens.pop() {
79        Some(v) => v,
80        None => return Err(ParseError("Unexpected end of file".to_string(), None)),
81    };
82
83    let initial_parselet = match prefix_map(&initial_token.0) {
84        None => {
85            return Err(ParseError(
86                format!("Unexpected Token: {:?}", initial_token).to_string(),
87                None,
88            ))
89        }
90        Some(v) => v,
91    };
92
93    let mut left_node = (*initial_parselet).parse(tokens, initial_token, is_top_level)?;
94
95    loop {
96        // If next token is empty then stop repeating
97        let next_token = match tokens.last() {
98            None => break,
99            Some(v) => v,
100        };
101
102        if let Some(postfix_parselet) = postfix_map(next_token.0.borrow()) {
103            if get_binding_power(next_token.0.borrow()) <= current_binding_power {
104                break;
105            };
106
107            // Consume the token we peeped
108            let next_token = tokens.pop().unwrap();
109
110            left_node = postfix_parselet.parse(tokens, left_node, next_token)?;
111
112            continue;
113        }
114
115        if let Some(infix_parselet) = infix_map(next_token.0.borrow()) {
116            if get_binding_power(next_token.0.borrow()) <= current_binding_power {
117                break;
118            };
119
120            // Consume the token we peeped
121            let next_token = tokens.pop().unwrap();
122
123            left_node = infix_parselet.parse(tokens, left_node, next_token)?;
124
125            continue;
126        }
127        break;
128    }
129
130    return Ok(left_node);
131}
132
133// A recursive descent parser for function argument lists
134pub fn parse_args(
135    tokens: &mut Vec<(Token, std::ops::Range<usize>)>,
136) -> Result<(Vec<Ast>, usize), ParseError> {
137    match tokens.last() {
138        Some((Token::RParen, span)) => {
139            let end = span.end;
140            tokens.pop();
141            Ok((vec![], end))
142        }
143        Some(_) => {
144            let arg = parse_expr(tokens, 0, false)?;
145            let (mut rest, end) = parse_rest_args(tokens)?;
146            rest.push(arg);
147            rest.reverse();
148            Ok((rest, end))
149        }
150        None => Err(ParseError(
151            "Expected right paren or function arg".to_string(),
152            None,
153        )),
154    }
155}
156
157fn parse_rest_args(
158    tokens: &mut Vec<(Token, std::ops::Range<usize>)>,
159) -> Result<(Vec<Ast>, usize), ParseError> {
160    match tokens.pop() {
161        Some((Token::RParen, span)) => Ok((vec![], span.end)),
162        Some((Token::Comma, _span)) => {
163            let expr = parse_expr(tokens, 0, false)?;
164            let (mut rest, end) = parse_rest_args(tokens)?;
165            rest.push(expr);
166            Ok((rest, end))
167        }
168        Some((e, span)) => Err(ParseError(
169            format!("Expected comma but got {:?}", e).to_string(),
170            Some(span),
171        )),
172        None => Err(ParseError(
173            "Ran out of tokens while parsing args".to_string(),
174            None,
175        )),
176    }
177}
178
179// A recursive descent parser for function parameter lists
180pub fn parse_params(
181    tokens: &mut Vec<(Token, std::ops::Range<usize>)>,
182) -> Result<Vec<Identifier>, ParseError> {
183    match tokens.last() {
184        Some((Token::RParen, _)) => {
185            tokens.pop();
186            Ok(vec![])
187        }
188        Some((_, _)) => {
189            let (param, _) = parse_identifier(None, tokens)?;
190            // TODO: fix this
191            // consume_if_present(tokens, Token::Comma)?;
192            let mut rest = parse_rest_params(tokens)?;
193            rest.push(param);
194            rest.reverse();
195            Ok(rest)
196        }
197        None => Err(ParseError(
198            "Ran out of tokens while parsing params".to_string(),
199            None,
200        )),
201    }
202}
203
204fn parse_rest_params(
205    tokens: &mut Vec<(Token, std::ops::Range<usize>)>,
206) -> Result<Vec<Identifier>, ParseError> {
207    match tokens.last() {
208        Some((Token::RParen, _)) => {
209            tokens.pop();
210            Ok(vec![])
211        }
212        Some((Token::Comma, _span)) => {
213            tokens.pop();
214            let (param, _id) = parse_identifier(None, tokens)?;
215            let mut rest = parse_rest_params(tokens)?;
216            rest.push(param);
217            Ok(rest)
218        }
219        Some((_, span)) => Err(ParseError("Expected comma".to_string(), Some(span.clone()))),
220        None => Err(ParseError(
221            "Ran out of tokens while parsing rest of params".to_string(),
222            None,
223        )),
224    }
225}
226
227// A recursive descent parser for the top-level program
228pub fn parse_program(
229    tokens: &mut Vec<(Token, std::ops::Range<usize>)>,
230) -> Result<Program, ParseError> {
231    let exprs = parse_exprs(tokens)?;
232    Ok(exprs)
233}
234
235pub fn parse_exprs(
236    tokens: &mut Vec<(Token, std::ops::Range<usize>)>,
237) -> Result<Vec<Ast>, ParseError> {
238    let mut exprs = vec![];
239    while tokens.len() != 0 {
240        let expr = parse_expr(tokens, 0, true)?;
241        exprs.push(expr);
242    }
243    return Ok(exprs);
244}
245
246pub fn parse_identifier(
247    initial_token: Option<(Token, std::ops::Range<usize>)>,
248    tokens: &mut Vec<(Token, std::ops::Range<usize>)>,
249) -> Result<(Identifier, std::ops::Range<usize>), ParseError> {
250    let id_token;
251
252    match initial_token {
253        Some(t) => id_token = t,
254        None => match tokens.pop() {
255            Some(t) => id_token = t,
256            None => {
257                return Err(ParseError(
258                    "Ran out of tokens while parsing identifier".to_string(),
259                    None,
260                ))
261            }
262        },
263    };
264
265    return match id_token {
266        (Token::Identifier(id), id_span) => match tokens.last() {
267            Some((Token::Colon, _)) => {
268                tokens.pop();
269                let (type_decl, type_span) = parse_type(tokens)?;
270                Ok((
271                    Identifier::new_with_type(id, type_decl),
272                    id_span.start..type_span.end,
273                ))
274            }
275            Some(_) => Ok((Identifier::new_without_type(id), id_span)),
276            None => Err(ParseError(
277                "Ran out of tokens while parsing typed identifier".to_string(),
278                None,
279            )),
280        },
281        (_, span) => Err(ParseError(
282            "Found non identifier token while parsing identifier".to_string(),
283            Some(span),
284        )),
285    };
286}