zen_parser/parser/standard/
mod.rs

1use std::cell::Cell;
2
3use bumpalo::Bump;
4
5use crate::ast::Node;
6use crate::lexer::token::{Token, TokenKind};
7use crate::parser::definitions::{Arity, Associativity};
8use crate::parser::error::ParserError::{MemoryFailure, UnexpectedToken, UnknownBuiltIn};
9use crate::parser::error::ParserResult;
10use crate::parser::iter::ParserIterator;
11use crate::parser::standard::constants::{BINARY_OPERATORS, BUILT_INS, UNARY_OPERATORS};
12
13mod constants;
14
15pub struct StandardParser<'a, 'b>
16where
17    'b: 'a,
18{
19    iterator: ParserIterator<'a, 'b>,
20    bump: &'b Bump,
21    depth: Cell<u8>,
22}
23
24impl<'a, 'b> StandardParser<'a, 'b>
25where
26    'b: 'a,
27{
28    pub fn try_new(tokens: &'a Vec<Token>, bump: &'b Bump) -> ParserResult<Self> {
29        Ok(Self {
30            iterator: ParserIterator::try_new(tokens, bump)?,
31            bump,
32            depth: Cell::new(0),
33        })
34    }
35
36    pub fn parse(&self) -> ParserResult<&'b Node<'b>> {
37        self.parse_expression(0)
38    }
39
40    fn parse_expression(&self, precedence: u8) -> ParserResult<&'b Node<'b>> {
41        let mut node_left = self.parse_primary()?;
42        let mut token = self.iterator.current();
43
44        while !self.iterator.is_done() {
45            if token.kind == TokenKind::Operator {
46                if let Some(op) = BINARY_OPERATORS.get(token.value) {
47                    if op.precedence >= precedence {
48                        self.iterator.next()?;
49                        let node_right = match op.associativity {
50                            Associativity::Left => self.parse_expression(op.precedence + 1)?,
51                            _ => self.parse_expression(op.precedence)?,
52                        };
53
54                        node_left = self.iterator.node(Node::Binary {
55                            operator: self.iterator.str_value(token.value),
56                            left: node_left,
57                            right: node_right,
58                        })?;
59                        token = self.iterator.current();
60                        continue;
61                    }
62                }
63            }
64
65            break;
66        }
67
68        if precedence == 0 {
69            node_left = self.parse_conditional(node_left)?;
70        }
71
72        Ok(node_left)
73    }
74
75    fn parse_primary(&self) -> ParserResult<&'b Node<'b>> {
76        let token = self.iterator.current();
77        if token.kind == TokenKind::Operator {
78            if let Some(op) = UNARY_OPERATORS.get(token.value) {
79                self.iterator.next()?;
80                let expr = self.parse_expression(op.precedence)?;
81                let node = self.iterator.node(Node::Unary {
82                    operator: self.iterator.str_value(token.value),
83                    node: expr,
84                })?;
85
86                return self.parse_postfix(node);
87            }
88        }
89
90        if let Some(interval_node) = self.parse_interval()? {
91            return self.parse_postfix(interval_node);
92        }
93
94        if token.kind == TokenKind::Bracket && token.value == "(" {
95            self.iterator.next()?;
96            let expr = self.parse_expression(0)?;
97            self.iterator.expect(TokenKind::Bracket, Some(&[")"]))?;
98            return self.parse_postfix(expr);
99        }
100
101        if self.depth.get() > 0 {
102            if token.kind == TokenKind::Operator && (token.value == "#" || token.value == ".") {
103                if token.value == "#" {
104                    self.iterator.next()?;
105                }
106                let node = self.iterator.node(Node::Pointer)?;
107                return self.parse_postfix(node);
108            }
109        } else if token.kind == TokenKind::Operator && (token.value == "#" || token.value == ".") {
110            return Err(UnexpectedToken {
111                expected: "anything but Operator(#, .)".to_string(),
112                received: format!("{token:?}"),
113            });
114        }
115
116        self.parse_primary_expression()
117    }
118
119    fn parse_conditional(&self, node: &'b Node<'b>) -> ParserResult<&'b Node<'b>> {
120        let mut nd = self.iterator.node(node.clone())?;
121        let mut expr1: &'b Node;
122        let mut expr2: &'b Node;
123
124        while self.iterator.current().kind == TokenKind::Operator
125            && self.iterator.current().value == "?"
126        {
127            self.iterator.next()?;
128
129            let token = self.iterator.current();
130            if token.kind != TokenKind::Operator && token.value != ":" {
131                expr1 = self.parse_expression(0)?;
132                self.iterator.expect(TokenKind::Operator, Some(&[":"]))?;
133                expr2 = self.parse_expression(0)?;
134            } else {
135                self.iterator.next()?;
136                expr1 = node;
137                expr2 = self.parse_expression(0)?;
138            }
139
140            nd = self.iterator.node(Node::Conditional {
141                condition: nd,
142                on_true: expr1,
143                on_false: expr2,
144            })?;
145        }
146
147        Ok(nd)
148    }
149
150    fn parse_primary_expression(&self) -> ParserResult<&'b Node<'b>> {
151        let node: &'b Node;
152        let token = self.iterator.current();
153
154        match token.kind {
155            TokenKind::Identifier => {
156                self.iterator.next()?;
157                match token.value {
158                    "true" | "false" => return self.iterator.bool(token),
159                    "null" => return self.iterator.null(token),
160                    _ => node = self.parse_identifier_expression(token)?,
161                }
162            }
163            TokenKind::Number => return self.iterator.number(token),
164            TokenKind::String => return self.iterator.string(token),
165            _ => {
166                if token.kind == TokenKind::Bracket && token.value == "[" {
167                    node = self.parse_array(token)?;
168                } else {
169                    return Err(UnexpectedToken {
170                        expected: "identifier, string, number or opening bracket".to_string(),
171                        received: format!("{token:?}"),
172                    });
173                }
174            }
175        }
176
177        self.parse_postfix(node)
178    }
179
180    fn parse_interval(&self) -> ParserResult<Option<&'b Node<'b>>> {
181        if self.iterator.current().kind != TokenKind::Bracket {
182            return Ok(None);
183        }
184
185        if !self.iterator.lookup(2, TokenKind::Operator, Some(&[".."])) {
186            return Ok(None);
187        }
188
189        let left_bracket = self.iterator.current().value;
190        self.iterator.expect(TokenKind::Bracket, None)?;
191        let left = self.parse_primary_expression()?;
192        self.iterator.expect(TokenKind::Operator, Some(&[".."]))?;
193        let right = self.parse_primary_expression()?;
194        let right_bracket = self.iterator.current().value;
195        self.iterator.expect(TokenKind::Bracket, None)?;
196
197        let interval_node = self.iterator.node(Node::Interval {
198            left_bracket: self.iterator.str_value(left_bracket),
199            left,
200            right,
201            right_bracket: self.iterator.str_value(right_bracket),
202        })?;
203
204        Ok(Some(interval_node))
205    }
206    fn parse_identifier_expression(&self, token: &Token) -> ParserResult<&'b Node<'b>> {
207        if self.iterator.current().kind != TokenKind::Bracket
208            || self.iterator.current().value != "("
209        {
210            return self
211                .iterator
212                .node(Node::Identifier(self.iterator.str_value(token.value)));
213        }
214
215        let builtin = BUILT_INS.get(token.value).ok_or_else(|| UnknownBuiltIn {
216            token: token.value.to_string(),
217        })?;
218
219        self.iterator.expect(TokenKind::Bracket, Some(&["("]))?;
220
221        return match builtin.arity {
222            Arity::Single => {
223                let arg = self.parse_expression(0)?;
224                self.iterator.expect(TokenKind::Bracket, Some(&[")"]))?;
225
226                self.iterator.node(Node::BuiltIn {
227                    name: self.iterator.str_value(token.value),
228                    arguments: self.bump.alloc_slice_copy(&[arg]),
229                })
230            }
231            Arity::Dual => {
232                let arg1 = self.parse_expression(0)?;
233                self.iterator.expect(TokenKind::Operator, Some(&[","]))?;
234                let arg2 = self.parse_primary_expression()?;
235                self.iterator.expect(TokenKind::Bracket, Some(&[")"]))?;
236
237                self.iterator.node(Node::BuiltIn {
238                    name: self.iterator.str_value(token.value),
239                    arguments: self.bump.alloc_slice_copy(&[arg1, arg2]),
240                })
241            }
242            Arity::Closure => {
243                let arg1 = self.parse_expression(0)?;
244                self.iterator.expect(TokenKind::Operator, Some(&[","]))?;
245                let arg2 = self.parse_closure()?;
246                self.iterator.expect(TokenKind::Bracket, Some(&[")"]))?;
247
248                self.iterator.node(Node::BuiltIn {
249                    name: self.iterator.str_value(token.value),
250                    arguments: self.bump.alloc_slice_copy(&[arg1, arg2]),
251                })
252            }
253        };
254    }
255
256    fn parse_array(&self, _token: &Token) -> ParserResult<&'b Node<'b>> {
257        let mut nodes = Vec::new();
258
259        self.iterator.expect(TokenKind::Bracket, Some(&["["]))?;
260        while self.iterator.current().kind != TokenKind::Bracket
261            && self.iterator.current().value != "]"
262        {
263            if !nodes.is_empty() {
264                self.iterator.expect(TokenKind::Operator, Some(&[","]))?;
265                if self.iterator.current().value == "]" {
266                    break;
267                }
268            }
269
270            nodes.push(self.parse_primary()?);
271        }
272
273        self.iterator.expect(TokenKind::Bracket, Some(&["]"]))?;
274        let node = Node::Array(self.bump.alloc_slice_copy(nodes.as_slice()));
275
276        self.iterator.node(node)
277    }
278
279    fn parse_closure(&self) -> ParserResult<&'b Node<'b>> {
280        self.depth.set(self.depth.get() + 1);
281        let node = self.parse_expression(0)?;
282        self.depth.set(self.depth.get() - 1);
283
284        return self.iterator.node(Node::Closure(node));
285    }
286
287    fn parse_postfix(&self, node: &'b Node<'b>) -> ParserResult<&'b Node<'b>> {
288        let mut postfix_token = self.iterator.current();
289        let mut nd = self.iterator.node(node.clone())?;
290
291        while postfix_token.kind == TokenKind::Bracket || postfix_token.kind == TokenKind::Operator
292        {
293            if postfix_token.value == "." {
294                self.iterator.next()?;
295                let property_token = self.iterator.current();
296                self.iterator.next()?;
297
298                if property_token.kind != TokenKind::Identifier
299                    && (property_token.kind != TokenKind::Operator
300                        || !is_valid_identifier(property_token.value))
301                {
302                    return Err(UnexpectedToken {
303                        expected: "member identifier token".to_string(),
304                        received: format!("{postfix_token:?}"),
305                    });
306                }
307
308                let property = self
309                    .iterator
310                    .node(Node::String(self.iterator.str_value(property_token.value)))?;
311                nd = self.iterator.node(Node::Member { node: nd, property })?;
312            } else if postfix_token.value == "[" {
313                self.iterator.next()?;
314                let mut from: Option<&'b Node<'b>> = None;
315                let mut to: Option<&'b Node<'b>> = None;
316
317                let mut c = self.iterator.current();
318                if c.kind == TokenKind::Operator && c.value == ":" {
319                    self.iterator.next()?;
320                    c = self.iterator.current();
321
322                    if c.kind != TokenKind::Bracket && c.value != "]" {
323                        to = Some(self.parse_expression(0)?);
324                    }
325
326                    nd = self.iterator.node(Node::Slice { node: nd, to, from })?;
327                    self.iterator.expect(TokenKind::Bracket, Some(&["]"]))?;
328                } else {
329                    from = Some(self.parse_expression(0)?);
330                    c = self.iterator.current();
331
332                    if c.kind == TokenKind::Operator && c.value == ":" {
333                        self.iterator.next()?;
334                        c = self.iterator.current();
335
336                        if c.kind != TokenKind::Bracket && c.value != "]" {
337                            to = Some(self.parse_expression(0)?);
338                        }
339
340                        nd = self.iterator.node(Node::Slice { node: nd, from, to })?;
341                        self.iterator.expect(TokenKind::Bracket, Some(&["]"]))?;
342                    } else {
343                        // Slice operator [:] was not found,
344                        // it should be just an index node.
345                        nd = self.iterator.node(Node::Member {
346                            node: nd,
347                            property: from.ok_or(MemoryFailure)?,
348                        })?;
349                        self.iterator.expect(TokenKind::Bracket, Some(&["]"]))?;
350                    }
351                }
352            } else {
353                break;
354            }
355
356            postfix_token = self.iterator.current();
357        }
358
359        Ok(nd)
360    }
361}
362
/// Reports whether `str` is one of the keyword operators `and`, `or`, `in` or
/// `not`.
///
/// `parse_postfix` uses this to accept such an operator token as a member name
/// after `.`. The comparison is exact and case-sensitive.
fn is_valid_identifier(str: &str) -> bool {
    ["and", "or", "in", "not"]
        .iter()
        .any(|keyword| *keyword == str)
}