Skip to main content

jpx_core/
parser.rs

1//! Module for parsing JMESPath expressions into an AST.
2//!
3//! This JMESPath parser is implemented using a Pratt parser,
4//! or top down operator precedence parser.
5
6use crate::ast::{Ast, Comparator, KeyValuePair};
7use crate::lexer::{Token, TokenTuple, tokenize};
8use crate::{ErrorReason, JmespathError};
9
10/// Result of parsing an expression.
11pub type ParseResult = Result<Ast, JmespathError>;
12
13/// Parses a JMESPath expression into an AST.
14pub fn parse(expr: &str) -> ParseResult {
15    let tokens = tokenize(expr)?;
16    Parser::new(tokens, expr).parse()
17}
18
19/// The maximum binding power for a token that can stop a projection.
20const PROJECTION_STOP: usize = 10;
21
22struct Parser<'a> {
23    tokens: Vec<TokenTuple<'a>>,
24    cursor: usize,
25    expr: &'a str,
26    offset: usize,
27}
28
29impl<'a> Parser<'a> {
30    fn new(tokens: Vec<TokenTuple<'a>>, expr: &'a str) -> Parser<'a> {
31        Parser {
32            tokens,
33            cursor: 0,
34            expr,
35            offset: 0,
36        }
37    }
38
39    #[inline]
40    fn parse(&mut self) -> ParseResult {
41        self.expr(0).and_then(|result| match self.peek(0) {
42            &Token::Eof => Ok(result),
43            t => Err(self.err(t, "Did not parse the complete expression", true)),
44        })
45    }
46
47    #[inline]
48    fn advance(&mut self) -> Token<'a> {
49        self.advance_with_pos().1
50    }
51
52    #[inline]
53    fn advance_with_pos(&mut self) -> (usize, Token<'a>) {
54        if self.cursor < self.tokens.len() {
55            let (pos, tok) = self.tokens[self.cursor].clone();
56            self.cursor += 1;
57            self.offset = pos;
58            (pos, tok)
59        } else {
60            (self.offset, Token::Eof)
61        }
62    }
63
64    #[inline]
65    fn peek(&self, lookahead: usize) -> &Token<'a> {
66        let idx = self.cursor + lookahead;
67        if idx < self.tokens.len() {
68            &self.tokens[idx].1
69        } else {
70            &Token::Eof
71        }
72    }
73
74    fn err(&self, current_token: &Token<'_>, error_msg: &str, is_peek: bool) -> JmespathError {
75        let mut actual_pos = self.offset;
76        let mut buff = error_msg.to_string();
77        buff.push_str(&format!(" -- found {current_token:?}"));
78        if is_peek && self.cursor < self.tokens.len() {
79            actual_pos = self.tokens[self.cursor].0;
80        }
81        JmespathError::new(self.expr, actual_pos, ErrorReason::Parse(buff))
82    }
83
84    fn expr(&mut self, rbp: usize) -> ParseResult {
85        let mut left = self.nud();
86        while rbp < self.peek(0).lbp() {
87            left = self.led(Box::new(left?));
88        }
89        left
90    }
91
92    fn nud(&mut self) -> ParseResult {
93        let (offset, token) = self.advance_with_pos();
94        match token {
95            Token::At => Ok(Ast::Identity { offset }),
96            #[cfg(feature = "let-expr")]
97            Token::Identifier(value) if *value == *"let" => self.parse_let(offset),
98            Token::Identifier(value) => Ok(Ast::Field {
99                name: value.to_owned(),
100                offset,
101            }),
102            Token::QuotedIdentifier(value) => match self.peek(0) {
103                Token::Lparen => {
104                    let message = "Quoted strings can't be a function name";
105                    Err(self.err(&Token::Lparen, message, true))
106                }
107                _ => Ok(Ast::Field {
108                    name: value,
109                    offset,
110                }),
111            },
112            Token::Star => self.parse_wildcard_values(Box::new(Ast::Identity { offset })),
113            Token::Literal(value) => Ok(Ast::Literal { value, offset }),
114            Token::Lbracket => match self.peek(0) {
115                &Token::Number(_) | &Token::Colon => self.parse_index(),
116                &Token::Star if self.peek(1) == &Token::Rbracket => {
117                    self.advance();
118                    self.parse_wildcard_index(Box::new(Ast::Identity { offset }))
119                }
120                _ => self.parse_multi_list(),
121            },
122            Token::Flatten => self.parse_flatten(Box::new(Ast::Identity { offset })),
123            Token::Lbrace => {
124                let mut pairs = vec![];
125                loop {
126                    pairs.push(self.parse_kvp()?);
127                    match self.advance() {
128                        Token::Rbrace => break,
129                        Token::Comma => continue,
130                        ref t => return Err(self.err(t, "Expected '}' or ','", false)),
131                    }
132                }
133                Ok(Ast::MultiHash {
134                    elements: pairs,
135                    offset,
136                })
137            }
138            t @ Token::Ampersand => {
139                let rhs = self.expr(t.lbp())?;
140                Ok(Ast::Expref {
141                    ast: Box::new(rhs),
142                    offset,
143                })
144            }
145            t @ Token::Not => Ok(Ast::Not {
146                node: Box::new(self.expr(t.lbp())?),
147                offset,
148            }),
149            Token::Filter => self.parse_filter(Box::new(Ast::Identity { offset })),
150            Token::Lparen => {
151                let result = self.expr(0)?;
152                match self.advance() {
153                    Token::Rparen => Ok(result),
154                    ref t => Err(self.err(t, "Expected ')' to close '('", false)),
155                }
156            }
157            #[cfg(feature = "let-expr")]
158            Token::Variable(name) => Ok(Ast::VariableRef {
159                name: name.to_owned(),
160                offset,
161            }),
162            ref t => Err(self.err(t, "Unexpected nud token", false)),
163        }
164    }
165
166    fn led(&mut self, left: Box<Ast>) -> ParseResult {
167        let (offset, token) = self.advance_with_pos();
168        match token {
169            t @ Token::Dot => {
170                if self.peek(0) == &Token::Star {
171                    self.advance();
172                    self.parse_wildcard_values(left)
173                } else {
174                    let rhs = self.parse_dot(t.lbp())?;
175                    Ok(Ast::Subexpr {
176                        offset,
177                        lhs: left,
178                        rhs: Box::new(rhs),
179                    })
180                }
181            }
182            Token::Lbracket => {
183                if match self.peek(0) {
184                    &Token::Number(_) | &Token::Colon => true,
185                    &Token::Star => false,
186                    t => return Err(self.err(t, "Expected number, ':', or '*'", true)),
187                } {
188                    Ok(Ast::Subexpr {
189                        offset,
190                        lhs: left,
191                        rhs: Box::new(self.parse_index()?),
192                    })
193                } else {
194                    self.advance();
195                    self.parse_wildcard_index(left)
196                }
197            }
198            t @ Token::Or => {
199                let rhs = self.expr(t.lbp())?;
200                Ok(Ast::Or {
201                    offset,
202                    lhs: left,
203                    rhs: Box::new(rhs),
204                })
205            }
206            t @ Token::And => {
207                let rhs = self.expr(t.lbp())?;
208                Ok(Ast::And {
209                    offset,
210                    lhs: left,
211                    rhs: Box::new(rhs),
212                })
213            }
214            t @ Token::Pipe => {
215                let rhs = self.expr(t.lbp())?;
216                Ok(Ast::Subexpr {
217                    offset,
218                    lhs: left,
219                    rhs: Box::new(rhs),
220                })
221            }
222            Token::Lparen => match *left {
223                Ast::Field { name: v, .. } => Ok(Ast::Function {
224                    offset,
225                    name: v,
226                    args: self.parse_list(Token::Rparen)?,
227                }),
228                _ => Err(self.err(self.peek(0), "Invalid function name", true)),
229            },
230            Token::Flatten => self.parse_flatten(left),
231            Token::Filter => self.parse_filter(left),
232            Token::Eq => self.parse_comparator(Comparator::Equal, left),
233            Token::Ne => self.parse_comparator(Comparator::NotEqual, left),
234            Token::Gt => self.parse_comparator(Comparator::GreaterThan, left),
235            Token::Gte => self.parse_comparator(Comparator::GreaterThanEqual, left),
236            Token::Lt => self.parse_comparator(Comparator::LessThan, left),
237            Token::Lte => self.parse_comparator(Comparator::LessThanEqual, left),
238            ref t => Err(self.err(t, "Unexpected led token", false)),
239        }
240    }
241
242    #[cfg(feature = "let-expr")]
243    fn parse_let(&mut self, offset: usize) -> ParseResult {
244        let mut bindings = vec![];
245        loop {
246            match self.peek(0) {
247                Token::Variable(_) => {
248                    let var_name = match self.advance() {
249                        Token::Variable(name) => name.to_owned(),
250                        _ => unreachable!(),
251                    };
252                    match self.advance() {
253                        Token::Assign => {}
254                        ref t => {
255                            return Err(self.err(
256                                t,
257                                "Expected '=' after variable in let binding",
258                                false,
259                            ));
260                        }
261                    }
262                    let value = self.parse_let_binding_expr()?;
263                    bindings.push((var_name, value));
264                    match self.peek(0) {
265                        Token::Comma => {
266                            self.advance();
267                        }
268                        Token::Identifier(s) if *s == "in" => {
269                            break;
270                        }
271                        t => {
272                            return Err(self.err(
273                                t,
274                                "Expected ',' or 'in' after let binding",
275                                true,
276                            ));
277                        }
278                    }
279                }
280                t => {
281                    return Err(self.err(t, "Expected variable binding ($name) after 'let'", true));
282                }
283            }
284        }
285        match self.advance() {
286            Token::Identifier(s) if *s == *"in" => {}
287            ref t => {
288                return Err(self.err(t, "Expected 'in' keyword after let bindings", false));
289            }
290        }
291        let body = self.expr(0)?;
292        Ok(Ast::Let {
293            offset,
294            bindings,
295            expr: Box::new(body),
296        })
297    }
298
299    #[cfg(feature = "let-expr")]
300    fn parse_let_binding_expr(&mut self) -> ParseResult {
301        self.parse_let_binding_expr_bp(0)
302    }
303
304    #[cfg(feature = "let-expr")]
305    fn parse_let_binding_expr_bp(&mut self, rbp: usize) -> ParseResult {
306        let mut left = self.nud();
307        loop {
308            match self.peek(0) {
309                Token::Comma => break,
310                Token::Identifier(s) if *s == "in" => break,
311                _ => {}
312            }
313            if rbp >= self.peek(0).lbp() {
314                break;
315            }
316            left = self.led(Box::new(left?));
317        }
318        left
319    }
320
321    fn parse_kvp(&mut self) -> Result<KeyValuePair, JmespathError> {
322        match self.advance() {
323            Token::Identifier(value) => {
324                if self.peek(0) == &Token::Colon {
325                    self.advance();
326                    Ok(KeyValuePair {
327                        key: value.to_owned(),
328                        value: self.expr(0)?,
329                    })
330                } else {
331                    Err(self.err(self.peek(0), "Expected ':' to follow key", true))
332                }
333            }
334            Token::QuotedIdentifier(value) => {
335                if self.peek(0) == &Token::Colon {
336                    self.advance();
337                    Ok(KeyValuePair {
338                        key: value,
339                        value: self.expr(0)?,
340                    })
341                } else {
342                    Err(self.err(self.peek(0), "Expected ':' to follow key", true))
343                }
344            }
345            ref t => Err(self.err(t, "Expected Field to start key value pair", false)),
346        }
347    }
348
349    fn parse_filter(&mut self, lhs: Box<Ast>) -> ParseResult {
350        let condition_lhs = Box::new(self.expr(0)?);
351        match self.advance() {
352            Token::Rbracket => {
353                let condition_rhs = Box::new(self.projection_rhs(Token::Filter.lbp())?);
354                Ok(Ast::Projection {
355                    offset: self.offset,
356                    lhs,
357                    rhs: Box::new(Ast::Condition {
358                        offset: self.offset,
359                        predicate: condition_lhs,
360                        then: condition_rhs,
361                    }),
362                })
363            }
364            ref t => Err(self.err(t, "Expected ']'", false)),
365        }
366    }
367
368    fn parse_flatten(&mut self, lhs: Box<Ast>) -> ParseResult {
369        let rhs = Box::new(self.projection_rhs(Token::Flatten.lbp())?);
370        Ok(Ast::Projection {
371            offset: self.offset,
372            lhs: Box::new(Ast::Flatten {
373                offset: self.offset,
374                node: lhs,
375            }),
376            rhs,
377        })
378    }
379
380    fn parse_comparator(&mut self, cmp: Comparator, lhs: Box<Ast>) -> ParseResult {
381        let rhs = Box::new(self.expr(Token::Eq.lbp())?);
382        Ok(Ast::Comparison {
383            offset: self.offset,
384            comparator: cmp,
385            lhs,
386            rhs,
387        })
388    }
389
390    fn parse_dot(&mut self, lbp: usize) -> ParseResult {
391        if match self.peek(0) {
392            &Token::Lbracket => true,
393            &Token::Identifier(_)
394            | &Token::QuotedIdentifier(_)
395            | &Token::Star
396            | &Token::Lbrace
397            | &Token::Ampersand => false,
398            t => return Err(self.err(t, "Expected identifier, '*', '{', '[', '&', or '[?'", true)),
399        } {
400            self.advance();
401            self.parse_multi_list()
402        } else {
403            self.expr(lbp)
404        }
405    }
406
407    fn projection_rhs(&mut self, lbp: usize) -> ParseResult {
408        if match self.peek(0) {
409            &Token::Dot => true,
410            &Token::Lbracket | &Token::Filter => false,
411            t if t.lbp() < PROJECTION_STOP => {
412                return Ok(Ast::Identity {
413                    offset: self.offset,
414                });
415            }
416            t => {
417                return Err(self.err(t, "Expected '.', '[', or '[?'", true));
418            }
419        } {
420            self.advance();
421            self.parse_dot(lbp)
422        } else {
423            self.expr(lbp)
424        }
425    }
426
427    fn parse_wildcard_index(&mut self, lhs: Box<Ast>) -> ParseResult {
428        match self.advance() {
429            Token::Rbracket => {
430                let rhs = Box::new(self.projection_rhs(Token::Star.lbp())?);
431                Ok(Ast::Projection {
432                    offset: self.offset,
433                    lhs,
434                    rhs,
435                })
436            }
437            ref t => Err(self.err(t, "Expected ']' for wildcard index", false)),
438        }
439    }
440
441    fn parse_wildcard_values(&mut self, lhs: Box<Ast>) -> ParseResult {
442        let rhs = Box::new(self.projection_rhs(Token::Star.lbp())?);
443        Ok(Ast::Projection {
444            offset: self.offset,
445            lhs: Box::new(Ast::ObjectValues {
446                offset: self.offset,
447                node: lhs,
448            }),
449            rhs,
450        })
451    }
452
453    fn parse_index(&mut self) -> ParseResult {
454        let mut parts = [None, None, None];
455        let mut pos = 0;
456        loop {
457            match self.advance() {
458                Token::Number(value) => {
459                    parts[pos] = Some(value);
460                    match self.peek(0) {
461                        &Token::Colon | &Token::Rbracket => (),
462                        t => return Err(self.err(t, "Expected ':', or ']'", true)),
463                    };
464                }
465                Token::Rbracket => break,
466                Token::Colon if pos >= 2 => {
467                    return Err(self.err(&Token::Colon, "Too many colons in slice expr", false));
468                }
469                Token::Colon => {
470                    pos += 1;
471                    match self.peek(0) {
472                        &Token::Number(_) | &Token::Colon | &Token::Rbracket => continue,
473                        t => return Err(self.err(t, "Expected number, ':', or ']'", true)),
474                    };
475                }
476                ref t => return Err(self.err(t, "Expected number, ':', or ']'", false)),
477            }
478        }
479
480        if pos == 0 {
481            Ok(Ast::Index {
482                offset: self.offset,
483                idx: parts[0].ok_or_else(|| {
484                    JmespathError::new(
485                        self.expr,
486                        self.offset,
487                        ErrorReason::Parse(
488                            "Expected parts[0] to be Some; but found None".to_owned(),
489                        ),
490                    )
491                })?,
492            })
493        } else {
494            Ok(Ast::Projection {
495                offset: self.offset,
496                lhs: Box::new(Ast::Slice {
497                    offset: self.offset,
498                    start: parts[0],
499                    stop: parts[1],
500                    step: parts[2].unwrap_or(1),
501                }),
502                rhs: Box::new(self.projection_rhs(Token::Star.lbp())?),
503            })
504        }
505    }
506
507    fn parse_multi_list(&mut self) -> ParseResult {
508        Ok(Ast::MultiList {
509            offset: self.offset,
510            elements: self.parse_list(Token::Rbracket)?,
511        })
512    }
513
514    fn parse_list(&mut self, closing: Token<'_>) -> Result<Vec<Ast>, JmespathError> {
515        let mut nodes = vec![];
516        while self.peek(0) != &closing {
517            nodes.push(self.expr(0)?);
518            if self.peek(0) == &Token::Comma {
519                self.advance();
520                if self.peek(0) == &closing {
521                    return Err(self.err(self.peek(0), "invalid token after ','", true));
522                }
523            }
524        }
525        self.advance();
526        Ok(nodes)
527    }
528}