use crate::ast::{Ast, Comparator, KeyValuePair};
use crate::lexer::{Token, TokenTuple, tokenize};
use crate::{ErrorReason, JmespathError};
/// Outcome of parsing an expression: the parsed [`Ast`] on success, or a [`JmespathError`].
pub type ParseResult = Result<Ast, JmespathError>;
/// Parses an expression string into an [`Ast`].
///
/// The input is tokenized first and the resulting tokens are handed to a fresh [`Parser`].
///
/// # Errors
///
/// Returns the error produced by `tokenize`, or the parse error raised while consuming the
/// tokens.
pub fn parse(expr: &str) -> ParseResult {
    let mut parser = Parser::new(tokenize(expr)?, expr);
    parser.parse()
}
/// Binding-power threshold used by `Parser::projection_rhs`: when the token after a projection
/// is not `.`, `[` or `[?` and its `lbp()` is below this value, the projection gets an identity
/// right-hand side instead of a parsed one.
const PROJECTION_STOP: usize = 10;
/// Parser state over the token list produced by `tokenize`.
struct Parser<'a> {
/// Tokens to consume; each entry pairs a position with its token.
tokens: Vec<TokenTuple<'a>>,
/// Index into `tokens` of the next token to be consumed.
cursor: usize,
/// The original expression text, attached to every error that is reported.
expr: &'a str,
/// Position of the most recently consumed token (0 before anything is consumed).
/// Presumably an offset into `expr` — confirm against the lexer.
offset: usize,
}
impl<'a> Parser<'a> {
fn new(tokens: Vec<TokenTuple<'a>>, expr: &'a str) -> Parser<'a> {
Parser {
tokens,
cursor: 0,
expr,
offset: 0,
}
}
/// Parses one complete expression and verifies that all input was consumed.
///
/// # Errors
///
/// Propagates any error from `expr`, and fails with "Did not parse the complete expression"
/// when the token following the parsed expression is not `Eof`.
#[inline]
fn parse(&mut self) -> ParseResult {
    let result = self.expr(0)?;
    match self.peek(0) {
        Token::Eof => Ok(result),
        leftover => Err(self.err(leftover, "Did not parse the complete expression", true)),
    }
}
/// Consumes the next token and returns it, discarding its position.
#[inline]
fn advance(&mut self) -> Token<'a> {
    let (_, token) = self.advance_with_pos();
    token
}
/// Consumes the next token and returns it together with its position.
///
/// The position is also remembered in `self.offset`. Once the token list is exhausted this
/// keeps returning `Token::Eof` paired with the last remembered offset, without moving.
#[inline]
fn advance_with_pos(&mut self) -> (usize, Token<'a>) {
    match self.tokens.get(self.cursor).cloned() {
        Some((pos, tok)) => {
            self.cursor += 1;
            self.offset = pos;
            (pos, tok)
        }
        None => (self.offset, Token::Eof),
    }
}
/// Returns the token `lookahead` positions past the cursor without consuming anything.
///
/// Looking beyond the end of the token list yields `Token::Eof`.
#[inline]
fn peek(&self, lookahead: usize) -> &Token<'a> {
    match self.tokens.get(self.cursor + lookahead) {
        Some((_, token)) => token,
        None => &Token::Eof,
    }
}
/// Builds a parse error whose message is `error_msg` followed by the offending token.
///
/// When `is_peek` is true and a token is still pending, the error points at that pending
/// (peeked) token; otherwise it points at the most recently consumed token.
fn err(&self, current_token: &Token<'_>, error_msg: &str, is_peek: bool) -> JmespathError {
    let position = match self.tokens.get(self.cursor) {
        Some(pending) if is_peek => pending.0,
        _ => self.offset,
    };
    let message = format!("{error_msg} -- found {current_token:?}");
    JmespathError::new(self.expr, position, ErrorReason::Parse(message))
}
/// Parses an expression, continuing while the next token binds tighter than `rbp`.
///
/// A prefix node is parsed with `nud`; then, as long as the upcoming token's `lbp()` exceeds
/// `rbp`, the node built so far is passed to `led` as the left operand.
fn expr(&mut self, rbp: usize) -> ParseResult {
    let mut left = self.nud()?;
    while rbp < self.peek(0).lbp() {
        left = self.led(Box::new(left))?;
    }
    Ok(left)
}
/// Parses the construct that begins with the next token (prefix position).
///
/// Consumes one token and dispatches on it: `@`, identifiers, literals, `*`, `[`-forms
/// (index/slice, `[*]`, multi-list), `[]`, `{`-hashes, `&` exprefs, `!`, `[?` filters and
/// parenthesized expressions. With the `let-expr` feature, `$name` references and
/// let-expressions are handled too.
///
/// # Errors
///
/// Fails with "Unexpected nud token" for a token that cannot start an expression, and
/// propagates any error raised while parsing the nested construct.
fn nud(&mut self) -> ParseResult {
    let (offset, token) = self.advance_with_pos();
    match token {
        Token::At => Ok(Ast::Identity { offset }),
        // `let` is not a reserved word: it only opens a let-expression when a `$variable`
        // binding follows. Otherwise it falls through to the plain field arm below, so a
        // field that happens to be named `let` stays addressable.
        #[cfg(feature = "let-expr")]
        Token::Identifier(value)
            if *value == *"let" && matches!(self.peek(0), Token::Variable(_)) =>
        {
            self.parse_let(offset)
        }
        Token::Identifier(value) => Ok(Ast::Field {
            name: value.to_owned(),
            offset,
        }),
        Token::QuotedIdentifier(value) => match self.peek(0) {
            Token::Lparen => {
                let message = "Quoted strings can't be a function name";
                Err(self.err(&Token::Lparen, message, true))
            }
            _ => Ok(Ast::Field {
                name: value,
                offset,
            }),
        },
        Token::Star => self.parse_wildcard_values(Box::new(Ast::Identity { offset })),
        Token::Literal(value) => Ok(Ast::Literal { value, offset }),
        Token::Lbracket => match self.peek(0) {
            // `[1]`, `[1:2]`, `[:2]` ... an index or a slice.
            &Token::Number(_) | &Token::Colon => self.parse_index(),
            // Exactly `[*]`; a `*` followed by anything else starts a multi-list element.
            &Token::Star if self.peek(1) == &Token::Rbracket => {
                self.advance();
                self.parse_wildcard_index(Box::new(Ast::Identity { offset }))
            }
            _ => self.parse_multi_list(),
        },
        Token::Flatten => self.parse_flatten(Box::new(Ast::Identity { offset })),
        Token::Lbrace => {
            // At least one key/value pair is required; pairs are separated by ','.
            let mut pairs = vec![];
            loop {
                pairs.push(self.parse_kvp()?);
                match self.advance() {
                    Token::Rbrace => break,
                    Token::Comma => continue,
                    ref t => return Err(self.err(t, "Expected '}' or ','", false)),
                }
            }
            Ok(Ast::MultiHash {
                elements: pairs,
                offset,
            })
        }
        t @ Token::Ampersand => {
            let rhs = self.expr(t.lbp())?;
            Ok(Ast::Expref {
                ast: Box::new(rhs),
                offset,
            })
        }
        t @ Token::Not => Ok(Ast::Not {
            node: Box::new(self.expr(t.lbp())?),
            offset,
        }),
        Token::Filter => self.parse_filter(Box::new(Ast::Identity { offset })),
        Token::Lparen => {
            let result = self.expr(0)?;
            match self.advance() {
                Token::Rparen => Ok(result),
                ref t => Err(self.err(t, "Expected ')' to close '('", false)),
            }
        }
        #[cfg(feature = "let-expr")]
        Token::Variable(name) => Ok(Ast::VariableRef {
            name: name.to_owned(),
            offset,
        }),
        ref t => Err(self.err(t, "Unexpected nud token", false)),
    }
}
/// Parses the construct that continues an expression whose left operand is `left`.
///
/// Consumes one token and dispatches on it: `.` sub-expressions, `[`-index / `[*]`
/// continuations, `||`, `&&`, `|`, function calls, `[]`, `[?` filters and the six comparators.
///
/// # Errors
///
/// Fails with "Unexpected led token" for a token that cannot continue an expression, with
/// "Invalid function name" when `(` follows something other than a field, and propagates
/// errors from nested parsing.
fn led(&mut self, left: Box<Ast>) -> ParseResult {
    let (offset, token) = self.advance_with_pos();
    match token {
        t @ Token::Dot => {
            if self.peek(0) != &Token::Star {
                let rhs = Box::new(self.parse_dot(t.lbp())?);
                return Ok(Ast::Subexpr {
                    offset,
                    lhs: left,
                    rhs,
                });
            }
            // `left.*` projects over the values of `left`.
            self.advance();
            self.parse_wildcard_values(left)
        }
        Token::Lbracket => match self.peek(0) {
            Token::Number(_) | Token::Colon => {
                let rhs = Box::new(self.parse_index()?);
                Ok(Ast::Subexpr {
                    offset,
                    lhs: left,
                    rhs,
                })
            }
            Token::Star => {
                self.advance();
                self.parse_wildcard_index(left)
            }
            t => Err(self.err(t, "Expected number, ':', or '*'", true)),
        },
        t @ (Token::Or | Token::And | Token::Pipe) => {
            // All three are binary operators whose right operand is parsed at their own lbp.
            let rhs = Box::new(self.expr(t.lbp())?);
            Ok(match t {
                Token::Or => Ast::Or {
                    offset,
                    lhs: left,
                    rhs,
                },
                Token::And => Ast::And {
                    offset,
                    lhs: left,
                    rhs,
                },
                // Only `Token::Pipe` remains; it is represented as a sub-expression.
                _ => Ast::Subexpr {
                    offset,
                    lhs: left,
                    rhs,
                },
            })
        }
        Token::Lparen => {
            if let Ast::Field { name, .. } = *left {
                let args = self.parse_list(Token::Rparen)?;
                Ok(Ast::Function { offset, name, args })
            } else {
                Err(self.err(self.peek(0), "Invalid function name", true))
            }
        }
        Token::Flatten => self.parse_flatten(left),
        Token::Filter => self.parse_filter(left),
        Token::Eq => self.parse_comparator(Comparator::Equal, left),
        Token::Ne => self.parse_comparator(Comparator::NotEqual, left),
        Token::Gt => self.parse_comparator(Comparator::GreaterThan, left),
        Token::Gte => self.parse_comparator(Comparator::GreaterThanEqual, left),
        Token::Lt => self.parse_comparator(Comparator::LessThan, left),
        Token::Lte => self.parse_comparator(Comparator::LessThanEqual, left),
        ref t => Err(self.err(t, "Unexpected led token", false)),
    }
}
/// Parses the remainder of a let-expression, after the `let` identifier has been consumed.
///
/// Expects one or more `$name = expr` bindings separated by `,`, then the identifier `in`,
/// then the body expression. `offset` is stored on the resulting `Ast::Let` node.
///
/// # Errors
///
/// Fails when a binding does not start with a variable token, when `=` is missing, when a
/// binding is followed by something other than `,` or `in`, or when a nested parse fails.
#[cfg(feature = "let-expr")]
fn parse_let(&mut self, offset: usize) -> ParseResult {
    let mut bindings = Vec::new();
    loop {
        // Every binding must start with a `$name` token.
        if !matches!(self.peek(0), Token::Variable(_)) {
            return Err(self.err(
                self.peek(0),
                "Expected variable binding ($name) after 'let'",
                true,
            ));
        }
        let var_name = match self.advance() {
            Token::Variable(name) => name.to_owned(),
            _ => unreachable!(),
        };
        match self.advance() {
            Token::Assign => {}
            ref t => {
                return Err(self.err(t, "Expected '=' after variable in let binding", false));
            }
        }
        let value = self.parse_let_binding_expr()?;
        bindings.push((var_name, value));
        // Either another binding follows, or `in` ends the binding list.
        match self.peek(0) {
            Token::Comma => {
                self.advance();
            }
            Token::Identifier(s) if *s == "in" => break,
            t => {
                return Err(self.err(t, "Expected ',' or 'in' after let binding", true));
            }
        }
    }
    // Consume the `in` keyword that terminated the loop above.
    match self.advance() {
        Token::Identifier(s) if *s == *"in" => {}
        ref t => {
            return Err(self.err(t, "Expected 'in' keyword after let bindings", false));
        }
    }
    let body = Box::new(self.expr(0)?);
    Ok(Ast::Let {
        offset,
        bindings,
        expr: body,
    })
}
/// Parses the value expression of a single let binding.
///
/// Delegates to `parse_let_binding_expr_bp` with a right binding power of 0.
#[cfg(feature = "let-expr")]
fn parse_let_binding_expr(&mut self) -> ParseResult {
self.parse_let_binding_expr_bp(0)
}
/// Parses a let-binding value, like `expr`, but never continues past `,` or `in`.
///
/// Parsing stops as soon as the next token is a comma, the identifier `in`, or a token whose
/// `lbp()` does not exceed `rbp`.
#[cfg(feature = "let-expr")]
fn parse_let_binding_expr_bp(&mut self, rbp: usize) -> ParseResult {
    let mut left = self.nud()?;
    loop {
        let stop = {
            let next = self.peek(0);
            let ends_binding = match next {
                Token::Comma => true,
                Token::Identifier(s) if *s == "in" => true,
                _ => false,
            };
            ends_binding || rbp >= next.lbp()
        };
        if stop {
            break;
        }
        left = self.led(Box::new(left))?;
    }
    Ok(left)
}
/// Parses one `key: expression` pair of a multi-hash.
///
/// The key may be an unquoted or a quoted identifier.
///
/// # Errors
///
/// Fails when the next token is not an identifier, when the key is not followed by `:`, or
/// when the value expression fails to parse.
fn parse_kvp(&mut self) -> Result<KeyValuePair, JmespathError> {
    let key = match self.advance() {
        Token::Identifier(name) => name.to_owned(),
        Token::QuotedIdentifier(name) => name,
        ref t => return Err(self.err(t, "Expected Field to start key value pair", false)),
    };
    if self.peek(0) != &Token::Colon {
        return Err(self.err(self.peek(0), "Expected ':' to follow key", true));
    }
    self.advance();
    let value = self.expr(0)?;
    Ok(KeyValuePair { key, value })
}
/// Parses a filter projection after its opening `[?` token has been consumed.
///
/// Produces a projection over `lhs` whose right-hand side is a condition node holding the
/// predicate and whatever follows the closing `]`.
///
/// # Errors
///
/// Fails with "Expected ']'" when the predicate is not followed by `]`, and propagates
/// errors from nested parsing.
fn parse_filter(&mut self, lhs: Box<Ast>) -> ParseResult {
    let predicate = Box::new(self.expr(0)?);
    match self.advance() {
        Token::Rbracket => {}
        ref t => return Err(self.err(t, "Expected ']'", false)),
    }
    let then = Box::new(self.projection_rhs(Token::Filter.lbp())?);
    // Read only after the right-hand side is parsed; both nodes carry this same offset.
    let offset = self.offset;
    Ok(Ast::Projection {
        offset,
        lhs,
        rhs: Box::new(Ast::Condition {
            offset,
            predicate,
            then,
        }),
    })
}
/// Parses a flatten projection after its `[]` token has been consumed.
///
/// The result is a projection whose left side is `lhs` wrapped in a flatten node.
fn parse_flatten(&mut self, lhs: Box<Ast>) -> ParseResult {
    let rhs = self.projection_rhs(Token::Flatten.lbp())?;
    // Read only after the right-hand side is parsed; both nodes carry this same offset.
    let offset = self.offset;
    let flattened = Ast::Flatten { offset, node: lhs };
    Ok(Ast::Projection {
        offset,
        lhs: Box::new(flattened),
        rhs: Box::new(rhs),
    })
}
/// Parses the right operand of a comparison and builds the comparison node.
///
/// Every comparator parses its right side with the binding power of `Token::Eq`.
fn parse_comparator(&mut self, cmp: Comparator, lhs: Box<Ast>) -> ParseResult {
    let right = self.expr(Token::Eq.lbp())?;
    Ok(Ast::Comparison {
        offset: self.offset,
        comparator: cmp,
        lhs,
        rhs: Box::new(right),
    })
}
/// Parses what follows a `.` token.
///
/// A `[` starts a multi-list; an identifier, quoted identifier, `*`, `{` or `&` is parsed as
/// an ordinary expression with binding power `lbp`.
///
/// # Errors
///
/// Fails when the next token is none of the above, and propagates nested parse errors.
fn parse_dot(&mut self, lbp: usize) -> ParseResult {
    match self.peek(0) {
        Token::Lbracket => {
            self.advance();
            self.parse_multi_list()
        }
        Token::Identifier(_)
        | Token::QuotedIdentifier(_)
        | Token::Star
        | Token::Lbrace
        | Token::Ampersand => self.expr(lbp),
        t => Err(self.err(t, "Expected identifier, '*', '{', '[', '&', or '[?'", true)),
    }
}
/// Parses the right-hand side of a projection.
///
/// * `.` — consumed, then handled by `parse_dot`.
/// * `[` or `[?` — parsed as an expression with binding power `lbp`.
/// * any other token whose `lbp()` is below `PROJECTION_STOP` — nothing is consumed and an
///   identity node is returned.
///
/// # Errors
///
/// Fails with "Expected '.', '[', or '[?'" for any remaining token.
fn projection_rhs(&mut self, lbp: usize) -> ParseResult {
    match self.peek(0) {
        Token::Dot => {
            self.advance();
            self.parse_dot(lbp)
        }
        Token::Lbracket | Token::Filter => self.expr(lbp),
        t if t.lbp() < PROJECTION_STOP => Ok(Ast::Identity {
            offset: self.offset,
        }),
        t => Err(self.err(t, "Expected '.', '[', or '[?'", true)),
    }
}
/// Finishes a `[*]` projection once `[` and `*` have been consumed.
///
/// # Errors
///
/// Fails with "Expected ']' for wildcard index" when the next token is not `]`, and
/// propagates errors from parsing the projection's right-hand side.
fn parse_wildcard_index(&mut self, lhs: Box<Ast>) -> ParseResult {
    let closing = self.advance();
    if !matches!(closing, Token::Rbracket) {
        return Err(self.err(&closing, "Expected ']' for wildcard index", false));
    }
    let rhs = self.projection_rhs(Token::Star.lbp())?;
    Ok(Ast::Projection {
        offset: self.offset,
        lhs,
        rhs: Box::new(rhs),
    })
}
/// Builds a `*` projection over the values of `lhs`, after `*` has been consumed.
///
/// The projection's left side is `lhs` wrapped in an object-values node.
fn parse_wildcard_values(&mut self, lhs: Box<Ast>) -> ParseResult {
    let rhs = self.projection_rhs(Token::Star.lbp())?;
    // Read only after the right-hand side is parsed; both nodes carry this same offset.
    let offset = self.offset;
    let values = Ast::ObjectValues { offset, node: lhs };
    Ok(Ast::Projection {
        offset,
        lhs: Box::new(values),
        rhs: Box::new(rhs),
    })
}
fn parse_index(&mut self) -> ParseResult {
let mut parts = [None, None, None];
let mut pos = 0;
loop {
match self.advance() {
Token::Number(value) => {
parts[pos] = Some(value);
match self.peek(0) {
&Token::Colon | &Token::Rbracket => (),
t => return Err(self.err(t, "Expected ':', or ']'", true)),
};
}
Token::Rbracket => break,
Token::Colon if pos >= 2 => {
return Err(self.err(&Token::Colon, "Too many colons in slice expr", false));
}
Token::Colon => {
pos += 1;
match self.peek(0) {
&Token::Number(_) | &Token::Colon | &Token::Rbracket => continue,
t => return Err(self.err(t, "Expected number, ':', or ']'", true)),
};
}
ref t => return Err(self.err(t, "Expected number, ':', or ']'", false)),
}
}
if pos == 0 {
Ok(Ast::Index {
offset: self.offset,
idx: parts[0].ok_or_else(|| {
JmespathError::new(
self.expr,
self.offset,
ErrorReason::Parse(
"Expected parts[0] to be Some; but found None".to_owned(),
),
)
})?,
})
} else {
Ok(Ast::Projection {
offset: self.offset,
lhs: Box::new(Ast::Slice {
offset: self.offset,
start: parts[0],
stop: parts[1],
step: parts[2].unwrap_or(1),
}),
rhs: Box::new(self.projection_rhs(Token::Star.lbp())?),
})
}
}
/// Parses the elements of a multi-list up to and including the closing `]`.
///
/// The node's offset is the parser offset at the time of the call, before any element is read.
fn parse_multi_list(&mut self) -> ParseResult {
    let offset = self.offset;
    let elements = self.parse_list(Token::Rbracket)?;
    Ok(Ast::MultiList { offset, elements })
}
/// Parses a comma-separated list of expressions terminated by `closing`.
///
/// The `closing` token itself is consumed. An immediately encountered `closing` yields an
/// empty list.
///
/// # Errors
///
/// * "invalid token after ','" when a comma is directly followed by `closing`.
/// * "Expected ',' or ..." when an element is followed by anything other than a comma or
///   `closing` — elements must be separated by commas, so input such as `[a b]` is rejected
///   instead of being read as `[a, b]`.
/// * Any error raised while parsing an element.
fn parse_list(&mut self, closing: Token<'_>) -> Result<Vec<Ast>, JmespathError> {
    let mut nodes = vec![];
    while self.peek(0) != &closing {
        nodes.push(self.expr(0)?);
        if self.peek(0) == &Token::Comma {
            self.advance();
            if self.peek(0) == &closing {
                return Err(self.err(self.peek(0), "invalid token after ','", true));
            }
        } else if self.peek(0) != &closing {
            // Without this check the loop would simply parse the next element, silently
            // accepting a missing separator.
            let message = format!("Expected ',' or {closing:?} after list element");
            return Err(self.err(self.peek(0), &message, true));
        }
    }
    // Consume the closing token.
    self.advance();
    Ok(nodes)
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ast::Comparator;

    /// Parses `expr`, failing the calling test if the parser rejects it.
    fn parsed(expr: &str) -> Ast {
        parse(expr).unwrap()
    }

    /// Parses `expr`, requires a projection, and returns its left-hand side.
    /// `failure` is the panic message used when the result is not a projection.
    fn projection_lhs(expr: &str, failure: &str) -> Ast {
        match parsed(expr) {
            Ast::Projection { lhs, .. } => *lhs,
            _ => panic!("{failure}"),
        }
    }

    /// Shorthand for the slice tests, which all share one failure message.
    fn slice_of(expr: &str) -> Ast {
        projection_lhs(expr, "expected Projection with Slice lhs")
    }

    // --- Basic nodes -----------------------------------------------------

    #[test]
    fn parse_field() {
        assert!(matches!(parsed("foo"), Ast::Field { name, .. } if name == "foo"));
    }

    #[test]
    fn parse_identity() {
        assert!(matches!(parsed("@"), Ast::Identity { .. }));
    }

    #[test]
    fn parse_subexpr() {
        assert!(matches!(parsed("foo.bar"), Ast::Subexpr { .. }));
    }

    #[test]
    fn parse_deeply_nested_subexpr() {
        assert!(matches!(parsed("a.b.c.d.e"), Ast::Subexpr { .. }));
    }

    // --- Indexes and slices ----------------------------------------------

    #[test]
    fn parse_index_positive() {
        assert!(matches!(parsed("[0]"), Ast::Index { idx: 0, .. }));
    }

    #[test]
    fn parse_index_negative() {
        assert!(matches!(parsed("[-1]"), Ast::Index { idx: -1, .. }));
    }

    #[test]
    fn parse_slice_basic() {
        assert!(matches!(
            slice_of("[0:5]"),
            Ast::Slice {
                start: Some(0),
                stop: Some(5),
                step: 1,
                ..
            }
        ));
    }

    #[test]
    fn parse_slice_with_step() {
        assert!(matches!(
            slice_of("[::2]"),
            Ast::Slice {
                start: None,
                stop: None,
                step: 2,
                ..
            }
        ));
    }

    #[test]
    fn parse_slice_negative_step() {
        assert!(matches!(slice_of("[::-1]"), Ast::Slice { step: -1, .. }));
    }

    // --- Projections -----------------------------------------------------

    #[test]
    fn parse_wildcard_values() {
        assert!(matches!(parsed("*"), Ast::Projection { .. }));
    }

    #[test]
    fn parse_wildcard_index() {
        assert!(matches!(parsed("[*]"), Ast::Projection { .. }));
    }

    #[test]
    fn parse_flatten() {
        let lhs = projection_lhs("[]", "expected Projection with Flatten");
        assert!(matches!(lhs, Ast::Flatten { .. }));
    }

    #[test]
    fn parse_filter() {
        assert!(matches!(parsed("[?a > `1`]"), Ast::Projection { .. }));
    }

    // --- Operators -------------------------------------------------------

    #[test]
    fn parse_or() {
        assert!(matches!(parsed("a || b"), Ast::Or { .. }));
    }

    #[test]
    fn parse_and() {
        assert!(matches!(parsed("a && b"), Ast::And { .. }));
    }

    #[test]
    fn parse_not() {
        assert!(matches!(parsed("!a"), Ast::Not { .. }));
    }

    #[test]
    fn parse_pipe() {
        assert!(matches!(parsed("a | b"), Ast::Subexpr { .. }));
    }

    // --- Functions, lists and hashes -------------------------------------

    #[test]
    fn parse_function_call() {
        match parsed("length(@)") {
            Ast::Function { name, args, .. } => {
                assert_eq!(name, "length");
                assert_eq!(args.len(), 1);
            }
            _ => panic!("expected Function"),
        }
    }

    #[test]
    fn parse_multi_list() {
        match parsed("[a, b, c]") {
            Ast::MultiList { elements, .. } => assert_eq!(elements.len(), 3),
            _ => panic!("expected MultiList"),
        }
    }

    #[test]
    fn parse_multi_hash() {
        match parsed("{a: b, c: d}") {
            Ast::MultiHash { elements, .. } => assert_eq!(elements.len(), 2),
            _ => panic!("expected MultiHash"),
        }
    }

    // --- Literals --------------------------------------------------------

    #[test]
    fn parse_literal_string() {
        assert!(matches!(parsed("`\"hello\"`"), Ast::Literal { .. }));
    }

    #[test]
    fn parse_literal_number() {
        assert!(matches!(parsed("`42`"), Ast::Literal { .. }));
    }

    #[test]
    fn parse_literal_null() {
        assert!(matches!(parsed("`null`"), Ast::Literal { .. }));
    }

    #[test]
    fn parse_raw_string() {
        match parsed("'hello'") {
            Ast::Literal { value, .. } => assert_eq!(value, serde_json::json!("hello")),
            _ => panic!("expected Literal from raw string"),
        }
    }

    #[test]
    fn parse_expref() {
        assert!(matches!(parsed("&foo"), Ast::Expref { .. }));
    }

    #[test]
    fn parse_all_comparators() {
        let cases = [
            ("a == b", Comparator::Equal),
            ("a != b", Comparator::NotEqual),
            ("a > b", Comparator::GreaterThan),
            ("a >= b", Comparator::GreaterThanEqual),
            ("a < b", Comparator::LessThan),
            ("a <= b", Comparator::LessThanEqual),
        ];
        for (expr, cmp) in cases {
            match parsed(expr) {
                Ast::Comparison { comparator, .. } => assert_eq!(comparator, cmp, "for {expr}"),
                _ => panic!("expected Comparison for {expr}"),
            }
        }
    }

    #[test]
    fn parse_quoted_identifier() {
        match parsed("\"foo bar\"") {
            Ast::Field { name, .. } => assert_eq!(name, "foo bar"),
            _ => panic!("expected Field from quoted identifier"),
        }
    }

    #[test]
    fn parse_parenthesized_expression() {
        assert!(matches!(parsed("(a)"), Ast::Field { .. }));
    }

    // --- Rejected input --------------------------------------------------

    #[test]
    fn error_empty_expression() {
        assert!(parse("").is_err());
    }

    #[test]
    fn error_unclosed_bracket() {
        assert!(parse("[0").is_err());
    }

    #[test]
    fn error_trailing_garbage() {
        assert!(parse("foo bar").is_err());
    }

    #[test]
    fn error_quoted_string_as_function() {
        assert!(parse("\"foo\"()").is_err());
    }

    #[test]
    fn error_trailing_comma_in_list() {
        assert!(parse("[a, b,]").is_err());
    }

    // --- let-expressions (feature gated) ---------------------------------

    #[cfg(feature = "let-expr")]
    #[test]
    fn parse_let_expression() {
        assert!(matches!(parsed("let $x = `1` in $x"), Ast::Let { .. }));
    }

    #[cfg(feature = "let-expr")]
    #[test]
    fn parse_variable_ref() {
        assert!(matches!(parsed("$x"), Ast::VariableRef { .. }));
    }

    #[cfg(feature = "let-expr")]
    #[test]
    fn error_let_missing_in() {
        assert!(parse("let $x = `1` $x").is_err());
    }
}