json_e/interpreter/
parser.rs

1//! An interpreter for the expression language embedded in JSON-e.
2//!
3//! Evaluation of expressions operates in two phases: parsing to an AST, and evaluating that AST.
4
5#![allow(unused_variables)]
6#![allow(dead_code)]
7
8use super::Node;
9use crate::whitespace::ws;
10use anyhow::{anyhow, Result};
11use nom::{
12    branch::alt,
13    bytes::complete::{tag, take_till},
14    character::complete::{alpha1, alphanumeric1, char, digit1},
15    combinator::{map_res, not, opt, recognize},
16    multi::{fold_many0, many0, separated_list0},
17    sequence::{delimited, pair, tuple},
18    Err, IResult,
19};
20
21// atomic values
22
23/// Parse a number token (integer or decimal)
24fn number(input: &str) -> IResult<&str, Node<'_>> {
25    fn node(input: &str) -> Result<Node, std::num::ParseIntError> {
26        Ok(Node::Number(input))
27    }
28
29    map_res(
30        ws(recognize(pair(digit1, opt(pair(char('.'), digit1))))),
31        node,
32    )(input)
33}
34
35/// Parse a atomic literal JSON value (true, false, null)
36fn literal(input: &str) -> IResult<&str, Node<'_>> {
37    fn node(input: &str) -> Result<Node, ()> {
38        Ok(match input {
39            "true" => Node::True,
40            "false" => Node::False,
41            "null" => Node::Null,
42            _ => unreachable!(),
43        })
44    }
45
46    map_res(
47        ws(recognize(pair(
48            alt((tag("true"), tag("false"), tag("null"))),
49            // things like "falsey", where a literal is followed by more
50            // identifier characters, are not literals
51            not(alt((alphanumeric1, tag("_")))),
52        ))),
53        node,
54    )(input)
55}
56
57/// Parse an identifier as an &str
58fn ident_str(input: &str) -> IResult<&str, &str> {
59    ws(recognize(pair(
60        alt((alpha1, tag("_"))),
61        many0(alt((alphanumeric1, tag("_")))),
62    )))(input)
63}
64
65/// Parse an identifier as a Node
66fn ident(input: &str) -> IResult<&str, Node<'_>> {
67    fn node(input: &str) -> Result<Node, ()> {
68        Ok(Node::Ident(input))
69    }
70
71    map_res(ident_str, node)(input)
72}
73
74/// Parse a string (single- or double-quoted, with no escaping)
75fn string_str(input: &str) -> IResult<&str, &str> {
76    fn strip_quotes(s: &str) -> Result<&str, ()> {
77        Ok(&s[1..s.len() - 1])
78    }
79    map_res(
80        ws(recognize(alt((
81            delimited(char('"'), take_till(|c| c == '"'), char('"')),
82            delimited(char('\''), take_till(|c| c == '\''), char('\'')),
83        )))),
84        strip_quotes,
85    )(input)
86}
87
88/// Parse a string as a Node
89fn string(input: &str) -> IResult<&str, Node<'_>> {
90    fn node(input: &str) -> Result<Node, ()> {
91        Ok(Node::String(input))
92    }
93
94    map_res(string_str, node)(input)
95}
96
97/// Parse any atomic value
98fn atom(input: &str) -> IResult<&str, Node<'_>> {
99    alt((number, literal, ident, string))(input)
100}
101
102// recognizers for operators that are prefixes of other tokens
103
104/// The "in" operator (disambiguated from longer identifiers)
105fn in_op(input: &str) -> IResult<&str, &str> {
106    recognize(pair(tag("in"), not(alt((alphanumeric1, tag("_"))))))(input)
107}
108
109/// The "<" operator (disambiguated from "<=")
110fn lt_op(input: &str) -> IResult<&str, &str> {
111    recognize(pair(tag("<"), not(tag("="))))(input)
112}
113
114/// The ">" operator (disambiguated from ">=")
115fn gt_op(input: &str) -> IResult<&str, &str> {
116    recognize(pair(tag(">"), not(tag("="))))(input)
117}
118
119/// The "!" operator (disambiguated from "!=")
120fn bang_op(input: &str) -> IResult<&str, &str> {
121    recognize(pair(tag("!"), not(tag("="))))(input)
122}
123
124/// The "*" operator (disambiguated from "**")
125fn mul_op(input: &str) -> IResult<&str, &str> {
126    recognize(pair(tag("*"), not(tag("*"))))(input)
127}
128
129// combinations of atoms into larger structures
130
131/// A parenthesized expression
132fn parens(input: &str) -> IResult<&str, Node<'_>> {
133    ws(delimited(char('('), expression, char(')')))(input)
134}
135
136/// An array literal
137fn array_literal(input: &str) -> IResult<&str, Node<'_>> {
138    fn node<'a>(input: Vec<Node<'a>>) -> Result<Node<'a>, ()> {
139        Ok(Node::Array(input))
140    }
141    map_res(
142        ws(delimited(
143            char('['),
144            separated_list0(ws(tag(",")), expression),
145            char(']'),
146        )),
147        node,
148    )(input)
149}
150
151/// An object literal, allowing either strings or identifiers as keys
152fn object_literal(input: &str) -> IResult<&str, Node<'_>> {
153    fn node<'a>(mut input: Vec<(&'a str, &'a str, Node<'a>)>) -> Result<Node<'a>, ()> {
154        Ok(Node::Object(
155            input.drain(..).map(|(k, _, v)| (k, v)).collect(),
156        ))
157    }
158    map_res(
159        ws(delimited(
160            char('{'),
161            separated_list0(
162                ws(tag(",")),
163                tuple((ws(alt((string_str, ident_str))), tag(":"), ws(expression))),
164            ),
165            char('}'),
166        )),
167        node,
168    )(input)
169}
170
171/// A single value (an atom, parenthesized value, or compound literal
172fn value(input: &str) -> IResult<&str, Node<'_>> {
173    alt((atom, parens, array_literal, object_literal))(input)
174}
175
176/// A unary expression
177fn unary_expr(input: &str) -> IResult<&str, Node<'_>> {
178    fn node<'a>(input: (&'a str, Node<'a>)) -> Result<Node<'a>, ()> {
179        Ok(Node::Un(input.0, Box::new(input.1)))
180    }
181    alt((
182        map_res(ws(tuple((alt((bang_op, tag("-"), tag("+"))), value))), node),
183        value,
184    ))(input)
185}
186
187/// An index expression (`x[i]`, `x[a..b]` or `x.p`) or function call.  These are left-associative
188/// at equal precedence.
189fn index_or_fn_expr(input: &str) -> IResult<&str, Node<'_>> {
190    // An index operation without its left-hand side.  The `fold_multi0` closure attaches
191    // these to their LHS's and creates Nodes.
192    enum ExprKind<'a> {
193        Index(Box<Node<'a>>),
194        Slice(Option<Box<Node<'a>>>, Option<Box<Node<'a>>>),
195        Dot(&'a str),
196        Func(Vec<Node<'a>>),
197    }
198
199    fn index_expr<'a>(input: (&'a str, Node<'a>, &'a str)) -> Result<ExprKind<'a>, ()> {
200        Ok(ExprKind::Index(Box::new(input.1)))
201    }
202
203    fn slice_expr<'a>(
204        input: (
205            &'a str,
206            Option<Node<'a>>,
207            &'a str,
208            Option<Node<'a>>,
209            &'a str,
210        ),
211    ) -> Result<ExprKind<'a>> {
212        Ok(ExprKind::Slice(
213            input.1.map(Box::new),
214            input.3.map(Box::new),
215        ))
216    }
217
218    fn dot_expr<'a>(input: (&'a str, &'a str)) -> Result<ExprKind<'a>> {
219        Ok(ExprKind::Dot(input.1))
220    }
221
222    fn func_expr<'a>(input: (&'a str, Vec<Node<'a>>, &'a str)) -> Result<ExprKind<'a>> {
223        Ok(ExprKind::Func(input.1))
224    }
225
226    let (i, init) = unary_expr(input)?;
227
228    fold_many0(
229        ws(alt((
230            map_res(tuple((tag("["), expression, tag("]"))), index_expr),
231            map_res(
232                tuple((
233                    tag("["),
234                    opt(expression),
235                    tag(":"),
236                    opt(expression),
237                    tag("]"),
238                )),
239                slice_expr,
240            ),
241            map_res(tuple((tag("."), ident_str)), dot_expr),
242            map_res(
243                ws(tuple((
244                    tag("("),
245                    separated_list0(ws(tag(",")), expression),
246                    tag(")"),
247                ))),
248                func_expr,
249            ),
250        ))),
251        // This clone is necessary because backtracking may result in this
252        // parser running more than once.
253        move || init.clone(),
254        |acc: Node, expr_kind: ExprKind| {
255            let acc = Box::new(acc);
256            match expr_kind {
257                ExprKind::Index(i) => Node::Index(acc, i),
258                ExprKind::Slice(a, b) => Node::Slice(acc, a, b),
259                ExprKind::Dot(p) => Node::Dot(acc, p),
260                ExprKind::Func(args) => Node::Func(acc, args),
261            }
262        },
263    )(i)
264}
265
266/// Exponentiation is right-associative
267fn exp_expr(input: &str) -> IResult<&str, Node<'_>> {
268    fn node<'a>(input: (Node<'a>, &'a str, Node<'a>)) -> Result<Node<'a>, ()> {
269        Ok(Node::Op(Box::new(input.0), input.1, Box::new(input.2)))
270    }
271
272    alt((
273        map_res(tuple((index_or_fn_expr, tag("**"), exp_expr)), node),
274        index_or_fn_expr,
275    ))(input)
276}
277
278/// Define a simple left-associative binary operation which chains to a
279/// higher-precedence operation.
280macro_rules! binop {
281    ($name:ident, $higher_prec:ident, $ops:expr) => {
282        fn $name(input: &str) -> IResult<&str, Node<'_>> {
283            let (i, init) = $higher_prec(input)?;
284
285            fold_many0(
286                pair($ops, $higher_prec),
287                // This clone is necessary because backtracking may result in this
288                // parser running more than once.
289                move || init.clone(),
290                |acc: Node, (op, val): (&str, Node)| Node::Op(Box::new(acc), op, Box::new(val)),
291            )(i)
292        }
293    };
294}
295
296binop!(muldiv_expr, exp_expr, alt((mul_op, tag("/"))));
297binop!(addsub_expr, muldiv_expr, alt((tag("+"), tag("-"))));
298binop!(
299    inequality_expr,
300    addsub_expr,
301    alt((tag("<="), tag(">="), lt_op, gt_op))
302);
303binop!(equality_expr, inequality_expr, alt((tag("=="), tag("!="))));
304binop!(in_expr, equality_expr, in_op);
305binop!(and_expr, in_expr, tag("&&"));
306binop!(or_expr, and_expr, tag("||"));
307
308/// Parse a JSON-e expression.
309fn expression(input: &str) -> IResult<&str, Node<'_>> {
310    alt((or_expr, value))(input)
311}
312
313/// Parse an entire string as an expression.  Un-parsed characters are treated as an error.
314pub(crate) fn parse_all(input: &str) -> anyhow::Result<Node> {
315    match expression(input) {
316        Ok(("", node)) => Ok(node),
317        Ok((unused, _)) => Err(anyhow!("Unexpected trailing characters {}", unused)),
318        Err(Err::Incomplete(_)) => unreachable!(),
319        Err(Err::Error(e)) => Err(anyhow!("Parse error at {:?}", e.input)),
320        Err(Err::Failure(e)) => Err(anyhow!("Parse error at {:?}", e.input)),
321    }
322}
323
324/// Parse a part of a string as an expression, returning the remainder of the string.
325pub(crate) fn parse_partial(input: &str) -> anyhow::Result<(Node, &str)> {
326    match expression(input) {
327        Ok((unused, node)) => Ok((node, unused)),
328        Err(Err::Incomplete(_)) => unreachable!(),
329        Err(Err::Error(e)) => Err(anyhow!("Parse error at {:?}", e.input)),
330        Err(Err::Failure(e)) => Err(anyhow!("Parse error at {:?}", e.input)),
331    }
332}
333
334#[cfg(test)]
335mod test {
336    use super::*;
337
338    #[test]
339    fn test_number_integer() {
340        assert_eq!(number("123"), Ok(("", Node::Number("123"))));
341    }
342
343    #[test]
344    fn test_number_integer_ws() {
345        assert_eq!(number("  123\t\n"), Ok(("", Node::Number("123"))));
346    }
347
348    #[test]
349    fn test_number_decimal() {
350        assert_eq!(number("123.456"), Ok(("", Node::Number("123.456"))));
351    }
352
353    #[test]
354    fn test_literal_true() {
355        assert_eq!(literal("true"), Ok(("", Node::True)));
356    }
357
358    #[test]
359    fn test_literal_true_as_atom() {
360        assert_eq!(atom("true"), Ok(("", Node::True)));
361    }
362
363    #[test]
364    fn test_literal_false_as_atom() {
365        assert_eq!(atom("false"), Ok(("", Node::False)));
366    }
367
368    #[test]
369    fn test_literal_null_as_atom() {
370        assert_eq!(atom("null"), Ok(("", Node::Null)));
371    }
372
373    #[test]
374    fn test_ident() {
375        assert_eq!(ident("abc"), Ok(("", Node::Ident("abc"))));
376    }
377
378    #[test]
379    fn test_ident_digits() {
380        assert!(ident("1").is_err());
381    }
382
383    #[test]
384    fn test_ident_literal_prefix_as_atom() {
385        assert_eq!(atom("falsey"), Ok(("", Node::Ident("falsey"))));
386    }
387
388    #[test]
389    fn test_ident_underscore() {
390        assert_eq!(ident("_abc"), Ok(("", Node::Ident("_abc"))));
391    }
392
393    #[test]
394    fn test_ident_underscore_numeric() {
395        assert_eq!(ident("_abc0def"), Ok(("", Node::Ident("_abc0def"))));
396    }
397
398    #[test]
399    fn test_string_single_quote() {
400        assert_eq!(string(" 'ab \"cd'"), Ok(("", Node::String("ab \"cd"))));
401    }
402
403    #[test]
404    fn test_string_double_quote() {
405        assert_eq!(string("\"a' bcd\" "), Ok(("", Node::String("a' bcd"))));
406    }
407
408    #[test]
409    fn test_empty_string_single_quote() {
410        assert_eq!(string("''"), Ok(("", Node::String(""))));
411    }
412
413    #[test]
414    fn test_empty_string_double_quote() {
415        assert_eq!(string("\"\""), Ok(("", Node::String(""))));
416    }
417
418    #[test]
419    fn test_in_op() {
420        assert_eq!(in_op("in"), Ok(("", "in")));
421    }
422
423    #[test]
424    fn test_in_op_in_larger_identifier() {
425        assert!(in_op("insinuation").is_err());
426    }
427
428    #[test]
429    fn test_unary_neg() {
430        assert_eq!(
431            expression("- 1 + -2"),
432            Ok((
433                "",
434                Node::Op(
435                    Box::new(Node::Un("-", Box::new(Node::Number("1")))),
436                    "+",
437                    Box::new(Node::Un("-", Box::new(Node::Number("2"))))
438                )
439            ))
440        );
441    }
442
443    #[test]
444    fn test_index() {
445        assert_eq!(
446            expression("a[2+3]"),
447            Ok((
448                "",
449                Node::Index(
450                    Box::new(Node::Ident("a")),
451                    Box::new(Node::Op(
452                        Box::new(Node::Number("2")),
453                        "+",
454                        Box::new(Node::Number("3"))
455                    )),
456                )
457            ))
458        );
459    }
460
461    #[test]
462    fn test_slice_some() {
463        assert_eq!(
464            expression("a[2:3]"),
465            Ok((
466                "",
467                Node::Slice(
468                    Box::new(Node::Ident("a")),
469                    Some(Box::new(Node::Number("2"))),
470                    Some(Box::new(Node::Number("3")))
471                )
472            ))
473        );
474    }
475
476    #[test]
477    fn test_slice_none() {
478        assert_eq!(
479            expression("a[:]"),
480            Ok(("", Node::Slice(Box::new(Node::Ident("a")), None, None)))
481        );
482    }
483
484    #[test]
485    fn test_dot() {
486        assert_eq!(
487            expression("a.b"),
488            Ok(("", Node::Dot(Box::new(Node::Ident("a")), "b")))
489        );
490    }
491
492    #[test]
493    fn test_function() {
494        assert_eq!(
495            expression("-1 (2, 3)"),
496            Ok((
497                "",
498                Node::Func(
499                    Box::new(Node::Un("-", Box::new(Node::Number("1")))),
500                    vec![Node::Number("2"), Node::Number("3"),],
501                )
502            ))
503        );
504    }
505
506    #[test]
507    fn test_function_indexed() {
508        assert_eq!(
509            expression("f(2)[0]"),
510            Ok((
511                "",
512                Node::Index(
513                    Box::new(Node::Func(
514                        Box::new(Node::Ident("f")),
515                        vec![Node::Number("2")]
516                    )),
517                    Box::new(Node::Number("0")),
518                ),
519            ))
520        );
521    }
522
523    #[test]
524    fn test_function_dot() {
525        assert_eq!(
526            expression("f(2).result"),
527            Ok((
528                "",
529                Node::Dot(
530                    Box::new(Node::Func(
531                        Box::new(Node::Ident("f")),
532                        vec![Node::Number("2")]
533                    )),
534                    "result",
535                ),
536            ))
537        );
538    }
539
540    #[test]
541    fn test_expr_or() {
542        assert_eq!(
543            expression("true || ( false || true ) || false"),
544            Ok((
545                "",
546                Node::Op(
547                    Box::new(Node::Op(
548                        Box::new(Node::True),
549                        "||",
550                        Box::new(Node::Op(Box::new(Node::False), "||", Box::new(Node::True)))
551                    )),
552                    "||",
553                    Box::new(Node::False),
554                )
555            ))
556        );
557    }
558
559    #[test]
560    fn test_expr_and_or() {
561        assert_eq!(
562            expression("a || b && c || d"),
563            Ok((
564                "",
565                Node::Op(
566                    Box::new(Node::Op(
567                        Box::new(Node::Ident("a")),
568                        "||",
569                        Box::new(Node::Op(
570                            Box::new(Node::Ident("b")),
571                            "&&",
572                            Box::new(Node::Ident("c"))
573                        ))
574                    )),
575                    "||",
576                    Box::new(Node::Ident("d")),
577                )
578            ))
579        );
580    }
581
582    #[test]
583    fn test_inequalities() {
584        assert_eq!(
585            expression("1 < 2 == 3 >= 4"),
586            Ok((
587                "",
588                Node::Op(
589                    Box::new(Node::Op(
590                        Box::new(Node::Number("1")),
591                        "<",
592                        Box::new(Node::Number("2")),
593                    )),
594                    "==",
595                    Box::new(Node::Op(
596                        Box::new(Node::Number("3")),
597                        ">=",
598                        Box::new(Node::Number("4"))
599                    ))
600                )
601            ))
602        );
603    }
604
605    #[test]
606    fn test_exp_right_assoc() {
607        assert_eq!(
608            expression("a + 1 ** 2 ** 3"),
609            expression("a + (1 ** (2 ** 3))")
610        );
611    }
612
613    #[test]
614    fn test_parse_all() {
615        assert_eq!(parse_all("abcd").unwrap(), Node::Ident("abcd"));
616    }
617
618    #[test]
619    fn test_exp_high_followed_by_low_prec() {
620        assert_eq!(
621            parse_all("9 * 10 + 11").unwrap(),
622            Node::Op(
623                Box::new(Node::Op(
624                    Box::new(Node::Number("9")),
625                    "*",
626                    Box::new(Node::Number("10")),
627                )),
628                "+",
629                Box::new(Node::Number("11"))
630            )
631        );
632    }
633
634    #[test]
635    fn test_parse_function_call_in_operator() {
636        assert_eq!(
637            parse_all("x(10) + 11").unwrap(),
638            Node::Op(
639                Box::new(Node::Func(
640                    Box::new(Node::Ident("x")),
641                    vec![Node::Number("10")]
642                )),
643                "+",
644                Box::new(Node::Number("11"))
645            )
646        );
647    }
648
649    #[test]
650    fn test_parse_no_args_fn() {
651        assert_eq!(
652            parse_all("f()").unwrap(),
653            Node::Func(Box::new(Node::Ident("f")), vec![],)
654        );
655    }
656
657    #[test]
658    fn test_parse_all_err() {
659        assert!(parse_all("~~~").is_err());
660    }
661
662    #[test]
663    fn test_parse_all_trailing_chars() {
664        assert!(parse_all("abc 123").is_err());
665    }
666
667    #[test]
668    fn test_parse_partial() {
669        assert_eq!(parse_partial("abcd").unwrap(), (Node::Ident("abcd"), ""));
670    }
671
672    #[test]
673    fn test_parse_partial_err() {
674        assert!(parse_partial("~~~").is_err());
675    }
676
677    #[test]
678    fn test_parse_partial_trailing_chars() {
679        // note that this consumes the whitespace, too
680        assert_eq!(
681            parse_partial("abc 123").unwrap(),
682            (Node::Ident("abc"), "123")
683        );
684    }
685}