sql_json_path/
parser.rs

1// Copyright 2023 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! JSON Path parser written in [nom].
16
17use crate::{ast::*, eval::NumberExt};
18use nom::{
19    branch::alt,
20    bytes::complete::{tag, tag_no_case, take_while, take_while1},
21    character::complete::{char, multispace0 as s, u32},
22    combinator::{cut, eof, map, opt, value, verify},
23    error::context,
24    multi::{fold_many0, many0, separated_list1},
25    number::complete::double,
26    sequence::{delimited, pair, preceded, separated_pair, terminated, tuple},
27    Err, Finish, IResult, Offset,
28};
29use serde_json::Number;
30use std::str::FromStr;
31
32impl JsonPath {
33    /// Compiles a JSON Path expression.
34    pub fn new(s: &str) -> Result<Self, Error> {
35        Self::from_str(s)
36    }
37}
38
39impl FromStr for JsonPath {
40    type Err = Error;
41
42    /// Parse a JSON Path from string.
43    fn from_str(s: &str) -> Result<Self, Self::Err> {
44        if s.trim().is_empty() {
45            return Err(Error {
46                position: 0,
47                message: "empty jsonpath".into(),
48            });
49        }
50        let (_, json_path) = json_path(s)
51            .finish()
52            .map_err(|e| Error::from_input_error(s, e))?;
53        Checker::default()
54            .visit_json_path(&json_path)
55            .map_err(|msg| Error {
56                position: 0,
57                message: msg.into(),
58            })?;
59        Ok(json_path)
60    }
61}
62
63/// The error type returned when parsing a JSON Path.
64#[derive(Debug, thiserror::Error)]
65#[error("at position {position}, {message}")]
66pub struct Error {
67    position: usize,
68    message: Box<str>,
69}
70
71impl Error {
72    fn from_input_error(input: &str, err: nom::error::Error<&str>) -> Self {
73        let position = input.offset(err.input);
74        let message = err.to_string().into();
75        Self { position, message }
76    }
77}
78
79fn json_path(input: &str) -> IResult<&str, JsonPath> {
80    map(
81        preceded(s, separated_pair(mode, s, expr_or_predicate_eof)),
82        |(mode, expr)| JsonPath { mode, expr },
83    )(input)
84}
85
86fn expr_or_predicate_eof(input: &str) -> IResult<&str, ExprOrPredicate> {
87    alt((
88        map(terminated(predicate, pair(s, eof)), ExprOrPredicate::Pred),
89        map(terminated(expr, pair(s, eof)), ExprOrPredicate::Expr),
90    ))(input)
91}
92
93fn expr_or_predicate(input: &str) -> IResult<&str, ExprOrPredicate> {
94    alt((
95        map(predicate, ExprOrPredicate::Pred),
96        map(expr, ExprOrPredicate::Expr),
97    ))(input)
98}
99
100fn mode(input: &str) -> IResult<&str, Mode> {
101    alt((
102        value(Mode::Strict, tag_no_case("strict")),
103        value(Mode::Lax, tag_no_case("lax")),
104        value(Mode::Lax, tag_no_case("")),
105    ))(input)
106}
107
108fn predicate(input: &str) -> IResult<&str, Predicate> {
109    let (input, first) = predicate1(input)?;
110    let mut first0 = Some(first);
111    fold_many0(
112        preceded(delimited(s, tag("||"), s), predicate1),
113        move || first0.take().unwrap(),
114        |acc, pred| Predicate::Or(Box::new(acc), Box::new(pred)),
115    )(input)
116}
117
118fn predicate1(input: &str) -> IResult<&str, Predicate> {
119    let (input, first) = predicate2(input)?;
120    let mut first0 = Some(first);
121    fold_many0(
122        preceded(delimited(s, tag("&&"), s), predicate2),
123        move || first0.take().unwrap(),
124        |acc, pred| Predicate::And(Box::new(acc), Box::new(pred)),
125    )(input)
126}
127
128fn predicate2(input: &str) -> IResult<&str, Predicate> {
129    alt((
130        map(
131            tuple((expr, delimited(s, cmp_op, s), expr)),
132            |(left, op, right)| Predicate::Compare(op, Box::new(left), Box::new(right)),
133        ),
134        map(
135            delimited(
136                pair(char('('), s),
137                predicate,
138                tuple((
139                    s,
140                    char(')'),
141                    s,
142                    tag_no_case("is"),
143                    s,
144                    tag_no_case("unknown"),
145                )),
146            ),
147            |p| Predicate::IsUnknown(Box::new(p)),
148        ),
149        map(
150            separated_pair(
151                expr,
152                tuple((s, tag_no_case("starts"), s, tag_no_case("with"), s)),
153                starts_with_literal,
154            ),
155            |(expr, literal)| Predicate::StartsWith(Box::new(expr), literal),
156        ),
157        like_regex,
158        map(preceded(pair(tag("!"), s), delimited_predicate), |p| {
159            Predicate::Not(Box::new(p))
160        }),
161        delimited_predicate,
162    ))(input)
163}
164
165fn like_regex(input: &str) -> IResult<&str, Predicate> {
166    let (rest, ((expr, pattern), flags)) = pair(
167        separated_pair(expr, tuple((s, tag_no_case("like_regex"), s)), string),
168        opt(preceded(tuple((s, tag_no_case("flag"), s)), string)),
169    )(input)?;
170    let regex = Regex::with_flags(&pattern, flags).map_err(|_| {
171        Err::Failure(nom::error::Error::new(
172            input,
173            // FIXME: should return a custom error
174            nom::error::ErrorKind::RegexpMatch,
175        ))
176    })?;
177    Ok((rest, Predicate::LikeRegex(Box::new(expr), Box::new(regex))))
178}
179
180fn delimited_predicate(input: &str) -> IResult<&str, Predicate> {
181    alt((
182        delimited(pair(char('('), s), predicate, pair(s, char(')'))),
183        map(
184            delimited(
185                tuple((tag_no_case("exists"), s, char('('), s)),
186                expr,
187                pair(s, char(')')),
188            ),
189            |expr| Predicate::Exists(Box::new(expr)),
190        ),
191    ))(input)
192}
193
194fn expr(input: &str) -> IResult<&str, Expr> {
195    let (input, first) = expr1(input)?;
196    let mut first0 = Some(first);
197    fold_many0(
198        pair(delimited(s, alt((char('+'), char('-'))), s), expr1),
199        move || first0.take().unwrap(),
200        |acc, (op, expr)| match op {
201            '+' => Expr::BinaryOp(BinaryOp::Add, Box::new(acc), Box::new(expr)),
202            '-' => Expr::BinaryOp(BinaryOp::Sub, Box::new(acc), Box::new(expr)),
203            _ => unreachable!(),
204        },
205    )(input)
206}
207
208fn expr1(input: &str) -> IResult<&str, Expr> {
209    let (input, first) = expr2(input)?;
210    let mut first0 = Some(first);
211    fold_many0(
212        pair(
213            delimited(s, alt((char('*'), char('/'), char('%'))), s),
214            expr2,
215        ),
216        move || first0.take().unwrap(),
217        |acc, (op, expr)| match op {
218            '*' => Expr::BinaryOp(BinaryOp::Mul, Box::new(acc), Box::new(expr)),
219            '/' => Expr::BinaryOp(BinaryOp::Div, Box::new(acc), Box::new(expr)),
220            '%' => Expr::BinaryOp(BinaryOp::Rem, Box::new(acc), Box::new(expr)),
221            _ => unreachable!(),
222        },
223    )(input)
224}
225
226fn expr2(input: &str) -> IResult<&str, Expr> {
227    alt((
228        accessor_expr,
229        map(preceded(pair(char('+'), s), expr2), |expr| match &expr {
230            // constant folding
231            Expr::PathPrimary(PathPrimary::Value(Value::Number(_))) => expr,
232            _ => Expr::UnaryOp(UnaryOp::Plus, Box::new(expr)),
233        }),
234        map(preceded(pair(char('-'), s), expr2), |expr| match &expr {
235            // constant folding
236            Expr::PathPrimary(PathPrimary::Value(Value::Number(n))) => {
237                Expr::PathPrimary(PathPrimary::Value(Value::Number(n.neg())))
238            }
239            _ => Expr::UnaryOp(UnaryOp::Minus, Box::new(expr)),
240        }),
241    ))(input)
242}
243
244fn accessor_expr(input: &str) -> IResult<&str, Expr> {
245    map(
246        pair(path_primary, many0(preceded(s, accessor_op))),
247        |(primary, ops)| {
248            let mut expr = Expr::PathPrimary(primary.unnest());
249            for op in ops {
250                expr = Expr::Accessor(Box::new(expr), op);
251            }
252            expr
253        },
254    )(input)
255}
256
257fn path_primary(input: &str) -> IResult<&str, PathPrimary> {
258    alt((
259        map(scalar_value, PathPrimary::Value),
260        value(PathPrimary::Root, char('$')),
261        value(PathPrimary::Current, char('@')),
262        value(PathPrimary::Last, tag_no_case("last")),
263        map(
264            delimited(pair(char('('), s), expr_or_predicate, pair(s, char(')'))),
265            |expr| PathPrimary::ExprOrPred(Box::new(expr)),
266        ),
267    ))(input)
268}
269
270fn accessor_op(input: &str) -> IResult<&str, AccessorOp> {
271    alt((
272        map(
273            preceded(tag(".**"), level_range),
274            AccessorOp::DescendantMemberWildcard,
275        ),
276        value(AccessorOp::MemberWildcard, tag(".*")),
277        value(AccessorOp::ElementWildcard, element_wildcard),
278        map(item_method, AccessorOp::Method),
279        map(member_accessor, AccessorOp::Member),
280        map(array_accessor, AccessorOp::Element),
281        map(filter_expr, |expr| AccessorOp::FilterExpr(Box::new(expr))),
282    ))(input)
283}
284
285fn level_range(input: &str) -> IResult<&str, LevelRange> {
286    alt((
287        map(
288            delimited(
289                pair(char('{'), s),
290                separated_pair(level, delimited(s, tag_no_case("to"), s), level),
291                pair(s, char('}')),
292            ),
293            |(start, end)| {
294                if start == end {
295                    LevelRange::One(start)
296                } else {
297                    LevelRange::Range(start, end)
298                }
299            },
300        ),
301        map(
302            delimited(pair(char('{'), s), level, pair(s, char('}'))),
303            LevelRange::One,
304        ),
305        value(LevelRange::All, tag("")),
306    ))(input)
307}
308
309fn level(input: &str) -> IResult<&str, Level> {
310    alt((value(Level::Last, tag_no_case("last")), map(u32, Level::N)))(input)
311}
312
313fn element_wildcard(input: &str) -> IResult<&str, ()> {
314    value((), tuple((char('['), s, char('*'), s, char(']'))))(input)
315}
316
317fn member_accessor(input: &str) -> IResult<&str, String> {
318    preceded(pair(char('.'), s), alt((string, raw_string)))(input)
319}
320
321fn array_accessor(input: &str) -> IResult<&str, Vec<ArrayIndex>> {
322    delimited(
323        char('['),
324        separated_list1(char(','), delimited(s, index_elem, s)),
325        char(']'),
326    )(input)
327}
328
329fn index_elem(input: &str) -> IResult<&str, ArrayIndex> {
330    alt((
331        map(
332            separated_pair(expr, delimited(s, tag_no_case("to"), s), expr),
333            |(start, end)| ArrayIndex::Slice(start, end),
334        ),
335        map(expr, ArrayIndex::Index),
336    ))(input)
337}
338
339fn filter_expr(input: &str) -> IResult<&str, Predicate> {
340    delimited(
341        tuple((char('?'), s, char('('), s)),
342        predicate,
343        tuple((s, char(')'))),
344    )(input)
345}
346
347fn cmp_op(input: &str) -> IResult<&str, CompareOp> {
348    alt((
349        value(CompareOp::Eq, tag("==")),
350        value(CompareOp::Ne, tag("!=")),
351        value(CompareOp::Ne, tag("<>")),
352        value(CompareOp::Le, tag("<=")),
353        value(CompareOp::Lt, char('<')),
354        value(CompareOp::Ge, tag(">=")),
355        value(CompareOp::Gt, char('>')),
356    ))(input)
357}
358
359fn item_method(input: &str) -> IResult<&str, Method> {
360    delimited(
361        pair(char('.'), s),
362        method,
363        tuple((s, char('('), s, char(')'))),
364    )(input)
365}
366
367fn method(input: &str) -> IResult<&str, Method> {
368    alt((
369        value(Method::Type, tag_no_case("type")),
370        value(Method::Size, tag_no_case("size")),
371        value(Method::Double, tag_no_case("double")),
372        value(Method::Ceiling, tag_no_case("ceiling")),
373        value(Method::Floor, tag_no_case("floor")),
374        value(Method::Abs, tag_no_case("abs")),
375        value(Method::Keyvalue, tag_no_case("keyvalue")),
376    ))(input)
377}
378
379fn scalar_value(input: &str) -> IResult<&str, Value> {
380    alt((
381        value(Value::Null, tag("null")),
382        value(Value::Boolean(true), tag("true")),
383        value(Value::Boolean(false), tag("false")),
384        map(number, Value::Number),
385        map(string, Value::String),
386        map(variable, Value::Variable),
387    ))(input)
388}
389
390fn number(input: &str) -> IResult<&str, Number> {
391    map(double, |v| {
392        if v == v.trunc() {
393            Number::from(v as i64)
394        } else {
395            Number::from_f64(v).unwrap()
396        }
397    })(input)
398}
399
400fn starts_with_literal(input: &str) -> IResult<&str, Value> {
401    alt((map(string, Value::String), map(variable, Value::Variable)))(input)
402}
403
404fn variable(input: &str) -> IResult<&str, String> {
405    preceded(char('$'), raw_string)(input)
406}
407
408fn string(input: &str) -> IResult<&str, String> {
409    context(
410        "double quoted string",
411        delimited(
412            char('"'),
413            fold_many0(
414                alt((
415                    map(unescaped_str, String::from),
416                    map(escaped_char, String::from),
417                )),
418                String::new,
419                |mut string, fragment| {
420                    if string.is_empty() {
421                        fragment
422                    } else {
423                        string.push_str(&fragment);
424                        string
425                    }
426                },
427            ),
428            cut(char('"')),
429        ),
430    )(input)
431}
432
433fn escaped_char(input: &str) -> IResult<&str, char> {
434    context(
435        "escaped character",
436        preceded(
437            char('\\'),
438            alt((
439                value('\u{0008}', char('b')),
440                value('\u{0009}', char('t')),
441                value('\u{000A}', char('n')),
442                value('\u{000C}', char('f')),
443                value('\u{000D}', char('r')),
444                value('\u{002F}', char('/')),
445                value('\u{005C}', char('\\')),
446                value('\u{0022}', char('"')),
447                // unicode_sequence,
448            )),
449        ),
450    )(input)
451}
452
453fn unescaped_str(input: &str) -> IResult<&str, &str> {
454    context(
455        "unescaped character",
456        verify(take_while(is_valid_unescaped_char), |s: &str| !s.is_empty()),
457    )(input)
458}
459
460fn is_valid_unescaped_char(chr: char) -> bool {
461    match chr {
462        '"' => false,
463        '\u{20}'..='\u{5B}' // Omit control characters
464        | '\u{5D}'..='\u{10FFFF}' => true, // Omit \
465        _ => false,
466    }
467}
468
469fn raw_string(input: &str) -> IResult<&str, String> {
470    map(
471        take_while1(|c: char| c.is_ascii_alphanumeric() || c == '_' || c >= '\u{0080}'),
472        String::from,
473    )(input)
474}
475
476/// A visitor that checks if a JSON Path is valid.
477///
478/// An error is returned if:
479///
480/// - `@` is used in a non-root expression
481/// - `last` is used in a non-array subscript
482#[derive(Debug, Clone, Copy, Default)]
483struct Checker {
484    non_root: bool,
485    inside_element_accessor: bool,
486}
487
488impl Checker {
489    fn visit_json_path(&self, json_path: &JsonPath) -> Result<(), &'static str> {
490        self.visit_expr_or_predicate(&json_path.expr)
491    }
492
493    fn visit_expr_or_predicate(&self, expr_or_pred: &ExprOrPredicate) -> Result<(), &'static str> {
494        match expr_or_pred {
495            ExprOrPredicate::Expr(expr) => self.visit_expr(expr),
496            ExprOrPredicate::Pred(pred) => self.visit_predicate(pred),
497        }
498    }
499
500    fn visit_expr(&self, expr: &Expr) -> Result<(), &'static str> {
501        match expr {
502            Expr::PathPrimary(primary) => self.visit_path_primary(primary),
503            Expr::Accessor(base, accessor) => {
504                self.visit_expr(base)?;
505                self.visit_accessor_op(accessor)
506            }
507            Expr::UnaryOp(_, expr) => self.visit_expr(expr),
508            Expr::BinaryOp(_, left, right) => {
509                self.visit_expr(left)?;
510                self.visit_expr(right)
511            }
512        }
513    }
514
515    fn visit_predicate(&self, pred: &Predicate) -> Result<(), &'static str> {
516        match pred {
517            Predicate::Compare(_, left, right) => {
518                self.visit_expr(left)?;
519                self.visit_expr(right)
520            }
521            Predicate::Exists(expr) => self.visit_expr(expr),
522            Predicate::And(left, right) | Predicate::Or(left, right) => {
523                self.visit_predicate(left)?;
524                self.visit_predicate(right)
525            }
526            Predicate::Not(pred) => self.visit_predicate(pred),
527            Predicate::IsUnknown(pred) => self.visit_predicate(pred),
528            Predicate::StartsWith(expr, _) => self.visit_expr(expr),
529            Predicate::LikeRegex(expr, _) => self.visit_expr(expr),
530        }
531    }
532
533    fn visit_path_primary(&self, primary: &PathPrimary) -> Result<(), &'static str> {
534        match primary {
535            PathPrimary::Last if !self.inside_element_accessor => {
536                Err("LAST is allowed only in array subscripts")
537            }
538            PathPrimary::Current if !self.non_root => Err("@ is not allowed in root expressions"),
539            _ => Ok(()),
540        }
541    }
542
543    fn visit_accessor_op(&self, accessor_op: &AccessorOp) -> Result<(), &'static str> {
544        match accessor_op {
545            AccessorOp::ElementWildcard | AccessorOp::MemberWildcard => Ok(()),
546            AccessorOp::DescendantMemberWildcard(_) => Ok(()),
547            AccessorOp::Member(_) | AccessorOp::Method(_) => Ok(()),
548            AccessorOp::FilterExpr(pred) => Self {
549                non_root: true,
550                ..*self
551            }
552            .visit_predicate(pred),
553            AccessorOp::Element(indices) => {
554                let next = Self {
555                    non_root: true,
556                    inside_element_accessor: true,
557                };
558                for index in indices {
559                    match index {
560                        ArrayIndex::Index(i) => next.visit_expr(i)?,
561                        ArrayIndex::Slice(s, e) => {
562                            next.visit_expr(s)?;
563                            next.visit_expr(e)?;
564                        }
565                    }
566                }
567                Ok(())
568            }
569        }
570    }
571}
572
573#[cfg(test)]
574mod tests {
575    use super::*;
576
577    #[test]
578    fn test_json_path() {
579        JsonPath::from_str(r#"lax $.name ? (@ starts with "O''")"#).unwrap();
580        JsonPath::from_str(r#"lax $.name ? (@ starts with "\"hello")"#).unwrap();
581        // JsonPath::from_str(r#"lax $.name ? (@ starts with "O\u0027")"#).unwrap();
582        // JsonPath::from_str(r#"lax $.name ? (@ starts with "\u0022hello")"#).unwrap();
583    }
584}