Skip to main content

sql_json_path/
parser.rs

1// Copyright 2023 RisingWave Labs
2// Modifications Copyright (c) Citadel contributors.
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15//
16// This file has been modified by Citadel contributors.
17
18//! JSON Path parser written in [nom].
19
20use crate::{ast::*, eval::NumberExt};
21use nom::{
22    branch::alt,
23    bytes::complete::{tag, tag_no_case, take_while, take_while1},
24    character::complete::{char, multispace0 as s, u32},
25    combinator::{cut, eof, map, opt, value, verify},
26    error::context,
27    multi::{fold_many0, many0, separated_list1},
28    number::complete::double,
29    sequence::{delimited, pair, preceded, separated_pair, terminated, tuple},
30    Err, Finish, IResult, Offset,
31};
32use serde_json::Number;
33use std::str::FromStr;
34
35impl JsonPath {
36    /// Compiles a JSON Path expression.
37    pub fn new(s: &str) -> Result<Self, Error> {
38        Self::from_str(s)
39    }
40}
41
42impl FromStr for JsonPath {
43    type Err = Error;
44
45    /// Parse a JSON Path from string.
46    fn from_str(s: &str) -> Result<Self, Self::Err> {
47        if s.trim().is_empty() {
48            return Err(Error {
49                position: 0,
50                message: "empty jsonpath".into(),
51            });
52        }
53        let (_, json_path) = json_path(s)
54            .finish()
55            .map_err(|e| Error::from_input_error(s, e))?;
56        Checker::default()
57            .visit_json_path(&json_path)
58            .map_err(|msg| Error {
59                position: 0,
60                message: msg.into(),
61            })?;
62        Ok(json_path)
63    }
64}
65
66/// The error type returned when parsing a JSON Path.
67#[derive(Debug, thiserror::Error)]
68#[error("at position {position}, {message}")]
69pub struct Error {
70    position: usize,
71    message: Box<str>,
72}
73
74impl Error {
75    fn from_input_error(input: &str, err: nom::error::Error<&str>) -> Self {
76        let position = input.offset(err.input);
77        let message = err.to_string().into();
78        Self { position, message }
79    }
80}
81
82fn json_path(input: &str) -> IResult<&str, JsonPath> {
83    map(
84        preceded(s, separated_pair(mode, s, expr_or_predicate_eof)),
85        |(mode, expr)| JsonPath { mode, expr },
86    )(input)
87}
88
89fn expr_or_predicate_eof(input: &str) -> IResult<&str, ExprOrPredicate> {
90    alt((
91        map(terminated(predicate, pair(s, eof)), ExprOrPredicate::Pred),
92        map(terminated(expr, pair(s, eof)), ExprOrPredicate::Expr),
93    ))(input)
94}
95
96fn expr_or_predicate(input: &str) -> IResult<&str, ExprOrPredicate> {
97    alt((
98        map(predicate, ExprOrPredicate::Pred),
99        map(expr, ExprOrPredicate::Expr),
100    ))(input)
101}
102
103fn mode(input: &str) -> IResult<&str, Mode> {
104    alt((
105        value(Mode::Strict, tag_no_case("strict")),
106        value(Mode::Lax, tag_no_case("lax")),
107        value(Mode::Lax, tag_no_case("")),
108    ))(input)
109}
110
111fn predicate(input: &str) -> IResult<&str, Predicate> {
112    let (input, first) = predicate1(input)?;
113    let mut first0 = Some(first);
114    fold_many0(
115        preceded(delimited(s, tag("||"), s), predicate1),
116        move || first0.take().unwrap(),
117        |acc, pred| Predicate::Or(Box::new(acc), Box::new(pred)),
118    )(input)
119}
120
121fn predicate1(input: &str) -> IResult<&str, Predicate> {
122    let (input, first) = predicate2(input)?;
123    let mut first0 = Some(first);
124    fold_many0(
125        preceded(delimited(s, tag("&&"), s), predicate2),
126        move || first0.take().unwrap(),
127        |acc, pred| Predicate::And(Box::new(acc), Box::new(pred)),
128    )(input)
129}
130
131fn predicate2(input: &str) -> IResult<&str, Predicate> {
132    alt((
133        map(
134            tuple((expr, delimited(s, cmp_op, s), expr)),
135            |(left, op, right)| Predicate::Compare(op, Box::new(left), Box::new(right)),
136        ),
137        map(
138            delimited(
139                pair(char('('), s),
140                predicate,
141                tuple((
142                    s,
143                    char(')'),
144                    s,
145                    tag_no_case("is"),
146                    s,
147                    tag_no_case("unknown"),
148                )),
149            ),
150            |p| Predicate::IsUnknown(Box::new(p)),
151        ),
152        map(
153            separated_pair(
154                expr,
155                tuple((s, tag_no_case("starts"), s, tag_no_case("with"), s)),
156                starts_with_literal,
157            ),
158            |(expr, literal)| Predicate::StartsWith(Box::new(expr), literal),
159        ),
160        like_regex,
161        map(preceded(pair(tag("!"), s), delimited_predicate), |p| {
162            Predicate::Not(Box::new(p))
163        }),
164        delimited_predicate,
165    ))(input)
166}
167
168fn like_regex(input: &str) -> IResult<&str, Predicate> {
169    let (rest, ((expr, pattern), flags)) = pair(
170        separated_pair(expr, tuple((s, tag_no_case("like_regex"), s)), string),
171        opt(preceded(tuple((s, tag_no_case("flag"), s)), string)),
172    )(input)?;
173    let regex = Regex::with_flags(&pattern, flags).map_err(|_| {
174        Err::Failure(nom::error::Error::new(
175            input,
176            // FIXME: should return a custom error
177            nom::error::ErrorKind::RegexpMatch,
178        ))
179    })?;
180    Ok((rest, Predicate::LikeRegex(Box::new(expr), Box::new(regex))))
181}
182
183fn delimited_predicate(input: &str) -> IResult<&str, Predicate> {
184    alt((
185        delimited(pair(char('('), s), predicate, pair(s, char(')'))),
186        map(
187            delimited(
188                tuple((tag_no_case("exists"), s, char('('), s)),
189                expr,
190                pair(s, char(')')),
191            ),
192            |expr| Predicate::Exists(Box::new(expr)),
193        ),
194    ))(input)
195}
196
197fn expr(input: &str) -> IResult<&str, Expr> {
198    let (input, first) = expr1(input)?;
199    let mut first0 = Some(first);
200    fold_many0(
201        pair(delimited(s, alt((char('+'), char('-'))), s), expr1),
202        move || first0.take().unwrap(),
203        |acc, (op, expr)| match op {
204            '+' => Expr::BinaryOp(BinaryOp::Add, Box::new(acc), Box::new(expr)),
205            '-' => Expr::BinaryOp(BinaryOp::Sub, Box::new(acc), Box::new(expr)),
206            _ => unreachable!(),
207        },
208    )(input)
209}
210
211fn expr1(input: &str) -> IResult<&str, Expr> {
212    let (input, first) = expr2(input)?;
213    let mut first0 = Some(first);
214    fold_many0(
215        pair(
216            delimited(s, alt((char('*'), char('/'), char('%'))), s),
217            expr2,
218        ),
219        move || first0.take().unwrap(),
220        |acc, (op, expr)| match op {
221            '*' => Expr::BinaryOp(BinaryOp::Mul, Box::new(acc), Box::new(expr)),
222            '/' => Expr::BinaryOp(BinaryOp::Div, Box::new(acc), Box::new(expr)),
223            '%' => Expr::BinaryOp(BinaryOp::Rem, Box::new(acc), Box::new(expr)),
224            _ => unreachable!(),
225        },
226    )(input)
227}
228
229fn expr2(input: &str) -> IResult<&str, Expr> {
230    alt((
231        accessor_expr,
232        map(preceded(pair(char('+'), s), expr2), |expr| match &expr {
233            // constant folding
234            Expr::PathPrimary(PathPrimary::Value(Value::Number(_))) => expr,
235            _ => Expr::UnaryOp(UnaryOp::Plus, Box::new(expr)),
236        }),
237        map(preceded(pair(char('-'), s), expr2), |expr| match &expr {
238            // constant folding
239            Expr::PathPrimary(PathPrimary::Value(Value::Number(n))) => {
240                Expr::PathPrimary(PathPrimary::Value(Value::Number(n.neg())))
241            }
242            _ => Expr::UnaryOp(UnaryOp::Minus, Box::new(expr)),
243        }),
244    ))(input)
245}
246
247fn accessor_expr(input: &str) -> IResult<&str, Expr> {
248    map(
249        pair(path_primary, many0(preceded(s, accessor_op))),
250        |(primary, ops)| {
251            let mut expr = Expr::PathPrimary(primary.unnest());
252            for op in ops {
253                expr = Expr::Accessor(Box::new(expr), op);
254            }
255            expr
256        },
257    )(input)
258}
259
260fn path_primary(input: &str) -> IResult<&str, PathPrimary> {
261    alt((
262        map(scalar_value, PathPrimary::Value),
263        value(PathPrimary::Root, char('$')),
264        value(PathPrimary::Current, char('@')),
265        value(PathPrimary::Last, tag_no_case("last")),
266        map(
267            delimited(pair(char('('), s), expr_or_predicate, pair(s, char(')'))),
268            |expr| PathPrimary::ExprOrPred(Box::new(expr)),
269        ),
270    ))(input)
271}
272
273fn accessor_op(input: &str) -> IResult<&str, AccessorOp> {
274    alt((
275        map(
276            preceded(tag(".**"), level_range),
277            AccessorOp::DescendantMemberWildcard,
278        ),
279        value(AccessorOp::MemberWildcard, tag(".*")),
280        value(AccessorOp::ElementWildcard, element_wildcard),
281        map(item_method, AccessorOp::Method),
282        map(member_accessor, AccessorOp::Member),
283        map(array_accessor, AccessorOp::Element),
284        map(filter_expr, |expr| AccessorOp::FilterExpr(Box::new(expr))),
285    ))(input)
286}
287
288fn level_range(input: &str) -> IResult<&str, LevelRange> {
289    alt((
290        map(
291            delimited(
292                pair(char('{'), s),
293                separated_pair(level, delimited(s, tag_no_case("to"), s), level),
294                pair(s, char('}')),
295            ),
296            |(start, end)| {
297                if start == end {
298                    LevelRange::One(start)
299                } else {
300                    LevelRange::Range(start, end)
301                }
302            },
303        ),
304        map(
305            delimited(pair(char('{'), s), level, pair(s, char('}'))),
306            LevelRange::One,
307        ),
308        value(LevelRange::All, tag("")),
309    ))(input)
310}
311
312fn level(input: &str) -> IResult<&str, Level> {
313    alt((value(Level::Last, tag_no_case("last")), map(u32, Level::N)))(input)
314}
315
316fn element_wildcard(input: &str) -> IResult<&str, ()> {
317    value((), tuple((char('['), s, char('*'), s, char(']'))))(input)
318}
319
320fn member_accessor(input: &str) -> IResult<&str, String> {
321    preceded(pair(char('.'), s), alt((string, raw_string)))(input)
322}
323
324fn array_accessor(input: &str) -> IResult<&str, Vec<ArrayIndex>> {
325    delimited(
326        char('['),
327        separated_list1(char(','), delimited(s, index_elem, s)),
328        char(']'),
329    )(input)
330}
331
332fn index_elem(input: &str) -> IResult<&str, ArrayIndex> {
333    alt((
334        map(
335            separated_pair(expr, delimited(s, tag_no_case("to"), s), expr),
336            |(start, end)| ArrayIndex::Slice(start, end),
337        ),
338        map(expr, ArrayIndex::Index),
339    ))(input)
340}
341
342fn filter_expr(input: &str) -> IResult<&str, Predicate> {
343    delimited(
344        tuple((char('?'), s, char('('), s)),
345        predicate,
346        tuple((s, char(')'))),
347    )(input)
348}
349
350fn cmp_op(input: &str) -> IResult<&str, CompareOp> {
351    alt((
352        value(CompareOp::Eq, tag("==")),
353        value(CompareOp::Ne, tag("!=")),
354        value(CompareOp::Ne, tag("<>")),
355        value(CompareOp::Le, tag("<=")),
356        value(CompareOp::Lt, char('<')),
357        value(CompareOp::Ge, tag(">=")),
358        value(CompareOp::Gt, char('>')),
359    ))(input)
360}
361
362fn item_method(input: &str) -> IResult<&str, Method> {
363    let (input, _) = pair(char('.'), s)(input)?;
364    let (input, mut method) = method(input)?;
365    let (input, _) = tuple((s, char('(')))(input)?;
366    if matches!(method, Method::Datetime { .. }) {
367        let (next, _) = s(input)?;
368        let (next, tpl) = opt(string)(next)?;
369        if let Some(t) = tpl {
370            method = Method::Datetime { template: Some(t) };
371        }
372        let (next, _) = tuple((s, char(')')))(next)?;
373        return Ok((next, method));
374    }
375    let (input, _) = tuple((s, char(')')))(input)?;
376    Ok((input, method))
377}
378
379fn method(input: &str) -> IResult<&str, Method> {
380    alt((
381        value(Method::Type, tag_no_case("type")),
382        value(Method::Size, tag_no_case("size")),
383        value(Method::Double, tag_no_case("double")),
384        value(Method::Ceiling, tag_no_case("ceiling")),
385        value(Method::Floor, tag_no_case("floor")),
386        value(Method::Abs, tag_no_case("abs")),
387        value(Method::Keyvalue, tag_no_case("keyvalue")),
388        value(Method::Datetime { template: None }, tag_no_case("datetime")),
389    ))(input)
390}
391
392fn scalar_value(input: &str) -> IResult<&str, Value> {
393    alt((
394        value(Value::Null, tag("null")),
395        value(Value::Boolean(true), tag("true")),
396        value(Value::Boolean(false), tag("false")),
397        map(number, Value::Number),
398        map(string, Value::String),
399        map(variable, Value::Variable),
400    ))(input)
401}
402
403fn number(input: &str) -> IResult<&str, Number> {
404    map(double, |v| {
405        if v == v.trunc() {
406            Number::from(v as i64)
407        } else {
408            Number::from_f64(v).unwrap()
409        }
410    })(input)
411}
412
413fn starts_with_literal(input: &str) -> IResult<&str, Value> {
414    alt((map(string, Value::String), map(variable, Value::Variable)))(input)
415}
416
417fn variable(input: &str) -> IResult<&str, String> {
418    preceded(char('$'), raw_string)(input)
419}
420
421fn string(input: &str) -> IResult<&str, String> {
422    context(
423        "double quoted string",
424        delimited(
425            char('"'),
426            fold_many0(
427                alt((
428                    map(unescaped_str, String::from),
429                    map(escaped_char, String::from),
430                )),
431                String::new,
432                |mut string, fragment| {
433                    if string.is_empty() {
434                        fragment
435                    } else {
436                        string.push_str(&fragment);
437                        string
438                    }
439                },
440            ),
441            cut(char('"')),
442        ),
443    )(input)
444}
445
446fn escaped_char(input: &str) -> IResult<&str, char> {
447    context(
448        "escaped character",
449        preceded(
450            char('\\'),
451            alt((
452                value('\u{0008}', char('b')),
453                value('\u{0009}', char('t')),
454                value('\u{000A}', char('n')),
455                value('\u{000C}', char('f')),
456                value('\u{000D}', char('r')),
457                value('\u{002F}', char('/')),
458                value('\u{005C}', char('\\')),
459                value('\u{0022}', char('"')),
460                // unicode_sequence,
461            )),
462        ),
463    )(input)
464}
465
466fn unescaped_str(input: &str) -> IResult<&str, &str> {
467    context(
468        "unescaped character",
469        verify(take_while(is_valid_unescaped_char), |s: &str| !s.is_empty()),
470    )(input)
471}
472
473fn is_valid_unescaped_char(chr: char) -> bool {
474    match chr {
475        '"' => false,
476        '\u{20}'..='\u{5B}' // Omit control characters
477        | '\u{5D}'..='\u{10FFFF}' => true, // Omit \
478        _ => false,
479    }
480}
481
482fn raw_string(input: &str) -> IResult<&str, String> {
483    map(
484        take_while1(|c: char| c.is_ascii_alphanumeric() || c == '_' || c >= '\u{0080}'),
485        String::from,
486    )(input)
487}
488
489/// A visitor that checks if a JSON Path is valid.
490///
491/// An error is returned if:
492///
493/// - `@` is used in a non-root expression
494/// - `last` is used in a non-array subscript
495#[derive(Debug, Clone, Copy, Default)]
496struct Checker {
497    non_root: bool,
498    inside_element_accessor: bool,
499}
500
501impl Checker {
502    fn visit_json_path(&self, json_path: &JsonPath) -> Result<(), &'static str> {
503        self.visit_expr_or_predicate(&json_path.expr)
504    }
505
506    fn visit_expr_or_predicate(&self, expr_or_pred: &ExprOrPredicate) -> Result<(), &'static str> {
507        match expr_or_pred {
508            ExprOrPredicate::Expr(expr) => self.visit_expr(expr),
509            ExprOrPredicate::Pred(pred) => self.visit_predicate(pred),
510        }
511    }
512
513    fn visit_expr(&self, expr: &Expr) -> Result<(), &'static str> {
514        match expr {
515            Expr::PathPrimary(primary) => self.visit_path_primary(primary),
516            Expr::Accessor(base, accessor) => {
517                self.visit_expr(base)?;
518                self.visit_accessor_op(accessor)
519            }
520            Expr::UnaryOp(_, expr) => self.visit_expr(expr),
521            Expr::BinaryOp(_, left, right) => {
522                self.visit_expr(left)?;
523                self.visit_expr(right)
524            }
525        }
526    }
527
528    fn visit_predicate(&self, pred: &Predicate) -> Result<(), &'static str> {
529        match pred {
530            Predicate::Compare(_, left, right) => {
531                self.visit_expr(left)?;
532                self.visit_expr(right)
533            }
534            Predicate::Exists(expr) => self.visit_expr(expr),
535            Predicate::And(left, right) | Predicate::Or(left, right) => {
536                self.visit_predicate(left)?;
537                self.visit_predicate(right)
538            }
539            Predicate::Not(pred) => self.visit_predicate(pred),
540            Predicate::IsUnknown(pred) => self.visit_predicate(pred),
541            Predicate::StartsWith(expr, _) => self.visit_expr(expr),
542            Predicate::LikeRegex(expr, _) => self.visit_expr(expr),
543        }
544    }
545
546    fn visit_path_primary(&self, primary: &PathPrimary) -> Result<(), &'static str> {
547        match primary {
548            PathPrimary::Last if !self.inside_element_accessor => {
549                Err("LAST is allowed only in array subscripts")
550            }
551            PathPrimary::Current if !self.non_root => Err("@ is not allowed in root expressions"),
552            _ => Ok(()),
553        }
554    }
555
556    fn visit_accessor_op(&self, accessor_op: &AccessorOp) -> Result<(), &'static str> {
557        match accessor_op {
558            AccessorOp::ElementWildcard | AccessorOp::MemberWildcard => Ok(()),
559            AccessorOp::DescendantMemberWildcard(_) => Ok(()),
560            AccessorOp::Member(_) | AccessorOp::Method(_) => Ok(()),
561            AccessorOp::FilterExpr(pred) => Self {
562                non_root: true,
563                ..*self
564            }
565            .visit_predicate(pred),
566            AccessorOp::Element(indices) => {
567                let next = Self {
568                    non_root: true,
569                    inside_element_accessor: true,
570                };
571                for index in indices {
572                    match index {
573                        ArrayIndex::Index(i) => next.visit_expr(i)?,
574                        ArrayIndex::Slice(s, e) => {
575                            next.visit_expr(s)?;
576                            next.visit_expr(e)?;
577                        }
578                    }
579                }
580                Ok(())
581            }
582        }
583    }
584}
585
586#[cfg(test)]
587mod tests {
588    use super::*;
589
590    #[test]
591    fn test_json_path() {
592        JsonPath::from_str(r#"lax $.name ? (@ starts with "O''")"#).unwrap();
593        JsonPath::from_str(r#"lax $.name ? (@ starts with "\"hello")"#).unwrap();
594        // JsonPath::from_str(r#"lax $.name ? (@ starts with "O\u0027")"#).unwrap();
595        // JsonPath::from_str(r#"lax $.name ? (@ starts with "\u0022hello")"#).unwrap();
596    }
597}