sql_json_path/
ast.rs

1// Copyright 2023 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! The AST of JSON Path.
16
17use std::fmt::Display;
18use std::fmt::Formatter;
19use std::ops::Deref;
20
21use serde_json::Number;
22
/// A JSON Path value.
///
/// Pairs the evaluation [`Mode`] with the top-level expression or predicate
/// of the path.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct JsonPath {
    /// The evaluation mode (lax or strict).
    pub(crate) mode: Mode,
    /// The top-level expression or predicate of the path.
    pub(crate) expr: ExprOrPredicate,
}
29
/// The mode of JSON Path.
///
/// Determines how errors encountered while evaluating a path are treated.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Mode {
    /// Lax mode converts errors to empty SQL/JSON sequences.
    Lax,
    /// Strict mode raises an error if the data does not strictly adhere to the requirements of a path expression.
    Strict,
}
38
/// An expression or predicate.
///
/// Used wherever either form is accepted: at the top level of a [`JsonPath`]
/// and inside a parenthesized [`PathPrimary::ExprOrPred`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ExprOrPredicate {
    /// A value-producing expression.
    Expr(Expr),
    /// A predicate that evaluates to a truth value.
    Pred(Predicate),
}
45
/// An expression in JSON Path.
///
/// The tree is recursive; nested expressions are boxed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Expr {
    /// Path primary
    PathPrimary(PathPrimary),
    /// Accessor expression: a base expression followed by one accessor operation.
    Accessor(Box<Expr>, AccessorOp),
    /// Unary operation: the operator and its operand.
    UnaryOp(UnaryOp, Box<Expr>),
    /// Binary operation: the operator, the left operand and the right operand.
    BinaryOp(BinaryOp, Box<Expr>, Box<Expr>),
}
58
/// A filter expression that evaluates to a truth value.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Predicate {
    /// `==`, `!=`, `<`, `<=`, `>`, `>=` represents the comparison between two values.
    ///
    /// Fields: the operator, the left operand and the right operand.
    Compare(CompareOp, Box<Expr>, Box<Expr>),
    /// `exists` represents the value exists.
    Exists(Box<Expr>),
    /// `&&` represents logical AND.
    And(Box<Predicate>, Box<Predicate>),
    /// `||` represents logical OR.
    Or(Box<Predicate>, Box<Predicate>),
    /// `!` represents logical NOT.
    Not(Box<Predicate>),
    /// `is unknown` represents the value is unknown.
    IsUnknown(Box<Predicate>),
    /// `starts with` represents the value starts with the given value.
    ///
    /// Fields: the expression being tested and the prefix value.
    StartsWith(Box<Expr>, Value),
    /// `like_regex` represents the value matches the given regular expression.
    ///
    /// Fields: the expression being tested and the compiled pattern with its flags.
    LikeRegex(Box<Expr>, Box<Regex>),
}
79
/// A primary expression.
///
/// These are the leaves of an [`Expr`] tree, plus the parenthesized form
/// that nests a whole expression or predicate.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PathPrimary {
    /// `$` represents the root node or element.
    Root,
    /// `@` represents the current node or element being processed in the filter expression.
    Current,
    /// `last` is the size of the array minus 1.
    Last,
    /// Literal value.
    Value(Value),
    /// `(expr)` represents a parenthesized expression or predicate.
    ExprOrPred(Box<ExprOrPredicate>),
}
94
/// An accessor operation.
///
/// Applied to a base expression by [`Expr::Accessor`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AccessorOp {
    /// `.*` represents selecting all elements in an object.
    MemberWildcard,
    /// `.**` represents selecting all elements in an object and its sub-objects.
    ///
    /// The level range restricts which nesting levels are selected.
    DescendantMemberWildcard(LevelRange),
    /// `[*]` represents selecting all elements in an array.
    ElementWildcard,
    /// `.<name>` represents selecting element that matched the name in an object, like `$.event`.
    /// The name can also be written as a string literal, allowing the name to contain special characters, like `$." $price"`.
    Member(String),
    /// `[<index1>,<index2>,..]` represents selecting elements specified by the indices in an Array.
    Element(Vec<ArrayIndex>),
    /// `?(<predicate>)` represents filtering elements using the predicate.
    FilterExpr(Box<Predicate>),
    /// `.method()` represents calling a method.
    Method(Method),
}
114
/// A level range.
///
/// The optional `{...}` suffix of the `.**` accessor.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LevelRange {
    /// No level specifier: every level is selected.
    All,
    /// `{level}`
    One(Level),
    /// `{start to end}`
    Range(Level, Level),
}
125
/// A level number.
///
/// Either an explicit number or the symbolic `last` level.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Level {
    /// An explicit level number.
    N(u32),
    /// The `last` keyword; resolved to a concrete level when the range is evaluated.
    Last,
}
132
/// An array index.
///
/// One entry of the comma-separated list inside `[...]`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ArrayIndex {
    /// The single number index.
    Index(Expr),
    /// `<start> to <end>` represents the slice of the array.
    Slice(Expr, Expr),
}
141
/// Represents a scalar value.
///
/// Besides the literal kinds this also covers named variables.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Value {
    /// Null value.
    Null,
    /// Boolean value.
    Boolean(bool),
    /// Number value.
    Number(Number),
    /// UTF-8 string.
    String(String),
    /// Variable, identified by its name.
    Variable(String),
}
156
/// A comparison operator, as used by [`Predicate::Compare`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompareOp {
    /// `==` represents left is equal to right.
    Eq,
    /// `!=` and `<>` represents left is not equal to right.
    Ne,
    /// `<` represents left is less than right.
    Lt,
    /// `<=` represents left is less or equal to right.
    Le,
    /// `>` represents left is greater than right.
    Gt,
    /// `>=` represents left is greater than or equal to right.
    Ge,
}
173
/// A unary operator, as used by [`Expr::UnaryOp`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UnaryOp {
    /// `+` represents plus.
    Plus,
    /// `-` represents minus.
    Minus,
}
182
/// A binary arithmetic operator, as used by [`Expr::BinaryOp`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BinaryOp {
    /// `+` represents left plus right.
    Add,
    /// `-` represents left minus right.
    Sub,
    /// `*` represents left multiply right.
    Mul,
    /// `/` represents left divide right.
    Div,
    /// `%` represents left modulo right.
    Rem,
}
197
/// An item method, invoked with the `.method()` accessor syntax.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Method {
    /// `.type()` returns a character string that names the type of the SQL/JSON item.
    Type,
    /// `.size()` returns the size of an SQL/JSON item.
    Size,
    /// `.double()` converts a string or numeric to an approximate numeric value.
    Double,
    /// `.ceiling()` returns the smallest integer that is greater than or equal to the argument.
    Ceiling,
    /// `.floor()` returns the largest integer that is less than or equal to the argument.
    Floor,
    /// `.abs()` returns the absolute value of the argument.
    Abs,
    /// `.keyvalue()` returns the key-value pairs of an object.
    ///
    /// For example, suppose:
    /// ```json
    /// { who: "Fred", what: 64 }
    /// ```
    /// Then:
    /// ```json
    /// $.keyvalue() =
    /// ( { name: "who",  value: "Fred", id: 9045 },
    ///   { name: "what", value: 64,     id: 9045 }
    /// )
    /// ```
    Keyvalue,
}
228
229impl PathPrimary {
230    /// If this is a nested path primary, unnest it.
231    /// `(primary) => primary`
232    pub(crate) fn unnest(self) -> Self {
233        match self {
234            Self::ExprOrPred(expr) => match *expr {
235                ExprOrPredicate::Expr(Expr::PathPrimary(inner)) => inner,
236                other => Self::ExprOrPred(Box::new(other)),
237            },
238            _ => self,
239        }
240    }
241}
242
243impl LevelRange {
244    /// Returns the upper bound of the range.
245    /// If no upper bound, returns `u32::MAX`.
246    pub(crate) fn end(&self) -> u32 {
247        match self {
248            Self::One(Level::N(n)) => *n,
249            Self::Range(_, Level::N(end)) => *end,
250            _ => u32::MAX,
251        }
252    }
253
254    /// Resolve the range with the given `last`.
255    ///
256    /// # Examples
257    ///
258    /// ```text
259    /// last = 3
260    /// .**             => 0..4
261    /// .**{1}          => 1..2
262    /// .**{1 to 4}     => 1..3
263    /// .**{1 to last}  => 1..4
264    /// .**{last to 2}  => 3..3
265    /// ```
266    pub(crate) fn to_range(&self, last: usize) -> std::ops::Range<usize> {
267        match self {
268            Self::All => 0..last + 1,
269            Self::One(level) => {
270                level.to_usize(last).min(last + 1)..level.to_usize(last).min(last) + 1
271            }
272            Self::Range(start, end) => {
273                start.to_usize(last).min(last + 1)..end.to_usize(last).min(last) + 1
274            }
275        }
276    }
277}
278
279impl Level {
280    fn to_usize(&self, last: usize) -> usize {
281        match self {
282            Self::N(n) => *n as usize,
283            Self::Last => last,
284        }
285    }
286}
287
288impl Display for JsonPath {
289    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
290        if self.mode == Mode::Strict {
291            write!(f, "strict ")?;
292        }
293        write!(f, "{}", self.expr)
294    }
295}
296
297impl Display for Mode {
298    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
299        match self {
300            Self::Lax => write!(f, "lax"),
301            Self::Strict => write!(f, "strict"),
302        }
303    }
304}
305
306impl Display for ExprOrPredicate {
307    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
308        match self {
309            Self::Expr(expr) => match expr {
310                Expr::BinaryOp(_, _, _) => write!(f, "({})", expr),
311                _ => write!(f, "{}", expr),
312            },
313            Self::Pred(pred) => match pred {
314                Predicate::Compare(_, _, _) | Predicate::And(_, _) | Predicate::Or(_, _) => {
315                    write!(f, "({})", pred)
316                }
317                _ => write!(f, "{}", pred),
318            },
319        }
320    }
321}
322
323impl Display for Predicate {
324    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
325        match self {
326            Self::Compare(op, left, right) => write!(f, "{left} {op} {right}"),
327            Self::Exists(expr) => write!(f, "exists ({expr})"),
328            Self::And(left, right) => {
329                match left.as_ref() {
330                    Self::Or(_, _) => write!(f, "({left})")?,
331                    _ => write!(f, "{left}")?,
332                }
333                write!(f, " && ")?;
334                match right.as_ref() {
335                    Self::Or(_, _) => write!(f, "({right})"),
336                    _ => write!(f, "{right}"),
337                }
338            }
339            Self::Or(left, right) => write!(f, "{left} || {right}"),
340            Self::Not(expr) => write!(f, "!({expr})"),
341            Self::IsUnknown(expr) => write!(f, "({expr}) is unknown"),
342            Self::StartsWith(expr, v) => write!(f, "{expr} starts with {v}"),
343            Self::LikeRegex(expr, regex) => {
344                write!(f, "{expr} like_regex \"{}\"", regex.pattern())?;
345                if let Some(flags) = regex.flags() {
346                    write!(f, " flag \"{flags}\"")?;
347                }
348                Ok(())
349            }
350        }
351    }
352}
353
354impl Display for Expr {
355    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
356        match self {
357            Expr::PathPrimary(primary) => write!(f, "{primary}"),
358            Expr::Accessor(base, op) => {
359                match base.as_ref() {
360                    Expr::PathPrimary(PathPrimary::Value(Value::Number(_))) => {
361                        write!(f, "({base})")?
362                    }
363                    Expr::PathPrimary(PathPrimary::ExprOrPred(expr)) => match expr.as_ref() {
364                        ExprOrPredicate::Expr(Expr::UnaryOp(_, _)) => write!(f, "({base})")?,
365                        _ => write!(f, "{base}")?,
366                    },
367                    _ => write!(f, "{base}")?,
368                }
369                write!(f, "{op}")?;
370                Ok(())
371            }
372            Expr::UnaryOp(op, expr) => match expr.as_ref() {
373                Expr::PathPrimary(_) | Expr::Accessor(_, _) => write!(f, "{op}{expr}"),
374                _ => write!(f, "{op}({expr})"),
375            },
376            Expr::BinaryOp(op, left, right) => write!(f, "{left} {op} {right}"),
377        }
378    }
379}
380
381impl Display for ArrayIndex {
382    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
383        match self {
384            Self::Index(idx) => write!(f, "{idx}"),
385            Self::Slice(start, end) => write!(f, "{start} to {end}"),
386        }
387    }
388}
389
390impl Display for PathPrimary {
391    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
392        match self {
393            Self::Root => write!(f, "$"),
394            Self::Current => write!(f, "@"),
395            Self::Value(v) => write!(f, "{v}"),
396            Self::Last => write!(f, "last"),
397            Self::ExprOrPred(expr) => write!(f, "{expr}"),
398        }
399    }
400}
401
402impl Display for AccessorOp {
403    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
404        match self {
405            Self::MemberWildcard => write!(f, ".*"),
406            Self::DescendantMemberWildcard(level) => write!(f, ".**{level}"),
407            Self::ElementWildcard => write!(f, "[*]"),
408            Self::Member(field) => write!(f, ".\"{field}\""),
409            Self::Element(indices) => {
410                write!(f, "[")?;
411                for (i, idx) in indices.iter().enumerate() {
412                    if i > 0 {
413                        write!(f, ",")?;
414                    }
415                    write!(f, "{idx}")?;
416                }
417                write!(f, "]")
418            }
419            Self::FilterExpr(expr) => write!(f, "?({expr})"),
420            Self::Method(method) => write!(f, ".{method}()"),
421        }
422    }
423}
424
425impl Display for LevelRange {
426    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
427        match self {
428            Self::All => Ok(()),
429            Self::One(level) => write!(f, "{{{level}}}"),
430            Self::Range(start, end) => write!(f, "{{{start} to {end}}}"),
431        }
432    }
433}
434
435impl Display for Level {
436    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
437        match self {
438            Self::N(n) => write!(f, "{n}"),
439            Self::Last => write!(f, "last"),
440        }
441    }
442}
443
444impl Display for Value {
445    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
446        match self {
447            Self::Null => write!(f, "null"),
448            Self::Boolean(v) => write!(f, "{v}"),
449            Self::Number(v) => write!(f, "{v}"),
450            Self::String(v) => write!(f, "\"{v}\""),
451            Self::Variable(v) => write!(f, "$\"{v}\""),
452        }
453    }
454}
455
456impl Display for UnaryOp {
457    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
458        match self {
459            Self::Plus => write!(f, "+"),
460            Self::Minus => write!(f, "-"),
461        }
462    }
463}
464
465impl Display for CompareOp {
466    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
467        match self {
468            Self::Eq => write!(f, "=="),
469            Self::Ne => write!(f, "!="),
470            Self::Lt => write!(f, "<"),
471            Self::Le => write!(f, "<="),
472            Self::Gt => write!(f, ">"),
473            Self::Ge => write!(f, ">="),
474        }
475    }
476}
477
478impl Display for BinaryOp {
479    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
480        match self {
481            Self::Add => write!(f, "+"),
482            Self::Sub => write!(f, "-"),
483            Self::Mul => write!(f, "*"),
484            Self::Div => write!(f, "/"),
485            Self::Rem => write!(f, "%"),
486        }
487    }
488}
489
490impl Display for Method {
491    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
492        match self {
493            Self::Type => write!(f, "type"),
494            Self::Size => write!(f, "size"),
495            Self::Double => write!(f, "double"),
496            Self::Ceiling => write!(f, "ceiling"),
497            Self::Floor => write!(f, "floor"),
498            Self::Abs => write!(f, "abs"),
499            Self::Keyvalue => write!(f, "keyvalue"),
500        }
501    }
502}
503
/// A wrapper of `regex::Regex` to combine the pattern and flags.
///
/// Equality is defined on the pattern text and the flag string.
#[derive(Debug, Clone)]
pub struct Regex {
    /// The compiled regular expression.
    regex: regex::Regex,
    /// The accepted flag characters, de-duplicated in first-seen order; empty when none were given.
    flags: String,
}
510
511impl Regex {
512    pub(crate) fn with_flags(pattern: &str, flags: Option<String>) -> Result<Self, regex::Error> {
513        let mut builder = match flags.as_deref() {
514            Some(flags) if flags.contains('q') => regex::RegexBuilder::new(&regex::escape(pattern)),
515            _ => regex::RegexBuilder::new(pattern),
516        };
517        let mut out_flags = String::new();
518        if let Some(flags) = flags.as_deref() {
519            for c in flags.chars() {
520                match c {
521                    'q' => {}
522                    'i' => {
523                        builder.case_insensitive(true);
524                    }
525                    'm' => {
526                        builder.multi_line(true);
527                    }
528                    's' => {
529                        builder.dot_matches_new_line(true);
530                    }
531                    'x' => {
532                        return Err(regex::Error::Syntax(
533                            "XQuery \"x\" flag (expanded regular expressions) is not implemented"
534                                .to_string(),
535                        ))
536                    }
537                    _ => {
538                        return Err(regex::Error::Syntax(format!(
539                            "Unrecognized flag character \"{c}\" in LIKE_REGEX predicate."
540                        )))
541                    }
542                };
543                // Remove duplicated flags.
544                if !out_flags.contains(c) {
545                    out_flags.push(c);
546                }
547            }
548        }
549        let regex = builder.build()?;
550        Ok(Self {
551            regex,
552            flags: out_flags,
553        })
554    }
555
556    pub fn pattern(&self) -> &str {
557        self.regex.as_str()
558    }
559
560    pub fn flags(&self) -> Option<&str> {
561        if self.flags.is_empty() {
562            None
563        } else {
564            Some(&self.flags)
565        }
566    }
567}
568
/// Exposes the wrapped `regex::Regex`, so its methods can be called directly
/// on this wrapper.
impl Deref for Regex {
    /// The wrapped compiled regular expression type.
    type Target = regex::Regex;

    /// Returns a reference to the inner compiled regex.
    fn deref(&self) -> &Self::Target {
        &self.regex
    }
}
576
577impl PartialEq for Regex {
578    fn eq(&self, other: &Self) -> bool {
579        self.pattern() == other.pattern() && self.flags() == other.flags()
580    }
581}
582
// Marker impl: the manual `PartialEq` compares only the pattern text and the
// flag string, both of which have total equality.
impl Eq for Regex {}