Skip to main content

semdiff_differ_json/json_path/
parser.rs

1use crate::json_path::parser::integer_literal::IntegerLiteral;
2use crate::json_path::parser::number_literal::{Number, NumberLiteral};
3use crate::json_path::parser::string_literal::StringLiteral;
4use nom::branch::alt;
5use nom::bytes::complete::{tag, take_while, take_while1};
6use nom::character::complete::char;
7use nom::combinator::{all_consuming, map, opt, recognize, value};
8use nom::error::ErrorKind;
9use nom::multi::{many0, separated_list0};
10use nom::sequence::{delimited, pair, preceded, terminated};
11use nom::{Err, IResult, Parser};
12use std::fmt;
13use std::str::FromStr;
14
15pub(crate) mod integer_literal;
16pub(crate) mod number_literal;
17pub(crate) mod string_literal;
18
19#[cfg(test)]
20mod tests;
21
22#[derive(Debug, Clone, PartialEq)]
23pub struct JsonPath {
24    pub(crate) segments: Vec<Segment>,
25}
26
27#[derive(Debug, Clone, PartialEq)]
28pub(crate) enum Segment {
29    Child(Vec<Selector>),
30    Descendant(Vec<Selector>),
31}
32
33#[derive(Debug, Clone, PartialEq)]
34pub(crate) enum Selector {
35    Name(String),
36    Wildcard,
37    Slice {
38        start: Option<i64>,
39        end: Option<i64>,
40        step: Option<i64>,
41    },
42    Index(i64),
43    Filter(LogicalExpr),
44}
45
46#[derive(Debug, Clone, PartialEq)]
47pub(crate) struct Query {
48    pub(crate) root: QueryRoot,
49    pub(crate) segments: Vec<Segment>,
50}
51
/// The identifier a query starts from.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum QueryRoot {
    /// The query was introduced by `$`.
    Root,
    /// The query was introduced by `@`.
    Current,
}
57
58#[derive(Debug, Clone, PartialEq)]
59pub(crate) enum LogicalExpr {
60    Or(Box<LogicalExpr>, Box<LogicalExpr>),
61    And(Box<LogicalExpr>, Box<LogicalExpr>),
62    Not(Box<LogicalExpr>),
63    Paren(Box<LogicalExpr>),
64    Comparison {
65        left: Comparable,
66        op: ComparisonOp,
67        right: Comparable,
68    },
69    Test(TestExpr),
70}
71
72#[derive(Debug, Clone, PartialEq)]
73pub(crate) enum TestExpr {
74    Query(Query),
75    Function(FunctionExpr),
76}
77
78#[derive(Debug, Clone, PartialEq)]
79pub(crate) enum Comparable {
80    Literal(Literal),
81    SingularQuery(SingularQuery),
82    Function(FunctionExpr),
83}
84
85#[derive(Debug, Clone, PartialEq)]
86pub(crate) struct SingularQuery {
87    pub(crate) root: QueryRoot,
88    pub(crate) segments: Vec<SingularSegment>,
89}
90
/// A segment allowed inside a [`SingularQuery`].
#[derive(Debug, Clone, PartialEq)]
pub(crate) enum SingularSegment {
    /// `['name']` or `.name`.
    Name(String),
    /// `[index]`.
    Index(i64),
}
96
/// Comparison operators accepted between two comparables.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum ComparisonOp {
    /// `==`
    Eq,
    /// `!=`
    Ne,
    /// `<`
    Lt,
    /// `<=`
    Le,
    /// `>`
    Gt,
    /// `>=`
    Ge,
}
106
107#[derive(Debug, Clone, PartialEq)]
108pub(crate) enum Literal {
109    Number(Number),
110    String(String),
111    Bool(bool),
112    Null,
113}
114
115#[derive(Debug, Clone, PartialEq)]
116pub(crate) struct FunctionExpr {
117    pub(crate) name: String,
118    pub(crate) arguments: Vec<FunctionArgument>,
119}
120
121#[derive(Debug, Clone, PartialEq)]
122pub(crate) enum FunctionArgument {
123    Literal(Literal),
124    Query(Query),
125    LogicalExpr(LogicalExpr),
126    Function(FunctionExpr),
127}
128
129#[derive(Debug, Clone, PartialEq, Eq)]
130pub struct ParseError(nom::Err<nom::error::Error<String>>);
131
132impl ParseError {
133    pub fn as_nom_error(&self) -> &nom::Err<nom::error::Error<String>> {
134        &self.0
135    }
136}
137
138impl fmt::Display for ParseError {
139    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
140        write!(f, "{:?}", self.0)
141    }
142}
143
144impl std::error::Error for ParseError {}
145
146impl From<nom::Err<nom::error::Error<&str>>> for ParseError {
147    fn from(value: nom::Err<nom::error::Error<&str>>) -> Self {
148        match value {
149            Err::Incomplete(needed) => ParseError(Err::Incomplete(needed)),
150            Err::Error(error) => ParseError(Err::Error(nom::error::Error {
151                input: error.input.to_owned(),
152                code: error.code,
153            })),
154            Err::Failure(error) => ParseError(Err::Failure(nom::error::Error {
155                input: error.input.to_owned(),
156                code: error.code,
157            })),
158        }
159    }
160}
161
162impl FromStr for JsonPath {
163    type Err = ParseError;
164
165    fn from_str(input: &str) -> Result<Self, Self::Err> {
166        parse(input)
167    }
168}
169
170// JSONPath parser
171// see: RFC 9535
172
173pub fn parse(input: &str) -> Result<JsonPath, ParseError> {
174    all_consuming(jsonpath_query)
175        .parse(input)
176        .map(|(_, query)| JsonPath {
177            segments: query.segments,
178        })
179        .map_err(ParseError::from)
180}
181
182fn jsonpath_query(input: &str) -> IResult<&str, Query> {
183    map(pair(char('$'), segments), |(_, segments)| Query {
184        root: QueryRoot::Root,
185        segments,
186    })
187    .parse(input)
188}
189
190fn filter_query(input: &str) -> IResult<&str, Query> {
191    alt((
192        map(pair(char('@'), segments), |(_, segments)| Query {
193            root: QueryRoot::Current,
194            segments,
195        }),
196        jsonpath_query,
197    ))
198    .parse(input)
199}
200
201fn segments(input: &str) -> IResult<&str, Vec<Segment>> {
202    many0(preceded(s, segment)).parse(input)
203}
204
205fn segment(input: &str) -> IResult<&str, Segment> {
206    alt((descendant_segment, child_segment)).parse(input)
207}
208
209fn child_segment(input: &str) -> IResult<&str, Segment> {
210    alt((
211        map(bracketed_selection, Segment::Child),
212        map(
213            preceded(
214                char('.'),
215                alt((
216                    value(vec![Selector::Wildcard], char('*')),
217                    map(member_name_shorthand, |name| vec![Selector::Name(name)]),
218                )),
219            ),
220            Segment::Child,
221        ),
222    ))
223    .parse(input)
224}
225
226fn descendant_segment(input: &str) -> IResult<&str, Segment> {
227    map(
228        preceded(
229            tag(".."),
230            alt((
231                bracketed_selection,
232                value(vec![Selector::Wildcard], char('*')),
233                map(member_name_shorthand, |name| vec![Selector::Name(name)]),
234            )),
235        ),
236        Segment::Descendant,
237    )
238    .parse(input)
239}
240
241fn bracketed_selection(input: &str) -> IResult<&str, Vec<Selector>> {
242    delimited(
243        terminated(char('['), s),
244        separated_list0(delimited(s, char(','), s), selector),
245        preceded(s, char(']')),
246    )
247    .parse(input)
248}
249
250fn selector(input: &str) -> IResult<&str, Selector> {
251    alt((
252        map(StringLiteral::new(), Selector::Name),
253        value(Selector::Wildcard, char('*')),
254        slice_selector,
255        map(index_selector, Selector::Index),
256        filter_selector,
257    ))
258    .parse(input)
259}
260
261fn filter_selector(input: &str) -> IResult<&str, Selector> {
262    map(preceded(pair(char('?'), s), logical_expr), Selector::Filter).parse(input)
263}
264
265fn slice_selector(input: &str) -> IResult<&str, Selector> {
266    map(
267        (
268            opt(terminated(index_selector, s)),
269            char(':'),
270            preceded(s, opt(terminated(index_selector, s))),
271            opt(preceded(pair(char(':'), s), index_selector)),
272        ),
273        |(start, _, end, step)| Selector::Slice { start, end, step },
274    )
275    .parse(input)
276}
277
278fn index_selector(input: &str) -> IResult<&str, i64> {
279    IntegerLiteral::new().parse(input)
280}
281
282fn logical_expr(input: &str) -> IResult<&str, LogicalExpr> {
283    logical_or_expr(input)
284}
285
286fn logical_or_expr(input: &str) -> IResult<&str, LogicalExpr> {
287    let (mut input, mut expr) = logical_and_expr(input)?;
288    loop {
289        match preceded((s, tag("||"), s), logical_and_expr).parse(input) {
290            Ok((next, rhs)) => {
291                expr = LogicalExpr::Or(Box::new(expr), Box::new(rhs));
292                input = next;
293            }
294            Err(_) => return Ok((input, expr)),
295        }
296    }
297}
298
299fn logical_and_expr(input: &str) -> IResult<&str, LogicalExpr> {
300    let (mut input, mut expr) = basic_expr(input)?;
301    loop {
302        match preceded((s, tag("&&"), s), basic_expr).parse(input) {
303            Ok((next, rhs)) => {
304                expr = LogicalExpr::And(Box::new(expr), Box::new(rhs));
305                input = next;
306            }
307            Err(_) => return Ok((input, expr)),
308        }
309    }
310}
311
312fn basic_expr(input: &str) -> IResult<&str, LogicalExpr> {
313    alt((paren_expr, comparison_expr, test_expr)).parse(input)
314}
315
316fn paren_expr(input: &str) -> IResult<&str, LogicalExpr> {
317    map(
318        pair(
319            opt(terminated(logical_not_op, s)),
320            delimited(char('('), delimited(s, logical_expr, s), char(')')),
321        ),
322        |(not, expr)| {
323            let expr = LogicalExpr::Paren(Box::new(expr));
324            if not.is_some() {
325                LogicalExpr::Not(Box::new(expr))
326            } else {
327                expr
328            }
329        },
330    )
331    .parse(input)
332}
333
334fn test_expr(input: &str) -> IResult<&str, LogicalExpr> {
335    map(
336        pair(
337            opt(terminated(logical_not_op, s)),
338            alt((
339                map(filter_query, TestExpr::Query),
340                map(function_expr, TestExpr::Function),
341            )),
342        ),
343        |(not, expr)| {
344            let expr = LogicalExpr::Test(expr);
345            if not.is_some() {
346                LogicalExpr::Not(Box::new(expr))
347            } else {
348                expr
349            }
350        },
351    )
352    .parse(input)
353}
354
355fn logical_not_op(input: &str) -> IResult<&str, char> {
356    char('!').parse(input)
357}
358
359fn comparison_expr(input: &str) -> IResult<&str, LogicalExpr> {
360    map(
361        (comparable, delimited(s, comparison_op, s), comparable),
362        |(left, op, right)| LogicalExpr::Comparison { left, op, right },
363    )
364    .parse(input)
365}
366
367fn comparable(input: &str) -> IResult<&str, Comparable> {
368    alt((
369        map(literal, Comparable::Literal),
370        map(singular_query, Comparable::SingularQuery),
371        map(function_expr, Comparable::Function),
372    ))
373    .parse(input)
374}
375
376fn comparison_op(input: &str) -> IResult<&str, ComparisonOp> {
377    alt((
378        value(ComparisonOp::Eq, tag("==")),
379        value(ComparisonOp::Ne, tag("!=")),
380        value(ComparisonOp::Le, tag("<=")),
381        value(ComparisonOp::Ge, tag(">=")),
382        value(ComparisonOp::Lt, tag("<")),
383        value(ComparisonOp::Gt, tag(">")),
384    ))
385    .parse(input)
386}
387
388fn singular_query(input: &str) -> IResult<&str, SingularQuery> {
389    alt((
390        map(pair(char('@'), singular_query_segments), |(_, segments)| {
391            SingularQuery {
392                root: QueryRoot::Current,
393                segments,
394            }
395        }),
396        map(pair(char('$'), singular_query_segments), |(_, segments)| {
397            SingularQuery {
398                root: QueryRoot::Root,
399                segments,
400            }
401        }),
402    ))
403    .parse(input)
404}
405
406fn singular_query_segments(input: &str) -> IResult<&str, Vec<SingularSegment>> {
407    many0(preceded(
408        s,
409        alt((
410            map(name_segment, SingularSegment::Name),
411            map(index_segment, SingularSegment::Index),
412        )),
413    ))
414    .parse(input)
415}
416
417fn name_segment(input: &str) -> IResult<&str, String> {
418    alt((
419        delimited(char('['), delimited(s, StringLiteral::new(), s), char(']')),
420        preceded(char('.'), member_name_shorthand),
421    ))
422    .parse(input)
423}
424
425fn index_segment(input: &str) -> IResult<&str, i64> {
426    delimited(char('['), delimited(s, index_selector, s), char(']')).parse(input)
427}
428
429fn literal(input: &str) -> IResult<&str, Literal> {
430    alt((
431        map(NumberLiteral::new(), Literal::Number),
432        map(StringLiteral::new(), Literal::String),
433        value(Literal::Bool(true), tag("true")),
434        value(Literal::Bool(false), tag("false")),
435        value(Literal::Null, tag("null")),
436    ))
437    .parse(input)
438}
439
440fn function_expr(input: &str) -> IResult<&str, FunctionExpr> {
441    map(
442        pair(
443            function_name,
444            delimited(
445                pair(char('('), s),
446                separated_list0(delimited(s, char(','), s), function_argument),
447                preceded(s, char(')')),
448            ),
449        ),
450        |(name, arguments)| FunctionExpr { name, arguments },
451    )
452    .parse(input)
453}
454
455fn function_argument(input: &str) -> IResult<&str, FunctionArgument> {
456    alt((
457        map(function_expr, FunctionArgument::Function),
458        map(logical_expr, FunctionArgument::LogicalExpr),
459        map(filter_query, FunctionArgument::Query),
460        map(literal, FunctionArgument::Literal),
461    ))
462    .parse(input)
463}
464
465fn function_name(input: &str) -> IResult<&str, String> {
466    map(
467        recognize(pair(
468            take_while1(|c: char| c.is_ascii_lowercase()),
469            take_while(|c: char| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_'),
470        )),
471        ToOwned::to_owned,
472    )
473    .parse(input)
474}
475
476fn member_name_shorthand(input: &str) -> IResult<&str, String> {
477    let (input, first) = take_char_if(input, is_name_first)?;
478    let (input, rest) = take_while(is_name_char).parse(input)?;
479    let mut name = String::from(first);
480    name.push_str(rest);
481    Ok((input, name))
482}
483
484fn s(input: &str) -> IResult<&str, &str> {
485    take_while(|c: char| matches!(c, ' ' | '\t' | '\r' | '\n')).parse(input)
486}
487
488fn take_char_if(input: &str, predicate: fn(char) -> bool) -> IResult<&str, char> {
489    let Some(ch) = input.chars().next() else {
490        return Err(Err::Error(nom::error::Error::new(input, ErrorKind::Char)));
491    };
492    if predicate(ch) {
493        Ok((&input[ch.len_utf8()..], ch))
494    } else {
495        Err(Err::Error(nom::error::Error::new(input, ErrorKind::Char)))
496    }
497}
498
499fn is_name_first(ch: char) -> bool {
500    ch.is_ascii_alphabetic() || ch == '_' || is_non_surrogate_non_ascii(ch)
501}
502
503fn is_name_char(ch: char) -> bool {
504    is_name_first(ch) || ch.is_ascii_digit()
505}
506
/// Returns whether `ch` lies in `U+0080..=U+D7FF` or `U+E000..=U+10FFFF`.
fn is_non_surrogate_non_ascii(ch: char) -> bool {
    // A Rust `char` can never be a surrogate (U+D800..=U+DFFF) and never exceeds
    // U+10FFFF, so the two ranges together are exactly "not ASCII".
    !ch.is_ascii()
}