jsonpath_rust/
parser.rs

1#![allow(clippy::empty_docs)]
2pub mod errors;
3mod macros;
4pub mod model;
5mod tests;
6
7use crate::parser::errors::JsonPathError;
8use crate::parser::model::{
9    Comparable, Comparison, Filter, FilterAtom, FnArg, JpQuery, Literal, Segment, Selector,
10    SingularQuery, SingularQuerySegment, Test, TestFunction,
11};
12
13use pest::iterators::Pair;
14use pest::Parser;
15
/// Parser type whose implementation is generated by `pest` from the grammar
/// file `parser/grammar/json_path_9535.pest`.
///
/// The functions in this module consume the `Pair<Rule>` values it produces,
/// starting from `Rule::main`.
#[derive(Parser)]
#[grammar = "parser/grammar/json_path_9535.pest"]
pub(super) struct JSPathParser;
19// const MAX_VAL: i64 = 9007199254740991; // Maximum safe integer value in JavaScript
20// const MIN_VAL: i64 = -9007199254740991; // Minimum safe integer value in JavaScript
21
/// Result alias used by every parsing function in this module: the error type
/// is always [`JsonPathError`].
pub type Parsed<T> = Result<T, JsonPathError>;
23
24/// Parses a string into a [JsonPath].
25///
26/// # Errors
27///
28/// Returns a variant of [crate::JsonPathParserError] if the parsing operation failed.
29pub fn parse_json_path(jp_str: &str) -> Parsed<JpQuery> {
30    JSPathParser::parse(Rule::main, jp_str)
31        .map_err(Box::new)?
32        .next()
33        .ok_or(JsonPathError::UnexpectedPestOutput)
34        .and_then(next_down)
35        .and_then(jp_query)
36}
37
38pub fn jp_query(rule: Pair<Rule>) -> Parsed<JpQuery> {
39    Ok(JpQuery::new(segments(next_down(rule)?)?))
40}
41pub fn rel_query(rule: Pair<Rule>) -> Parsed<Vec<Segment>> {
42    segments(next_down(rule)?)
43}
44
45pub fn segments(rule: Pair<Rule>) -> Parsed<Vec<Segment>> {
46    let mut segments = vec![];
47    for r in rule.into_inner() {
48        segments.push(segment(next_down(r)?)?);
49    }
50    Ok(segments)
51}
52
53pub fn child_segment(rule: Pair<Rule>) -> Parsed<Segment> {
54    match rule.as_rule() {
55        Rule::wildcard_selector => Ok(Segment::Selector(Selector::Wildcard)),
56        Rule::member_name_shorthand => Ok(Segment::name(rule.as_str().trim())),
57        Rule::bracketed_selection => {
58            let mut selectors = vec![];
59            for r in rule.into_inner() {
60                selectors.push(selector(r)?);
61            }
62            if selectors.len() == 1 {
63                Ok(Segment::Selector(
64                    selectors
65                        .into_iter()
66                        .next()
67                        .ok_or(JsonPathError::empty("selector"))?,
68                ))
69            } else {
70                Ok(Segment::Selectors(selectors))
71            }
72        }
73        _ => Err(rule.into()),
74    }
75}
76
77pub fn segment(child: Pair<Rule>) -> Parsed<Segment> {
78    match child.as_rule() {
79        Rule::child_segment => {
80            let val = child.as_str().strip_prefix(".").unwrap_or_default();
81            if val != val.trim_start() {
82                Err(JsonPathError::InvalidJsonPath(format!(
83                    "Invalid child segment `{}`",
84                    child.as_str()
85                )))
86            } else {
87                child_segment(next_down(child)?)
88            }
89        }
90        Rule::descendant_segment => {
91            if child
92                .as_str()
93                .chars()
94                .nth(2)
95                .ok_or(JsonPathError::empty(child.as_str()))?
96                .is_whitespace()
97            {
98                Err(JsonPathError::InvalidJsonPath(format!(
99                    "Invalid descendant segment `{}`",
100                    child.as_str()
101                )))
102            } else {
103                Ok(Segment::Descendant(Box::new(child_segment(next_down(
104                    child,
105                )?)?)))
106            }
107        }
108        _ => Err(child.into()),
109    }
110}
111
112pub fn selector(rule: Pair<Rule>) -> Parsed<Selector> {
113    let child = next_down(rule)?;
114    match child.as_rule() {
115        Rule::name_selector => Ok(Selector::Name(
116            validate_js_str(child.as_str().trim())?.to_string(),
117        )),
118        Rule::wildcard_selector => Ok(Selector::Wildcard),
119        Rule::index_selector => Ok(Selector::Index(
120            child
121                .as_str()
122                .trim()
123                .parse::<i64>()
124                .map_err(|e| (e, "wrong integer"))?,
125        )),
126        Rule::slice_selector => {
127            let (start, end, step) = slice_selector(child)?;
128            Ok(Selector::Slice(start, end, step))
129        }
130        Rule::filter_selector => Ok(Selector::Filter(logical_expr(next_down(child)?)?)),
131        _ => Err(child.into()),
132    }
133}
134
135pub fn function_expr(rule: Pair<Rule>) -> Parsed<TestFunction> {
136    let fn_str = rule.as_str();
137    let mut elems = rule.into_inner();
138    let name = elems
139        .next()
140        .map(|e| e.as_str())
141        .ok_or(JsonPathError::empty("function expression"))?;
142
143    // Check if the function name is valid namely nothing between the name and the opening parenthesis
144    if fn_str
145        .chars()
146        .nth(name.len())
147        .map(|c| c != '(')
148        .unwrap_or_default()
149    {
150        Err(JsonPathError::InvalidJsonPath(format!(
151            "Invalid function expression `{}`",
152            fn_str
153        )))
154    } else {
155        let mut args = vec![];
156        for arg in elems {
157            let next = next_down(arg)?;
158            match next.as_rule() {
159                Rule::literal => args.push(FnArg::Literal(literal(next)?)),
160                Rule::test => args.push(FnArg::Test(Box::new(test(next)?))),
161                Rule::logical_expr => args.push(FnArg::Filter(logical_expr(next)?)),
162
163                _ => return Err(next.into()),
164            }
165        }
166
167        TestFunction::try_new(name, args)
168    }
169}
170
171pub fn test(rule: Pair<Rule>) -> Parsed<Test> {
172    let child = next_down(rule)?;
173    match child.as_rule() {
174        Rule::jp_query => Ok(Test::AbsQuery(jp_query(child)?)),
175        Rule::rel_query => Ok(Test::RelQuery(rel_query(child)?)),
176        Rule::function_expr => Ok(Test::Function(Box::new(function_expr(child)?))),
177        _ => Err(child.into()),
178    }
179}
180
181pub fn logical_expr(rule: Pair<Rule>) -> Parsed<Filter> {
182    let mut ors = vec![];
183    for r in rule.into_inner() {
184        ors.push(logical_expr_and(r)?);
185    }
186    if ors.len() == 1 {
187        Ok(ors
188            .into_iter()
189            .next()
190            .ok_or(JsonPathError::empty("logical expression"))?)
191    } else {
192        Ok(Filter::Or(ors))
193    }
194}
195
196pub fn logical_expr_and(rule: Pair<Rule>) -> Parsed<Filter> {
197    let mut ands = vec![];
198    for r in rule.into_inner() {
199        ands.push(Filter::Atom(filter_atom(r)?));
200    }
201    if ands.len() == 1 {
202        Ok(ands
203            .into_iter()
204            .next()
205            .ok_or(JsonPathError::empty("logical expression"))?)
206    } else {
207        Ok(Filter::And(ands))
208    }
209}
210
211pub fn singular_query_segments(rule: Pair<Rule>) -> Parsed<Vec<SingularQuerySegment>> {
212    let mut segments = vec![];
213    for r in rule.into_inner() {
214        match r.as_rule() {
215            Rule::name_segment => {
216                segments.push(SingularQuerySegment::Name(
217                    next_down(r)?.as_str().trim().to_string(),
218                ));
219            }
220            Rule::index_segment => {
221                segments.push(SingularQuerySegment::Index(
222                    next_down(r)?
223                        .as_str()
224                        .trim()
225                        .parse::<i64>()
226                        .map_err(|e| (e, "int"))?,
227                ));
228            }
229            _ => return Err(r.into()),
230        }
231    }
232    Ok(segments)
233}
234
235pub fn slice_selector(rule: Pair<Rule>) -> Parsed<(Option<i64>, Option<i64>, Option<i64>)> {
236    let mut start = None;
237    let mut end = None;
238    let mut step = None;
239    let get_int = |r: Pair<Rule>| r.as_str().trim().parse::<i64>().map_err(|e| (e, "int"));
240
241    for r in rule.into_inner() {
242        match r.as_rule() {
243            Rule::start => start = Some(get_int(r)?),
244            Rule::end => end = Some(get_int(r)?),
245            Rule::step => {
246                step = {
247                    if let Some(int) = r.into_inner().next() {
248                        Some(get_int(int)?)
249                    } else {
250                        None
251                    }
252                }
253            }
254
255            _ => return Err(r.into()),
256        }
257    }
258    Ok((start, end, step))
259}
260
261pub fn singular_query(rule: Pair<Rule>) -> Parsed<SingularQuery> {
262    let query = next_down(rule)?;
263    let segments = singular_query_segments(next_down(query.clone())?)?;
264    match query.as_rule() {
265        Rule::rel_singular_query => Ok(SingularQuery::Current(segments)),
266        Rule::abs_singular_query => Ok(SingularQuery::Root(segments)),
267        _ => Err(query.into()),
268    }
269}
270
271pub fn comp_expr(rule: Pair<Rule>) -> Parsed<Comparison> {
272    let mut children = rule.into_inner();
273
274    let lhs = comparable(children.next().ok_or(JsonPathError::empty("comparison"))?)?;
275    let op = children
276        .next()
277        .ok_or(JsonPathError::empty("comparison"))?
278        .as_str();
279    let rhs = comparable(children.next().ok_or(JsonPathError::empty("comparison"))?)?;
280
281    Comparison::try_new(op, lhs, rhs)
282}
283
284/// Validates a JSONPath string literal according to RFC 9535
285/// Control characters (U+0000 through U+001F and U+007F) are not allowed unescaped
286/// in string literals, whether single-quoted or double-quoted
287fn validate_js_str(s: &str) -> Parsed<&str> {
288    for (i, c) in s.chars().enumerate() {
289        if c <= '\u{001F}' {
290            return Err(JsonPathError::InvalidJsonPath(format!(
291                "Invalid control character U+{:04X} at position {} in string literal",
292                c as u32, i
293            )));
294        }
295    }
296
297    Ok(s)
298}
299
300pub fn literal(rule: Pair<Rule>) -> Parsed<Literal> {
301    fn parse_number(num: &str) -> Parsed<Literal> {
302        let num = num.trim();
303
304        if num.contains('.') || num.contains('e') || num.contains('E') {
305            Ok(Literal::Float(num.parse::<f64>().map_err(|e| (e, num))?))
306        } else {
307            Ok(Literal::Int(
308                num.trim().parse::<i64>().map_err(|e| (e, num))?,
309            ))
310        }
311    }
312
313    fn parse_string(string: &str) -> Parsed<Literal> {
314        let string = validate_js_str(string.trim())?;
315        if string.starts_with('\'') && string.ends_with('\'') {
316            Ok(Literal::String(string[1..string.len() - 1].to_string()))
317        } else if string.starts_with('"') && string.ends_with('"') {
318            Ok(Literal::String(string[1..string.len() - 1].to_string()))
319        } else {
320            Err(JsonPathError::InvalidJsonPath(format!(
321                "Invalid string literal `{}`",
322                string
323            )))
324        }
325    }
326
327    let first = next_down(rule)?;
328
329    match first.as_rule() {
330        Rule::string => parse_string(first.as_str()),
331        Rule::number => parse_number(first.as_str()),
332        Rule::bool => Ok(Literal::Bool(first.as_str().parse::<bool>()?)),
333        Rule::null => Ok(Literal::Null),
334
335        _ => Err(first.into()),
336    }
337}
338
339pub fn filter_atom(pair: Pair<Rule>) -> Parsed<FilterAtom> {
340    let rule = next_down(pair)?;
341
342    match rule.as_rule() {
343        Rule::paren_expr => {
344            let mut not = false;
345            let mut logic_expr = None;
346            for r in rule.into_inner() {
347                match r.as_rule() {
348                    Rule::not_op => not = true,
349                    Rule::logical_expr => logic_expr = Some(logical_expr(r)?),
350                    _ => (),
351                }
352            }
353
354            logic_expr
355                .map(|expr| FilterAtom::filter(expr, not))
356                .ok_or("Logical expression is absent".into())
357        }
358        Rule::comp_expr => Ok(FilterAtom::cmp(Box::new(comp_expr(rule)?))),
359        Rule::test_expr => {
360            let mut not = false;
361            let mut test_expr = None;
362            for r in rule.into_inner() {
363                match r.as_rule() {
364                    Rule::not_op => not = true,
365                    Rule::test => test_expr = Some(test(r)?),
366                    _ => (),
367                }
368            }
369
370            test_expr
371                .map(|expr| FilterAtom::test(expr, not))
372                .ok_or("Logical expression is absent".into())
373        }
374        _ => Err(rule.into()),
375    }
376}
377
378pub fn comparable(rule: Pair<Rule>) -> Parsed<Comparable> {
379    let rule = next_down(rule)?;
380    match rule.as_rule() {
381        Rule::literal => Ok(Comparable::Literal(literal(rule)?)),
382        Rule::singular_query => Ok(Comparable::SingularQuery(singular_query(rule)?)),
383        Rule::function_expr => {
384            let tf = function_expr(rule)?;
385            if tf.is_comparable() {
386                Ok(Comparable::Function(tf))
387            } else {
388                Err(JsonPathError::InvalidJsonPath(format!(
389                    "Function {} is not comparable",
390                    tf.to_string()
391                )))
392            }
393        }
394        _ => Err(rule.into()),
395    }
396}
397
398fn next_down(rule: Pair<Rule>) -> Parsed<Pair<Rule>> {
399    let rule_as_str = rule.as_str().to_string();
400    rule.into_inner()
401        .next()
402        .ok_or(JsonPathError::InvalidJsonPath(rule_as_str))
403}