jsonpath_lib/parser/mod.rs

mod path_reader;
mod tokenizer;

use std::str::FromStr;

use crate::{debug, trace};

use self::tokenizer::*;

// Dummy position used when only the token *type* matters (see `close_token`).
const DUMMY: usize = 0;

type ParseResult<T> = Result<T, String>;

mod utils {
    use std::str::FromStr;

    /// Parses `string` into any `FromStr` number type, mapping failure to the
    /// error message produced by `msg_handler`.
    pub fn string_to_num<F, S: FromStr>(string: &str, msg_handler: F) -> Result<S, String>
    where
        F: Fn() -> String,
    {
        match string.parse() {
            Ok(n) => Ok(n),
            _ => Err(msg_handler()),
        }
    }
}

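// Illustrative only (not in the original source): `string_to_num` backs all
// digit parsing below, e.g.
//   utils::string_to_num::<_, isize>("42", || "expected number".to_owned()) == Ok(42)
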
#[derive(Debug, PartialEq, Clone)]
pub enum ParseToken {
    // '$'
    Absolute,
    // '@'
    Relative,
    // '.'
    In,
    // '..'
    Leaves,
    // '*'
    All,

    Key(String),
    Keys(Vec<String>),
    // []
    Array,
    // meta token: emitted by `NodeVisitor` to mark the end of an array expression
    ArrayEof,
    // ?( filter )
    Filter(FilterToken),
    // 1 : 2
    Range(Option<isize>, Option<isize>, Option<usize>),
    // 1, 2, 3
    Union(Vec<isize>),

    Number(f64),

    Bool(bool),

    Eof,
}

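// Illustrative mapping (not in the original source) between bracket syntax and
// the tokens above, as parsed by the grammar below:
//   [1:5:2]    -> Range(Some(1), Some(5), Some(2))
//   [:3]       -> Range(None, Some(3), None)
//   [0, 2]     -> Union(vec![0, 2])
//   ['a', 'b'] -> Keys(vec!["a".to_owned(), "b".to_owned()])
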
// Binary operators permitted inside a `?( ... )` filter expression.
#[derive(Debug, PartialEq, Clone)]
pub enum FilterToken {
    Equal,
    NotEqual,
    Little,
    LittleOrEqual,
    Greater,
    GreaterOrEqual,
    And,
    Or,
}

// AST node: a binary tree whose leaves and operators are `ParseToken`s.
#[deprecated(since = "0.4.0", note = "Please use `paths::PathParser`")]
#[derive(Debug, Clone)]
pub struct Node {
    left: Option<Box<Node>>,
    right: Option<Box<Node>>,
    token: ParseToken,
}

#[deprecated(since = "0.4.0", note = "Please use `paths::PathParser`")]
pub struct Parser;

#[allow(deprecated)]
impl Parser {
    pub fn compile(input: &str) -> ParseResult<Node> {
        let mut tokenizer = TokenReader::new(input);
        Self::json_path(&mut tokenizer)
    }

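    // Illustrative only (not in the original source): `compile` returns the
    // AST root for a complete path, e.g.
    //
    //     #[allow(deprecated)]
    //     let node = Parser::compile("$.store.book[0]").expect("valid path");
    //
    // Syntax errors surface as `Err(String)`.
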
    fn json_path(tokenizer: &mut TokenReader) -> ParseResult<Node> {
        debug!("#json_path");
        match tokenizer.next_token() {
            Ok(Token::Absolute(_)) => {
                let node = Self::node(ParseToken::Absolute);
                Self::paths(node, tokenizer)
            }
            _ => Err(tokenizer.err_msg()),
        }
    }

    fn paths(prev: Node, tokenizer: &mut TokenReader) -> ParseResult<Node> {
        debug!("#paths");
        match tokenizer.peek_token() {
            Ok(Token::Dot(_)) => {
                Self::eat_token(tokenizer);
                Self::paths_dot(prev, tokenizer)
            }
            Ok(Token::OpenArray(_)) => {
                Self::eat_token(tokenizer);
                Self::eat_whitespace(tokenizer);
                let node = Self::array(prev, tokenizer)?;
                Self::paths(node, tokenizer)
            }
            _ => Ok(prev),
        }
    }

    fn paths_dot(prev: Node, tokenizer: &mut TokenReader) -> ParseResult<Node> {
        debug!("#paths_dot");
        let node = Self::path(prev, tokenizer)?;
        Self::paths(node, tokenizer)
    }

    fn path(prev: Node, tokenizer: &mut TokenReader) -> ParseResult<Node> {
        debug!("#path");
        match tokenizer.peek_token() {
            Ok(Token::Dot(_)) => Self::path_leaves(prev, tokenizer),
            Ok(Token::Asterisk(_)) => Self::path_in_all(prev, tokenizer),
            Ok(Token::Key(_, _)) => Self::path_in_key(prev, tokenizer),
            Ok(Token::OpenArray(_)) => {
                Self::eat_token(tokenizer);
                Self::array(prev, tokenizer)
            }
            _ => Err(tokenizer.err_msg()),
        }
    }

    fn path_leaves(prev: Node, tokenizer: &mut TokenReader) -> ParseResult<Node> {
        debug!("#path_leaves");
        Self::eat_token(tokenizer);
        match tokenizer.peek_token() {
            Ok(Token::Asterisk(_)) => Self::path_leaves_all(prev, tokenizer),
            Ok(Token::OpenArray(_)) => {
                let mut leaves_node = Self::node(ParseToken::Leaves);
                leaves_node.left = Some(Box::new(prev));
                Self::paths(leaves_node, tokenizer)
            }
            _ => Self::path_leaves_key(prev, tokenizer),
        }
    }

    fn path_leaves_key(prev: Node, tokenizer: &mut TokenReader) -> ParseResult<Node> {
        debug!("#path_leaves_key");
        Ok(Node {
            token: ParseToken::Leaves,
            left: Some(Box::new(prev)),
            right: Some(Box::new(Self::key(tokenizer)?)),
        })
    }

    fn path_leaves_all(prev: Node, tokenizer: &mut TokenReader) -> ParseResult<Node> {
        debug!("#path_leaves_all");
        Self::eat_token(tokenizer);
        Ok(Node {
            token: ParseToken::Leaves,
            left: Some(Box::new(prev)),
            right: Some(Box::new(Self::node(ParseToken::All))),
        })
    }

    fn path_in_all(prev: Node, tokenizer: &mut TokenReader) -> ParseResult<Node> {
        debug!("#path_in_all");
        Self::eat_token(tokenizer);
        Ok(Node {
            token: ParseToken::In,
            left: Some(Box::new(prev)),
            right: Some(Box::new(Self::node(ParseToken::All))),
        })
    }

    fn path_in_key(prev: Node, tokenizer: &mut TokenReader) -> ParseResult<Node> {
        debug!("#path_in_key");
        Ok(Node {
            token: ParseToken::In,
            left: Some(Box::new(prev)),
            right: Some(Box::new(Self::key(tokenizer)?)),
        })
    }

    fn key(tokenizer: &mut TokenReader) -> ParseResult<Node> {
        debug!("#key");
        match tokenizer.next_token() {
            Ok(Token::Key(_, v)) => Ok(Self::node(ParseToken::Key(v))),
            _ => Err(tokenizer.err_msg()),
        }
    }

    fn boolean(tokenizer: &mut TokenReader) -> ParseResult<Node> {
        debug!("#boolean");

        // Only the first byte is inspected: any key starting with 't'/'T' or
        // 'f'/'F' is accepted, then compared case-insensitively to "true".
        fn validation_bool_value(v: &str) -> bool {
            let b = v.as_bytes();
            !b.is_empty() && (b[0] == b't' || b[0] == b'T' || b[0] == b'f' || b[0] == b'F')
        }

        match tokenizer.next_token() {
            Ok(Token::Key(_, ref v)) if validation_bool_value(v) => {
                Ok(Self::node(ParseToken::Bool(v.eq_ignore_ascii_case("true"))))
            }
            _ => Err(tokenizer.err_msg()),
        }
    }

    fn array_keys(tokenizer: &mut TokenReader, first_key: String) -> ParseResult<Node> {
        let mut keys = vec![first_key];

        while let Ok(Token::Comma(_)) = tokenizer.peek_token() {
            Self::eat_token(tokenizer);
            Self::eat_whitespace(tokenizer);

            match tokenizer.next_token() {
                Ok(Token::SingleQuoted(_, val)) | Ok(Token::DoubleQuoted(_, val)) => {
                    keys.push(val);
                }
                _ => return Err(tokenizer.err_msg()),
            }

            Self::eat_whitespace(tokenizer);
        }

        Ok(Self::node(ParseToken::Keys(keys)))
    }

    fn array_quote_value(tokenizer: &mut TokenReader) -> ParseResult<Node> {
        debug!("#array_quote_value");
        match tokenizer.next_token() {
            Ok(Token::SingleQuoted(_, val)) | Ok(Token::DoubleQuoted(_, val)) => {
                if let Ok(Token::Comma(_)) = tokenizer.peek_token() {
                    Self::array_keys(tokenizer, val)
                } else {
                    Ok(Self::node(ParseToken::Key(val)))
                }
            }
            _ => Err(tokenizer.err_msg()),
        }
    }

    fn array_start(prev: Node, tokenizer: &mut TokenReader) -> ParseResult<Node> {
        debug!("#array_start");
        match tokenizer.peek_token() {
            Ok(Token::Question(_)) => {
                Self::eat_token(tokenizer);
                Ok(Node {
                    token: ParseToken::Array,
                    left: Some(Box::new(prev)),
                    right: Some(Box::new(Self::filter(tokenizer)?)),
                })
            }
            Ok(Token::Asterisk(_)) => {
                Self::eat_token(tokenizer);
                Ok(Node {
                    token: ParseToken::Array,
                    left: Some(Box::new(prev)),
                    right: Some(Box::new(Self::node(ParseToken::All))),
                })
            }
            _ => Ok(Node {
                token: ParseToken::Array,
                left: Some(Box::new(prev)),
                right: Some(Box::new(Self::array_value(tokenizer)?)),
            }),
        }
    }

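    // Illustrative shapes (not in the original source) for the three
    // `array_start` branches:
    //   [?(@.x == 1)] -> Array { right: Filter subtree }
    //   [*]           -> Array { right: All }
    //   [0]           -> Array { right: Number(0.0) }
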
    fn array(prev: Node, tokenizer: &mut TokenReader) -> ParseResult<Node> {
        debug!("#array");
        let ret = Self::array_start(prev, tokenizer)?;
        Self::eat_whitespace(tokenizer);
        Self::close_token(ret, Token::CloseArray(DUMMY), tokenizer)
    }

    fn array_value_key(tokenizer: &mut TokenReader) -> ParseResult<Node> {
        debug!("#array_value_key");
        match tokenizer.next_token() {
            Ok(Token::Key(pos, ref val)) => {
                let digit = utils::string_to_num(val, || tokenizer.err_msg_with_pos(pos))?;
                Self::eat_whitespace(tokenizer);

                match tokenizer.peek_token() {
                    Ok(Token::Comma(_)) => Self::union(digit, tokenizer),
                    Ok(Token::Split(_)) => Self::range_from(digit, tokenizer),
                    _ => Ok(Self::node(ParseToken::Number(digit as f64))),
                }
            }
            _ => Err(tokenizer.err_msg()),
        }
    }

    fn array_value(tokenizer: &mut TokenReader) -> ParseResult<Node> {
        debug!("#array_value");
        match tokenizer.peek_token() {
            Ok(Token::Key(_, _)) => Self::array_value_key(tokenizer),
            Ok(Token::Split(_)) => {
                Self::eat_token(tokenizer);
                Self::range_to(tokenizer)
            }
            Ok(Token::DoubleQuoted(_, _)) | Ok(Token::SingleQuoted(_, _)) => {
                Self::array_quote_value(tokenizer)
            }
            Err(TokenError::Eof) => Ok(Self::node(ParseToken::Eof)),
            _ => {
                Self::eat_token(tokenizer);
                Err(tokenizer.err_msg())
            }
        }
    }

    fn union(num: isize, tokenizer: &mut TokenReader) -> ParseResult<Node> {
        debug!("#union");
        let mut values = vec![num];
        while matches!(tokenizer.peek_token(), Ok(Token::Comma(_))) {
            Self::eat_token(tokenizer);
            Self::eat_whitespace(tokenizer);
            match tokenizer.next_token() {
                Ok(Token::Key(pos, ref val)) => {
                    let digit = utils::string_to_num(val, || tokenizer.err_msg_with_pos(pos))?;
                    values.push(digit);
                }
                _ => {
                    return Err(tokenizer.err_msg());
                }
            }
        }
        Ok(Self::node(ParseToken::Union(values)))
    }

    /// Parses an optional `:n` suffix, shared by the range `to` and `step`
    /// positions; returns `Ok(None)` when no `:n` follows.
    fn range_value<S: FromStr>(tokenizer: &mut TokenReader) -> Result<Option<S>, String> {
        Self::eat_whitespace(tokenizer);

        match tokenizer.peek_token() {
            Ok(Token::Split(_)) => {
                Self::eat_token(tokenizer);
                Self::eat_whitespace(tokenizer);
            }
            _ => {
                return Ok(None);
            }
        }

        match tokenizer.peek_token() {
            Ok(Token::Key(_, _)) => {}
            _ => {
                return Ok(None);
            }
        }

        match tokenizer.next_token() {
            Ok(Token::Key(pos, str_step)) => {
                utils::string_to_num(&str_step, || tokenizer.err_msg_with_pos(pos)).map(Some)
            }
            _ => {
                // The preceding peek guarantees a `Key` token here.
                unreachable!();
            }
        }
    }

    fn range_from(from: isize, tokenizer: &mut TokenReader) -> ParseResult<Node> {
        debug!("#range_from");
        Self::eat_token(tokenizer);
        Self::eat_whitespace(tokenizer);

        match tokenizer.peek_token() {
            Ok(Token::Key(_, _)) => Self::range(from, tokenizer),
            Ok(Token::Split(_)) => match Self::range_value(tokenizer)? {
                Some(step) => Ok(Self::node(ParseToken::Range(Some(from), None, Some(step)))),
                _ => Ok(Self::node(ParseToken::Range(Some(from), None, None))),
            },
            _ => Ok(Self::node(ParseToken::Range(Some(from), None, None))),
        }
    }

    fn range_to(tokenizer: &mut TokenReader) -> ParseResult<Node> {
        debug!("#range_to");

        if let Some(step) = Self::range_value(tokenizer)? {
            return Ok(Self::node(ParseToken::Range(None, None, Some(step))));
        }

        if let Ok(Token::CloseArray(_)) = tokenizer.peek_token() {
            return Ok(Self::node(ParseToken::Range(None, None, None)));
        }

        match tokenizer.next_token() {
            Ok(Token::Key(pos, ref to_str)) => {
                let to = utils::string_to_num(to_str, || tokenizer.err_msg_with_pos(pos))?;
                let step = Self::range_value(tokenizer)?;
                Ok(Self::node(ParseToken::Range(None, Some(to), step)))
            }
            _ => Err(tokenizer.err_msg()),
        }
    }

    fn range(from: isize, tokenizer: &mut TokenReader) -> ParseResult<Node> {
        debug!("#range");
        match tokenizer.next_token() {
            Ok(Token::Key(pos, ref str_to)) => {
                let to = utils::string_to_num(str_to, || tokenizer.err_msg_with_pos(pos))?;
                let step = Self::range_value(tokenizer)?;
                Ok(Self::node(ParseToken::Range(Some(from), Some(to), step)))
            }
            _ => Err(tokenizer.err_msg()),
        }
    }

    fn filter(tokenizer: &mut TokenReader) -> ParseResult<Node> {
        debug!("#filter");
        match tokenizer.next_token() {
            Ok(Token::OpenParenthesis(_)) => {
                let ret = Self::exprs(tokenizer)?;
                Self::eat_whitespace(tokenizer);
                Self::close_token(ret, Token::CloseParenthesis(DUMMY), tokenizer)
            }
            _ => Err(tokenizer.err_msg()),
        }
    }

    fn exprs(tokenizer: &mut TokenReader) -> ParseResult<Node> {
        Self::eat_whitespace(tokenizer);
        debug!("#exprs");
        let node = match tokenizer.peek_token() {
            Ok(Token::OpenParenthesis(_)) => {
                Self::eat_token(tokenizer);
                trace!("\t-exprs - open_parenthesis");
                let ret = Self::exprs(tokenizer)?;
                Self::eat_whitespace(tokenizer);
                Self::close_token(ret, Token::CloseParenthesis(DUMMY), tokenizer)?
            }
            _ => {
                trace!("\t-exprs - else");
                Self::expr(tokenizer)?
            }
        };
        Self::eat_whitespace(tokenizer);
        Self::condition_expr(node, tokenizer)
    }

    fn condition_expr(prev: Node, tokenizer: &mut TokenReader) -> ParseResult<Node> {
        debug!("#condition_expr");
        match tokenizer.peek_token() {
            Ok(Token::And(_)) => {
                Self::eat_token(tokenizer);
                Ok(Node {
                    token: ParseToken::Filter(FilterToken::And),
                    left: Some(Box::new(prev)),
                    right: Some(Box::new(Self::exprs(tokenizer)?)),
                })
            }
            Ok(Token::Or(_)) => {
                Self::eat_token(tokenizer);
                Ok(Node {
                    token: ParseToken::Filter(FilterToken::Or),
                    left: Some(Box::new(prev)),
                    right: Some(Box::new(Self::exprs(tokenizer)?)),
                })
            }
            _ => Ok(prev),
        }
    }

    fn expr(tokenizer: &mut TokenReader) -> ParseResult<Node> {
        debug!("#expr");

        // A term starting with `@` is a valid filter on its own (an existence
        // test); any other term must be followed by a comparison operator.
        let has_prop_candidate = matches!(tokenizer.peek_token(), Ok(Token::At(_)));

        let node = Self::term(tokenizer)?;
        Self::eat_whitespace(tokenizer);

        if matches!(
            tokenizer.peek_token(),
            Ok(Token::Equal(_))
                | Ok(Token::NotEqual(_))
                | Ok(Token::Little(_))
                | Ok(Token::LittleOrEqual(_))
                | Ok(Token::Greater(_))
                | Ok(Token::GreaterOrEqual(_))
        ) {
            Self::op(node, tokenizer)
        } else if has_prop_candidate {
            Ok(node)
        } else {
            Err(tokenizer.err_msg())
        }
    }

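    // Illustrative only (not in the original source): for `@.price < 10` the
    // resulting subtree is
    //   Filter(Little) { left: In { left: Relative, right: Key("price") },
    //                    right: Number(10.0) }
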
    fn term_num(tokenizer: &mut TokenReader) -> ParseResult<Node> {
        debug!("#term_num");
        match tokenizer.next_token() {
            Ok(Token::Key(pos, val)) => match tokenizer.peek_token() {
                Ok(Token::Dot(_)) => Self::term_num_float(val.as_str(), tokenizer),
                _ => {
                    let number = utils::string_to_num(&val, || tokenizer.err_msg_with_pos(pos))?;
                    Ok(Self::node(ParseToken::Number(number)))
                }
            },
            _ => Err(tokenizer.err_msg()),
        }
    }

    // The tokenizer splits a literal like `1.5` into `Key("1")`, `Dot`,
    // `Key("5")`; this re-joins the pieces before parsing the float.
    fn term_num_float(num: &str, tokenizer: &mut TokenReader) -> ParseResult<Node> {
        debug!("#term_num_float");
        Self::eat_token(tokenizer);
        match tokenizer.next_token() {
            Ok(Token::Key(pos, frac)) => {
                let mut f = String::new();
                f.push_str(num);
                f.push('.');
                f.push_str(frac.as_str());
                let number = utils::string_to_num(&f, || tokenizer.err_msg_with_pos(pos))?;
                Ok(Self::node(ParseToken::Number(number)))
            }
            _ => Err(tokenizer.err_msg()),
        }
    }

    fn term(tokenizer: &mut TokenReader) -> ParseResult<Node> {
        debug!("#term");

        match tokenizer.peek_token() {
            Ok(Token::At(_)) => {
                Self::eat_token(tokenizer);
                let node = Self::node(ParseToken::Relative);

                match tokenizer.peek_token() {
                    Ok(Token::Whitespace(_, _)) => {
                        Self::eat_whitespace(tokenizer);
                        Ok(node)
                    }
                    _ => Self::paths(node, tokenizer),
                }
            }
            Ok(Token::Absolute(_)) => Self::json_path(tokenizer),
            Ok(Token::DoubleQuoted(_, _)) | Ok(Token::SingleQuoted(_, _)) => {
                Self::array_quote_value(tokenizer)
            }
            Ok(Token::Key(_, key)) => match key.as_bytes()[0] {
                b'-' | b'0'..=b'9' => Self::term_num(tokenizer),
                _ => Self::boolean(tokenizer),
            },
            _ => Err(tokenizer.err_msg()),
        }
    }

    fn op(prev: Node, tokenizer: &mut TokenReader) -> ParseResult<Node> {
        debug!("#op");
        let token = match tokenizer.next_token() {
            Ok(Token::Equal(_)) => ParseToken::Filter(FilterToken::Equal),
            Ok(Token::NotEqual(_)) => ParseToken::Filter(FilterToken::NotEqual),
            Ok(Token::Little(_)) => ParseToken::Filter(FilterToken::Little),
            Ok(Token::LittleOrEqual(_)) => ParseToken::Filter(FilterToken::LittleOrEqual),
            Ok(Token::Greater(_)) => ParseToken::Filter(FilterToken::Greater),
            Ok(Token::GreaterOrEqual(_)) => ParseToken::Filter(FilterToken::GreaterOrEqual),
            _ => {
                return Err(tokenizer.err_msg());
            }
        };

        Self::eat_whitespace(tokenizer);

        Ok(Node {
            token,
            left: Some(Box::new(prev)),
            right: Some(Box::new(Self::term(tokenizer)?)),
        })
    }

    fn eat_whitespace(tokenizer: &mut TokenReader) {
        while let Ok(Token::Whitespace(_, _)) = tokenizer.peek_token() {
            let _ = tokenizer.next_token();
        }
    }

    fn eat_token(tokenizer: &mut TokenReader) {
        let _ = tokenizer.next_token();
    }

    fn node(token: ParseToken) -> Node {
        Node {
            left: None,
            right: None,
            token,
        }
    }

    // Consumes the next token and checks that it has the same *type* as
    // `token` (whose position is the `DUMMY` placeholder).
    fn close_token(ret: Node, token: Token, tokenizer: &mut TokenReader) -> ParseResult<Node> {
        debug!("#close_token");
        match tokenizer.next_token() {
            Ok(ref t) if t.is_match_token_type(token) => Ok(ret),
            _ => Err(tokenizer.err_msg()),
        }
    }
}

#[allow(deprecated)]
pub trait NodeVisitor {
    /// Walks the AST, emitting `ParseToken`s in evaluation order via
    /// `visit_token`.
    fn visit(&mut self, node: &Node) {
        match &node.token {
            ParseToken::Absolute
            | ParseToken::Relative
            | ParseToken::All
            | ParseToken::Key(_)
            | ParseToken::Keys(_)
            | ParseToken::Range(_, _, _)
            | ParseToken::Union(_)
            | ParseToken::Number(_)
            | ParseToken::Bool(_) => {
                self.visit_token(&node.token);
            }
            ParseToken::In | ParseToken::Leaves => {
                if let Some(n) = &node.left {
                    self.visit(n);
                }

                self.visit_token(&node.token);

                if let Some(n) = &node.right {
                    self.visit(n);
                }
            }
            ParseToken::Array => {
                if let Some(n) = &node.left {
                    self.visit(n);
                }

                self.visit_token(&node.token);

                if let Some(n) = &node.right {
                    self.visit(n);
                }

                self.visit_token(&ParseToken::ArrayEof);
            }
            ParseToken::Filter(FilterToken::And) | ParseToken::Filter(FilterToken::Or) => {
                if let Some(n) = &node.left {
                    self.visit(n);
                }

                if let Some(n) = &node.right {
                    self.visit(n);
                }

                self.visit_token(&node.token);
            }
            ParseToken::Filter(_) => {
                if let Some(n) = &node.left {
                    self.visit(n);
                }

                self.end_term();

                if let Some(n) = &node.right {
                    self.visit(n);
                }

                self.end_term();

                self.visit_token(&node.token);
            }
            _ => {}
        }
    }

    fn visit_token(&mut self, token: &ParseToken);
    fn end_term(&mut self) {}
}
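
#[cfg(test)]
mod visitor_example {
    // A minimal sketch (not part of the original source) showing how the
    // deprecated `Parser` and `NodeVisitor` fit together: compile a path,
    // then walk the AST collecting every visited `ParseToken`.
    #![allow(deprecated)]

    use super::{NodeVisitor, ParseToken, Parser};

    struct TokenCollector {
        tokens: Vec<ParseToken>,
    }

    impl NodeVisitor for TokenCollector {
        fn visit_token(&mut self, token: &ParseToken) {
            self.tokens.push(token.clone());
        }
    }

    #[test]
    fn collects_tokens_in_visit_order() {
        let node = Parser::compile("$.a[0]").expect("valid path");
        let mut collector = TokenCollector { tokens: Vec::new() };
        collector.visit(&node);
        // `ArrayEof` is the meta token that closes the array expression.
        assert_eq!(
            collector.tokens,
            vec![
                ParseToken::Absolute,
                ParseToken::In,
                ParseToken::Key("a".to_owned()),
                ParseToken::Array,
                ParseToken::Number(0.0),
                ParseToken::ArrayEof,
            ]
        );
    }
}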