jsonpath_lib/parser/
tokenizer.rs
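
//! Tokenizer for JSONPath expressions.
//!
//! `Tokenizer` turns a path string into a stream of `Token`s read from a
//! `PathReader`, and `TokenReader` pre-tokenizes an input so the parser can
//! peek at and consume tokens while keeping enough position information to
//! build error messages.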

use std::result::Result;

use super::path_reader::{PathReader, ReaderError};

// Single characters recognized by the tokenizer. Note that `CH_SEMICOLON` is
// actually the colon (`:`) used by slice syntax, and `CH_LITTLE` / `CH_GREATER`
// are the `<` / `>` comparison operators.
const CH_DOLLA: char = '$';
const CH_DOT: char = '.';
const CH_ASTERISK: char = '*';
const CH_LARRAY: char = '[';
const CH_RARRAY: char = ']';
const CH_LPAREN: char = '(';
const CH_RPAREN: char = ')';
const CH_AT: char = '@';
const CH_QUESTION: char = '?';
const CH_COMMA: char = ',';
const CH_SEMICOLON: char = ':';
const CH_EQUAL: char = '=';
const CH_AMPERSAND: char = '&';
const CH_PIPE: char = '|';
const CH_LITTLE: char = '<';
const CH_GREATER: char = '>';
const CH_EXCLAMATION: char = '!';
const CH_SINGLE_QUOTE: char = '\'';
const CH_DOUBLE_QUOTE: char = '"';

/// Errors produced while tokenizing: either the input ended (`Eof`) or an
/// unexpected character was found (`Position` carries the reported position).
#[derive(Debug, Clone, PartialEq)]
pub enum TokenError {
    Eof,
    Position(usize),
}

fn to_token_error(read_err: ReaderError) -> TokenError {
    match read_err {
        ReaderError::Eof => TokenError::Eof,
    }
}

/// A single lexical token. The first field of every variant is the position
/// reported by the reader when the token was read.
#[derive(Debug, PartialEq)]
pub enum Token {
    Absolute(usize),
    Dot(usize),
    At(usize),
    OpenArray(usize),
    CloseArray(usize),
    Asterisk(usize),
    Question(usize),
    Comma(usize),
    Split(usize),
    OpenParenthesis(usize),
    CloseParenthesis(usize),
    Key(usize, String),
    DoubleQuoted(usize, String),
    SingleQuoted(usize, String),
    Equal(usize),
    GreaterOrEqual(usize),
    Greater(usize),
    Little(usize),
    LittleOrEqual(usize),
    NotEqual(usize),
    And(usize),
    Or(usize),
    Whitespace(usize, usize),
}

impl Token {
    /// Returns `true` when `other` is the same variant as `self`, ignoring
    /// the position and payload fields of both tokens.
    pub fn is_match_token_type(&self, other: Token) -> bool {
        std::mem::discriminant(self) == std::mem::discriminant(&other)
    }
}

/// Converts a JSONPath input string into a stream of `Token`s.
pub struct Tokenizer<'a> {
    input: PathReader<'a>,
}

impl<'a> Tokenizer<'a> {
    pub fn new(input: &'a str) -> Self {
        trace!("input: {}", input);
        Tokenizer {
            input: PathReader::new(input),
        }
    }

    fn dolla(&mut self, pos: usize, ch: char) -> Result<Token, TokenError> {
        // Read the run of name characters following `$`, stopping at any
        // special character or whitespace.
        let fun = |c: &char| match c {
            &CH_DOT | &CH_ASTERISK | &CH_LARRAY | &CH_RARRAY | &CH_LPAREN | &CH_RPAREN | &CH_AT
            | &CH_QUESTION | &CH_COMMA | &CH_SEMICOLON | &CH_LITTLE | &CH_GREATER | &CH_EQUAL
            | &CH_AMPERSAND | &CH_PIPE | &CH_EXCLAMATION => false,
            _ => !c.is_whitespace(),
        };
        let (_, mut vec) = self.input.take_while(fun).map_err(to_token_error)?;
        vec.insert(0, ch);

        // A bare `$` is the root token; `$` followed by name characters is an
        // ordinary key that happens to start with `$`.
        if vec.len() == 1 {
            Ok(Token::Absolute(pos))
        } else {
            Ok(Token::Key(pos, vec))
        }
    }

    fn quote(&mut self, ch: char) -> Result<String, TokenError> {
        let (_, mut val) = self
            .input
            .take_while(|c| *c != ch)
            .map_err(to_token_error)?;

        if let Some('\\') = val.chars().last() {
            // The quote we stopped at was escaped: drop the backslash, read on
            // to the real closing quote, and splice the two pieces together.
            // Only a single escaped quote per literal is handled.
            self.input.next_char().map_err(to_token_error)?;
            let _ = val.pop();
            let (_, val_remain) = self
                .input
                .take_while(|c| *c != ch)
                .map_err(to_token_error)?;
            self.input.next_char().map_err(to_token_error)?;
            val.push(ch);
            val.push_str(val_remain.as_str());
        } else {
            // Consume the closing quote.
            self.input.next_char().map_err(to_token_error)?;
        }

        Ok(val)
    }

    fn single_quote(&mut self, pos: usize, ch: char) -> Result<Token, TokenError> {
        let val = self.quote(ch)?;
        Ok(Token::SingleQuoted(pos, val))
    }

    fn double_quote(&mut self, pos: usize, ch: char) -> Result<Token, TokenError> {
        let val = self.quote(ch)?;
        Ok(Token::DoubleQuoted(pos, val))
    }

    fn equal(&mut self, pos: usize, _: char) -> Result<Token, TokenError> {
        // Only `==` is valid; a lone `=` is an error at this position.
        let (_, ch) = self.input.peek_char().map_err(to_token_error)?;
        match ch {
            CH_EQUAL => {
                self.input.next_char().map_err(to_token_error)?;
                Ok(Token::Equal(pos))
            }
            _ => Err(TokenError::Position(pos)),
        }
    }

    fn not_equal(&mut self, pos: usize, _: char) -> Result<Token, TokenError> {
        // `!` must be followed by `=` to form `!=`.
        let (_, ch) = self.input.peek_char().map_err(to_token_error)?;
        match ch {
            CH_EQUAL => {
                self.input.next_char().map_err(to_token_error)?;
                Ok(Token::NotEqual(pos))
            }
            _ => Err(TokenError::Position(pos)),
        }
    }

    fn little(&mut self, pos: usize, _: char) -> Result<Token, TokenError> {
        // `<` on its own, or `<=` when followed by `=`.
        let (_, ch) = self.input.peek_char().map_err(to_token_error)?;
        match ch {
            CH_EQUAL => {
                self.input.next_char().map_err(to_token_error)?;
                Ok(Token::LittleOrEqual(pos))
            }
            _ => Ok(Token::Little(pos)),
        }
    }

    fn greater(&mut self, pos: usize, _: char) -> Result<Token, TokenError> {
        // `>` on its own, or `>=` when followed by `=`.
        let (_, ch) = self.input.peek_char().map_err(to_token_error)?;
        match ch {
            CH_EQUAL => {
                self.input.next_char().map_err(to_token_error)?;
                Ok(Token::GreaterOrEqual(pos))
            }
            _ => Ok(Token::Greater(pos)),
        }
    }

    fn and(&mut self, pos: usize, _: char) -> Result<Token, TokenError> {
        // `&` must be doubled to form `&&`.
        let (_, ch) = self.input.peek_char().map_err(to_token_error)?;
        match ch {
            CH_AMPERSAND => {
                self.input.next_char().map_err(to_token_error)?;
                Ok(Token::And(pos))
            }
            _ => Err(TokenError::Position(pos)),
        }
    }

    fn or(&mut self, pos: usize, _: char) -> Result<Token, TokenError> {
        // `|` must be doubled to form `||`.
        let (_, ch) = self.input.peek_char().map_err(to_token_error)?;
        match ch {
            CH_PIPE => {
                self.input.next_char().map_err(to_token_error)?;
                Ok(Token::Or(pos))
            }
            _ => Err(TokenError::Position(pos)),
        }
    }

    fn whitespace(&mut self, pos: usize, _: char) -> Result<Token, TokenError> {
        // Collapse a run of whitespace into a single token carrying its length.
        let (_, vec) = self
            .input
            .take_while(|c| c.is_whitespace())
            .map_err(to_token_error)?;
        Ok(Token::Whitespace(pos, vec.len()))
    }

    fn other(&mut self, pos: usize, ch: char) -> Result<Token, TokenError> {
        // Any other run of characters, up to the next special character or
        // whitespace, becomes a `Key` token.
        let fun = |c: &char| match c {
            &CH_DOLLA | &CH_DOT | &CH_ASTERISK | &CH_LARRAY | &CH_RARRAY | &CH_LPAREN
            | &CH_RPAREN | &CH_AT | &CH_QUESTION | &CH_COMMA | &CH_SEMICOLON | &CH_LITTLE
            | &CH_GREATER | &CH_EQUAL | &CH_AMPERSAND | &CH_PIPE | &CH_EXCLAMATION => false,
            _ => !c.is_whitespace(),
        };
        let (_, mut vec) = self.input.take_while(fun).map_err(to_token_error)?;
        vec.insert(0, ch);
        Ok(Token::Key(pos, vec))
    }

    /// Reads the next token, or returns `TokenError::Eof` once the input is
    /// exhausted.
    pub fn next_token(&mut self) -> Result<Token, TokenError> {
        let (pos, ch) = self.input.next_char().map_err(to_token_error)?;
        match ch {
            CH_DOLLA => self.dolla(pos, ch),
            CH_DOT => Ok(Token::Dot(pos)),
            CH_ASTERISK => Ok(Token::Asterisk(pos)),
            CH_LARRAY => Ok(Token::OpenArray(pos)),
            CH_RARRAY => Ok(Token::CloseArray(pos)),
            CH_LPAREN => Ok(Token::OpenParenthesis(pos)),
            CH_RPAREN => Ok(Token::CloseParenthesis(pos)),
            CH_AT => Ok(Token::At(pos)),
            CH_QUESTION => Ok(Token::Question(pos)),
            CH_COMMA => Ok(Token::Comma(pos)),
            CH_SEMICOLON => Ok(Token::Split(pos)),
            CH_SINGLE_QUOTE => self.single_quote(pos, ch),
            CH_DOUBLE_QUOTE => self.double_quote(pos, ch),
            CH_EQUAL => self.equal(pos, ch),
            CH_GREATER => self.greater(pos, ch),
            CH_LITTLE => self.little(pos, ch),
            CH_AMPERSAND => self.and(pos, ch),
            CH_PIPE => self.or(pos, ch),
            CH_EXCLAMATION => self.not_equal(pos, ch),
            _ if ch.is_whitespace() => self.whitespace(pos, ch),
            _ => self.other(pos, ch),
        }
    }

    fn current_pos(&self) -> usize {
        self.input.current_pos()
    }
}

/// Pre-tokenizes an input string and serves the tokens back to the parser,
/// keeping the original input and the terminating error so that error
/// messages can point at the offending position.
pub struct TokenReader<'a> {
    origin_input: &'a str,
    err: TokenError,
    err_pos: usize,
    tokens: Vec<(usize, Token)>,
    curr_pos: Option<usize>,
}

impl<'a> TokenReader<'a> {
    pub fn new(input: &'a str) -> Self {
        let mut tokenizer = Tokenizer::new(input);
        let mut tokens = vec![];
        loop {
            match tokenizer.next_token() {
                Ok(t) => {
                    // Newest token goes to the front, so `last()` / `pop()`
                    // hand tokens back in source order.
                    tokens.insert(0, (tokenizer.current_pos(), t));
                }
                Err(e) => {
                    return TokenReader {
                        origin_input: input,
                        err: e,
                        err_pos: tokenizer.current_pos(),
                        tokens,
                        curr_pos: None,
                    };
                }
            }
        }
    }

    /// Looks at the next token without consuming it.
    pub fn peek_token(&self) -> Result<&Token, TokenError> {
        match self.tokens.last() {
            Some((_, t)) => {
                trace!("%{:?}", t);
                Ok(t)
            }
            _ => {
                trace!("%{:?}", self.err);
                Err(self.err.clone())
            }
        }
    }

    /// Consumes and returns the next token, remembering the position recorded
    /// for it so `err_msg` can point at the right place.
    pub fn next_token(&mut self) -> Result<Token, TokenError> {
        match self.tokens.pop() {
            Some((pos, t)) => {
                self.curr_pos = Some(pos);
                trace!("@{:?}", t);
                Ok(t)
            }
            _ => {
                trace!("@{:?}", self.err);
                Err(self.err.clone())
            }
        }
    }

    /// Renders the original input followed by a run of `^` markers reaching
    /// the given position.
    pub fn err_msg_with_pos(&self, pos: usize) -> String {
        format!("{}\n{}", self.origin_input, "^".repeat(pos))
    }

    /// Error message pointing at the last consumed token, or at the position
    /// where tokenizing stopped if no token has been consumed yet.
    pub fn err_msg(&self) -> String {
        match self.curr_pos {
            Some(pos) => self.err_msg_with_pos(pos),
            _ => self.err_msg_with_pos(self.err_pos),
        }
    }
}
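
// Illustrative sketch (not part of the original file): a minimal test module
// showing how `Tokenizer` and `TokenReader` are typically driven. The module
// name and the sample path are made up for illustration, and the assertions
// compare token types only (via `is_match_token_type`), since exact positions
// depend on `PathReader`.
#[cfg(test)]
mod usage_sketch {
    use super::*;

    #[test]
    fn tokenizer_yields_tokens_until_eof() {
        // Expected sequence for "$['a']": $  [  'a'  ]  then end of input.
        let mut tokenizer = Tokenizer::new("$['a']");
        assert!(tokenizer.next_token().unwrap().is_match_token_type(Token::Absolute(0)));
        assert!(tokenizer.next_token().unwrap().is_match_token_type(Token::OpenArray(0)));
        assert!(tokenizer
            .next_token()
            .unwrap()
            .is_match_token_type(Token::SingleQuoted(0, String::new())));
        assert!(tokenizer.next_token().unwrap().is_match_token_type(Token::CloseArray(0)));
        assert_eq!(tokenizer.next_token(), Err(TokenError::Eof));
    }

    #[test]
    fn token_reader_peeks_and_pops_in_source_order() {
        // `peek_token` does not consume; `next_token` pops in source order.
        let mut reader = TokenReader::new("$['a']");
        assert!(reader.peek_token().unwrap().is_match_token_type(Token::Absolute(0)));
        assert!(reader.next_token().unwrap().is_match_token_type(Token::Absolute(0)));
        assert!(reader.next_token().unwrap().is_match_token_type(Token::OpenArray(0)));
    }
}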