// toml_query/tokenizer.rs

1//
2// This Source Code Form is subject to the terms of the Mozilla Public
3// License, v. 2.0. If a copy of the MPL was not distributed with this
4// file, You can obtain one at http://mozilla.org/MPL/2.0/.
5//
6
7/// The tokenizer for the query interpreter
8use crate::error::{Error, Result};
9
#[derive(Debug, PartialEq, Eq)]
/// A single segment of a parsed query, forming a singly linked chain
/// via the `next` field (e.g. `"a.[0]"` becomes `Identifier("a") -> Index(0)`).
pub enum Token {
    /// A key access into a table, e.g. the `a` in `a.b`.
    Identifier {
        /// The identifier text of this path segment.
        ident: String,
        /// The following token in the query chain, if any.
        next: Option<Box<Token>>,
    },

    /// An index access into an array, written in brackets, e.g. `[0]`.
    Index {
        /// The zero-based array index.
        idx: usize,
        /// The following token in the query chain, if any.
        next: Option<Box<Token>>,
    },
}
22
23impl Token {
24    pub fn next(&self) -> Option<&Token> {
25        trace!("Matching token (self): {:?}", self);
26        match self {
27            Token::Identifier { ref next, .. } => next.as_ref().map(|t| &**t),
28            Token::Index { ref next, .. } => next.as_ref().map(|t| &**t),
29        }
30    }
31
32    /// Convenience function for `token.next().is_some()`
33    pub fn has_next(&self) -> bool {
34        trace!("self.has_next(): {:?}", self.next().is_some());
35        self.next().is_some()
36    }
37
38    pub fn set_next(&mut self, token: Token) {
39        trace!("self.set_next({:?})", token);
40        match self {
41            Token::Identifier { ref mut next, .. } => *next = Some(Box::new(token)),
42            Token::Index { ref mut next, .. } => *next = Some(Box::new(token)),
43        }
44    }
45
46    /// Pop the last token from the chain of tokens
47    ///
48    /// Returns None if the current Token has no next token
49    pub fn pop_last(&mut self) -> Option<Box<Token>> {
50        trace!("self.pop_last()");
51        if !self.has_next() {
52            trace!("self.pop_last(): No next");
53            None
54        } else {
55            trace!("self.pop_last(): Having next");
56            match self {
57                Token::Identifier { ref mut next, .. } => {
58                    trace!("self.pop_last(): self is Identifier");
59                    if next.is_some() {
60                        trace!("self.pop_last(): next is Some(_)");
61                        let mut n = next.take().unwrap();
62                        if n.has_next() {
63                            trace!("self.pop_last(): next also has a next");
64
65                            trace!("self.pop_last(): Recursing now");
66                            let result = n.pop_last();
67
68                            *next = Some(n);
69
70                            trace!("self.pop_last(): Returning Result");
71                            result
72                        } else {
73                            trace!("self.pop_last(): next itself has no next, returning Some");
74                            Some(n)
75                        }
76                    } else {
77                        trace!("self.pop_last(): next is none, returning None");
78                        None
79                    }
80                }
81
82                Token::Index { ref mut next, .. } => {
83                    trace!("self.pop_last(): self is Index");
84                    if next.is_some() {
85                        trace!("self.pop_last(): next is Some(_)");
86
87                        let mut n = next.take().unwrap();
88                        if n.has_next() {
89                            trace!("self.pop_last(): next also has a next");
90
91                            trace!("self.pop_last(): Recursing now");
92                            let result = n.pop_last();
93
94                            *next = Some(n);
95
96                            trace!("self.pop_last(): Returning Result");
97                            result
98                        } else {
99                            trace!("self.pop_last(): next itself has no next, returning Some");
100                            Some(n)
101                        }
102                    } else {
103                        trace!("self.pop_last(): next is none, returning None");
104                        None
105                    }
106                }
107            }
108        }
109    }
110
111    #[cfg(test)]
112    pub fn identifier(&self) -> &String {
113        trace!("self.identifier()");
114        match self {
115            Token::Identifier { ref ident, .. } => ident,
116            _ => unreachable!(),
117        }
118    }
119
120    #[cfg(test)]
121    pub fn idx(&self) -> usize {
122        trace!("self.idx()");
123        match self {
124            Token::Index { idx: i, .. } => *i,
125            _ => unreachable!(),
126        }
127    }
128}
129
130pub fn tokenize_with_seperator(query: &str, seperator: char) -> Result<Token> {
131    use std::str::Split;
132    trace!(
133        "tokenize_with_seperator(query: {:?}, seperator: {:?})",
134        query,
135        seperator
136    );
137
138    /// Creates a Token object from a string
139    ///
140    /// # Panics
141    ///
142    /// * If the internal regex does not compile (should never happen)
143    /// * If the token is non-valid (that is, a array index with a non-i64)
144    /// * If the regex does not find anything
145    /// * If the integer in the brackets (`[]`) cannot be parsed to a valid i64
146    ///
147    /// # Incorrect behaviour
148    ///
149    /// * If the regex finds multiple captures
150    ///
151    /// # Returns
152    ///
153    /// The `Token` object with the correct identifier/index for this token and no next token.
154    ///
155    fn mk_token_object(s: &str) -> Result<Token> {
156        use regex::Regex;
157        use std::str::FromStr;
158
159        trace!("mk_token_object(s: {:?})", s);
160
161        lazy_static! {
162            static ref RE: Regex = Regex::new(r"^\[\d+\]$").unwrap();
163        }
164
165        if !has_array_brackets(s) {
166            trace!("returning Ok(Identifier(ident: {:?}, next: None))", s);
167            return Ok(Token::Identifier {
168                ident: String::from(s),
169                next: None,
170            });
171        }
172
173        match RE.captures(s) {
174            None => Err(Error::ArrayAccessWithoutIndex),
175            Some(captures) => {
176                trace!("Captured: {:?}", captures);
177                match captures.get(0) {
178                    None => Ok(Token::Identifier {
179                        ident: String::from(s),
180                        next: None,
181                    }),
182                    Some(mtch) => {
183                        trace!("First capture: {:?}", mtch);
184
185                        let mtch = without_array_brackets(mtch.as_str());
186                        trace!(".. without array brackets: {:?}", mtch);
187
188                        let i: usize = FromStr::from_str(&mtch).unwrap(); // save because regex
189
190                        trace!("returning Ok(Index(idx: {}, next: None)", i);
191                        Ok(Token::Index { idx: i, next: None })
192                    }
193                }
194            }
195        }
196    }
197
198    /// Check whether a str begins with '[' and ends with ']'
199    fn has_array_brackets(s: &str) -> bool {
200        trace!("has_array_brackets({:?})", s);
201        s.as_bytes()[0] == b'[' && s.as_bytes()[s.len() - 1] == b']'
202    }
203
204    /// Remove '[' and ']' from a str
205    fn without_array_brackets(s: &str) -> String {
206        trace!("without_array_brackets({:?})", s);
207        s.replace(['[', ']'], "")
208    }
209
210    fn build_token_tree(split: &mut Split<'_, char>, last: &mut Token) -> Result<()> {
211        trace!("build_token_tree(split: {:?}, last: {:?})", split, last);
212        match split.next() {
213            None => { /* No more tokens */ }
214            Some(token) => {
215                trace!("build_token_tree(...): next from split: {:?}", token);
216
217                if token.is_empty() {
218                    trace!("build_token_tree(...): Empty identifier... returning Error");
219                    return Err(Error::EmptyIdentifier);
220                }
221
222                let mut token = mk_token_object(token)?;
223                build_token_tree(split, &mut token)?;
224                last.set_next(token);
225            }
226        }
227
228        trace!("build_token_tree(...): returning Ok(())");
229        Ok(())
230    }
231
232    if query.is_empty() {
233        trace!("Query is empty. Returning error");
234        return Err(Error::EmptyQueryError);
235    }
236
237    let mut tokens = query.split(seperator);
238    trace!("Tokens splitted: {:?}", tokens);
239
240    match tokens.next() {
241        None => Err(Error::EmptyQueryError),
242        Some(token) => {
243            trace!("next Token: {:?}", token);
244
245            if token.is_empty() {
246                trace!("Empty token. Returning Error");
247                return Err(Error::EmptyIdentifier);
248            }
249
250            let mut tok = mk_token_object(token)?;
251            build_token_tree(&mut tokens, &mut tok)?;
252
253            trace!("Returning Ok({:?})", tok);
254            Ok(tok)
255        }
256    }
257}
258
#[cfg(test)]
mod test {
    //! Unit tests for the tokenizer: error cases, chain construction,
    //! and `pop_last` behavior on identifier/index/mixed chains.
    use super::*;
    use crate::error::Error;

    use std::ops::Deref;

    // --- error cases -------------------------------------------------------

    #[test]
    fn test_tokenize_empty_query_to_error() {
        let tokens = tokenize_with_seperator("", '.');
        assert!(tokens.is_err());
        let tokens = tokens.unwrap_err();

        assert!(is_match!(tokens, Error::EmptyQueryError));
    }

    #[test]
    fn test_tokenize_seperator_only() {
        // "." splits into two empty segments -> empty identifier.
        let tokens = tokenize_with_seperator(".", '.');
        assert!(tokens.is_err());
        let tokens = tokens.unwrap_err();

        assert!(is_match!(tokens, Error::EmptyIdentifier));
    }

    #[test]
    fn test_tokenize_array_brackets_only() {
        let tokens = tokenize_with_seperator("[]", '.');
        assert!(tokens.is_err());
        let tokens = tokens.unwrap_err();

        assert!(is_match!(tokens, Error::ArrayAccessWithoutIndex));
    }

    #[test]
    fn test_tokenize_identifiers_with_array_brackets_only() {
        let tokens = tokenize_with_seperator("a.b.c.[]", '.');
        assert!(tokens.is_err());
        let tokens = tokens.unwrap_err();

        assert!(is_match!(tokens, Error::ArrayAccessWithoutIndex));
    }

    #[test]
    fn test_tokenize_identifiers_in_array_brackets() {
        // Brackets must contain digits, not an identifier.
        let tokens = tokenize_with_seperator("[a]", '.');
        assert!(tokens.is_err());
        let tokens = tokens.unwrap_err();

        assert!(is_match!(tokens, Error::ArrayAccessWithoutIndex));
    }

    // --- successful tokenization -------------------------------------------

    #[test]
    fn test_tokenize_single_token_query() {
        let tokens = tokenize_with_seperator("example", '.');
        assert!(tokens.is_ok());
        let tokens = tokens.unwrap();

        assert!(match tokens {
            Token::Identifier {
                ref ident,
                next: None,
            } => {
                assert_eq!("example", ident);
                true
            }
            _ => false,
        });
    }

    #[test]
    fn test_tokenize_double_token_query() {
        let tokens = tokenize_with_seperator("a.b", '.');
        assert!(tokens.is_ok());
        let tokens = tokens.unwrap();

        assert!(match tokens {
            Token::Identifier {
                next: Some(ref next),
                ..
            } => {
                assert_eq!("b", next.deref().identifier());
                std::matches!(next.deref(), Token::Identifier { next: None, .. })
            }
            _ => false,
        });
        assert_eq!("a", tokens.identifier());
    }

    #[test]
    fn test_tokenize_ident_then_array_query() {
        let tokens = tokenize_with_seperator("a.[0]", '.');
        assert!(tokens.is_ok());
        let tokens = tokens.unwrap();

        assert_eq!("a", tokens.identifier());
        assert!(match tokens {
            Token::Identifier {
                next: Some(ref next),
                ..
            } => std::matches!(next.deref(), Token::Index { idx: 0, next: None }),
            _ => false,
        });
    }

    #[test]
    fn test_tokenize_many_idents_then_array_query() {
        let tokens = tokenize_with_seperator("a.b.c.[1000]", '.');
        assert!(tokens.is_ok());
        let tokens = tokens.unwrap();

        assert_eq!("a", tokens.identifier());

        // Compare against the fully spelled-out expected chain.
        let expected = Token::Identifier {
            ident: String::from("a"),
            next: Some(Box::new(Token::Identifier {
                ident: String::from("b"),
                next: Some(Box::new(Token::Identifier {
                    ident: String::from("c"),
                    next: Some(Box::new(Token::Index {
                        idx: 1000,
                        next: None,
                    })),
                })),
            })),
        };

        assert_eq!(expected, tokens);
    }

    #[test]
    fn test_tokenize_empty_token_after_good_token() {
        let tokens = tokenize_with_seperator("a..b", '.');
        assert!(tokens.is_err());
        let tokens = tokens.unwrap_err();

        assert!(is_match!(tokens, Error::EmptyIdentifier));
    }

    // Property test: any usize in brackets tokenizes to a single Index token.
    quickcheck! {
        fn test_array_index(i: usize) -> bool {
            std::matches!(tokenize_with_seperator(&format!("[{}]", i), '.'),
                Ok(Token::Index { next: None, ..  }))
        }
    }

    // --- pop_last ----------------------------------------------------------

    #[test]
    fn test_pop_last_token_from_single_identifier_token_is_none() {
        let mut token = Token::Identifier {
            ident: String::from("something"),
            next: None,
        };

        let last = token.pop_last();
        assert!(last.is_none());
    }

    #[test]
    fn test_pop_last_token_from_single_index_token_is_none() {
        let mut token = Token::Index { idx: 0, next: None };

        let last = token.pop_last();
        assert!(last.is_none());
    }

    #[test]
    fn test_pop_last_token_from_single_identifier_token_is_one() {
        let mut token = Token::Identifier {
            ident: String::from("some"),
            next: Some(Box::new(Token::Identifier {
                ident: String::from("thing"),
                next: None,
            })),
        };

        let last = token.pop_last();

        assert!(last.is_some());
        let last = last.unwrap();

        assert!(is_match!(*last, Token::Identifier { .. }));
        match *last {
            Token::Identifier { ident, .. } => {
                assert_eq!("thing", ident);
            }
            _ => panic!("What just happened?"),
        }
    }

    #[test]
    fn test_pop_last_token_from_single_index_token_is_one() {
        let mut token = Token::Index {
            idx: 0,
            next: Some(Box::new(Token::Index { idx: 1, next: None })),
        };

        let last = token.pop_last();

        assert!(last.is_some());
        let last = last.unwrap();

        assert!(is_match!(*last, Token::Index { idx: 1, .. }));
    }

    #[test]
    fn test_pop_last_token_from_identifier_chain() {
        let tokens = tokenize_with_seperator("a.b.c.d.e.f", '.');
        assert!(tokens.is_ok());
        let mut tokens = tokens.unwrap();

        let last = tokens.pop_last();
        assert!(last.is_some());
        assert_eq!("f", last.unwrap().identifier());
    }

    #[test]
    fn test_pop_last_token_from_mixed_chain() {
        let tokens = tokenize_with_seperator("a.[100].c.[3].e.f", '.');
        assert!(tokens.is_ok());
        let mut tokens = tokens.unwrap();

        let last = tokens.pop_last();
        assert!(last.is_some());
        assert_eq!("f", last.unwrap().identifier());
    }

    #[test]
    fn test_pop_last_token_from_identifier_chain_is_array() {
        let tokens = tokenize_with_seperator("a.b.c.d.e.f.[1000]", '.');
        assert!(tokens.is_ok());
        let mut tokens = tokens.unwrap();

        let last = tokens.pop_last();
        assert!(last.is_some());
        assert_eq!(1000, last.unwrap().idx());
    }

    #[test]
    fn test_pop_last_token_from_mixed_chain_is_array() {
        let tokens = tokenize_with_seperator("a.[100].c.[3].e.f.[1000]", '.');
        assert!(tokens.is_ok());
        let mut tokens = tokens.unwrap();

        let last = tokens.pop_last();
        assert!(last.is_some());
        assert_eq!(1000, last.unwrap().idx());
    }

    #[test]
    fn test_pop_last_token_from_one_token() {
        let tokens = tokenize_with_seperator("a", '.');
        assert!(tokens.is_ok());
        let mut tokens = tokens.unwrap();

        let last = tokens.pop_last();
        assert!(last.is_none());
    }

    #[test]
    fn test_pop_last_chain() {
        // Repeatedly popping drains the chain back-to-front, ending with None.
        let tokens = tokenize_with_seperator("a.[100].c.[3].e.f.[1000]", '.');
        assert!(tokens.is_ok());
        let mut tokens = tokens.unwrap();

        let last = tokens.pop_last();
        assert!(last.is_some());
        assert_eq!(1000, last.unwrap().idx());

        let last = tokens.pop_last();
        assert!(last.is_some());
        assert_eq!("f", last.unwrap().identifier());

        let last = tokens.pop_last();
        assert!(last.is_some());
        assert_eq!("e", last.unwrap().identifier());

        let last = tokens.pop_last();
        assert!(last.is_some());
        assert_eq!(3, last.unwrap().idx());

        let last = tokens.pop_last();
        assert!(last.is_some());
        assert_eq!("c", last.unwrap().identifier());

        let last = tokens.pop_last();
        assert!(last.is_some());
        assert_eq!(100, last.unwrap().idx());

        let last = tokens.pop_last();
        assert!(last.is_none());
    }
}