regex_tokenizer/
tokenization.rs

1pub mod matcher;
2
3use std::marker::PhantomData;
4
5use matcher::ValidToken;
6
7pub struct TokenGenerator<'a, 'b, T: matcher::ValidMatcher, TypeEnum>
8where
9    'a: 'b,
10{
11    query: &'b str,
12    matcher: &'a T,
13    current_position: usize,
14    _dummy: PhantomData<TypeEnum>,
15}
16
17pub trait Tokenizer<'a, 'b, T: matcher::ValidMatcher> {
18    /// Prepare an iterator to extract tokens from a string
19    ///
20    /// ```
21    /// # use regex_tokenizer::tokenizer;
22    /// tokenizer! {
23    ///     Test
24    ///
25    ///     r"[a-zA-Z]\w*" => Identifier
26    ///     r"\d+" => Number
27    ///     r"\s+" => _
28    /// }
29    ///
30    /// let tokenizer = Test::new();
31    /// let query = "Identifier  11";
32    ///
33    /// let mut tokens = tokenizer.tokenize(query);
34    /// let token = tokens.next().unwrap();
35    /// ```
36    fn tokenize(&'a self, data: &'b str) -> TokenGenerator<'a, 'b, T, T::TokenType>;
37}
38
39impl<'a, 'b, T> Tokenizer<'a, 'b, T> for T
40where
41    T: matcher::ValidMatcher,
42    'a: 'b,
43{
44    fn tokenize(&'a self, data: &'b str) -> TokenGenerator<'a, 'b, T, T::TokenType> {
45        TokenGenerator {
46            query: data,
47            matcher: &self,
48            current_position: 0,
49            _dummy: Default::default(),
50        }
51    }
52}
53
54impl<'a, 'b, T: matcher::ValidMatcher, TypeEnum> Iterator for TokenGenerator<'a, 'b, T, TypeEnum> {
55    type Item = T::TokenType;
56
57    fn next(&mut self) -> Option<Self::Item> {
58        let res = self.matcher.try_match(&self.query, self.current_position);
59
60        match res {
61            Some(result) => {
62                self.current_position = result.position().1;
63                self.query = &self.query[result.to_string().len()..];
64                Some(result)
65            }
66            None => None,
67        }
68    }
69}