//! Tokenizer for removal-specification strings.
//! (rmd/file_remove_iterator/parser/specification_string_tokenizer.rs)

/// A single lexical token extracted from a specification string.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SpecToken<'a> {
    /// A run of ASCII digits, parsed as an unsigned integer (e.g. `56`).
    Number(u64),
    /// A run of ASCII letters, borrowed from the input (e.g. `"year"`).
    Text(&'a str),
}

/// Scanner state: what kind of lexeme (if any) is currently being consumed.
#[derive(Clone, Copy)]
enum State {
    /// Skipping separator characters; no token started yet.
    Init,
    /// Inside a run of ASCII digits.
    Number,
    /// Inside a run of ASCII letters.
    Text,
}

/// Iterator that splits a specification string such as
/// `"56year,12month+5day"` into [`SpecToken::Number`] and
/// [`SpecToken::Text`] tokens. Every character that is neither an ASCII
/// digit nor an ASCII letter is treated as a separator and skipped.
pub struct SpecTokenizer<'a> {
    /// The full input being tokenized.
    string: &'a str,
    /// Kind of the most recently scanned token.
    state: State,
    /// Byte offset of the current token's first character.
    begin: usize,
    /// Byte offset one past the current token's last character.
    end: usize,
}

impl<'a> SpecTokenizer<'a> {
    /// Creates a tokenizer positioned at the start of `string`.
    pub fn new(string: &'a str) -> Self {
        SpecTokenizer {
            string,
            state: State::Init,
            begin: 0,
            end: 0,
        }
    }

    /// Advances `begin`/`end` to delimit the next token.
    ///
    /// On return, `state` says what was found; when no further token
    /// exists, `begin == string.len()`.
    ///
    /// Byte offsets are advanced by `char::len_utf8()` so multi-byte
    /// (non-ASCII) input is handled correctly; the previous `+= 1`
    /// stepping assumed one byte per `char` and could land offsets on a
    /// non-char boundary, panicking when the string was next sliced.
    fn take_next(&mut self) {
        let start = self.end;
        self.begin = start;
        self.state = State::Init;
        let mut hit_boundary = false;
        for (offset, ch) in self.string[start..].char_indices() {
            // Absolute byte position of `ch` within `self.string`.
            let pos = start + offset;
            match self.state {
                State::Init => {
                    if ch.is_ascii_digit() {
                        self.state = State::Number;
                        self.begin = pos;
                    } else if ch.is_ascii_alphabetic() {
                        self.state = State::Text;
                        self.begin = pos;
                    } else {
                        // Separator: keep `begin` pointing just past it.
                        self.begin = pos + ch.len_utf8();
                    }
                }
                // NOTE: this used `!ch.is_numeric()`, which let non-ASCII
                // Unicode digits (e.g. '٣') into the token and then
                // panicked in `str::parse::<u64>`. The continuation test
                // must mirror what `Init` accepted: ASCII digits only.
                State::Number => {
                    if !ch.is_ascii_digit() {
                        self.end = pos;
                        hit_boundary = true;
                        break;
                    }
                }
                State::Text => {
                    if !ch.is_ascii_alphabetic() {
                        self.end = pos;
                        hit_boundary = true;
                        break;
                    }
                }
            }
        }
        // Ran off the end of the input while still scanning (or still
        // skipping separators): the token extends to the end of the string.
        if !hit_boundary {
            self.end = self.string.len();
        }
    }
}

impl<'a> Iterator for SpecTokenizer<'a> {
    type Item = SpecToken<'a>;

    /// Returns the next token, or `None` once the input is exhausted.
    ///
    /// # Panics
    ///
    /// Panics if a digit run does not fit in `u64` (more than about
    /// twenty digits); the iterator interface has no error variant to
    /// report this through.
    fn next(&mut self) -> Option<SpecToken<'a>> {
        self.take_next();
        if self.begin == self.string.len() {
            return None;
        }

        let lexeme = &self.string[self.begin..self.end];
        match self.state {
            State::Number => {
                // Only ASCII digits reach here, so parsing can fail
                // solely on u64 overflow.
                let n: u64 = lexeme.parse().expect("numeric token overflows u64");
                Some(SpecToken::Number(n))
            }
            State::Text => Some(SpecToken::Text(lexeme)),
            // `take_next` leaves `Init` only when no token was found,
            // and that case was already handled by the length check.
            State::Init => unreachable!("Init state with a non-empty token"),
        }
    }
}
89
#[cfg(test)]
mod test {

    use super::*;

    /// End-to-end check of the public iterator over a mixed spec string.
    #[test]
    fn test_specification_parser() {
        let spec = "56year,,,12month++,+,5day";
        let mut tokenizer = SpecTokenizer::new(spec);
        assert_eq!(tokenizer.next(), Some(SpecToken::Number(56)));
        assert_eq!(tokenizer.next(), Some(SpecToken::Text("year")));
        assert_eq!(tokenizer.next(), Some(SpecToken::Number(12)));
        assert_eq!(tokenizer.next(), Some(SpecToken::Text("month")));
        assert_eq!(tokenizer.next(), Some(SpecToken::Number(5)));
        assert_eq!(tokenizer.next(), Some(SpecToken::Text("day")));
        assert_eq!(tokenizer.next(), None);
    }

    /// Input consisting solely of separators yields no tokens.
    #[test]
    fn test_wrong_specification() {
        let spec = "...........";
        let mut tokenizer = SpecTokenizer::new(spec);
        assert_eq!(tokenizer.next(), None);
    }

    /// White-box check of the byte offsets `take_next` computes.
    #[test]
    fn test_take_next() {
        let spec = "55year,12month+5day";
        let mut tokenizer = SpecTokenizer::new(spec);
        // 55  (the old comment said "56", which did not match the input)
        run_take_next(&mut tokenizer, 0, 2);
        // year
        run_take_next(&mut tokenizer, 2, 6);
        // 12
        run_take_next(&mut tokenizer, 7, 9);
        // month
        run_take_next(&mut tokenizer, 9, 14);
        // 5
        run_take_next(&mut tokenizer, 15, 16);
        // day
        run_take_next(&mut tokenizer, 16, 19);
    }

    /// Advances the tokenizer once and asserts the token boundaries.
    /// A single tuple comparison replaces the two previous asserts that
    /// duplicated the whole failure message.
    fn run_take_next(tokenizer: &mut SpecTokenizer, begin: usize, end: usize) {
        tokenizer.take_next();
        assert_eq!(
            (tokenizer.begin, tokenizer.end),
            (begin, end),
            "Fail: expected {} {}, actual {} {}, result '{}'",
            begin,
            end,
            tokenizer.begin,
            tokenizer.end,
            &tokenizer.string[tokenizer.begin..tokenizer.end]
        );
    }
}