use self::Error::*;
use self::Token::*;
use std::error;
use std::fmt;
use std::str;
43
44macro_rules! scan_while {
45    ($slf:expr, $start:expr, $first:pat $(| $rest:pat)*) => {{
46        let mut __end = $start;
47
48        loop {
49            if let Some((idx, c)) = $slf.one() {
50                __end = idx;
51
52                match c {
53                    $first $(| $rest)* => $slf.step(),
54                    _ => break,
55                }
56
57                continue;
58            } else {
59                __end = $slf.input.len();
60            }
61
62            break;
63        }
64
65        __end
66    }}
67}
68
/// A single token produced by the lexer.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Token<'input> {
    /// `=`
    Eq,
    /// `>`
    Gt,
    /// `<`
    Lt,
    /// `<=`
    LtEq,
    /// `>=`
    GtEq,
    /// `^`
    Caret,
    /// `~`
    Tilde,
    /// `*`
    Star,
    /// `.`
    Dot,
    /// `,`
    Comma,
    /// `-`
    Hyphen,
    /// `+`
    Plus,
    /// `||`
    Or,
    /// A run of whitespace (space, `\t`, `\n`, `\r`), given as the byte
    /// offsets of its start and of the first character after it.
    Whitespace(usize, usize),
    /// A numeric component such as `0` or `42`.
    Numeric(u64),
    /// An ASCII alphanumeric component that is not a valid numeric, such as
    /// `alpha1` or `01`; borrowed from the lexer's input.
    AlphaNumeric(&'input str),
}
105
106impl<'input> Token<'input> {
107    pub fn is_whitespace(&self) -> bool {
109        match *self {
110            Whitespace(..) => true,
111            _ => false,
112        }
113    }
114
115    pub fn is_wildcard(&self) -> bool {
117        match *self {
118            Star | AlphaNumeric("X") | AlphaNumeric("x") => true,
119            _ => false,
120        }
121    }
122}
123
/// Errors that can occur while lexing.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Error {
    /// The lexer met a character that cannot start any token; carries the
    /// offending character.
    UnexpectedChar(char),
}
129
130impl fmt::Display for Error {
131    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
132        match *self {
133            UnexpectedChar(ref c) => write!(fmt, "unexpected character: {}", c),
134        }
135    }
136}
137
138impl error::Error for Error {
139    fn description(&self) -> &str {
140        match *self {
141            UnexpectedChar(..) => "unexpected character",
142        }
143    }
144}
145
/// Splits an input string into `Token`s, keeping a two-character lookahead.
#[derive(Debug)]
pub struct Lexer<'input> {
    /// The complete input; alphanumeric tokens borrow slices of it.
    input: &'input str,
    /// Characters (with byte offsets) not yet loaded into the lookahead.
    chars: str::CharIndices<'input>,
    /// The current character and its byte offset, or `None` at end of input.
    c1: Option<(usize, char)>,
    /// The character after `c1`, used to recognise two-character tokens.
    c2: Option<(usize, char)>,
}
155
156impl<'input> Lexer<'input> {
157    pub fn new(input: &str) -> Lexer {
159        let mut chars = input.char_indices();
160        let c1 = chars.next();
161        let c2 = chars.next();
162
163        Lexer {
164            input: input,
165            chars: chars,
166            c1: c1,
167            c2: c2,
168        }
169    }
170
171    fn step(&mut self) {
173        self.c1 = self.c2;
174        self.c2 = self.chars.next();
175    }
176
177    fn step_n(&mut self, n: usize) {
178        for _ in 0..n {
179            self.step();
180        }
181    }
182
183    fn one(&mut self) -> Option<(usize, char)> {
185        self.c1
186    }
187
188    fn two(&mut self) -> Option<(usize, char, char)> {
190        self.c1
191            .and_then(|(start, c1)| self.c2.map(|(_, c2)| (start, c1, c2)))
192    }
193
194    fn component(&mut self, start: usize) -> Result<Token<'input>, Error> {
199        let end = scan_while!(self, start, '0'...'9' | 'A'...'Z' | 'a'...'z');
200        let input = &self.input[start..end];
201
202        let mut it = input.chars();
203        let (a, b) = (it.next(), it.next());
204
205        if a == Some('0') && b.is_none() {
207            return Ok(Numeric(0));
208        }
209
210        if a != Some('0') {
211            if let Ok(numeric) = input.parse::<u64>() {
212                return Ok(Numeric(numeric));
213            }
214        }
215
216        Ok(AlphaNumeric(input))
217    }
218
219    fn whitespace(&mut self, start: usize) -> Result<Token<'input>, Error> {
221        let end = scan_while!(self, start, ' ' | '\t' | '\n' | '\r');
222        Ok(Whitespace(start, end))
223    }
224}
225
226impl<'input> Iterator for Lexer<'input> {
227    type Item = Result<Token<'input>, Error>;
228
229    fn next(&mut self) -> Option<Self::Item> {
230        loop {
231            if let Some((_, a, b)) = self.two() {
233                let two = match (a, b) {
234                    ('<', '=') => Some(LtEq),
235                    ('>', '=') => Some(GtEq),
236                    ('|', '|') => Some(Or),
237                    _ => None,
238                };
239
240                if let Some(two) = two {
241                    self.step_n(2);
242                    return Some(Ok(two));
243                }
244            }
245
246            if let Some((start, c)) = self.one() {
248                let tok = match c {
249                    ' ' | '\t' | '\n' | '\r' => {
250                        self.step();
251                        return Some(self.whitespace(start));
252                    }
253                    '=' => Eq,
254                    '>' => Gt,
255                    '<' => Lt,
256                    '^' => Caret,
257                    '~' => Tilde,
258                    '*' => Star,
259                    '.' => Dot,
260                    ',' => Comma,
261                    '-' => Hyphen,
262                    '+' => Plus,
263                    '0'...'9' | 'a'...'z' | 'A'...'Z' => {
264                        self.step();
265                        return Some(self.component(start));
266                    }
267                    c => return Some(Err(UnexpectedChar(c))),
268                };
269
270                self.step();
271                return Some(Ok(tok));
272            };
273
274            return None;
275        }
276    }
277}
278
#[cfg(test)]
mod tests {
    use super::*;

    /// Lexes `input` to completion, panicking on the first lexer error.
    fn lex(input: &str) -> Vec<Token> {
        Lexer::new(input).map(Result::unwrap).collect()
    }

    #[test]
    fn simple_tokens() {
        assert_eq!(
            lex("=><<=>=^~*.,-+||"),
            vec![
                Eq, Gt, Lt, LtEq, GtEq, Caret, Tilde, Star, Dot, Comma, Hyphen, Plus, Or
            ]
        );
    }

    #[test]
    fn whitespace() {
        assert_eq!(
            lex("  foo \t\n\rbar"),
            vec![
                Whitespace(0, 2),
                AlphaNumeric("foo"),
                Whitespace(5, 9),
                AlphaNumeric("bar"),
            ]
        );
    }

    #[test]
    fn components() {
        assert_eq!(lex("42"), vec![Numeric(42)]);
        assert_eq!(lex("0"), vec![Numeric(0)]);
        // A leading zero keeps the component alphanumeric unless the zero is
        // the whole component.
        assert_eq!(lex("01"), vec![AlphaNumeric("01")]);
        assert_eq!(lex("00"), vec![AlphaNumeric("00")]);
        assert_eq!(lex("5885644aa"), vec![AlphaNumeric("5885644aa")]);
        assert_eq!(lex("beta2"), vec![AlphaNumeric("beta2")]);
        assert_eq!(lex("beta.2"), vec![AlphaNumeric("beta"), Dot, Numeric(2)]);
    }

    #[test]
    fn numeric_overflow_is_alphanumeric() {
        assert_eq!(
            lex("18446744073709551615"),
            vec![Numeric(18446744073709551615)]
        );
        // One past `u64`'s maximum no longer parses as a number.
        assert_eq!(
            lex("18446744073709551616"),
            vec![AlphaNumeric("18446744073709551616")]
        );
    }

    #[test]
    fn is_wildcard() {
        assert!(Star.is_wildcard());
        assert!(AlphaNumeric("x").is_wildcard());
        assert!(AlphaNumeric("X").is_wildcard());
        assert!(!AlphaNumeric("other").is_wildcard());
    }

    #[test]
    fn empty() {
        assert_eq!(lex(""), vec![]);
    }

    #[test]
    fn unexpected_char() {
        let mut lexer = Lexer::new("1!");
        assert_eq!(lexer.next(), Some(Ok(Numeric(1))));
        assert_eq!(lexer.next(), Some(Err(UnexpectedChar('!'))));

        // A lone `|` is not a token; only `||` is.
        assert_eq!(Lexer::new("|").next(), Some(Err(UnexpectedChar('|'))));
    }

    #[test]
    fn numeric_all_numbers() {
        let expected: Vec<Token> = (0..10).map(Numeric).collect();

        let actual: Vec<_> = lex("0 1 2 3 4 5 6 7 8 9")
            .into_iter()
            .filter(|t| !t.is_whitespace())
            .collect();

        assert_eq!(actual, expected);
    }
}