1use self::Error::*;
39use self::Token::*;
40use std::error;
41use std::fmt;
42use std::str;
43
44macro_rules! scan_while {
45 ($slf:expr, $start:expr, $first:pat $(| $rest:pat)*) => {{
46 let mut __end = $start;
47
48 loop {
49 if let Some((idx, c)) = $slf.one() {
50 __end = idx;
51
52 match c {
53 $first $(| $rest)* => $slf.step(),
54 _ => break,
55 }
56
57 continue;
58 } else {
59 __end = $slf.input.len();
60 }
61
62 break;
63 }
64
65 __end
66 }}
67}
68
69#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
71pub enum Token<'input> {
72 Eq,
74 Gt,
76 Lt,
78 LtEq,
80 GtEq,
82 Caret,
84 Tilde,
86 Star,
88 Dot,
90 Comma,
92 Hyphen,
94 Plus,
96 Or,
98 Whitespace(usize, usize),
100 Numeric(u64),
102 AlphaNumeric(&'input str),
104}
105
106impl<'input> Token<'input> {
107 pub fn is_whitespace(&self) -> bool {
109 match *self {
110 Whitespace(..) => true,
111 _ => false,
112 }
113 }
114
115 pub fn is_wildcard(&self) -> bool {
117 match *self {
118 Star | AlphaNumeric("X") | AlphaNumeric("x") => true,
119 _ => false,
120 }
121 }
122}
123
124#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
125pub enum Error {
126 UnexpectedChar(char),
128}
129
130impl fmt::Display for Error {
131 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
132 match *self {
133 UnexpectedChar(ref c) => write!(fmt, "unexpected character: {}", c),
134 }
135 }
136}
137
138impl error::Error for Error {
139 fn description(&self) -> &str {
140 match *self {
141 UnexpectedChar(..) => "unexpected character",
142 }
143 }
144}
145
146#[derive(Debug)]
148pub struct Lexer<'input> {
149 input: &'input str,
150 chars: str::CharIndices<'input>,
151 c1: Option<(usize, char)>,
153 c2: Option<(usize, char)>,
154}
155
156impl<'input> Lexer<'input> {
157 pub fn new(input: &str) -> Lexer {
159 let mut chars = input.char_indices();
160 let c1 = chars.next();
161 let c2 = chars.next();
162
163 Lexer {
164 input: input,
165 chars: chars,
166 c1: c1,
167 c2: c2,
168 }
169 }
170
171 fn step(&mut self) {
173 self.c1 = self.c2;
174 self.c2 = self.chars.next();
175 }
176
177 fn step_n(&mut self, n: usize) {
178 for _ in 0..n {
179 self.step();
180 }
181 }
182
183 fn one(&mut self) -> Option<(usize, char)> {
185 self.c1
186 }
187
188 fn two(&mut self) -> Option<(usize, char, char)> {
190 self.c1
191 .and_then(|(start, c1)| self.c2.map(|(_, c2)| (start, c1, c2)))
192 }
193
194 fn component(&mut self, start: usize) -> Result<Token<'input>, Error> {
199 let end = scan_while!(self, start, '0'...'9' | 'A'...'Z' | 'a'...'z');
200 let input = &self.input[start..end];
201
202 let mut it = input.chars();
203 let (a, b) = (it.next(), it.next());
204
205 if a == Some('0') && b.is_none() {
207 return Ok(Numeric(0));
208 }
209
210 if a != Some('0') {
211 if let Ok(numeric) = input.parse::<u64>() {
212 return Ok(Numeric(numeric));
213 }
214 }
215
216 Ok(AlphaNumeric(input))
217 }
218
219 fn whitespace(&mut self, start: usize) -> Result<Token<'input>, Error> {
221 let end = scan_while!(self, start, ' ' | '\t' | '\n' | '\r');
222 Ok(Whitespace(start, end))
223 }
224}
225
226impl<'input> Iterator for Lexer<'input> {
227 type Item = Result<Token<'input>, Error>;
228
229 fn next(&mut self) -> Option<Self::Item> {
230 loop {
231 if let Some((_, a, b)) = self.two() {
233 let two = match (a, b) {
234 ('<', '=') => Some(LtEq),
235 ('>', '=') => Some(GtEq),
236 ('|', '|') => Some(Or),
237 _ => None,
238 };
239
240 if let Some(two) = two {
241 self.step_n(2);
242 return Some(Ok(two));
243 }
244 }
245
246 if let Some((start, c)) = self.one() {
248 let tok = match c {
249 ' ' | '\t' | '\n' | '\r' => {
250 self.step();
251 return Some(self.whitespace(start));
252 }
253 '=' => Eq,
254 '>' => Gt,
255 '<' => Lt,
256 '^' => Caret,
257 '~' => Tilde,
258 '*' => Star,
259 '.' => Dot,
260 ',' => Comma,
261 '-' => Hyphen,
262 '+' => Plus,
263 '0'...'9' | 'a'...'z' | 'A'...'Z' => {
264 self.step();
265 return Some(self.component(start));
266 }
267 c => return Some(Err(UnexpectedChar(c))),
268 };
269
270 self.step();
271 return Some(Ok(tok));
272 };
273
274 return None;
275 }
276 }
277}
278
#[cfg(test)]
mod tests {
    use super::*;

    /// Lexes `input` to completion, panicking on the first lexer error.
    fn lex(input: &str) -> Vec<Token> {
        Lexer::new(input).map(Result::unwrap).collect()
    }

    #[test]
    fn simple_tokens() {
        assert_eq!(
            lex("=><<=>=^~*.,-+||"),
            vec![Eq, Gt, Lt, LtEq, GtEq, Caret, Tilde, Star, Dot, Comma, Hyphen, Plus, Or]
        );
    }

    #[test]
    fn whitespace() {
        // Two leading spaces: offsets 0..2, then `foo` at 2..5, then the
        // mixed whitespace run at 5..9.
        assert_eq!(
            lex("  foo \t\n\rbar"),
            vec![
                Whitespace(0, 2),
                AlphaNumeric("foo"),
                Whitespace(5, 9),
                AlphaNumeric("bar"),
            ]
        );
    }

    #[test]
    fn components() {
        assert_eq!(lex("42"), vec![Numeric(42)]);
        assert_eq!(lex("0"), vec![Numeric(0)]);
        // A leading zero makes the component alphanumeric.
        assert_eq!(lex("01"), vec![AlphaNumeric("01")]);
        assert_eq!(lex("00"), vec![AlphaNumeric("00")]);
        // One past `u64::MAX` does not parse, so it stays alphanumeric.
        assert_eq!(
            lex("18446744073709551616"),
            vec![AlphaNumeric("18446744073709551616")]
        );
        assert_eq!(lex("5885644aa"), vec![AlphaNumeric("5885644aa")]);
        assert_eq!(lex("beta2"), vec![AlphaNumeric("beta2")]);
        assert_eq!(lex("beta.2"), vec![AlphaNumeric("beta"), Dot, Numeric(2)]);
    }

    #[test]
    fn is_wildcard() {
        assert!(Star.is_wildcard());
        assert!(AlphaNumeric("x").is_wildcard());
        assert!(AlphaNumeric("X").is_wildcard());
        assert!(!AlphaNumeric("other").is_wildcard());
    }

    #[test]
    fn empty() {
        assert!(lex("").is_empty());
    }

    #[test]
    fn numeric_all_numbers() {
        let expected: Vec<Token> = (0..10).map(Numeric).collect();

        let actual: Vec<_> = lex("0 1 2 3 4 5 6 7 8 9")
            .into_iter()
            .filter(|t| !t.is_whitespace())
            .collect();

        assert_eq!(actual, expected);
    }

    #[test]
    fn unexpected_char() {
        assert_eq!(Lexer::new("!").next(), Some(Err(UnexpectedChar('!'))));
    }
}