1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
extern crate regex;

use regex::Regex;
use std::boxed::Box;

/// An ordered collection of lexing rules.
///
/// Each entry pairs a compiled regular expression with a boxed callback that
/// receives the matched text and returns `Some(token)` to emit a token or
/// `None` to emit nothing for that match. Entries are stored in the order
/// they were added.
pub struct Ruleset<T>(Vec<(Regex, Box<Fn(&str) -> Option<T>>)>);

impl <T: Clone + 'static> Ruleset<T> {
    /// Creates a ruleset that contains no rules.
    pub fn new() -> Ruleset<T> {
        Ruleset(Vec::new())
    }

    /// Adds a rule whose token is computed from the matched text.
    ///
    /// `re` is passed through `convert_regex` before being compiled and
    /// `rule` is called with the matched slice.
    ///
    /// # Panics
    ///
    /// Panics if the converted pattern is not a valid regular expression.
    pub fn add_rule<F: 'static + Fn(&str)->T>(&mut self, re: &str, rule: F) {
        self.install(re, move |lexeme: &str| Some(rule(lexeme)));
    }

    /// Adds a rule that always produces a clone of `token`, regardless of
    /// the matched text.
    ///
    /// # Panics
    ///
    /// Panics if the converted pattern is not a valid regular expression.
    pub fn add_simple(&mut self, re: &str, token: T) {
        self.install(re, move |_: &str| Some(token.clone()));
    }

    /// Adds a rule that matches text but produces no token for it.
    ///
    /// # Panics
    ///
    /// Panics if the converted pattern is not a valid regular expression.
    pub fn add_noop(&mut self, re: &str) {
        self.install(re, |_: &str| None);
    }

    /// Compiles `re` (after `convert_regex`) and appends it, together with
    /// the boxed `action`, to the end of the rule list.
    fn install<A: 'static + Fn(&str) -> Option<T>>(&mut self, re: &str, action: A) {
        let anchored = convert_regex(re);
        let compiled = Regex::new(&anchored).unwrap();
        self.0.push((compiled, Box::new(action)));
    }
}

/// Iterator state for lexing a piece of text against a borrowed [`Ruleset`].
///
/// Holds a reference to the rules and an owned copy of the input that has
/// not been consumed yet.
pub struct Lexer<'a, T>
    where T: Clone + 'static
{
    /// Rules that are tried against the front of `text`.
    rules: &'a Ruleset<T>,
    /// The not-yet-consumed remainder of the input.
    text: String,
}

impl <'a, T: Clone + 'static> Iterator for Lexer<'a, T> {
    type Item = Result<T, String>;

    /// Produces the next token from the remaining input.
    ///
    /// Rules are tried in insertion order against the remaining text; the
    /// first rule that matches a non-empty prefix wins. Its match is consumed
    /// and, if its callback returns `Some(token)`, that token is yielded as
    /// `Ok(token)`. If the callback returns `None`, lexing continues with the
    /// text that follows.
    ///
    /// Returns `Some(Err(message))` when no rule consumes any input; the
    /// remaining text is then discarded, so the following call returns
    /// `None`. Returns `None` once the input is exhausted.
    ///
    /// Matches of length zero are ignored: accepting them would consume
    /// nothing and the iterator could never make progress.
    fn next(&mut self) -> Option<Result<T, String>> {
        loop {
            // Checked once per round, independent of how many rules exist, so
            // an empty ruleset still terminates on exhausted input.
            if self.text.is_empty() {
                return None;
            }

            // Bytes to consume and the callback's result for the first rule
            // that matches a non-empty prefix.
            let mut hit: Option<(usize, Option<T>)> = None;
            for &(ref re, ref func) in self.rules.0.iter() {
                if let Some(mat) = re.find(&self.text) {
                    if mat.end() == 0 {
                        // Empty match: consuming it would not advance.
                        continue;
                    }
                    let token = func(&self.text[mat.start()..mat.end()]);
                    hit = Some((mat.end(), token));
                    break;
                }
            }

            match hit {
                Some((consumed, token)) => {
                    // Drop the consumed prefix in place instead of
                    // reallocating the remainder.
                    self.text.drain(..consumed);
                    if let Some(token) = token {
                        return Some(Ok(token));
                    }
                    // No-op rule: keep scanning the text that follows.
                }
                None => {
                    let message = format!("No rule matched \"{}\"", self.text);
                    self.text.clear();
                    return Some(Err(message));
                }
            }
        }
    }
}

/// Creates a [`Lexer`] that applies `rules` to `text`.
///
/// `text` is converted into an owned `String`; `rules` is only borrowed, so
/// the returned lexer cannot outlive it.
pub fn lex<T: Clone + 'static, S: Into<String>>(rules: &Ruleset<T>, text: S) -> Lexer<T> {
    let source: String = text.into();
    Lexer {
        text: source,
        rules: rules,
    }
}

/// Anchors a user-supplied pattern to the start of the haystack.
///
/// The pattern is wrapped in a non-capturing group before `^` is prepended.
/// Without the group, a top-level alternation such as `a|b` would become
/// `^a|b`, which anchors only the first alternative and lets `b` match
/// anywhere in the text. The group is non-capturing, so capture-group
/// numbering inside `re` is unaffected.
///
/// An empty pattern is returned as a bare `^` so that no empty group is
/// emitted.
///
/// NOTE(review): a pattern that enables verbose mode (`(?x)`) and ends with a
/// `#` comment would swallow the closing parenthesis; confirm no caller
/// relies on that form.
fn convert_regex(re: &str) -> String {
    if re.is_empty() {
        return String::from("^");
    }
    format!("^(?:{})", re)
}