1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
extern crate regex;

use regex::Regex;
use std::boxed::Box;

/// An ordered list of lexing rules.
///
/// Each rule pairs a regular expression (anchored to the start of the
/// remaining input) with an action that turns the matched text into
/// `Some(token)`, or into `None` when the match should be discarded.
/// Rules are tried in insertion order; the first one that matches wins.
pub struct Ruleset<T>(Vec<(Regex, Box<dyn Fn(&str) -> Option<T>>)>);

impl<T: Clone + 'static> Ruleset<T> {
    /// Creates an empty ruleset.
    pub fn new() -> Ruleset<T> {
        Ruleset::<T>(Vec::new())
    }

    /// Adds a rule whose token is computed from the matched text by `rule`.
    ///
    /// # Panics
    ///
    /// Panics if `re` is not a valid regular expression.
    pub fn add_rule<F: 'static + Fn(&str) -> T>(&mut self, re: &str, rule: F) {
        self.push_rule(re, Box::new(move |tok: &str| Some(rule(tok))));
    }

    /// Adds a rule that always yields a clone of `token`, whatever text matched.
    ///
    /// # Panics
    ///
    /// Panics if `re` is not a valid regular expression.
    pub fn add_simple(&mut self, re: &str, token: T) {
        self.push_rule(re, Box::new(move |_tok: &str| Some(token.clone())));
    }

    /// Adds a rule that consumes the matched text without producing a token.
    ///
    /// # Panics
    ///
    /// Panics if `re` is not a valid regular expression.
    pub fn add_noop(&mut self, re: &str) {
        self.push_rule(re, Box::new(|_tok: &str| -> Option<T> { None }));
    }

    /// Compiles the anchored form of `re` and appends it together with `action`.
    fn push_rule(&mut self, re: &str, action: Box<dyn Fn(&str) -> Option<T>>) {
        let anchored = convert_regex(re);
        let compiled = Regex::new(&anchored)
            .unwrap_or_else(|err| panic!("invalid lexer rule regex {:?}: {}", re, err));
        self.0.push((compiled, action));
    }
}

/// Iterator that splits `text` into tokens according to a [`Ruleset`].
///
/// Yields `Ok(token)` for every match of a token-producing rule and a single
/// `Err(message)` when no rule matches the remaining input; after an error
/// the remaining input is dropped, so the iterator ends.
pub struct Lexer<'a, T: Clone + 'static> {
    rules: &'a Ruleset<T>,
    text: String,
}

impl<'a, T: Clone + 'static> Iterator for Lexer<'a, T> {
    type Item = Result<T, String>;

    fn next(&mut self) -> Option<Result<T, String>> {
        // Copy the shared reference out so rule borrows are independent of
        // the mutable access to `self.text` below.
        let rules = self.rules;

        loop {
            // Checked before consulting the rules so that an empty ruleset
            // still terminates once the input has been consumed.
            if self.text.is_empty() {
                return None;
            }

            // Locate the first rule with a non-empty match. Only byte offsets
            // are kept, so the text does not need to be cloned to satisfy the
            // borrow checker.
            let mut hit = None;
            for &(ref re, ref action) in rules.0.iter() {
                let span = re.find(&self.text).map(|m| (m.start(), m.end()));
                if let Some((start, end)) = span {
                    // A zero-length match consumes nothing; accepting it
                    // would make the lexer loop forever on the same input.
                    if end > start {
                        hit = Some((start, end, action));
                        break;
                    }
                }
            }

            match hit {
                Some((start, end, action)) => {
                    let token = action(&self.text[start..end]);
                    // Drop the consumed prefix in place instead of
                    // allocating a fresh string for the remainder.
                    self.text.drain(..end);
                    if let Some(token) = token {
                        return Some(Ok(token));
                    }
                    // No-op rule: keep scanning for the next real token.
                }
                None => {
                    let message = format!("No rule matched \"{}\"", self.text);
                    self.text.clear();
                    return Some(Err(message));
                }
            }
        }
    }
}

/// Starts lexing `text` with `rules`, returning an iterator over the tokens.
pub fn lex<T: Clone + 'static, S: Into<String>>(rules: &Ruleset<T>, text: S) -> Lexer<T> {
    Lexer {
        rules,
        text: text.into(),
    }
}

/// Anchors `re` to the start of the input.
///
/// The pattern is wrapped in a non-capturing group so that the anchor applies
/// to the whole expression; a bare `^a|b` would anchor only the `a` branch.
fn convert_regex(re: &str) -> String {
    format!("^(?:{})", re)
}