use std::rc::Rc;
use std::borrow::Borrow;
use regex::Regex;
/// Builds a rule list from `pattern => handler` pairs.
///
/// Each pair expands to a `(pattern, Box::new(handler))` tuple and the tuples
/// are collected with `vec!`, which is the shape `Lexer::new` takes for its
/// `rules` argument. At least one pair is required; a single trailing comma
/// after the last pair is accepted.
#[macro_export]
macro_rules! lex_rules {
// `pat => handler, pat => handler` with no trailing comma.
($($e: expr => $b: expr),+) => { vec![$( ($e, Box::new($b)) ),+] };
// Same expansion, accepting a trailing comma after the last pair.
($($e: expr => $b: expr),+,) => { vec![$( ($e, Box::new($b)) ),+] };
}
/// Location of a piece of lexer input.
///
/// The lexer in this file fills `lo`/`hi` with byte offsets into the source
/// text (`lo` is where the token starts, `hi` is one past its last byte) and
/// `line` with the value of its line counter, which starts at 0, at the
/// moment the token began.
///
/// `Eq` and `Hash` are derived in addition to `PartialEq` so spans can be used
/// as keys in hash maps and sets.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Span {
    /// Byte offset at which the span starts.
    pub lo: usize,
    /// Byte offset at which the span ends (exclusive).
    pub hi: usize,
    /// Line counter value at the start of the span.
    pub line: usize,
}
impl Span {
pub fn new(lo: usize, hi: usize, line: usize) -> Self {
Self {
lo: lo,
hi: hi,
line: line
}
}
pub fn extend(mut self, hi: usize) -> Self {
self.hi = hi;
self
}
}
struct Arm<T> {
captures: usize,
handle: Box<for<'a> Fn(Span, &'a str, Vec<&'a str>) -> T>
}
struct LexerInternal<T> {
matcher: Regex,
arms: Vec<Arm<T>>,
unknown: Box<for<'a> Fn(Span, &'a str) -> T>
}
/// A compiled set of lexing rules from which token iterators are created.
///
/// The compiled state is held behind an `Rc`; `src_iter` clones that `Rc`
/// into every `LexIter` it returns, so all iterators share one compiled rule
/// set.
pub struct Lexer<T> {
/// Shared matcher, rule arms and fallback handler.
internal: Rc<LexerInternal<T>>
}
/// Iterator over the tokens of one source string.
///
/// Created by `Lexer::src_iter`; each call to `next` yields the value returned
/// by a rule handler or by the fallback handler.
pub struct LexIter<T> {
/// Compiled rules shared with the `Lexer` that created this iterator.
internal: Rc<LexerInternal<T>>,
/// Owned copy of the text being lexed.
src: String,
/// Byte offset into `src` of the first input not yet consumed.
pos: usize,
/// Current line number, starting at 0; copied into each emitted `Span` and
/// updated by `next`.
line: usize
}
impl<T> Lexer<T> {
pub fn new<F>(rules: Vec<(&str, Box<for<'a> Fn(Span, &'a str, Vec<&'a str>) -> T>)>, unknown: F) -> Self
where F: 'static + for<'a> Fn(Span, &'a str) -> T {
if rules.is_empty() {
panic!("Empty rules set");
}
let (pattern, arms) = {
let mut pattern = String::new();
let mut arms = Vec::new();
let mut rules_iter = rules.into_iter();
if let Some((pat, handle)) = rules_iter.next() {
let captures = Regex::new(pat).unwrap().captures_len();
pattern.push_str(&format!("({})", pat));
arms.push(Arm { captures, handle });
for (pat, handle) in rules_iter {
let captures = Regex::new(pat).unwrap().captures_len();
pattern.push_str(&format!("|({})", pat));
arms.push(Arm { captures, handle });
}
}
(format!("^(?:{})", pattern), arms)
};
Self {
internal: Rc::new(LexerInternal {
matcher: Regex::new(&pattern).unwrap(),
arms: arms,
unknown: Box::new(unknown)
})
}
}
pub fn src_iter<S: Borrow<str>>(&self, src: S) -> LexIter<T> {
LexIter {
internal: self.internal.clone(),
src: src.borrow().to_owned(),
pos: 0,
line: 0
}
}
}
impl<T> Iterator for LexIter<T> {
    type Item = T;

    /// Produces the next token, or `None` once `pos` has reached the end of
    /// the source.
    ///
    /// The combined regex is run against the unread tail of the source. When
    /// it matches, the first rule whose group participated is selected, the
    /// position advances past the match, the line counter grows by the number
    /// of `'\n'` in the matched text, and the rule's handler is called with
    /// the span, the matched text and the rule's sub-captures. A sub-group
    /// that did not take part in the match (for example an optional `(x)?`)
    /// is passed as `""`.
    ///
    /// When nothing matches, or the match is empty, exactly one character is
    /// consumed and handed to the `unknown` handler. Treating empty matches as
    /// unmatched input guarantees that every call makes progress.
    ///
    /// The `line` stored in a span is the line counter value from before the
    /// token was consumed.
    fn next(&mut self) -> Option<T> {
        if self.pos >= self.src.len() {
            return None;
        }

        let internal = &*self.internal;
        let start = self.pos;
        let line = self.line;
        let rest = &self.src[start..];

        if let Some(caps) = internal.matcher.captures(rest) {
            // Group 0 is the whole match; each rule then owns `captures`
            // consecutive groups, the first of which wraps the entire rule.
            let mut group = 1;
            for arm in &internal.arms {
                if let Some(whole) = caps.get(group) {
                    if whole.end() == 0 {
                        // Zero-width match: fall through to the unknown path
                        // so the iterator cannot stall on the same offset.
                        break;
                    }
                    let text = whole.as_str();
                    let end = start + whole.end();
                    self.line += text.matches('\n').count();
                    self.pos = end;
                    let subcaptures: Vec<&str> = ((group + 1)..(group + arm.captures))
                        .map(|idx| caps.get(idx).map_or("", |m| m.as_str()))
                        .collect();
                    let span = Span { lo: start, hi: end, line };
                    return Some((arm.handle)(span, text, subcaptures));
                }
                group += arm.captures;
            }
        }

        // Unrecognised input: consume one whole character, not one byte, so
        // the slice below always ends on a UTF-8 character boundary.
        let ch = match rest.chars().next() {
            Some(ch) => ch,
            None => return None,
        };
        let width = ch.len_utf8();
        if ch == '\n' {
            // Keep line numbers of later tokens right after a stray newline.
            self.line += 1;
        }
        self.pos = start + width;
        let span = Span { lo: start, hi: start + width, line };
        Some((internal.unknown)(span, &rest[..width]))
    }
}