#![doc(html_root_url = "https://docs.rs/regex-lexer/0.1.0")]
use regex::{Regex, RegexSet};
/// Accumulates token rules before they are compiled into a lexer.
///
/// `'r` is the lifetime of the borrowed pattern strings, `'t` the lifetime of
/// the text handed to the callbacks, and `T` the token type they produce.
pub struct LexerBuilder<'r, 't, T>
where
    T: 't,
{
    /// Pattern source strings, one per rule, in registration order.
    regexes: Vec<&'r str>,
    /// Callbacks, one per rule; the callback at index `i` belongs to the
    /// pattern at index `i`. Each receives a `&'t str` and returns `Option<T>`.
    fns: Vec<Box<dyn Fn(&'t str) -> Option<T>>>,
}
impl<'r, 't, T: 't> std::fmt::Debug for LexerBuilder<'r, 't, T> {
    /// Formats the builder as `LexerBuilder { regexes: [...] }`.
    ///
    /// Only the pattern strings are shown; the boxed callbacks in `fns` are
    /// left out because `dyn Fn` trait objects do not implement `Debug`.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let mut out = f.debug_struct("LexerBuilder");
        out.field("regexes", &self.regexes);
        out.finish()
    }
}
impl<'r, 't, T: 't> Default for LexerBuilder<'r, 't, T> {
fn default() -> Self {
Self::new()
}
}
impl<'r, 't, T: 't> LexerBuilder<'r, 't, T> {
    /// Creates a builder with no token rules registered.
    pub fn new() -> Self {
        LexerBuilder {
            regexes: Vec::new(),
            fns: Vec::new(),
        }
    }

    /// Registers a token rule and returns the builder for chaining.
    ///
    /// `re` is the regular expression recognising the token; `f` is called
    /// with the matched text and returns `Some(token)` to emit a token or
    /// `None` to produce nothing for that match. Pattern and callback are
    /// stored at the same index, so rules keep their registration order.
    pub fn token<F>(mut self, re: &'r str, f: F) -> Self
    where
        F: Fn(&'t str) -> Option<T> + 'static,
    {
        self.regexes.push(re);
        self.fns.push(Box::new(f));
        self
    }

    /// Compiles the registered rules into a [`Lexer`].
    ///
    /// Every pattern is wrapped as `^(?:pattern)` before compilation. The
    /// non-capturing group is essential: alternation binds more loosely than
    /// `^`, so a bare `^a|b` would anchor only `a` and let `b` match anywhere
    /// in the remaining input.
    ///
    /// Because of the wrapping group, a pattern using the verbose (`x`) flag
    /// must not end in an unterminated `#` comment; the comment would swallow
    /// the closing parenthesis and compilation would fail.
    ///
    /// # Errors
    ///
    /// Returns the [`regex::Error`] reported for the first pattern that fails
    /// to compile.
    pub fn build(self) -> Result<Lexer<'t, T>, regex::Error> {
        let LexerBuilder { regexes, fns } = self;

        let anchored = regexes.into_iter().map(|re| format!("^(?:{})", re));
        let regex_set = RegexSet::new(anchored)?;

        // The set only reports *which* patterns match; individual regexes are
        // needed as well to learn how long each match is.
        let regexes = regex_set
            .patterns()
            .iter()
            .map(|pattern| Regex::new(pattern))
            .collect::<Result<Vec<_>, _>>()?;

        Ok(Lexer {
            fns,
            regexes,
            regex_set,
        })
    }
}
/// A compiled set of token rules, produced by `LexerBuilder::build`.
///
/// `'t` is the lifetime of the text passed to the callbacks and `T` the token
/// type they produce.
pub struct Lexer<'t, T>
where
    T: 't,
{
    /// Callbacks, indexed in parallel with `regexes`.
    fns: Vec<Box<dyn Fn(&'t str) -> Option<T>>>,
    /// One compiled regex per rule, used to measure the length of a match.
    regexes: Vec<Regex>,
    /// The same patterns compiled as a set, used to find which rules match.
    regex_set: RegexSet,
}
impl<'t, T: 't> Lexer<'t, T> {
    /// Returns an empty [`LexerBuilder`] for assembling a lexer.
    pub fn builder<'r>() -> LexerBuilder<'r, 't, T> {
        let empty: LexerBuilder<'r, 't, T> = LexerBuilder::new();
        empty
    }

    /// Returns an iterator over the tokens of `source`, starting at byte
    /// offset 0.
    ///
    /// The iterator borrows the lexer for `'l` and the text for `'t`.
    pub fn tokens<'l>(&'l self, source: &'t str) -> Tokens<'l, 't, T> {
        let lexer = self;
        let location = 0;
        Tokens {
            lexer,
            source,
            location,
        }
    }
}
impl<'t, T: 't> std::fmt::Debug for Lexer<'t, T> {
    /// Formats the lexer as `Lexer { regexes: [...] }`.
    ///
    /// Only the compiled regexes are shown; `fns` and `regex_set` are left
    /// out of the output.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let mut out = f.debug_struct("Lexer");
        out.field("regexes", &self.regexes);
        out.finish()
    }
}
/// Iterator over the tokens of one source string, created by `Lexer::tokens`.
#[derive(Debug)]
pub struct Tokens<'l, 't, T>
where
    T: 't,
{
    /// The lexer whose rules are applied.
    lexer: &'l Lexer<'t, T>,
    /// The complete text being tokenized.
    source: &'t str,
    /// Byte offset into `source` where the next match attempt starts.
    location: usize,
}
impl<'l, 't, T: 't> Iterator for Tokens<'l, 't, T> {
    type Item = T;

    /// Returns the next token, or `None` once the whole source is consumed.
    ///
    /// At the current offset every rule is tried and the longest match wins;
    /// when several rules tie for the longest match, the one with the highest
    /// index is chosen (`max_by_key` keeps the last maximum). The matched text
    /// is passed to that rule's callback. A callback returning `None` emits
    /// nothing and lexing continues right after the matched text.
    ///
    /// Matches that do not start at the current offset, and empty matches,
    /// are ignored: the former are not a token at this position, and the
    /// latter could never advance the cursor and would loop forever.
    ///
    /// # Panics
    ///
    /// Panics if no rule produces a non-empty match at the current offset.
    /// `Item = T` leaves no way to report the failure as a value.
    fn next(&mut self) -> Option<T> {
        // Both are `Copy` references; taking them out keeps the closures below
        // from borrowing `self` while `self.location` is updated.
        let lexer = self.lexer;
        let source = self.source;

        loop {
            if self.location >= source.len() {
                return None;
            }
            let rest = &source[self.location..];

            // The set narrows down which rules match at all; each individual
            // regex is then run to learn how long its match is.
            let best = lexer
                .regex_set
                .matches(rest)
                .into_iter()
                .filter_map(|i: usize| {
                    lexer.regexes[i]
                        .find(rest)
                        .filter(|m| m.start() == 0 && m.end() > 0)
                        .map(|m| (m.end(), i))
                })
                .max_by_key(|&(len, _)| len);

            let (len, i) = match best {
                Some(found) => found,
                None => {
                    // Collect by `char` so the snippet never splits a
                    // multi-byte character.
                    let snippet: String = rest.chars().take(16).collect();
                    panic!(
                        "no token pattern matches the input at byte offset {}: {:?}",
                        self.location, snippet
                    );
                }
            };

            let tok_str = &rest[..len];
            self.location += len;
            if let Some(tok) = (lexer.fns[i])(tok_str) {
                return Some(tok);
            }
        }
    }
}