#![doc(html_root_url = "https://docs.rs/regex-lexer/0.2.0/regex-lexer")]
use std::ops::Range;
use regex::{Regex, RegexSet};
pub use regex::Error;
/// A single token produced by [`Lexer::tokens`]: the matched kind together
/// with the byte span and text of the match within the source string.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Token<'t, K> {
/// The token kind, as registered via `LexerBuilder::token`.
pub kind: K,
/// Byte range of the match within the original source string.
pub span: Range<usize>,
/// The matched slice of the source (equal to `&source[span]`).
pub text: &'t str,
}
/// Builder for a [`Lexer`]: collects regex pattern strings paired with either
/// a token kind (`Some`) or an "ignore" marker (`None`), in registration order.
pub struct LexerBuilder<'r, K> {
// Raw (not yet anchored) pattern strings, in the order they were added.
regexes: Vec<&'r str>,
// Parallel to `regexes`: `Some(kind)` emits a token, `None` skips the match.
kinds: Vec<Option<K>>,
}
impl<'r, K> Default for LexerBuilder<'r, K> {
    /// Creates an empty builder, equivalent to [`LexerBuilder::new`].
    fn default() -> Self {
        Self {
            regexes: Vec::new(),
            kinds: Vec::new(),
        }
    }
}
impl<'r, K> LexerBuilder<'r, K> {
    /// Creates a builder with no patterns registered.
    pub fn new() -> Self {
        Self {
            regexes: Vec::new(),
            kinds: Vec::new(),
        }
    }

    /// Registers `re`; wherever it matches, a token of `kind` is produced.
    pub fn token(mut self, re: &'r str, kind: K) -> Self {
        self.regexes.push(re);
        self.kinds.push(Some(kind));
        self
    }

    /// Registers `re`; wherever it matches, the matched input is skipped
    /// and no token is produced.
    pub fn ignore(mut self, re: &'r str) -> Self {
        self.regexes.push(re);
        self.kinds.push(None);
        self
    }

    /// Compiles every registered pattern into a [`Lexer`].
    ///
    /// Each pattern is anchored with a leading `^` so that matching only
    /// ever occurs at the current lexing position.
    ///
    /// # Errors
    ///
    /// Returns [`Error`] if any pattern fails to compile.
    pub fn build(self) -> Result<Lexer<K>, Error> {
        let anchored: Vec<String> = self.regexes.iter().map(|r| format!("^{}", r)).collect();
        let regex_set = RegexSet::new(&anchored)?;
        // The set only reports *which* patterns matched; the individual
        // regexes are kept so the match *end* can be found later.
        let regexes = regex_set
            .patterns()
            .iter()
            .map(|pattern| Regex::new(pattern))
            .collect::<Result<Vec<_>, _>>()?;
        Ok(Lexer {
            kinds: self.kinds,
            regexes,
            regex_set,
        })
    }
}
/// A compiled lexer, built via [`LexerBuilder`]; use [`Lexer::tokens`] to
/// iterate over the tokens of a source string.
#[derive(Debug)]
pub struct Lexer<K> {
// Parallel to the patterns: `Some(kind)` emits a token, `None` skips.
kinds: Vec<Option<K>>,
// Individually compiled anchored patterns, used to find each match's end
// (the RegexSet alone only reports which patterns matched).
regexes: Vec<Regex>,
// All anchored patterns compiled together for a single matching pass.
regex_set: RegexSet,
}
impl<K> Lexer<K> {
    /// Returns a fresh [`LexerBuilder`]; shorthand for [`LexerBuilder::new`].
    pub fn builder<'r>() -> LexerBuilder<'r, K> {
        LexerBuilder::new()
    }

    /// Returns an iterator over the tokens of `source`, starting at byte 0.
    pub fn tokens<'l, 't>(&'l self, source: &'t str) -> Tokens<'l, 't, K> {
        let position = 0;
        Tokens {
            source,
            position,
            lexer: self,
        }
    }
}
/// Iterator over the tokens of a source string, created by [`Lexer::tokens`].
#[derive(Debug)]
pub struct Tokens<'l, 't, K> {
// The lexer whose patterns drive the iteration.
lexer: &'l Lexer<K>,
// The full source string being tokenized.
source: &'t str,
// Current byte offset into `source`; advanced past each match.
position: usize,
}
impl<'l, 't, K: Copy> Iterator for Tokens<'l, 't, K> {
    type Item = Token<'t, K>;

    /// Produces the next token, transparently skipping any `ignore`d matches.
    ///
    /// Selection follows maximal munch: among all patterns matching at the
    /// current position, the *longest* match wins. Ties between equally long
    /// matches go to the pattern registered last (`max_by_key` returns the
    /// last maximum), preserving the previous tie-breaking behavior.
    ///
    /// # Panics
    ///
    /// Panics if no pattern matches at the current position (unlexable input).
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            if self.position == self.source.len() {
                return None;
            }
            let rest = &self.source[self.position..];
            let match_set = self.lexer.regex_set.matches(rest);
            // BUG FIX: the previous `.next_back()` picked whichever matching
            // pattern was registered *last*, ignoring match length. With
            // `token("==", Eq)` registered before `token("=", Assign)`, the
            // input "==" would wrongly lex as two `=` tokens. Take the
            // longest match instead.
            let (len, i) = match_set
                .into_iter()
                .map(|i: usize| {
                    // The per-pattern regexes are the same anchored patterns
                    // as the set's, so a set match guarantees an individual
                    // match starting at offset 0.
                    let m = self.lexer.regexes[i].find(rest).unwrap();
                    assert!(m.start() == 0);
                    (m.end(), i)
                })
                .max_by_key(|&(len, _)| len)
                .expect("no pattern matched the input at the current position");
            // NOTE(review): a pattern that matches the empty string yields
            // len == 0 and would loop here forever — same as before this fix.
            let span = self.position..self.position + len;
            let text = &self.source[span.clone()];
            self.position += len;
            if let Some(kind) = self.lexer.kinds[i] {
                return Some(Token { kind, span, text });
            }
        }
    }
}