mod regex_masker;
pub use regex_masker::RegexMasker;
use itertools::Itertools;
use crate::Span;
/// Something that can produce a [`Mask`] for a piece of source text.
///
/// Implementors must be `Send + Sync`.
pub trait Masker: Send + Sync {
    /// Builds the [`Mask`] for `source`, given as a slice of characters.
    fn create_mask(&self, source: &[char]) -> Mask;
}
/// A collection of "allowed" character spans over some source text.
///
/// Every way of adding spans in this module (`FromIterator` and
/// `push_allowed`) asserts that the stored spans are ordered by position and
/// do not overlap.
pub struct Mask {
    /// The allowed spans, private to this module and its children.
    allowed: Vec<Span<char>>,
}
/// Builds a [`Mask`] from any collection of spans.
///
/// The spans are ordered by their `start` before being stored, so the caller
/// may supply them in any order.
///
/// # Panics
///
/// Panics if, once ordered, any span ends after the following span starts.
impl FromIterator<Span<char>> for Mask {
    fn from_iter<T: IntoIterator<Item = Span<char>>>(iter: T) -> Self {
        let allowed: Vec<Span<char>> = iter
            .into_iter()
            .sorted_by_key(|candidate| candidate.start)
            .collect();

        // Each span must finish no later than its successor begins.
        let disjoint = allowed
            .windows(2)
            .all(|pair| pair[0].end <= pair[1].start);
        assert!(
            disjoint,
            "Masker elements cannot overlap and must be sorted!"
        );

        Self { allowed }
    }
}
impl Mask {
    /// Creates a mask that contains no allowed spans.
    pub fn new_blank() -> Self {
        Self {
            allowed: Vec::new(),
        }
    }

    /// Iterates over the allowed spans in stored order.
    ///
    /// Each item pairs a copy of the span with the slice that
    /// `Span::get_content` returns for it when applied to `source`.
    pub fn iter_allowed<'a>(
        &'a self,
        source: &'a [char],
    ) -> impl Iterator<Item = (Span<char>, &'a [char])> {
        self.allowed
            .iter()
            .map(|span| (*span, span.get_content(source)))
    }

    /// Appends `allowed` to the end of the mask.
    ///
    /// If the new span begins exactly where the current last span ends, the
    /// last span is extended in place instead of a new entry being added.
    ///
    /// # Panics
    ///
    /// Panics if `allowed` starts before the end of the current last span.
    pub fn push_allowed(&mut self, allowed: Span<char>) {
        if let Some(last) = self.allowed.last_mut() {
            assert!(
                allowed.start >= last.end,
                "Masker elements cannot overlap and must be sorted!"
            );

            // Directly adjacent: grow the existing span rather than storing two.
            if allowed.start == last.end {
                last.end = allowed.end;
                return;
            }
        }

        self.allowed.push(allowed);
    }

    /// Merges neighbouring allowed spans whose separating characters in
    /// `source` are all whitespace (an empty separator counts as well).
    ///
    /// Runs of such spans collapse into a single span reaching from the start
    /// of the first to the end of the last. This is done in one left-to-right
    /// pass: the separator between two neighbours only depends on where the
    /// left one ends and the right one starts, and merging never moves those
    /// boundaries, so no repeated passes are needed.
    pub fn merge_whitespace_sep(&mut self, source: &[char]) {
        let mut merged: Vec<Span<char>> = Vec::with_capacity(self.allowed.len());

        for &span in &self.allowed {
            if let Some(prev) = merged.last_mut() {
                let gap = Span::new(prev.end, span.start);

                // `char::is_whitespace` already covers '\n'.
                if gap.get_content(source).iter().all(|c| c.is_whitespace()) {
                    *prev = Span::new(prev.start, span.end);
                    continue;
                }
            }

            merged.push(span);
        }

        self.allowed = merged;
    }
}
#[cfg(test)]
mod tests {
    use crate::{Mask, Span};

    /// A span that starts exactly where the previous one ends should extend
    /// it rather than add a second entry.
    #[test]
    fn bumps_existing() {
        let mut mask = Mask::new_blank();

        for (start, len) in [(0, 1), (1, 2)] {
            mask.push_allowed(Span::new_with_len(start, len));
        }

        assert_eq!(mask.allowed.len(), 1);
    }

    /// Three words separated by a space and a newline should collapse into a
    /// single allowed span.
    #[test]
    fn merges_whitespace_sep() {
        let source = "word word\nword".chars().collect::<Vec<char>>();
        let mut mask = Mask::new_blank();

        // One span per word: (start, length).
        for (start, len) in [(0, 4), (5, 4), (10, 4)] {
            mask.push_allowed(Span::new_with_len(start, len));
        }
        assert_eq!(mask.allowed.len(), 3);

        mask.merge_whitespace_sep(&source);
        assert_eq!(mask.allowed.len(), 1);
    }
}