1mod regex_masker;
2pub use regex_masker::RegexMasker;
3
4use itertools::Itertools;
5
6use crate::Span;
7
8pub trait Masker: Send + Sync {
16 fn create_mask(&self, source: &[char]) -> Mask;
17}
18
19pub struct Mask {
22 pub(self) allowed: Vec<Span<char>>,
26}
27
28impl FromIterator<Span<char>> for Mask {
29 fn from_iter<T: IntoIterator<Item = Span<char>>>(iter: T) -> Self {
30 let allowed = iter
31 .into_iter()
32 .sorted_by_key(|span| span.start)
33 .collect_vec();
34 assert!(
35 allowed.is_sorted_by(|a, b| a.end <= b.start),
36 "Masker elements cannot overlap and must be sorted!"
37 );
38
39 Self { allowed }
40 }
41}
42
43impl Mask {
44 pub fn new_blank() -> Self {
47 Self {
48 allowed: Vec::new(),
49 }
50 }
51
52 pub fn iter_allowed<'a>(
53 &'a self,
54 source: &'a [char],
55 ) -> impl Iterator<Item = (Span<char>, &'a [char])> {
56 self.allowed.iter().map(|s| (*s, s.get_content(source)))
57 }
58
59 pub fn push_allowed(&mut self, allowed: Span<char>) {
61 if let Some(last) = self.allowed.last_mut() {
62 assert!(
63 allowed.start >= last.end,
64 "Masker elements cannot overlap and must be sorted!"
65 );
66
67 if allowed.start == last.end {
68 last.end = allowed.end;
69 return;
70 }
71 }
72
73 self.allowed.push(allowed)
74 }
75
76 pub fn merge_whitespace_sep(&mut self, source: &[char]) {
78 let mut after = Vec::with_capacity(self.allowed.len());
79
80 let mut iter = 0..self.allowed.len();
81
82 while let Some(i) = iter.next() {
83 let a = self.allowed[i];
84
85 if let Some(b) = self.allowed.get(i + 1) {
86 let sep = Span::new(a.end, b.start);
87 let sep_content = sep.get_content(source);
88
89 if sep_content.iter().all(|c| c.is_whitespace() || *c == '\n') {
90 iter.next();
91 after.push(Span::new(a.start, b.end));
92 continue;
93 }
94 }
95
96 after.push(a);
97 }
98
99 if self.allowed.len() != after.len() {
100 self.allowed = after;
101 self.merge_whitespace_sep(source);
102 } else {
103 self.allowed = after;
104 }
105 }
106}
107
#[cfg(test)]
mod tests {
    use crate::{Mask, Span};

    /// Pushing a span that starts exactly where the previous one ends should
    /// grow the existing entry instead of adding a new one.
    #[test]
    fn bumps_existing() {
        let mut mask = Mask::new_blank();

        for (start, len) in [(0, 1), (1, 2)] {
            mask.push_allowed(Span::new_with_len(start, len));
        }

        assert_eq!(mask.allowed.len(), 1);
    }

    /// Three words separated only by a space and a newline should collapse
    /// into a single span.
    #[test]
    fn merges_whitespace_sep() {
        let source = "word word\nword".chars().collect::<Vec<_>>();

        let mut mask = Mask::new_blank();
        for start in [0, 5, 10] {
            mask.push_allowed(Span::new_with_len(start, 4));
        }
        assert_eq!(mask.allowed.len(), 3);

        mask.merge_whitespace_sep(&source);
        assert_eq!(mask.allowed.len(), 1);
    }
}
137}