harper_core/mask/
regex_masker.rs1use regex::Regex;
2
3use crate::{Span, offsets::build_byte_to_char_map};
4
5use super::{Mask, Masker};
6
7pub struct RegexMasker {
9 regex: Regex,
10 exclude_matches: bool,
11}
12
13impl RegexMasker {
14 pub fn new(regex: &str, exclude_matches: bool) -> Option<Self> {
21 Some(Self {
22 regex: Regex::new(regex).ok()?,
23 exclude_matches,
24 })
25 }
26}
27
28impl Masker for RegexMasker {
29 fn create_mask(&self, source: &[char]) -> Mask {
30 let source_s: String = source.iter().collect();
31 let byte_to_char = build_byte_to_char_map(&source_s);
32
33 let mut mask = Mask::new_blank();
34
35 if self.exclude_matches {
36 let mut allowed_start = 0;
37
38 for m in self.regex.find_iter(&source_s) {
39 let match_start = byte_to_char[m.start()];
40 let match_end = byte_to_char[m.end()];
41
42 if allowed_start < match_start {
43 mask.push_allowed(Span::new(allowed_start, match_start));
44 }
45
46 allowed_start = match_end;
47 }
48
49 if allowed_start < source.len() {
50 mask.push_allowed(Span::new(allowed_start, source.len()));
51 }
52 } else {
53 for m in self.regex.find_iter(&source_s) {
54 let match_start = byte_to_char[m.start()];
55 let match_end = byte_to_char[m.end()];
56
57 if match_start < match_end {
58 mask.push_allowed(Span::new(match_start, match_end));
59 }
60 }
61 }
62
63 mask
64 }
65}
66
#[cfg(test)]
mod tests {
    use quickcheck::TestResult;
    use quickcheck_macros::quickcheck;

    use super::RegexMasker;
    use crate::{Masker, Span};

    /// Masks `text` with a `RegexMasker` built from `pattern` and returns the
    /// text of every allowed region, in order.
    fn allowed_chunks(pattern: &str, exclude_matches: bool, text: &str) -> Vec<String> {
        let source: Vec<_> = text.chars().collect();
        let masker = RegexMasker::new(pattern, exclude_matches).unwrap();

        let allowed = masker
            .create_mask(&source)
            .iter_allowed(&source)
            .map(|(_, chars)| chars.iter().collect::<String>())
            .collect::<Vec<_>>();

        allowed
    }

    /// With `exclude_matches == false`, only the bracketed matches survive.
    #[test]
    fn include_matches() {
        let allowed = allowed_chunks(r"\[[^\]]+\]", false, "foo [ignore] bar [drop]");

        assert_eq!(allowed, vec!["[ignore]", "[drop]"]);
    }

    /// With `exclude_matches == true`, only the text between matches survives.
    #[test]
    fn exclude_matches() {
        let allowed = allowed_chunks(r"\[[^\]]+\]", true, "foo [ignore] bar [drop]");

        assert_eq!(allowed, vec!["foo ", " bar "]);
    }

    /// Multi-byte characters must not skew the resulting char-indexed spans.
    #[test]
    fn unicode_offsets_are_converted_to_char_spans() {
        let allowed = allowed_chunks(r"🙂B🙂", false, "A🙂B🙂C");

        assert_eq!(allowed, vec!["🙂B🙂"]);
    }

    /// A catch-all pattern over printable ASCII yields one span covering the
    /// whole input.
    #[quickcheck]
    fn can_match_everything(source: String) -> TestResult {
        let unsupported = |s: char| !s.is_ascii() || s.is_control();
        if source.contains(unsupported) {
            return TestResult::discard();
        }

        let masker = RegexMasker::new(".*", false).unwrap();

        let chars: Vec<_> = source.chars().collect();
        let mask = masker.create_mask(&chars);

        // Empty inputs produce no span to compare against, so skip them.
        if chars.is_empty() {
            return TestResult::discard();
        }

        assert_eq!(mask.allowed, vec![Span::new_with_len(0, chars.len())]);
        TestResult::passed()
    }
}