Skip to main content

harper_core/mask/
regex_masker.rs

1use regex::Regex;
2
3use crate::{Span, offsets::build_byte_to_char_map};
4
5use super::{Mask, Masker};
6
7/// Allows one to mask the sections of a document that match a regular expression (or vice versa).
8pub struct RegexMasker {
9    regex: Regex,
10    exclude_matches: bool,
11}
12
13impl RegexMasker {
14    /// Parses and compiles the provided Regex expression. Returns None if an invalid expression
15    /// was provided.
16    ///
17    /// If `exclude_matches` is marked `true`, then the areas selected by the regular expression
18    /// will be _removed_ from Harper's view. If it is `false`, those areas will be the only ones
19    /// _included_.
20    pub fn new(regex: &str, exclude_matches: bool) -> Option<Self> {
21        Some(Self {
22            regex: Regex::new(regex).ok()?,
23            exclude_matches,
24        })
25    }
26}
27
28impl Masker for RegexMasker {
29    fn create_mask(&self, source: &[char]) -> Mask {
30        let source_s: String = source.iter().collect();
31        let byte_to_char = build_byte_to_char_map(&source_s);
32
33        let mut mask = Mask::new_blank();
34
35        if self.exclude_matches {
36            let mut allowed_start = 0;
37
38            for m in self.regex.find_iter(&source_s) {
39                let match_start = byte_to_char[m.start()];
40                let match_end = byte_to_char[m.end()];
41
42                if allowed_start < match_start {
43                    mask.push_allowed(Span::new(allowed_start, match_start));
44                }
45
46                allowed_start = match_end;
47            }
48
49            if allowed_start < source.len() {
50                mask.push_allowed(Span::new(allowed_start, source.len()));
51            }
52        } else {
53            for m in self.regex.find_iter(&source_s) {
54                let match_start = byte_to_char[m.start()];
55                let match_end = byte_to_char[m.end()];
56
57                if match_start < match_end {
58                    mask.push_allowed(Span::new(match_start, match_end));
59                }
60            }
61        }
62
63        mask
64    }
65}
66
#[cfg(test)]
mod tests {
    use quickcheck::TestResult;
    use quickcheck_macros::quickcheck;

    use super::RegexMasker;
    use crate::{Masker, Span};

    /// Masks `text` using `pattern` and returns the text of each allowed region, in order.
    fn allowed_regions(pattern: &str, exclude_matches: bool, text: &str) -> Vec<String> {
        let source: Vec<char> = text.chars().collect();
        let masker = RegexMasker::new(pattern, exclude_matches).unwrap();
        let mask = masker.create_mask(&source);

        let mut regions = Vec::new();
        for (_, chars) in mask.iter_allowed(&source) {
            regions.push(chars.iter().collect::<String>());
        }

        regions
    }

    #[test]
    fn include_matches() {
        let allowed = allowed_regions(r"\[[^\]]+\]", false, "foo [ignore] bar [drop]");

        assert_eq!(allowed, vec!["[ignore]", "[drop]"]);
    }

    #[test]
    fn exclude_matches() {
        let allowed = allowed_regions(r"\[[^\]]+\]", true, "foo [ignore] bar [drop]");

        assert_eq!(allowed, vec!["foo ", " bar "]);
    }

    #[test]
    fn unicode_offsets_are_converted_to_char_spans() {
        // Each emoji occupies several bytes but is a single `char`.
        let allowed = allowed_regions(r"🙂B🙂", false, "A🙂B🙂C");

        assert_eq!(allowed, vec!["🙂B🙂"]);
    }

    #[quickcheck]
    fn can_match_everything(source: String) -> TestResult {
        // Only inputs made of non-control ASCII characters are considered.
        let is_plain_ascii = source.chars().all(|c| c.is_ascii() && !c.is_control());
        if !is_plain_ascii {
            return TestResult::discard();
        }

        let chars: Vec<char> = source.chars().collect();
        let mask = RegexMasker::new(".*", false).unwrap().create_mask(&chars);

        // An empty document has nothing to allow, so there is nothing to assert on.
        if chars.is_empty() {
            return TestResult::discard();
        }

        assert_eq!(mask.allowed, vec![Span::new_with_len(0, chars.len())]);
        TestResult::passed()
    }
}
135}