Skip to main content

harper_core/mask/
mod.rs

1mod regex_masker;
2pub use regex_masker::RegexMasker;
3
4use itertools::Itertools;
5
6use crate::Span;
7
8/// A Masker is a tool that can be composed to eliminate chunks of text from
9/// being parsed. They can be composed to do things like isolate comments from a
10/// programming language or disable linting for languages that have been
11/// determined to not be English.
12///
13/// This is primarily used by [`crate::parsers::Mask`] to create parsers for
14/// things like comments of programming languages.
15pub trait Masker: Send + Sync {
16    fn create_mask(&self, source: &[char]) -> Mask;
17}
18
19/// Identifies portions of a [`char`] sequence that should __not__ be ignored by
20/// Harper.
21pub struct Mask {
22    // Right now, there aren't any use-cases where we can't treat this as a stack.
23    //
24    // Assumed that no elements overlap and exist in sorted order.
25    pub(self) allowed: Vec<Span<char>>,
26}
27
28impl FromIterator<Span<char>> for Mask {
29    fn from_iter<T: IntoIterator<Item = Span<char>>>(iter: T) -> Self {
30        let allowed = iter
31            .into_iter()
32            .sorted_by_key(|span| span.start)
33            .collect_vec();
34        assert!(
35            allowed.is_sorted_by(|a, b| a.end <= b.start),
36            "Masker elements cannot overlap and must be sorted!"
37        );
38
39        Self { allowed }
40    }
41}
42
43impl Mask {
44    /// Create a new Mask for a given piece of text, marking all text as
45    /// disallowed.
46    pub fn new_blank() -> Self {
47        Self {
48            allowed: Vec::new(),
49        }
50    }
51
52    pub fn iter_allowed<'a>(
53        &'a self,
54        source: &'a [char],
55    ) -> impl Iterator<Item = (Span<char>, &'a [char])> {
56        self.allowed.iter().map(|s| (*s, s.get_content(source)))
57    }
58
59    /// Mark a span of the text as allowed.
60    pub fn push_allowed(&mut self, allowed: Span<char>) {
61        if let Some(last) = self.allowed.last_mut() {
62            assert!(
63                allowed.start >= last.end,
64                "Masker elements cannot overlap and must be sorted!"
65            );
66
67            if allowed.start == last.end {
68                last.end = allowed.end;
69                return;
70            }
71        }
72
73        self.allowed.push(allowed)
74    }
75
76    /// Merge chunks that are only separated by whitespace.
77    pub fn merge_whitespace_sep(&mut self, source: &[char]) {
78        let mut after = Vec::with_capacity(self.allowed.len());
79
80        let mut iter = 0..self.allowed.len();
81
82        while let Some(i) = iter.next() {
83            let a = self.allowed[i];
84
85            if let Some(b) = self.allowed.get(i + 1) {
86                let sep = Span::new(a.end, b.start);
87                let sep_content = sep.get_content(source);
88
89                if sep_content.iter().all(|c| c.is_whitespace() || *c == '\n') {
90                    iter.next();
91                    after.push(Span::new(a.start, b.end));
92                    continue;
93                }
94            }
95
96            after.push(a);
97        }
98
99        if self.allowed.len() != after.len() {
100            self.allowed = after;
101            self.merge_whitespace_sep(source);
102        } else {
103            self.allowed = after;
104        }
105    }
106}
107
#[cfg(test)]
mod tests {
    use crate::{Mask, Span};

    /// Pushing a span that begins exactly where the previous one ended should
    /// grow the existing entry rather than add a second one.
    #[test]
    fn bumps_existing() {
        let first = Span::new_with_len(0, 1);
        let adjacent = Span::new_with_len(1, 2);

        let mut mask = Mask::new_blank();
        mask.push_allowed(first);
        mask.push_allowed(adjacent);

        assert_eq!(mask.allowed.len(), 1);
    }

    /// Three words separated only by a space and a newline should collapse
    /// into a single allowed span.
    #[test]
    fn merges_whitespace_sep() {
        let text = "word word\nword";
        let source: Vec<char> = text.chars().collect();

        let mut mask = Mask::new_blank();
        for start in [0, 5, 10] {
            mask.push_allowed(Span::new_with_len(start, 4));
        }
        assert_eq!(mask.allowed.len(), 3);

        mask.merge_whitespace_sep(&source);
        assert_eq!(mask.allowed.len(), 1);
    }
}
137}