harper_core/mask/
mod.rs

1use itertools::Itertools;
2
3use crate::Span;
4
/// A Masker is a tool that can be composed to eliminate chunks of text from
/// being parsed. They can be composed to do things like isolate comments from a
/// programming language or disable linting for languages that have been
/// determined to not be English.
///
/// This is primarily used by [`crate::parsers::Mask`] to create parsers for
/// things like comments of programming languages.
///
/// Implementors are required to be [`Send`] and [`Sync`].
pub trait Masker: Send + Sync {
    /// Build the [`Mask`] for the given `source` characters.
    ///
    /// NOTE(review): presumably the spans in the returned mask index into
    /// `source` -- confirm against the implementors.
    fn create_mask(&self, source: &[char]) -> Mask;
}
15
16/// Identifies portions of a [`char`] sequence that should __not__ be ignored by
17/// Harper.
18pub struct Mask {
19    // Right now, there aren't any use-cases where we can't treat this as a stack.
20    //
21    // Assumed that no elements overlap and exist in sorted order.
22    pub(self) allowed: Vec<Span>,
23}
24
25impl FromIterator<Span> for Mask {
26    fn from_iter<T: IntoIterator<Item = Span>>(iter: T) -> Self {
27        let allowed = iter
28            .into_iter()
29            .sorted_by_key(|span| span.start)
30            .collect_vec();
31        assert!(
32            allowed.is_sorted_by(|a, b| a.end <= b.start),
33            "Masker elements cannot overlap and must be sorted!"
34        );
35
36        Self { allowed }
37    }
38}
39
40impl Mask {
41    /// Create a new Mask for a given piece of text, marking all text as
42    /// disallowed.
43    pub fn new_blank() -> Self {
44        Self {
45            allowed: Vec::new(),
46        }
47    }
48
49    pub fn iter_allowed<'a>(
50        &'a self,
51        source: &'a [char],
52    ) -> impl Iterator<Item = (Span, &'a [char])> {
53        self.allowed.iter().map(|s| (*s, s.get_content(source)))
54    }
55
56    /// Mark a span of the text as allowed.
57    pub fn push_allowed(&mut self, allowed: Span) {
58        if let Some(last) = self.allowed.last_mut() {
59            assert!(
60                allowed.start >= last.end,
61                "Masker elements cannot overlap and must be sorted!"
62            );
63
64            if allowed.start == last.end {
65                last.end = allowed.end;
66                return;
67            }
68        }
69
70        self.allowed.push(allowed)
71    }
72
73    /// Merge chunks that are only separated by whitespace.
74    pub fn merge_whitespace_sep(&mut self, source: &[char]) {
75        let mut after = Vec::with_capacity(self.allowed.len());
76
77        let mut iter = 0..self.allowed.len();
78
79        while let Some(i) = iter.next() {
80            let a = self.allowed[i];
81
82            if let Some(b) = self.allowed.get(i + 1) {
83                let sep = Span::new(a.end, b.start);
84                let sep_content = sep.get_content(source);
85
86                if sep_content.iter().all(|c| c.is_whitespace() || *c == '\n') {
87                    iter.next();
88                    after.push(Span::new(a.start, b.end));
89                    continue;
90                }
91            }
92
93            after.push(a);
94        }
95
96        if self.allowed.len() != after.len() {
97            self.allowed = after;
98            self.merge_whitespace_sep(source);
99        } else {
100            self.allowed = after;
101        }
102    }
103}
104
#[cfg(test)]
mod tests {
    use crate::{Mask, Span};

    /// A span pushed directly after the previous one should extend it rather
    /// than being stored separately.
    #[test]
    fn bumps_existing() {
        let mut mask = Mask::new_blank();

        for (start, len) in [(0, 1), (1, 2)] {
            mask.push_allowed(Span::new_with_len(start, len));
        }

        assert_eq!(mask.allowed.len(), 1)
    }

    /// Three words separated by a space and a newline should collapse into a
    /// single allowed span.
    #[test]
    fn merges_whitespace_sep() {
        let source = "word word\nword".chars().collect::<Vec<char>>();

        let mut mask = Mask::new_blank();
        for start in [0, 5, 10] {
            mask.push_allowed(Span::new_with_len(start, 4));
        }

        // Nothing is adjacent yet, so every word keeps its own span.
        assert_eq!(mask.allowed.len(), 3);

        mask.merge_whitespace_sep(&source);

        assert_eq!(mask.allowed.len(), 1);
    }
}