litcheck_filecheck/pattern/search/
pattern_set.rs

1use std::{cmp::Ordering, collections::VecDeque};
2
3use crate::common::*;
4
5pub struct PatternSetSearcher<'a, 'patterns, 'input> {
6    input: Input<'input>,
7    last_match_end: Option<usize>,
8    /// The set of raw input patterns from which
9    /// this matcher was constructed
10    patterns: &'patterns [Pattern<'a>],
11    next_starts: Vec<usize>,
12    /// A bitset representing patterns which should be skipped
13    excluded: SmallVec<[u64; 1]>,
14    /// The buffer of pending pattern matches
15    ///
16    /// Each time a search is performed, we locate the next match
17    /// for all patterns, place them into this buffer, and sort by
18    /// the start position of each match. We then pop matches from
19    /// the buffer until no more remain, at which point we locate
20    /// the next set of matches.
21    found: VecDeque<MatchResult<'input>>,
22}
23impl<'a, 'patterns, 'input> PatternSetSearcher<'a, 'patterns, 'input> {
24    pub fn new(input: Input<'input>, patterns: &'patterns [Pattern<'a>]) -> DiagResult<Self> {
25        let num_patterns = patterns.len();
26        let excluded_chunks = (num_patterns / 64) + (num_patterns % 64 > 0) as usize;
27        let excluded = smallvec![0; excluded_chunks];
28        let next_starts = vec![0; num_patterns];
29
30        Ok(Self {
31            input,
32            last_match_end: None,
33            patterns,
34            next_starts,
35            excluded,
36            found: VecDeque::with_capacity(num_patterns),
37        })
38    }
39}
40impl<'a, 'patterns, 'input> fmt::Debug for PatternSetSearcher<'a, 'patterns, 'input> {
41    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
42        f.debug_struct("PatternSetSearcher")
43            .field("patterns", &self.patterns)
44            .finish()
45    }
46}
47
48impl<'a, 'patterns, 'input> Spanned for PatternSetSearcher<'a, 'patterns, 'input> {
49    fn span(&self) -> SourceSpan {
50        let start = self.patterns.iter().map(|p| p.start()).min().unwrap();
51        let end = self.patterns.iter().map(|p| p.end()).max().unwrap();
52        SourceSpan::from(start..end)
53    }
54}
55impl<'a, 'patterns, 'input> PatternSearcher<'input> for PatternSetSearcher<'a, 'patterns, 'input> {
56    type Input = Input<'input>;
57    type PatternID = usize;
58
59    fn input(&self) -> &Self::Input {
60        &self.input
61    }
62    fn last_match_end(&self) -> Option<usize> {
63        self.last_match_end
64    }
65    fn set_last_match_end(&mut self, end: usize) {
66        self.last_match_end = Some(end);
67        self.input.set_start(end);
68        for start in self.next_starts.iter_mut() {
69            *start = end;
70        }
71    }
72    fn patterns_len(&self) -> usize {
73        self.patterns.len()
74    }
75    fn pattern_span(&self, id: Self::PatternID) -> SourceSpan {
76        self.patterns[id].span()
77    }
78    fn try_match_next<'context, C>(&mut self, context: &mut C) -> DiagResult<MatchResult<'input>>
79    where
80        C: Context<'input, 'context> + ?Sized,
81    {
82        if let Some(mr) = self.found.pop_front() {
83            if let Some(range) = mr.matched_range() {
84                let last_end = self.last_match_end.get_or_insert(range.end);
85                *last_end = core::cmp::max(*last_end, range.end);
86            }
87            return Ok(mr);
88        }
89
90        let mut next_start = self.input.start();
91        for (pattern_id, pattern) in self.patterns.iter().enumerate() {
92            let excluded_chunk = pattern_id / 64;
93            let excluded_bit = 1 << (pattern_id % 64);
94            let is_excluded = self.excluded[excluded_chunk] & excluded_bit == excluded_bit;
95
96            if is_excluded {
97                continue;
98            }
99
100            let mut input = self.input;
101            input.set_start(self.next_starts[pattern_id]);
102
103            let mut guard = context.protect();
104            match pattern.try_match_mut(self.input, &mut guard)? {
105                MatchResult {
106                    info: Some(mut info),
107                    ty,
108                } if ty.is_ok() => {
109                    let end = info.span.end();
110                    next_start = core::cmp::min(end, next_start);
111                    self.next_starts[pattern_id] =
112                        if info.span.range().is_empty() && Some(end) == self.last_match_end {
113                            end.saturating_add(1)
114                        } else {
115                            end
116                        };
117                    info.pattern_id = pattern_id;
118                    self.found.push_back(MatchResult {
119                        info: Some(info),
120                        ty,
121                    });
122                }
123                MatchResult { info: None, .. } => {
124                    // We exclude patterns when we run out of matches for them
125                    self.next_starts[pattern_id] = self.input.buffer().len();
126                    self.excluded[excluded_chunk] |= excluded_bit;
127                }
128                MatchResult {
129                    info: Some(mut info),
130                    ty,
131                } => {
132                    // This is an error that we are going to return before other valid matches.
133                    let end = info.span.end();
134                    next_start = core::cmp::min(end, next_start);
135                    self.next_starts[pattern_id] =
136                        if info.span.range().is_empty() && Some(end) == self.last_match_end {
137                            end.saturating_add(1)
138                        } else {
139                            end
140                        };
141                    info.pattern_id = pattern_id;
142                    self.found.push_back(MatchResult {
143                        info: Some(info),
144                        ty,
145                    });
146                }
147            }
148        }
149
150        // Set the input next start to the smallest end match
151        self.input.set_start(next_start);
152
153        // Sort the matches, errors first, then leftmost-longest (by span length, then by span start, then by pattern id)
154        let found = self.found.make_contiguous();
155        found.sort_unstable_by(|a, b| match (a, b) {
156            (
157                MatchResult {
158                    ty: MatchType::Failed(_),
159                    info: Some(info1),
160                },
161                MatchResult {
162                    ty: MatchType::Failed(_),
163                    info: Some(info2),
164                },
165            ) => info1
166                .span
167                .len()
168                .cmp(&info2.span.len())
169                .then_with(|| info1.span.start().cmp(&info2.span.start()))
170                .then_with(|| info1.pattern_id.cmp(&info2.pattern_id)),
171            (
172                MatchResult {
173                    ty: MatchType::Failed(_),
174                    ..
175                },
176                _,
177            ) => Ordering::Less,
178            (
179                _,
180                MatchResult {
181                    ty: MatchType::Failed(_),
182                    ..
183                },
184            ) => Ordering::Greater,
185            (
186                MatchResult {
187                    info: Some(info1), ..
188                },
189                MatchResult {
190                    info: Some(info2), ..
191                },
192            ) => info1
193                .span
194                .len()
195                .cmp(&info2.span.len())
196                .then_with(|| info1.span.start().cmp(&info2.span.start()))
197                .then_with(|| info1.pattern_id.cmp(&info2.pattern_id)),
198            _ => unreachable!(),
199        });
200
201        // Return the next match
202        match self.found.pop_front() {
203            Some(found) => {
204                if let Some(range) = found.matched_range() {
205                    let last_end = self.last_match_end.get_or_insert(range.end);
206                    *last_end = core::cmp::max(*last_end, range.end);
207                }
208                Ok(found)
209            }
210            None => Ok(MatchResult::failed(
211                CheckFailedError::MatchNoneButExpected {
212                    span: self.span(),
213                    match_file: context.match_file(),
214                    note: None,
215                },
216            )),
217        }
218    }
219}