Skip to main content

litcheck_filecheck/pattern/search/
pattern_set.rs

1use std::{cmp::Ordering, collections::VecDeque};
2
3use crate::common::*;
4
5pub struct PatternSetSearcher<'a, 'patterns, 'input> {
6    input: Input<'input>,
7    last_match_end: Option<usize>,
8    /// The set of raw input patterns from which
9    /// this matcher was constructed
10    patterns: &'patterns [Pattern<'a>],
11    next_starts: Vec<usize>,
12    /// A bitset representing patterns which should be skipped
13    excluded: SmallVec<[u64; 1]>,
14    /// The buffer of pending pattern matches
15    ///
16    /// Each time a search is performed, we locate the next match
17    /// for all patterns, place them into this buffer, and sort by
18    /// the start position of each match. We then pop matches from
19    /// the buffer until no more remain, at which point we locate
20    /// the next set of matches.
21    found: VecDeque<MatchResult<'input>>,
22}
23impl<'a, 'patterns, 'input> PatternSetSearcher<'a, 'patterns, 'input> {
24    pub fn new(input: Input<'input>, patterns: &'patterns [Pattern<'a>]) -> DiagResult<Self> {
25        let num_patterns = patterns.len();
26        let excluded_chunks = (num_patterns / 64) + (!num_patterns.is_multiple_of(64) as usize);
27        let excluded = smallvec![0; excluded_chunks];
28        let next_starts = vec![0; num_patterns];
29
30        Ok(Self {
31            input,
32            last_match_end: None,
33            patterns,
34            next_starts,
35            excluded,
36            found: VecDeque::with_capacity(num_patterns),
37        })
38    }
39}
40impl<'a, 'patterns, 'input> fmt::Debug for PatternSetSearcher<'a, 'patterns, 'input> {
41    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
42        f.debug_struct("PatternSetSearcher")
43            .field("patterns", &self.patterns)
44            .finish()
45    }
46}
47
48impl<'a, 'patterns, 'input> Spanned for PatternSetSearcher<'a, 'patterns, 'input> {
49    fn span(&self) -> SourceSpan {
50        let start = self
51            .patterns
52            .iter()
53            .map(|p| p.span())
54            .min_by(|a, b| a.start().cmp(&b.start()))
55            .unwrap();
56        let end = self
57            .patterns
58            .iter()
59            .map(|p| p.span())
60            .max_by(|a, b| a.end().cmp(&b.end()))
61            .unwrap();
62        SourceSpan::new(start.source_id(), Range::new(start.start(), end.end()))
63    }
64}
65impl<'a, 'patterns, 'input> PatternSearcher<'input> for PatternSetSearcher<'a, 'patterns, 'input> {
66    type Input = Input<'input>;
67    type PatternID = usize;
68
69    fn input(&self) -> &Self::Input {
70        &self.input
71    }
72    fn last_match_end(&self) -> Option<usize> {
73        self.last_match_end
74    }
75    fn set_last_match_end(&mut self, end: usize) {
76        self.last_match_end = Some(end);
77        self.input.set_start(end);
78        for start in self.next_starts.iter_mut() {
79            *start = end;
80        }
81    }
82    fn patterns_len(&self) -> usize {
83        self.patterns.len()
84    }
85    fn pattern_span(&self, id: Self::PatternID) -> SourceSpan {
86        self.patterns[id].span()
87    }
88    fn try_match_next<'context, C>(&mut self, context: &mut C) -> DiagResult<MatchResult<'input>>
89    where
90        C: Context<'input, 'context> + ?Sized,
91    {
92        if let Some(mr) = self.found.pop_front() {
93            if let Some(range) = mr.matched_range() {
94                let last_end = self.last_match_end.get_or_insert(range.end);
95                *last_end = core::cmp::max(*last_end, range.end);
96            }
97            return Ok(mr);
98        }
99
100        let mut next_start = self.input.start();
101        for (pattern_id, pattern) in self.patterns.iter().enumerate() {
102            let excluded_chunk = pattern_id / 64;
103            let excluded_bit = 1 << (pattern_id % 64);
104            let is_excluded = self.excluded[excluded_chunk] & excluded_bit == excluded_bit;
105
106            if is_excluded {
107                continue;
108            }
109
110            let mut input = self.input;
111            input.set_start(self.next_starts[pattern_id]);
112
113            let mut guard = context.protect();
114            match pattern.try_match_mut(self.input, &mut guard)? {
115                MatchResult {
116                    info: Some(mut info),
117                    ty,
118                } if ty.is_ok() => {
119                    let end = info.span.end().to_usize();
120                    next_start = core::cmp::min(end, next_start);
121                    self.next_starts[pattern_id] =
122                        if info.span.is_empty() && Some(end) == self.last_match_end {
123                            end.saturating_add(1)
124                        } else {
125                            end
126                        };
127                    info.pattern_id = pattern_id;
128                    self.found.push_back(MatchResult {
129                        info: Some(info),
130                        ty,
131                    });
132                }
133                MatchResult { info: None, .. } => {
134                    // We exclude patterns when we run out of matches for them
135                    self.next_starts[pattern_id] = self.input.buffer().len();
136                    self.excluded[excluded_chunk] |= excluded_bit;
137                }
138                MatchResult {
139                    info: Some(mut info),
140                    ty,
141                } => {
142                    // This is an error that we are going to return before other valid matches.
143                    let end = info.span.end().to_usize();
144                    next_start = core::cmp::min(end, next_start);
145                    self.next_starts[pattern_id] =
146                        if info.span.is_empty() && Some(end) == self.last_match_end {
147                            end.saturating_add(1)
148                        } else {
149                            end
150                        };
151                    info.pattern_id = pattern_id;
152                    self.found.push_back(MatchResult {
153                        info: Some(info),
154                        ty,
155                    });
156                }
157            }
158        }
159
160        // Set the input next start to the smallest end match
161        self.input.set_start(next_start);
162
163        // Sort the matches, errors first, then leftmost-longest (by span length, then by span start, then by pattern id)
164        let found = self.found.make_contiguous();
165        found.sort_unstable_by(|a, b| match (a, b) {
166            (
167                MatchResult {
168                    ty: MatchType::Failed(_),
169                    info: Some(info1),
170                },
171                MatchResult {
172                    ty: MatchType::Failed(_),
173                    info: Some(info2),
174                },
175            ) => info1
176                .span
177                .len()
178                .cmp(&info2.span.len())
179                .then_with(|| info1.span.start().cmp(&info2.span.start()))
180                .then_with(|| info1.pattern_id.cmp(&info2.pattern_id)),
181            (
182                MatchResult {
183                    ty: MatchType::Failed(_),
184                    ..
185                },
186                _,
187            ) => Ordering::Less,
188            (
189                _,
190                MatchResult {
191                    ty: MatchType::Failed(_),
192                    ..
193                },
194            ) => Ordering::Greater,
195            (
196                MatchResult {
197                    info: Some(info1), ..
198                },
199                MatchResult {
200                    info: Some(info2), ..
201                },
202            ) => info1
203                .span
204                .len()
205                .cmp(&info2.span.len())
206                .then_with(|| info1.span.start().cmp(&info2.span.start()))
207                .then_with(|| info1.pattern_id.cmp(&info2.pattern_id)),
208            _ => unreachable!(),
209        });
210
211        // Return the next match
212        match self.found.pop_front() {
213            Some(found) => {
214                if let Some(range) = found.matched_range() {
215                    let last_end = self.last_match_end.get_or_insert(range.end);
216                    *last_end = core::cmp::max(*last_end, range.end);
217                }
218                Ok(found)
219            }
220            None => Ok(MatchResult::failed(
221                CheckFailedError::MatchNoneButExpected {
222                    span: self.span(),
223                    match_file: context.source_file(self.span().source_id()).unwrap(),
224                    note: None,
225                },
226            )),
227        }
228    }
229}