litcheck_filecheck/pattern/matcher/matchers/
all.rs

1use crate::{
2    ast::{CheckLine, CheckPattern, Prefix},
3    common::*,
4    pattern::{matcher::AlwaysMatch, PatternPrefix},
5};
6
7use super::{RegexSetMatcher, SubstringSetMatcher};
8
9#[derive(Debug)]
10pub enum MatchAll<'a> {
11    /// A matcher for a set of literal strings
12    Literal(SubstringSetMatcher<'a>),
13    /// A matcher for a set of regular expressions
14    Regex(RegexSetMatcher<'a>),
15    /// A highly dynamic pattern matcher, but less efficient, as
16    /// it is forced to perform some number of redundant searches
17    /// of the input in order to check for all patterns. This is
18    /// the only searcher which can match multiple patterns with
19    /// match/blocks substitutions in at least one of the pattern
20    /// prefixes
21    AnyPrefix {
22        /// The prefix patterns to search for
23        prefixes: Vec<Pattern<'a>>,
24        /// The suffix patterns corresponding to each prefix
25        suffixes: Vec<Vec<Pattern<'a>>>,
26    },
27    /// An optimized form of `AnyPrefix` when all of the prefixes
28    /// can be evaluated as a set of regular expressions.
29    RegexPrefix {
30        prefixes: RegexSetMatcher<'a>,
31        suffixes: Vec<Vec<Pattern<'a>>>,
32    },
33    /// An optimized form of `AnyPrefix` when all of the prefixes
34    /// can be evaluated as a set of substring literals.
35    SubstringPrefix {
36        prefixes: SubstringSetMatcher<'a>,
37        suffixes: Vec<Vec<Pattern<'a>>>,
38    },
39}
40
41impl<'a> MatchAll<'a> {
42    pub fn compile(
43        mut unordered: Vec<CheckLine<'a>>,
44        config: &Config,
45        interner: &mut StringInterner,
46    ) -> DiagResult<Self> {
47        use std::collections::BTreeMap;
48
49        let raw_prefixes = unordered
50            .iter_mut()
51            .map(|line| {
52                // Compact the patterns as much as possible first
53                line.pattern.compact(interner);
54                line.pattern.pop_prefix()
55            })
56            .collect::<SmallVec<[_; 4]>>();
57        let mut unique_prefixes = BTreeMap::<_, usize>::default();
58        let mut prefixes = Vec::with_capacity(raw_prefixes.len());
59        let mut suffixes = Vec::<Vec<Pattern<'a>>>::with_capacity(raw_prefixes.len());
60
61        for (id, prefix) in raw_prefixes.iter().enumerate() {
62            let span = prefix.span();
63            if let Some(canonical_prefix_id) = unique_prefixes.get(prefix).copied() {
64                let pattern =
65                    core::mem::replace(&mut unordered[id].pattern, CheckPattern::Empty(span));
66                if !pattern.is_empty() {
67                    suffixes[canonical_prefix_id]
68                        .push(Pattern::compile(pattern, config, interner)?);
69                } else {
70                    suffixes[canonical_prefix_id].push(Pattern::Empty(AlwaysMatch::new(span)));
71                }
72                continue;
73            }
74            let pattern_prefix = match &prefix {
75                Prefix::Literal(prefix) => PatternPrefix::Literal {
76                    id,
77                    prefix: prefix.clone(),
78                },
79                Prefix::Substring(prefix) => PatternPrefix::Substring {
80                    id,
81                    prefix: prefix.clone(),
82                },
83                Prefix::Regex(prefix) => PatternPrefix::Regex {
84                    id,
85                    prefix: prefix.clone(),
86                },
87                Prefix::Match(prefix) => PatternPrefix::Dynamic {
88                    id,
89                    prefix: prefix.clone(),
90                },
91                Prefix::Empty(span) => {
92                    let diag = Diag::new(
93                        "unexpected empty pattern encountered during pattern compilation",
94                    )
95                    .with_label(Label::new(*span, "pattern defined here"))
96                    .with_help("empty patterns are only valid in CHECK-NOT directives");
97                    return Err(Report::from(diag));
98                }
99            };
100            unique_prefixes.insert(prefix.clone(), id);
101            prefixes.push(pattern_prefix);
102            let pattern = core::mem::replace(&mut unordered[id].pattern, CheckPattern::Empty(span));
103            if !pattern.is_empty() {
104                suffixes.push(vec![Pattern::compile(pattern, config, interner)?]);
105            } else {
106                suffixes.push(vec![Pattern::Empty(AlwaysMatch::new(span))]);
107            }
108        }
109
110        // We've de-duplicated the prefixes, and combined all suffixes for shared prefixes,
111        // next, we need to determine what searcher to use for prefix matching, which depends
112        // on the composition of the patterns
113
114        // A single stage match means that each prefix is unique, and that the prefix represents the entire pattern
115        let is_single_stage = suffixes.iter().all(|patterns| {
116            patterns.is_empty() || matches!(patterns.as_slice(), [Pattern::Empty(_)])
117        });
118
119        // Literal patterns are always single stage, and can be performed with a simple substring set matcher
120        let is_literal = prefixes
121            .iter()
122            .all(|prefix| matches!(prefix, PatternPrefix::Literal { .. }));
123        if is_literal {
124            assert!(is_single_stage);
125            // We can use a simple substring searcher in this case
126            return SubstringSetMatcher::new(
127                prefixes
128                    .into_iter()
129                    .filter_map(PatternPrefix::into_str)
130                    .collect(),
131                config,
132            )
133            .map(MatchAll::Literal);
134        }
135
136        // Check if we have both literal+substring patterns, which require a slightly different strategy
137        let is_substring = prefixes.iter().all(|prefix| {
138            matches!(
139                prefix,
140                PatternPrefix::Literal { .. } | PatternPrefix::Substring { .. }
141            )
142        });
143        if is_substring {
144            // By definition this must be a multi-stage match
145            assert!(!is_single_stage);
146
147            let searcher = SubstringSetMatcher::new(
148                prefixes
149                    .into_iter()
150                    .filter_map(PatternPrefix::into_str)
151                    .collect(),
152                config,
153            )?;
154
155            return Ok(Self::SubstringPrefix {
156                prefixes: searcher,
157                suffixes,
158            });
159        }
160
161        // If all patterns are regular expressions, or can be safely converted to one, so we can
162        // use a regex searcher for at least the prefixes, if not all of the patterns
163        let is_regex = prefixes.iter().all(PatternPrefix::is_regex_compatible);
164        // Single-stage regular expression matches can be performed with a single matcher
165        if is_regex {
166            if is_single_stage {
167                return RegexSetMatcher::new(
168                    prefixes
169                        .into_iter()
170                        .filter_map(PatternPrefix::into_regex_pattern)
171                        .collect(),
172                    config,
173                    interner,
174                )
175                .map(MatchAll::Regex);
176            }
177
178            let searcher = RegexSetMatcher::new(
179                prefixes
180                    .into_iter()
181                    .filter_map(PatternPrefix::into_regex_pattern)
182                    .collect(),
183                config,
184                interner,
185            )?;
186            return Ok(Self::RegexPrefix {
187                prefixes: searcher,
188                suffixes,
189            });
190        }
191
192        // If we reach here, it's because at least one prefix contains a match/substitution block
193        // that cannot be easily grouped with other prefixes. In such cases we evaluate the search
194        // entirely manually rather than delegating parts to other searchers
195        let mut prefix_patterns = Vec::with_capacity(prefixes.len());
196        for prefix in prefixes.into_iter() {
197            prefix_patterns.push(Pattern::from_prefix(prefix, config, interner)?);
198        }
199
200        Ok(Self::AnyPrefix {
201            prefixes: prefix_patterns,
202            suffixes,
203        })
204    }
205}
206impl<'a> MatchAll<'a> {
207    pub fn pattern_len(&self) -> usize {
208        match self {
209            Self::Literal(ref matcher) => matcher.pattern_len(),
210            Self::Regex(ref matcher) => matcher.patterns_len(),
211            Self::AnyPrefix { suffixes, .. }
212            | Self::RegexPrefix { suffixes, .. }
213            | Self::SubstringPrefix { suffixes, .. } => suffixes.iter().map(|ps| ps.len()).sum(),
214        }
215    }
216
217    pub fn first_pattern(&self) -> Span<usize> {
218        match self {
219            Self::Literal(matcher) => matcher.first_pattern(),
220            Self::Regex(matcher) => matcher.first_pattern(),
221            Self::AnyPrefix {
222                ref prefixes,
223                ref suffixes,
224            } => {
225                let (prefix_id, start) = prefixes
226                    .iter()
227                    .enumerate()
228                    .map(|(i, p)| (i, p.span().start()))
229                    .min_by_key(|&(_, s)| s)
230                    .unwrap();
231                let (offset, end) = suffixes[prefix_id]
232                    .iter()
233                    .enumerate()
234                    .map(|(offset, p)| (offset, p.span().end()))
235                    .min_by_key(|&(_, e)| e)
236                    .unwrap();
237                Span::new(SourceSpan::from(start..end), prefix_id + offset)
238            }
239            Self::RegexPrefix {
240                ref prefixes,
241                ref suffixes,
242            } => {
243                let (first_prefix_span, first_prefix) = prefixes.first_pattern().into_parts();
244                let start = first_prefix_span.start();
245                let (offset, end) = suffixes[first_prefix]
246                    .iter()
247                    .enumerate()
248                    .map(|(offset, p)| (offset, p.span().end()))
249                    .min_by_key(|&(_, end)| end)
250                    .unwrap();
251                Span::new(SourceSpan::from(start..end), first_prefix + offset)
252            }
253            Self::SubstringPrefix {
254                ref prefixes,
255                ref suffixes,
256            } => {
257                let (first_prefix_span, first_prefix) = prefixes.first_pattern().into_parts();
258                let start = first_prefix_span.start();
259                let (offset, end) = suffixes[first_prefix]
260                    .iter()
261                    .enumerate()
262                    .map(|(offset, p)| (offset, p.span().end()))
263                    .min_by_key(|&(_, end)| end)
264                    .unwrap();
265                Span::new(SourceSpan::from(start..end), first_prefix + offset)
266            }
267        }
268    }
269
270    pub fn first_pattern_span(&self) -> SourceSpan {
271        self.first_pattern().span()
272    }
273}
274impl<'a> Spanned for MatchAll<'a> {
275    fn span(&self) -> SourceSpan {
276        match self {
277            Self::Literal(ref matcher) => matcher.span(),
278            Self::Regex(ref matcher) => matcher.span(),
279            Self::AnyPrefix {
280                ref prefixes,
281                ref suffixes,
282            } => {
283                let start = prefixes
284                    .iter()
285                    .map(|prefix| prefix.span().start())
286                    .min()
287                    .unwrap();
288                let end = prefixes
289                    .iter()
290                    .zip(suffixes.iter())
291                    .map(|(prefix, suffixes)| {
292                        suffixes
293                            .iter()
294                            .map(|p| p.span().end())
295                            .max()
296                            .unwrap_or(prefix.span().end())
297                    })
298                    .max()
299                    .unwrap();
300                SourceSpan::from(start..end)
301            }
302            Self::RegexPrefix {
303                ref prefixes,
304                ref suffixes,
305            } => {
306                let prefix_span = prefixes.span();
307                let start = prefix_span.start();
308                let prefix_end = prefix_span.end();
309                let end = core::cmp::max(
310                    prefix_end,
311                    suffixes
312                        .iter()
313                        .map(|suffixes| {
314                            suffixes
315                                .iter()
316                                .map(|p| p.span().end())
317                                .max()
318                                .unwrap_or(prefix_end)
319                        })
320                        .max()
321                        .unwrap(),
322                );
323                SourceSpan::from(start..end)
324            }
325            Self::SubstringPrefix {
326                ref prefixes,
327                ref suffixes,
328            } => {
329                let prefix_span = prefixes.span();
330                let start = prefix_span.start();
331                let prefix_end = prefix_span.end();
332                let end = core::cmp::max(
333                    prefix_end,
334                    suffixes
335                        .iter()
336                        .map(|suffixes| {
337                            suffixes
338                                .iter()
339                                .map(|p| p.span().end())
340                                .max()
341                                .unwrap_or(prefix_end)
342                        })
343                        .max()
344                        .unwrap(),
345                );
346                SourceSpan::from(start..end)
347            }
348        }
349    }
350}