Skip to main content

litcheck_filecheck/pattern/matcher/matchers/
substring_set.rs

1use aho_corasick::{AhoCorasick, AhoCorasickBuilder, AhoCorasickKind, MatchKind, StartKind};
2
3use crate::{common::*, pattern::search::SubstringSetSearcher};
4
5/// This matcher is a variation on [SubstringMatcher] that searches
6/// for a match of any of multiple substrings in the input buffer.
7///
8/// This is much more efficient than performing multiple independent searches
9/// with [SubstringMatcher], so should be used whenever multiple substring
10/// patterns could be matched at the same time.
11pub struct SubstringSetMatcher<'a> {
12    /// The set of patterns to be matched
13    patterns: Vec<Span<Cow<'a, str>>>,
14    /// The automaton that will perform the search
15    searcher: AhoCorasick,
16}
17impl<'a> fmt::Debug for SubstringSetMatcher<'a> {
18    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
19        f.debug_struct("SubstringSetMatcher")
20            .field("patterns", &self.patterns)
21            .field("kind", &self.searcher.kind())
22            .field("start_kind", &self.searcher.start_kind())
23            .field("match_kind", &self.searcher.match_kind())
24            .finish()
25    }
26}
27impl<'a> SubstringSetMatcher<'a> {
28    /// Create a new matcher for the given set of substring patterns
29    ///
30    /// NOTE: This function will panic if the set is empty.
31    pub fn new(patterns: Vec<Span<Cow<'a, str>>>, config: &Config) -> DiagResult<Self> {
32        let patterns = patterns
33            .into_iter()
34            .map(|p| {
35                p.map(|p| {
36                    text::canonicalize_horizontal_whitespace(p, config.options.strict_whitespace)
37                })
38            })
39            .collect();
40
41        let mut builder = SubstringSetBuilder::new_with_patterns(patterns);
42        builder.case_insensitive(config.options.ignore_case);
43        builder.build()
44    }
45
46    pub fn search<'input, 'patterns>(
47        &'patterns self,
48        input: Input<'input>,
49    ) -> DiagResult<SubstringSetSearcher<'a, 'patterns, 'input>> {
50        SubstringSetSearcher::new(input, Cow::Borrowed(&self.patterns))
51    }
52
53    pub fn pattern_len(&self) -> usize {
54        self.patterns.len()
55    }
56
57    pub fn first_pattern(&self) -> Span<usize> {
58        self.patterns
59            .iter()
60            .enumerate()
61            .map(|(i, p)| Span::new(p.span(), i))
62            .min_by_key(|span| span.span().start())
63            .unwrap()
64    }
65
66    pub fn first_pattern_span(&self) -> SourceSpan {
67        self.first_pattern().span()
68    }
69
70    /// Get a builder for configuring and building a new [SubstringSetMatcher]
71    pub fn build() -> SubstringSetBuilder<'a> {
72        SubstringSetBuilder::default()
73    }
74
75    /// Search for all of the non-overlapping matches in the input
76    pub fn try_match_all<'input>(&self, input: Input<'input>) -> Vec<MatchInfo<'input>> {
77        let mut matches = vec![];
78        for matched in self.searcher.find_iter(input) {
79            let pattern_id = matched.pattern().as_usize();
80            let pattern_span = self.patterns[pattern_id].span();
81            let span = SourceSpan::from_range_unchecked(input.source_id(), matched.range());
82            matches.push(MatchInfo::new_with_pattern(span, pattern_span, pattern_id))
83        }
84        matches
85    }
86
87    /// Search for all of the matches in the input, including overlapping matches
88    pub fn try_match_overlapping<'input>(&self, input: Input<'input>) -> Vec<MatchInfo<'input>> {
89        let mut matches = vec![];
90        for matched in self.searcher.find_overlapping_iter(input) {
91            let pattern_id = matched.pattern().as_usize();
92            let pattern_span = self.patterns[pattern_id].span();
93            let span = SourceSpan::from_range_unchecked(input.source_id(), matched.range());
94            matches.push(MatchInfo::new_with_pattern(span, pattern_span, pattern_id))
95        }
96        matches
97    }
98}
99impl<'a> Spanned for SubstringSetMatcher<'a> {
100    fn span(&self) -> SourceSpan {
101        let start = self
102            .patterns
103            .iter()
104            .map(|p| p.span())
105            .min_by_key(|span| span.start())
106            .unwrap();
107        let end = self
108            .patterns
109            .iter()
110            .map(|p| p.span())
111            .max_by_key(|span| span.end())
112            .unwrap();
113        SourceSpan::from_range_unchecked(
114            start.source_id(),
115            start.start().to_usize()..end.end().to_usize(),
116        )
117    }
118}
119impl<'a> MatcherMut for SubstringSetMatcher<'a> {
120    fn try_match_mut<'input, 'context, C>(
121        &self,
122        input: Input<'input>,
123        context: &mut C,
124    ) -> DiagResult<MatchResult<'input>>
125    where
126        C: Context<'input, 'context> + ?Sized,
127    {
128        self.try_match(input, context)
129    }
130}
131impl<'a> Matcher for SubstringSetMatcher<'a> {
132    fn try_match<'input, 'context, C>(
133        &self,
134        input: Input<'input>,
135        context: &C,
136    ) -> DiagResult<MatchResult<'input>>
137    where
138        C: Context<'input, 'context> + ?Sized,
139    {
140        if let Some(matched) = self.searcher.find(input) {
141            let pattern_id = matched.pattern().as_usize();
142            let pattern_span = self.patterns[pattern_id].span();
143            let span = SourceSpan::from_range_unchecked(input.source_id(), matched.range());
144            Ok(MatchResult::ok(MatchInfo::new_with_pattern(
145                span,
146                pattern_span,
147                pattern_id,
148            )))
149        } else {
150            Ok(MatchResult::failed(
151                CheckFailedError::MatchNoneButExpected {
152                    span: self.span(),
153                    match_file: context.source_file(self.span().source_id()).unwrap(),
154                    note: None,
155                },
156            ))
157        }
158    }
159}
160
161pub struct SubstringSetBuilder<'a> {
162    patterns: Vec<Span<Cow<'a, str>>>,
163    start_kind: Option<StartKind>,
164    match_kind: Option<MatchKind>,
165    case_insensitive: bool,
166    support_overlapping_matches: bool,
167}
168impl<'a> Default for SubstringSetBuilder<'a> {
169    #[inline]
170    fn default() -> Self {
171        Self::new_with_patterns(vec![])
172    }
173}
174impl<'a> SubstringSetBuilder<'a> {
175    #[inline]
176    pub fn new() -> Self {
177        Self {
178            patterns: vec![],
179            start_kind: None,
180            match_kind: None,
181            case_insensitive: false,
182            support_overlapping_matches: false,
183        }
184    }
185
186    #[inline]
187    pub fn new_with_patterns(patterns: Vec<Span<Cow<'a, str>>>) -> Self {
188        Self {
189            patterns,
190            start_kind: None,
191            match_kind: None,
192            case_insensitive: false,
193            support_overlapping_matches: false,
194        }
195    }
196
197    /// Add `pattern` to the set of substrings to match
198    pub fn with_pattern(&mut self, pattern: Span<Cow<'a, str>>) -> &mut Self {
199        self.patterns.push(pattern);
200        self
201    }
202
203    /// Add `patterns` to the set of substrings to match
204    pub fn with_patterns<I>(&mut self, patterns: I) -> &mut Self
205    where
206        I: IntoIterator<Item = Span<Cow<'a, str>>>,
207    {
208        self.patterns.extend(patterns);
209        self
210    }
211
212    /// Set whether or not the matcher will support anchored searches.
213    ///
214    /// Since supporting anchored searches can be significantly more expensive,
215    /// you should only do so if you need such searches.
216    pub fn support_anchored_search(&mut self, yes: bool) -> &mut Self {
217        self.start_kind = if yes {
218            Some(StartKind::Both)
219        } else {
220            Some(StartKind::Unanchored)
221        };
222        self
223    }
224
225    /// Set whether or not the matcher will support asking for overlapping matches
226    ///
227    /// NOTE: This will force the [MatchKind] to `MatchKind::Standard`, as other semantics
228    /// are not support when computing overlapping matches.
229    pub fn support_overlapping_matches(&mut self, yes: bool) -> &mut Self {
230        self.support_overlapping_matches = yes;
231        self.match_kind = Some(MatchKind::Standard);
232        self
233    }
234
235    /// Set whether or not the search is case-sensitive.
236    ///
237    /// NOTE: This only applies to ASCII characters, if you require unicode
238    /// case insensitivity, you should use [RegexMatcher] or [RegexSetMatcher]
239    /// instead.
240    pub fn case_insensitive(&mut self, yes: bool) -> &mut Self {
241        self.case_insensitive = yes;
242        self
243    }
244
245    /// Configure the match semantics for this matcher
246    ///
247    /// NOTE: This function will panic if the provided option conflicts with
248    /// the `support_overlapping_matches` setting, as only the default semantics
249    /// are supported when overlapping matches are computed.
250    pub fn match_kind(&mut self, kind: MatchKind) -> &mut Self {
251        assert!(
252            !self.support_overlapping_matches,
253            "cannot support {kind:?} when overlapping matches are enabled"
254        );
255        self.match_kind = Some(kind);
256        self
257    }
258
259    /// Build the [SubstringSetMatcher]
260    ///
261    /// This function will panic if there are no patterns configured, or if
262    /// an incompatible configuration is provided.
263    pub fn build(self) -> DiagResult<SubstringSetMatcher<'a>> {
264        assert!(
265            !self.patterns.is_empty(),
266            "there must be at least one pattern in the set"
267        );
268
269        let kind = if self.support_overlapping_matches {
270            Some(AhoCorasickKind::DFA)
271        } else {
272            None
273        };
274        let match_kind = self.match_kind.unwrap_or(MatchKind::LeftmostLongest);
275        let start_kind = self.start_kind.unwrap_or(StartKind::Unanchored);
276        let mut builder = AhoCorasickBuilder::new();
277        builder
278            .ascii_case_insensitive(self.case_insensitive)
279            .match_kind(match_kind)
280            .start_kind(start_kind)
281            .kind(kind);
282        let searcher = builder
283            .build(self.patterns.iter().map(|p| p.as_bytes()))
284            .map_err(|err| {
285                let labels = self
286                    .patterns
287                    .iter()
288                    .map(|s| Label::new(s.span(), err.to_string()).into());
289                let diag = Diag::new("failed to build multi-substring aho-corasick searcher")
290                    .and_labels(labels)
291                    .with_help(format!(
292                        "search configuration: {kind:?}, {match_kind:?}, {start_kind:?}"
293                    ));
294                Report::from(diag)
295            })?;
296        Ok(SubstringSetMatcher {
297            patterns: self.patterns,
298            searcher,
299        })
300    }
301}
302
303#[cfg(test)]
304mod tests {
305    use crate::{source_file, span};
306
307    use super::*;
308
309    #[test]
310    fn test_multi_substring_matcher_overlapping() -> DiagResult<()> {
311        const INPUT: &str = "
312define void @sub1(i32* %p, i32 %v) {
313entry:
314        %0 = tail call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %p, i32 %v)
315        ret void
316}
317
318define void @inc4(i64* %p) {
319entry:
320        %0 = tail call i64 @llvm.atomic.load.add.i64.p0i64(i64* %p, i64 1)
321        ret void
322}
323";
324        let mut context = TestContext::new();
325        let match_file = source_file!(
326            context.config,
327            "
328CHECK-DAG: tail call i64
329CHECK-DAG: tail call i32
330"
331        );
332        let input_file = source_file!(context.config, INPUT);
333        context
334            .with_checks(match_file)
335            .with_input(input_file.clone());
336
337        let pattern1 = Span::new(
338            span!(input_file.id(), 12, 24),
339            Cow::Borrowed("tail call i64"),
340        );
341        let pattern2 = Span::new(
342            span!(input_file.id(), 25, 41),
343            Cow::Borrowed("tail call i32"),
344        );
345        let matcher = SubstringSetMatcher::new(vec![pattern1, pattern2], &context.config)
346            .expect("expected pattern to be valid");
347        let mctx = context.match_context();
348        let input = mctx.search();
349        let result = matcher.try_match(input, &mctx)?;
350        let info = result.info.expect("expected match");
351        assert_eq!(info.span.start().to_u32(), 58);
352        assert_eq!(info.span.len(), 13);
353        assert_eq!(input.as_str(info.matched_range()), "tail call i32");
354        Ok(())
355    }
356
357    #[test]
358    fn test_multi_substring_matcher_overlapped() -> DiagResult<()> {
359        const INPUT: &str = "
360define void @sub1(i32* %p, i32 %v) {
361entry:
362        %0 = tail call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %p, i32 %v)
363        ret void
364}
365
366define void @inc4(i64* %p) {
367entry:
368        %0 = tail call i64 @llvm.atomic.load.add.i64.p0i64(i64* %p, i64 1)
369        ret void
370}
371";
372        let mut context = TestContext::new();
373        let match_file = source_file!(
374            context.config,
375            "
376CHECK-DAG: tail call i32
377CHECK-DAG: tail call
378"
379        );
380        let input_file = source_file!(context.config, INPUT);
381        context
382            .with_checks(match_file)
383            .with_input(input_file.clone());
384
385        let pattern1 = Span::new(
386            span!(input_file.id(), 12, 24),
387            Cow::Borrowed("tail call i32"),
388        );
389        let pattern2 = Span::new(span!(input_file.id(), 25, 37), Cow::Borrowed("tail call"));
390        let matcher = SubstringSetMatcher::new(vec![pattern1, pattern2], &context.config)
391            .expect("expected pattern to be valid");
392        let mctx = context.match_context();
393        let input = mctx.search();
394        let result = matcher.try_match(input, &mctx)?;
395        let info = result.info.expect("expected match");
396        assert_eq!(info.span.start().to_u32(), 58);
397        assert_eq!(info.span.len(), 13);
398        assert_eq!(input.as_str(info.matched_range()), "tail call i32");
399        Ok(())
400    }
401
402    #[test]
403    fn test_multi_substring_matcher_disjoint() -> DiagResult<()> {
404        const INPUT: &str = "
405define void @sub1(i32* %p, i32 %v) {
406entry:
407        %0 = tail call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %p, i32 %v)
408        ret void
409}
410
411define void @inc4(i64* %p) {
412entry:
413        %0 = tail call i64 @llvm.atomic.load.add.i64.p0i64(i64* %p, i64 1)
414        ret void
415}
416";
417        let mut context = TestContext::new();
418        let match_file = source_file!(
419            context.config,
420            "
421CHECK-DAG: inc4
422CHECK-DAG: sub1
423"
424        );
425        let input_file = source_file!(context.config, INPUT);
426        context
427            .with_checks(match_file)
428            .with_input(input_file.clone());
429
430        let pattern1 = Span::new(span!(input_file.id(), 12, 17), Cow::Borrowed("inc4"));
431        let pattern2 = Span::new(span!(input_file.id(), 19, 35), Cow::Borrowed("sub1"));
432        let matcher = SubstringSetMatcher::new(vec![pattern1, pattern2], &context.config)
433            .expect("expected pattern to be valid");
434        let mctx = context.match_context();
435        let input = mctx.search();
436        let result = matcher.try_match(input, &mctx)?;
437        let info = result.info.expect("expected match");
438        assert_eq!(info.span.start().to_u32(), 14);
439        assert_eq!(info.span.len(), 4);
440        assert_eq!(input.as_str(info.matched_range()), "sub1");
441        Ok(())
442    }
443
444    #[test]
445    fn test_multi_substring_matcher_anchored() -> DiagResult<()> {
446        const INPUT: &str = "
447define void @sub1(i32* %p, i32 %v) {
448entry:
449        %0 = tail call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %p, i32 %v)
450        ret void
451}
452
453define void @inc4(i64* %p) {
454entry:
455        %0 = tail call i64 @llvm.atomic.load.add.i64.p0i64(i64* %p, i64 1)
456        ret void
457}
458";
459        let mut context = TestContext::new();
460        let match_file = source_file!(
461            context.config,
462            "
463CHECK-DAG: @inc4
464CHECK-DAG: @sub1
465"
466        );
467        let input_file = source_file!(context.config, INPUT);
468        context
469            .with_checks(match_file.clone())
470            .with_input(input_file);
471
472        let pattern1_span = SourceSpan::from_range_unchecked(match_file.id(), 13..18);
473        let pattern1 = Span::new(pattern1_span, Cow::Borrowed("@inc4"));
474        let pattern2_span = SourceSpan::from_range_unchecked(match_file.id(), 30..35);
475        let pattern2 = Span::new(pattern2_span, Cow::Borrowed("@sub1"));
476        let mut builder = SubstringSetMatcher::build();
477        builder
478            .with_patterns([pattern1, pattern2])
479            .support_anchored_search(true);
480        let matcher = builder.build().expect("expected pattern to be valid");
481        let mctx = context.match_context();
482        let input = mctx.search_range(13..).anchored(true);
483        let result = matcher.try_match(input, &mctx)?;
484        let info = result.info.expect("expected match");
485        assert_eq!(info.span.start().to_u32(), 13);
486        assert_eq!(info.span.len(), 5);
487        assert_eq!(input.as_str(info.matched_range()), "@sub1");
488
489        let input = mctx.search().anchored(true);
490        let result = matcher.try_match(input, &mctx)?;
491        assert_eq!(result.info, None);
492        Ok(())
493    }
494}