litcheck_filecheck/pattern/matcher/matchers/
substring_set.rs1use aho_corasick::{AhoCorasick, AhoCorasickBuilder, AhoCorasickKind, MatchKind, StartKind};
2
3use crate::{common::*, pattern::search::SubstringSetSearcher};
4
5pub struct SubstringSetMatcher<'a> {
12 patterns: Vec<Span<Cow<'a, str>>>,
14 searcher: AhoCorasick,
16}
17impl<'a> fmt::Debug for SubstringSetMatcher<'a> {
18 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
19 f.debug_struct("SubstringSetMatcher")
20 .field("patterns", &self.patterns)
21 .field("kind", &self.searcher.kind())
22 .field("start_kind", &self.searcher.start_kind())
23 .field("match_kind", &self.searcher.match_kind())
24 .finish()
25 }
26}
27impl<'a> SubstringSetMatcher<'a> {
28 pub fn new(patterns: Vec<Span<Cow<'a, str>>>, config: &Config) -> DiagResult<Self> {
32 let patterns = patterns
33 .into_iter()
34 .map(|p| {
35 p.map(|p| {
36 text::canonicalize_horizontal_whitespace(p, config.options.strict_whitespace)
37 })
38 })
39 .collect();
40
41 let mut builder = SubstringSetBuilder::new_with_patterns(patterns);
42 builder.case_insensitive(config.options.ignore_case);
43 builder.build()
44 }
45
46 pub fn search<'input, 'patterns>(
47 &'patterns self,
48 input: Input<'input>,
49 ) -> DiagResult<SubstringSetSearcher<'a, 'patterns, 'input>> {
50 SubstringSetSearcher::new(input, Cow::Borrowed(&self.patterns))
51 }
52
53 pub fn pattern_len(&self) -> usize {
54 self.patterns.len()
55 }
56
57 pub fn first_pattern(&self) -> Span<usize> {
58 self.patterns
59 .iter()
60 .enumerate()
61 .map(|(i, p)| Span::new(p.span(), i))
62 .min_by_key(|span| span.span().start())
63 .unwrap()
64 }
65
66 pub fn first_pattern_span(&self) -> SourceSpan {
67 self.first_pattern().span()
68 }
69
70 pub fn build() -> SubstringSetBuilder<'a> {
72 SubstringSetBuilder::default()
73 }
74
75 pub fn try_match_all<'input>(&self, input: Input<'input>) -> Vec<MatchInfo<'input>> {
77 let mut matches = vec![];
78 for matched in self.searcher.find_iter(input) {
79 let pattern_id = matched.pattern().as_usize();
80 let pattern_span = self.patterns[pattern_id].span();
81 let span = SourceSpan::from_range_unchecked(input.source_id(), matched.range());
82 matches.push(MatchInfo::new_with_pattern(span, pattern_span, pattern_id))
83 }
84 matches
85 }
86
87 pub fn try_match_overlapping<'input>(&self, input: Input<'input>) -> Vec<MatchInfo<'input>> {
89 let mut matches = vec![];
90 for matched in self.searcher.find_overlapping_iter(input) {
91 let pattern_id = matched.pattern().as_usize();
92 let pattern_span = self.patterns[pattern_id].span();
93 let span = SourceSpan::from_range_unchecked(input.source_id(), matched.range());
94 matches.push(MatchInfo::new_with_pattern(span, pattern_span, pattern_id))
95 }
96 matches
97 }
98}
99impl<'a> Spanned for SubstringSetMatcher<'a> {
100 fn span(&self) -> SourceSpan {
101 let start = self
102 .patterns
103 .iter()
104 .map(|p| p.span())
105 .min_by_key(|span| span.start())
106 .unwrap();
107 let end = self
108 .patterns
109 .iter()
110 .map(|p| p.span())
111 .max_by_key(|span| span.end())
112 .unwrap();
113 SourceSpan::from_range_unchecked(
114 start.source_id(),
115 start.start().to_usize()..end.end().to_usize(),
116 )
117 }
118}
119impl<'a> MatcherMut for SubstringSetMatcher<'a> {
120 fn try_match_mut<'input, 'context, C>(
121 &self,
122 input: Input<'input>,
123 context: &mut C,
124 ) -> DiagResult<MatchResult<'input>>
125 where
126 C: Context<'input, 'context> + ?Sized,
127 {
128 self.try_match(input, context)
129 }
130}
131impl<'a> Matcher for SubstringSetMatcher<'a> {
132 fn try_match<'input, 'context, C>(
133 &self,
134 input: Input<'input>,
135 context: &C,
136 ) -> DiagResult<MatchResult<'input>>
137 where
138 C: Context<'input, 'context> + ?Sized,
139 {
140 if let Some(matched) = self.searcher.find(input) {
141 let pattern_id = matched.pattern().as_usize();
142 let pattern_span = self.patterns[pattern_id].span();
143 let span = SourceSpan::from_range_unchecked(input.source_id(), matched.range());
144 Ok(MatchResult::ok(MatchInfo::new_with_pattern(
145 span,
146 pattern_span,
147 pattern_id,
148 )))
149 } else {
150 Ok(MatchResult::failed(
151 CheckFailedError::MatchNoneButExpected {
152 span: self.span(),
153 match_file: context.source_file(self.span().source_id()).unwrap(),
154 note: None,
155 },
156 ))
157 }
158 }
159}
160
161pub struct SubstringSetBuilder<'a> {
162 patterns: Vec<Span<Cow<'a, str>>>,
163 start_kind: Option<StartKind>,
164 match_kind: Option<MatchKind>,
165 case_insensitive: bool,
166 support_overlapping_matches: bool,
167}
168impl<'a> Default for SubstringSetBuilder<'a> {
169 #[inline]
170 fn default() -> Self {
171 Self::new_with_patterns(vec![])
172 }
173}
174impl<'a> SubstringSetBuilder<'a> {
175 #[inline]
176 pub fn new() -> Self {
177 Self {
178 patterns: vec![],
179 start_kind: None,
180 match_kind: None,
181 case_insensitive: false,
182 support_overlapping_matches: false,
183 }
184 }
185
186 #[inline]
187 pub fn new_with_patterns(patterns: Vec<Span<Cow<'a, str>>>) -> Self {
188 Self {
189 patterns,
190 start_kind: None,
191 match_kind: None,
192 case_insensitive: false,
193 support_overlapping_matches: false,
194 }
195 }
196
197 pub fn with_pattern(&mut self, pattern: Span<Cow<'a, str>>) -> &mut Self {
199 self.patterns.push(pattern);
200 self
201 }
202
203 pub fn with_patterns<I>(&mut self, patterns: I) -> &mut Self
205 where
206 I: IntoIterator<Item = Span<Cow<'a, str>>>,
207 {
208 self.patterns.extend(patterns);
209 self
210 }
211
212 pub fn support_anchored_search(&mut self, yes: bool) -> &mut Self {
217 self.start_kind = if yes {
218 Some(StartKind::Both)
219 } else {
220 Some(StartKind::Unanchored)
221 };
222 self
223 }
224
225 pub fn support_overlapping_matches(&mut self, yes: bool) -> &mut Self {
230 self.support_overlapping_matches = yes;
231 self.match_kind = Some(MatchKind::Standard);
232 self
233 }
234
235 pub fn case_insensitive(&mut self, yes: bool) -> &mut Self {
241 self.case_insensitive = yes;
242 self
243 }
244
245 pub fn match_kind(&mut self, kind: MatchKind) -> &mut Self {
251 assert!(
252 !self.support_overlapping_matches,
253 "cannot support {kind:?} when overlapping matches are enabled"
254 );
255 self.match_kind = Some(kind);
256 self
257 }
258
259 pub fn build(self) -> DiagResult<SubstringSetMatcher<'a>> {
264 assert!(
265 !self.patterns.is_empty(),
266 "there must be at least one pattern in the set"
267 );
268
269 let kind = if self.support_overlapping_matches {
270 Some(AhoCorasickKind::DFA)
271 } else {
272 None
273 };
274 let match_kind = self.match_kind.unwrap_or(MatchKind::LeftmostLongest);
275 let start_kind = self.start_kind.unwrap_or(StartKind::Unanchored);
276 let mut builder = AhoCorasickBuilder::new();
277 builder
278 .ascii_case_insensitive(self.case_insensitive)
279 .match_kind(match_kind)
280 .start_kind(start_kind)
281 .kind(kind);
282 let searcher = builder
283 .build(self.patterns.iter().map(|p| p.as_bytes()))
284 .map_err(|err| {
285 let labels = self
286 .patterns
287 .iter()
288 .map(|s| Label::new(s.span(), err.to_string()).into());
289 let diag = Diag::new("failed to build multi-substring aho-corasick searcher")
290 .and_labels(labels)
291 .with_help(format!(
292 "search configuration: {kind:?}, {match_kind:?}, {start_kind:?}"
293 ));
294 Report::from(diag)
295 })?;
296 Ok(SubstringSetMatcher {
297 patterns: self.patterns,
298 searcher,
299 })
300 }
301}
302
303#[cfg(test)]
304mod tests {
305 use crate::{source_file, span};
306
307 use super::*;
308
309 #[test]
310 fn test_multi_substring_matcher_overlapping() -> DiagResult<()> {
311 const INPUT: &str = "
312define void @sub1(i32* %p, i32 %v) {
313entry:
314 %0 = tail call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %p, i32 %v)
315 ret void
316}
317
318define void @inc4(i64* %p) {
319entry:
320 %0 = tail call i64 @llvm.atomic.load.add.i64.p0i64(i64* %p, i64 1)
321 ret void
322}
323";
324 let mut context = TestContext::new();
325 let match_file = source_file!(
326 context.config,
327 "
328CHECK-DAG: tail call i64
329CHECK-DAG: tail call i32
330"
331 );
332 let input_file = source_file!(context.config, INPUT);
333 context
334 .with_checks(match_file)
335 .with_input(input_file.clone());
336
337 let pattern1 = Span::new(
338 span!(input_file.id(), 12, 24),
339 Cow::Borrowed("tail call i64"),
340 );
341 let pattern2 = Span::new(
342 span!(input_file.id(), 25, 41),
343 Cow::Borrowed("tail call i32"),
344 );
345 let matcher = SubstringSetMatcher::new(vec![pattern1, pattern2], &context.config)
346 .expect("expected pattern to be valid");
347 let mctx = context.match_context();
348 let input = mctx.search();
349 let result = matcher.try_match(input, &mctx)?;
350 let info = result.info.expect("expected match");
351 assert_eq!(info.span.start().to_u32(), 58);
352 assert_eq!(info.span.len(), 13);
353 assert_eq!(input.as_str(info.matched_range()), "tail call i32");
354 Ok(())
355 }
356
357 #[test]
358 fn test_multi_substring_matcher_overlapped() -> DiagResult<()> {
359 const INPUT: &str = "
360define void @sub1(i32* %p, i32 %v) {
361entry:
362 %0 = tail call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %p, i32 %v)
363 ret void
364}
365
366define void @inc4(i64* %p) {
367entry:
368 %0 = tail call i64 @llvm.atomic.load.add.i64.p0i64(i64* %p, i64 1)
369 ret void
370}
371";
372 let mut context = TestContext::new();
373 let match_file = source_file!(
374 context.config,
375 "
376CHECK-DAG: tail call i32
377CHECK-DAG: tail call
378"
379 );
380 let input_file = source_file!(context.config, INPUT);
381 context
382 .with_checks(match_file)
383 .with_input(input_file.clone());
384
385 let pattern1 = Span::new(
386 span!(input_file.id(), 12, 24),
387 Cow::Borrowed("tail call i32"),
388 );
389 let pattern2 = Span::new(span!(input_file.id(), 25, 37), Cow::Borrowed("tail call"));
390 let matcher = SubstringSetMatcher::new(vec![pattern1, pattern2], &context.config)
391 .expect("expected pattern to be valid");
392 let mctx = context.match_context();
393 let input = mctx.search();
394 let result = matcher.try_match(input, &mctx)?;
395 let info = result.info.expect("expected match");
396 assert_eq!(info.span.start().to_u32(), 58);
397 assert_eq!(info.span.len(), 13);
398 assert_eq!(input.as_str(info.matched_range()), "tail call i32");
399 Ok(())
400 }
401
402 #[test]
403 fn test_multi_substring_matcher_disjoint() -> DiagResult<()> {
404 const INPUT: &str = "
405define void @sub1(i32* %p, i32 %v) {
406entry:
407 %0 = tail call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %p, i32 %v)
408 ret void
409}
410
411define void @inc4(i64* %p) {
412entry:
413 %0 = tail call i64 @llvm.atomic.load.add.i64.p0i64(i64* %p, i64 1)
414 ret void
415}
416";
417 let mut context = TestContext::new();
418 let match_file = source_file!(
419 context.config,
420 "
421CHECK-DAG: inc4
422CHECK-DAG: sub1
423"
424 );
425 let input_file = source_file!(context.config, INPUT);
426 context
427 .with_checks(match_file)
428 .with_input(input_file.clone());
429
430 let pattern1 = Span::new(span!(input_file.id(), 12, 17), Cow::Borrowed("inc4"));
431 let pattern2 = Span::new(span!(input_file.id(), 19, 35), Cow::Borrowed("sub1"));
432 let matcher = SubstringSetMatcher::new(vec![pattern1, pattern2], &context.config)
433 .expect("expected pattern to be valid");
434 let mctx = context.match_context();
435 let input = mctx.search();
436 let result = matcher.try_match(input, &mctx)?;
437 let info = result.info.expect("expected match");
438 assert_eq!(info.span.start().to_u32(), 14);
439 assert_eq!(info.span.len(), 4);
440 assert_eq!(input.as_str(info.matched_range()), "sub1");
441 Ok(())
442 }
443
444 #[test]
445 fn test_multi_substring_matcher_anchored() -> DiagResult<()> {
446 const INPUT: &str = "
447define void @sub1(i32* %p, i32 %v) {
448entry:
449 %0 = tail call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %p, i32 %v)
450 ret void
451}
452
453define void @inc4(i64* %p) {
454entry:
455 %0 = tail call i64 @llvm.atomic.load.add.i64.p0i64(i64* %p, i64 1)
456 ret void
457}
458";
459 let mut context = TestContext::new();
460 let match_file = source_file!(
461 context.config,
462 "
463CHECK-DAG: @inc4
464CHECK-DAG: @sub1
465"
466 );
467 let input_file = source_file!(context.config, INPUT);
468 context
469 .with_checks(match_file.clone())
470 .with_input(input_file);
471
472 let pattern1_span = SourceSpan::from_range_unchecked(match_file.id(), 13..18);
473 let pattern1 = Span::new(pattern1_span, Cow::Borrowed("@inc4"));
474 let pattern2_span = SourceSpan::from_range_unchecked(match_file.id(), 30..35);
475 let pattern2 = Span::new(pattern2_span, Cow::Borrowed("@sub1"));
476 let mut builder = SubstringSetMatcher::build();
477 builder
478 .with_patterns([pattern1, pattern2])
479 .support_anchored_search(true);
480 let matcher = builder.build().expect("expected pattern to be valid");
481 let mctx = context.match_context();
482 let input = mctx.search_range(13..).anchored(true);
483 let result = matcher.try_match(input, &mctx)?;
484 let info = result.info.expect("expected match");
485 assert_eq!(info.span.start().to_u32(), 13);
486 assert_eq!(info.span.len(), 5);
487 assert_eq!(input.as_str(info.matched_range()), "@sub1");
488
489 let input = mctx.search().anchored(true);
490 let result = matcher.try_match(input, &mctx)?;
491 assert_eq!(result.info, None);
492 Ok(())
493 }
494}