Skip to main content

litcheck_filecheck/pattern/search/
substring_set.rs

1use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind, StartKind};
2
3use crate::common::*;
4
5use super::AhoCorasickSearcher;
6
7pub struct SubstringSetSearcher<'a, 'patterns, 'input> {
8    source_id: SourceId,
9    buffer: &'input [u8],
10    crlf: bool,
11    /// The set of raw input patterns from which
12    /// this matcher was constructed
13    patterns: Cow<'patterns, [Span<Cow<'a, str>>]>,
14    /// The compiled regex which will be used to search the input buffer
15    pattern: AhoCorasick,
16    /// The searcher used to maintain the search state in the buffer
17    searcher: AhoCorasickSearcher<'input>,
18}
19impl<'a, 'patterns, 'input> SubstringSetSearcher<'a, 'patterns, 'input> {
20    pub fn new(
21        input: Input<'input>,
22        patterns: Cow<'patterns, [Span<Cow<'a, str>>]>,
23    ) -> DiagResult<Self> {
24        let source_id = input.source_id();
25        let buffer = input.buffer();
26        let crlf = input.is_crlf();
27        let mut builder = AhoCorasickBuilder::new();
28        builder
29            .ascii_case_insensitive(false)
30            .match_kind(MatchKind::LeftmostLongest)
31            .start_kind(StartKind::Unanchored);
32        let pattern = builder
33            .build(patterns.iter().map(|p| p.as_bytes()))
34            .map_err(|err| {
35                let labels = patterns
36                    .iter()
37                    .map(|s| Label::new(s.span(), err.to_string()).into());
38                let diag = Diag::new("failed to build substring set searcher")
39                    .and_labels(labels)
40                    .with_help(format!(
41                        "search configuration: {:?}, {:?}",
42                        MatchKind::LeftmostLongest,
43                        StartKind::Unanchored
44                    ));
45                Report::from(diag)
46            })?;
47
48        let searcher = AhoCorasickSearcher::new(input.into());
49
50        Ok(Self {
51            source_id,
52            buffer,
53            crlf,
54            patterns,
55            pattern,
56            searcher,
57        })
58    }
59
60    pub fn input(&self) -> Input<'input> {
61        let input = self.searcher.input();
62        Input::new(self.source_id, self.buffer, self.crlf)
63            .bounded(input.get_range())
64            .anchored(input.get_anchored().is_anchored())
65    }
66
67    pub fn num_patterns(&self) -> usize {
68        self.patterns.len()
69    }
70
71    pub fn pattern_span(&self, index: usize) -> SourceSpan {
72        self.patterns[index].span()
73    }
74}
75impl<'a, 'patterns, 'input> fmt::Debug for SubstringSetSearcher<'a, 'patterns, 'input> {
76    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
77        f.debug_struct("SubstringSetSearcher")
78            .field("patterns", &self.patterns)
79            .field("kind", &self.pattern.kind())
80            .field("start_kind", &self.pattern.start_kind())
81            .field("match_kind", &self.pattern.match_kind())
82            .finish()
83    }
84}
85
86impl<'a, 'patterns, 'input> Spanned for SubstringSetSearcher<'a, 'patterns, 'input> {
87    fn span(&self) -> SourceSpan {
88        let start = self
89            .patterns
90            .iter()
91            .map(|p| p.span())
92            .min_by(|a, b| a.start().cmp(&b.start()))
93            .unwrap();
94        let end = self
95            .patterns
96            .iter()
97            .map(|p| p.span())
98            .max_by(|a, b| a.end().cmp(&b.end()))
99            .unwrap();
100        SourceSpan::new(start.source_id(), Range::new(start.start(), end.end()))
101    }
102}
103impl<'a, 'patterns, 'input> PatternSearcher<'input>
104    for SubstringSetSearcher<'a, 'patterns, 'input>
105{
106    type Input = aho_corasick::Input<'input>;
107    type PatternID = aho_corasick::PatternID;
108
109    fn input(&self) -> &Self::Input {
110        self.searcher.input()
111    }
112    fn last_match_end(&self) -> Option<usize> {
113        self.searcher.last_match_end()
114    }
115    fn set_last_match_end(&mut self, end: usize) {
116        self.searcher.set_last_match_end(end);
117    }
118    fn patterns_len(&self) -> usize {
119        self.num_patterns()
120    }
121    fn pattern_span(&self, id: Self::PatternID) -> SourceSpan {
122        SubstringSetSearcher::pattern_span(self, id.as_usize())
123    }
124    fn try_match_next<'context, C>(&mut self, context: &mut C) -> DiagResult<MatchResult<'input>>
125    where
126        C: Context<'input, 'context> + ?Sized,
127    {
128        use super::Input as SearchInput;
129        let result = self
130            .searcher
131            .advance(|input| self.pattern.try_find(input.as_input()));
132        if let Some(matched) = result {
133            let pattern_id = matched.pattern().as_usize();
134            let pattern_span = self.patterns[pattern_id].span();
135            Ok(MatchResult::ok(MatchInfo::new_with_pattern(
136                SourceSpan::try_from_range(self.input().source_id(), matched.range()).unwrap(),
137                pattern_span,
138                pattern_id,
139            )))
140        } else {
141            Ok(MatchResult::failed(
142                CheckFailedError::MatchNoneButExpected {
143                    span: self.span(),
144                    match_file: context.source_file(self.span().source_id()).unwrap(),
145                    note: None,
146                },
147            ))
148        }
149    }
150}