litcheck_filecheck/pattern/matcher/matchers/
regex.rs

1use regex_automata::{
2    util::{captures::Captures, syntax},
3    PatternID,
4};
5
6use crate::{
7    ast::{Capture, RegexPattern},
8    common::*,
9    expr::ValueType,
10};
11
/// This matcher is used to match a single regular expression
///
/// This essentially corresponds to [SubstringMatcher], but
/// with regular expressions instead of literal strings.
///
/// Besides the compiled regex, the matcher keeps the source pattern (for
/// diagnostics) and per-group capture metadata.
pub struct RegexMatcher<'a> {
    /// The source pattern from which the regex was compiled
    pattern: Span<Cow<'a, str>>,
    /// The compiled form of the input regex
    regex: Regex,
    /// Metadata about captures in the pattern
    ///
    /// The constructors size this vector to the number of capture groups in
    /// the compiled regex, so it is indexed by group id; groups without any
    /// associated metadata hold `Capture::Ignore`.
    captures: Vec<Capture>,
}
24impl<'a> fmt::Debug for RegexMatcher<'a> {
25    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
26        f.debug_struct("RegexMatcher")
27            .field("pattern", &self.pattern)
28            .field("captures", &self.captures)
29            .finish()
30    }
31}
32impl<'a> RegexMatcher<'a> {
33    pub fn new(
34        pattern: RegexPattern<'a>,
35        config: &Config,
36        interner: &StringInterner,
37    ) -> DiagResult<Self> {
38        let span = pattern.span();
39        let regex = Regex::builder()
40            .syntax(
41                syntax::Config::new()
42                    .multi_line(true)
43                    .case_insensitive(config.ignore_case),
44            )
45            .build(pattern.as_ref())
46            .map_err(|error| build_error_to_diagnostic(error, 1, |_| span))?;
47
48        // Compute capture group information
49        let groups = regex.group_info();
50        let num_captures = groups.group_len(PatternID::ZERO);
51        let mut captures = vec![Capture::Ignore(span); num_captures];
52        for capture in pattern.captures.into_iter() {
53            if let Capture::Ignore(_) = capture {
54                continue;
55            }
56            if let Some(name) = capture.group_name() {
57                let group_name = interner.resolve(name);
58                let group_id = groups
59                    .to_index(PatternID::ZERO, group_name)
60                    .unwrap_or_else(|| panic!("expected group for capture of '{group_name}'"));
61                captures[group_id] = capture;
62            } else {
63                assert_eq!(
64                    &captures[0],
65                    &Capture::Ignore(span),
66                    "{capture:?} would overwrite a previous implicit capture group"
67                );
68                captures[0] = capture;
69            }
70        }
71
72        Ok(Self {
73            pattern: pattern.pattern,
74            regex,
75            captures,
76        })
77    }
78
79    pub fn new_nocapture(pattern: Span<Cow<'a, str>>, config: &Config) -> DiagResult<Self> {
80        let span = pattern.span();
81        let regex = Regex::builder()
82            .syntax(
83                syntax::Config::new()
84                    .multi_line(true)
85                    .case_insensitive(config.ignore_case),
86            )
87            .build(pattern.as_ref())
88            .map_err(|error| build_error_to_diagnostic(error, 1, |_| span))?;
89
90        // Compute capture group information
91        let groups = regex.group_info();
92        let num_captures = groups.group_len(PatternID::ZERO);
93        let captures = vec![Capture::Ignore(span); num_captures];
94
95        Ok(Self {
96            pattern,
97            regex,
98            captures,
99        })
100    }
101}
102impl<'a> MatcherMut for RegexMatcher<'a> {
103    fn try_match_mut<'input, 'context, C>(
104        &self,
105        input: Input<'input>,
106        context: &mut C,
107    ) -> DiagResult<MatchResult<'input>>
108    where
109        C: Context<'input, 'context> + ?Sized,
110    {
111        let matched = self.try_match(input, context)?;
112        matched.bind_captures_in(context);
113        Ok(matched)
114    }
115}
116impl<'a> Matcher for RegexMatcher<'a> {
117    fn try_match<'input, 'context, C>(
118        &self,
119        input: Input<'input>,
120        context: &C,
121    ) -> DiagResult<MatchResult<'input>>
122    where
123        C: Context<'input, 'context> + ?Sized,
124    {
125        let regex_input = input.into();
126        let mut captures = self.regex.create_captures();
127        self.regex.search_captures(&regex_input, &mut captures);
128        if let Some(matched) = captures.get_match() {
129            let span = SourceSpan::from(matched.range());
130            let mut capture_infos = Vec::with_capacity(captures.group_len());
131            for (index, (maybe_capture_span, capture)) in captures
132                .iter()
133                .zip(self.captures.iter().copied())
134                .enumerate()
135            {
136                if let Some(capture_span) = maybe_capture_span {
137                    let captured = input.as_str(capture_span.range());
138                    let capture_span = SourceSpan::from(capture_span.range());
139                    let result = try_convert_capture_to_type(
140                        matched.pattern(),
141                        index,
142                        self.pattern.span(),
143                        span,
144                        Span::new(capture_span, captured),
145                        capture,
146                        &captures,
147                        context,
148                    );
149                    match result {
150                        Ok(capture_info) => {
151                            capture_infos.push(capture_info);
152                        }
153                        Err(error) => return Ok(MatchResult::failed(error)),
154                    }
155                }
156            }
157            Ok(MatchResult::ok(MatchInfo {
158                span,
159                pattern_span: self.pattern.span(),
160                pattern_id: 0,
161                captures: capture_infos,
162            }))
163        } else {
164            Ok(MatchResult::failed(
165                CheckFailedError::MatchNoneButExpected {
166                    span: self.pattern.span(),
167                    match_file: context.match_file(),
168                    note: None,
169                },
170            ))
171        }
172    }
173}
174impl<'a> Spanned for RegexMatcher<'a> {
175    fn span(&self) -> SourceSpan {
176        self.pattern.span()
177    }
178}
179
180#[allow(clippy::too_many_arguments)]
181pub fn try_convert_capture_to_type<'input, 'context, C>(
182    pattern_id: PatternID,
183    group_id: usize,
184    pattern_span: SourceSpan,
185    overall_span: SourceSpan,
186    captured: Span<&'input str>,
187    capture: Capture,
188    captures: &Captures,
189    context: &C,
190) -> Result<CaptureInfo<'input>, CheckFailedError>
191where
192    C: Context<'input, 'context> + ?Sized,
193{
194    let (capture_span, captured) = captured.into_parts();
195    let name = capture.name();
196    let value = match capture.value_type() {
197        ValueType::String => Value::Str(Cow::Borrowed(captured)),
198        ValueType::Number(format) => {
199            match Number::parse_with_format(Span::new(capture_span, captured), format) {
200                Ok(n) => Value::Num(Expr::Num(n)),
201                Err(error) => {
202                    return Err(CheckFailedError::MatchFoundConstraintFailed {
203                        span: overall_span,
204                        input_file: context.input_file(),
205                        pattern: Some(RelatedCheckError {
206                            span: pattern_span,
207                            match_file: context.match_file(),
208                        }),
209                        error: Some(RelatedError::new(Report::new(error))),
210                        help: Some(if let Some(name) = name {
211                            let name = context.resolve(name);
212                            format!("expected {}; the constraint was required when parsing the capture group for '{name}'", format.describe())
213                        } else if let Some(group_name) =
214                            captures.group_info().to_name(pattern_id, group_id)
215                        {
216                            format!("expected {}; the constraint was required when parsing the capture group named '{group_name}'", format.describe())
217                        } else {
218                            format!("expected {}; the constraint was required when parsing capture group {group_id}", format.describe())
219                        }),
220                    });
221                }
222            }
223        }
224    };
225
226    Ok(CaptureInfo {
227        span: capture_span,
228        pattern_span,
229        index: group_id,
230        value,
231        capture,
232    })
233}
234
/// Uniform access to the details of a regex build error.
///
/// Implemented in this file for the build error types of the different
/// `regex_automata` engines, so that `build_error_to_diagnostic` can treat
/// them alike. Both methods default to `None`; an implementation overrides
/// whichever piece of information its error type can provide.
pub(crate) trait RegexBuildError: std::error::Error + std::fmt::Display {
    /// The pattern the error is attributed to, if the error type reports one.
    #[inline(always)]
    fn pattern(&self) -> Option<PatternID> {
        None
    }

    /// The underlying regex syntax error, if the failure was caused by one.
    #[inline(always)]
    fn syntax_error(&self) -> Option<&regex_syntax::Error> {
        None
    }
}
246impl RegexBuildError for regex_automata::meta::BuildError {
247    #[inline(always)]
248    fn pattern(&self) -> Option<PatternID> {
249        regex_automata::meta::BuildError::pattern(self)
250    }
251
252    #[inline(always)]
253    fn syntax_error(&self) -> Option<&regex_syntax::Error> {
254        regex_automata::meta::BuildError::syntax_error(self)
255    }
256}
257impl RegexBuildError for regex_automata::dfa::dense::BuildError {
258    #[inline(always)]
259    fn syntax_error(&self) -> Option<&regex_syntax::Error> {
260        <Self as std::error::Error>::source(self)
261            .and_then(|e| e.downcast_ref::<regex_automata::nfa::thompson::BuildError>())
262            .and_then(|e| e.syntax_error())
263    }
264}
265impl RegexBuildError for regex_automata::dfa::onepass::BuildError {
266    #[inline(always)]
267    fn syntax_error(&self) -> Option<&regex_syntax::Error> {
268        <Self as std::error::Error>::source(self)
269            .and_then(|e| e.downcast_ref::<regex_automata::nfa::thompson::BuildError>())
270            .and_then(|e| e.syntax_error())
271    }
272}
273impl RegexBuildError for regex_automata::nfa::thompson::BuildError {
274    #[inline(always)]
275    fn syntax_error(&self) -> Option<&regex_syntax::Error> {
276        <Self as std::error::Error>::source(self).and_then(|e| e.downcast_ref())
277    }
278}
279
280pub(crate) fn build_error_to_diagnostic<E, F>(
281    error: E,
282    num_patterns: usize,
283    get_pattern_span: F,
284) -> Report
285where
286    E: RegexBuildError,
287    F: Fn(usize) -> SourceSpan,
288{
289    let diagnostic = if let Some(pattern_id) = error.pattern() {
290        let span = get_pattern_span(pattern_id.as_usize());
291        if let Some(syntax_err) = error.syntax_error() {
292            Diag::new(format!("invalid regex pattern: {error}"))
293                .with_help("a syntax error prevented us from compiling this pattern")
294                .with_url("https://docs.rs/regex/latest/regex/index.html#syntax")
295                .and_label(Label::new(span, syntax_err.to_string()))
296        } else {
297            Diag::new("unable to compile regex pattern set")
298                .with_help("the pattern shown exceeded preconfigured limits during construction")
299                .and_label(Label::at(span))
300        }
301    } else if num_patterns > 1 {
302        Diag::new(format!("unable to compile regex pattern set: {error}"))
303            .with_help("construction of a multi-pattern regex from this set failed to due to preconfigured limits")
304            .with_labels((0..num_patterns).map(|pid| Label::at(get_pattern_span(pid)).into()))
305    } else {
306        Diag::new(format!("unable to compile regex pattern: {error}"))
307            .with_help("construction of this regex failed to due to preconfigured limits")
308            .with_label(Label::at(get_pattern_span(0)))
309    };
310    Report::from(diagnostic)
311}
312
#[cfg(test)]
mod tests {
    use super::*;

    /// The regex must skip `Name: foo` and match the later `Name: bar` line,
    /// reporting the offset and length of that match within the input.
    #[test]
    fn test_regex_matcher() -> DiagResult<()> {
        let mut context = TestContext::new();
        context
            .with_checks(r"CHECK: {{Name: b[[:alpha:]]*}}")
            .with_input(
                "
Name: foo
Field: 1

Name: bar
Field: 2
",
            );

        let source = Span::new(
            SourceSpan::from(0..0),
            Cow::Borrowed("Name: b[[:alpha:]]*"),
        );
        let matcher = RegexMatcher::new(
            RegexPattern::new(source),
            &context.config,
            &context.interner,
        )
        .expect("expected pattern to be valid");

        let mctx = context.match_context();
        let input = mctx.search();
        let info = matcher
            .try_match(input, &mctx)?
            .info
            .expect("expected match");

        assert_eq!(info.span.offset(), 21);
        assert_eq!(info.span.len(), 9);

        let start = info.span.offset();
        let end = start + info.span.len();
        assert_eq!(input.as_str(start..end), "Name: bar");

        Ok(())
    }
}