litcheck_filecheck/pattern/matcher/matchers/
regex.rs

1use regex_automata::{
2    util::{captures::Captures, syntax},
3    PatternID,
4};
5
6use crate::{
7    ast::{Capture, RegexPattern},
8    common::*,
9    expr::ValueType,
10};
11
12/// This matcher is used to match a single regular expression
13///
14/// This essentially corresponds to [SubstringMatcher], but
15/// with regular expressions instead of literal strings.
16pub struct RegexMatcher<'a> {
17    /// The source pattern from which the regex was compiled
18    pattern: Span<Cow<'a, str>>,
19    /// The compiled form of the input regex
20    regex: Regex,
21    /// Metadata about captures in the pattern
22    captures: Vec<Capture>,
23}
24impl<'a> fmt::Debug for RegexMatcher<'a> {
25    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
26        f.debug_struct("RegexMatcher")
27            .field("pattern", &self.pattern)
28            .field("captures", &self.captures)
29            .finish()
30    }
31}
32impl<'a> RegexMatcher<'a> {
33    pub fn new(
34        pattern: RegexPattern<'a>,
35        config: &Config,
36        interner: &StringInterner,
37    ) -> DiagResult<Self> {
38        let span = pattern.span();
39        let regex = Regex::builder()
40            .syntax(
41                syntax::Config::new()
42                    .multi_line(true)
43                    .case_insensitive(config.ignore_case),
44            )
45            .build(pattern.as_ref())
46            .map_err(|error| build_error_to_diagnostic(error, 1, |_| span))?;
47
48        // Compute capture group information
49        let groups = regex.group_info();
50        let num_captures = groups.group_len(PatternID::ZERO);
51        let mut captures = vec![Capture::Ignore(span); num_captures];
52        for capture in pattern.captures.into_iter() {
53            if let Capture::Ignore(_) = capture {
54                continue;
55            }
56            if let Some(name) = capture.group_name() {
57                let group_name = interner.resolve(name);
58                let group_id = groups
59                    .to_index(PatternID::ZERO, group_name)
60                    .unwrap_or_else(|| panic!("expected group for capture of '{group_name}'"));
61                captures[group_id] = capture;
62            } else {
63                assert_eq!(
64                    &captures[0],
65                    &Capture::Ignore(span),
66                    "{capture:?} would overwrite a previous implicit capture group"
67                );
68                captures[0] = capture;
69            }
70        }
71
72        Ok(Self {
73            pattern: pattern.pattern,
74            regex,
75            captures,
76        })
77    }
78
79    pub fn new_nocapture(pattern: Span<Cow<'a, str>>, config: &Config) -> DiagResult<Self> {
80        let span = pattern.span();
81        let regex = Regex::builder()
82            .syntax(
83                syntax::Config::new()
84                    .multi_line(true)
85                    .case_insensitive(config.ignore_case),
86            )
87            .build(pattern.as_ref())
88            .map_err(|error| build_error_to_diagnostic(error, 1, |_| span))?;
89
90        // Compute capture group information
91        let groups = regex.group_info();
92        let num_captures = groups.group_len(PatternID::ZERO);
93        let captures = vec![Capture::Ignore(span); num_captures];
94
95        Ok(Self {
96            pattern,
97            regex,
98            captures,
99        })
100    }
101}
102impl<'a> MatcherMut for RegexMatcher<'a> {
103    fn try_match_mut<'input, 'context, C>(
104        &self,
105        input: Input<'input>,
106        context: &mut C,
107    ) -> DiagResult<MatchResult<'input>>
108    where
109        C: Context<'input, 'context> + ?Sized,
110    {
111        self.try_match(input, context)
112    }
113}
114impl<'a> Matcher for RegexMatcher<'a> {
115    fn try_match<'input, 'context, C>(
116        &self,
117        input: Input<'input>,
118        context: &C,
119    ) -> DiagResult<MatchResult<'input>>
120    where
121        C: Context<'input, 'context> + ?Sized,
122    {
123        let regex_input = input.into();
124        let mut captures = self.regex.create_captures();
125        self.regex.search_captures(&regex_input, &mut captures);
126        if let Some(matched) = captures.get_match() {
127            let span = SourceSpan::from(matched.range());
128            let mut capture_infos = Vec::with_capacity(captures.group_len());
129            for (index, (maybe_capture_span, capture)) in captures
130                .iter()
131                .zip(self.captures.iter().copied())
132                .enumerate()
133            {
134                if let Some(capture_span) = maybe_capture_span {
135                    let captured = input.as_str(capture_span.range());
136                    let capture_span = SourceSpan::from(capture_span.range());
137                    let result = try_convert_capture_to_type(
138                        matched.pattern(),
139                        index,
140                        self.pattern.span(),
141                        span,
142                        Span::new(capture_span, captured),
143                        capture,
144                        &captures,
145                        context,
146                    );
147                    match result {
148                        Ok(capture_info) => {
149                            capture_infos.push(capture_info);
150                        }
151                        Err(error) => return Ok(MatchResult::failed(error)),
152                    }
153                }
154            }
155            Ok(MatchResult::ok(MatchInfo {
156                span,
157                pattern_span: self.pattern.span(),
158                pattern_id: 0,
159                captures: capture_infos,
160            }))
161        } else {
162            Ok(MatchResult::failed(
163                CheckFailedError::MatchNoneButExpected {
164                    span: self.pattern.span(),
165                    match_file: context.match_file(),
166                    note: None,
167                },
168            ))
169        }
170    }
171}
172impl<'a> Spanned for RegexMatcher<'a> {
173    fn span(&self) -> SourceSpan {
174        self.pattern.span()
175    }
176}
177
178#[allow(clippy::too_many_arguments)]
179pub fn try_convert_capture_to_type<'input, 'context, C>(
180    pattern_id: PatternID,
181    group_id: usize,
182    pattern_span: SourceSpan,
183    overall_span: SourceSpan,
184    captured: Span<&'input str>,
185    capture: Capture,
186    captures: &Captures,
187    context: &C,
188) -> Result<CaptureInfo<'input>, CheckFailedError>
189where
190    C: Context<'input, 'context> + ?Sized,
191{
192    let (capture_span, captured) = captured.into_parts();
193    let name = capture.name();
194    let value = match capture.value_type() {
195        ValueType::String => Value::Str(Cow::Borrowed(captured)),
196        ValueType::Number(format) => {
197            match Number::parse_with_format(Span::new(capture_span, captured), format) {
198                Ok(n) => Value::Num(Expr::Num(n)),
199                Err(error) => {
200                    return Err(CheckFailedError::MatchFoundConstraintFailed {
201                        span: overall_span,
202                        input_file: context.input_file(),
203                        pattern: Some(RelatedCheckError {
204                            span: pattern_span,
205                            match_file: context.match_file(),
206                        }),
207                        error: Some(RelatedError::new(Report::new(error))),
208                        help: Some(if let Some(name) = name {
209                            let name = context.resolve(name);
210                            format!("expected {}; the constraint was required when parsing the capture group for '{name}'", format.describe())
211                        } else if let Some(group_name) =
212                            captures.group_info().to_name(pattern_id, group_id)
213                        {
214                            format!("expected {}; the constraint was required when parsing the capture group named '{group_name}'", format.describe())
215                        } else {
216                            format!("expected {}; the constraint was required when parsing capture group {group_id}", format.describe())
217                        }),
218                    });
219                }
220            }
221        }
222    };
223
224    Ok(CaptureInfo {
225        span: capture_span,
226        pattern_span,
227        index: group_id,
228        value,
229        capture,
230    })
231}
232
233pub(crate) trait RegexBuildError: std::error::Error + std::fmt::Display {
234    #[inline(always)]
235    fn pattern(&self) -> Option<PatternID> {
236        None
237    }
238
239    #[inline(always)]
240    fn syntax_error(&self) -> Option<&regex_syntax::Error> {
241        None
242    }
243}
244impl RegexBuildError for regex_automata::meta::BuildError {
245    #[inline(always)]
246    fn pattern(&self) -> Option<PatternID> {
247        regex_automata::meta::BuildError::pattern(self)
248    }
249
250    #[inline(always)]
251    fn syntax_error(&self) -> Option<&regex_syntax::Error> {
252        regex_automata::meta::BuildError::syntax_error(self)
253    }
254}
255impl RegexBuildError for regex_automata::dfa::dense::BuildError {
256    #[inline(always)]
257    fn syntax_error(&self) -> Option<&regex_syntax::Error> {
258        <Self as std::error::Error>::source(self)
259            .and_then(|e| e.downcast_ref::<regex_automata::nfa::thompson::BuildError>())
260            .and_then(|e| e.syntax_error())
261    }
262}
263impl RegexBuildError for regex_automata::dfa::onepass::BuildError {
264    #[inline(always)]
265    fn syntax_error(&self) -> Option<&regex_syntax::Error> {
266        <Self as std::error::Error>::source(self)
267            .and_then(|e| e.downcast_ref::<regex_automata::nfa::thompson::BuildError>())
268            .and_then(|e| e.syntax_error())
269    }
270}
271impl RegexBuildError for regex_automata::nfa::thompson::BuildError {
272    #[inline(always)]
273    fn syntax_error(&self) -> Option<&regex_syntax::Error> {
274        <Self as std::error::Error>::source(self).and_then(|e| e.downcast_ref())
275    }
276}
277
278pub(crate) fn build_error_to_diagnostic<E, F>(
279    error: E,
280    num_patterns: usize,
281    get_pattern_span: F,
282) -> Report
283where
284    E: RegexBuildError,
285    F: Fn(usize) -> SourceSpan,
286{
287    let diagnostic = if let Some(pattern_id) = error.pattern() {
288        let span = get_pattern_span(pattern_id.as_usize());
289        if let Some(syntax_err) = error.syntax_error() {
290            Diag::new(format!("invalid regex pattern: {error}"))
291                .with_help("a syntax error prevented us from compiling this pattern")
292                .with_url("https://docs.rs/regex/latest/regex/index.html#syntax")
293                .and_label(Label::new(span, syntax_err.to_string()))
294        } else {
295            Diag::new("unable to compile regex pattern set")
296                .with_help("the pattern shown exceeded preconfigured limits during construction")
297                .and_label(Label::at(span))
298        }
299    } else if num_patterns > 1 {
300        Diag::new(format!("unable to compile regex pattern set: {error}"))
301            .with_help("construction of a multi-pattern regex from this set failed to due to preconfigured limits")
302            .with_labels((0..num_patterns).map(|pid| Label::at(get_pattern_span(pid)).into()))
303    } else {
304        Diag::new(format!("unable to compile regex pattern: {error}"))
305            .with_help("construction of this regex failed to due to preconfigured limits")
306            .with_label(Label::at(get_pattern_span(0)))
307    };
308    Report::from(diagnostic)
309}
310
#[cfg(test)]
mod tests {
    use super::*;

    /// The regex must skip `Name: foo` and match the later `Name: bar` line,
    /// reporting the span of that line within the input.
    #[test]
    fn test_regex_matcher() -> DiagResult<()> {
        let mut context = TestContext::new();
        context
            .with_checks(r"CHECK: {{Name: b[[:alpha:]]*}}")
            .with_input(
                "
Name: foo
Field: 1

Name: bar
Field: 2
",
            );

        let source = Span::new(
            SourceSpan::from(0..0),
            Cow::Borrowed("Name: b[[:alpha:]]*"),
        );
        let matcher = RegexMatcher::new(
            RegexPattern::new(source),
            &context.config,
            &context.interner,
        )
        .expect("expected pattern to be valid");

        let mctx = context.match_context();
        let input = mctx.search();
        let result = matcher.try_match(input, &mctx)?;
        let info = result.info.expect("expected match");

        let start = info.span.offset();
        let len = info.span.len();
        assert_eq!(start, 21);
        assert_eq!(len, 9);
        assert_eq!(input.as_str(start..(start + len)), "Name: bar");

        Ok(())
    }
}