Skip to main content

litcheck_filecheck/pattern/matcher/matchers/
regex.rs

1use regex_automata::{
2    PatternID,
3    util::{captures::Captures, syntax},
4};
5
6use crate::{
7    ast::{Capture, RegexPattern},
8    common::*,
9    expr::ValueType,
10};
11
12/// This matcher is used to match a single regular expression
13///
14/// This essentially corresponds to [SubstringMatcher], but
15/// with regular expressions instead of literal strings.
16pub struct RegexMatcher<'a> {
17    /// The source pattern from which the regex was compiled
18    pattern: Span<Cow<'a, str>>,
19    /// The compiled form of the input regex
20    regex: Regex,
21    /// Metadata about captures in the pattern
22    captures: Vec<Capture>,
23}
24impl<'a> fmt::Debug for RegexMatcher<'a> {
25    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
26        f.debug_struct("RegexMatcher")
27            .field("pattern", &self.pattern)
28            .field("captures", &self.captures)
29            .finish()
30    }
31}
32impl<'a> RegexMatcher<'a> {
33    pub fn new(pattern: RegexPattern<'a>, config: &Config) -> DiagResult<Self> {
34        let span = pattern.span();
35        let regex = Regex::builder()
36            .syntax(
37                syntax::Config::new()
38                    .multi_line(true)
39                    .case_insensitive(config.options.ignore_case),
40            )
41            .build(pattern.as_ref())
42            .map_err(|error| build_error_to_diagnostic(error, 1, |_| span))?;
43
44        // Compute capture group information
45        let groups = regex.group_info();
46        let num_captures = groups.group_len(PatternID::ZERO);
47        let mut captures = vec![Capture::Ignore(span); num_captures];
48        for capture in pattern.captures.into_iter() {
49            if let Capture::Ignore(_) = capture {
50                continue;
51            }
52            if let Some(group_name) = capture.group_name() {
53                let group_id = groups
54                    .to_index(PatternID::ZERO, group_name.as_str())
55                    .unwrap_or_else(|| panic!("expected group for capture of '{group_name}'"));
56                captures[group_id] = capture;
57            } else {
58                assert_eq!(
59                    &captures[0],
60                    &Capture::Ignore(span),
61                    "{capture:?} would overwrite a previous implicit capture group"
62                );
63                captures[0] = capture;
64            }
65        }
66
67        Ok(Self {
68            pattern: pattern.pattern,
69            regex,
70            captures,
71        })
72    }
73
74    pub fn new_nocapture(pattern: Span<Cow<'a, str>>, config: &Config) -> DiagResult<Self> {
75        let span = pattern.span();
76        let regex = Regex::builder()
77            .syntax(
78                syntax::Config::new()
79                    .multi_line(true)
80                    .case_insensitive(config.options.ignore_case),
81            )
82            .build(pattern.inner().as_ref())
83            .map_err(|error| build_error_to_diagnostic(error, 1, |_| span))?;
84
85        // Compute capture group information
86        let groups = regex.group_info();
87        let num_captures = groups.group_len(PatternID::ZERO);
88        let captures = vec![Capture::Ignore(span); num_captures];
89
90        Ok(Self {
91            pattern,
92            regex,
93            captures,
94        })
95    }
96}
97impl<'a> MatcherMut for RegexMatcher<'a> {
98    fn try_match_mut<'input, 'context, C>(
99        &self,
100        input: Input<'input>,
101        context: &mut C,
102    ) -> DiagResult<MatchResult<'input>>
103    where
104        C: Context<'input, 'context> + ?Sized,
105    {
106        let matched = self.try_match(input, context)?;
107        matched.bind_captures_in(context);
108        Ok(matched)
109    }
110}
111impl<'a> Matcher for RegexMatcher<'a> {
112    fn try_match<'input, 'context, C>(
113        &self,
114        input: Input<'input>,
115        context: &C,
116    ) -> DiagResult<MatchResult<'input>>
117    where
118        C: Context<'input, 'context> + ?Sized,
119    {
120        let regex_input = input.into();
121        let mut captures = self.regex.create_captures();
122        self.regex.search_captures(&regex_input, &mut captures);
123        if let Some(matched) = captures.get_match() {
124            let span = SourceSpan::from_range_unchecked(input.source_id(), matched.range());
125            let mut capture_infos = Vec::with_capacity(captures.group_len());
126            for (index, (maybe_capture_span, capture)) in captures
127                .iter()
128                .zip(self.captures.iter().copied())
129                .enumerate()
130            {
131                if let Some(capture_span) = maybe_capture_span {
132                    let captured = input.as_str(capture_span.range());
133                    let capture_span =
134                        SourceSpan::from_range_unchecked(input.source_id(), capture_span.range());
135                    let result = try_convert_capture_to_type(
136                        matched.pattern(),
137                        index,
138                        self.pattern.span(),
139                        span,
140                        Span::new(capture_span, captured),
141                        capture,
142                        &captures,
143                        context,
144                    );
145                    match result {
146                        Ok(capture_info) => {
147                            capture_infos.push(capture_info);
148                        }
149                        Err(error) => return Ok(MatchResult::failed(error)),
150                    }
151                }
152            }
153            Ok(MatchResult::ok(MatchInfo {
154                span,
155                pattern_span: self.pattern.span(),
156                pattern_id: 0,
157                captures: capture_infos,
158            }))
159        } else {
160            Ok(MatchResult::failed(
161                CheckFailedError::MatchNoneButExpected {
162                    span: self.pattern.span(),
163                    match_file: context
164                        .source_file(self.pattern.span().source_id())
165                        .unwrap(),
166                    note: None,
167                },
168            ))
169        }
170    }
171}
172impl<'a> Spanned for RegexMatcher<'a> {
173    fn span(&self) -> SourceSpan {
174        self.pattern.span()
175    }
176}
177
178#[allow(clippy::too_many_arguments)]
179pub fn try_convert_capture_to_type<'input, 'context, C>(
180    pattern_id: PatternID,
181    group_id: usize,
182    pattern_span: SourceSpan,
183    overall_span: SourceSpan,
184    captured: Span<&'input str>,
185    capture: Capture,
186    captures: &Captures,
187    context: &C,
188) -> Result<CaptureInfo<'input>, CheckFailedError>
189where
190    C: Context<'input, 'context> + ?Sized,
191{
192    let (capture_span, captured) = captured.into_parts();
193    let name = capture.name();
194    let value = match capture.value_type() {
195        ValueType::String => Value::Str(Cow::Borrowed(captured)),
196        ValueType::Number(format) => {
197            let format = format.unwrap_or_default();
198            match Number::parse_with_format(Span::new(capture_span, captured), format) {
199                Ok(n) => Value::Num(Expr::Num(n)),
200                Err(error) => {
201                    return Err(CheckFailedError::MatchFoundConstraintFailed {
202                        span: overall_span,
203                        input_file: context.input_file(),
204                        pattern: Some(RelatedCheckError {
205                            span: pattern_span,
206                            match_file: context.source_file(pattern_span.source_id()).unwrap(),
207                        }),
208                        error: Some(RelatedError::new(Report::new(error))),
209                        help: Some(if let Some(name) = name {
210                            format!(
211                                "expected {}; the constraint was required when parsing the capture group for '{name}'",
212                                format.describe()
213                            )
214                        } else if let Some(group_name) =
215                            captures.group_info().to_name(pattern_id, group_id)
216                        {
217                            format!(
218                                "expected {}; the constraint was required when parsing the capture group named '{group_name}'",
219                                format.describe()
220                            )
221                        } else {
222                            format!(
223                                "expected {}; the constraint was required when parsing capture group {group_id}",
224                                format.describe()
225                            )
226                        }),
227                    });
228                }
229            }
230        }
231    };
232
233    Ok(CaptureInfo {
234        span: capture_span,
235        pattern_span,
236        index: group_id,
237        value,
238        capture,
239    })
240}
241
242pub(crate) trait RegexBuildError: std::error::Error + std::fmt::Display {
243    #[inline(always)]
244    fn pattern(&self) -> Option<PatternID> {
245        None
246    }
247
248    #[inline(always)]
249    fn syntax_error(&self) -> Option<&regex_syntax::Error> {
250        None
251    }
252}
253impl RegexBuildError for regex_automata::meta::BuildError {
254    #[inline(always)]
255    fn pattern(&self) -> Option<PatternID> {
256        regex_automata::meta::BuildError::pattern(self)
257    }
258
259    #[inline(always)]
260    fn syntax_error(&self) -> Option<&regex_syntax::Error> {
261        regex_automata::meta::BuildError::syntax_error(self)
262    }
263}
264impl RegexBuildError for regex_automata::dfa::dense::BuildError {
265    #[inline(always)]
266    fn syntax_error(&self) -> Option<&regex_syntax::Error> {
267        <Self as std::error::Error>::source(self)
268            .and_then(|e| e.downcast_ref::<regex_automata::nfa::thompson::BuildError>())
269            .and_then(|e| e.syntax_error())
270    }
271}
272impl RegexBuildError for regex_automata::dfa::onepass::BuildError {
273    #[inline(always)]
274    fn syntax_error(&self) -> Option<&regex_syntax::Error> {
275        <Self as std::error::Error>::source(self)
276            .and_then(|e| e.downcast_ref::<regex_automata::nfa::thompson::BuildError>())
277            .and_then(|e| e.syntax_error())
278    }
279}
280impl RegexBuildError for regex_automata::nfa::thompson::BuildError {
281    #[inline(always)]
282    fn syntax_error(&self) -> Option<&regex_syntax::Error> {
283        <Self as std::error::Error>::source(self).and_then(|e| e.downcast_ref())
284    }
285}
286
287pub(crate) fn build_error_to_diagnostic<E, F>(
288    error: E,
289    num_patterns: usize,
290    get_pattern_span: F,
291) -> Report
292where
293    E: RegexBuildError,
294    F: Fn(usize) -> SourceSpan,
295{
296    let diagnostic = if let Some(pattern_id) = error.pattern() {
297        let span = get_pattern_span(pattern_id.as_usize());
298        if let Some(syntax_err) = error.syntax_error() {
299            Diag::new(syntax_err.to_string())
300                .with_help("a syntax error prevented us from compiling this pattern")
301                .with_url("https://docs.rs/regex/latest/regex/index.html#syntax")
302                .and_label(Label::new(span, syntax_err.to_string()))
303        } else {
304            Diag::new("unable to compile regex pattern set")
305                .with_help("the pattern shown exceeded preconfigured limits during construction")
306                .and_label(Label::at(span))
307        }
308    } else if num_patterns > 1 {
309        Diag::new(format!("unable to compile regex pattern set: {error}"))
310            .with_help("construction of a multi-pattern regex from this set failed to due to preconfigured limits")
311            .with_labels((0..num_patterns).map(|pid| Label::at(get_pattern_span(pid)).into()))
312    } else {
313        Diag::new(format!("unable to compile regex pattern: {error}"))
314            .with_help("construction of this regex failed to due to preconfigured limits")
315            .with_label(Label::at(get_pattern_span(0)))
316    };
317    Report::from(diagnostic)
318}
319
#[cfg(test)]
mod tests {
    use crate::source_file;

    use super::*;

    /// The pattern requires a name starting with `b`, so `Name: foo` is passed over and
    /// the match is expected on `Name: bar`.
    #[test]
    fn test_regex_matcher() -> DiagResult<()> {
        let mut context = TestContext::new();
        let match_file = source_file!(context.config, r"CHECK: {{Name: b[[:alpha:]]*}}");
        let input_file = source_file!(
            context.config,
            "
Name: foo
Field: 1

Name: bar
Field: 2
"
        );
        context.with_checks(match_file).with_input(input_file);

        let source = Span::new(SourceSpan::UNKNOWN, Cow::Borrowed("Name: b[[:alpha:]]*"));
        let matcher = RegexMatcher::new(RegexPattern::new(source), &context.config)
            .expect("expected pattern to be valid");

        let mctx = context.match_context();
        let input = mctx.search();
        let info = matcher
            .try_match(input, &mctx)?
            .info
            .expect("expected match");

        // The match starts at byte offset 21 of the input and covers 9 bytes
        assert_eq!(info.span.start().to_u32(), 21);
        assert_eq!(info.span.len(), 9);
        let matched_range = info.span.start().to_usize()..info.span.end().to_usize();
        assert_eq!(input.as_str(matched_range), "Name: bar");

        Ok(())
    }
}
361}