litcheck_filecheck/pattern/matcher/matchers/
regex.rs1use regex_automata::{
2 util::{captures::Captures, syntax},
3 PatternID,
4};
5
6use crate::{
7 ast::{Capture, RegexPattern},
8 common::*,
9 expr::ValueType,
10};
11
12pub struct RegexMatcher<'a> {
17 pattern: Span<Cow<'a, str>>,
19 regex: Regex,
21 captures: Vec<Capture>,
23}
24impl<'a> fmt::Debug for RegexMatcher<'a> {
25 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
26 f.debug_struct("RegexMatcher")
27 .field("pattern", &self.pattern)
28 .field("captures", &self.captures)
29 .finish()
30 }
31}
32impl<'a> RegexMatcher<'a> {
33 pub fn new(
34 pattern: RegexPattern<'a>,
35 config: &Config,
36 interner: &StringInterner,
37 ) -> DiagResult<Self> {
38 let span = pattern.span();
39 let regex = Regex::builder()
40 .syntax(
41 syntax::Config::new()
42 .multi_line(true)
43 .case_insensitive(config.ignore_case),
44 )
45 .build(pattern.as_ref())
46 .map_err(|error| build_error_to_diagnostic(error, 1, |_| span))?;
47
48 let groups = regex.group_info();
50 let num_captures = groups.group_len(PatternID::ZERO);
51 let mut captures = vec![Capture::Ignore(span); num_captures];
52 for capture in pattern.captures.into_iter() {
53 if let Capture::Ignore(_) = capture {
54 continue;
55 }
56 if let Some(name) = capture.group_name() {
57 let group_name = interner.resolve(name);
58 let group_id = groups
59 .to_index(PatternID::ZERO, group_name)
60 .unwrap_or_else(|| panic!("expected group for capture of '{group_name}'"));
61 captures[group_id] = capture;
62 } else {
63 assert_eq!(
64 &captures[0],
65 &Capture::Ignore(span),
66 "{capture:?} would overwrite a previous implicit capture group"
67 );
68 captures[0] = capture;
69 }
70 }
71
72 Ok(Self {
73 pattern: pattern.pattern,
74 regex,
75 captures,
76 })
77 }
78
79 pub fn new_nocapture(pattern: Span<Cow<'a, str>>, config: &Config) -> DiagResult<Self> {
80 let span = pattern.span();
81 let regex = Regex::builder()
82 .syntax(
83 syntax::Config::new()
84 .multi_line(true)
85 .case_insensitive(config.ignore_case),
86 )
87 .build(pattern.as_ref())
88 .map_err(|error| build_error_to_diagnostic(error, 1, |_| span))?;
89
90 let groups = regex.group_info();
92 let num_captures = groups.group_len(PatternID::ZERO);
93 let captures = vec![Capture::Ignore(span); num_captures];
94
95 Ok(Self {
96 pattern,
97 regex,
98 captures,
99 })
100 }
101}
102impl<'a> MatcherMut for RegexMatcher<'a> {
103 fn try_match_mut<'input, 'context, C>(
104 &self,
105 input: Input<'input>,
106 context: &mut C,
107 ) -> DiagResult<MatchResult<'input>>
108 where
109 C: Context<'input, 'context> + ?Sized,
110 {
111 let matched = self.try_match(input, context)?;
112 matched.bind_captures_in(context);
113 Ok(matched)
114 }
115}
116impl<'a> Matcher for RegexMatcher<'a> {
117 fn try_match<'input, 'context, C>(
118 &self,
119 input: Input<'input>,
120 context: &C,
121 ) -> DiagResult<MatchResult<'input>>
122 where
123 C: Context<'input, 'context> + ?Sized,
124 {
125 let regex_input = input.into();
126 let mut captures = self.regex.create_captures();
127 self.regex.search_captures(®ex_input, &mut captures);
128 if let Some(matched) = captures.get_match() {
129 let span = SourceSpan::from(matched.range());
130 let mut capture_infos = Vec::with_capacity(captures.group_len());
131 for (index, (maybe_capture_span, capture)) in captures
132 .iter()
133 .zip(self.captures.iter().copied())
134 .enumerate()
135 {
136 if let Some(capture_span) = maybe_capture_span {
137 let captured = input.as_str(capture_span.range());
138 let capture_span = SourceSpan::from(capture_span.range());
139 let result = try_convert_capture_to_type(
140 matched.pattern(),
141 index,
142 self.pattern.span(),
143 span,
144 Span::new(capture_span, captured),
145 capture,
146 &captures,
147 context,
148 );
149 match result {
150 Ok(capture_info) => {
151 capture_infos.push(capture_info);
152 }
153 Err(error) => return Ok(MatchResult::failed(error)),
154 }
155 }
156 }
157 Ok(MatchResult::ok(MatchInfo {
158 span,
159 pattern_span: self.pattern.span(),
160 pattern_id: 0,
161 captures: capture_infos,
162 }))
163 } else {
164 Ok(MatchResult::failed(
165 CheckFailedError::MatchNoneButExpected {
166 span: self.pattern.span(),
167 match_file: context.match_file(),
168 note: None,
169 },
170 ))
171 }
172 }
173}
174impl<'a> Spanned for RegexMatcher<'a> {
175 fn span(&self) -> SourceSpan {
176 self.pattern.span()
177 }
178}
179
180#[allow(clippy::too_many_arguments)]
181pub fn try_convert_capture_to_type<'input, 'context, C>(
182 pattern_id: PatternID,
183 group_id: usize,
184 pattern_span: SourceSpan,
185 overall_span: SourceSpan,
186 captured: Span<&'input str>,
187 capture: Capture,
188 captures: &Captures,
189 context: &C,
190) -> Result<CaptureInfo<'input>, CheckFailedError>
191where
192 C: Context<'input, 'context> + ?Sized,
193{
194 let (capture_span, captured) = captured.into_parts();
195 let name = capture.name();
196 let value = match capture.value_type() {
197 ValueType::String => Value::Str(Cow::Borrowed(captured)),
198 ValueType::Number(format) => {
199 match Number::parse_with_format(Span::new(capture_span, captured), format) {
200 Ok(n) => Value::Num(Expr::Num(n)),
201 Err(error) => {
202 return Err(CheckFailedError::MatchFoundConstraintFailed {
203 span: overall_span,
204 input_file: context.input_file(),
205 pattern: Some(RelatedCheckError {
206 span: pattern_span,
207 match_file: context.match_file(),
208 }),
209 error: Some(RelatedError::new(Report::new(error))),
210 help: Some(if let Some(name) = name {
211 let name = context.resolve(name);
212 format!("expected {}; the constraint was required when parsing the capture group for '{name}'", format.describe())
213 } else if let Some(group_name) =
214 captures.group_info().to_name(pattern_id, group_id)
215 {
216 format!("expected {}; the constraint was required when parsing the capture group named '{group_name}'", format.describe())
217 } else {
218 format!("expected {}; the constraint was required when parsing capture group {group_id}", format.describe())
219 }),
220 });
221 }
222 }
223 }
224 };
225
226 Ok(CaptureInfo {
227 span: capture_span,
228 pattern_span,
229 index: group_id,
230 value,
231 capture,
232 })
233}
234
235pub(crate) trait RegexBuildError: std::error::Error + std::fmt::Display {
236 #[inline(always)]
237 fn pattern(&self) -> Option<PatternID> {
238 None
239 }
240
241 #[inline(always)]
242 fn syntax_error(&self) -> Option<®ex_syntax::Error> {
243 None
244 }
245}
246impl RegexBuildError for regex_automata::meta::BuildError {
247 #[inline(always)]
248 fn pattern(&self) -> Option<PatternID> {
249 regex_automata::meta::BuildError::pattern(self)
250 }
251
252 #[inline(always)]
253 fn syntax_error(&self) -> Option<®ex_syntax::Error> {
254 regex_automata::meta::BuildError::syntax_error(self)
255 }
256}
257impl RegexBuildError for regex_automata::dfa::dense::BuildError {
258 #[inline(always)]
259 fn syntax_error(&self) -> Option<®ex_syntax::Error> {
260 <Self as std::error::Error>::source(self)
261 .and_then(|e| e.downcast_ref::<regex_automata::nfa::thompson::BuildError>())
262 .and_then(|e| e.syntax_error())
263 }
264}
265impl RegexBuildError for regex_automata::dfa::onepass::BuildError {
266 #[inline(always)]
267 fn syntax_error(&self) -> Option<®ex_syntax::Error> {
268 <Self as std::error::Error>::source(self)
269 .and_then(|e| e.downcast_ref::<regex_automata::nfa::thompson::BuildError>())
270 .and_then(|e| e.syntax_error())
271 }
272}
273impl RegexBuildError for regex_automata::nfa::thompson::BuildError {
274 #[inline(always)]
275 fn syntax_error(&self) -> Option<®ex_syntax::Error> {
276 <Self as std::error::Error>::source(self).and_then(|e| e.downcast_ref())
277 }
278}
279
280pub(crate) fn build_error_to_diagnostic<E, F>(
281 error: E,
282 num_patterns: usize,
283 get_pattern_span: F,
284) -> Report
285where
286 E: RegexBuildError,
287 F: Fn(usize) -> SourceSpan,
288{
289 let diagnostic = if let Some(pattern_id) = error.pattern() {
290 let span = get_pattern_span(pattern_id.as_usize());
291 if let Some(syntax_err) = error.syntax_error() {
292 Diag::new(format!("invalid regex pattern: {error}"))
293 .with_help("a syntax error prevented us from compiling this pattern")
294 .with_url("https://docs.rs/regex/latest/regex/index.html#syntax")
295 .and_label(Label::new(span, syntax_err.to_string()))
296 } else {
297 Diag::new("unable to compile regex pattern set")
298 .with_help("the pattern shown exceeded preconfigured limits during construction")
299 .and_label(Label::at(span))
300 }
301 } else if num_patterns > 1 {
302 Diag::new(format!("unable to compile regex pattern set: {error}"))
303 .with_help("construction of a multi-pattern regex from this set failed to due to preconfigured limits")
304 .with_labels((0..num_patterns).map(|pid| Label::at(get_pattern_span(pid)).into()))
305 } else {
306 Diag::new(format!("unable to compile regex pattern: {error}"))
307 .with_help("construction of this regex failed to due to preconfigured limits")
308 .with_label(Label::at(get_pattern_span(0)))
309 };
310 Report::from(diagnostic)
311}
312
#[cfg(test)]
mod tests {
    use super::*;

    /// A regex pattern should skip the non-matching `Name: foo` line and
    /// match `Name: bar` further down in the input.
    #[test]
    fn test_regex_matcher() -> DiagResult<()> {
        let mut context = TestContext::new();
        context
            .with_checks(r"CHECK: {{Name: b[[:alpha:]]*}}")
            .with_input(
                "
Name: foo
Field: 1

Name: bar
Field: 2
",
            );

        let source = Span::new(
            SourceSpan::from(0..0),
            Cow::Borrowed("Name: b[[:alpha:]]*"),
        );
        let matcher =
            RegexMatcher::new(RegexPattern::new(source), &context.config, &context.interner)
                .expect("expected pattern to be valid");
        let mctx = context.match_context();
        let input = mctx.search();
        let outcome = matcher.try_match(input, &mctx)?;
        let info = outcome.info.expect("expected match");

        // The match starts at byte 21 of the input and spans 9 bytes.
        assert_eq!(info.span.offset(), 21);
        assert_eq!(info.span.len(), 9);
        let start = info.span.offset();
        let end = info.span.offset() + info.span.len();
        assert_eq!(input.as_str(start..end), "Name: bar");

        Ok(())
    }
}