litcheck_filecheck/pattern/matcher/matchers/
regex.rs1use regex_automata::{
2 PatternID,
3 util::{captures::Captures, syntax},
4};
5
6use crate::{
7 ast::{Capture, RegexPattern},
8 common::*,
9 expr::ValueType,
10};
11
12pub struct RegexMatcher<'a> {
17 pattern: Span<Cow<'a, str>>,
19 regex: Regex,
21 captures: Vec<Capture>,
23}
24impl<'a> fmt::Debug for RegexMatcher<'a> {
25 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
26 f.debug_struct("RegexMatcher")
27 .field("pattern", &self.pattern)
28 .field("captures", &self.captures)
29 .finish()
30 }
31}
32impl<'a> RegexMatcher<'a> {
33 pub fn new(pattern: RegexPattern<'a>, config: &Config) -> DiagResult<Self> {
34 let span = pattern.span();
35 let regex = Regex::builder()
36 .syntax(
37 syntax::Config::new()
38 .multi_line(true)
39 .case_insensitive(config.options.ignore_case),
40 )
41 .build(pattern.as_ref())
42 .map_err(|error| build_error_to_diagnostic(error, 1, |_| span))?;
43
44 let groups = regex.group_info();
46 let num_captures = groups.group_len(PatternID::ZERO);
47 let mut captures = vec![Capture::Ignore(span); num_captures];
48 for capture in pattern.captures.into_iter() {
49 if let Capture::Ignore(_) = capture {
50 continue;
51 }
52 if let Some(group_name) = capture.group_name() {
53 let group_id = groups
54 .to_index(PatternID::ZERO, group_name.as_str())
55 .unwrap_or_else(|| panic!("expected group for capture of '{group_name}'"));
56 captures[group_id] = capture;
57 } else {
58 assert_eq!(
59 &captures[0],
60 &Capture::Ignore(span),
61 "{capture:?} would overwrite a previous implicit capture group"
62 );
63 captures[0] = capture;
64 }
65 }
66
67 Ok(Self {
68 pattern: pattern.pattern,
69 regex,
70 captures,
71 })
72 }
73
74 pub fn new_nocapture(pattern: Span<Cow<'a, str>>, config: &Config) -> DiagResult<Self> {
75 let span = pattern.span();
76 let regex = Regex::builder()
77 .syntax(
78 syntax::Config::new()
79 .multi_line(true)
80 .case_insensitive(config.options.ignore_case),
81 )
82 .build(pattern.inner().as_ref())
83 .map_err(|error| build_error_to_diagnostic(error, 1, |_| span))?;
84
85 let groups = regex.group_info();
87 let num_captures = groups.group_len(PatternID::ZERO);
88 let captures = vec![Capture::Ignore(span); num_captures];
89
90 Ok(Self {
91 pattern,
92 regex,
93 captures,
94 })
95 }
96}
97impl<'a> MatcherMut for RegexMatcher<'a> {
98 fn try_match_mut<'input, 'context, C>(
99 &self,
100 input: Input<'input>,
101 context: &mut C,
102 ) -> DiagResult<MatchResult<'input>>
103 where
104 C: Context<'input, 'context> + ?Sized,
105 {
106 let matched = self.try_match(input, context)?;
107 matched.bind_captures_in(context);
108 Ok(matched)
109 }
110}
111impl<'a> Matcher for RegexMatcher<'a> {
112 fn try_match<'input, 'context, C>(
113 &self,
114 input: Input<'input>,
115 context: &C,
116 ) -> DiagResult<MatchResult<'input>>
117 where
118 C: Context<'input, 'context> + ?Sized,
119 {
120 let regex_input = input.into();
121 let mut captures = self.regex.create_captures();
122 self.regex.search_captures(®ex_input, &mut captures);
123 if let Some(matched) = captures.get_match() {
124 let span = SourceSpan::from_range_unchecked(input.source_id(), matched.range());
125 let mut capture_infos = Vec::with_capacity(captures.group_len());
126 for (index, (maybe_capture_span, capture)) in captures
127 .iter()
128 .zip(self.captures.iter().copied())
129 .enumerate()
130 {
131 if let Some(capture_span) = maybe_capture_span {
132 let captured = input.as_str(capture_span.range());
133 let capture_span =
134 SourceSpan::from_range_unchecked(input.source_id(), capture_span.range());
135 let result = try_convert_capture_to_type(
136 matched.pattern(),
137 index,
138 self.pattern.span(),
139 span,
140 Span::new(capture_span, captured),
141 capture,
142 &captures,
143 context,
144 );
145 match result {
146 Ok(capture_info) => {
147 capture_infos.push(capture_info);
148 }
149 Err(error) => return Ok(MatchResult::failed(error)),
150 }
151 }
152 }
153 Ok(MatchResult::ok(MatchInfo {
154 span,
155 pattern_span: self.pattern.span(),
156 pattern_id: 0,
157 captures: capture_infos,
158 }))
159 } else {
160 Ok(MatchResult::failed(
161 CheckFailedError::MatchNoneButExpected {
162 span: self.pattern.span(),
163 match_file: context
164 .source_file(self.pattern.span().source_id())
165 .unwrap(),
166 note: None,
167 },
168 ))
169 }
170 }
171}
172impl<'a> Spanned for RegexMatcher<'a> {
173 fn span(&self) -> SourceSpan {
174 self.pattern.span()
175 }
176}
177
178#[allow(clippy::too_many_arguments)]
179pub fn try_convert_capture_to_type<'input, 'context, C>(
180 pattern_id: PatternID,
181 group_id: usize,
182 pattern_span: SourceSpan,
183 overall_span: SourceSpan,
184 captured: Span<&'input str>,
185 capture: Capture,
186 captures: &Captures,
187 context: &C,
188) -> Result<CaptureInfo<'input>, CheckFailedError>
189where
190 C: Context<'input, 'context> + ?Sized,
191{
192 let (capture_span, captured) = captured.into_parts();
193 let name = capture.name();
194 let value = match capture.value_type() {
195 ValueType::String => Value::Str(Cow::Borrowed(captured)),
196 ValueType::Number(format) => {
197 let format = format.unwrap_or_default();
198 match Number::parse_with_format(Span::new(capture_span, captured), format) {
199 Ok(n) => Value::Num(Expr::Num(n)),
200 Err(error) => {
201 return Err(CheckFailedError::MatchFoundConstraintFailed {
202 span: overall_span,
203 input_file: context.input_file(),
204 pattern: Some(RelatedCheckError {
205 span: pattern_span,
206 match_file: context.source_file(pattern_span.source_id()).unwrap(),
207 }),
208 error: Some(RelatedError::new(Report::new(error))),
209 help: Some(if let Some(name) = name {
210 format!(
211 "expected {}; the constraint was required when parsing the capture group for '{name}'",
212 format.describe()
213 )
214 } else if let Some(group_name) =
215 captures.group_info().to_name(pattern_id, group_id)
216 {
217 format!(
218 "expected {}; the constraint was required when parsing the capture group named '{group_name}'",
219 format.describe()
220 )
221 } else {
222 format!(
223 "expected {}; the constraint was required when parsing capture group {group_id}",
224 format.describe()
225 )
226 }),
227 });
228 }
229 }
230 }
231 };
232
233 Ok(CaptureInfo {
234 span: capture_span,
235 pattern_span,
236 index: group_id,
237 value,
238 capture,
239 })
240}
241
242pub(crate) trait RegexBuildError: std::error::Error + std::fmt::Display {
243 #[inline(always)]
244 fn pattern(&self) -> Option<PatternID> {
245 None
246 }
247
248 #[inline(always)]
249 fn syntax_error(&self) -> Option<®ex_syntax::Error> {
250 None
251 }
252}
253impl RegexBuildError for regex_automata::meta::BuildError {
254 #[inline(always)]
255 fn pattern(&self) -> Option<PatternID> {
256 regex_automata::meta::BuildError::pattern(self)
257 }
258
259 #[inline(always)]
260 fn syntax_error(&self) -> Option<®ex_syntax::Error> {
261 regex_automata::meta::BuildError::syntax_error(self)
262 }
263}
264impl RegexBuildError for regex_automata::dfa::dense::BuildError {
265 #[inline(always)]
266 fn syntax_error(&self) -> Option<®ex_syntax::Error> {
267 <Self as std::error::Error>::source(self)
268 .and_then(|e| e.downcast_ref::<regex_automata::nfa::thompson::BuildError>())
269 .and_then(|e| e.syntax_error())
270 }
271}
272impl RegexBuildError for regex_automata::dfa::onepass::BuildError {
273 #[inline(always)]
274 fn syntax_error(&self) -> Option<®ex_syntax::Error> {
275 <Self as std::error::Error>::source(self)
276 .and_then(|e| e.downcast_ref::<regex_automata::nfa::thompson::BuildError>())
277 .and_then(|e| e.syntax_error())
278 }
279}
280impl RegexBuildError for regex_automata::nfa::thompson::BuildError {
281 #[inline(always)]
282 fn syntax_error(&self) -> Option<®ex_syntax::Error> {
283 <Self as std::error::Error>::source(self).and_then(|e| e.downcast_ref())
284 }
285}
286
287pub(crate) fn build_error_to_diagnostic<E, F>(
288 error: E,
289 num_patterns: usize,
290 get_pattern_span: F,
291) -> Report
292where
293 E: RegexBuildError,
294 F: Fn(usize) -> SourceSpan,
295{
296 let diagnostic = if let Some(pattern_id) = error.pattern() {
297 let span = get_pattern_span(pattern_id.as_usize());
298 if let Some(syntax_err) = error.syntax_error() {
299 Diag::new(syntax_err.to_string())
300 .with_help("a syntax error prevented us from compiling this pattern")
301 .with_url("https://docs.rs/regex/latest/regex/index.html#syntax")
302 .and_label(Label::new(span, syntax_err.to_string()))
303 } else {
304 Diag::new("unable to compile regex pattern set")
305 .with_help("the pattern shown exceeded preconfigured limits during construction")
306 .and_label(Label::at(span))
307 }
308 } else if num_patterns > 1 {
309 Diag::new(format!("unable to compile regex pattern set: {error}"))
310 .with_help("construction of a multi-pattern regex from this set failed to due to preconfigured limits")
311 .with_labels((0..num_patterns).map(|pid| Label::at(get_pattern_span(pid)).into()))
312 } else {
313 Diag::new(format!("unable to compile regex pattern: {error}"))
314 .with_help("construction of this regex failed to due to preconfigured limits")
315 .with_label(Label::at(get_pattern_span(0)))
316 };
317 Report::from(diagnostic)
318}
319
#[cfg(test)]
mod tests {
    use crate::source_file;

    use super::*;

    /// A regex pattern must skip the non-matching `Name: foo` line and match
    /// `Name: bar` further down in the input.
    #[test]
    fn test_regex_matcher() -> DiagResult<()> {
        let mut context = TestContext::new();
        let match_file = source_file!(context.config, r"CHECK: {{Name: b[[:alpha:]]*}}");
        let input_file = source_file!(
            context.config,
            "
Name: foo
Field: 1

Name: bar
Field: 2
"
        );
        context.with_checks(match_file).with_input(input_file);

        let pattern_text = Span::new(SourceSpan::UNKNOWN, Cow::Borrowed("Name: b[[:alpha:]]*"));
        let matcher = RegexMatcher::new(RegexPattern::new(pattern_text), &context.config)
            .expect("expected pattern to be valid");
        let mctx = context.match_context();
        let input = mctx.search();
        let result = matcher.try_match(input, &mctx)?;
        let info = result.info.expect("expected match");

        // The match starts at byte offset 21 of the input and is 9 bytes long.
        assert_eq!(info.span.start().to_u32(), 21);
        assert_eq!(info.span.len(), 9);
        let matched_range = info.span.start().to_usize()..info.span.end().to_usize();
        assert_eq!(input.as_str(matched_range), "Name: bar");

        Ok(())
    }
}