// litcheck_filecheck/pattern/matcher/matchers/regex.rs
use regex_automata::{
2 util::{captures::Captures, syntax},
3 PatternID,
4};
5
6use crate::{
7 ast::{Capture, RegexPattern},
8 common::*,
9 expr::ValueType,
10};
11
12pub struct RegexMatcher<'a> {
17 pattern: Span<Cow<'a, str>>,
19 regex: Regex,
21 captures: Vec<Capture>,
23}
24impl<'a> fmt::Debug for RegexMatcher<'a> {
25 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
26 f.debug_struct("RegexMatcher")
27 .field("pattern", &self.pattern)
28 .field("captures", &self.captures)
29 .finish()
30 }
31}
32impl<'a> RegexMatcher<'a> {
33 pub fn new(
34 pattern: RegexPattern<'a>,
35 config: &Config,
36 interner: &StringInterner,
37 ) -> DiagResult<Self> {
38 let span = pattern.span();
39 let regex = Regex::builder()
40 .syntax(
41 syntax::Config::new()
42 .multi_line(true)
43 .case_insensitive(config.ignore_case),
44 )
45 .build(pattern.as_ref())
46 .map_err(|error| build_error_to_diagnostic(error, 1, |_| span))?;
47
48 let groups = regex.group_info();
50 let num_captures = groups.group_len(PatternID::ZERO);
51 let mut captures = vec![Capture::Ignore(span); num_captures];
52 for capture in pattern.captures.into_iter() {
53 if let Capture::Ignore(_) = capture {
54 continue;
55 }
56 if let Some(name) = capture.group_name() {
57 let group_name = interner.resolve(name);
58 let group_id = groups
59 .to_index(PatternID::ZERO, group_name)
60 .unwrap_or_else(|| panic!("expected group for capture of '{group_name}'"));
61 captures[group_id] = capture;
62 } else {
63 assert_eq!(
64 &captures[0],
65 &Capture::Ignore(span),
66 "{capture:?} would overwrite a previous implicit capture group"
67 );
68 captures[0] = capture;
69 }
70 }
71
72 Ok(Self {
73 pattern: pattern.pattern,
74 regex,
75 captures,
76 })
77 }
78
79 pub fn new_nocapture(pattern: Span<Cow<'a, str>>, config: &Config) -> DiagResult<Self> {
80 let span = pattern.span();
81 let regex = Regex::builder()
82 .syntax(
83 syntax::Config::new()
84 .multi_line(true)
85 .case_insensitive(config.ignore_case),
86 )
87 .build(pattern.as_ref())
88 .map_err(|error| build_error_to_diagnostic(error, 1, |_| span))?;
89
90 let groups = regex.group_info();
92 let num_captures = groups.group_len(PatternID::ZERO);
93 let captures = vec![Capture::Ignore(span); num_captures];
94
95 Ok(Self {
96 pattern,
97 regex,
98 captures,
99 })
100 }
101}
102impl<'a> MatcherMut for RegexMatcher<'a> {
103 fn try_match_mut<'input, 'context, C>(
104 &self,
105 input: Input<'input>,
106 context: &mut C,
107 ) -> DiagResult<MatchResult<'input>>
108 where
109 C: Context<'input, 'context> + ?Sized,
110 {
111 self.try_match(input, context)
112 }
113}
114impl<'a> Matcher for RegexMatcher<'a> {
115 fn try_match<'input, 'context, C>(
116 &self,
117 input: Input<'input>,
118 context: &C,
119 ) -> DiagResult<MatchResult<'input>>
120 where
121 C: Context<'input, 'context> + ?Sized,
122 {
123 let regex_input = input.into();
124 let mut captures = self.regex.create_captures();
125 self.regex.search_captures(®ex_input, &mut captures);
126 if let Some(matched) = captures.get_match() {
127 let span = SourceSpan::from(matched.range());
128 let mut capture_infos = Vec::with_capacity(captures.group_len());
129 for (index, (maybe_capture_span, capture)) in captures
130 .iter()
131 .zip(self.captures.iter().copied())
132 .enumerate()
133 {
134 if let Some(capture_span) = maybe_capture_span {
135 let captured = input.as_str(capture_span.range());
136 let capture_span = SourceSpan::from(capture_span.range());
137 let result = try_convert_capture_to_type(
138 matched.pattern(),
139 index,
140 self.pattern.span(),
141 span,
142 Span::new(capture_span, captured),
143 capture,
144 &captures,
145 context,
146 );
147 match result {
148 Ok(capture_info) => {
149 capture_infos.push(capture_info);
150 }
151 Err(error) => return Ok(MatchResult::failed(error)),
152 }
153 }
154 }
155 Ok(MatchResult::ok(MatchInfo {
156 span,
157 pattern_span: self.pattern.span(),
158 pattern_id: 0,
159 captures: capture_infos,
160 }))
161 } else {
162 Ok(MatchResult::failed(
163 CheckFailedError::MatchNoneButExpected {
164 span: self.pattern.span(),
165 match_file: context.match_file(),
166 note: None,
167 },
168 ))
169 }
170 }
171}
172impl<'a> Spanned for RegexMatcher<'a> {
173 fn span(&self) -> SourceSpan {
174 self.pattern.span()
175 }
176}
177
178#[allow(clippy::too_many_arguments)]
179pub fn try_convert_capture_to_type<'input, 'context, C>(
180 pattern_id: PatternID,
181 group_id: usize,
182 pattern_span: SourceSpan,
183 overall_span: SourceSpan,
184 captured: Span<&'input str>,
185 capture: Capture,
186 captures: &Captures,
187 context: &C,
188) -> Result<CaptureInfo<'input>, CheckFailedError>
189where
190 C: Context<'input, 'context> + ?Sized,
191{
192 let (capture_span, captured) = captured.into_parts();
193 let name = capture.name();
194 let value = match capture.value_type() {
195 ValueType::String => Value::Str(Cow::Borrowed(captured)),
196 ValueType::Number(format) => {
197 match Number::parse_with_format(Span::new(capture_span, captured), format) {
198 Ok(n) => Value::Num(Expr::Num(n)),
199 Err(error) => {
200 return Err(CheckFailedError::MatchFoundConstraintFailed {
201 span: overall_span,
202 input_file: context.input_file(),
203 pattern: Some(RelatedCheckError {
204 span: pattern_span,
205 match_file: context.match_file(),
206 }),
207 error: Some(RelatedError::new(Report::new(error))),
208 help: Some(if let Some(name) = name {
209 let name = context.resolve(name);
210 format!("expected {}; the constraint was required when parsing the capture group for '{name}'", format.describe())
211 } else if let Some(group_name) =
212 captures.group_info().to_name(pattern_id, group_id)
213 {
214 format!("expected {}; the constraint was required when parsing the capture group named '{group_name}'", format.describe())
215 } else {
216 format!("expected {}; the constraint was required when parsing capture group {group_id}", format.describe())
217 }),
218 });
219 }
220 }
221 }
222 };
223
224 Ok(CaptureInfo {
225 span: capture_span,
226 pattern_span,
227 index: group_id,
228 value,
229 capture,
230 })
231}
232
233pub(crate) trait RegexBuildError: std::error::Error + std::fmt::Display {
234 #[inline(always)]
235 fn pattern(&self) -> Option<PatternID> {
236 None
237 }
238
239 #[inline(always)]
240 fn syntax_error(&self) -> Option<®ex_syntax::Error> {
241 None
242 }
243}
244impl RegexBuildError for regex_automata::meta::BuildError {
245 #[inline(always)]
246 fn pattern(&self) -> Option<PatternID> {
247 regex_automata::meta::BuildError::pattern(self)
248 }
249
250 #[inline(always)]
251 fn syntax_error(&self) -> Option<®ex_syntax::Error> {
252 regex_automata::meta::BuildError::syntax_error(self)
253 }
254}
255impl RegexBuildError for regex_automata::dfa::dense::BuildError {
256 #[inline(always)]
257 fn syntax_error(&self) -> Option<®ex_syntax::Error> {
258 <Self as std::error::Error>::source(self)
259 .and_then(|e| e.downcast_ref::<regex_automata::nfa::thompson::BuildError>())
260 .and_then(|e| e.syntax_error())
261 }
262}
263impl RegexBuildError for regex_automata::dfa::onepass::BuildError {
264 #[inline(always)]
265 fn syntax_error(&self) -> Option<®ex_syntax::Error> {
266 <Self as std::error::Error>::source(self)
267 .and_then(|e| e.downcast_ref::<regex_automata::nfa::thompson::BuildError>())
268 .and_then(|e| e.syntax_error())
269 }
270}
271impl RegexBuildError for regex_automata::nfa::thompson::BuildError {
272 #[inline(always)]
273 fn syntax_error(&self) -> Option<®ex_syntax::Error> {
274 <Self as std::error::Error>::source(self).and_then(|e| e.downcast_ref())
275 }
276}
277
278pub(crate) fn build_error_to_diagnostic<E, F>(
279 error: E,
280 num_patterns: usize,
281 get_pattern_span: F,
282) -> Report
283where
284 E: RegexBuildError,
285 F: Fn(usize) -> SourceSpan,
286{
287 let diagnostic = if let Some(pattern_id) = error.pattern() {
288 let span = get_pattern_span(pattern_id.as_usize());
289 if let Some(syntax_err) = error.syntax_error() {
290 Diag::new(format!("invalid regex pattern: {error}"))
291 .with_help("a syntax error prevented us from compiling this pattern")
292 .with_url("https://docs.rs/regex/latest/regex/index.html#syntax")
293 .and_label(Label::new(span, syntax_err.to_string()))
294 } else {
295 Diag::new("unable to compile regex pattern set")
296 .with_help("the pattern shown exceeded preconfigured limits during construction")
297 .and_label(Label::at(span))
298 }
299 } else if num_patterns > 1 {
300 Diag::new(format!("unable to compile regex pattern set: {error}"))
301 .with_help("construction of a multi-pattern regex from this set failed to due to preconfigured limits")
302 .with_labels((0..num_patterns).map(|pid| Label::at(get_pattern_span(pid)).into()))
303 } else {
304 Diag::new(format!("unable to compile regex pattern: {error}"))
305 .with_help("construction of this regex failed to due to preconfigured limits")
306 .with_label(Label::at(get_pattern_span(0)))
307 };
308 Report::from(diagnostic)
309}
310
#[cfg(test)]
mod tests {
    use super::*;

    /// The pattern `Name: b[[:alpha:]]*` must skip `Name: foo` and match
    /// `Name: bar`, which starts at byte offset 21 of the input.
    #[test]
    fn test_regex_matcher() -> DiagResult<()> {
        let mut context = TestContext::new();
        context
            .with_checks(r"CHECK: {{Name: b[[:alpha:]]*}}")
            .with_input(
                "
Name: foo
Field: 1

Name: bar
Field: 2
",
            );

        let source = Span::new(
            SourceSpan::from(0..0),
            Cow::Borrowed("Name: b[[:alpha:]]*"),
        );
        let matcher = RegexMatcher::new(
            RegexPattern::new(source),
            &context.config,
            &context.interner,
        )
        .expect("expected pattern to be valid");

        let mctx = context.match_context();
        let input = mctx.search();
        let outcome = matcher.try_match(input, &mctx)?;
        let info = outcome.info.expect("expected match");

        let start = info.span.offset();
        let len = info.span.len();
        assert_eq!(start, 21);
        assert_eq!(len, 9);
        assert_eq!(input.as_str(start..(start + len)), "Name: bar");

        Ok(())
    }
}