recast_core/pattern.rs
1//! Regex compilation and the convergence (idempotency) check.
2//!
3//! [`CompiledPattern`] wraps a compiled `regex::Regex` plus its
4//! replacement template. [`CompiledPattern::is_convergent`] strips
5//! capture-group placeholders from the replacement and tests whether
6//! the resulting probe string would re-match the pattern; if so, the
7//! rewrite is non-convergent and the planner will reject it.
8
9use regex::{Regex, RegexBuilder};
10
11use crate::error::Result;
12
/// Compilation switches for turning a pattern string into a regex.
///
/// The derived [`Default`] leaves every switch off: the pattern is parsed
/// as a regex, matching is case-sensitive, and `.` is allowed to match
/// `\n`.
#[derive(Clone, Debug, Default)]
pub struct PatternOptions {
    /// Treat the pattern (and replacement) as plain text; regex
    /// metacharacters in the pattern are escaped before compilation.
    pub literal: bool,
    /// Match without regard to letter case.
    pub ignore_case: bool,
    /// Disable the implicit `(?s)`. While this is `false` (the default),
    /// `.` also matches `\n`, which is what most LLM-driven rewrites
    /// expect; set it to `true` to stop `.` at line ends.
    pub single_line: bool,
}
27
28/// A compiled regex paired with its replacement template. Construct with
29/// [`CompiledPattern::compile`]; use [`CompiledPattern::is_convergent`]
30/// to check the idempotency invariant before scanning.
31#[derive(Debug, Clone)]
32pub struct CompiledPattern {
33 regex: Regex,
34 replacement: String,
35 literal: bool,
36}
37
38impl CompiledPattern {
39 /// Compile `pattern` into a regex and store `replacement` for later
40 /// substitution. Returns [`crate::Error::InvalidRegex`] on syntax errors.
41 pub fn compile(pattern: &str, replacement: &str, opts: &PatternOptions) -> Result<Self> {
42 let source = if opts.literal { regex::escape(pattern) } else { pattern.to_owned() };
43 let regex = RegexBuilder::new(&source)
44 .case_insensitive(opts.ignore_case)
45 .dot_matches_new_line(!opts.single_line)
46 .multi_line(true)
47 .build()?;
48 Ok(Self { regex, replacement: replacement.to_owned(), literal: opts.literal })
49 }
50
51 pub fn regex(&self) -> &Regex {
52 &self.regex
53 }
54
55 pub fn replacement(&self) -> &str {
56 &self.replacement
57 }
58
59 /// True when the pattern is convergent given its replacement: re-applying
60 /// the rewrite to its own output produces no further match. Catches
61 /// non-idempotent rewrites such as `a` → `aa`.
62 pub fn is_convergent(&self) -> bool {
63 let replacement_probe = self.replacement_probe();
64 !self.regex.is_match(&replacement_probe)
65 }
66
67 fn replacement_probe(&self) -> String {
68 if self.literal {
69 return self.replacement.clone();
70 }
71 let mut out = String::with_capacity(self.replacement.len());
72 let bytes = self.replacement.as_bytes();
73 let mut i = 0;
74 while i < bytes.len() {
75 let b = bytes[i];
76 if b == b'$' && i + 1 < bytes.len() {
77 let next = bytes[i + 1];
78 if next == b'$' {
79 out.push('$');
80 i += 2;
81 continue;
82 }
83 if next.is_ascii_digit() {
84 i += 2;
85 while i < bytes.len() && bytes[i].is_ascii_digit() {
86 i += 1;
87 }
88 continue;
89 }
90 if next == b'{'
91 && let Some((_, _, after)) =
92 crate::template_scan::scan_braced_name(&self.replacement, i)
93 {
94 i = after;
95 continue;
96 }
97 }
98 let ch_len = crate::template_scan::utf8_char_len(b);
99 out.push_str(&self.replacement[i..i + ch_len]);
100 i += ch_len;
101 }
102 out
103 }
104}
105
106#[cfg(test)]
107#[path = "pattern_tests.rs"]
108mod tests;