Skip to main content

recast_core/
pattern.rs

1//! Regex compilation and the convergence (idempotency) check.
2//!
3//! [`CompiledPattern`] wraps a compiled `regex::Regex` plus its
4//! replacement template. [`CompiledPattern::is_convergent`] strips
5//! capture-group placeholders from the replacement and tests whether
6//! the resulting probe string would re-match the pattern; if so, the
7//! rewrite is non-convergent and the planner will reject it.
8
9use regex::{Regex, RegexBuilder};
10
11use crate::error::Result;
12
/// Compilation switches consumed by [`CompiledPattern::compile`].
///
/// Every switch defaults to `false` (the type derives [`Default`]).
#[derive(Debug, Clone, Default)]
pub struct PatternOptions {
    /// Treat the pattern (and replacement) as plain text: regex
    /// metacharacters in the pattern are escaped before compilation.
    pub literal: bool,
    /// Match case-insensitively.
    pub ignore_case: bool,
    /// Disable the implicit `(?s)`. Note the inverted sense: when this is
    /// `false` (the default), `.` matches `\n`, which is what most
    /// LLM-driven rewrites expect; set it to `true` to stop `.` from
    /// crossing line breaks.
    pub single_line: bool,
}
27
28/// A compiled regex paired with its replacement template. Construct with
29/// [`CompiledPattern::compile`]; use [`CompiledPattern::is_convergent`]
30/// to check the idempotency invariant before scanning.
31#[derive(Debug, Clone)]
32pub struct CompiledPattern {
33    regex: Regex,
34    replacement: String,
35    literal: bool,
36}
37
38impl CompiledPattern {
39    /// Compile `pattern` into a regex and store `replacement` for later
40    /// substitution. Returns [`crate::Error::InvalidRegex`] on syntax errors.
41    pub fn compile(pattern: &str, replacement: &str, opts: &PatternOptions) -> Result<Self> {
42        let source = if opts.literal { regex::escape(pattern) } else { pattern.to_owned() };
43        let regex = RegexBuilder::new(&source)
44            .case_insensitive(opts.ignore_case)
45            .dot_matches_new_line(!opts.single_line)
46            .multi_line(true)
47            .build()?;
48        Ok(Self { regex, replacement: replacement.to_owned(), literal: opts.literal })
49    }
50
51    pub fn regex(&self) -> &Regex {
52        &self.regex
53    }
54
55    pub fn replacement(&self) -> &str {
56        &self.replacement
57    }
58
59    /// True when the pattern is convergent given its replacement: re-applying
60    /// the rewrite to its own output produces no further match. Catches
61    /// non-idempotent rewrites such as `a` → `aa`.
62    pub fn is_convergent(&self) -> bool {
63        let replacement_probe = self.replacement_probe();
64        !self.regex.is_match(&replacement_probe)
65    }
66
67    fn replacement_probe(&self) -> String {
68        if self.literal {
69            return self.replacement.clone();
70        }
71        let mut out = String::with_capacity(self.replacement.len());
72        let bytes = self.replacement.as_bytes();
73        let mut i = 0;
74        while i < bytes.len() {
75            let b = bytes[i];
76            if b == b'$' && i + 1 < bytes.len() {
77                let next = bytes[i + 1];
78                if next == b'$' {
79                    out.push('$');
80                    i += 2;
81                    continue;
82                }
83                if next.is_ascii_digit() {
84                    i += 2;
85                    while i < bytes.len() && bytes[i].is_ascii_digit() {
86                        i += 1;
87                    }
88                    continue;
89                }
90                if next == b'{'
91                    && let Some((_, _, after)) =
92                        crate::template_scan::scan_braced_name(&self.replacement, i)
93                {
94                    i = after;
95                    continue;
96                }
97            }
98            let ch_len = crate::template_scan::utf8_char_len(b);
99            out.push_str(&self.replacement[i..i + ch_len]);
100            i += ch_len;
101        }
102        out
103    }
104}
105
106#[cfg(test)]
107#[path = "pattern_tests.rs"]
108mod tests;