Skip to main content

rgx/engine/
mod.rs

1pub mod fancy;
2#[cfg(feature = "pcre2-engine")]
3pub mod pcre2;
4pub mod rust_regex;
5
6use std::fmt;
7
/// Identifies one of the regex backends this module can construct.
///
/// `Pcre2` is only present when the crate is built with the `pcre2-engine`
/// feature.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EngineKind {
    RustRegex,
    FancyRegex,
    #[cfg(feature = "pcre2-engine")]
    Pcre2,
}

impl EngineKind {
    /// Lists every kind compiled into this build, in the same order that
    /// [`EngineKind::next`] steps through them.
    pub fn all() -> Vec<EngineKind> {
        vec![
            Self::RustRegex,
            Self::FancyRegex,
            #[cfg(feature = "pcre2-engine")]
            Self::Pcre2,
        ]
    }

    /// Returns the kind that follows `self`, wrapping from the last kind back
    /// to `RustRegex`.
    pub fn next(self) -> EngineKind {
        // Only the successor of `FancyRegex` depends on the feature set, so
        // that single choice is made here and the match stays exhaustive.
        #[cfg(feature = "pcre2-engine")]
        const AFTER_FANCY: EngineKind = EngineKind::Pcre2;
        #[cfg(not(feature = "pcre2-engine"))]
        const AFTER_FANCY: EngineKind = EngineKind::RustRegex;

        match self {
            Self::RustRegex => Self::FancyRegex,
            Self::FancyRegex => AFTER_FANCY,
            #[cfg(feature = "pcre2-engine")]
            Self::Pcre2 => Self::RustRegex,
        }
    }
}

impl fmt::Display for EngineKind {
    /// Writes the human-readable engine name.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match self {
            Self::RustRegex => "Rust regex",
            Self::FancyRegex => "fancy-regex",
            #[cfg(feature = "pcre2-engine")]
            Self::Pcre2 => "PCRE2",
        };
        f.write_str(label)
    }
}
49
/// Boolean switches that are rendered as an inline flag group in front of a
/// pattern. All switches are off by default.
#[derive(Debug, Clone, Copy, Default)]
pub struct EngineFlags {
    /// Rendered as the letter `i`.
    pub case_insensitive: bool,
    /// Rendered as the letter `m`.
    pub multi_line: bool,
    /// Rendered as the letter `s`.
    pub dot_matches_newline: bool,
    /// Rendered as the letter `u`.
    pub unicode: bool,
    /// Rendered as the letter `x`.
    pub extended: bool,
}

impl EngineFlags {
    /// Returns the letters of all enabled switches, always in the order
    /// `i`, `m`, `s`, `u`, `x`. The result is empty when nothing is enabled.
    pub fn to_inline_prefix(&self) -> String {
        let switches = [
            (self.case_insensitive, 'i'),
            (self.multi_line, 'm'),
            (self.dot_matches_newline, 's'),
            (self.unicode, 'u'),
            (self.extended, 'x'),
        ];
        switches
            .iter()
            .filter(|(enabled, _)| *enabled)
            .map(|&(_, letter)| letter)
            .collect()
    }

    /// Prepends `(?<letters>)` to `pattern` when at least one switch is on;
    /// otherwise returns `pattern` unchanged.
    pub fn wrap_pattern(&self, pattern: &str) -> String {
        match self.to_inline_prefix().as_str() {
            "" => pattern.to_owned(),
            prefix => format!("(?{prefix}){pattern}"),
        }
    }

    /// Flips the `case_insensitive` switch.
    pub fn toggle_case_insensitive(&mut self) {
        self.case_insensitive ^= true;
    }

    /// Flips the `multi_line` switch.
    pub fn toggle_multi_line(&mut self) {
        self.multi_line ^= true;
    }

    /// Flips the `dot_matches_newline` switch.
    pub fn toggle_dot_matches_newline(&mut self) {
        self.dot_matches_newline ^= true;
    }

    /// Flips the `unicode` switch.
    pub fn toggle_unicode(&mut self) {
        self.unicode ^= true;
    }

    /// Flips the `extended` switch.
    pub fn toggle_extended(&mut self) {
        self.extended ^= true;
    }
}
105
/// One match of a pattern within a searched text.
///
/// Derives `PartialEq`/`Eq` so match lists can be compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Match {
    /// Byte offset in the searched text where the match begins
    /// (`replace_all` slices the text with it).
    pub start: usize,
    /// Byte offset in the searched text just past the match.
    pub end: usize,
    /// The matched text; this is what `$0` / `$&` expand to.
    pub text: String,
    /// Capture groups recorded for this match.
    pub captures: Vec<CaptureGroup>,
}

/// One capture group recorded for a [`Match`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CaptureGroup {
    /// Group number; `$N` and `${N}` in a replacement template select on it.
    pub index: usize,
    /// Group name, if any; `${name}` in a replacement template selects on it.
    pub name: Option<String>,
    /// Start offset of the captured text.
    /// NOTE(review): presumably a byte offset into the searched text, like
    /// `Match::start` — confirm against the engine implementations.
    pub start: usize,
    /// End offset of the captured text (same caveat as `start`).
    pub end: usize,
    /// The captured text.
    pub text: String,
}
122
/// Error type shared by the engine traits; each variant carries a message.
#[derive(Debug)]
pub enum EngineError {
    /// Displayed as `Compile error: <message>`.
    CompileError(String),
    /// Displayed as `Match error: <message>`.
    MatchError(String),
}

impl fmt::Display for EngineError {
    /// Writes the variant's label followed by its message.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let (label, detail) = match self {
            Self::CompileError(detail) => ("Compile error", detail),
            Self::MatchError(detail) => ("Match error", detail),
        };
        write!(f, "{label}: {detail}")
    }
}

impl std::error::Error for EngineError {}

/// Shorthand for a `Result` whose error type is [`EngineError`].
pub type EngineResult<T> = Result<T, EngineError>;
141
142pub trait RegexEngine: Send + Sync {
143    fn kind(&self) -> EngineKind;
144    fn compile(&self, pattern: &str, flags: &EngineFlags) -> EngineResult<Box<dyn CompiledRegex>>;
145}
146
147pub trait CompiledRegex: Send + Sync {
148    fn find_matches(&self, text: &str) -> EngineResult<Vec<Match>>;
149}
150
151pub fn create_engine(kind: EngineKind) -> Box<dyn RegexEngine> {
152    match kind {
153        EngineKind::RustRegex => Box::new(rust_regex::RustRegexEngine),
154        EngineKind::FancyRegex => Box::new(fancy::FancyRegexEngine),
155        #[cfg(feature = "pcre2-engine")]
156        EngineKind::Pcre2 => Box::new(pcre2::Pcre2Engine),
157    }
158}
159
160// --- Replace/Substitution support ---
161
/// A contiguous span of [`ReplaceResult::output`], tagged with where its text
/// came from.
///
/// Derives `PartialEq`/`Eq` so replacement results can be compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReplaceSegment {
    /// Byte offset in the output where the span begins.
    pub start: usize,
    /// Byte offset in the output just past the span.
    pub end: usize,
    /// `true` when the span is expanded template text, `false` when it was
    /// copied unchanged from the input.
    pub is_replacement: bool,
}

/// Outcome of a replacement run: the rewritten text plus a map of its spans.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReplaceResult {
    /// The text after all replacements were applied.
    pub output: String,
    /// Non-empty spans of `output`, in output order.
    pub segments: Vec<ReplaceSegment>,
}
174
175/// Expand a replacement template against a single match.
176///
177/// Supports: `$0` / `$&` (whole match), `$1`..`$99` (numbered groups),
178/// `${name}` (named groups), `$$` (literal `$`).
179fn expand_replacement(template: &str, m: &Match) -> String {
180    let mut result = String::new();
181    let mut chars = template.char_indices().peekable();
182
183    while let Some((_i, c)) = chars.next() {
184        if c == '$' {
185            match chars.peek() {
186                None => {
187                    result.push('$');
188                }
189                Some(&(_, '$')) => {
190                    chars.next();
191                    result.push('$');
192                }
193                Some(&(_, '&')) => {
194                    chars.next();
195                    result.push_str(&m.text);
196                }
197                Some(&(_, '{')) => {
198                    chars.next(); // consume '{'
199                    let brace_start = chars.peek().map(|&(idx, _)| idx).unwrap_or(template.len());
200                    if let Some(close) = template[brace_start..].find('}') {
201                        let ref_name = &template[brace_start..brace_start + close];
202                        if let Some(text) = lookup_capture(m, ref_name) {
203                            result.push_str(text);
204                        }
205                        // Advance past the content and closing brace
206                        let end_byte = brace_start + close + 1;
207                        while chars.peek().is_some_and(|&(idx, _)| idx < end_byte) {
208                            chars.next();
209                        }
210                    } else {
211                        result.push('$');
212                        result.push('{');
213                    }
214                }
215                Some(&(_, next_c)) if next_c.is_ascii_digit() => {
216                    let (_, d1) = chars.next().unwrap();
217                    let mut num_str = String::from(d1);
218                    // Grab a second digit if present
219                    if let Some(&(_, d2)) = chars.peek() {
220                        if d2.is_ascii_digit() {
221                            chars.next();
222                            num_str.push(d2);
223                        }
224                    }
225                    let idx: usize = num_str.parse().unwrap_or(0);
226                    if idx == 0 {
227                        result.push_str(&m.text);
228                    } else if let Some(cap) = m.captures.iter().find(|c| c.index == idx) {
229                        result.push_str(&cap.text);
230                    }
231                }
232                Some(_) => {
233                    result.push('$');
234                }
235            }
236        } else {
237            result.push(c);
238        }
239    }
240
241    result
242}
243
244/// Look up a capture by name or numeric string.
245fn lookup_capture<'a>(m: &'a Match, key: &str) -> Option<&'a str> {
246    // Try as number first
247    if let Ok(idx) = key.parse::<usize>() {
248        if idx == 0 {
249            return Some(&m.text);
250        }
251        return m
252            .captures
253            .iter()
254            .find(|c| c.index == idx)
255            .map(|c| c.text.as_str());
256    }
257    // Try as named capture
258    m.captures
259        .iter()
260        .find(|c| c.name.as_deref() == Some(key))
261        .map(|c| c.text.as_str())
262}
263
264/// Perform replacement across all matches, returning the output string and segment metadata.
265pub fn replace_all(text: &str, matches: &[Match], template: &str) -> ReplaceResult {
266    let mut output = String::new();
267    let mut segments = Vec::new();
268    let mut pos = 0;
269
270    for m in matches {
271        // Original text before this match
272        if m.start > pos {
273            let seg_start = output.len();
274            output.push_str(&text[pos..m.start]);
275            segments.push(ReplaceSegment {
276                start: seg_start,
277                end: output.len(),
278                is_replacement: false,
279            });
280        }
281        // Expanded replacement
282        let expanded = expand_replacement(template, m);
283        if !expanded.is_empty() {
284            let seg_start = output.len();
285            output.push_str(&expanded);
286            segments.push(ReplaceSegment {
287                start: seg_start,
288                end: output.len(),
289                is_replacement: true,
290            });
291        }
292        pos = m.end;
293    }
294
295    // Trailing original text
296    if pos < text.len() {
297        let seg_start = output.len();
298        output.push_str(&text[pos..]);
299        segments.push(ReplaceSegment {
300            start: seg_start,
301            end: output.len(),
302            is_replacement: false,
303        });
304    }
305
306    ReplaceResult { output, segments }
307}
308
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `Match` from borrowed text.
    fn make_match(start: usize, end: usize, text: &str, captures: Vec<CaptureGroup>) -> Match {
        Match {
            start,
            end,
            text: text.to_string(),
            captures,
        }
    }

    /// Builds a `CaptureGroup` from borrowed text and an optional name.
    fn make_cap(
        index: usize,
        name: Option<&str>,
        start: usize,
        end: usize,
        text: &str,
    ) -> CaptureGroup {
        CaptureGroup {
            index,
            name: name.map(|s| s.to_string()),
            start,
            end,
            text: text.to_string(),
        }
    }

    #[test]
    fn test_replace_all_basic() {
        let matches = vec![make_match(
            0,
            12,
            "user@example",
            vec![
                make_cap(1, None, 0, 4, "user"),
                make_cap(2, None, 5, 12, "example"),
            ],
        )];
        let result = replace_all("user@example", &matches, "$2=$1");
        assert_eq!(result.output, "example=user");
    }

    #[test]
    fn test_replace_all_no_matches() {
        let result = replace_all("hello world", &[], "replacement");
        assert_eq!(result.output, "hello world");
        assert_eq!(result.segments.len(), 1);
        assert!(!result.segments[0].is_replacement);
    }

    #[test]
    fn test_replace_all_empty_template() {
        let matches = vec![
            make_match(4, 7, "123", vec![]),
            make_match(12, 15, "456", vec![]),
        ];
        let result = replace_all("abc 123 def 456 ghi", &matches, "");
        assert_eq!(result.output, "abc  def  ghi");
    }

    #[test]
    fn test_replace_all_empty_template_records_no_replacement_segment() {
        let matches = vec![make_match(4, 7, "123", vec![])];
        let result = replace_all("abc 123 def", &matches, "");
        assert_eq!(result.output, "abc  def");
        // Only the two original pieces are tracked; the empty expansion
        // leaves no segment behind.
        assert_eq!(result.segments.len(), 2);
        assert!(result.segments.iter().all(|seg| !seg.is_replacement));
        assert_eq!(
            &result.output[result.segments[0].start..result.segments[0].end],
            "abc "
        );
        assert_eq!(
            &result.output[result.segments[1].start..result.segments[1].end],
            " def"
        );
    }

    #[test]
    fn test_replace_all_literal_dollar() {
        let matches = vec![make_match(0, 3, "foo", vec![])];
        let result = replace_all("foo", &matches, "$$bar");
        assert_eq!(result.output, "$bar");
    }

    #[test]
    fn test_replace_all_named_groups() {
        let matches = vec![make_match(
            0,
            7,
            "2024-01",
            vec![
                make_cap(1, Some("y"), 0, 4, "2024"),
                make_cap(2, Some("m"), 5, 7, "01"),
            ],
        )];
        let result = replace_all("2024-01", &matches, "${m}/${y}");
        assert_eq!(result.output, "01/2024");
    }

    #[test]
    fn test_expand_replacement_whole_match() {
        let m = make_match(0, 5, "hello", vec![]);
        assert_eq!(expand_replacement("$0", &m), "hello");
        assert_eq!(expand_replacement("$&", &m), "hello");
        assert_eq!(expand_replacement("[$0]", &m), "[hello]");
    }

    #[test]
    fn test_expand_replacement_non_ascii() {
        let m = make_match(0, 5, "hello", vec![]);
        // Non-ASCII characters in replacement template should work correctly
        assert_eq!(expand_replacement("café $0", &m), "café hello");
        assert_eq!(expand_replacement("→$0←", &m), "→hello←");
        assert_eq!(expand_replacement("日本語", &m), "日本語");
        assert_eq!(expand_replacement("über $& cool", &m), "über hello cool");
    }

    #[test]
    fn test_expand_replacement_literal_dollar_fallbacks() {
        let m = make_match(0, 5, "hello", vec![]);
        // A `$` that starts no reference is copied through unchanged.
        assert_eq!(expand_replacement("cost: $", &m), "cost: $");
        assert_eq!(expand_replacement("$x", &m), "$x");
        // An unterminated brace reference is kept verbatim.
        assert_eq!(expand_replacement("${open", &m), "${open");
    }

    #[test]
    fn test_expand_replacement_missing_group_is_empty() {
        let m = make_match(
            0,
            5,
            "hello",
            vec![make_cap(1, Some("word"), 0, 5, "hello")],
        );
        assert_eq!(expand_replacement("<$2>", &m), "<>");
        assert_eq!(expand_replacement("<${nope}>", &m), "<>");
    }

    #[test]
    fn test_expand_replacement_braced_and_two_digit_numbers() {
        let m = make_match(
            0,
            5,
            "hello",
            vec![
                make_cap(1, None, 0, 1, "a"),
                make_cap(12, None, 1, 5, "twelve"),
            ],
        );
        // Braces delimit the number, so the trailing digit stays literal.
        assert_eq!(expand_replacement("${1}2", &m), "a2");
        // Unbraced references take up to two digits.
        assert_eq!(expand_replacement("$12", &m), "twelve");
        assert_eq!(expand_replacement("$123", &m), "twelve3");
        assert_eq!(expand_replacement("${0}", &m), "hello");
    }

    #[test]
    fn test_replace_segments_tracking() {
        let matches = vec![make_match(6, 9, "123", vec![])];
        let result = replace_all("hello 123 world", &matches, "NUM");
        assert_eq!(result.output, "hello NUM world");
        assert_eq!(result.segments.len(), 3);
        // "hello " - original
        assert!(!result.segments[0].is_replacement);
        assert_eq!(
            &result.output[result.segments[0].start..result.segments[0].end],
            "hello "
        );
        // "NUM" - replacement
        assert!(result.segments[1].is_replacement);
        assert_eq!(
            &result.output[result.segments[1].start..result.segments[1].end],
            "NUM"
        );
        // " world" - original
        assert!(!result.segments[2].is_replacement);
        assert_eq!(
            &result.output[result.segments[2].start..result.segments[2].end],
            " world"
        );
    }
}