Skip to main content

rgx/engine/
mod.rs

1pub mod fancy;
2#[cfg(feature = "pcre2-engine")]
3pub mod pcre2;
4pub mod rust_regex;
5
6use std::fmt;
7
/// Identifies one of the regex backends exposed by this module.
///
/// The `Pcre2` variant only exists when the crate is built with the
/// `pcre2-engine` feature. The type is `Copy` and hashable, so it can be used
/// directly as a map or set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum EngineKind {
    /// Backend provided by the `rust_regex` submodule.
    RustRegex,
    /// Backend provided by the `fancy` submodule.
    FancyRegex,
    /// Backend provided by the `pcre2` submodule (feature-gated).
    #[cfg(feature = "pcre2-engine")]
    Pcre2,
}
15
16impl EngineKind {
17    pub fn all() -> Vec<EngineKind> {
18        vec![
19            EngineKind::RustRegex,
20            EngineKind::FancyRegex,
21            #[cfg(feature = "pcre2-engine")]
22            EngineKind::Pcre2,
23        ]
24    }
25
26    pub fn next(self) -> EngineKind {
27        match self {
28            EngineKind::RustRegex => EngineKind::FancyRegex,
29            #[cfg(feature = "pcre2-engine")]
30            EngineKind::FancyRegex => EngineKind::Pcre2,
31            #[cfg(not(feature = "pcre2-engine"))]
32            EngineKind::FancyRegex => EngineKind::RustRegex,
33            #[cfg(feature = "pcre2-engine")]
34            EngineKind::Pcre2 => EngineKind::RustRegex,
35        }
36    }
37}
38
39impl fmt::Display for EngineKind {
40    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
41        match self {
42            EngineKind::RustRegex => write!(f, "Rust regex"),
43            EngineKind::FancyRegex => write!(f, "fancy-regex"),
44            #[cfg(feature = "pcre2-engine")]
45            EngineKind::Pcre2 => write!(f, "PCRE2"),
46        }
47    }
48}
49
/// Set of boolean matching options handed to an engine at compile time.
///
/// Every flag defaults to `false`. How each flag is interpreted is up to the
/// engine that receives it; the descriptions below follow the field names.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct EngineFlags {
    /// Request case-insensitive matching.
    pub case_insensitive: bool,
    /// Request multi-line mode.
    pub multi_line: bool,
    /// Request that `.` also matches newline characters.
    pub dot_matches_newline: bool,
    /// Request Unicode-aware matching.
    pub unicode: bool,
    /// Request extended (verbose) pattern syntax.
    pub extended: bool,
}
58
59impl EngineFlags {
60    pub fn toggle_case_insensitive(&mut self) {
61        self.case_insensitive = !self.case_insensitive;
62    }
63    pub fn toggle_multi_line(&mut self) {
64        self.multi_line = !self.multi_line;
65    }
66    pub fn toggle_dot_matches_newline(&mut self) {
67        self.dot_matches_newline = !self.dot_matches_newline;
68    }
69    pub fn toggle_unicode(&mut self) {
70        self.unicode = !self.unicode;
71    }
72    pub fn toggle_extended(&mut self) {
73        self.extended = !self.extended;
74    }
75}
76
/// One match of a compiled pattern against a subject string.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Match {
    /// Byte offset in the subject where the match begins.
    pub start: usize,
    /// Byte offset in the subject just past the end of the match.
    pub end: usize,
    /// The matched text; this is what `$0` / `$&` expand to in replacements.
    pub text: String,
    /// Capture groups recorded for this match.
    pub captures: Vec<CaptureGroup>,
}

/// A capture group belonging to a [`Match`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CaptureGroup {
    /// Group number; replacement templates refer to it as `$N` / `${N}`.
    pub index: usize,
    /// Group name, if any; replacement templates refer to it as `${name}`.
    pub name: Option<String>,
    /// Start offset of the captured text (presumably a byte offset into the
    /// subject, like [`Match::start`] — confirm against the engines).
    pub start: usize,
    /// End offset of the captured text (presumably exclusive — confirm
    /// against the engines).
    pub end: usize,
    /// The captured text.
    pub text: String,
}
93
/// Error reported by a regex engine; each variant carries a message string.
///
/// The type is cloneable and comparable, so errors can be stored alongside
/// other state and asserted on in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EngineError {
    /// Failure associated with compiling a pattern; displayed as
    /// `Compile error: <message>`.
    CompileError(String),
    /// Failure associated with running a match; displayed as
    /// `Match error: <message>`.
    MatchError(String),
}
99
100impl fmt::Display for EngineError {
101    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
102        match self {
103            EngineError::CompileError(msg) => write!(f, "Compile error: {msg}"),
104            EngineError::MatchError(msg) => write!(f, "Match error: {msg}"),
105        }
106    }
107}
108
109impl std::error::Error for EngineError {}
110
111pub type EngineResult<T> = Result<T, EngineError>;
112
113pub trait RegexEngine: Send + Sync {
114    fn kind(&self) -> EngineKind;
115    fn compile(&self, pattern: &str, flags: &EngineFlags) -> EngineResult<Box<dyn CompiledRegex>>;
116}
117
118pub trait CompiledRegex: Send + Sync {
119    fn find_matches(&self, text: &str) -> EngineResult<Vec<Match>>;
120}
121
122pub fn create_engine(kind: EngineKind) -> Box<dyn RegexEngine> {
123    match kind {
124        EngineKind::RustRegex => Box::new(rust_regex::RustRegexEngine),
125        EngineKind::FancyRegex => Box::new(fancy::FancyRegexEngine),
126        #[cfg(feature = "pcre2-engine")]
127        EngineKind::Pcre2 => Box::new(pcre2::Pcre2Engine),
128    }
129}
130
131// --- Replace/Substitution support ---
132
/// A contiguous byte range of [`ReplaceResult::output`], tagged with where its
/// text came from.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReplaceSegment {
    /// Byte offset in the output where the segment begins.
    pub start: usize,
    /// Byte offset in the output just past the end of the segment.
    pub end: usize,
    /// `true` if the text was produced by the replacement template, `false`
    /// if it was copied unchanged from the input.
    pub is_replacement: bool,
}

/// Outcome of a replace-all operation.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReplaceResult {
    /// The full text after substitution.
    pub output: String,
    /// Segments describing `output`, in order of appearance.
    pub segments: Vec<ReplaceSegment>,
}
145
146/// Expand a replacement template against a single match.
147///
148/// Supports: `$0` / `$&` (whole match), `$1`..`$99` (numbered groups),
149/// `${name}` (named groups), `$$` (literal `$`).
150fn expand_replacement(template: &str, m: &Match) -> String {
151    let mut result = String::new();
152    let mut chars = template.char_indices().peekable();
153
154    while let Some((_i, c)) = chars.next() {
155        if c == '$' {
156            match chars.peek() {
157                None => {
158                    result.push('$');
159                }
160                Some(&(_, '$')) => {
161                    chars.next();
162                    result.push('$');
163                }
164                Some(&(_, '&')) => {
165                    chars.next();
166                    result.push_str(&m.text);
167                }
168                Some(&(_, '{')) => {
169                    chars.next(); // consume '{'
170                    let brace_start = chars.peek().map(|&(idx, _)| idx).unwrap_or(template.len());
171                    if let Some(close) = template[brace_start..].find('}') {
172                        let ref_name = &template[brace_start..brace_start + close];
173                        if let Some(text) = lookup_capture(m, ref_name) {
174                            result.push_str(text);
175                        }
176                        // Advance past the content and closing brace
177                        let end_byte = brace_start + close + 1;
178                        while chars.peek().is_some_and(|&(idx, _)| idx < end_byte) {
179                            chars.next();
180                        }
181                    } else {
182                        result.push('$');
183                        result.push('{');
184                    }
185                }
186                Some(&(_, next_c)) if next_c.is_ascii_digit() => {
187                    let (_, d1) = chars.next().unwrap();
188                    let mut num_str = String::from(d1);
189                    // Grab a second digit if present
190                    if let Some(&(_, d2)) = chars.peek() {
191                        if d2.is_ascii_digit() {
192                            chars.next();
193                            num_str.push(d2);
194                        }
195                    }
196                    let idx: usize = num_str.parse().unwrap_or(0);
197                    if idx == 0 {
198                        result.push_str(&m.text);
199                    } else if let Some(cap) = m.captures.iter().find(|c| c.index == idx) {
200                        result.push_str(&cap.text);
201                    }
202                }
203                Some(_) => {
204                    result.push('$');
205                }
206            }
207        } else {
208            result.push(c);
209        }
210    }
211
212    result
213}
214
215/// Look up a capture by name or numeric string.
216fn lookup_capture<'a>(m: &'a Match, key: &str) -> Option<&'a str> {
217    // Try as number first
218    if let Ok(idx) = key.parse::<usize>() {
219        if idx == 0 {
220            return Some(&m.text);
221        }
222        return m
223            .captures
224            .iter()
225            .find(|c| c.index == idx)
226            .map(|c| c.text.as_str());
227    }
228    // Try as named capture
229    m.captures
230        .iter()
231        .find(|c| c.name.as_deref() == Some(key))
232        .map(|c| c.text.as_str())
233}
234
235/// Perform replacement across all matches, returning the output string and segment metadata.
236pub fn replace_all(text: &str, matches: &[Match], template: &str) -> ReplaceResult {
237    let mut output = String::new();
238    let mut segments = Vec::new();
239    let mut pos = 0;
240
241    for m in matches {
242        // Original text before this match
243        if m.start > pos {
244            let seg_start = output.len();
245            output.push_str(&text[pos..m.start]);
246            segments.push(ReplaceSegment {
247                start: seg_start,
248                end: output.len(),
249                is_replacement: false,
250            });
251        }
252        // Expanded replacement
253        let expanded = expand_replacement(template, m);
254        if !expanded.is_empty() {
255            let seg_start = output.len();
256            output.push_str(&expanded);
257            segments.push(ReplaceSegment {
258                start: seg_start,
259                end: output.len(),
260                is_replacement: true,
261            });
262        }
263        pos = m.end;
264    }
265
266    // Trailing original text
267    if pos < text.len() {
268        let seg_start = output.len();
269        output.push_str(&text[pos..]);
270        segments.push(ReplaceSegment {
271            start: seg_start,
272            end: output.len(),
273            is_replacement: false,
274        });
275    }
276
277    ReplaceResult { output, segments }
278}
279
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `Match` with owned text from borrowed test data.
    fn make_match(start: usize, end: usize, text: &str, captures: Vec<CaptureGroup>) -> Match {
        Match {
            start,
            end,
            text: text.to_string(),
            captures,
        }
    }

    /// Builds a `CaptureGroup` with owned name/text from borrowed test data.
    fn make_cap(
        index: usize,
        name: Option<&str>,
        start: usize,
        end: usize,
        text: &str,
    ) -> CaptureGroup {
        CaptureGroup {
            index,
            name: name.map(str::to_string),
            start,
            end,
            text: text.to_string(),
        }
    }

    /// A `user@example` match with two numbered groups.
    fn email_match() -> Match {
        make_match(
            0,
            12,
            "user@example",
            vec![
                make_cap(1, None, 0, 4, "user"),
                make_cap(2, None, 5, 12, "example"),
            ],
        )
    }

    /// A `2024-01` match with groups named `y` and `m`.
    fn date_match() -> Match {
        make_match(
            0,
            7,
            "2024-01",
            vec![
                make_cap(1, Some("y"), 0, 4, "2024"),
                make_cap(2, Some("m"), 5, 7, "01"),
            ],
        )
    }

    #[test]
    fn test_replace_all_basic() {
        let matches = vec![email_match()];
        let result = replace_all("user@example", &matches, "$2=$1");
        assert_eq!(result.output, "example=user");
    }

    #[test]
    fn test_replace_all_no_matches() {
        let result = replace_all("hello world", &[], "replacement");
        assert_eq!(result.output, "hello world");
        assert_eq!(result.segments.len(), 1);
        assert!(!result.segments[0].is_replacement);
    }

    #[test]
    fn test_replace_all_empty_template() {
        let matches = vec![
            make_match(4, 7, "123", vec![]),
            make_match(12, 15, "456", vec![]),
        ];
        let result = replace_all("abc 123 def 456 ghi", &matches, "");
        assert_eq!(result.output, "abc  def  ghi");
        // Empty expansions leave no replacement segments behind.
        assert_eq!(result.segments.len(), 3);
        assert!(result.segments.iter().all(|s| !s.is_replacement));
    }

    #[test]
    fn test_replace_all_literal_dollar() {
        let matches = vec![make_match(0, 3, "foo", vec![])];
        let result = replace_all("foo", &matches, "$$bar");
        assert_eq!(result.output, "$bar");
    }

    #[test]
    fn test_replace_all_named_groups() {
        let matches = vec![date_match()];
        let result = replace_all("2024-01", &matches, "${m}/${y}");
        assert_eq!(result.output, "01/2024");
    }

    #[test]
    fn test_replace_all_empty_input() {
        let result = replace_all("", &[], "anything");
        assert_eq!(result.output, "");
        assert!(result.segments.is_empty());
    }

    #[test]
    fn test_expand_replacement_whole_match() {
        let m = make_match(0, 5, "hello", vec![]);
        assert_eq!(expand_replacement("$0", &m), "hello");
        assert_eq!(expand_replacement("$&", &m), "hello");
        assert_eq!(expand_replacement("[$0]", &m), "[hello]");
    }

    #[test]
    fn test_expand_replacement_non_ascii() {
        let m = make_match(0, 5, "hello", vec![]);
        // Non-ASCII characters in replacement template should work correctly
        assert_eq!(expand_replacement("café $0", &m), "café hello");
        assert_eq!(expand_replacement("→$0←", &m), "→hello←");
        assert_eq!(expand_replacement("日本語", &m), "日本語");
        assert_eq!(expand_replacement("über $& cool", &m), "über hello cool");
    }

    #[test]
    fn test_expand_replacement_literal_dollar_fallbacks() {
        let m = make_match(0, 5, "hello", vec![]);
        // A `$` that does not start a placeholder is copied through.
        assert_eq!(expand_replacement("cost$", &m), "cost$");
        assert_eq!(expand_replacement("$x", &m), "$x");
        // An unterminated brace reference is kept literally.
        assert_eq!(expand_replacement("${open", &m), "${open");
    }

    #[test]
    fn test_expand_replacement_unknown_groups() {
        let m = make_match(0, 5, "hello", vec![]);
        // References to groups that do not exist expand to nothing.
        assert_eq!(expand_replacement("<$7>", &m), "<>");
        assert_eq!(expand_replacement("${missing}!", &m), "!");
    }

    #[test]
    fn test_expand_replacement_braced_number() {
        let m = email_match();
        // Braces delimit the group number from a following literal digit.
        assert_eq!(expand_replacement("${1}0", &m), "user0");
        assert_eq!(expand_replacement("${0}", &m), "user@example");
    }

    #[test]
    fn test_lookup_capture() {
        let m = date_match();
        assert_eq!(lookup_capture(&m, "0"), Some("2024-01"));
        assert_eq!(lookup_capture(&m, "2"), Some("01"));
        assert_eq!(lookup_capture(&m, "y"), Some("2024"));
        assert_eq!(lookup_capture(&m, "3"), None);
        assert_eq!(lookup_capture(&m, "nope"), None);
    }

    #[test]
    fn test_replace_segments_tracking() {
        let matches = vec![make_match(6, 9, "123", vec![])];
        let result = replace_all("hello 123 world", &matches, "NUM");
        assert_eq!(result.output, "hello NUM world");
        assert_eq!(result.segments.len(), 3);
        // "hello " - original
        assert!(!result.segments[0].is_replacement);
        assert_eq!(
            &result.output[result.segments[0].start..result.segments[0].end],
            "hello "
        );
        // "NUM" - replacement
        assert!(result.segments[1].is_replacement);
        assert_eq!(
            &result.output[result.segments[1].start..result.segments[1].end],
            "NUM"
        );
        // " world" - original
        assert!(!result.segments[2].is_replacement);
        assert_eq!(
            &result.output[result.segments[2].start..result.segments[2].end],
            " world"
        );
    }

    #[test]
    fn test_replace_segments_tile_output() {
        let matches = vec![
            make_match(4, 7, "123", vec![]),
            make_match(12, 15, "456", vec![]),
        ];
        let result = replace_all("abc 123 def 456 ghi", &matches, "[$0]");
        assert_eq!(result.output, "abc [123] def [456] ghi");

        let flags: Vec<bool> = result.segments.iter().map(|s| s.is_replacement).collect();
        assert_eq!(flags, vec![false, true, false, true, false]);

        // Segments are contiguous and together cover the whole output.
        let mut expected_start = 0;
        for segment in &result.segments {
            assert_eq!(segment.start, expected_start);
            expected_start = segment.end;
        }
        assert_eq!(expected_start, result.output.len());
    }
}