Skip to main content

ripsed_core/
matcher.rs

1use crate::error::RipsedError;
2use crate::operation::Op;
3use regex::Regex;
4
5/// Abstraction over literal and regex matching.
6#[derive(Debug)]
7pub enum Matcher {
8    Literal {
9        pattern: String,
10        case_insensitive: bool,
11    },
12    Regex(Regex),
13}
14
15impl Matcher {
16    /// Create a new matcher from an operation.
17    pub fn new(op: &Op) -> Result<Self, RipsedError> {
18        let pattern = op.find_pattern();
19        let is_regex = op.is_regex();
20        let case_insensitive = op.is_case_insensitive();
21
22        if is_regex {
23            let re_pattern = if case_insensitive {
24                format!("(?i){pattern}")
25            } else {
26                pattern.to_string()
27            };
28            Regex::new(&re_pattern)
29                .map(Matcher::Regex)
30                .map_err(|e| RipsedError::invalid_regex(0, pattern, &e.to_string()))
31        } else {
32            Ok(Matcher::Literal {
33                pattern: pattern.to_string(),
34                case_insensitive,
35            })
36        }
37    }
38
39    /// Check if the given text matches.
40    pub fn is_match(&self, text: &str) -> bool {
41        match self {
42            Matcher::Literal {
43                pattern,
44                case_insensitive,
45            } => {
46                if *case_insensitive {
47                    text.to_lowercase().contains(&pattern.to_lowercase())
48                } else {
49                    text.contains(pattern.as_str())
50                }
51            }
52            Matcher::Regex(re) => re.is_match(text),
53        }
54    }
55
56    /// Replace all matches in the given text. Returns None if no matches.
57    pub fn replace(&self, text: &str, replacement: &str) -> Option<String> {
58        match self {
59            Matcher::Literal {
60                pattern,
61                case_insensitive,
62            } => {
63                if *case_insensitive {
64                    // Case-insensitive literal replace
65                    let lower_text = text.to_lowercase();
66                    let lower_pattern = pattern.to_lowercase();
67                    if !lower_text.contains(&lower_pattern) {
68                        return None;
69                    }
70                    let mut result = String::with_capacity(text.len());
71                    let mut search_start = 0;
72                    while let Some(pos) = lower_text[search_start..].find(&lower_pattern) {
73                        let abs_pos = search_start + pos;
74                        result.push_str(&text[search_start..abs_pos]);
75                        result.push_str(replacement);
76                        search_start = abs_pos + pattern.len();
77                    }
78                    result.push_str(&text[search_start..]);
79                    Some(result)
80                } else if text.contains(pattern.as_str()) {
81                    Some(text.replace(pattern.as_str(), replacement))
82                } else {
83                    None
84                }
85            }
86            Matcher::Regex(re) => {
87                if re.is_match(text) {
88                    Some(re.replace_all(text, replacement).into_owned())
89                } else {
90                    None
91                }
92            }
93        }
94    }
95}
96
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a matcher from an `Op::Replace` with the given settings,
    /// leaving the construction result for the caller to inspect.
    fn try_matcher(
        find: &str,
        replace: &str,
        regex: bool,
        case_insensitive: bool,
    ) -> Result<Matcher, RipsedError> {
        Matcher::new(&Op::Replace {
            find: find.to_string(),
            replace: replace.to_string(),
            regex,
            case_insensitive,
        })
    }

    /// Case-sensitive literal matcher.
    fn literal(find: &str, replace: &str) -> Matcher {
        try_matcher(find, replace, false, false).unwrap()
    }

    /// Case-insensitive literal matcher.
    fn literal_ignore_case(find: &str, replace: &str) -> Matcher {
        try_matcher(find, replace, false, true).unwrap()
    }

    /// Case-sensitive regex matcher.
    fn regex_matcher(find: &str, replace: &str) -> Matcher {
        try_matcher(find, replace, true, false).unwrap()
    }

    #[test]
    fn test_literal_match() {
        let m = literal("hello", "hi");
        assert!(m.is_match("say hello world"));
        assert!(!m.is_match("say Hi world"));
    }

    #[test]
    fn test_literal_case_insensitive() {
        let m = literal_ignore_case("hello", "hi");
        assert!(m.is_match("say HELLO world"));
        assert!(m.is_match("say Hello world"));
    }

    #[test]
    fn test_regex_match() {
        let m = regex_matcher(r"fn\s+(\w+)", "fn new_$1");
        assert!(m.is_match("fn old_func() {"));
        assert!(!m.is_match("let x = 5;"));
    }

    #[test]
    fn test_regex_replace_with_captures() {
        let m = regex_matcher(r"fn\s+old_(\w+)", "fn new_$1");
        let replaced = m.replace("fn old_function() {", "fn new_$1");
        assert_eq!(replaced.as_deref(), Some("fn new_function() {"));
    }

    #[test]
    fn test_invalid_regex() {
        let err = try_matcher("fn (foo", "bar", true, false).unwrap_err();
        assert_eq!(err.code, crate::error::ErrorCode::InvalidRegex);
    }

    // ---------------------------------------------------------------
    // Empty pattern behavior
    // ---------------------------------------------------------------

    #[test]
    fn test_empty_pattern_literal_matches_everything() {
        let m = literal("", "x");
        // Every string, including the empty one, contains the empty string.
        assert!(m.is_match("anything"));
        assert!(m.is_match(""));
    }

    #[test]
    fn test_empty_pattern_literal_replace() {
        let m = literal("", "x");
        // str::replace("", "x") puts "x" at the start, between chars, and at the end.
        let replaced = m.replace("ab", "x");
        assert_eq!(replaced.as_deref(), Some("xaxbx"));
    }

    #[test]
    fn test_empty_pattern_regex_matches_everything() {
        let m = regex_matcher("", "x");
        assert!(m.is_match("anything"));
        assert!(m.is_match(""));
    }

    // ---------------------------------------------------------------
    // Pattern that matches entire line
    // ---------------------------------------------------------------

    #[test]
    fn test_pattern_matches_entire_line_literal() {
        let m = literal("hello world", "goodbye");
        let replaced = m.replace("hello world", "goodbye");
        assert_eq!(replaced.as_deref(), Some("goodbye"));
    }

    #[test]
    fn test_pattern_matches_entire_line_regex() {
        let m = regex_matcher(r"^.*$", "replaced");
        let replaced = m.replace("anything here", "replaced");
        assert_eq!(replaced.as_deref(), Some("replaced"));
    }

    #[test]
    fn test_regex_anchored_full_line() {
        let m = regex_matcher(r"^fn main\(\)$", "fn start()");
        assert!(m.is_match("fn main()"));
        assert!(!m.is_match("  fn main()")); // leading whitespace
        assert!(!m.is_match("fn main() {")); // trailing content
    }

    // ---------------------------------------------------------------
    // Case-insensitive with unicode (Turkish I problem, etc.)
    // ---------------------------------------------------------------

    #[test]
    fn test_case_insensitive_ascii() {
        let m = literal_ignore_case("Hello", "hi");
        for candidate in ["HELLO", "hello", "HeLLo"] {
            assert!(m.is_match(candidate));
        }
        let replaced = m.replace("say HELLO there", "hi");
        assert_eq!(replaced.as_deref(), Some("say hi there"));
    }

    #[test]
    fn test_case_insensitive_german_eszett() {
        // Sharp-s (\u{00DF}) is its own lowercase form, so the uppercase
        // letters around it fold while the sharp-s itself stays put.
        // Exercises non-trivial Unicode casing in the literal path.
        let m = literal_ignore_case("stra\u{00DF}e", "street");
        assert!(m.is_match("STRA\u{00DF}E"));
    }

    #[test]
    fn test_case_insensitive_turkish_i_lowercase() {
        // \u{0130} (capital I with dot above) lowercases to "i\u{0307}".
        // Known edge case: mainly checks that matching does not panic.
        let m = literal_ignore_case("i", "x");
        // Plain "I" lowercases to "i".
        assert!(m.is_match("I"));
        // "i\u{0307}" contains "i", so the dotted capital matches as well.
        assert!(m.is_match("\u{0130}"));
    }

    // ---------------------------------------------------------------
    // Regex special characters in literal mode
    // ---------------------------------------------------------------

    #[test]
    fn test_literal_mode_regex_metacharacters() {
        // Each of these means something to a regex engine but must be
        // matched as an ordinary character in literal mode.
        let metacharacters = [
            (".", "dot"),
            ("*", "star"),
            ("+", "plus"),
            ("?", "question"),
            ("(", "paren"),
            ("[", "bracket"),
            ("{", "brace"),
            ("^", "caret"),
            ("$", "dollar"),
            ("|", "pipe"),
            ("\\", "backslash"),
        ];
        for (pat, name) in metacharacters {
            let m = literal(pat, "X");
            let text = format!("before {pat} after");
            assert!(
                m.is_match(&text),
                "Literal mode should match '{name}' ({pat}) as a literal character"
            );
            assert_eq!(
                m.replace(&text, "X"),
                Some("before X after".to_string()),
                "Literal mode should replace '{name}' ({pat}) as a literal"
            );
        }
    }

    // ---------------------------------------------------------------
    // Multiple matches on same line
    // ---------------------------------------------------------------

    #[test]
    fn test_multiple_matches_same_line() {
        let m = literal("ab", "X");
        let replaced = m.replace("ab cd ab ef ab", "X");
        assert_eq!(replaced.as_deref(), Some("X cd X ef X"));
    }

    #[test]
    fn test_replace_with_empty_string() {
        let m = literal("remove", "");
        let replaced = m.replace("please remove this", "");
        assert_eq!(replaced.as_deref(), Some("please  this"));
    }

    #[test]
    fn test_no_match_returns_none() {
        let m = literal("xyz", "abc");
        assert!(m.replace("nothing here", "abc").is_none());
    }
}