Skip to main content

ripsed_core/
matcher.rs

1use crate::error::RipsedError;
2use crate::operation::Op;
3use regex::Regex;
4
5/// Abstraction over literal and regex matching.
6#[derive(Debug)]
7pub enum Matcher {
8    Literal {
9        pattern: String,
10        case_insensitive: bool,
11    },
12    /// A regex matcher — used for both explicit `--regex` patterns and as the
13    /// implementation backing case-insensitive literal matching (via
14    /// `regex::escape` + `(?i)`), which avoids byte-offset mismatches from
15    /// `str::to_lowercase()` on multi-byte Unicode characters.
16    Regex(Regex),
17}
18
19impl Matcher {
20    /// Create a new matcher from an operation.
21    pub fn new(op: &Op) -> Result<Self, RipsedError> {
22        let pattern = op.find_pattern();
23        let is_regex = op.is_regex();
24        let case_insensitive = op.is_case_insensitive();
25
26        if is_regex || case_insensitive {
27            // For case-insensitive literals, escape the pattern and delegate to
28            // the regex engine which handles Unicode casing correctly.
29            let re_src = if is_regex {
30                pattern.to_string()
31            } else {
32                regex::escape(pattern)
33            };
34            let re_pattern = if case_insensitive {
35                format!("(?i){re_src}")
36            } else {
37                re_src
38            };
39            Regex::new(&re_pattern)
40                .map(Matcher::Regex)
41                .map_err(|e| RipsedError::invalid_regex(0, pattern, &e.to_string()))
42        } else {
43            Ok(Matcher::Literal {
44                pattern: pattern.to_string(),
45                case_insensitive: false,
46            })
47        }
48    }
49
50    /// Check if the given text matches.
51    pub fn is_match(&self, text: &str) -> bool {
52        match self {
53            Matcher::Literal { pattern, .. } => text.contains(pattern.as_str()),
54            Matcher::Regex(re) => re.is_match(text),
55        }
56    }
57
58    /// Replace all matches in the given text. Returns None if no matches.
59    pub fn replace(&self, text: &str, replacement: &str) -> Option<String> {
60        match self {
61            Matcher::Literal { pattern, .. } => {
62                if text.contains(pattern.as_str()) {
63                    Some(text.replace(pattern.as_str(), replacement))
64                } else {
65                    None
66                }
67            }
68            Matcher::Regex(re) => {
69                if re.is_match(text) {
70                    Some(re.replace_all(text, replacement).into_owned())
71                } else {
72                    None
73                }
74            }
75        }
76    }
77}
78
#[cfg(test)]
mod tests {
    use super::*;

    /// Assemble an `Op::Replace` and try to build a matcher from it.
    fn build(
        find: &str,
        replace: &str,
        regex: bool,
        case_insensitive: bool,
    ) -> Result<Matcher, RipsedError> {
        let op = Op::Replace {
            find: find.to_string(),
            replace: replace.to_string(),
            regex,
            case_insensitive,
        };
        Matcher::new(&op)
    }

    /// Case-sensitive literal matcher; panics if construction fails.
    fn literal_matcher(find: &str, replace: &str) -> Matcher {
        build(find, replace, false, false).unwrap()
    }

    /// Case-insensitive literal matcher; panics if construction fails.
    fn literal_ci_matcher(find: &str, replace: &str) -> Matcher {
        build(find, replace, false, true).unwrap()
    }

    /// Case-sensitive regex matcher; panics if construction fails.
    fn regex_matcher(find: &str, replace: &str) -> Matcher {
        build(find, replace, true, false).unwrap()
    }

    #[test]
    fn test_literal_match() {
        let m = literal_matcher("hello", "hi");
        assert!(m.is_match("say hello world"));
        assert!(!m.is_match("say Hi world"));
    }

    #[test]
    fn test_literal_case_insensitive() {
        let m = literal_ci_matcher("hello", "hi");
        assert!(m.is_match("say HELLO world"));
        assert!(m.is_match("say Hello world"));
    }

    #[test]
    fn test_regex_match() {
        let m = regex_matcher(r"fn\s+(\w+)", "fn new_$1");
        assert!(m.is_match("fn old_func() {"));
        assert!(!m.is_match("let x = 5;"));
    }

    #[test]
    fn test_regex_replace_with_captures() {
        let m = regex_matcher(r"fn\s+old_(\w+)", "fn new_$1");
        assert_eq!(
            m.replace("fn old_function() {", "fn new_$1"),
            Some("fn new_function() {".to_string())
        );
    }

    #[test]
    fn test_invalid_regex() {
        // Unbalanced parenthesis: compilation must fail with InvalidRegex.
        let err = build("fn (foo", "bar", true, false).unwrap_err();
        assert_eq!(err.code, crate::error::ErrorCode::InvalidRegex);
    }

    // ---------------------------------------------------------------
    // Empty pattern behavior
    // ---------------------------------------------------------------

    #[test]
    fn test_empty_pattern_literal_matches_everything() {
        let m = literal_matcher("", "x");
        // Every string, including the empty one, contains the empty string.
        assert!(m.is_match("anything"));
        assert!(m.is_match(""));
    }

    #[test]
    fn test_empty_pattern_literal_replace() {
        let m = literal_matcher("", "x");
        // str::replace with an empty needle inserts the replacement at the
        // start, between every char, and at the end.
        assert_eq!(m.replace("ab", "x"), Some("xaxbx".to_string()));
    }

    #[test]
    fn test_empty_pattern_regex_matches_everything() {
        let m = regex_matcher("", "x");
        assert!(m.is_match("anything"));
        assert!(m.is_match(""));
    }

    // ---------------------------------------------------------------
    // Pattern that matches entire line
    // ---------------------------------------------------------------

    #[test]
    fn test_pattern_matches_entire_line_literal() {
        let m = literal_matcher("hello world", "goodbye");
        assert_eq!(
            m.replace("hello world", "goodbye"),
            Some("goodbye".to_string())
        );
    }

    #[test]
    fn test_pattern_matches_entire_line_regex() {
        let m = regex_matcher(r"^.*$", "replaced");
        assert_eq!(
            m.replace("anything here", "replaced"),
            Some("replaced".to_string())
        );
    }

    #[test]
    fn test_regex_anchored_full_line() {
        let m = regex_matcher(r"^fn main\(\)$", "fn start()");
        assert!(m.is_match("fn main()"));
        // Leading whitespace defeats the `^` anchor.
        assert!(!m.is_match("  fn main()"));
        // Trailing content defeats the `$` anchor.
        assert!(!m.is_match("fn main() {"));
    }

    // ---------------------------------------------------------------
    // Case-insensitive with unicode (Turkish I problem, etc.)
    // ---------------------------------------------------------------

    #[test]
    fn test_case_insensitive_ascii() {
        let m = literal_ci_matcher("Hello", "hi");
        for haystack in ["HELLO", "hello", "HeLLo"] {
            assert!(m.is_match(haystack));
        }
        assert_eq!(
            m.replace("say HELLO there", "hi"),
            Some("say hi there".to_string())
        );
    }

    #[test]
    fn test_case_insensitive_german_eszett() {
        // The sharp-s (U+00DF) appears unchanged in both pattern and haystack
        // while the surrounding ASCII letters differ in case, exercising
        // case-insensitive matching around a multi-byte character.
        let m = literal_ci_matcher("stra\u{00DF}e", "street");
        assert!(m.is_match("STRA\u{00DF}E"));
    }

    #[test]
    fn test_case_insensitive_turkish_i_lowercase() {
        // U+0130 is the Turkish capital I with dot above, a well-known casing
        // edge case. The matcher must not panic and must behave consistently
        // with Unicode simple case folding.
        let m = literal_ci_matcher("i", "x");
        // ASCII "I" simple-folds to "i", so it matches.
        assert!(m.is_match("I"));
        // U+0130 has no simple fold to "i" (its full fold is "i\u{0307}") and
        // the regex engine only applies simple folds, so there is no match.
        // This avoids the false positives of the old to_lowercase()-based
        // byte-offset approach.
        assert!(!m.is_match("\u{0130}"));
    }

    // ---------------------------------------------------------------
    // Regex special characters in literal mode
    // ---------------------------------------------------------------

    #[test]
    fn test_literal_mode_regex_metacharacters() {
        // Each entry is a regex metacharacter that literal mode must treat as
        // ordinary text.
        const METACHARACTERS: [(&str, &str); 11] = [
            (".", "dot"),
            ("*", "star"),
            ("+", "plus"),
            ("?", "question"),
            ("(", "paren"),
            ("[", "bracket"),
            ("{", "brace"),
            ("^", "caret"),
            ("$", "dollar"),
            ("|", "pipe"),
            ("\\", "backslash"),
        ];

        for &(pat, name) in METACHARACTERS.iter() {
            let m = literal_matcher(pat, "X");
            let text = format!("before {pat} after");
            assert!(
                m.is_match(&text),
                "Literal mode should match '{name}' ({pat}) as a literal character"
            );
            assert_eq!(
                m.replace(&text, "X"),
                Some("before X after".to_string()),
                "Literal mode should replace '{name}' ({pat}) as a literal"
            );
        }
    }

    // ---------------------------------------------------------------
    // Multiple matches on same line
    // ---------------------------------------------------------------

    #[test]
    fn test_multiple_matches_same_line() {
        let m = literal_matcher("ab", "X");
        assert_eq!(
            m.replace("ab cd ab ef ab", "X"),
            Some("X cd X ef X".to_string())
        );
    }

    #[test]
    fn test_replace_with_empty_string() {
        let m = literal_matcher("remove", "");
        assert_eq!(
            m.replace("please remove this", ""),
            Some("please  this".to_string())
        );
    }

    #[test]
    fn test_no_match_returns_none() {
        let m = literal_matcher("xyz", "abc");
        assert!(m.replace("nothing here", "abc").is_none());
    }
}