Skip to main content

ripsed_core/
matcher.rs

1use crate::error::RipsedError;
2use crate::operation::Op;
3use regex::Regex;
4
/// A single replacement site reported by [`Matcher::find_replacements`].
///
/// `start..end` is the half-open byte range of the match inside the original
/// text, and `replacement` is the text to splice in over that range.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MatchSpan {
    /// Byte index in the original text at which the match begins.
    pub start: usize,
    /// Byte index in the original text just past the last matched byte.
    pub end: usize,
    /// Replacement text, with capture references such as `$1` already
    /// expanded.
    pub replacement: String,
}
16
17/// Abstraction over literal and regex matching.
18#[derive(Debug)]
19pub enum Matcher {
20    Literal {
21        pattern: String,
22    },
23    /// A regex matcher — used for both explicit `--regex` patterns and as the
24    /// implementation backing case-insensitive literal matching (via
25    /// `regex::escape` + `(?i)`), which avoids byte-offset mismatches from
26    /// `str::to_lowercase()` on multi-byte Unicode characters.
27    Regex {
28        re: Regex,
29        /// Whole-buffer fast-reject shadow: the same pattern compiled with
30        /// `(?m)` so `^`/`$` keep their per-line meaning against a full
31        /// buffer. `None` when no sound shadow exists (see
32        /// [`prescreen_shadow`]) — then prescreening always says "maybe".
33        prescreen: Option<Regex>,
34    },
35}
36
37/// Build the whole-buffer prescreen shadow for a regex pattern, or `None`
38/// when a sound one can't be constructed.
39///
40/// Prepending `(?m)` gives `^`/`$` the same line-boundary semantics on a
41/// whole buffer that they have when matching line by line. That is NOT
42/// sound for patterns using `\A`/`\z`/`\Z` (which anchor to the haystack —
43/// each *line* in per-line matching, the whole buffer in the shadow) or
44/// containing a flag-negating group like `(?-m)` that could switch the
45/// multiline flag back off. Those patterns simply don't get a prescreen.
46fn prescreen_shadow(re_pattern: &str) -> Option<Regex> {
47    // (`\Z` needs no check: the regex crate rejects it at compile time,
48    // so such a pattern never reaches prescreening.)
49    if re_pattern.contains(r"\A") || re_pattern.contains(r"\z") || re_pattern.contains("(?-") {
50        return None;
51    }
52    Regex::new(&format!("(?m){re_pattern}")).ok()
53}
54
55impl Matcher {
56    /// Create a new matcher from an operation.
57    pub fn new(op: &Op) -> Result<Self, RipsedError> {
58        let pattern = op.find_pattern();
59        let is_regex = op.is_regex();
60        let case_insensitive = op.is_case_insensitive();
61
62        if is_regex || case_insensitive {
63            // For case-insensitive literals, escape the pattern and delegate to
64            // the regex engine which handles Unicode casing correctly.
65            let re_src = if is_regex {
66                pattern.to_string()
67            } else {
68                regex::escape(pattern)
69            };
70            let re_pattern = if case_insensitive {
71                format!("(?i){re_src}")
72            } else {
73                re_src
74            };
75            Regex::new(&re_pattern)
76                .map(|re| Matcher::Regex {
77                    prescreen: prescreen_shadow(&re_pattern),
78                    re,
79                })
80                .map_err(|e| {
81                    let mut err = RipsedError::invalid_regex(0, pattern, &e.to_string());
82                    err.operation_index = None;
83                    err
84                })
85        } else {
86            Ok(Matcher::Literal {
87                pattern: pattern.to_string(),
88            })
89        }
90    }
91
92    /// Cheap whole-buffer check: `false` means no line of `text` can match
93    /// this pattern, so per-line processing can be skipped entirely.
94    /// `true` means "maybe" — false positives are fine, false negatives
95    /// are a correctness bug (locked by a proptest).
96    pub fn prescreen(&self, text: &str) -> bool {
97        match self {
98            Matcher::Literal { pattern } => text.contains(pattern.as_str()),
99            Matcher::Regex {
100                prescreen: Some(shadow),
101                ..
102            } => shadow.is_match(text),
103            // No sound shadow — always maybe.
104            Matcher::Regex {
105                prescreen: None, ..
106            } => true,
107        }
108    }
109
110    /// Check if the given text matches.
111    pub fn is_match(&self, text: &str) -> bool {
112        match self {
113            Matcher::Literal { pattern, .. } => text.contains(pattern.as_str()),
114            Matcher::Regex { re, .. } => re.is_match(text),
115        }
116    }
117
118    /// Replace all matches in the given text. Returns None if no matches.
119    pub fn replace(&self, text: &str, replacement: &str) -> Option<String> {
120        match self {
121            Matcher::Literal { pattern, .. } => {
122                if text.contains(pattern.as_str()) {
123                    Some(text.replace(pattern.as_str(), replacement))
124                } else {
125                    None
126                }
127            }
128            Matcher::Regex { re, .. } => {
129                if re.is_match(text) {
130                    Some(re.replace_all(text, replacement).into_owned())
131                } else {
132                    None
133                }
134            }
135        }
136    }
137
138    /// Replace up to `limit` matches (0 = unlimited), left to right.
139    ///
140    /// Returns the new text and how many occurrences were replaced, or
141    /// `None` if nothing matched. With `limit == 0` this is exactly
142    /// [`Matcher::replace`] plus the occurrence count.
143    pub fn replace_n(
144        &self,
145        text: &str,
146        replacement: &str,
147        limit: usize,
148    ) -> Option<(String, usize)> {
149        match self {
150            Matcher::Literal { pattern } => {
151                let occurrences = text.match_indices(pattern.as_str()).count();
152                if occurrences == 0 {
153                    return None;
154                }
155                let n = if limit == 0 {
156                    occurrences
157                } else {
158                    occurrences.min(limit)
159                };
160                Some((text.replacen(pattern.as_str(), replacement, n), n))
161            }
162            Matcher::Regex { re, .. } => {
163                let occurrences = re.find_iter(text).count();
164                if occurrences == 0 {
165                    return None;
166                }
167                let n = if limit == 0 {
168                    occurrences
169                } else {
170                    occurrences.min(limit)
171                };
172                Some((re.replacen(text, n, replacement).into_owned(), n))
173            }
174        }
175    }
176
177    /// Find every non-overlapping match in `text` and compute its expanded
178    /// replacement, left to right.
179    ///
180    /// Spans are returned in ascending order and never overlap, with the
181    /// same semantics as [`Matcher::replace`] (`str::replace` for literals,
182    /// `Regex::replace_all` for regexes) — splicing each span's replacement
183    /// into the original text reproduces `replace`'s output exactly.
184    pub fn find_replacements(&self, text: &str, replacement: &str) -> Vec<MatchSpan> {
185        match self {
186            Matcher::Literal { pattern } => text
187                .match_indices(pattern.as_str())
188                .map(|(start, matched)| MatchSpan {
189                    start,
190                    end: start + matched.len(),
191                    replacement: replacement.to_string(),
192                })
193                .collect(),
194            Matcher::Regex { re, .. } => re
195                .captures_iter(text)
196                .map(|caps| {
197                    let m = caps.get(0).expect("capture group 0 always exists");
198                    let mut expanded = String::new();
199                    caps.expand(replacement, &mut expanded);
200                    MatchSpan {
201                        start: m.start(),
202                        end: m.end(),
203                        replacement: expanded,
204                    }
205                })
206                .collect(),
207        }
208    }
209}
210
#[cfg(test)]
mod tests {
    use super::*;

    /// Build an `Op::Replace` with the default `count` and `multiline: false`.
    fn replace_op(find: &str, replace: &str, regex: bool, case_insensitive: bool) -> Op {
        Op::Replace {
            count: Default::default(),
            multiline: false,
            find: find.to_string(),
            replace: replace.to_string(),
            regex,
            case_insensitive,
        }
    }

    /// Case-sensitive literal matcher.
    fn literal_matcher(find: &str, replace: &str) -> Matcher {
        Matcher::new(&replace_op(find, replace, false, false)).unwrap()
    }

    /// Case-insensitive literal matcher.
    fn literal_ci_matcher(find: &str, replace: &str) -> Matcher {
        Matcher::new(&replace_op(find, replace, false, true)).unwrap()
    }

    /// Case-sensitive regex matcher.
    fn regex_matcher(find: &str, replace: &str) -> Matcher {
        Matcher::new(&replace_op(find, replace, true, false)).unwrap()
    }

    #[test]
    fn test_literal_match() {
        let matcher = literal_matcher("hello", "hi");
        assert!(matcher.is_match("say hello world"));
        assert!(!matcher.is_match("say Hi world"));
    }

    #[test]
    fn test_literal_case_insensitive() {
        let matcher = literal_ci_matcher("hello", "hi");
        assert!(matcher.is_match("say HELLO world"));
        assert!(matcher.is_match("say Hello world"));
    }

    #[test]
    fn test_regex_match() {
        let matcher = regex_matcher(r"fn\s+(\w+)", "fn new_$1");
        assert!(matcher.is_match("fn old_func() {"));
        assert!(!matcher.is_match("let x = 5;"));
    }

    #[test]
    fn test_regex_replace_with_captures() {
        let matcher = regex_matcher(r"fn\s+old_(\w+)", "fn new_$1");
        let replaced = matcher.replace("fn old_function() {", "fn new_$1");
        assert_eq!(replaced, Some("fn new_function() {".to_string()));
    }

    #[test]
    fn test_invalid_regex() {
        let op = replace_op("fn (foo", "bar", true, false);
        let err = Matcher::new(&op).unwrap_err();
        assert_eq!(err.code, crate::error::ErrorCode::InvalidRegex);
    }

    // ---------------------------------------------------------------
    // Empty pattern behavior
    // ---------------------------------------------------------------

    #[test]
    fn test_empty_pattern_literal_matches_everything() {
        let matcher = literal_matcher("", "x");
        // Every string, including the empty one, contains the empty string.
        assert!(matcher.is_match("anything"));
        assert!(matcher.is_match(""));
    }

    #[test]
    fn test_empty_pattern_literal_replace() {
        let matcher = literal_matcher("", "x");
        // `str::replace("", "x")` inserts "x" between every char and at
        // both ends.
        let replaced = matcher.replace("ab", "x");
        assert_eq!(replaced, Some("xaxbx".to_string()));
    }

    #[test]
    fn test_empty_pattern_regex_matches_everything() {
        let matcher = regex_matcher("", "x");
        assert!(matcher.is_match("anything"));
        assert!(matcher.is_match(""));
    }

    // ---------------------------------------------------------------
    // Pattern that matches entire line
    // ---------------------------------------------------------------

    #[test]
    fn test_pattern_matches_entire_line_literal() {
        let matcher = literal_matcher("hello world", "goodbye");
        let replaced = matcher.replace("hello world", "goodbye");
        assert_eq!(replaced, Some("goodbye".to_string()));
    }

    #[test]
    fn test_pattern_matches_entire_line_regex() {
        let matcher = regex_matcher(r"^.*$", "replaced");
        let replaced = matcher.replace("anything here", "replaced");
        assert_eq!(replaced, Some("replaced".to_string()));
    }

    #[test]
    fn test_regex_anchored_full_line() {
        let matcher = regex_matcher(r"^fn main\(\)$", "fn start()");
        assert!(matcher.is_match("fn main()"));
        assert!(!matcher.is_match("  fn main()")); // leading whitespace
        assert!(!matcher.is_match("fn main() {")); // trailing content
    }

    // ---------------------------------------------------------------
    // Case-insensitive with unicode (Turkish I problem, etc.)
    // ---------------------------------------------------------------

    #[test]
    fn test_case_insensitive_ascii() {
        let matcher = literal_ci_matcher("Hello", "hi");
        for candidate in ["HELLO", "hello", "HeLLo"] {
            assert!(matcher.is_match(candidate));
        }
        let replaced = matcher.replace("say HELLO there", "hi");
        assert_eq!(replaced, Some("say hi there".to_string()));
    }

    #[test]
    fn test_case_insensitive_german_eszett() {
        // German sharp-s (U+00DF): `to_lowercase()` of "SS" is "ss", while
        // `to_lowercase()` of the sharp-s is the sharp-s itself. This checks
        // that the engine copes with non-trivial Unicode casing.
        let matcher = literal_ci_matcher("stra\u{00DF}e", "street"); // "strasse" with sharp-s
        assert!(matcher.is_match("STRA\u{00DF}E"));
    }

    #[test]
    fn test_case_insensitive_turkish_i_lowercase() {
        // Turkish dotted capital I (U+0130) is a known edge case. The matcher
        // must not panic and must behave consistently with Unicode simple
        // case folding.
        let matcher = literal_ci_matcher("i", "x");
        // Plain ASCII: "I" simple-folds to "i", so this matches.
        assert!(matcher.is_match("I"));
        // U+0130 has no simple case fold to "i" — its full fold is
        // "i\u{0307}", but the regex engine only applies simple folds. So it
        // correctly does NOT match, avoiding the false positives of the old
        // to_lowercase()-based byte-offset approach.
        assert!(!matcher.is_match("\u{0130}"));
    }

    // ---------------------------------------------------------------
    // Regex special characters in literal mode
    // ---------------------------------------------------------------

    #[test]
    fn test_literal_mode_regex_metacharacters() {
        // Each of these is a regex metacharacter, yet literal mode must
        // treat it as an ordinary character.
        let cases = [
            (".", "dot"),
            ("*", "star"),
            ("+", "plus"),
            ("?", "question"),
            ("(", "paren"),
            ("[", "bracket"),
            ("{", "brace"),
            ("^", "caret"),
            ("$", "dollar"),
            ("|", "pipe"),
            ("\\", "backslash"),
        ];
        for (pat, name) in cases {
            let matcher = literal_matcher(pat, "X");
            let text = format!("before {pat} after");
            assert!(
                matcher.is_match(&text),
                "Literal mode should match '{name}' ({pat}) as a literal character"
            );
            let replaced = matcher.replace(&text, "X");
            assert_eq!(
                replaced,
                Some("before X after".to_string()),
                "Literal mode should replace '{name}' ({pat}) as a literal"
            );
        }
    }

    // ---------------------------------------------------------------
    // Multiple matches on same line
    // ---------------------------------------------------------------

    #[test]
    fn test_multiple_matches_same_line() {
        let matcher = literal_matcher("ab", "X");
        let replaced = matcher.replace("ab cd ab ef ab", "X");
        assert_eq!(replaced, Some("X cd X ef X".to_string()));
    }

    #[test]
    fn test_replace_with_empty_string() {
        let matcher = literal_matcher("remove", "");
        let replaced = matcher.replace("please remove this", "");
        assert_eq!(replaced, Some("please  this".to_string()));
    }

    #[test]
    fn test_no_match_returns_none() {
        let matcher = literal_matcher("xyz", "abc");
        assert!(matcher.replace("nothing here", "abc").is_none());
    }

    // ---------------------------------------------------------------
    // Pathological / adversarial pattern tests
    //
    // These pin down behavior for patterns that look like they should
    // break something: regex metacharacters misused in literal mode,
    // empty inputs, backreference-like replacement strings, and regexes
    // that would blow up a backtracking engine.
    // ---------------------------------------------------------------

    /// A literal pattern of `$1` (a capture backreference in a regex
    /// replacement context) must match the literal two-character sequence
    /// and be replaceable without any capture-group semantics. Guards
    /// against someone swapping `str::replace` for `Regex::replace_all` in
    /// the literal path.
    #[test]
    fn test_literal_dollar_one_pattern() {
        let matcher = literal_matcher("$1", "REPLACED");
        assert!(matcher.is_match("value is $1 here"));
        let replaced = matcher.replace("value is $1 here", "REPLACED");
        assert_eq!(replaced, Some("value is REPLACED here".to_string()));
    }

    /// In regex mode, `$0`, `$1`, ... in the replacement string are
    /// capture backreferences. This is intended; the test keeps anyone from
    /// accidentally escaping them.
    #[test]
    fn test_regex_backreferences_work_in_replace() {
        let matcher = regex_matcher(r"hello (\w+)", "greetings, $1!");
        let replaced = matcher.replace("hello world", "greetings, $1!");
        assert_eq!(replaced, Some("greetings, world!".to_string()));
    }

    /// **Adversarial**: the classic "catastrophic backtracking" pattern
    /// `(a+)+$` on a long non-matching input takes exponential time in a
    /// backtracking engine. The `regex` crate is expected to finish this
    /// essentially instantly; switching to a backtracking regex crate would
    /// make this test hang or time out.
    #[test]
    fn test_regex_no_catastrophic_backtracking() {
        let matcher = regex_matcher(r"(a+)+$", "X");
        // 30 'a's followed by 'b' — classic ReDoS trigger for backtracking engines.
        let input = format!("{}b", "a".repeat(30));
        let started = std::time::Instant::now();
        let matched = matcher.is_match(&input);
        let elapsed = started.elapsed();
        assert!(!matched, "pattern should not match 'aaaa...b'");
        // Generous bound — should actually complete in microseconds.
        assert!(
            elapsed < std::time::Duration::from_millis(500),
            "regex took too long ({elapsed:?}) — possible ReDoS"
        );
    }

    /// **Adversarial**: a replacement string containing NUL and other
    /// control characters must pass through unchanged (no shell-like
    /// interpretation).
    #[test]
    fn test_replacement_with_control_chars() {
        let matcher = literal_matcher("placeholder", "\x07bell\x1bescape\x00nul");
        let replaced = matcher.replace("use placeholder here", "\x07bell\x1bescape\x00nul");
        assert_eq!(
            replaced,
            Some("use \x07bell\x1bescape\x00nul here".to_string())
        );
    }

    /// **Adversarial**: a valid regex that only ever matches the empty
    /// string (like `(?:)`) matches at every position, which can trip up
    /// naive replace loops. We only require that replacing does not panic.
    #[test]
    fn test_empty_regex_match_does_not_panic() {
        let matcher = regex_matcher(r"(?:)", "X");
        // Must not panic — the exact output is implementation-defined.
        let _ = matcher.replace("abc", "X");
    }
}
663
664// ---------------------------------------------------------------
665// Property-based tests (proptest)
666// ---------------------------------------------------------------
667#[cfg(test)]
668mod proptests {
669    use super::*;
670    use proptest::prelude::*;
671
672    proptest! {
673        /// Invariant: in literal mode, `Matcher::is_match(text)` ⟺
674        /// `text.contains(pattern)`. This guards against a future optimization
675        /// accidentally changing the semantics of literal matching.
676        #[test]
677        fn prop_literal_matches_iff_contains(
678            pattern in "[a-zA-Z0-9 ]{1,10}",
679            text in "[a-zA-Z0-9 ]{0,60}",
680        ) {
681            let op = Op::Replace {
682                count: Default::default(),
683                multiline: false,
684                find: pattern.clone(),
685                replace: "".into(),
686                regex: false,
687                case_insensitive: false,
688            };
689            let m = Matcher::new(&op).unwrap();
690            prop_assert_eq!(m.is_match(&text), text.contains(&pattern));
691        }
692
693        /// Invariant: `replace(text, pat)` returns `None` iff `is_match(text)`
694        /// is `false`. A mismatch here means we'd record a spurious "change"
695        /// with no actual edit.
696        #[test]
697        fn prop_replace_none_iff_not_match(
698            pattern in "[a-zA-Z0-9]{1,6}",
699            text in "[a-zA-Z0-9]{0,40}",
700            replacement in "[a-zA-Z0-9]{0,6}",
701        ) {
702            let op = Op::Replace {
703                count: Default::default(),
704                multiline: false,
705                find: pattern.clone(),
706                replace: replacement.clone(),
707                regex: false,
708                case_insensitive: false,
709            };
710            let m = Matcher::new(&op).unwrap();
711            let is_match = m.is_match(&text);
712            let replaced = m.replace(&text, &replacement);
713            prop_assert_eq!(replaced.is_some(), is_match);
714        }
715
716        /// Invariant: replacing pattern with itself is a no-op on content
717        /// (the returned String equals the input). This is a fixed-point
718        /// test that catches mis-implementations of the literal replace path.
719        #[test]
720        fn prop_replace_with_self_is_identity(
721            pattern in "[a-zA-Z0-9]{1,6}",
722            text in "[a-zA-Z0-9 ]{0,50}",
723        ) {
724            let op = Op::Replace {
725                count: Default::default(),
726                multiline: false,
727                find: pattern.clone(),
728                replace: pattern.clone(),
729                regex: false,
730                case_insensitive: false,
731            };
732            let m = Matcher::new(&op).unwrap();
733            if let Some(replaced) = m.replace(&text, &pattern) {
734                prop_assert_eq!(replaced, text);
735            }
736        }
737
738        /// Invariant: case-insensitive literal matching is symmetric —
739        /// `Matcher(p, ci=true).is_match(t)` equals
740        /// `Matcher(t.to_lowercase(), ci=false).is_match(p.to_lowercase())`
741        /// for ASCII patterns. (Restricts to ASCII because Unicode case folding
742        /// is famously asymmetric; our ASCII invariant is what callers rely on.)
743        #[test]
744        fn prop_case_insensitive_ascii_symmetric(
745            pattern in "[a-zA-Z]{1,6}",
746            text in "[a-zA-Z]{0,30}",
747        ) {
748            let op = Op::Replace {
749                count: Default::default(),
750                multiline: false,
751                find: pattern.clone(),
752                replace: String::new(),
753                regex: false,
754                case_insensitive: true,
755            };
756            let m = Matcher::new(&op).unwrap();
757            let matches = m.is_match(&text);
758            prop_assert_eq!(
759                matches,
760                text.to_ascii_lowercase().contains(&pattern.to_ascii_lowercase())
761            );
762        }
763
764        /// Invariant: splicing `find_replacements` spans into the original
765        /// text reproduces `replace`'s output exactly — the two APIs must
766        /// never drift apart.
767        #[test]
768        fn prop_find_replacements_splice_equals_replace(
769            text in ".{0,60}",
770            pattern in ".{1,5}",
771            replacement in ".{0,8}",
772        ) {
773            let op = Op::Replace {
774                count: Default::default(),
775                multiline: false,
776                find: pattern.clone(),
777                replace: replacement.clone(),
778                regex: false,
779                case_insensitive: false,
780            };
781            let m = Matcher::new(&op).unwrap();
782            let spans = m.find_replacements(&text, &replacement);
783            let mut spliced = String::new();
784            let mut last = 0;
785            for s in &spans {
786                spliced.push_str(&text[last..s.start]);
787                spliced.push_str(&s.replacement);
788                last = s.end;
789            }
790            spliced.push_str(&text[last..]);
791            let expected = m.replace(&text, &replacement).unwrap_or_else(|| text.clone());
792            prop_assert_eq!(spliced, expected);
793        }
794
795        /// SOUNDNESS: prescreen(text) == false must imply that no line of
796        /// the text matches — a false skip would silently drop edits.
797        /// Exercises literals and regexes including line anchors.
798        #[test]
799        fn prop_prescreen_never_false_skips(
800            text in "(?:[abc^$\\n]{0,8}\\n?){0,6}",
801            pattern in "(?:\\^?[abc]{1,3}\\$?)|(?:[abc]{1,4})",
802            is_regex in proptest::bool::ANY,
803        ) {
804            let op = Op::Replace {
805                count: Default::default(),
806                multiline: false,
807                find: pattern.clone(),
808                replace: String::new(),
809                regex: is_regex,
810                case_insensitive: false,
811            };
812            // Skip combos that don't compile as regex.
813            let Ok(m) = Matcher::new(&op) else { return Ok(()) };
814            if !m.prescreen(&text) {
815                for line in text.lines() {
816                    prop_assert!(
817                        !m.is_match(line),
818                        "prescreen said no, but line {:?} matches {:?}",
819                        line,
820                        pattern
821                    );
822                }
823            }
824        }
825    }
826
827    #[test]
828    fn test_find_replacements_literal_spans() {
829        let op = Op::Replace {
830            count: Default::default(),
831            multiline: false,
832            find: "ab".to_string(),
833            replace: "X".to_string(),
834            regex: false,
835            case_insensitive: false,
836        };
837        let m = Matcher::new(&op).unwrap();
838        let spans = m.find_replacements("ab--ab", "X");
839        assert_eq!(spans.len(), 2);
840        assert_eq!((spans[0].start, spans[0].end), (0, 2));
841        assert_eq!((spans[1].start, spans[1].end), (4, 6));
842        assert_eq!(spans[0].replacement, "X");
843    }
844
845    #[test]
846    fn test_find_replacements_regex_capture_expansion() {
847        let op = Op::Replace {
848            count: Default::default(),
849            multiline: false,
850            find: r"(\d+)-(\d+)".to_string(),
851            replace: "$2-$1".to_string(),
852            regex: true,
853            case_insensitive: false,
854        };
855        let m = Matcher::new(&op).unwrap();
856        let spans = m.find_replacements("1-2 and 3-4", "$2-$1");
857        assert_eq!(spans.len(), 2);
858        assert_eq!(spans[0].replacement, "2-1");
859        assert_eq!(spans[1].replacement, "4-3");
860    }
861
862    #[test]
863    fn test_find_replacements_across_newlines() {
864        let op = Op::Replace {
865            count: Default::default(),
866            multiline: true,
867            find: "a\nb".to_string(),
868            replace: "ab".to_string(),
869            regex: false,
870            case_insensitive: false,
871        };
872        let m = Matcher::new(&op).unwrap();
873        let spans = m.find_replacements("x\na\nb\ny", "ab");
874        assert_eq!(spans.len(), 1);
875        assert_eq!((spans[0].start, spans[0].end), (2, 5));
876    }
877
878    #[test]
879    fn test_replace_n_literal_limits_and_counts() {
880        let op = Op::Replace {
881            count: Default::default(),
882            multiline: false,
883            find: "a".to_string(),
884            replace: "B".to_string(),
885            regex: false,
886            case_insensitive: false,
887        };
888        let m = Matcher::new(&op).unwrap();
889        assert_eq!(m.replace_n("a a a", "B", 2), Some(("B B a".to_string(), 2)));
890        assert_eq!(m.replace_n("a a a", "B", 0), Some(("B B B".to_string(), 3)));
891        // Limit above occurrence count replaces them all and reports the truth.
892        assert_eq!(m.replace_n("a a", "B", 9), Some(("B B".to_string(), 2)));
893        assert_eq!(m.replace_n("zzz", "B", 1), None);
894    }
895
896    #[test]
897    fn test_replace_n_regex_limits_and_expansion() {
898        let op = Op::Replace {
899            count: Default::default(),
900            multiline: false,
901            find: r"(\d)".to_string(),
902            replace: "[$1]".to_string(),
903            regex: true,
904            case_insensitive: false,
905        };
906        let m = Matcher::new(&op).unwrap();
907        assert_eq!(
908            m.replace_n("1 2 3", "[$1]", 2),
909            Some(("[1] [2] 3".to_string(), 2))
910        );
911    }
912
913    #[test]
914    fn test_replace_n_unlimited_matches_replace() {
915        let op = Op::Replace {
916            count: Default::default(),
917            multiline: false,
918            find: "ab".to_string(),
919            replace: "X".to_string(),
920            regex: false,
921            case_insensitive: false,
922        };
923        let m = Matcher::new(&op).unwrap();
924        let (text, _) = m.replace_n("ab ab ab", "X", 0).unwrap();
925        assert_eq!(text, m.replace("ab ab ab", "X").unwrap());
926    }
927
928    // ── Prescreen ──
929
930    #[test]
931    fn test_prescreen_literal() {
932        let op = Op::Replace {
933            count: Default::default(),
934            multiline: false,
935            find: "needle".to_string(),
936            replace: "x".to_string(),
937            regex: false,
938            case_insensitive: false,
939        };
940        let m = Matcher::new(&op).unwrap();
941        assert!(m.prescreen("hay needle hay"));
942        assert!(!m.prescreen("just hay"));
943    }
944
945    #[test]
946    fn test_prescreen_anchored_regex_is_sound() {
947        // The critical case: ^foo matches line 2 in per-line processing but
948        // NOT against the whole buffer without (?m). The shadow must say
949        // "maybe" here, never "no".
950        let op = Op::Replace {
951            count: Default::default(),
952            multiline: false,
953            find: "^foo".to_string(),
954            replace: "x".to_string(),
955            regex: true,
956            case_insensitive: false,
957        };
958        let m = Matcher::new(&op).unwrap();
959        assert!(m.prescreen("bar\nfoo\n"), "(?m) shadow must see line 2");
960        assert!(!m.prescreen("bar\nbaz\n"));
961
962        let op = Op::Replace {
963            count: Default::default(),
964            multiline: false,
965            find: "foo$".to_string(),
966            replace: "x".to_string(),
967            regex: true,
968            case_insensitive: false,
969        };
970        let m = Matcher::new(&op).unwrap();
971        assert!(m.prescreen("foo\nbar\n"));
972    }
973
974    #[test]
975    fn test_prescreen_haystack_anchors_disable_shadow() {
976        // \A anchors to the haystack: each LINE in per-line matching, the
977        // whole buffer in a shadow — no sound shadow exists, so prescreen
978        // must always say "maybe".
979        for pattern in [r"\Afoo", r"foo\z", r"(?-m)^foo"] {
980            let op = Op::Replace {
981                count: Default::default(),
982                multiline: false,
983                find: pattern.to_string(),
984                replace: "x".to_string(),
985                regex: true,
986                case_insensitive: false,
987            };
988            let m = Matcher::new(&op).unwrap();
989            assert!(
990                m.prescreen("anything at all"),
991                "{pattern} must never prescreen-reject"
992            );
993        }
994    }
995
996    #[test]
997    fn test_prescreen_case_insensitive_literal() {
998        let op = Op::Replace {
999            count: Default::default(),
1000            multiline: false,
1001            find: "Needle".to_string(),
1002            replace: "x".to_string(),
1003            regex: false,
1004            case_insensitive: true,
1005        };
1006        let m = Matcher::new(&op).unwrap();
1007        assert!(m.prescreen("hay NEEDLE hay"));
1008        assert!(!m.prescreen("just hay"));
1009    }
1010
1011    #[test]
1012    fn test_find_replacements_no_match_is_empty() {
1013        let op = Op::Replace {
1014            count: Default::default(),
1015            multiline: false,
1016            find: "zzz".to_string(),
1017            replace: "x".to_string(),
1018            regex: false,
1019            case_insensitive: false,
1020        };
1021        let m = Matcher::new(&op).unwrap();
1022        assert!(m.find_replacements("abc", "x").is_empty());
1023    }
1024}