Skip to main content

hjkl_engine/
substitute.rs

//! Public substitute command parser and applicator.
//!
//! Exposes [`parse_substitute`] and [`apply_substitute`] for the
//! `:[range]s/pattern/replacement/[flags]` ex command.
//!
//! ## Vim compatibility notes (v1 limitations)
//!
//! - Delimiter is **always `/`**. Alternate delimiters (`s|x|y|`,
//!   `s#x#y#`) are not supported. The parser returns an error when the
//!   first character after the keyword is not `/`.
//! - The `c` (confirm) flag is **parsed but silently ignored**. No
//!   interactive replacement. See vim's `:help :s_c` for what a full
//!   implementation looks like.
//! - Patterns are handed to the `regex` crate unchanged, so they follow
//!   that crate's syntax rather than vim's. The `\v` very-magic switch is
//!   not supported, and vim "magic" escapes such as `\+`, `\(`…`\)` and
//!   `\|` match literal characters here; write `+`, `(`…`)` and `|`
//!   instead. Whether `\<` / `\>` are accepted depends on the `regex`
//!   crate version in use.
//! - Capture-group references use vim notation (`\1`…`\9`, `&`); the
//!   parser translates them to `$1`…`$9`, `$0` for the `regex` crate.
//!
//! See vim's `:help :substitute` for the full spec.
22
23use regex::Regex;
24
25use crate::Editor;
26
/// Human-readable error message produced by [`parse_substitute`] and
/// [`apply_substitute`].
///
/// This is a plain [`String`] alias, so an error can be displayed or
/// compared like any other piece of text.
pub type SubstError = String;
29
30/// Parsed `:s/pattern/replacement/flags` command.
31///
32/// Produced by [`parse_substitute`]. Pass to [`apply_substitute`].
33#[derive(Debug, Clone, PartialEq, Eq)]
34pub struct SubstituteCmd {
35    /// The literal pattern string. `None` means "reuse `last_search`
36    /// from the editor" (the user typed `:s//replacement/`).
37    pub pattern: Option<String>,
38    /// The replacement string in vim notation (`&`, `\1`…`\9`).
39    /// Empty string deletes the match.
40    pub replacement: String,
41    /// Parsed flags.
42    pub flags: SubstFlags,
43}
44
/// Option letters accepted after the last `/` of a substitute command.
///
/// Every flag defaults to `false`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct SubstFlags {
    /// `g`: replace every occurrence on a line instead of only the first.
    pub all: bool,
    /// `i`: match case-insensitively, overriding the editor's
    /// `ignorecase` setting.
    pub ignore_case: bool,
    /// `I`: match case-sensitively, overriding the editor's
    /// `ignorecase` setting.
    pub case_sensitive: bool,
    /// `c`: confirm each replacement. **Recorded but not acted on in
    /// v1**: replacements happen without prompting, which is a known
    /// divergence from vim (see `:help :s_c`).
    pub confirm: bool,
}
59
/// Summary returned by [`apply_substitute`].
///
/// Both counters are zero when nothing matched.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct SubstituteOutcome {
    /// How many individual matches were replaced, summed over all lines.
    pub replacements: usize,
    /// How many lines received one or more replacements.
    pub lines_changed: usize,
}
68
69/// Parse the tail of a substitute command (everything after the leading
70/// `s` / `substitute` keyword).
71///
72/// # Examples
73///
74/// ```
75/// use hjkl_engine::substitute::parse_substitute;
76///
77/// let cmd = parse_substitute("/foo/bar/gi").unwrap();
78/// assert_eq!(cmd.pattern.as_deref(), Some("foo"));
79/// assert_eq!(cmd.replacement, "bar");
80/// assert!(cmd.flags.all);
81/// assert!(cmd.flags.ignore_case);
82///
83/// // Empty pattern — reuse last_search.
84/// let cmd = parse_substitute("//bar/").unwrap();
85/// assert!(cmd.pattern.is_none());
86/// assert_eq!(cmd.replacement, "bar");
87/// ```
88///
89/// # Errors
90///
91/// Returns an error when:
92/// - `s` is not followed by `/` (no delimiter or alternate delimiter).
93/// - The flag string contains an unknown character.
94/// - The separator `/` is absent (less than two fields).
95pub fn parse_substitute(s: &str) -> Result<SubstituteCmd, SubstError> {
96    // Require leading `/`. Alternate delimiters are out of scope for v1.
97    let rest = s
98        .strip_prefix('/')
99        .ok_or_else(|| format!("substitute: expected '/' delimiter, got {s:?}"))?;
100
101    // Split on unescaped `/`, collecting at most 3 segments:
102    // [pattern, replacement, flags?]
103    let parts = split_on_slash(rest);
104
105    if parts.len() < 2 {
106        return Err("substitute needs /pattern/replacement/".into());
107    }
108
109    let raw_pattern = &parts[0];
110    let raw_replacement = &parts[1];
111    let raw_flags = parts.get(2).map(String::as_str).unwrap_or("");
112
113    // Empty pattern → reuse last_search.
114    let pattern = if raw_pattern.is_empty() {
115        None
116    } else {
117        Some(raw_pattern.clone())
118    };
119
120    // Translate vim replacement notation to regex crate notation.
121    let replacement = translate_replacement(raw_replacement);
122
123    let mut flags = SubstFlags::default();
124    for ch in raw_flags.chars() {
125        match ch {
126            'g' => flags.all = true,
127            'i' => flags.ignore_case = true,
128            'I' => flags.case_sensitive = true,
129            'c' => flags.confirm = true, // parsed, silently ignored
130            other => return Err(format!("unknown flag '{other}' in substitute")),
131        }
132    }
133
134    Ok(SubstituteCmd {
135        pattern,
136        replacement,
137        flags,
138    })
139}
140
141/// Apply a parsed substitute command to `line_range` (0-based inclusive)
142/// in the editor's buffer.
143///
144/// # Pattern resolution
145///
146/// If `cmd.pattern` is `None` (user typed `:s//rep/`), the editor's
147/// `last_search()` is used. Returns an error with `"no previous regular
148/// expression"` when both are empty.
149///
150/// # Case-sensitivity precedence
151///
152/// `flags.case_sensitive` wins over `flags.ignore_case`, which wins over
153/// the editor's `settings().ignore_case`.
154///
155/// # Cursor
156///
157/// After a successful substitution the cursor is placed at column 0 of the
158/// **last line that changed**, matching vim semantics. When no replacements
159/// are made the cursor is left unchanged.
160///
161/// # Undo
162///
163/// One undo snapshot is pushed before the first edit. If no replacements
164/// occur the snapshot is popped so the undo stack stays clean.
165///
166/// # Errors
167///
168/// Returns an error when pattern resolution fails or the regex is invalid.
169pub fn apply_substitute<H: crate::types::Host>(
170    ed: &mut Editor<hjkl_buffer::Buffer, H>,
171    cmd: &SubstituteCmd,
172    line_range: std::ops::RangeInclusive<u32>,
173) -> Result<SubstituteOutcome, SubstError> {
174    // Resolve pattern.
175    let pattern_str: String = match &cmd.pattern {
176        Some(p) => p.clone(),
177        None => ed
178            .last_search()
179            .map(str::to_owned)
180            .ok_or_else(|| "no previous regular expression".to_string())?,
181    };
182
183    // Case-sensitivity.
184    let case_insensitive = if cmd.flags.case_sensitive {
185        false
186    } else if cmd.flags.ignore_case {
187        true
188    } else {
189        ed.settings().ignore_case
190    };
191
192    let effective_pattern = if case_insensitive {
193        format!("(?i){pattern_str}")
194    } else {
195        pattern_str.clone()
196    };
197
198    let regex = Regex::new(&effective_pattern).map_err(|e| format!("bad pattern: {e}"))?;
199
200    ed.push_undo();
201
202    let start = *line_range.start() as usize;
203    let end = *line_range.end() as usize;
204    let total = ed.buffer().lines().len();
205
206    let clamp_end = end.min(total.saturating_sub(1));
207    let mut new_lines: Vec<String> = ed.buffer().lines().to_vec();
208    let mut replacements = 0usize;
209    let mut lines_changed = 0usize;
210    let mut last_changed_row = 0usize;
211
212    if start <= clamp_end {
213        for (row, line) in new_lines[start..=clamp_end].iter_mut().enumerate() {
214            let (replaced, n) = do_replace(&regex, line, &cmd.replacement, cmd.flags.all);
215            if n > 0 {
216                *line = replaced;
217                replacements += n;
218                lines_changed += 1;
219                last_changed_row = start + row;
220            }
221        }
222    }
223
224    if replacements == 0 {
225        ed.pop_last_undo();
226        return Ok(SubstituteOutcome {
227            replacements: 0,
228            lines_changed: 0,
229        });
230    }
231
232    // Apply the new content in one shot.
233    ed.buffer_mut().replace_all(&new_lines.join("\n"));
234
235    // Cursor lands on the start of the last changed line.
236    ed.buffer_mut()
237        .set_cursor(hjkl_buffer::Position::new(last_changed_row, 0));
238
239    ed.mark_content_dirty();
240
241    // Update last_search so n/N can repeat the same pattern.
242    ed.set_last_search(Some(pattern_str), true);
243
244    Ok(SubstituteOutcome {
245        replacements,
246        lines_changed,
247    })
248}
249
/// Break `s` into fields at each unescaped `/`.
///
/// Escape handling:
/// - `\/` contributes a bare `/` to the current field and does not split.
/// - Any other `\x` pair is copied through untouched, so regex escapes
///   and vim group references (`\d`, `\1`, …) survive.
/// - A lone trailing `\` is kept as is.
///
/// The result always holds between one and three fields, in the order
/// `[pattern, replacement, flags]`. Once two fields have been split off,
/// further `/` characters no longer split: they are appended verbatim to
/// the third (flags) field.
fn split_on_slash(s: &str) -> Vec<String> {
    let mut fields: Vec<String> = Vec::new();
    let mut current = String::new();
    let mut rest = s.chars();

    while let Some(ch) = rest.next() {
        match ch {
            '\\' => match rest.next() {
                // Escaped delimiter: keep the slash, drop the backslash.
                Some('/') => current.push('/'),
                // Any other escape is preserved in full.
                Some(escaped) => {
                    current.push('\\');
                    current.push(escaped);
                }
                None => current.push('\\'),
            },
            // Only the first two delimiters end a field.
            '/' if fields.len() < 2 => fields.push(std::mem::take(&mut current)),
            other => current.push(other),
        }
    }

    fields.push(current);
    fields
}
296
/// Translate a vim-style replacement string into the `regex` crate's
/// replacement syntax.
///
/// - `&` and `\0` → `$0` (whole match)
/// - `\1`…`\9` → `$1`…`$9` (capture groups)
/// - `\&` → literal `&`
/// - `\\` → `\` (literal backslash)
/// - `$` → `$$`, so a dollar sign typed by the user stays literal
///   instead of being read as a group reference by the `regex` crate
/// - Any other `\x` → `x` (the backslash is dropped)
/// - A trailing lone `\` is ignored
///
/// The `regex` crate takes the longest possible name after `$`, so a bare
/// `$1bar` would refer to a group called `1bar`. Whenever the character
/// emitted right after a group reference could extend the name (ASCII
/// letter, digit or `_`), the reference is written in braced form
/// (`${1}bar`). Otherwise the short `$1` form is kept.
fn translate_replacement(s: &str) -> String {
    /// One unit of the input after escape processing.
    enum Piece {
        /// Reference to group `0`–`9`; `0` is the whole match.
        Group(char),
        /// A character that must appear verbatim in the substituted text.
        Literal(char),
    }

    let mut out = String::with_capacity(s.len() + 4);
    // Byte offset in `out` of the digit of a just-written `$N`, kept only
    // until the next piece is emitted. Used to add braces retroactively.
    let mut open_ref: Option<usize> = None;

    let mut chars = s.chars();
    while let Some(c) = chars.next() {
        let piece = match c {
            '&' => Piece::Group('0'),
            '\\' => match chars.next() {
                Some(d @ '0'..='9') => Piece::Group(d),
                // Covers `\&`, `\\` and every other escaped character.
                Some(other) => Piece::Literal(other),
                // Trailing backslash: nothing left to emit.
                None => break,
            },
            other => Piece::Literal(other),
        };

        match piece {
            Piece::Group(d) => {
                // A following `$` always terminates the previous name, so
                // back-to-back references never need braces.
                out.push('$');
                open_ref = Some(out.len());
                out.push(d);
            }
            Piece::Literal(ch) => {
                if let Some(digit_at) = open_ref.take() {
                    if ch.is_ascii_alphanumeric() || ch == '_' {
                        out.insert(digit_at, '{');
                        out.push('}');
                    }
                }
                if ch == '$' {
                    // `$$` is the regex crate's escape for a literal `$`.
                    out.push('$');
                }
                out.push(ch);
            }
        }
    }
    out
}
327
328/// Replace first or all occurrences of `regex` in `text` using the
329/// already-translated `replacement` string. Returns `(new_text, count)`.
330fn do_replace(regex: &Regex, text: &str, replacement: &str, all: bool) -> (String, usize) {
331    let matches = regex.find_iter(text).count();
332    if matches == 0 {
333        return (text.to_string(), 0);
334    }
335    let replaced = if all {
336        regex.replace_all(text, replacement).into_owned()
337    } else {
338        regex.replace(text, replacement).into_owned()
339    };
340    let count = if all { matches } else { 1 };
341    (replaced, count)
342}
343
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::{DefaultHost, Options};
    use hjkl_buffer::Buffer;

    /// Editor type used by every test in this module.
    type TestEditor = Editor<Buffer, DefaultHost>;

    /// Build an editor whose buffer holds `content`.
    fn editor_with(content: &str) -> TestEditor {
        let mut ed = Editor::new(Buffer::new(), DefaultHost::new(), Options::default());
        ed.set_content(content);
        ed
    }

    /// Parse `tail`, panicking if the parser rejects it.
    fn parsed(tail: &str) -> SubstituteCmd {
        parse_substitute(tail).unwrap()
    }

    /// Parse `tail` and apply it to `rows` of `ed`, panicking on any error.
    fn substitute(
        ed: &mut TestEditor,
        tail: &str,
        rows: std::ops::RangeInclusive<u32>,
    ) -> SubstituteOutcome {
        apply_substitute(ed, &parsed(tail), rows).unwrap()
    }

    // ── Parser tests ─────────────────────────────────────────────────

    #[test]
    fn parse_basic() {
        let cmd = parsed("/foo/bar/");
        assert_eq!(cmd.pattern.as_deref(), Some("foo"));
        assert_eq!(cmd.replacement, "bar");
        assert!(!cmd.flags.all);
    }

    #[test]
    fn parse_trailing_slash_optional() {
        let cmd = parsed("/foo/bar");
        assert_eq!(cmd.pattern.as_deref(), Some("foo"));
        assert_eq!(cmd.replacement, "bar");
    }

    #[test]
    fn parse_global_flag() {
        assert!(parsed("/x/y/g").flags.all);
    }

    #[test]
    fn parse_ignore_case_flag() {
        assert!(parsed("/x/y/i").flags.ignore_case);
    }

    #[test]
    fn parse_case_sensitive_flag() {
        assert!(parsed("/x/y/I").flags.case_sensitive);
    }

    #[test]
    fn parse_confirm_flag_accepted() {
        assert!(parsed("/x/y/c").flags.confirm);
    }

    #[test]
    fn parse_multi_flags() {
        let flags = parsed("/x/y/gi").flags;
        assert!(flags.all);
        assert!(flags.ignore_case);
    }

    #[test]
    fn parse_unknown_flag_errors() {
        let err = parse_substitute("/x/y/z").unwrap_err();
        assert!(err.contains("unknown flag 'z'"), "{err}");
    }

    #[test]
    fn parse_empty_pattern_is_none() {
        let cmd = parsed("//bar/");
        assert!(cmd.pattern.is_none());
        assert_eq!(cmd.replacement, "bar");
    }

    #[test]
    fn parse_empty_replacement_ok() {
        let cmd = parsed("/foo//");
        assert_eq!(cmd.pattern.as_deref(), Some("foo"));
        assert_eq!(cmd.replacement, "");
    }

    #[test]
    fn parse_escaped_slash_in_pattern() {
        assert_eq!(parsed("/a\\/b/c/").pattern.as_deref(), Some("a/b"));
    }

    #[test]
    fn parse_escaped_slash_in_replacement() {
        // The stored replacement is post-translation; the slash stays literal.
        assert_eq!(parsed("/a/b\\/c/").replacement, "b/c");
    }

    #[test]
    fn parse_ampersand_becomes_dollar_zero() {
        assert_eq!(parsed("/foo/[&]/").replacement, "[$0]");
    }

    #[test]
    fn parse_escaped_ampersand_is_literal() {
        assert_eq!(parsed("/foo/\\&/").replacement, "&");
    }

    #[test]
    fn parse_group_ref_translates() {
        assert_eq!(parsed("/(foo)/\\1/").replacement, "$1");
    }

    #[test]
    fn parse_group_ref_nine() {
        assert_eq!(parsed("/(x)/\\9/").replacement, "$9");
    }

    #[test]
    fn parse_wrong_delimiter_errors() {
        let err = parse_substitute("|foo|bar|").unwrap_err();
        assert!(err.contains("'/'"), "{err}");
    }

    #[test]
    fn parse_too_few_fields_errors() {
        let err = parse_substitute("/foo").unwrap_err();
        assert!(err.contains("needs /pattern/replacement"), "{err}");
    }

    // ── Apply tests ──────────────────────────────────────────────────

    #[test]
    fn apply_single_line_first_only() {
        let mut ed = editor_with("foo foo");
        let outcome = substitute(&mut ed, "/foo/bar/", 0..=0);
        assert_eq!(outcome.replacements, 1);
        assert_eq!(outcome.lines_changed, 1);
        assert_eq!(ed.buffer().lines()[0], "bar foo");
    }

    #[test]
    fn apply_single_line_global() {
        let mut ed = editor_with("foo foo foo");
        let outcome = substitute(&mut ed, "/foo/bar/g", 0..=0);
        assert_eq!(outcome.replacements, 3);
        assert_eq!(outcome.lines_changed, 1);
        assert_eq!(ed.buffer().lines()[0], "bar bar bar");
    }

    #[test]
    fn apply_multi_line_range() {
        let mut ed = editor_with("foo\nfoo foo\nbar");
        let outcome = substitute(&mut ed, "/foo/xyz/g", 0..=2);
        assert_eq!(outcome.replacements, 3);
        assert_eq!(outcome.lines_changed, 2);
        assert_eq!(ed.buffer().lines()[0], "xyz");
        assert_eq!(ed.buffer().lines()[1], "xyz xyz");
        assert_eq!(ed.buffer().lines()[2], "bar");
    }

    #[test]
    fn apply_no_match_returns_zero() {
        let mut ed = editor_with("hello");
        let before = ed.buffer().lines()[0].to_string();
        let outcome = substitute(&mut ed, "/xyz/abc/", 0..=0);
        assert_eq!(outcome.replacements, 0);
        assert_eq!(outcome.lines_changed, 0);
        assert_eq!(ed.buffer().lines()[0], before);
    }

    #[test]
    fn apply_case_insensitive_flag() {
        let mut ed = editor_with("Foo FOO foo");
        let outcome = substitute(&mut ed, "/foo/bar/gi", 0..=0);
        assert_eq!(outcome.replacements, 3);
        assert_eq!(ed.buffer().lines()[0], "bar bar bar");
    }

    #[test]
    fn apply_case_sensitive_flag_overrides_editor_setting() {
        let mut ed = editor_with("Foo foo");
        // Turn on the editor-wide ignorecase option...
        ed.settings_mut().ignore_case = true;
        // ...and check that the `I` flag still forces a case-sensitive match.
        let outcome = substitute(&mut ed, "/foo/bar/I", 0..=0);
        // Only the lowercase "foo" matches.
        assert_eq!(outcome.replacements, 1);
        assert_eq!(ed.buffer().lines()[0], "Foo bar");
    }

    #[test]
    fn apply_empty_pattern_reuses_last_search() {
        let mut ed = editor_with("hello world");
        ed.set_last_search(Some("world".to_string()), true);
        let outcome = substitute(&mut ed, "//planet/", 0..=0);
        assert_eq!(outcome.replacements, 1);
        assert_eq!(ed.buffer().lines()[0], "hello planet");
    }

    #[test]
    fn apply_empty_pattern_no_last_search_errors() {
        let mut ed = editor_with("hello");
        let err = apply_substitute(&mut ed, &parsed("//bar/"), 0..=0).unwrap_err();
        assert!(err.contains("no previous regular expression"), "{err}");
    }

    #[test]
    fn apply_updates_last_search() {
        let mut ed = editor_with("foo");
        substitute(&mut ed, "/foo/bar/", 0..=0);
        assert_eq!(ed.last_search(), Some("foo"));
    }

    #[test]
    fn apply_empty_replacement_deletes_match() {
        let mut ed = editor_with("hello world");
        let outcome = substitute(&mut ed, "/world//", 0..=0);
        assert_eq!(outcome.replacements, 1);
        assert_eq!(ed.buffer().lines()[0], "hello ");
    }

    #[test]
    fn apply_undo_reverts_in_one_step() {
        let mut ed = editor_with("foo");
        substitute(&mut ed, "/foo/bar/", 0..=0);
        assert_eq!(ed.buffer().lines()[0], "bar");
        ed.undo();
        assert_eq!(ed.buffer().lines()[0], "foo");
    }

    #[test]
    fn apply_ampersand_in_replacement() {
        let mut ed = editor_with("foo");
        substitute(&mut ed, "/foo/[&]/", 0..=0);
        assert_eq!(ed.buffer().lines()[0], "[foo]");
    }

    #[test]
    fn apply_capture_group_reference() {
        let mut ed = editor_with("hello world");
        substitute(&mut ed, "/(\\w+)/<<\\1>>/g", 0..=0);
        assert_eq!(ed.buffer().lines()[0], "<<hello>> <<world>>");
    }
}
606}