Skip to main content

hjkl_engine/
substitute.rs

//! Public substitute command parser and applicator.
//!
//! Exposes [`parse_substitute`] and [`apply_substitute`] for the
//! `:[range]s/pattern/replacement/[flags]` ex command.
//!
//! ## Vim compatibility notes (v1 limitations)
//!
//! - Delimiter is **always `/`**. Alternate delimiters (`s|x|y|`,
//!   `s#x#y#`) are not supported. The parser returns an error when the
//!   first character after the keyword is not `/`.
//! - The `c` (confirm) flag is **parsed but silently ignored**. No
//!   interactive replacement. See vim's `:help :s_c` for what a full
//!   implementation looks like.
//! - The `\v` very-magic mode is not supported. The regex crate uses
//!   ERE syntax by default. Most ERE patterns work, but vim-specific
//!   extensions (`\<`, `\>`, `\s`, `\+`) may not. Use POSIX ERE
//!   equivalents or the `regex` crate's syntax.
//! - Capture-group references use vim notation (`\1`…`\9`, `&`); the
//!   parser translates them to `$1`…`$9`, `$0` for the `regex` crate.
//!
//! See vim's `:help :substitute` for the full spec.
22
23use regex::Regex;
24
25use crate::Editor;
26
/// Human-readable failure message produced by [`parse_substitute`] and
/// [`apply_substitute`]. It is a plain `String`, so callers can display it
/// directly.
pub type SubstError = String;
29
30/// Parsed `:s/pattern/replacement/flags` command.
31///
32/// Produced by [`parse_substitute`]. Pass to [`apply_substitute`].
33#[derive(Debug, Clone, PartialEq, Eq)]
34pub struct SubstituteCmd {
35    /// The literal pattern string. `None` means "reuse `last_search`
36    /// from the editor" (the user typed `:s//replacement/`).
37    pub pattern: Option<String>,
38    /// The replacement string in vim notation (`&`, `\1`…`\9`).
39    /// Empty string deletes the match.
40    pub replacement: String,
41    /// Parsed flags.
42    pub flags: SubstFlags,
43}
44
/// Option letters that may trail a substitute command (`…/flags`).
///
/// Every field defaults to `false`.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct SubstFlags {
    /// `g`: replace every occurrence on a line instead of only the first.
    pub all: bool,
    /// `i`: force case-insensitive matching, overriding the editor's
    /// `ignorecase` setting.
    pub ignore_case: bool,
    /// `I`: force case-sensitive matching, overriding the editor's
    /// `ignorecase` setting. Takes precedence over `ignore_case` when both
    /// are set.
    pub case_sensitive: bool,
    /// `c`: confirm mode. **Accepted by the parser but not acted upon in
    /// v1**: every match is replaced without prompting. This is a known
    /// divergence from vim; see vim's `:help :s_c`.
    pub confirm: bool,
}
59
/// Summary returned by [`apply_substitute`].
///
/// Both counters are zero when nothing matched.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct SubstituteOutcome {
    /// How many individual matches were replaced, summed over all lines.
    pub replacements: usize,
    /// How many distinct lines received at least one replacement.
    pub lines_changed: usize,
}
68
69/// Parse the tail of a substitute command (everything after the leading
70/// `s` / `substitute` keyword).
71///
72/// # Examples
73///
74/// ```
75/// use hjkl_engine::substitute::parse_substitute;
76///
77/// let cmd = parse_substitute("/foo/bar/gi").unwrap();
78/// assert_eq!(cmd.pattern.as_deref(), Some("foo"));
79/// assert_eq!(cmd.replacement, "bar");
80/// assert!(cmd.flags.all);
81/// assert!(cmd.flags.ignore_case);
82///
83/// // Empty pattern — reuse last_search.
84/// let cmd = parse_substitute("//bar/").unwrap();
85/// assert!(cmd.pattern.is_none());
86/// assert_eq!(cmd.replacement, "bar");
87/// ```
88///
89/// # Errors
90///
91/// Returns an error when:
92/// - `s` is not followed by `/` (no delimiter or alternate delimiter).
93/// - The flag string contains an unknown character.
94/// - The separator `/` is absent (less than two fields).
95pub fn parse_substitute(s: &str) -> Result<SubstituteCmd, SubstError> {
96    // Require leading `/`. Alternate delimiters are out of scope for v1.
97    let rest = s
98        .strip_prefix('/')
99        .ok_or_else(|| format!("substitute: expected '/' delimiter, got {s:?}"))?;
100
101    // Split on unescaped `/`, collecting at most 3 segments:
102    // [pattern, replacement, flags?]
103    let parts = split_on_slash(rest);
104
105    if parts.len() < 2 {
106        return Err("substitute needs /pattern/replacement/".into());
107    }
108
109    let raw_pattern = &parts[0];
110    let raw_replacement = &parts[1];
111    let raw_flags = parts.get(2).map(String::as_str).unwrap_or("");
112
113    // Empty pattern → reuse last_search.
114    let pattern = if raw_pattern.is_empty() {
115        None
116    } else {
117        Some(raw_pattern.clone())
118    };
119
120    // Translate vim replacement notation to regex crate notation.
121    let replacement = translate_replacement(raw_replacement);
122
123    let mut flags = SubstFlags::default();
124    for ch in raw_flags.chars() {
125        match ch {
126            'g' => flags.all = true,
127            'i' => flags.ignore_case = true,
128            'I' => flags.case_sensitive = true,
129            'c' => flags.confirm = true, // parsed, silently ignored
130            other => return Err(format!("unknown flag '{other}' in substitute")),
131        }
132    }
133
134    Ok(SubstituteCmd {
135        pattern,
136        replacement,
137        flags,
138    })
139}
140
141/// Apply a parsed substitute command to `line_range` (0-based inclusive)
142/// in the editor's buffer.
143///
144/// # Pattern resolution
145///
146/// If `cmd.pattern` is `None` (user typed `:s//rep/`), the editor's
147/// `last_search()` is used. Returns an error with `"no previous regular
148/// expression"` when both are empty.
149///
150/// # Case-sensitivity precedence
151///
152/// `flags.case_sensitive` wins over `flags.ignore_case`, which wins over
153/// the editor's `settings().ignore_case`.
154///
155/// # Cursor
156///
157/// After a successful substitution the cursor is placed at column 0 of the
158/// **last line that changed**, matching vim semantics. When no replacements
159/// are made the cursor is left unchanged.
160///
161/// # Undo
162///
163/// One undo snapshot is pushed before the first edit. If no replacements
164/// occur the snapshot is popped so the undo stack stays clean.
165///
166/// # Errors
167///
168/// Returns an error when pattern resolution fails or the regex is invalid.
169pub fn apply_substitute<H: crate::types::Host>(
170    ed: &mut Editor<hjkl_buffer::Buffer, H>,
171    cmd: &SubstituteCmd,
172    line_range: std::ops::RangeInclusive<u32>,
173) -> Result<SubstituteOutcome, SubstError> {
174    // Resolve pattern.
175    let pattern_str: String = match &cmd.pattern {
176        Some(p) => p.clone(),
177        None => ed
178            .last_search()
179            .map(str::to_owned)
180            .ok_or_else(|| "no previous regular expression".to_string())?,
181    };
182
183    // Case-sensitivity.
184    let case_insensitive = if cmd.flags.case_sensitive {
185        false
186    } else if cmd.flags.ignore_case {
187        true
188    } else {
189        ed.settings().ignore_case
190    };
191
192    let translated = crate::search::vim_to_rust_regex(&pattern_str);
193    let effective_pattern = if case_insensitive {
194        format!("(?i){translated}")
195    } else {
196        translated
197    };
198
199    let regex = Regex::new(&effective_pattern).map_err(|e| format!("bad pattern: {e}"))?;
200
201    ed.push_undo();
202
203    let start = *line_range.start() as usize;
204    let end = *line_range.end() as usize;
205    let total = ed.buffer().lines().len();
206
207    let clamp_end = end.min(total.saturating_sub(1));
208    let mut new_lines: Vec<String> = ed.buffer().lines().to_vec();
209    let mut replacements = 0usize;
210    let mut lines_changed = 0usize;
211    let mut last_changed_row = 0usize;
212
213    if start <= clamp_end {
214        for (row, line) in new_lines[start..=clamp_end].iter_mut().enumerate() {
215            let (replaced, n) = do_replace(&regex, line, &cmd.replacement, cmd.flags.all);
216            if n > 0 {
217                *line = replaced;
218                replacements += n;
219                lines_changed += 1;
220                last_changed_row = start + row;
221            }
222        }
223    }
224
225    if replacements == 0 {
226        ed.pop_last_undo();
227        return Ok(SubstituteOutcome {
228            replacements: 0,
229            lines_changed: 0,
230        });
231    }
232
233    // Apply the new content in one shot.
234    ed.buffer_mut().replace_all(&new_lines.join("\n"));
235
236    // Cursor lands on the start of the last changed line.
237    ed.buffer_mut()
238        .set_cursor(hjkl_buffer::Position::new(last_changed_row, 0));
239
240    ed.mark_content_dirty();
241
242    // Update last_search so n/N can repeat the same pattern.
243    ed.set_last_search(Some(pattern_str), true);
244
245    Ok(SubstituteOutcome {
246        replacements,
247        lines_changed,
248    })
249}
250
/// Break `s` into fields at each unescaped `/`.
///
/// - `\/` yields a literal `/` inside the current field.
/// - Any other `\x` pair is copied through untouched, so regex escapes
///   such as `\d` or `\1` survive. A lone trailing `\` is kept as well.
/// - Only the first two unescaped `/` act as separators. Any later `/` is
///   kept as an ordinary character of the third (flags) field.
///
/// The result always has between one and three elements:
/// `[pattern, replacement?, flags?]`.
fn split_on_slash(s: &str) -> Vec<String> {
    let mut fields: Vec<String> = Vec::with_capacity(3);
    let mut current = String::new();
    let mut input = s.chars();

    while let Some(ch) = input.next() {
        match ch {
            '\\' => match input.next() {
                // Escaped delimiter: keep the slash, drop the backslash.
                Some('/') => current.push('/'),
                // Any other escape is preserved verbatim.
                Some(escaped) => {
                    current.push('\\');
                    current.push(escaped);
                }
                // Backslash at end of input.
                None => current.push('\\'),
            },
            // The first two bare slashes close the pattern and replacement.
            '/' if fields.len() < 2 => fields.push(std::mem::take(&mut current)),
            // Everything else, including a slash inside the flags field.
            other => current.push(other),
        }
    }

    fields.push(current);
    fields
}
297
/// Translate vim-style replacement tokens to regex-crate syntax.
///
/// - `&` and `\0` → `$0` (whole match)
/// - `\1`…`\9` → `$1`…`$9` (capture groups)
/// - `\&` → literal `&`
/// - `\\` → `\` (literal backslash)
/// - Any other `\x` → `x` (drop the backslash)
/// - A trailing lone `\` is dropped
/// - A literal `$` → `$$`, so the regex crate does not mistake it for the
///   start of a group reference
///
/// The regex crate reads the *longest* run of `[0-9A-Za-z_]` after `$` as
/// the group name, so a bare `$1` followed by `abc` would refer to a group
/// called `1abc`. To avoid that, a reference is written in braced form
/// (`${1}`) whenever the next output character could extend the name, and
/// in bare form (`$1`) otherwise.
fn translate_replacement(s: &str) -> String {
    // Emit a deferred group reference. `next` is the character about to be
    // written after it (`None` when nothing name-like can follow).
    fn flush_group(out: &mut String, pending: &mut Option<char>, next: Option<char>) {
        if let Some(digit) = pending.take() {
            let glued = matches!(next, Some(c) if c.is_ascii_alphanumeric() || c == '_');
            if glued {
                out.push_str("${");
                out.push(digit);
                out.push('}');
            } else {
                out.push('$');
                out.push(digit);
            }
        }
    }

    // Emit a literal character, escaping `$` for the regex crate.
    fn push_literal(out: &mut String, pending: &mut Option<char>, c: char) {
        flush_group(out, pending, Some(c));
        if c == '$' {
            out.push_str("$$");
        } else {
            out.push(c);
        }
    }

    let mut out = String::with_capacity(s.len() + 4);
    // Digit of a group reference that has been read but not yet written.
    // Writing is deferred because its form depends on what comes next.
    let mut pending: Option<char> = None;
    let mut chars = s.chars();

    while let Some(c) = chars.next() {
        match c {
            '&' => {
                // The next output starts with `$`, which cannot extend a name.
                flush_group(&mut out, &mut pending, None);
                pending = Some('0');
            }
            '\\' => match chars.next() {
                Some(d @ '0'..='9') => {
                    flush_group(&mut out, &mut pending, None);
                    pending = Some(d);
                }
                // `\&`, `\\` and every other escape: keep the character,
                // drop the backslash.
                Some(other) => push_literal(&mut out, &mut pending, other),
                // Trailing backslash is ignored.
                None => {}
            },
            other => push_literal(&mut out, &mut pending, other),
        }
    }
    flush_group(&mut out, &mut pending, None);
    out
}
328
329/// Replace first or all occurrences of `regex` in `text` using the
330/// already-translated `replacement` string. Returns `(new_text, count)`.
331fn do_replace(regex: &Regex, text: &str, replacement: &str, all: bool) -> (String, usize) {
332    let matches = regex.find_iter(text).count();
333    if matches == 0 {
334        return (text.to_string(), 0);
335    }
336    let replaced = if all {
337        regex.replace_all(text, replacement).into_owned()
338    } else {
339        regex.replace(text, replacement).into_owned()
340    };
341    let count = if all { matches } else { 1 };
342    (replaced, count)
343}
344
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::{DefaultHost, Options};
    use hjkl_buffer::Buffer;

    type TestEditor = Editor<Buffer, DefaultHost>;

    /// Build an editor whose buffer holds `content`.
    fn editor_with(content: &str) -> TestEditor {
        let mut editor = Editor::new(Buffer::new(), DefaultHost::new(), Options::default());
        editor.set_content(content);
        editor
    }

    /// Parse `tail`, panicking if the parser rejects it.
    fn parsed(tail: &str) -> SubstituteCmd {
        parse_substitute(tail).unwrap()
    }

    /// Parse `tail`, panicking if the parser accepts it.
    fn parse_error(tail: &str) -> SubstError {
        parse_substitute(tail).unwrap_err()
    }

    /// Parse `tail` and apply it to row 0 of `editor`.
    fn substitute_row0(editor: &mut TestEditor, tail: &str) -> SubstituteOutcome {
        apply_substitute(editor, &parsed(tail), 0..=0).unwrap()
    }

    /// Text of buffer row `idx`.
    fn row(editor: &TestEditor, idx: usize) -> String {
        editor.buffer().lines()[idx].to_string()
    }

    // ── Parser tests ─────────────────────────────────────────────────

    #[test]
    fn parse_basic() {
        let cmd = parsed("/foo/bar/");
        assert_eq!(cmd.pattern.as_deref(), Some("foo"));
        assert_eq!(cmd.replacement, "bar");
        assert!(!cmd.flags.all);
    }

    #[test]
    fn parse_trailing_slash_optional() {
        let cmd = parsed("/foo/bar");
        assert_eq!(cmd.pattern.as_deref(), Some("foo"));
        assert_eq!(cmd.replacement, "bar");
    }

    #[test]
    fn parse_global_flag() {
        assert!(parsed("/x/y/g").flags.all);
    }

    #[test]
    fn parse_ignore_case_flag() {
        assert!(parsed("/x/y/i").flags.ignore_case);
    }

    #[test]
    fn parse_case_sensitive_flag() {
        assert!(parsed("/x/y/I").flags.case_sensitive);
    }

    #[test]
    fn parse_confirm_flag_accepted() {
        assert!(parsed("/x/y/c").flags.confirm);
    }

    #[test]
    fn parse_multi_flags() {
        let flags = parsed("/x/y/gi").flags;
        assert!(flags.all);
        assert!(flags.ignore_case);
    }

    #[test]
    fn parse_unknown_flag_errors() {
        let err = parse_error("/x/y/z");
        assert!(err.contains("unknown flag 'z'"), "{err}");
    }

    #[test]
    fn parse_empty_pattern_is_none() {
        let cmd = parsed("//bar/");
        assert!(cmd.pattern.is_none());
        assert_eq!(cmd.replacement, "bar");
    }

    #[test]
    fn parse_empty_replacement_ok() {
        let cmd = parsed("/foo//");
        assert_eq!(cmd.pattern.as_deref(), Some("foo"));
        assert_eq!(cmd.replacement, "");
    }

    #[test]
    fn parse_escaped_slash_in_pattern() {
        assert_eq!(parsed("/a\\/b/c/").pattern.as_deref(), Some("a/b"));
    }

    #[test]
    fn parse_escaped_slash_in_replacement() {
        // The stored replacement is the translated form; a literal `/`
        // passes through translation unchanged.
        assert_eq!(parsed("/a/b\\/c/").replacement, "b/c");
    }

    #[test]
    fn parse_ampersand_becomes_dollar_zero() {
        assert_eq!(parsed("/foo/[&]/").replacement, "[$0]");
    }

    #[test]
    fn parse_escaped_ampersand_is_literal() {
        assert_eq!(parsed("/foo/\\&/").replacement, "&");
    }

    #[test]
    fn parse_group_ref_translates() {
        assert_eq!(parsed("/(foo)/\\1/").replacement, "$1");
    }

    #[test]
    fn parse_group_ref_nine() {
        assert_eq!(parsed("/(x)/\\9/").replacement, "$9");
    }

    #[test]
    fn parse_wrong_delimiter_errors() {
        let err = parse_error("|foo|bar|");
        assert!(err.contains("'/'"), "{err}");
    }

    #[test]
    fn parse_too_few_fields_errors() {
        let err = parse_error("/foo");
        assert!(err.contains("needs /pattern/replacement"), "{err}");
    }

    // ── Apply tests ──────────────────────────────────────────────────

    #[test]
    fn apply_single_line_first_only() {
        let mut editor = editor_with("foo foo");
        let outcome = substitute_row0(&mut editor, "/foo/bar/");
        assert_eq!(outcome.replacements, 1);
        assert_eq!(outcome.lines_changed, 1);
        assert_eq!(row(&editor, 0), "bar foo");
    }

    #[test]
    fn apply_single_line_global() {
        let mut editor = editor_with("foo foo foo");
        let outcome = substitute_row0(&mut editor, "/foo/bar/g");
        assert_eq!(outcome.replacements, 3);
        assert_eq!(outcome.lines_changed, 1);
        assert_eq!(row(&editor, 0), "bar bar bar");
    }

    #[test]
    fn apply_multi_line_range() {
        let mut editor = editor_with("foo\nfoo foo\nbar");
        let outcome = apply_substitute(&mut editor, &parsed("/foo/xyz/g"), 0..=2).unwrap();
        assert_eq!(outcome.replacements, 3);
        assert_eq!(outcome.lines_changed, 2);
        assert_eq!(row(&editor, 0), "xyz");
        assert_eq!(row(&editor, 1), "xyz xyz");
        assert_eq!(row(&editor, 2), "bar");
    }

    #[test]
    fn apply_no_match_returns_zero() {
        let mut editor = editor_with("hello");
        let before = row(&editor, 0);
        let outcome = substitute_row0(&mut editor, "/xyz/abc/");
        assert_eq!(outcome.replacements, 0);
        assert_eq!(outcome.lines_changed, 0);
        assert_eq!(row(&editor, 0), before);
    }

    #[test]
    fn apply_case_insensitive_flag() {
        let mut editor = editor_with("Foo FOO foo");
        let outcome = substitute_row0(&mut editor, "/foo/bar/gi");
        assert_eq!(outcome.replacements, 3);
        assert_eq!(row(&editor, 0), "bar bar bar");
    }

    #[test]
    fn apply_case_sensitive_flag_overrides_editor_setting() {
        let mut editor = editor_with("Foo foo");
        // Editor-wide ignorecase is on, but the capital `I` flag forces
        // case-sensitive matching.
        editor.settings_mut().ignore_case = true;
        let outcome = substitute_row0(&mut editor, "/foo/bar/I");
        // Only the lowercase "foo" matches.
        assert_eq!(outcome.replacements, 1);
        assert_eq!(row(&editor, 0), "Foo bar");
    }

    #[test]
    fn apply_empty_pattern_reuses_last_search() {
        let mut editor = editor_with("hello world");
        editor.set_last_search(Some("world".to_string()), true);
        let outcome = substitute_row0(&mut editor, "//planet/");
        assert_eq!(outcome.replacements, 1);
        assert_eq!(row(&editor, 0), "hello planet");
    }

    #[test]
    fn apply_empty_pattern_no_last_search_errors() {
        let mut editor = editor_with("hello");
        let err = apply_substitute(&mut editor, &parsed("//bar/"), 0..=0).unwrap_err();
        assert!(err.contains("no previous regular expression"), "{err}");
    }

    #[test]
    fn apply_updates_last_search() {
        let mut editor = editor_with("foo");
        substitute_row0(&mut editor, "/foo/bar/");
        assert_eq!(editor.last_search(), Some("foo"));
    }

    #[test]
    fn apply_empty_replacement_deletes_match() {
        let mut editor = editor_with("hello world");
        let outcome = substitute_row0(&mut editor, "/world//");
        assert_eq!(outcome.replacements, 1);
        assert_eq!(row(&editor, 0), "hello ");
    }

    #[test]
    fn apply_undo_reverts_in_one_step() {
        let mut editor = editor_with("foo");
        substitute_row0(&mut editor, "/foo/bar/");
        assert_eq!(row(&editor, 0), "bar");
        editor.undo();
        assert_eq!(row(&editor, 0), "foo");
    }

    #[test]
    fn apply_ampersand_in_replacement() {
        let mut editor = editor_with("foo");
        substitute_row0(&mut editor, "/foo/[&]/");
        assert_eq!(row(&editor, 0), "[foo]");
    }

    #[test]
    fn apply_capture_group_reference() {
        let mut editor = editor_with("hello world");
        substitute_row0(&mut editor, "/(\\w+)/<<\\1>>/g");
        assert_eq!(row(&editor, 0), "<<hello>> <<world>>");
    }
}