Skip to main content

hjkl_engine/
substitute.rs

1//! Public substitute command parser and applicator.
2//!
3//! Exposes [`parse_substitute`] and [`apply_substitute`] for the
4//! `:[range]s/pattern/replacement/[flags]` ex command.
5//!
6//! ## Vim compatibility notes (v1 limitations)
7//!
8//! - Delimiter is **always `/`**. Alternate delimiters (`s|x|y|`,
9//!   `s#x#y#`) are not supported. The parser returns an error when the
10//!   first character after the keyword is not `/`.
11//! - The `c` (confirm) flag is **parsed but silently ignored**. No
12//!   interactive replacement. See vim's `:help :s_c` for what a full
13//!   implementation looks like.
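//! - The `\v` very-magic mode is not supported. Patterns are passed
//!   through `crate::search::vim_to_rust_regex` and compiled with the
//!   `regex` crate, whose syntax is Perl-like (neither POSIX ERE nor
//!   vim's "magic" dialect). Vim-specific atoms such as `\<`, `\>` and
//!   `\+` may not behave as they do in vim; when in doubt, write the
//!   pattern in the `regex` crate's own syntax.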
18//! - Capture-group references use vim notation (`\1`…`\9`, `&`); the
19//!   parser translates them to `$1`…`$9`, `$0` for the `regex` crate.
20//!
21//! See vim's `:help :substitute` for the full spec.
22
23use regex::Regex;
24
25use crate::Editor;
26
/// Human-readable failure message produced by [`parse_substitute`] and
/// [`apply_substitute`].
///
/// This is a plain `String` alias, so callers can display or compare it
/// directly.
pub type SubstError = String;
29
30/// Parsed `:s/pattern/replacement/flags` command.
31///
32/// Produced by [`parse_substitute`]. Pass to [`apply_substitute`].
33#[derive(Debug, Clone, PartialEq, Eq)]
34pub struct SubstituteCmd {
35    /// The literal pattern string. `None` means "reuse `last_search`
36    /// from the editor" (the user typed `:s//replacement/`).
37    pub pattern: Option<String>,
38    /// The replacement string in vim notation (`&`, `\1`…`\9`).
39    /// Empty string deletes the match.
40    pub replacement: String,
41    /// Parsed flags.
42    pub flags: SubstFlags,
43}
44
/// Switches accepted after the final `/` of a substitute command.
///
/// Every switch defaults to `false`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct SubstFlags {
    /// `g`: replace every occurrence on a line instead of only the first.
    pub all: bool,
    /// `i`: match case-insensitively, overriding the editor's
    /// `ignorecase` setting.
    pub ignore_case: bool,
    /// `I`: match case-sensitively, overriding the editor's
    /// `ignorecase` setting.
    pub case_sensitive: bool,
    /// `c`: confirm each replacement. **Recorded but not acted on in
    /// v1**: replacements happen without prompting, which is a known
    /// divergence from vim. See vim's `:help :s_c`.
    pub confirm: bool,
}
59
/// Summary returned by [`apply_substitute`].
///
/// Both counters are zero when nothing matched.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct SubstituteOutcome {
    /// How many individual matches were replaced, summed over all lines.
    pub replacements: usize,
    /// How many lines received at least one replacement.
    pub lines_changed: usize,
}
68
69/// Parse the tail of a substitute command (everything after the leading
70/// `s` / `substitute` keyword).
71///
72/// # Examples
73///
74/// ```
75/// use hjkl_engine::substitute::parse_substitute;
76///
77/// let cmd = parse_substitute("/foo/bar/gi").unwrap();
78/// assert_eq!(cmd.pattern.as_deref(), Some("foo"));
79/// assert_eq!(cmd.replacement, "bar");
80/// assert!(cmd.flags.all);
81/// assert!(cmd.flags.ignore_case);
82///
83/// // Empty pattern — reuse last_search.
84/// let cmd = parse_substitute("//bar/").unwrap();
85/// assert!(cmd.pattern.is_none());
86/// assert_eq!(cmd.replacement, "bar");
87/// ```
88///
89/// # Errors
90///
91/// Returns an error when:
92/// - `s` is not followed by `/` (no delimiter or alternate delimiter).
93/// - The flag string contains an unknown character.
94/// - The separator `/` is absent (less than two fields).
95pub fn parse_substitute(s: &str) -> Result<SubstituteCmd, SubstError> {
96    // Require leading `/`. Alternate delimiters are out of scope for v1.
97    let rest = s
98        .strip_prefix('/')
99        .ok_or_else(|| format!("substitute: expected '/' delimiter, got {s:?}"))?;
100
101    // Split on unescaped `/`, collecting at most 3 segments:
102    // [pattern, replacement, flags?]
103    let parts = split_on_slash(rest);
104
105    if parts.len() < 2 {
106        return Err("substitute needs /pattern/replacement/".into());
107    }
108
109    let raw_pattern = &parts[0];
110    let raw_replacement = &parts[1];
111    let raw_flags = parts.get(2).map(String::as_str).unwrap_or("");
112
113    // Empty pattern → reuse last_search.
114    let pattern = if raw_pattern.is_empty() {
115        None
116    } else {
117        Some(raw_pattern.clone())
118    };
119
120    // Translate vim replacement notation to regex crate notation.
121    let replacement = translate_replacement(raw_replacement);
122
123    let mut flags = SubstFlags::default();
124    for ch in raw_flags.chars() {
125        match ch {
126            'g' => flags.all = true,
127            'i' => flags.ignore_case = true,
128            'I' => flags.case_sensitive = true,
129            'c' => flags.confirm = true, // parsed, silently ignored
130            other => return Err(format!("unknown flag '{other}' in substitute")),
131        }
132    }
133
134    Ok(SubstituteCmd {
135        pattern,
136        replacement,
137        flags,
138    })
139}
140
141/// Apply a parsed substitute command to `line_range` (0-based inclusive)
142/// in the editor's buffer.
143///
144/// # Pattern resolution
145///
146/// If `cmd.pattern` is `None` (user typed `:s//rep/`), the editor's
147/// `last_search()` is used. Returns an error with `"no previous regular
148/// expression"` when both are empty.
149///
150/// # Case-sensitivity precedence
151///
152/// `flags.case_sensitive` wins over `flags.ignore_case`, which wins over
153/// the editor's `settings().ignore_case`.
154///
155/// # Cursor
156///
157/// After a successful substitution the cursor is placed at column 0 of the
158/// **last line that changed**, matching vim semantics. When no replacements
159/// are made the cursor is left unchanged.
160///
161/// # Undo
162///
163/// One undo snapshot is pushed before the first edit. If no replacements
164/// occur the snapshot is popped so the undo stack stays clean.
165///
166/// # Errors
167///
168/// Returns an error when pattern resolution fails or the regex is invalid.
169pub fn apply_substitute<H: crate::types::Host>(
170    ed: &mut Editor<hjkl_buffer::Buffer, H>,
171    cmd: &SubstituteCmd,
172    line_range: std::ops::RangeInclusive<u32>,
173) -> Result<SubstituteOutcome, SubstError> {
174    // Resolve pattern.
175    let pattern_str: String = match &cmd.pattern {
176        Some(p) => p.clone(),
177        None => ed
178            .last_search()
179            .map(str::to_owned)
180            .ok_or_else(|| "no previous regular expression".to_string())?,
181    };
182
183    // Case-sensitivity.
184    let case_insensitive = if cmd.flags.case_sensitive {
185        false
186    } else if cmd.flags.ignore_case {
187        true
188    } else {
189        ed.settings().ignore_case
190    };
191
192    let translated = crate::search::vim_to_rust_regex(&pattern_str);
193    let effective_pattern = if case_insensitive {
194        format!("(?i){translated}")
195    } else {
196        translated
197    };
198
199    let regex = Regex::new(&effective_pattern).map_err(|e| format!("bad pattern: {e}"))?;
200
201    ed.push_undo();
202
203    let start = *line_range.start() as usize;
204    let end = *line_range.end() as usize;
205    let rope = crate::types::Query::rope(ed.buffer());
206    let total = rope.len_lines();
207
208    let clamp_end = end.min(total.saturating_sub(1));
209    let mut new_lines: Vec<String> = crate::vim::rope_to_lines_vec(&rope);
210    let mut replacements = 0usize;
211    let mut lines_changed = 0usize;
212    let mut last_changed_row = 0usize;
213
214    if start <= clamp_end {
215        for (row, line) in new_lines[start..=clamp_end].iter_mut().enumerate() {
216            let (replaced, n) = do_replace(&regex, line, &cmd.replacement, cmd.flags.all);
217            if n > 0 {
218                *line = replaced;
219                replacements += n;
220                lines_changed += 1;
221                last_changed_row = start + row;
222            }
223        }
224    }
225
226    if replacements == 0 {
227        ed.pop_last_undo();
228        return Ok(SubstituteOutcome {
229            replacements: 0,
230            lines_changed: 0,
231        });
232    }
233
234    // Apply the new content in one shot.
235    ed.buffer_mut().replace_all(&new_lines.join("\n"));
236
237    // Cursor lands on the start of the last changed line.
238    ed.buffer_mut()
239        .set_cursor(hjkl_buffer::Position::new(last_changed_row, 0));
240
241    ed.mark_content_dirty();
242
243    // Update last_search so n/N can repeat the same pattern.
244    ed.set_last_search(Some(pattern_str), true);
245
246    Ok(SubstituteOutcome {
247        replacements,
248        lines_changed,
249    })
250}
251
/// Break `s` into fields at each unescaped `/`.
///
/// - `\/` contributes a literal `/` to the current field.
/// - Any other `\x` pair is copied through verbatim, so regex escapes
///   (`\d`, `\s`, `\1`, …) and `\\` survive untouched.
/// - A lone trailing `\` is kept as-is.
///
/// Only the first two unescaped slashes act as separators, so the result
/// has at most three fields: `[pattern, replacement, flags]`. Any later
/// `/` is kept as an ordinary character of the flags field. The result
/// always contains at least one (possibly empty) field.
fn split_on_slash(s: &str) -> Vec<String> {
    // Two separators produce the three fields we care about.
    const MAX_SEPARATORS: usize = 2;

    let mut fields: Vec<String> = Vec::with_capacity(MAX_SEPARATORS + 1);
    let mut current = String::new();
    let mut input = s.chars();

    while let Some(ch) = input.next() {
        match ch {
            // A backslash always consumes the character after it, if any.
            '\\' => match input.next() {
                Some('/') => current.push('/'),
                Some(escaped) => {
                    current.push('\\');
                    current.push(escaped);
                }
                None => current.push('\\'),
            },
            // Separator: close the current field and start the next one.
            '/' if fields.len() < MAX_SEPARATORS => fields.push(std::mem::take(&mut current)),
            // Ordinary character, including a `/` that appears once we
            // are already inside the flags field.
            plain => current.push(plain),
        }
    }

    fields.push(current);
    fields
}
298
/// Translate a vim-style replacement string into the `regex` crate's
/// replacement syntax.
///
/// - `&` and `\0` → `$0` (whole match)
/// - `\1`…`\9` → `$1`…`$9` (capture groups)
/// - `\&` → literal `&`
/// - `\\` → `\` (literal backslash)
/// - Any other `\x` → `x` (drop the backslash)
/// - A trailing lone `\` is dropped
/// - A literal `$` → `$$`, because vim treats `$` as plain text while the
///   `regex` crate would read `$name` as a capture reference
///
/// A group reference is written in braced form (`${1}`) only when the
/// character that follows it in the output is one of `[0-9A-Za-z_]`.
/// Without the braces the `regex` crate would read `$1abc` as the
/// nonexistent group named `1abc` and substitute the empty string.
fn translate_replacement(s: &str) -> String {
    /// One unit of the replacement after vim escape processing.
    enum Piece {
        /// Reference to capture group `0`…`9`.
        Group(char),
        /// A character to be emitted verbatim.
        Literal(char),
    }

    let mut out = String::with_capacity(s.len() + 4);
    // True while `out` ends in an unbraced `$N` that a following
    // identifier character would accidentally extend.
    let mut open_ref = false;
    let mut chars = s.chars();

    while let Some(c) = chars.next() {
        let piece = match c {
            '&' => Piece::Group('0'),
            '\\' => match chars.next() {
                Some(digit @ '0'..='9') => Piece::Group(digit),
                // Covers `\&`, `\\` and every other `\x`: keep `x` only.
                Some(other) => Piece::Literal(other),
                // Trailing backslash: nothing left to escape.
                None => break,
            },
            other => Piece::Literal(other),
        };

        match piece {
            Piece::Group(digit) => {
                out.push('$');
                out.push(digit);
                open_ref = true;
            }
            Piece::Literal(ch) => {
                if open_ref && (ch.is_ascii_alphanumeric() || ch == '_') {
                    // Rewrite the trailing `$N` as `${N}` so `ch` is not
                    // swallowed into the group name.
                    if let Some(digit) = out.pop() {
                        out.push('{');
                        out.push(digit);
                        out.push('}');
                    }
                }
                open_ref = false;
                if ch == '$' {
                    out.push_str("$$");
                } else {
                    out.push(ch);
                }
            }
        }
    }
    out
}
329
330/// Replace first or all occurrences of `regex` in `text` using the
331/// already-translated `replacement` string. Returns `(new_text, count)`.
332fn do_replace(regex: &Regex, text: &str, replacement: &str, all: bool) -> (String, usize) {
333    let matches = regex.find_iter(text).count();
334    if matches == 0 {
335        return (text.to_string(), 0);
336    }
337    let replaced = if all {
338        regex.replace_all(text, replacement).into_owned()
339    } else {
340        regex.replace(text, replacement).into_owned()
341    };
342    let count = if all { matches } else { 1 };
343    (replaced, count)
344}
345
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::{DefaultHost, Options};
    use hjkl_buffer::Buffer;

    /// Editor flavour used by every test in this module.
    type TestEditor = Editor<Buffer, DefaultHost>;

    /// Build an editor whose buffer is preloaded with `content`.
    fn editor_with(content: &str) -> TestEditor {
        let mut editor = Editor::new(Buffer::new(), DefaultHost::new(), Options::default());
        editor.set_content(content);
        editor
    }

    /// Text of buffer line `row`.
    fn buf_line(e: &TestEditor, row: usize) -> String {
        hjkl_buffer::rope_line_str(&e.buffer().rope(), row)
    }

    /// Parse `tail`, panicking if the parser rejects it.
    fn parsed(tail: &str) -> SubstituteCmd {
        parse_substitute(tail).unwrap()
    }

    /// Parse `tail`, panicking if the parser accepts it.
    fn parse_error(tail: &str) -> SubstError {
        parse_substitute(tail).unwrap_err()
    }

    /// Parse `tail` and apply it to the first line of `editor`.
    fn substitute_first_line(editor: &mut TestEditor, tail: &str) -> SubstituteOutcome {
        apply_substitute(editor, &parsed(tail), 0..=0).unwrap()
    }

    // ── Parser tests ─────────────────────────────────────────────────

    #[test]
    fn parse_basic() {
        let command = parsed("/foo/bar/");
        assert_eq!(command.pattern.as_deref(), Some("foo"));
        assert_eq!(command.replacement, "bar");
        assert!(!command.flags.all);
    }

    #[test]
    fn parse_trailing_slash_optional() {
        let command = parsed("/foo/bar");
        assert_eq!(command.pattern.as_deref(), Some("foo"));
        assert_eq!(command.replacement, "bar");
    }

    #[test]
    fn parse_global_flag() {
        assert!(parsed("/x/y/g").flags.all);
    }

    #[test]
    fn parse_ignore_case_flag() {
        assert!(parsed("/x/y/i").flags.ignore_case);
    }

    #[test]
    fn parse_case_sensitive_flag() {
        assert!(parsed("/x/y/I").flags.case_sensitive);
    }

    #[test]
    fn parse_confirm_flag_accepted() {
        assert!(parsed("/x/y/c").flags.confirm);
    }

    #[test]
    fn parse_multi_flags() {
        let flags = parsed("/x/y/gi").flags;
        assert!(flags.all);
        assert!(flags.ignore_case);
    }

    #[test]
    fn parse_unknown_flag_errors() {
        let message = parse_error("/x/y/z");
        assert!(message.contains("unknown flag 'z'"), "{message}");
    }

    #[test]
    fn parse_empty_pattern_is_none() {
        let command = parsed("//bar/");
        assert!(command.pattern.is_none());
        assert_eq!(command.replacement, "bar");
    }

    #[test]
    fn parse_empty_replacement_ok() {
        let command = parsed("/foo//");
        assert_eq!(command.pattern.as_deref(), Some("foo"));
        assert_eq!(command.replacement, "");
    }

    #[test]
    fn parse_escaped_slash_in_pattern() {
        assert_eq!(parsed("/a\\/b/c/").pattern.as_deref(), Some("a/b"));
    }

    #[test]
    fn parse_escaped_slash_in_replacement() {
        // The stored replacement is the translated form; a literal `/`
        // passes through translation untouched.
        assert_eq!(parsed("/a/b\\/c/").replacement, "b/c");
    }

    #[test]
    fn parse_ampersand_becomes_dollar_zero() {
        assert_eq!(parsed("/foo/[&]/").replacement, "[$0]");
    }

    #[test]
    fn parse_escaped_ampersand_is_literal() {
        assert_eq!(parsed("/foo/\\&/").replacement, "&");
    }

    #[test]
    fn parse_group_ref_translates() {
        assert_eq!(parsed("/(foo)/\\1/").replacement, "$1");
    }

    #[test]
    fn parse_group_ref_nine() {
        assert_eq!(parsed("/(x)/\\9/").replacement, "$9");
    }

    #[test]
    fn parse_wrong_delimiter_errors() {
        let message = parse_error("|foo|bar|");
        assert!(message.contains("'/'"), "{message}");
    }

    #[test]
    fn parse_too_few_fields_errors() {
        let message = parse_error("/foo");
        assert!(
            message.contains("needs /pattern/replacement"),
            "{message}"
        );
    }

    // ── Apply tests ──────────────────────────────────────────────────

    #[test]
    fn apply_single_line_first_only() {
        let mut editor = editor_with("foo foo");
        let outcome = substitute_first_line(&mut editor, "/foo/bar/");
        assert_eq!(outcome.replacements, 1);
        assert_eq!(outcome.lines_changed, 1);
        assert_eq!(buf_line(&editor, 0), "bar foo");
    }

    #[test]
    fn apply_single_line_global() {
        let mut editor = editor_with("foo foo foo");
        let outcome = substitute_first_line(&mut editor, "/foo/bar/g");
        assert_eq!(outcome.replacements, 3);
        assert_eq!(outcome.lines_changed, 1);
        assert_eq!(buf_line(&editor, 0), "bar bar bar");
    }

    #[test]
    fn apply_multi_line_range() {
        let mut editor = editor_with("foo\nfoo foo\nbar");
        let command = parsed("/foo/xyz/g");
        let outcome = apply_substitute(&mut editor, &command, 0..=2).unwrap();
        assert_eq!(outcome.replacements, 3);
        assert_eq!(outcome.lines_changed, 2);
        assert_eq!(buf_line(&editor, 0), "xyz");
        assert_eq!(buf_line(&editor, 1), "xyz xyz");
        assert_eq!(buf_line(&editor, 2), "bar");
    }

    #[test]
    fn apply_no_match_returns_zero() {
        let mut editor = editor_with("hello");
        let before = buf_line(&editor, 0);
        let outcome = substitute_first_line(&mut editor, "/xyz/abc/");
        assert_eq!(outcome.replacements, 0);
        assert_eq!(outcome.lines_changed, 0);
        assert_eq!(buf_line(&editor, 0), before);
    }

    #[test]
    fn apply_case_insensitive_flag() {
        let mut editor = editor_with("Foo FOO foo");
        let outcome = substitute_first_line(&mut editor, "/foo/bar/gi");
        assert_eq!(outcome.replacements, 3);
        assert_eq!(buf_line(&editor, 0), "bar bar bar");
    }

    #[test]
    fn apply_case_sensitive_flag_overrides_editor_setting() {
        let mut editor = editor_with("Foo foo");
        // The editor-wide setting asks for case-insensitive matching…
        editor.settings_mut().ignore_case = true;
        // …but the capital `I` flag forces case-sensitive matching, so
        // only the lowercase "foo" is replaced.
        let outcome = substitute_first_line(&mut editor, "/foo/bar/I");
        assert_eq!(outcome.replacements, 1);
        assert_eq!(buf_line(&editor, 0), "Foo bar");
    }

    #[test]
    fn apply_empty_pattern_reuses_last_search() {
        let mut editor = editor_with("hello world");
        editor.set_last_search(Some("world".to_string()), true);
        let outcome = substitute_first_line(&mut editor, "//planet/");
        assert_eq!(outcome.replacements, 1);
        assert_eq!(buf_line(&editor, 0), "hello planet");
    }

    #[test]
    fn apply_empty_pattern_no_last_search_errors() {
        let mut editor = editor_with("hello");
        let command = parsed("//bar/");
        let message = apply_substitute(&mut editor, &command, 0..=0).unwrap_err();
        assert!(
            message.contains("no previous regular expression"),
            "{message}"
        );
    }

    #[test]
    fn apply_updates_last_search() {
        let mut editor = editor_with("foo");
        substitute_first_line(&mut editor, "/foo/bar/");
        assert_eq!(editor.last_search(), Some("foo"));
    }

    #[test]
    fn apply_empty_replacement_deletes_match() {
        let mut editor = editor_with("hello world");
        let outcome = substitute_first_line(&mut editor, "/world//");
        assert_eq!(outcome.replacements, 1);
        assert_eq!(buf_line(&editor, 0), "hello ");
    }

    #[test]
    fn apply_undo_reverts_in_one_step() {
        let mut editor = editor_with("foo");
        substitute_first_line(&mut editor, "/foo/bar/");
        assert_eq!(buf_line(&editor, 0), "bar");
        editor.undo();
        assert_eq!(buf_line(&editor, 0), "foo");
    }

    #[test]
    fn apply_ampersand_in_replacement() {
        let mut editor = editor_with("foo");
        substitute_first_line(&mut editor, "/foo/[&]/");
        assert_eq!(buf_line(&editor, 0), "[foo]");
    }

    #[test]
    fn apply_capture_group_reference() {
        let mut editor = editor_with("hello world");
        substitute_first_line(&mut editor, "/(\\w+)/<<\\1>>/g");
        assert_eq!(buf_line(&editor, 0), "<<hello>> <<world>>");
    }
}