Skip to main content

hjkl_engine/
search.rs

//! Engine-owned search state + execution helpers.
//!
//! Patch 0.0.35 step 1 of the 33-method classification rollout
//! (see `DESIGN_33_METHOD_CLASSIFICATION.md`). The pattern, per-row
//! match cache, and `wrapscan` flag previously lived on
//! [`hjkl_buffer::Buffer`] (private `SearchState`). Moving the FSM
//! state out of the buffer keeps multi-window hosts from sharing the
//! "current search" across panes that happen to share content.
//!
//! The buffer keeps `Search::find_next` / `Search::find_prev` (the
//! SPEC trait surface — pure observers, caller owns the regex). This
//! module composes those primitives with the Editor-owned
//! [`SearchState`] to drive `n` / `N` / `*` / `#` / `/` / `?`.
//!
//! 0.0.37: the buffer-inherent `search_forward` / `search_backward`
//! / `search_matches` / `set_search_pattern` / `search_pattern` /
//! `set_search_wrap` / `search_wraps` accessors are removed. Search
//! state lives on `Editor::search_state`, the rendering path
//! (`BufferView`) takes the active `&Regex` as a parameter, and the
//! `Search` trait impl always wraps (engine controls non-wrap
//! semantics).
22
23use regex::Regex;
24
25use crate::types::{Cursor, Query, Search};
26
/// How letter case is treated when a search pattern is compiled.
///
/// Corresponds to the `:set ignorecase` / `:set smartcase` option pair.
/// Construct one with [`CaseMode::from_options`] and hand it to
/// [`resolve_case_mode`] along with the raw pattern text.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CaseMode {
    /// Case always matters, whatever the pattern contains.
    Sensitive,
    /// Case never matters, whatever the pattern contains.
    Insensitive,
    /// Vim's `smartcase`: case is ignored until the pattern itself
    /// contains an uppercase character.
    Smart,
}
41
42impl CaseMode {
43    /// Build a `CaseMode` from the two option booleans.
44    ///
45    /// | `ignorecase` | `smartcase` | Result        |
46    /// |---|---|---|
47    /// | `false` | `*`   | `Sensitive`   |
48    /// | `true`  | `false` | `Insensitive` |
49    /// | `true`  | `true`  | `Smart`       |
50    pub fn from_options(ignorecase: bool, smartcase: bool) -> Self {
51        if !ignorecase {
52            CaseMode::Sensitive
53        } else if smartcase {
54            CaseMode::Smart
55        } else {
56            CaseMode::Insensitive
57        }
58    }
59}
60
61/// Strip `\c` / `\C` overrides from `pat`, resolve the effective
62/// [`CaseMode`], and return the cleaned pattern together with the
63/// resolved mode.
64///
65/// ### Override rules (mirrors vim)
66///
67/// - `\c` anywhere in `pat` forces case-insensitive.
68/// - `\C` anywhere in `pat` forces case-sensitive.
69/// - When both appear the **last** one wins.
70/// - Both are stripped from the returned pattern.
71///
72/// ### Smart-case detection
73///
74/// When `base` is [`CaseMode::Smart`] and no `\c`/`\C` override was
75/// found, the pattern is scanned for uppercase Unicode letters. Any
76/// uppercase letter → `Sensitive`; otherwise → `Insensitive`.
77///
78/// ### Per-substitute flag interaction
79///
80/// The `:s/…/…/i` and `:s/…/…/I` flags are handled in
81/// `apply_substitute` **before** calling this function (they
82/// short-circuit entirely). This function is not involved.
83pub fn resolve_case_mode(pat: &str, base: CaseMode) -> (String, CaseMode) {
84    let mut out = String::with_capacity(pat.len());
85    let mut chars = pat.chars().peekable();
86    // None = no override seen yet; Some(true) = \c (insensitive); Some(false) = \C (sensitive).
87    let mut override_mode: Option<bool> = None;
88
89    while let Some(ch) = chars.next() {
90        if ch == '\\' {
91            match chars.peek() {
92                Some('c') => {
93                    chars.next();
94                    override_mode = Some(true); // \c → insensitive
95                }
96                Some('C') => {
97                    chars.next();
98                    override_mode = Some(false); // \C → sensitive
99                }
100                Some('<') => {
101                    chars.next();
102                    out.push_str(r"\b");
103                }
104                Some('>') => {
105                    chars.next();
106                    out.push_str(r"\b");
107                }
108                _ => {
109                    out.push('\\');
110                    if let Some(next) = chars.next() {
111                        out.push(next);
112                    }
113                }
114            }
115        } else {
116            out.push(ch);
117        }
118    }
119
120    let resolved = match override_mode {
121        Some(true) => CaseMode::Insensitive,
122        Some(false) => CaseMode::Sensitive,
123        None => match base {
124            CaseMode::Smart => {
125                // Any uppercase rune → sensitive.
126                if out.chars().any(|c| c.is_uppercase()) {
127                    CaseMode::Sensitive
128                } else {
129                    CaseMode::Insensitive
130                }
131            }
132            other => other,
133        },
134    };
135
136    (out, resolved)
137}
138
139/// Rewrite vim-style word-boundary escapes to Rust `regex`-compatible form
140/// **and** strip `\c`/`\C` case overrides.
141///
142/// The `regex` crate supports `\b` (symmetric word boundary) but not the
143/// vim/PCRE `\<` (word-boundary start) or `\>` (word-boundary end) variants.
144/// This function performs a single-pass rewrite:
145///
146/// - `\<` → `\b`
147/// - `\>` → `\b`
148/// - `\c` / `\C` stripped (case override — handled by [`resolve_case_mode`])
149/// - `\\<` / `\\>` (literal double-backslash followed by `<`/`>`) are left
150///   untouched — only the unescaped form transforms.
151/// - All other syntax (`\b`, `\B`, `\d`, anchors, …) passes through unchanged.
152///
153/// Call this on the raw user-typed pattern string **before** passing to
154/// `regex::Regex::new`. Keep the original string for display / history.
155///
156/// Prefer [`resolve_case_mode`] when you also need to apply case semantics;
157/// that function performs the same boundary rewrite internally.
158pub fn vim_to_rust_regex(pat: &str) -> String {
159    resolve_case_mode(pat, CaseMode::Sensitive).0
160}
161
162/// Per-row match cache keyed against the buffer's `dirty_gen`. Live
163/// alongside the active pattern so re-running `n` doesn't re-scan
164/// rows the buffer hasn't touched.
165#[derive(Debug, Clone, Default)]
166pub struct SearchState {
167    /// Active pattern, if any. `None` clears highlighting and makes
168    /// `n` / `N` no-op until the next `/` / `?` commit.
169    pub pattern: Option<Regex>,
170    /// `true` for `/`, `false` for `?` — drives `n` vs `N` direction.
171    /// Mirrors `vim.last_search_forward`; consolidated so future
172    /// patches can drop the duplicate.
173    pub forward: bool,
174    /// `matches[row]` is the `(byte_start, byte_end)` runs cached on
175    /// `row`, captured at `gen[row]`. Length grows lazily.
176    pub matches: Vec<Vec<(usize, usize)>>,
177    /// Per-row generation tag. When the buffer's `dirty_gen` for a
178    /// row diverges, the row gets re-scanned on next access.
179    pub generations: Vec<u64>,
180    /// Wrap past buffer ends. Mirrors `Settings::wrapscan`.
181    pub wrap_around: bool,
182}
183
184impl SearchState {
185    /// Empty state — no pattern, forward direction, wraps.
186    pub fn new() -> Self {
187        Self {
188            pattern: None,
189            forward: true,
190            matches: Vec::new(),
191            generations: Vec::new(),
192            wrap_around: true,
193        }
194    }
195
196    /// Replace the active pattern. Drops the cached match runs so
197    /// the next access re-scans against the new regex.
198    pub fn set_pattern(&mut self, re: Option<Regex>) {
199        self.pattern = re;
200        self.matches.clear();
201        self.generations.clear();
202    }
203
204    /// Refresh `matches[row]` if either the row's gen has rolled or
205    /// we never scanned it. Returns the cached slice.
206    pub fn matches_for(&mut self, row: usize, line: &str, dirty_gen: u64) -> &[(usize, usize)] {
207        let Some(ref re) = self.pattern else {
208            return &[];
209        };
210        if self.matches.len() <= row {
211            self.matches.resize_with(row + 1, Vec::new);
212            self.generations.resize(row + 1, u64::MAX);
213        }
214        if self.generations[row] != dirty_gen {
215            self.matches[row] = re.find_iter(line).map(|m| (m.start(), m.end())).collect();
216            self.generations[row] = dirty_gen;
217        }
218        &self.matches[row]
219    }
220}
221
222/// Move the cursor to the next match starting from (or just after,
223/// when `skip_current = true`) the cursor. Wraps end-of-buffer to
224/// row 0 when `state.wrap_around`. Returns `true` when a match was
225/// found.
226///
227/// Pure observe + cursor mutation — no auto-scroll. The Editor's
228/// post-step `ensure_cursor_in_scrolloff` reapplies viewport
229/// follow.
230pub fn search_forward<B: Cursor + Query + Search>(
231    buf: &mut B,
232    state: &mut SearchState,
233    skip_current: bool,
234) -> bool {
235    let Some(re) = state.pattern.clone() else {
236        return false;
237    };
238    let cursor = buf.cursor();
239    let total = buf.line_count();
240    if total == 0 {
241        return false;
242    }
243    // To "skip the current cell", advance `from` one byte past the
244    // cursor before asking `find_next` for the at-or-after match.
245    // `pos_at_byte` clamps overflow to end-of-buffer so this is
246    // safe even when the cursor sits at the trailing edge.
247    let from = if skip_current {
248        let from_byte = buf.byte_offset(cursor);
249        buf.pos_at_byte(from_byte.saturating_add(1))
250    } else {
251        cursor
252    };
253    if let Some(range) = buf.find_next(from, &re) {
254        // Honour engine wrap policy explicitly. The buffer impl uses
255        // its own (deprecated) wrap flag; for new search state the
256        // engine SearchState is the source of truth.
257        if !state.wrap_around && range.start.line < cursor.line {
258            return false;
259        }
260        Cursor::set_cursor(buf, range.start);
261        return true;
262    }
263    false
264}
265
266/// Symmetric counterpart of [`search_forward`].
267pub fn search_backward<B: Cursor + Query + Search>(
268    buf: &mut B,
269    state: &mut SearchState,
270    skip_current: bool,
271) -> bool {
272    let Some(re) = state.pattern.clone() else {
273        return false;
274    };
275    let cursor = buf.cursor();
276    let total = buf.line_count();
277    if total == 0 {
278        return false;
279    }
280    // Buffer's `Search::find_prev` returns the at-or-before match
281    // for the anchor `from`. For `skip_current`, we want the
282    // rightmost match whose start is *strictly before* the cursor.
283    // Strategy: query find_prev(cursor); if the returned match
284    // covers/starts-at the cursor, step the anchor back one byte
285    // past that match's start and re-query so the next find_prev
286    // skips it. Otherwise the at-or-before match is already strictly
287    // before the cursor and we accept it.
288    let initial = buf.find_prev(cursor, &re);
289    let range = if skip_current {
290        match initial {
291            Some(m) if m.start == cursor => {
292                // Cursor sits exactly on a match start (typical post-
293                // commit state). Step past and re-query.
294                let cb = buf.byte_offset(m.start);
295                if cb == 0 {
296                    // No earlier byte — fall through to wrap.
297                    None
298                } else {
299                    let anchor = buf.pos_at_byte(cb.saturating_sub(1));
300                    buf.find_prev(anchor, &re)
301                }
302            }
303            other => other,
304        }
305    } else {
306        initial
307    };
308    if let Some(range) = range {
309        if !state.wrap_around && range.start.line > cursor.line {
310            return false;
311        }
312        Cursor::set_cursor(buf, range.start);
313        return true;
314    }
315    false
316}
317
318/// Match positions on `row` as `(byte_start, byte_end)`. Used by
319/// the engine's highlight pipeline. Reads through the cache so a
320/// steady-state buffer doesn't re-scan every frame.
321pub fn search_matches<B: Query>(
322    buf: &B,
323    state: &mut SearchState,
324    dirty_gen: u64,
325    row: usize,
326) -> Vec<(usize, usize)> {
327    if state.pattern.is_none() {
328        return Vec::new();
329    }
330    let line_count = buf.line_count() as usize;
331    if row >= line_count {
332        return Vec::new();
333    }
334    let line = buf.line(row as u32);
335    state.matches_for(row, &line, dirty_gen).to_vec()
336}
337
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::Pos;
    use hjkl_buffer::Buffer;

    /// Compile `pat` verbatim as a Rust regex.
    fn re(pat: &str) -> Regex {
        Regex::new(pat).unwrap()
    }

    /// Compile a vim-flavoured pattern after the boundary / override rewrite.
    fn vim_re(pat: &str) -> Regex {
        Regex::new(&vim_to_rust_regex(pat)).unwrap()
    }

    // ── vim_to_rust_regex unit tests ─────────────────────────────────────────

    /// `\<` and `\>` both rewrite to `\b`.
    #[test]
    fn vim_boundary_rewrites_to_b() {
        assert_eq!(vim_to_rust_regex(r"\<foo\>"), r"\bfoo\b");
        assert_eq!(vim_to_rust_regex(r"\<"), r"\b");
        assert_eq!(vim_to_rust_regex(r"\>"), r"\b");
    }

    /// A literal double-backslash before `<`/`>` must not be consumed.
    /// `\\<` in the source string is two chars: `\` `\`; the rewriter sees
    /// the first `\` followed by `\`, emits `\\`, then `<` is plain text.
    #[test]
    fn escaped_backslash_left_alone() {
        // Input: \\< (three chars in source: '\', '\', '<')
        // Expected output: \\< (the first \ escapes the second, < is literal)
        let input = r"\\<";
        let output = vim_to_rust_regex(input);
        assert_eq!(output, r"\\<");
    }

    /// Other escape sequences (`\b`, `\B`, `\d`, `\w`, anchors) pass through.
    #[test]
    fn other_escapes_unchanged() {
        assert_eq!(vim_to_rust_regex(r"\b"), r"\b");
        assert_eq!(vim_to_rust_regex(r"\B"), r"\B");
        assert_eq!(vim_to_rust_regex(r"\d+"), r"\d+");
        assert_eq!(vim_to_rust_regex(r"^\w+$"), r"^\w+$");
    }

    /// Mixed: `\<\w+\>` rewrites to `\b\w+\b` — matches whole words.
    #[test]
    fn mixed_boundary_and_word_class() {
        assert_eq!(vim_to_rust_regex(r"\<\w+\>"), r"\b\w+\b");
    }

    // ── Integration: compiled vim patterns match correctly ───────────────────

    /// `/foo\<bar\>` — `bar` as a standalone word is matched, `foobar` is not.
    #[test]
    fn vim_boundary_matches_standalone_word_not_suffix() {
        let re = vim_re(r"foo\<bar\>");
        // "foobar" — `bar` follows directly after `foo` with no word boundary:
        // the `\b` between `foo` and `bar` fails here.
        assert!(!re.is_match("foobar"));
        // "foo bar" — word boundary between `foo ` and `bar`:
        // pattern `foo\bbar\b` does not match because `foo` is not adjacent.
        // Use a pattern that directly tests the intent: `bar` as a whole word.
        let re2 = vim_re(r"\<bar\>");
        assert!(re2.is_match("foo bar baz"));
        assert!(!re2.is_match("foobar"));
    }

    /// `\<word` matches `word` at start-of-word but not mid-word.
    #[test]
    fn vim_boundary_start_only() {
        let re = vim_re(r"\<word");
        assert!(re.is_match("word here"));
        assert!(re.is_match("some word here"));
        assert!(!re.is_match("sword"));
        assert!(!re.is_match("aword"));
    }

    /// `word\>` matches `word` at end-of-word but not when followed by more.
    #[test]
    fn vim_boundary_end_only() {
        let re = vim_re(r"word\>");
        assert!(re.is_match("some word"));
        assert!(re.is_match("word"));
        assert!(!re.is_match("words"));
        assert!(!re.is_match("wordsmith"));
    }

    /// Existing `\b` continues to work (sanity check — no double-transform).
    #[test]
    fn existing_b_boundary_unchanged() {
        let re = vim_re(r"\bfoo\b");
        assert!(re.is_match("foo"));
        assert!(re.is_match("a foo b"));
        assert!(!re.is_match("foobar"));
        assert!(!re.is_match("afoo"));
    }

    /// Mixed: `\<\w+\>` matches whole words only.
    #[test]
    fn vim_whole_word_pattern() {
        let re = vim_re(r"\<\w+\>");
        let matches: Vec<_> = re.find_iter("foo bar baz").map(|m| m.as_str()).collect();
        assert_eq!(matches, vec!["foo", "bar", "baz"]);
    }

    // ── Engine search helpers against a real buffer ──────────────────────────

    #[test]
    fn empty_state_no_match() {
        let mut b = Buffer::from_str("anything");
        let mut s = SearchState::new();
        assert!(!search_forward(&mut b, &mut s, false));
        assert!(!search_backward(&mut b, &mut s, false));
    }

    #[test]
    fn forward_finds_first_match() {
        let mut b = Buffer::from_str("foo bar foo baz");
        let mut s = SearchState::new();
        s.set_pattern(Some(re("foo")));
        assert!(search_forward(&mut b, &mut s, false));
        assert_eq!(Cursor::cursor(&b), Pos::new(0, 0));
    }

    #[test]
    fn forward_skip_current_walks_past() {
        let mut b = Buffer::from_str("foo bar foo baz");
        let mut s = SearchState::new();
        s.set_pattern(Some(re("foo")));
        search_forward(&mut b, &mut s, false);
        search_forward(&mut b, &mut s, true);
        assert_eq!(Cursor::cursor(&b), Pos::new(0, 8));
    }

    #[test]
    fn forward_wraps_to_top() {
        let mut b = Buffer::from_str("zzz\nfoo");
        // 0.0.37: wrap policy lives entirely on `SearchState::wrap_around`;
        // the buffer-side `set_search_wrap` accessor is gone. Trait
        // `find_next` always wraps; the engine search free function
        // honours `s.wrap_around` directly.
        Cursor::set_cursor(&mut b, Pos::new(1, 2));
        let mut s = SearchState::new();
        s.set_pattern(Some(re("zzz")));
        s.wrap_around = true;
        assert!(search_forward(&mut b, &mut s, true));
        assert_eq!(Cursor::cursor(&b), Pos::new(0, 0));
    }

    /// The first lookup scans and tags the row; an unchanged generation is
    /// then answered from the cache, and a new generation forces a re-scan.
    #[test]
    fn search_matches_caches_against_dirty_gen() {
        let b = Buffer::from_str("foo bar");
        let mut s = SearchState::new();
        s.set_pattern(Some(re("bar")));
        let dgen = b.dirty_gen();
        let initial = search_matches(&b, &mut s, dgen, 0);
        assert_eq!(initial, vec![(4, 7)]);
        assert_eq!(s.generations[0], dgen);

        // Poison the cached row: with the generation unchanged the stored
        // runs must come back verbatim, proving no re-scan happened.
        s.matches[0] = vec![(0, 1)];
        assert_eq!(search_matches(&b, &mut s, dgen, 0), vec![(0, 1)]);

        // A different generation invalidates the row and re-scans it.
        let bumped = dgen.wrapping_add(1);
        assert_eq!(search_matches(&b, &mut s, bumped, 0), vec![(4, 7)]);
        assert_eq!(s.generations[0], bumped);
    }

    /// Rows past the end of the buffer yield no matches and no panic.
    #[test]
    fn search_matches_out_of_range_row_is_empty() {
        let b = Buffer::from_str("foo bar");
        let mut s = SearchState::new();
        s.set_pattern(Some(re("bar")));
        let dgen = b.dirty_gen();
        assert!(search_matches(&b, &mut s, dgen, 5).is_empty());
    }

    // ── CaseMode::from_options matrix ────────────────────────────────────────

    #[test]
    fn case_mode_from_options_matrix() {
        // ic=false, smart=* → Sensitive
        assert_eq!(CaseMode::from_options(false, false), CaseMode::Sensitive);
        assert_eq!(CaseMode::from_options(false, true), CaseMode::Sensitive);
        // ic=true, smart=false → Insensitive
        assert_eq!(CaseMode::from_options(true, false), CaseMode::Insensitive);
        // ic=true, smart=true → Smart
        assert_eq!(CaseMode::from_options(true, true), CaseMode::Smart);
    }

    // ── resolve_case_mode unit tests ─────────────────────────────────────────

    #[test]
    fn resolve_case_mode_no_override_smart_lowercase() {
        let (stripped, mode) = resolve_case_mode("foo", CaseMode::Smart);
        assert_eq!(stripped, "foo");
        assert_eq!(mode, CaseMode::Insensitive);
    }

    #[test]
    fn resolve_case_mode_no_override_smart_uppercase() {
        let (stripped, mode) = resolve_case_mode("Foo", CaseMode::Smart);
        assert_eq!(stripped, "Foo");
        assert_eq!(mode, CaseMode::Sensitive);
    }

    #[test]
    fn resolve_case_mode_lower_c_override() {
        // \c overrides Sensitive → Insensitive; stripped pattern is "Foo"
        let (stripped, mode) = resolve_case_mode(r"\cFoo", CaseMode::Sensitive);
        assert_eq!(stripped, "Foo");
        assert_eq!(mode, CaseMode::Insensitive);
    }

    #[test]
    fn resolve_case_mode_upper_c_override() {
        // \C overrides Smart → Sensitive; stripped pattern is "foo"
        let (stripped, mode) = resolve_case_mode(r"foo\C", CaseMode::Smart);
        assert_eq!(stripped, "foo");
        assert_eq!(mode, CaseMode::Sensitive);
    }

    #[test]
    fn resolve_case_mode_last_wins() {
        // \c then \C → last-wins → Sensitive; stripped "foo"
        let (stripped, mode) = resolve_case_mode(r"\cfoo\C", CaseMode::Smart);
        assert_eq!(stripped, "foo");
        assert_eq!(mode, CaseMode::Sensitive);
    }

    // ── Integration: search with smartcase / \c / \C ─────────────────────────

    /// Build a regex the way a search commit does: resolve the case mode,
    /// then prefix `(?i)` when the result is case-insensitive.
    fn build_regex_from(pat: &str, ic: bool, smart: bool) -> Regex {
        let base = CaseMode::from_options(ic, smart);
        let (stripped, mode) = resolve_case_mode(pat, base);
        let src = if mode == CaseMode::Insensitive {
            format!("(?i){stripped}")
        } else {
            stripped
        };
        Regex::new(&src).unwrap()
    }

    #[test]
    fn search_finds_capital_with_smartcase_lowercase_pattern() {
        // ic=true, smart=true, pattern "foo" → Insensitive → matches "FOO"
        let re = build_regex_from("foo", true, true);
        assert!(re.is_match("FOO"), "expected match on 'FOO'");
        assert!(re.is_match("foo"), "expected match on 'foo'");
    }

    #[test]
    fn search_skips_capital_with_smartcase_mixed_pattern() {
        // ic=true, smart=true, pattern "Foo" → Sensitive → does NOT match "FOO"
        let re = build_regex_from("Foo", true, true);
        assert!(!re.is_match("FOO"), "must not match 'FOO' (case-sensitive)");
        assert!(re.is_match("Foo"), "must match exact 'Foo'");
    }

    #[test]
    fn search_lower_c_override_finds_capital() {
        // \cFoo + Sensitive base → Insensitive override → matches "FOO"
        let re = build_regex_from(r"\cFoo", false, false);
        assert!(re.is_match("FOO"), "\\c override must match 'FOO'");
        assert!(re.is_match("foo"), "\\c override must match 'foo'");
    }

    #[test]
    fn vim_to_rust_regex_strips_case_overrides() {
        // vim_to_rust_regex is now a thin wrapper; \c and \C are stripped
        assert_eq!(vim_to_rust_regex(r"\cfoo"), "foo");
        assert_eq!(vim_to_rust_regex(r"foo\C"), "foo");
        assert_eq!(vim_to_rust_regex(r"\<bar\>"), r"\bbar\b");
    }

    /// `*` on word "foo" emits the pattern `\bfoo\b` (all lowercase). Under
    /// smartcase that resolves to Insensitive → should match "FOO". This test
    /// simulates the word_at_cursor_search pattern-build path.
    #[test]
    fn star_search_finds_lowercase_when_smartcase_lower_word() {
        // word_at_cursor_search escapes the word then wraps \b..\b.
        // "foo" is all-lowercase after word-extraction → Smart → Insensitive.
        let pat = r"\bfoo\b";
        let re = build_regex_from(pat, true, true);
        // Case-insensitive → matches "FOO foo Foo".
        let text = "FOO foo Foo";
        let hits: Vec<_> = re.find_iter(text).map(|m| m.as_str()).collect();
        assert!(
            hits.contains(&"FOO"),
            "smartcase lower-word * must match FOO: {hits:?}"
        );
        assert!(
            hits.contains(&"foo"),
            "smartcase lower-word * must match foo: {hits:?}"
        );
    }
}