Skip to main content

hjkl_engine/
search.rs

1//! Engine-owned search state + execution helpers.
2//!
3//! Patch 0.0.35 step 1 of the 33-method classification rollout
4//! (see `DESIGN_33_METHOD_CLASSIFICATION.md`). The pattern, per-row
5//! match cache, and `wrapscan` flag previously lived on
6//! [`hjkl_buffer::Buffer`] (private `SearchState`). Moving the FSM
7//! state out of the buffer keeps multi-window hosts from sharing the
8//! "current search" across panes that happen to share content.
9//!
10//! The buffer keeps `Search::find_next` / `Search::find_prev` (the
11//! SPEC trait surface — pure observers, caller owns the regex). This
12//! module composes those primitives with the Editor-owned
13//! [`SearchState`] to drive `n` / `N` / `*` / `#` / `/` / `?`.
14//!
15//! 0.0.37: the buffer-inherent `search_forward` / `search_backward`
16//! / `search_matches` / `set_search_pattern` / `search_pattern` /
17//! `set_search_wrap` / `search_wraps` accessors are removed. Search
18//! state lives on `Editor::search_state`, the rendering path
19//! (`BufferView`) takes the active `&Regex` as a parameter, and the
20//! `Search` trait impl always wraps (engine controls non-wrap
21//! semantics).
22
23use regex::Regex;
24
25use crate::types::{Cursor, Query, Search};
26
/// Translate vim-style word-boundary escapes into syntax the Rust `regex`
/// crate accepts.
///
/// The pattern is scanned once, left to right, tracking whether the previous
/// character was an unconsumed backslash:
///
/// - `\<` → `\b`
/// - `\>` → `\b`
/// - any other `\x` pair is emitted verbatim. This includes `\\`, so the
///   `<` / `>` in `\\<` / `\\>` follows an *escaped* backslash and is left
///   as plain text.
/// - a lone trailing backslash is emitted unchanged.
/// - every unescaped character is copied through as-is.
///
/// Run this on the raw user-typed pattern **before** handing it to
/// `regex::Regex::new`; keep the original string for display / history.
pub fn vim_to_rust_regex(pat: &str) -> String {
    let mut rewritten = String::with_capacity(pat.len());
    // `true` while a backslash has been read but its partner has not.
    let mut escape_pending = false;
    for c in pat.chars() {
        if escape_pending {
            escape_pending = false;
            match c {
                // Both vim boundary flavours collapse to the symmetric `\b`.
                '<' | '>' => rewritten.push_str(r"\b"),
                other => {
                    rewritten.push('\\');
                    rewritten.push(other);
                }
            }
        } else if c == '\\' {
            escape_pending = true;
        } else {
            rewritten.push(c);
        }
    }
    if escape_pending {
        // Pattern ended on a bare backslash: pass it through untouched.
        rewritten.push('\\');
    }
    rewritten
}
68
69/// Per-row match cache keyed against the buffer's `dirty_gen`. Live
70/// alongside the active pattern so re-running `n` doesn't re-scan
71/// rows the buffer hasn't touched.
72#[derive(Debug, Clone, Default)]
73pub struct SearchState {
74    /// Active pattern, if any. `None` clears highlighting and makes
75    /// `n` / `N` no-op until the next `/` / `?` commit.
76    pub pattern: Option<Regex>,
77    /// `true` for `/`, `false` for `?` — drives `n` vs `N` direction.
78    /// Mirrors `vim.last_search_forward`; consolidated so future
79    /// patches can drop the duplicate.
80    pub forward: bool,
81    /// `matches[row]` is the `(byte_start, byte_end)` runs cached on
82    /// `row`, captured at `gen[row]`. Length grows lazily.
83    pub matches: Vec<Vec<(usize, usize)>>,
84    /// Per-row generation tag. When the buffer's `dirty_gen` for a
85    /// row diverges, the row gets re-scanned on next access.
86    pub generations: Vec<u64>,
87    /// Wrap past buffer ends. Mirrors `Settings::wrapscan`.
88    pub wrap_around: bool,
89}
90
91impl SearchState {
92    /// Empty state — no pattern, forward direction, wraps.
93    pub fn new() -> Self {
94        Self {
95            pattern: None,
96            forward: true,
97            matches: Vec::new(),
98            generations: Vec::new(),
99            wrap_around: true,
100        }
101    }
102
103    /// Replace the active pattern. Drops the cached match runs so
104    /// the next access re-scans against the new regex.
105    pub fn set_pattern(&mut self, re: Option<Regex>) {
106        self.pattern = re;
107        self.matches.clear();
108        self.generations.clear();
109    }
110
111    /// Refresh `matches[row]` if either the row's gen has rolled or
112    /// we never scanned it. Returns the cached slice.
113    pub fn matches_for(&mut self, row: usize, line: &str, dirty_gen: u64) -> &[(usize, usize)] {
114        let Some(ref re) = self.pattern else {
115            return &[];
116        };
117        if self.matches.len() <= row {
118            self.matches.resize_with(row + 1, Vec::new);
119            self.generations.resize(row + 1, u64::MAX);
120        }
121        if self.generations[row] != dirty_gen {
122            self.matches[row] = re.find_iter(line).map(|m| (m.start(), m.end())).collect();
123            self.generations[row] = dirty_gen;
124        }
125        &self.matches[row]
126    }
127}
128
129/// Move the cursor to the next match starting from (or just after,
130/// when `skip_current = true`) the cursor. Wraps end-of-buffer to
131/// row 0 when `state.wrap_around`. Returns `true` when a match was
132/// found.
133///
134/// Pure observe + cursor mutation — no auto-scroll. The Editor's
135/// post-step `ensure_cursor_in_scrolloff` reapplies viewport
136/// follow.
137pub fn search_forward<B: Cursor + Query + Search>(
138    buf: &mut B,
139    state: &mut SearchState,
140    skip_current: bool,
141) -> bool {
142    let Some(re) = state.pattern.clone() else {
143        return false;
144    };
145    let cursor = buf.cursor();
146    let total = buf.line_count();
147    if total == 0 {
148        return false;
149    }
150    // To "skip the current cell", advance `from` one byte past the
151    // cursor before asking `find_next` for the at-or-after match.
152    // `pos_at_byte` clamps overflow to end-of-buffer so this is
153    // safe even when the cursor sits at the trailing edge.
154    let from = if skip_current {
155        let from_byte = buf.byte_offset(cursor);
156        buf.pos_at_byte(from_byte.saturating_add(1))
157    } else {
158        cursor
159    };
160    if let Some(range) = buf.find_next(from, &re) {
161        // Honour engine wrap policy explicitly. The buffer impl uses
162        // its own (deprecated) wrap flag; for new search state the
163        // engine SearchState is the source of truth.
164        if !state.wrap_around && range.start.line < cursor.line {
165            return false;
166        }
167        Cursor::set_cursor(buf, range.start);
168        return true;
169    }
170    false
171}
172
173/// Symmetric counterpart of [`search_forward`].
174pub fn search_backward<B: Cursor + Query + Search>(
175    buf: &mut B,
176    state: &mut SearchState,
177    skip_current: bool,
178) -> bool {
179    let Some(re) = state.pattern.clone() else {
180        return false;
181    };
182    let cursor = buf.cursor();
183    let total = buf.line_count();
184    if total == 0 {
185        return false;
186    }
187    // Buffer's `Search::find_prev` returns the at-or-before match
188    // for the anchor `from`. For `skip_current`, we want the
189    // rightmost match whose start is *strictly before* the cursor.
190    // Strategy: query find_prev(cursor); if the returned match
191    // covers/starts-at the cursor, step the anchor back one byte
192    // past that match's start and re-query so the next find_prev
193    // skips it. Otherwise the at-or-before match is already strictly
194    // before the cursor and we accept it.
195    let initial = buf.find_prev(cursor, &re);
196    let range = if skip_current {
197        match initial {
198            Some(m) if m.start == cursor => {
199                // Cursor sits exactly on a match start (typical post-
200                // commit state). Step past and re-query.
201                let cb = buf.byte_offset(m.start);
202                if cb == 0 {
203                    // No earlier byte — fall through to wrap.
204                    None
205                } else {
206                    let anchor = buf.pos_at_byte(cb.saturating_sub(1));
207                    buf.find_prev(anchor, &re)
208                }
209            }
210            other => other,
211        }
212    } else {
213        initial
214    };
215    if let Some(range) = range {
216        if !state.wrap_around && range.start.line > cursor.line {
217            return false;
218        }
219        Cursor::set_cursor(buf, range.start);
220        return true;
221    }
222    false
223}
224
225/// Match positions on `row` as `(byte_start, byte_end)`. Used by
226/// the engine's highlight pipeline. Reads through the cache so a
227/// steady-state buffer doesn't re-scan every frame.
228pub fn search_matches<B: Query>(
229    buf: &B,
230    state: &mut SearchState,
231    dirty_gen: u64,
232    row: usize,
233) -> Vec<(usize, usize)> {
234    if state.pattern.is_none() {
235        return Vec::new();
236    }
237    let line_count = buf.line_count() as usize;
238    if row >= line_count {
239        return Vec::new();
240    }
241    let line = buf.line(row as u32);
242    state.matches_for(row, &line, dirty_gen).to_vec()
243}
244
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::Pos;
    use hjkl_buffer::Buffer;

    /// Compile a pattern that is already in Rust `regex` syntax.
    fn re(pat: &str) -> Regex {
        Regex::new(pat).unwrap()
    }

    /// Compile a vim-flavoured pattern after running it through the rewriter.
    fn vim_re(pat: &str) -> Regex {
        Regex::new(&vim_to_rust_regex(pat)).unwrap()
    }

    // ── vim_to_rust_regex unit tests ─────────────────────────────────────────

    /// `\<` and `\>` both rewrite to `\b`.
    #[test]
    fn vim_boundary_rewrites_to_b() {
        assert_eq!(vim_to_rust_regex(r"\<foo\>"), r"\bfoo\b");
        assert_eq!(vim_to_rust_regex(r"\<"), r"\b");
        assert_eq!(vim_to_rust_regex(r"\>"), r"\b");
    }

    /// A literal double-backslash before `<`/`>` must not be consumed.
    /// The input `\\<` is three chars: `\` `\` `<`; the rewriter sees the
    /// first `\` followed by `\`, emits `\\`, then `<` is plain text.
    #[test]
    fn escaped_backslash_left_alone() {
        // Input: \\< (three chars: '\', '\', '<')
        // Expected output: \\< (the first \ escapes the second, < is literal)
        let input = r"\\<";
        let output = vim_to_rust_regex(input);
        assert_eq!(output, r"\\<");
    }

    /// A lone backslash at the very end of the pattern is passed through.
    #[test]
    fn trailing_backslash_passes_through() {
        assert_eq!(vim_to_rust_regex("abc\\"), "abc\\");
        assert_eq!(vim_to_rust_regex("\\"), "\\");
    }

    /// Other escape sequences (`\b`, `\B`, `\d`, `\w`, anchors) pass through.
    #[test]
    fn other_escapes_unchanged() {
        assert_eq!(vim_to_rust_regex(r"\b"), r"\b");
        assert_eq!(vim_to_rust_regex(r"\B"), r"\B");
        assert_eq!(vim_to_rust_regex(r"\d+"), r"\d+");
        assert_eq!(vim_to_rust_regex(r"^\w+$"), r"^\w+$");
    }

    /// Mixed: `\<\w+\>` rewrites to `\b\w+\b` — matches whole words.
    #[test]
    fn mixed_boundary_and_word_class() {
        assert_eq!(vim_to_rust_regex(r"\<\w+\>"), r"\b\w+\b");
    }

    // ── Integration: compiled vim patterns match correctly ───────────────────

    /// A boundary in the middle of a word never matches (`foo\<bar\>` vs
    /// `foobar`), while `\<bar\>` matches `bar` only as a standalone word.
    #[test]
    fn vim_boundary_matches_standalone_word_not_suffix() {
        let re = vim_re(r"foo\<bar\>");
        // "foobar" — `bar` follows directly after `foo` with no word boundary:
        // the `\b` between `foo` and `bar` fails here.
        assert!(!re.is_match("foobar"));
        // `bar` as a whole word: matches when delimited, not as a suffix.
        let re2 = vim_re(r"\<bar\>");
        assert!(re2.is_match("foo bar baz"));
        assert!(!re2.is_match("foobar"));
    }

    /// `\<word` matches `word` at start-of-word but not mid-word.
    #[test]
    fn vim_boundary_start_only() {
        let re = vim_re(r"\<word");
        assert!(re.is_match("word here"));
        assert!(re.is_match("some word here"));
        assert!(!re.is_match("sword"));
        assert!(!re.is_match("aword"));
    }

    /// `word\>` matches `word` at end-of-word but not when followed by more.
    #[test]
    fn vim_boundary_end_only() {
        let re = vim_re(r"word\>");
        assert!(re.is_match("some word"));
        assert!(re.is_match("word"));
        assert!(!re.is_match("words"));
        assert!(!re.is_match("wordsmith"));
    }

    /// Existing `\b` continues to work (sanity check — no double-transform).
    #[test]
    fn existing_b_boundary_unchanged() {
        let re = vim_re(r"\bfoo\b");
        assert!(re.is_match("foo"));
        assert!(re.is_match("a foo b"));
        assert!(!re.is_match("foobar"));
        assert!(!re.is_match("afoo"));
    }

    /// Mixed: `\<\w+\>` matches whole words only.
    #[test]
    fn vim_whole_word_pattern() {
        let re = vim_re(r"\<\w+\>");
        let matches: Vec<_> = re.find_iter("foo bar baz").map(|m| m.as_str()).collect();
        assert_eq!(matches, vec!["foo", "bar", "baz"]);
    }

    // ── SearchState / search_* behaviour ─────────────────────────────────────

    /// With no pattern set, neither direction finds anything.
    #[test]
    fn empty_state_no_match() {
        let mut b = Buffer::from_str("anything");
        let mut s = SearchState::new();
        assert!(!search_forward(&mut b, &mut s, false));
        assert!(!search_backward(&mut b, &mut s, false));
    }

    #[test]
    fn forward_finds_first_match() {
        let mut b = Buffer::from_str("foo bar foo baz");
        let mut s = SearchState::new();
        s.set_pattern(Some(re("foo")));
        assert!(search_forward(&mut b, &mut s, false));
        assert_eq!(Cursor::cursor(&b), Pos::new(0, 0));
    }

    #[test]
    fn forward_skip_current_walks_past() {
        let mut b = Buffer::from_str("foo bar foo baz");
        let mut s = SearchState::new();
        s.set_pattern(Some(re("foo")));
        search_forward(&mut b, &mut s, false);
        search_forward(&mut b, &mut s, true);
        assert_eq!(Cursor::cursor(&b), Pos::new(0, 8));
    }

    #[test]
    fn forward_wraps_to_top() {
        let mut b = Buffer::from_str("zzz\nfoo");
        // 0.0.37: wrap policy lives entirely on `SearchState::wrap_around`;
        // the buffer-side `set_search_wrap` accessor is gone. Trait
        // `find_next` always wraps; the engine search free function
        // honours `s.wrap_around` directly.
        Cursor::set_cursor(&mut b, Pos::new(1, 2));
        let mut s = SearchState::new();
        s.set_pattern(Some(re("zzz")));
        s.wrap_around = true;
        assert!(search_forward(&mut b, &mut s, true));
        assert_eq!(Cursor::cursor(&b), Pos::new(0, 0));
    }

    /// First call scans the row; a second call at the same generation is
    /// served from the cache; a different generation forces a re-scan.
    #[test]
    fn search_matches_caches_against_dirty_gen() {
        let b = Buffer::from_str("foo bar");
        let mut s = SearchState::new();
        s.set_pattern(Some(re("bar")));
        let dgen = b.dirty_gen();
        let initial = search_matches(&b, &mut s, dgen, 0);
        assert_eq!(initial, vec![(4, 7)]);
        assert_eq!(s.generations[0], dgen);

        // Plant a sentinel in the cache: an unchanged generation must
        // return it untouched, proving the row was not re-scanned.
        s.matches[0] = vec![(0, 1)];
        assert_eq!(search_matches(&b, &mut s, dgen, 0), vec![(0, 1)]);

        // A different generation invalidates the row and re-scans it.
        let bumped = dgen.wrapping_add(1);
        assert_eq!(search_matches(&b, &mut s, bumped, 0), vec![(4, 7)]);
        assert_eq!(s.generations[0], bumped);
    }

    /// Replacing the pattern drops every cached row.
    #[test]
    fn set_pattern_clears_cache() {
        let b = Buffer::from_str("foo bar");
        let mut s = SearchState::new();
        s.set_pattern(Some(re("bar")));
        let dgen = b.dirty_gen();
        search_matches(&b, &mut s, dgen, 0);
        assert!(!s.matches.is_empty());
        s.set_pattern(Some(re("foo")));
        assert!(s.matches.is_empty());
        assert!(s.generations.is_empty());
        assert_eq!(search_matches(&b, &mut s, dgen, 0), vec![(0, 3)]);
    }

    /// Rows past the end of the buffer, or any row while no pattern is
    /// active, yield no matches and do not grow the cache.
    #[test]
    fn search_matches_empty_without_pattern_or_row() {
        let b = Buffer::from_str("foo bar");
        let dgen = b.dirty_gen();

        let mut no_pattern = SearchState::new();
        assert!(search_matches(&b, &mut no_pattern, dgen, 0).is_empty());
        assert!(no_pattern.matches.is_empty());

        let mut s = SearchState::new();
        s.set_pattern(Some(re("bar")));
        assert!(search_matches(&b, &mut s, dgen, 5).is_empty());
        assert!(s.matches.is_empty());
    }
}