hjkl_engine/search.rs
1//! Engine-owned search state + execution helpers.
2//!
3//! Patch 0.0.35 step 1 of the 33-method classification rollout
4//! (see `DESIGN_33_METHOD_CLASSIFICATION.md`). The pattern, per-row
5//! match cache, and `wrapscan` flag previously lived on
6//! [`hjkl_buffer::Buffer`] (private `SearchState`). Moving the FSM
7//! state out of the buffer keeps multi-window hosts from sharing the
8//! "current search" across panes that happen to share content.
9//!
10//! The buffer keeps `Search::find_next` / `Search::find_prev` (the
11//! SPEC trait surface — pure observers, caller owns the regex). This
12//! module composes those primitives with the Editor-owned
13//! [`SearchState`] to drive `n` / `N` / `*` / `#` / `/` / `?`.
14//!
15//! 0.0.37: the buffer-inherent `search_forward` / `search_backward`
16//! / `search_matches` / `set_search_pattern` / `search_pattern` /
17//! `set_search_wrap` / `search_wraps` accessors are removed. Search
18//! state lives on `Editor::search_state`, the rendering path
19//! (`BufferView`) takes the active `&Regex` as a parameter, and the
20//! `Search` trait impl always wraps (engine controls non-wrap
21//! semantics).
22
23use regex::Regex;
24
25use crate::types::{Cursor, Query, Search};
26
/// Translate vim's directional word-boundary escapes into syntax accepted by
/// the Rust `regex` crate.
///
/// Vim (and PCRE) spell "start of word" as `\<` and "end of word" as `\>`.
/// Both are mapped onto the symmetric `\b` in one left-to-right scan, so the
/// start/end distinction is not preserved:
///
/// - `\<` → `\b`
/// - `\>` → `\b`
/// - `\\<` / `\\>` are left alone: the first backslash escapes the second,
///   so the `<` / `>` that follows is an ordinary character.
/// - Every other escape (`\b`, `\B`, `\d`, …) and all unescaped text is
///   copied through verbatim, including a lone trailing backslash.
///
/// Run this over the raw user-typed pattern **before** handing it to
/// `regex::Regex::new`, and keep the untouched string for display / history.
pub fn vim_to_rust_regex(pat: &str) -> String {
    let mut rewritten = String::with_capacity(pat.len());
    let mut rest = pat.chars();
    while let Some(c) = rest.next() {
        if c != '\\' {
            rewritten.push(c);
            continue;
        }
        // A backslash always consumes the character after it, so an escaped
        // backslash can never be mistaken for the start of `\<` / `\>`.
        match rest.next() {
            Some('<') | Some('>') => rewritten.push_str(r"\b"),
            Some(escaped) => {
                rewritten.push('\\');
                rewritten.push(escaped);
            }
            None => rewritten.push('\\'),
        }
    }
    rewritten
}
68
69/// Per-row match cache keyed against the buffer's `dirty_gen`. Live
70/// alongside the active pattern so re-running `n` doesn't re-scan
71/// rows the buffer hasn't touched.
72#[derive(Debug, Clone, Default)]
73pub struct SearchState {
74 /// Active pattern, if any. `None` clears highlighting and makes
75 /// `n` / `N` no-op until the next `/` / `?` commit.
76 pub pattern: Option<Regex>,
77 /// `true` for `/`, `false` for `?` — drives `n` vs `N` direction.
78 /// Mirrors `vim.last_search_forward`; consolidated so future
79 /// patches can drop the duplicate.
80 pub forward: bool,
81 /// `matches[row]` is the `(byte_start, byte_end)` runs cached on
82 /// `row`, captured at `gen[row]`. Length grows lazily.
83 pub matches: Vec<Vec<(usize, usize)>>,
84 /// Per-row generation tag. When the buffer's `dirty_gen` for a
85 /// row diverges, the row gets re-scanned on next access.
86 pub generations: Vec<u64>,
87 /// Wrap past buffer ends. Mirrors `Settings::wrapscan`.
88 pub wrap_around: bool,
89}
90
91impl SearchState {
92 /// Empty state — no pattern, forward direction, wraps.
93 pub fn new() -> Self {
94 Self {
95 pattern: None,
96 forward: true,
97 matches: Vec::new(),
98 generations: Vec::new(),
99 wrap_around: true,
100 }
101 }
102
103 /// Replace the active pattern. Drops the cached match runs so
104 /// the next access re-scans against the new regex.
105 pub fn set_pattern(&mut self, re: Option<Regex>) {
106 self.pattern = re;
107 self.matches.clear();
108 self.generations.clear();
109 }
110
111 /// Refresh `matches[row]` if either the row's gen has rolled or
112 /// we never scanned it. Returns the cached slice.
113 pub fn matches_for(&mut self, row: usize, line: &str, dirty_gen: u64) -> &[(usize, usize)] {
114 let Some(ref re) = self.pattern else {
115 return &[];
116 };
117 if self.matches.len() <= row {
118 self.matches.resize_with(row + 1, Vec::new);
119 self.generations.resize(row + 1, u64::MAX);
120 }
121 if self.generations[row] != dirty_gen {
122 self.matches[row] = re.find_iter(line).map(|m| (m.start(), m.end())).collect();
123 self.generations[row] = dirty_gen;
124 }
125 &self.matches[row]
126 }
127}
128
129/// Move the cursor to the next match starting from (or just after,
130/// when `skip_current = true`) the cursor. Wraps end-of-buffer to
131/// row 0 when `state.wrap_around`. Returns `true` when a match was
132/// found.
133///
134/// Pure observe + cursor mutation — no auto-scroll. The Editor's
135/// post-step `ensure_cursor_in_scrolloff` reapplies viewport
136/// follow.
137pub fn search_forward<B: Cursor + Query + Search>(
138 buf: &mut B,
139 state: &mut SearchState,
140 skip_current: bool,
141) -> bool {
142 let Some(re) = state.pattern.clone() else {
143 return false;
144 };
145 let cursor = buf.cursor();
146 let total = buf.line_count();
147 if total == 0 {
148 return false;
149 }
150 // To "skip the current cell", advance `from` one byte past the
151 // cursor before asking `find_next` for the at-or-after match.
152 // `pos_at_byte` clamps overflow to end-of-buffer so this is
153 // safe even when the cursor sits at the trailing edge.
154 let from = if skip_current {
155 let from_byte = buf.byte_offset(cursor);
156 buf.pos_at_byte(from_byte.saturating_add(1))
157 } else {
158 cursor
159 };
160 if let Some(range) = buf.find_next(from, &re) {
161 // Honour engine wrap policy explicitly. The buffer impl uses
162 // its own (deprecated) wrap flag; for new search state the
163 // engine SearchState is the source of truth.
164 if !state.wrap_around && range.start.line < cursor.line {
165 return false;
166 }
167 Cursor::set_cursor(buf, range.start);
168 return true;
169 }
170 false
171}
172
173/// Symmetric counterpart of [`search_forward`].
174pub fn search_backward<B: Cursor + Query + Search>(
175 buf: &mut B,
176 state: &mut SearchState,
177 skip_current: bool,
178) -> bool {
179 let Some(re) = state.pattern.clone() else {
180 return false;
181 };
182 let cursor = buf.cursor();
183 let total = buf.line_count();
184 if total == 0 {
185 return false;
186 }
187 // Buffer's `Search::find_prev` returns the at-or-before match
188 // for the anchor `from`. For `skip_current`, we want the
189 // rightmost match whose start is *strictly before* the cursor.
190 // Strategy: query find_prev(cursor); if the returned match
191 // covers/starts-at the cursor, step the anchor back one byte
192 // past that match's start and re-query so the next find_prev
193 // skips it. Otherwise the at-or-before match is already strictly
194 // before the cursor and we accept it.
195 let initial = buf.find_prev(cursor, &re);
196 let range = if skip_current {
197 match initial {
198 Some(m) if m.start == cursor => {
199 // Cursor sits exactly on a match start (typical post-
200 // commit state). Step past and re-query.
201 let cb = buf.byte_offset(m.start);
202 if cb == 0 {
203 // No earlier byte — fall through to wrap.
204 None
205 } else {
206 let anchor = buf.pos_at_byte(cb.saturating_sub(1));
207 buf.find_prev(anchor, &re)
208 }
209 }
210 other => other,
211 }
212 } else {
213 initial
214 };
215 if let Some(range) = range {
216 if !state.wrap_around && range.start.line > cursor.line {
217 return false;
218 }
219 Cursor::set_cursor(buf, range.start);
220 return true;
221 }
222 false
223}
224
225/// Match positions on `row` as `(byte_start, byte_end)`. Used by
226/// the engine's highlight pipeline. Reads through the cache so a
227/// steady-state buffer doesn't re-scan every frame.
228pub fn search_matches<B: Query>(
229 buf: &B,
230 state: &mut SearchState,
231 dirty_gen: u64,
232 row: usize,
233) -> Vec<(usize, usize)> {
234 if state.pattern.is_none() {
235 return Vec::new();
236 }
237 let line_count = buf.line_count() as usize;
238 if row >= line_count {
239 return Vec::new();
240 }
241 let line = buf.line(row as u32);
242 state.matches_for(row, &line, dirty_gen).to_vec()
243}
244
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::Pos;
    use hjkl_buffer::Buffer;

    /// Compile `pat` exactly as written.
    fn re(pat: &str) -> Regex {
        Regex::new(pat).unwrap()
    }

    /// Translate `pat` with [`vim_to_rust_regex`], then compile the result.
    fn vim_re(pat: &str) -> Regex {
        let translated = vim_to_rust_regex(pat);
        Regex::new(&translated).unwrap()
    }

    // ── vim_to_rust_regex: string-level rewriting ────────────────────────────

    /// Both directional boundaries collapse to `\b`.
    #[test]
    fn vim_boundary_rewrites_to_b() {
        let both = vim_to_rust_regex(r"\<foo\>");
        let start_only = vim_to_rust_regex(r"\<");
        let end_only = vim_to_rust_regex(r"\>");
        assert_eq!(both, r"\bfoo\b");
        assert_eq!(start_only, r"\b");
        assert_eq!(end_only, r"\b");
    }

    /// An escaped backslash in front of `<` must survive untouched: the
    /// rewriter pairs the first `\` with the second, emits `\\`, and then
    /// sees `<` as plain text.
    #[test]
    fn escaped_backslash_left_alone() {
        // Three source characters in, the same three characters out.
        assert_eq!(vim_to_rust_regex(r"\\<"), r"\\<");
    }

    /// Escapes other than `\<` / `\>` are copied through as they are.
    #[test]
    fn other_escapes_unchanged() {
        assert_eq!(vim_to_rust_regex(r"\b"), r"\b");
        assert_eq!(vim_to_rust_regex(r"\B"), r"\B");
        assert_eq!(vim_to_rust_regex(r"\d+"), r"\d+");
        assert_eq!(vim_to_rust_regex(r"^\w+$"), r"^\w+$");
    }

    /// Boundaries and a word class in one pattern: `\<\w+\>` → `\b\w+\b`.
    #[test]
    fn mixed_boundary_and_word_class() {
        let rewritten = vim_to_rust_regex(r"\<\w+\>");
        assert_eq!(rewritten, r"\b\w+\b");
    }

    // ── vim_to_rust_regex: compiled patterns behave as intended ──────────────

    /// `bar` is matched as a standalone word, never as the tail of `foobar`.
    #[test]
    fn vim_boundary_matches_standalone_word_not_suffix() {
        // `foo\<bar\>` becomes `foo\bbar\b`; there is no word boundary
        // between `foo` and `bar` in "foobar", so the pattern cannot match.
        let glued = vim_re(r"foo\<bar\>");
        assert!(!glued.is_match("foobar"));
        // The whole-word form expresses the intent directly.
        let whole_word = vim_re(r"\<bar\>");
        assert!(whole_word.is_match("foo bar baz"));
        assert!(!whole_word.is_match("foobar"));
    }

    /// `\<word` matches at the start of a word but not in the middle of one.
    #[test]
    fn vim_boundary_start_only() {
        let starts_word = vim_re(r"\<word");
        assert!(starts_word.is_match("word here"));
        assert!(starts_word.is_match("some word here"));
        assert!(!starts_word.is_match("sword"));
        assert!(!starts_word.is_match("aword"));
    }

    /// `word\>` matches at the end of a word but not when more letters follow.
    #[test]
    fn vim_boundary_end_only() {
        let ends_word = vim_re(r"word\>");
        assert!(ends_word.is_match("some word"));
        assert!(ends_word.is_match("word"));
        assert!(!ends_word.is_match("words"));
        assert!(!ends_word.is_match("wordsmith"));
    }

    /// A pattern already written with `\b` is not transformed a second time.
    #[test]
    fn existing_b_boundary_unchanged() {
        let plain_boundary = vim_re(r"\bfoo\b");
        assert!(plain_boundary.is_match("foo"));
        assert!(plain_boundary.is_match("a foo b"));
        assert!(!plain_boundary.is_match("foobar"));
        assert!(!plain_boundary.is_match("afoo"));
    }

    /// `\<\w+\>` yields each whole word of the haystack in order.
    #[test]
    fn vim_whole_word_pattern() {
        let words = vim_re(r"\<\w+\>");
        let found: Vec<_> = words
            .find_iter("foo bar baz")
            .map(|m| m.as_str())
            .collect();
        assert_eq!(found, vec!["foo", "bar", "baz"]);
    }

    // ── Engine search helpers against a real buffer ──────────────────────────

    /// With no pattern set, neither direction reports a match.
    #[test]
    fn empty_state_no_match() {
        let mut buffer = Buffer::from_str("anything");
        let mut state = SearchState::new();
        assert!(!search_forward(&mut buffer, &mut state, false));
        assert!(!search_backward(&mut buffer, &mut state, false));
    }

    /// A forward search from the origin lands on the match under the cursor.
    #[test]
    fn forward_finds_first_match() {
        let mut buffer = Buffer::from_str("foo bar foo baz");
        let mut state = SearchState::new();
        state.set_pattern(Some(re("foo")));
        assert!(search_forward(&mut buffer, &mut state, false));
        assert_eq!(Cursor::cursor(&buffer), Pos::new(0, 0));
    }

    /// `skip_current` moves past the match under the cursor to the next one.
    #[test]
    fn forward_skip_current_walks_past() {
        let mut buffer = Buffer::from_str("foo bar foo baz");
        let mut state = SearchState::new();
        state.set_pattern(Some(re("foo")));
        search_forward(&mut buffer, &mut state, false);
        search_forward(&mut buffer, &mut state, true);
        assert_eq!(Cursor::cursor(&buffer), Pos::new(0, 8));
    }

    /// With wrapping enabled, a search from the last line reaches row 0.
    #[test]
    fn forward_wraps_to_top() {
        let mut buffer = Buffer::from_str("zzz\nfoo");
        // 0.0.37: wrap policy lives entirely on `SearchState::wrap_around`;
        // the buffer-side `set_search_wrap` accessor is gone. Trait
        // `find_next` always wraps; the engine search free function
        // honours `state.wrap_around` directly.
        Cursor::set_cursor(&mut buffer, Pos::new(1, 2));
        let mut state = SearchState::new();
        state.set_pattern(Some(re("zzz")));
        state.wrap_around = true;
        assert!(search_forward(&mut buffer, &mut state, true));
        assert_eq!(Cursor::cursor(&buffer), Pos::new(0, 0));
    }

    /// The first lookup at the buffer's current `dirty_gen` scans the row
    /// and reports the byte run of the match.
    #[test]
    fn search_matches_caches_against_dirty_gen() {
        let buffer = Buffer::from_str("foo bar");
        let mut state = SearchState::new();
        state.set_pattern(Some(re("bar")));
        let generation = buffer.dirty_gen();
        assert_eq!(
            search_matches(&buffer, &mut state, generation, 0),
            vec![(4, 7)]
        );
    }
}
402}