hjkl_engine/search.rs
1//! Engine-owned search state + execution helpers.
2//!
3//! Patch 0.0.35 step 1 of the 33-method classification rollout
4//! (see `DESIGN_33_METHOD_CLASSIFICATION.md`). The pattern, per-row
5//! match cache, and `wrapscan` flag previously lived on
6//! [`hjkl_buffer::Buffer`] (private `SearchState`). Moving the FSM
7//! state out of the buffer keeps multi-window hosts from sharing the
8//! "current search" across panes that happen to share content.
9//!
10//! The buffer keeps `Search::find_next` / `Search::find_prev` (the
11//! SPEC trait surface — pure observers, caller owns the regex). This
12//! module composes those primitives with the Editor-owned
13//! [`SearchState`] to drive `n` / `N` / `*` / `#` / `/` / `?`.
14//!
15//! 0.0.37: the buffer-inherent `search_forward` / `search_backward`
16//! / `search_matches` / `set_search_pattern` / `search_pattern` /
17//! `set_search_wrap` / `search_wraps` accessors are removed. Search
18//! state lives on `Editor::search_state`, the rendering path
19//! (`BufferView`) takes the active `&Regex` as a parameter, and the
20//! `Search` trait impl always wraps (engine controls non-wrap
21//! semantics).
22
23use regex::Regex;
24
25use crate::types::{Cursor, Query, Search};
26
/// How letter case is treated while searching, as configured by
/// `:set ignorecase` and `:set smartcase`.
///
/// Construct a value with [`CaseMode::from_options`], then hand it to
/// [`resolve_case_mode`] together with the raw pattern text.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum CaseMode {
    /// Letter case always matters, whatever the pattern looks like.
    Sensitive,
    /// Letter case never matters, whatever the pattern looks like.
    Insensitive,
    /// Letter case is ignored until the pattern itself contains an
    /// uppercase character (vim's `smartcase` behaviour).
    Smart,
}

impl CaseMode {
    /// Fold the two option flags into a single policy.
    ///
    /// | `ignorecase` | `smartcase` | Result |
    /// |---|---|---|
    /// | `false` | `*` | `Sensitive` |
    /// | `true` | `false` | `Insensitive` |
    /// | `true` | `true` | `Smart` |
    pub fn from_options(ignorecase: bool, smartcase: bool) -> Self {
        match (ignorecase, smartcase) {
            // `smartcase` only has an effect while `ignorecase` is on.
            (false, _) => Self::Sensitive,
            (true, false) => Self::Insensitive,
            (true, true) => Self::Smart,
        }
    }
}
60
61/// Strip `\c` / `\C` overrides from `pat`, resolve the effective
62/// [`CaseMode`], and return the cleaned pattern together with the
63/// resolved mode.
64///
65/// ### Override rules (mirrors vim)
66///
67/// - `\c` anywhere in `pat` forces case-insensitive.
68/// - `\C` anywhere in `pat` forces case-sensitive.
69/// - When both appear the **last** one wins.
70/// - Both are stripped from the returned pattern.
71///
72/// ### Smart-case detection
73///
74/// When `base` is [`CaseMode::Smart`] and no `\c`/`\C` override was
75/// found, the pattern is scanned for uppercase Unicode letters. Any
76/// uppercase letter → `Sensitive`; otherwise → `Insensitive`.
77///
78/// ### Per-substitute flag interaction
79///
80/// The `:s/…/…/i` and `:s/…/…/I` flags are handled in
81/// `apply_substitute` **before** calling this function (they
82/// short-circuit entirely). This function is not involved.
83pub fn resolve_case_mode(pat: &str, base: CaseMode) -> (String, CaseMode) {
84 let mut out = String::with_capacity(pat.len());
85 let mut chars = pat.chars().peekable();
86 // None = no override seen yet; Some(true) = \c (insensitive); Some(false) = \C (sensitive).
87 let mut override_mode: Option<bool> = None;
88
89 while let Some(ch) = chars.next() {
90 if ch == '\\' {
91 match chars.peek() {
92 Some('c') => {
93 chars.next();
94 override_mode = Some(true); // \c → insensitive
95 }
96 Some('C') => {
97 chars.next();
98 override_mode = Some(false); // \C → sensitive
99 }
100 Some('<') => {
101 chars.next();
102 out.push_str(r"\b");
103 }
104 Some('>') => {
105 chars.next();
106 out.push_str(r"\b");
107 }
108 _ => {
109 out.push('\\');
110 if let Some(next) = chars.next() {
111 out.push(next);
112 }
113 }
114 }
115 } else {
116 out.push(ch);
117 }
118 }
119
120 let resolved = match override_mode {
121 Some(true) => CaseMode::Insensitive,
122 Some(false) => CaseMode::Sensitive,
123 None => match base {
124 CaseMode::Smart => {
125 // Any uppercase rune → sensitive.
126 if out.chars().any(|c| c.is_uppercase()) {
127 CaseMode::Sensitive
128 } else {
129 CaseMode::Insensitive
130 }
131 }
132 other => other,
133 },
134 };
135
136 (out, resolved)
137}
138
139/// Rewrite vim-style word-boundary escapes to Rust `regex`-compatible form
140/// **and** strip `\c`/`\C` case overrides.
141///
142/// The `regex` crate supports `\b` (symmetric word boundary) but not the
143/// vim/PCRE `\<` (word-boundary start) or `\>` (word-boundary end) variants.
144/// This function performs a single-pass rewrite:
145///
146/// - `\<` → `\b`
147/// - `\>` → `\b`
148/// - `\c` / `\C` stripped (case override — handled by [`resolve_case_mode`])
149/// - `\\<` / `\\>` (literal double-backslash followed by `<`/`>`) are left
150/// untouched — only the unescaped form transforms.
151/// - All other syntax (`\b`, `\B`, `\d`, anchors, …) passes through unchanged.
152///
153/// Call this on the raw user-typed pattern string **before** passing to
154/// `regex::Regex::new`. Keep the original string for display / history.
155///
156/// Prefer [`resolve_case_mode`] when you also need to apply case semantics;
157/// that function performs the same boundary rewrite internally.
158pub fn vim_to_rust_regex(pat: &str) -> String {
159 resolve_case_mode(pat, CaseMode::Sensitive).0
160}
161
162/// Per-row match cache keyed against the buffer's `dirty_gen`. Live
163/// alongside the active pattern so re-running `n` doesn't re-scan
164/// rows the buffer hasn't touched.
165#[derive(Debug, Clone, Default)]
166pub struct SearchState {
167 /// Active pattern, if any. `None` clears highlighting and makes
168 /// `n` / `N` no-op until the next `/` / `?` commit.
169 pub pattern: Option<Regex>,
170 /// `true` for `/`, `false` for `?` — drives `n` vs `N` direction.
171 /// Mirrors `vim.last_search_forward`; consolidated so future
172 /// patches can drop the duplicate.
173 pub forward: bool,
174 /// `matches[row]` is the `(byte_start, byte_end)` runs cached on
175 /// `row`, captured at `gen[row]`. Length grows lazily.
176 pub matches: Vec<Vec<(usize, usize)>>,
177 /// Per-row generation tag. When the buffer's `dirty_gen` for a
178 /// row diverges, the row gets re-scanned on next access.
179 pub generations: Vec<u64>,
180 /// Wrap past buffer ends. Mirrors `Settings::wrapscan`.
181 pub wrap_around: bool,
182}
183
184impl SearchState {
185 /// Empty state — no pattern, forward direction, wraps.
186 pub fn new() -> Self {
187 Self {
188 pattern: None,
189 forward: true,
190 matches: Vec::new(),
191 generations: Vec::new(),
192 wrap_around: true,
193 }
194 }
195
196 /// Replace the active pattern. Drops the cached match runs so
197 /// the next access re-scans against the new regex.
198 pub fn set_pattern(&mut self, re: Option<Regex>) {
199 self.pattern = re;
200 self.matches.clear();
201 self.generations.clear();
202 }
203
204 /// Refresh `matches[row]` if either the row's gen has rolled or
205 /// we never scanned it. Returns the cached slice.
206 pub fn matches_for(&mut self, row: usize, line: &str, dirty_gen: u64) -> &[(usize, usize)] {
207 let Some(ref re) = self.pattern else {
208 return &[];
209 };
210 if self.matches.len() <= row {
211 self.matches.resize_with(row + 1, Vec::new);
212 self.generations.resize(row + 1, u64::MAX);
213 }
214 if self.generations[row] != dirty_gen {
215 self.matches[row] = re.find_iter(line).map(|m| (m.start(), m.end())).collect();
216 self.generations[row] = dirty_gen;
217 }
218 &self.matches[row]
219 }
220}
221
222/// Move the cursor to the next match starting from (or just after,
223/// when `skip_current = true`) the cursor. Wraps end-of-buffer to
224/// row 0 when `state.wrap_around`. Returns `true` when a match was
225/// found.
226///
227/// Pure observe + cursor mutation — no auto-scroll. The Editor's
228/// post-step `ensure_cursor_in_scrolloff` reapplies viewport
229/// follow.
230pub fn search_forward<B: Cursor + Query + Search>(
231 buf: &mut B,
232 state: &mut SearchState,
233 skip_current: bool,
234) -> bool {
235 let Some(re) = state.pattern.clone() else {
236 return false;
237 };
238 let cursor = buf.cursor();
239 let total = buf.line_count();
240 if total == 0 {
241 return false;
242 }
243 // To "skip the current cell", advance `from` one byte past the
244 // cursor before asking `find_next` for the at-or-after match.
245 // `pos_at_byte` clamps overflow to end-of-buffer so this is
246 // safe even when the cursor sits at the trailing edge.
247 let from = if skip_current {
248 let from_byte = buf.byte_offset(cursor);
249 buf.pos_at_byte(from_byte.saturating_add(1))
250 } else {
251 cursor
252 };
253 if let Some(range) = buf.find_next(from, &re) {
254 // Honour engine wrap policy explicitly. The buffer impl uses
255 // its own (deprecated) wrap flag; for new search state the
256 // engine SearchState is the source of truth.
257 if !state.wrap_around && range.start.line < cursor.line {
258 return false;
259 }
260 Cursor::set_cursor(buf, range.start);
261 return true;
262 }
263 false
264}
265
266/// Symmetric counterpart of [`search_forward`].
267pub fn search_backward<B: Cursor + Query + Search>(
268 buf: &mut B,
269 state: &mut SearchState,
270 skip_current: bool,
271) -> bool {
272 let Some(re) = state.pattern.clone() else {
273 return false;
274 };
275 let cursor = buf.cursor();
276 let total = buf.line_count();
277 if total == 0 {
278 return false;
279 }
280 // Buffer's `Search::find_prev` returns the at-or-before match
281 // for the anchor `from`. For `skip_current`, we want the
282 // rightmost match whose start is *strictly before* the cursor.
283 // Strategy: query find_prev(cursor); if the returned match
284 // covers/starts-at the cursor, step the anchor back one byte
285 // past that match's start and re-query so the next find_prev
286 // skips it. Otherwise the at-or-before match is already strictly
287 // before the cursor and we accept it.
288 let initial = buf.find_prev(cursor, &re);
289 let range = if skip_current {
290 match initial {
291 Some(m) if m.start == cursor => {
292 // Cursor sits exactly on a match start (typical post-
293 // commit state). Step past and re-query.
294 let cb = buf.byte_offset(m.start);
295 if cb == 0 {
296 // No earlier byte — fall through to wrap.
297 None
298 } else {
299 let anchor = buf.pos_at_byte(cb.saturating_sub(1));
300 buf.find_prev(anchor, &re)
301 }
302 }
303 other => other,
304 }
305 } else {
306 initial
307 };
308 if let Some(range) = range {
309 if !state.wrap_around && range.start.line > cursor.line {
310 return false;
311 }
312 Cursor::set_cursor(buf, range.start);
313 return true;
314 }
315 false
316}
317
318/// Match positions on `row` as `(byte_start, byte_end)`. Used by
319/// the engine's highlight pipeline. Reads through the cache so a
320/// steady-state buffer doesn't re-scan every frame.
321pub fn search_matches<B: Query>(
322 buf: &B,
323 state: &mut SearchState,
324 dirty_gen: u64,
325 row: usize,
326) -> Vec<(usize, usize)> {
327 if state.pattern.is_none() {
328 return Vec::new();
329 }
330 let line_count = buf.line_count() as usize;
331 if row >= line_count {
332 return Vec::new();
333 }
334 let line = buf.line(row as u32);
335 state.matches_for(row, &line, dirty_gen).to_vec()
336}
337
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::Pos;
    use hjkl_buffer::Buffer;

    /// Compile a pattern that is already in Rust `regex` syntax.
    fn re(pat: &str) -> Regex {
        Regex::new(pat).unwrap()
    }

    /// Compile a vim-flavoured pattern via [`vim_to_rust_regex`].
    fn vim_re(pat: &str) -> Regex {
        Regex::new(&vim_to_rust_regex(pat)).unwrap()
    }

    // ── vim_to_rust_regex unit tests ─────────────────────────────────────────

    /// `\<` and `\>` both rewrite to `\b`.
    #[test]
    fn vim_boundary_rewrites_to_b() {
        assert_eq!(vim_to_rust_regex(r"\<foo\>"), r"\bfoo\b");
        assert_eq!(vim_to_rust_regex(r"\<"), r"\b");
        assert_eq!(vim_to_rust_regex(r"\>"), r"\b");
    }

    /// A literal double-backslash before `<`/`>` must not be consumed.
    /// `\\<` in the source string is two chars: `\` `\`; the rewriter sees
    /// the first `\` followed by `\`, emits `\\`, then `<` is plain text.
    #[test]
    fn escaped_backslash_left_alone() {
        // Input:  \\<  (three chars in source: '\', '\', '<')
        // Expected output: \\<  (the first \ escapes the second, < is literal)
        let input = r"\\<";
        let output = vim_to_rust_regex(input);
        assert_eq!(output, r"\\<");
    }

    /// Other escape sequences (`\b`, `\B`, `\d`, `\w`, anchors) pass through.
    #[test]
    fn other_escapes_unchanged() {
        assert_eq!(vim_to_rust_regex(r"\b"), r"\b");
        assert_eq!(vim_to_rust_regex(r"\B"), r"\B");
        assert_eq!(vim_to_rust_regex(r"\d+"), r"\d+");
        assert_eq!(vim_to_rust_regex(r"^\w+$"), r"^\w+$");
    }

    /// Mixed: `\<\w+\>` rewrites to `\b\w+\b` — matches whole words.
    #[test]
    fn mixed_boundary_and_word_class() {
        assert_eq!(vim_to_rust_regex(r"\<\w+\>"), r"\b\w+\b");
    }

    // ── Integration: compiled vim patterns match correctly ───────────────────

    /// `\<bar\>` matches `bar` as a standalone word but not as a suffix,
    /// and a boundary placed mid-word (`foo\<bar\>`) can never match.
    #[test]
    fn vim_boundary_matches_standalone_word_not_suffix() {
        // `foo\<bar\>` compiles to `foo\bbar\b`: there is no word boundary
        // between `foo` and `bar` in "foobar", so it must not match.
        let mid_word = vim_re(r"foo\<bar\>");
        assert!(!mid_word.is_match("foobar"));
        // `bar` as a whole word.
        let whole_word = vim_re(r"\<bar\>");
        assert!(whole_word.is_match("foo bar baz"));
        assert!(!whole_word.is_match("foobar"));
    }

    /// `\<word` matches `word` at start-of-word but not mid-word.
    #[test]
    fn vim_boundary_start_only() {
        let re = vim_re(r"\<word");
        assert!(re.is_match("word here"));
        assert!(re.is_match("some word here"));
        assert!(!re.is_match("sword"));
        assert!(!re.is_match("aword"));
    }

    /// `word\>` matches `word` at end-of-word but not when followed by more.
    #[test]
    fn vim_boundary_end_only() {
        let re = vim_re(r"word\>");
        assert!(re.is_match("some word"));
        assert!(re.is_match("word"));
        assert!(!re.is_match("words"));
        assert!(!re.is_match("wordsmith"));
    }

    /// Existing `\b` continues to work (sanity check — no double-transform).
    #[test]
    fn existing_b_boundary_unchanged() {
        let re = vim_re(r"\bfoo\b");
        assert!(re.is_match("foo"));
        assert!(re.is_match("a foo b"));
        assert!(!re.is_match("foobar"));
        assert!(!re.is_match("afoo"));
    }

    /// Mixed: `\<\w+\>` matches whole words only.
    #[test]
    fn vim_whole_word_pattern() {
        let re = vim_re(r"\<\w+\>");
        let matches: Vec<_> = re.find_iter("foo bar baz").map(|m| m.as_str()).collect();
        assert_eq!(matches, vec!["foo", "bar", "baz"]);
    }

    // ── Engine search over a buffer ──────────────────────────────────────────

    #[test]
    fn empty_state_no_match() {
        let mut b = Buffer::from_str("anything");
        let mut s = SearchState::new();
        assert!(!search_forward(&mut b, &mut s, false));
        assert!(!search_backward(&mut b, &mut s, false));
    }

    #[test]
    fn forward_finds_first_match() {
        let mut b = Buffer::from_str("foo bar foo baz");
        let mut s = SearchState::new();
        s.set_pattern(Some(re("foo")));
        assert!(search_forward(&mut b, &mut s, false));
        assert_eq!(Cursor::cursor(&b), Pos::new(0, 0));
    }

    #[test]
    fn forward_skip_current_walks_past() {
        let mut b = Buffer::from_str("foo bar foo baz");
        let mut s = SearchState::new();
        s.set_pattern(Some(re("foo")));
        search_forward(&mut b, &mut s, false);
        search_forward(&mut b, &mut s, true);
        assert_eq!(Cursor::cursor(&b), Pos::new(0, 8));
    }

    #[test]
    fn forward_wraps_to_top() {
        let mut b = Buffer::from_str("zzz\nfoo");
        // 0.0.37: wrap policy lives entirely on `SearchState::wrap_around`;
        // the buffer-side `set_search_wrap` accessor is gone. Trait
        // `find_next` always wraps; the engine search free function
        // honours `s.wrap_around` directly.
        Cursor::set_cursor(&mut b, Pos::new(1, 2));
        let mut s = SearchState::new();
        s.set_pattern(Some(re("zzz")));
        s.wrap_around = true;
        assert!(search_forward(&mut b, &mut s, true));
        assert_eq!(Cursor::cursor(&b), Pos::new(0, 0));
    }

    /// Same setup as `forward_wraps_to_top`, but with wrapping disabled the
    /// only match lies behind the cursor, so the search must fail and the
    /// cursor must stay put.
    #[test]
    fn forward_without_wrap_stops_at_bottom() {
        let mut b = Buffer::from_str("zzz\nfoo");
        Cursor::set_cursor(&mut b, Pos::new(1, 2));
        let mut s = SearchState::new();
        s.set_pattern(Some(re("zzz")));
        s.wrap_around = false;
        assert!(!search_forward(&mut b, &mut s, true));
        assert_eq!(Cursor::cursor(&b), Pos::new(1, 2));
    }

    #[test]
    fn search_matches_caches_against_dirty_gen() {
        let b = Buffer::from_str("foo bar");
        let mut s = SearchState::new();
        s.set_pattern(Some(re("bar")));
        let dgen = b.dirty_gen();
        let initial = search_matches(&b, &mut s, dgen, 0);
        assert_eq!(initial, vec![(4, 7)]);
        // Same generation → served from the cache; the (different) text
        // passed in is not re-scanned.
        assert_eq!(s.matches_for(0, "bar bar", dgen).to_vec(), vec![(4, 7)]);
        // New generation → the row is re-scanned against the new text.
        let next_gen = dgen.wrapping_add(1);
        assert_eq!(
            s.matches_for(0, "bar bar", next_gen).to_vec(),
            vec![(0, 3), (4, 7)]
        );
        // Rows past the end of the buffer never produce matches.
        assert!(search_matches(&b, &mut s, dgen, 5).is_empty());
    }

    // ── CaseMode::from_options matrix ────────────────────────────────────────

    #[test]
    fn case_mode_from_options_matrix() {
        // ic=false, smart=* → Sensitive
        assert_eq!(CaseMode::from_options(false, false), CaseMode::Sensitive);
        assert_eq!(CaseMode::from_options(false, true), CaseMode::Sensitive);
        // ic=true, smart=false → Insensitive
        assert_eq!(CaseMode::from_options(true, false), CaseMode::Insensitive);
        // ic=true, smart=true → Smart
        assert_eq!(CaseMode::from_options(true, true), CaseMode::Smart);
    }

    // ── resolve_case_mode unit tests ─────────────────────────────────────────

    #[test]
    fn resolve_case_mode_no_override_smart_lowercase() {
        let (stripped, mode) = resolve_case_mode("foo", CaseMode::Smart);
        assert_eq!(stripped, "foo");
        assert_eq!(mode, CaseMode::Insensitive);
    }

    #[test]
    fn resolve_case_mode_no_override_smart_uppercase() {
        let (stripped, mode) = resolve_case_mode("Foo", CaseMode::Smart);
        assert_eq!(stripped, "Foo");
        assert_eq!(mode, CaseMode::Sensitive);
    }

    #[test]
    fn resolve_case_mode_lower_c_override() {
        // \c overrides Sensitive → Insensitive; stripped pattern is "Foo"
        let (stripped, mode) = resolve_case_mode(r"\cFoo", CaseMode::Sensitive);
        assert_eq!(stripped, "Foo");
        assert_eq!(mode, CaseMode::Insensitive);
    }

    #[test]
    fn resolve_case_mode_upper_c_override() {
        // \C overrides Smart → Sensitive; stripped pattern is "foo"
        let (stripped, mode) = resolve_case_mode(r"foo\C", CaseMode::Smart);
        assert_eq!(stripped, "foo");
        assert_eq!(mode, CaseMode::Sensitive);
    }

    #[test]
    fn resolve_case_mode_last_wins() {
        // \c then \C → last-wins → Sensitive; stripped "foo"
        let (stripped, mode) = resolve_case_mode(r"\cfoo\C", CaseMode::Smart);
        assert_eq!(stripped, "foo");
        assert_eq!(mode, CaseMode::Sensitive);
    }

    // ── Integration: search with smartcase / \c / \C ─────────────────────────

    /// Build a regex the way the engine does: resolve the case mode, then
    /// prefix `(?i)` when the resolved mode is insensitive.
    fn build_regex_from(pat: &str, ic: bool, smart: bool) -> Regex {
        let base = CaseMode::from_options(ic, smart);
        let (stripped, mode) = resolve_case_mode(pat, base);
        let src = if mode == CaseMode::Insensitive {
            format!("(?i){stripped}")
        } else {
            stripped
        };
        Regex::new(&src).unwrap()
    }

    #[test]
    fn search_finds_capital_with_smartcase_lowercase_pattern() {
        // ic=true, smart=true, pattern "foo" → Insensitive → matches "FOO"
        let re = build_regex_from("foo", true, true);
        assert!(re.is_match("FOO"), "expected match on 'FOO'");
        assert!(re.is_match("foo"), "expected match on 'foo'");
    }

    #[test]
    fn search_skips_capital_with_smartcase_mixed_pattern() {
        // ic=true, smart=true, pattern "Foo" → Sensitive → does NOT match "FOO"
        let re = build_regex_from("Foo", true, true);
        assert!(!re.is_match("FOO"), "must not match 'FOO' (case-sensitive)");
        assert!(re.is_match("Foo"), "must match exact 'Foo'");
    }

    #[test]
    fn search_lower_c_override_finds_capital() {
        // \cFoo + Sensitive base → Insensitive override → matches "FOO"
        let re = build_regex_from(r"\cFoo", false, false);
        assert!(re.is_match("FOO"), "\\c override must match 'FOO'");
        assert!(re.is_match("foo"), "\\c override must match 'foo'");
    }

    #[test]
    fn vim_to_rust_regex_strips_case_overrides() {
        // vim_to_rust_regex is now a thin wrapper; \c and \C are stripped
        assert_eq!(vim_to_rust_regex(r"\cfoo"), "foo");
        assert_eq!(vim_to_rust_regex(r"foo\C"), "foo");
        assert_eq!(vim_to_rust_regex(r"\<bar\>"), r"\bbar\b");
    }

    /// `*` on word "foo" emits the pattern `\bfoo\b` (all lowercase). Under
    /// smartcase that resolves to Insensitive → should match "FOO". This test
    /// simulates the word_at_cursor_search pattern-build path.
    #[test]
    fn star_search_finds_lowercase_when_smartcase_lower_word() {
        // word_at_cursor_search escapes the word then wraps \b..\b.
        // "foo" is all-lowercase after word-extraction → Smart → Insensitive.
        let pat = r"\bfoo\b";
        let re = build_regex_from(pat, true, true);
        // Case-insensitive → matches "FOO foo Foo".
        let text = "FOO foo Foo";
        let hits: Vec<_> = re.find_iter(text).map(|m| m.as_str()).collect();
        assert!(
            hits.contains(&"FOO"),
            "smartcase lower-word * must match FOO: {hits:?}"
        );
        assert!(
            hits.contains(&"foo"),
            "smartcase lower-word * must match foo: {hits:?}"
        );
    }
}
615}