fresh/model/buffer/search.rs
//! Standalone search types and helpers extracted from `mod.rs`.
//!
//! - `ChunkedSearchState` — mutable state for incremental chunked search
//! - `HybridSearchPlan` + `SearchRegion` — plan for hybrid (unloaded + loaded) search
//! - `search_boundary_overlap` — overlap-zone regex matching across region boundaries
6use crate::model::filesystem::{FileSearchCursor, FileSearchOptions, FileSystem, SearchMatch};
7use regex::bytes::Regex;
8use std::io;
9use std::path::PathBuf;
10
11use super::LineScanChunk;
12
13// ---------------------------------------------------------------------------
14// ChunkedSearchState
15// ---------------------------------------------------------------------------
16
17/// Mutable state for an incremental chunked search over a TextBuffer's
18/// piece tree. This is the in-editor search path — it reads chunks via
19/// `get_text_range_mut` which loads lazily from disk and works with the
20/// piece tree's edit history.
21///
22/// For searching files on disk (project-wide grep), see
23/// `FileSystem::search_file` which uses `read_range` and doesn't need
24/// a TextBuffer at all.
25///
26/// Created by `TextBuffer::search_scan_init`, advanced by
27/// `TextBuffer::search_scan_next_chunk`. The same struct is used by
28/// both the Editor's incremental (non-blocking) search and the project-
29/// wide search running inside `spawn_blocking`.
30#[derive(Debug)]
31pub struct ChunkedSearchState {
32 /// One work item per piece-tree leaf (after `prepare_line_scan` splits).
33 pub chunks: Vec<LineScanChunk>,
34 /// Index of the next chunk to process.
35 pub next_chunk: usize,
36 /// Running document byte offset for the next chunk.
37 pub next_doc_offset: usize,
38 /// Total bytes in the buffer.
39 pub total_bytes: usize,
40 /// Bytes scanned so far (for progress reporting).
41 pub scanned_bytes: usize,
42 /// Compiled regex for searching.
43 pub regex: regex::bytes::Regex,
44 /// Accumulated match results with line/column/context.
45 pub matches: Vec<SearchMatch>,
46 /// Tail bytes from the previous chunk for cross-boundary matching.
47 pub overlap_tail: Vec<u8>,
48 /// Byte offset of the overlap_tail's first byte in the document.
49 pub overlap_doc_offset: usize,
50 /// Maximum number of matches before capping.
51 pub max_matches: usize,
52 /// Whether the match count was capped.
53 pub capped: bool,
54 /// Length of the original query string (for overlap sizing).
55 pub query_len: usize,
56 /// 1-based line number at the start of the next non-overlap data.
57 /// Advanced incrementally as chunks are processed.
58 pub(crate) running_line: usize,
59}
60
61impl ChunkedSearchState {
62 /// Returns true if the scan is complete (all chunks processed or capped).
63 pub fn is_done(&self) -> bool {
64 self.next_chunk >= self.chunks.len() || self.capped
65 }
66
67 /// Progress as a percentage (0–100).
68 pub fn progress_percent(&self) -> usize {
69 if self.total_bytes > 0 {
70 (self.scanned_bytes * 100) / self.total_bytes
71 } else {
72 100
73 }
74 }
75}
76
77// ---------------------------------------------------------------------------
78// SearchRegion / HybridSearchPlan
79// ---------------------------------------------------------------------------
80
/// One region of a hybrid search plan: either a range of the original file
/// that has not been loaded, or bytes already held in memory by the piece
/// tree.
#[derive(Debug)]
pub(crate) enum SearchRegion {
    /// Contiguous range of the original file that hasn't been loaded.
    Unloaded {
        /// Byte offset of the range within the file on disk.
        file_offset: usize,
        /// Length of the range in bytes.
        bytes: usize,
        /// Byte offset of the range within the document.
        doc_offset: usize,
    },
    /// In-memory data (loaded original content or user edits).
    Loaded {
        /// The region's bytes.
        data: Vec<u8>,
        /// Byte offset of `data[0]` within the document.
        doc_offset: usize,
    },
}
94
95/// A plan for hybrid search — extracted from a `TextBuffer`'s piece tree
96/// on the main thread, executable on any thread.
97///
98/// For a large remote file with a small edit, the plan captures the few
99/// loaded regions (small) and unloaded file ranges (coordinates only).
100/// `execute()` then searches unloaded regions via `fs.search_file` (no data
101/// transfer) and loaded regions with in-memory regex.
102#[derive(Debug)]
103pub struct HybridSearchPlan {
104 pub(crate) file_path: PathBuf,
105 pub(crate) regions: Vec<SearchRegion>,
106}
107
108impl HybridSearchPlan {
109 /// Execute the search plan. Can run on any thread — only needs a
110 /// `FileSystem` reference for unloaded region searches.
111 pub fn execute(
112 &self,
113 fs: &dyn FileSystem,
114 pattern: &str,
115 opts: &FileSearchOptions,
116 regex: &Regex,
117 max_matches: usize,
118 query_len: usize,
119 ) -> io::Result<Vec<SearchMatch>> {
120 if self.regions.is_empty() {
121 return Ok(vec![]);
122 }
123
124 // Fast path: single unloaded region → search whole file
125 if self.regions.len() == 1 {
126 if let SearchRegion::Unloaded { .. } = &self.regions[0] {
127 let mut cursor = FileSearchCursor::new();
128 let mut all_matches = Vec::new();
129 while !cursor.done && all_matches.len() < max_matches {
130 let batch = fs.search_file(&self.file_path, pattern, opts, &mut cursor)?;
131 all_matches.extend(batch);
132 }
133 all_matches.truncate(max_matches);
134 return Ok(all_matches);
135 }
136 }
137
138 let overlap_size = query_len.max(256);
139 let mut all_matches: Vec<SearchMatch> = Vec::new();
140 let mut running_line: usize = 1;
141 let mut prev_tail: Vec<u8> = Vec::new();
142
143 for region in &self.regions {
144 if all_matches.len() >= max_matches {
145 break;
146 }
147 let remaining = max_matches - all_matches.len();
148
149 match region {
150 SearchRegion::Unloaded {
151 file_offset,
152 bytes,
153 doc_offset: region_doc_offset,
154 } => {
155 // Boundary overlap: prev_tail + start of unloaded region
156 if !prev_tail.is_empty() {
157 let overlap_read = (*bytes).min(overlap_size);
158 if let Ok(head) =
159 fs.read_range(&self.file_path, *file_offset as u64, overlap_read)
160 {
161 let boundary = search_boundary_overlap(
162 &prev_tail,
163 &head,
164 *region_doc_offset - prev_tail.len(),
165 running_line,
166 regex,
167 remaining,
168 );
169 all_matches.extend(boundary);
170 }
171 }
172
173 // Search unloaded range via fs.search_file
174 let mut opts_bounded = opts.clone();
175 opts_bounded.max_matches = remaining.saturating_sub(all_matches.len());
176 let mut cursor = FileSearchCursor::for_range(
177 *file_offset,
178 *file_offset + *bytes,
179 running_line,
180 );
181 while !cursor.done && all_matches.len() < max_matches {
182 let mut batch =
183 fs.search_file(&self.file_path, pattern, &opts_bounded, &mut cursor)?;
184 // Remap byte_offset from file-relative to doc-relative
185 for m in &mut batch {
186 m.byte_offset = *region_doc_offset + (m.byte_offset - *file_offset);
187 }
188 all_matches.extend(batch);
189 }
190 running_line = cursor.running_line;
191
192 // Save tail for next boundary
193 if *bytes >= overlap_size {
194 let tail_off = *file_offset + *bytes - overlap_size;
195 prev_tail = fs
196 .read_range(&self.file_path, tail_off as u64, overlap_size)
197 .unwrap_or_default();
198 } else {
199 prev_tail = fs
200 .read_range(&self.file_path, *file_offset as u64, *bytes)
201 .unwrap_or_default();
202 }
203 }
204 SearchRegion::Loaded {
205 data,
206 doc_offset: region_doc_offset,
207 } => {
208 // Build search buffer: overlap tail + loaded data
209 let mut search_buf = Vec::with_capacity(prev_tail.len() + data.len());
210 search_buf.extend_from_slice(&prev_tail);
211 search_buf.extend_from_slice(data);
212
213 let overlap_len = prev_tail.len();
214 let buf_doc_offset = if overlap_len > 0 {
215 *region_doc_offset - overlap_len
216 } else {
217 *region_doc_offset
218 };
219
220 let newlines_in_overlap = search_buf[..overlap_len]
221 .iter()
222 .filter(|&&b| b == b'\n')
223 .count();
224 let mut line_at = running_line.saturating_sub(newlines_in_overlap);
225 let mut counted_to = 0usize;
226
227 for m in regex.find_iter(&search_buf) {
228 if overlap_len > 0 && m.end() <= overlap_len {
229 continue;
230 }
231 if all_matches.len() >= max_matches {
232 break;
233 }
234
235 line_at += search_buf[counted_to..m.start()]
236 .iter()
237 .filter(|&&b| b == b'\n')
238 .count();
239 counted_to = m.start();
240
241 let line_start = search_buf[..m.start()]
242 .iter()
243 .rposition(|&b| b == b'\n')
244 .map(|p| p + 1)
245 .unwrap_or(0);
246 let line_end = search_buf[m.start()..]
247 .iter()
248 .position(|&b| b == b'\n')
249 .map(|p| m.start() + p)
250 .unwrap_or(search_buf.len());
251
252 let match_doc_offset = buf_doc_offset + m.start();
253 let column = m.start() - line_start + 1;
254 let context =
255 String::from_utf8_lossy(&search_buf[line_start..line_end]).into_owned();
256
257 all_matches.push(SearchMatch {
258 byte_offset: match_doc_offset,
259 length: m.end() - m.start(),
260 line: line_at,
261 column,
262 context,
263 });
264 }
265
266 running_line += data.iter().filter(|&&b| b == b'\n').count();
267
268 let tail_start = data.len().saturating_sub(overlap_size);
269 prev_tail = data[tail_start..].to_vec();
270 }
271 }
272 }
273
274 all_matches.truncate(max_matches);
275 Ok(all_matches)
276 }
277}
278
279// ---------------------------------------------------------------------------
280// search_boundary_overlap
281// ---------------------------------------------------------------------------
282
283/// Search the overlap zone between two regions for matches that span the
284/// boundary. `prev_tail` is the tail of the previous region, `next_head`
285/// is the head of the next region. `doc_offset` is the document byte
286/// offset of `prev_tail[0]`. Only matches that cross the boundary (start
287/// in tail, end in head) are returned — pure-tail matches were already found.
288pub(crate) fn search_boundary_overlap(
289 prev_tail: &[u8],
290 next_head: &[u8],
291 doc_offset: usize,
292 running_line: usize,
293 regex: &Regex,
294 max_matches: usize,
295) -> Vec<SearchMatch> {
296 let mut buf = Vec::with_capacity(prev_tail.len() + next_head.len());
297 buf.extend_from_slice(prev_tail);
298 buf.extend_from_slice(next_head);
299
300 let overlap_len = prev_tail.len();
301 let newlines_before = prev_tail.iter().filter(|&&b| b == b'\n').count();
302 let mut line_at = running_line.saturating_sub(newlines_before);
303 let mut counted_to = 0usize;
304 let mut matches = Vec::new();
305
306 for m in regex.find_iter(&buf) {
307 // Only keep matches that cross the boundary
308 if m.start() < overlap_len && m.end() > overlap_len {
309 if matches.len() >= max_matches {
310 break;
311 }
312
313 line_at += buf[counted_to..m.start()]
314 .iter()
315 .filter(|&&b| b == b'\n')
316 .count();
317 counted_to = m.start();
318
319 let line_start = buf[..m.start()]
320 .iter()
321 .rposition(|&b| b == b'\n')
322 .map(|p| p + 1)
323 .unwrap_or(0);
324 let line_end = buf[m.start()..]
325 .iter()
326 .position(|&b| b == b'\n')
327 .map(|p| m.start() + p)
328 .unwrap_or(buf.len());
329
330 let column = m.start() - line_start + 1;
331 let context = String::from_utf8_lossy(&buf[line_start..line_end]).into_owned();
332
333 matches.push(SearchMatch {
334 byte_offset: doc_offset + m.start(),
335 length: m.end() - m.start(),
336 line: line_at,
337 column,
338 context,
339 });
340 }
341 }
342 matches
343}