Skip to main content

fresh/model/buffer/
search.rs

//! Standalone search types and helpers extracted from `mod.rs`.
//!
//! - `ChunkedSearchState` — mutable state for incremental chunked search
//! - `HybridSearchPlan` + `SearchRegion` — plan for hybrid (unloaded + loaded) search
//! - `search_boundary_overlap` — overlap-zone regex matching across region boundaries
6use crate::model::filesystem::{FileSearchCursor, FileSearchOptions, FileSystem, SearchMatch};
7use regex::bytes::Regex;
8use std::io;
9use std::path::PathBuf;
10
11use super::LineScanChunk;
12
13// ---------------------------------------------------------------------------
14// ChunkedSearchState
15// ---------------------------------------------------------------------------
16
17/// Mutable state for an incremental chunked search over a TextBuffer's
18/// piece tree.  This is the in-editor search path — it reads chunks via
19/// `get_text_range_mut` which loads lazily from disk and works with the
20/// piece tree's edit history.
21///
22/// For searching files on disk (project-wide grep), see
23/// `FileSystem::search_file` which uses `read_range` and doesn't need
24/// a TextBuffer at all.
25///
26/// Created by `TextBuffer::search_scan_init`, advanced by
27/// `TextBuffer::search_scan_next_chunk`.  The same struct is used by
28/// both the Editor's incremental (non-blocking) search and the project-
29/// wide search running inside `spawn_blocking`.
30#[derive(Debug)]
31pub struct ChunkedSearchState {
32    /// One work item per piece-tree leaf (after `prepare_line_scan` splits).
33    pub chunks: Vec<LineScanChunk>,
34    /// Index of the next chunk to process.
35    pub next_chunk: usize,
36    /// Running document byte offset for the next chunk.
37    pub next_doc_offset: usize,
38    /// Total bytes in the buffer.
39    pub total_bytes: usize,
40    /// Bytes scanned so far (for progress reporting).
41    pub scanned_bytes: usize,
42    /// Compiled regex for searching.
43    pub regex: regex::bytes::Regex,
44    /// Accumulated match results with line/column/context.
45    pub matches: Vec<SearchMatch>,
46    /// Tail bytes from the previous chunk for cross-boundary matching.
47    pub overlap_tail: Vec<u8>,
48    /// Byte offset of the overlap_tail's first byte in the document.
49    pub overlap_doc_offset: usize,
50    /// Maximum number of matches before capping.
51    pub max_matches: usize,
52    /// Whether the match count was capped.
53    pub capped: bool,
54    /// Length of the original query string (for overlap sizing).
55    pub query_len: usize,
56    /// 1-based line number at the start of the next non-overlap data.
57    /// Advanced incrementally as chunks are processed.
58    pub(crate) running_line: usize,
59}
60
61impl ChunkedSearchState {
62    /// Returns true if the scan is complete (all chunks processed or capped).
63    pub fn is_done(&self) -> bool {
64        self.next_chunk >= self.chunks.len() || self.capped
65    }
66
67    /// Progress as a percentage (0–100).
68    pub fn progress_percent(&self) -> usize {
69        if self.total_bytes > 0 {
70            (self.scanned_bytes * 100) / self.total_bytes
71        } else {
72            100
73        }
74    }
75}
76
77// ---------------------------------------------------------------------------
78// SearchRegion / HybridSearchPlan
79// ---------------------------------------------------------------------------
80
/// One step of a hybrid search plan: either a byte range of the original
/// file that has not been loaded, or bytes already held in memory by the
/// piece tree.
#[derive(Debug)]
pub(crate) enum SearchRegion {
    /// A contiguous, not-yet-loaded range of the original file.
    Unloaded {
        /// Byte offset of the range within the original file.
        file_offset: usize,
        /// Length of the range in bytes.
        bytes: usize,
        /// Byte offset of the range within the document.
        doc_offset: usize,
    },
    /// Bytes available in memory — loaded original content or user edits.
    Loaded {
        /// The in-memory bytes to search.
        data: Vec<u8>,
        /// Byte offset of `data[0]` within the document.
        doc_offset: usize,
    },
}
94
95/// A plan for hybrid search — extracted from a `TextBuffer`'s piece tree
96/// on the main thread, executable on any thread.
97///
98/// For a large remote file with a small edit, the plan captures the few
99/// loaded regions (small) and unloaded file ranges (coordinates only).
100/// `execute()` then searches unloaded regions via `fs.search_file` (no data
101/// transfer) and loaded regions with in-memory regex.
102#[derive(Debug)]
103pub struct HybridSearchPlan {
104    pub(crate) file_path: PathBuf,
105    pub(crate) regions: Vec<SearchRegion>,
106}
107
108impl HybridSearchPlan {
109    /// Execute the search plan.  Can run on any thread — only needs a
110    /// `FileSystem` reference for unloaded region searches.
111    pub fn execute(
112        &self,
113        fs: &dyn FileSystem,
114        pattern: &str,
115        opts: &FileSearchOptions,
116        regex: &Regex,
117        max_matches: usize,
118        query_len: usize,
119    ) -> io::Result<Vec<SearchMatch>> {
120        if self.regions.is_empty() {
121            return Ok(vec![]);
122        }
123
124        // Fast path: single unloaded region → search whole file
125        if self.regions.len() == 1 {
126            if let SearchRegion::Unloaded { .. } = &self.regions[0] {
127                let mut cursor = FileSearchCursor::new();
128                let mut all_matches = Vec::new();
129                while !cursor.done && all_matches.len() < max_matches {
130                    let batch = fs.search_file(&self.file_path, pattern, opts, &mut cursor)?;
131                    all_matches.extend(batch);
132                }
133                all_matches.truncate(max_matches);
134                return Ok(all_matches);
135            }
136        }
137
138        let overlap_size = query_len.max(256);
139        let mut all_matches: Vec<SearchMatch> = Vec::new();
140        let mut running_line: usize = 1;
141        let mut prev_tail: Vec<u8> = Vec::new();
142
143        for region in &self.regions {
144            if all_matches.len() >= max_matches {
145                break;
146            }
147            let remaining = max_matches - all_matches.len();
148
149            match region {
150                SearchRegion::Unloaded {
151                    file_offset,
152                    bytes,
153                    doc_offset: region_doc_offset,
154                } => {
155                    // Boundary overlap: prev_tail + start of unloaded region
156                    if !prev_tail.is_empty() {
157                        let overlap_read = (*bytes).min(overlap_size);
158                        if let Ok(head) =
159                            fs.read_range(&self.file_path, *file_offset as u64, overlap_read)
160                        {
161                            let boundary = search_boundary_overlap(
162                                &prev_tail,
163                                &head,
164                                *region_doc_offset - prev_tail.len(),
165                                running_line,
166                                regex,
167                                remaining,
168                            );
169                            all_matches.extend(boundary);
170                        }
171                    }
172
173                    // Search unloaded range via fs.search_file
174                    let mut opts_bounded = opts.clone();
175                    opts_bounded.max_matches = remaining.saturating_sub(all_matches.len());
176                    let mut cursor = FileSearchCursor::for_range(
177                        *file_offset,
178                        *file_offset + *bytes,
179                        running_line,
180                    );
181                    while !cursor.done && all_matches.len() < max_matches {
182                        let mut batch =
183                            fs.search_file(&self.file_path, pattern, &opts_bounded, &mut cursor)?;
184                        // Remap byte_offset from file-relative to doc-relative
185                        for m in &mut batch {
186                            m.byte_offset = *region_doc_offset + (m.byte_offset - *file_offset);
187                        }
188                        all_matches.extend(batch);
189                    }
190                    running_line = cursor.running_line;
191
192                    // Save tail for next boundary
193                    if *bytes >= overlap_size {
194                        let tail_off = *file_offset + *bytes - overlap_size;
195                        prev_tail = fs
196                            .read_range(&self.file_path, tail_off as u64, overlap_size)
197                            .unwrap_or_default();
198                    } else {
199                        prev_tail = fs
200                            .read_range(&self.file_path, *file_offset as u64, *bytes)
201                            .unwrap_or_default();
202                    }
203                }
204                SearchRegion::Loaded {
205                    data,
206                    doc_offset: region_doc_offset,
207                } => {
208                    // Build search buffer: overlap tail + loaded data
209                    let mut search_buf = Vec::with_capacity(prev_tail.len() + data.len());
210                    search_buf.extend_from_slice(&prev_tail);
211                    search_buf.extend_from_slice(data);
212
213                    let overlap_len = prev_tail.len();
214                    let buf_doc_offset = if overlap_len > 0 {
215                        *region_doc_offset - overlap_len
216                    } else {
217                        *region_doc_offset
218                    };
219
220                    let newlines_in_overlap = search_buf[..overlap_len]
221                        .iter()
222                        .filter(|&&b| b == b'\n')
223                        .count();
224                    let mut line_at = running_line.saturating_sub(newlines_in_overlap);
225                    let mut counted_to = 0usize;
226
227                    for m in regex.find_iter(&search_buf) {
228                        if overlap_len > 0 && m.end() <= overlap_len {
229                            continue;
230                        }
231                        if all_matches.len() >= max_matches {
232                            break;
233                        }
234
235                        line_at += search_buf[counted_to..m.start()]
236                            .iter()
237                            .filter(|&&b| b == b'\n')
238                            .count();
239                        counted_to = m.start();
240
241                        let line_start = search_buf[..m.start()]
242                            .iter()
243                            .rposition(|&b| b == b'\n')
244                            .map(|p| p + 1)
245                            .unwrap_or(0);
246                        let line_end = search_buf[m.start()..]
247                            .iter()
248                            .position(|&b| b == b'\n')
249                            .map(|p| m.start() + p)
250                            .unwrap_or(search_buf.len());
251
252                        let match_doc_offset = buf_doc_offset + m.start();
253                        let column = m.start() - line_start + 1;
254                        let context =
255                            String::from_utf8_lossy(&search_buf[line_start..line_end]).into_owned();
256
257                        all_matches.push(SearchMatch {
258                            byte_offset: match_doc_offset,
259                            length: m.end() - m.start(),
260                            line: line_at,
261                            column,
262                            context,
263                        });
264                    }
265
266                    running_line += data.iter().filter(|&&b| b == b'\n').count();
267
268                    let tail_start = data.len().saturating_sub(overlap_size);
269                    prev_tail = data[tail_start..].to_vec();
270                }
271            }
272        }
273
274        all_matches.truncate(max_matches);
275        Ok(all_matches)
276    }
277}
278
279// ---------------------------------------------------------------------------
280// search_boundary_overlap
281// ---------------------------------------------------------------------------
282
283/// Search the overlap zone between two regions for matches that span the
284/// boundary.  `prev_tail` is the tail of the previous region, `next_head`
285/// is the head of the next region.  `doc_offset` is the document byte
286/// offset of `prev_tail[0]`.  Only matches that cross the boundary (start
287/// in tail, end in head) are returned — pure-tail matches were already found.
288pub(crate) fn search_boundary_overlap(
289    prev_tail: &[u8],
290    next_head: &[u8],
291    doc_offset: usize,
292    running_line: usize,
293    regex: &Regex,
294    max_matches: usize,
295) -> Vec<SearchMatch> {
296    let mut buf = Vec::with_capacity(prev_tail.len() + next_head.len());
297    buf.extend_from_slice(prev_tail);
298    buf.extend_from_slice(next_head);
299
300    let overlap_len = prev_tail.len();
301    let newlines_before = prev_tail.iter().filter(|&&b| b == b'\n').count();
302    let mut line_at = running_line.saturating_sub(newlines_before);
303    let mut counted_to = 0usize;
304    let mut matches = Vec::new();
305
306    for m in regex.find_iter(&buf) {
307        // Only keep matches that cross the boundary
308        if m.start() < overlap_len && m.end() > overlap_len {
309            if matches.len() >= max_matches {
310                break;
311            }
312
313            line_at += buf[counted_to..m.start()]
314                .iter()
315                .filter(|&&b| b == b'\n')
316                .count();
317            counted_to = m.start();
318
319            let line_start = buf[..m.start()]
320                .iter()
321                .rposition(|&b| b == b'\n')
322                .map(|p| p + 1)
323                .unwrap_or(0);
324            let line_end = buf[m.start()..]
325                .iter()
326                .position(|&b| b == b'\n')
327                .map(|p| m.start() + p)
328                .unwrap_or(buf.len());
329
330            let column = m.start() - line_start + 1;
331            let context = String::from_utf8_lossy(&buf[line_start..line_end]).into_owned();
332
333            matches.push(SearchMatch {
334                byte_offset: doc_offset + m.start(),
335                length: m.end() - m.start(),
336                line: line_at,
337                column,
338                context,
339            });
340        }
341    }
342    matches
343}