Skip to main content

aft/
grep_executor.rs

1use std::collections::HashSet;
2use std::env;
3use std::path::{Path, PathBuf};
4use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
5use std::sync::Arc;
6use std::time::{Duration, Instant};
7
8use ignore::WalkBuilder;
9use rayon::prelude::*;
10
11use crate::commands::multi_path::{
12    canonical_key, dedupe_nested_paths, resolve_path_or_multi, SearchPathResolution,
13};
14use crate::context::AppContext;
15use crate::pattern_compile::{CompiledPattern, LiteralSearch};
16use crate::protocol::Response;
17use crate::search_index::{
18    build_path_filters, has_any_project_file_from, read_searchable_text, resolve_search_scope,
19    sort_grep_matches_by_mtime_desc, sort_paths_by_mtime_desc, GrepMatch, GrepResult, IndexStatus,
20    PathFilters,
21};
22
/// Maximum files enumerated during grep/glob index-unavailable fallback walks.
///
/// Once a fallback walk has accepted this many filter-matching files it stops
/// and reports itself as truncated.
pub(crate) const MAX_FALLBACK_WALK_FILES: usize = 50_000;
/// Wall-clock budget for grep/glob index-unavailable fallback walks on the dispatch thread.
///
/// The walk loops compare the elapsed time against this budget before handling
/// each directory entry and stop, flagged as truncated, once it is used up.
pub(crate) const FALLBACK_WALK_BUDGET: Duration = Duration::from_secs(10);
27
/// Result of a bounded fallback walk: the files it collected and whether it
/// stopped before visiting everything.
#[derive(Clone, Debug)]
pub struct FallbackWalkOutcome {
    /// Files that were accepted by the path filters.
    pub files: Vec<PathBuf>,
    /// `true` when the walk stopped early because the time budget ran out or
    /// the file cap was reached.
    pub walk_truncated: bool,
}
33
/// Caller-supplied grep options that are applied to every search root.
#[derive(Clone, Debug)]
pub struct GrepParams {
    /// Include patterns, forwarded to the index search and to the fallback
    /// path filters.
    pub include: Vec<String>,
    /// Exclude patterns, forwarded to the index search and to the fallback
    /// path filters.
    pub exclude: Vec<String>,
    /// Maximum number of matches the final result may contain.
    pub max_results: usize,
}
40
/// The resolved set of roots a single grep request searches.
#[derive(Clone, Debug)]
pub struct GrepScope {
    /// Roots to search, one entry per resolved search path.
    pub roots: Vec<ResolvedRoot>,
    /// `true` when more than one root is searched; `resolve_grep_scope`
    /// derives it from `roots.len() > 1`.
    pub multi_root: bool,
    /// Match cap applied to each root. `resolve_grep_scope` doubles the
    /// requested maximum for multi-root scopes and uses it unchanged otherwise.
    pub per_root_max: usize,
}
47
/// One search root together with the settings that decide how it is searched.
#[derive(Clone, Debug)]
pub struct ResolvedRoot {
    /// Path that is actually searched (a directory to walk or a single file).
    pub search_root: PathBuf,
    /// Root passed to the path filters; see `compute_filter_root`.
    pub filter_root: PathBuf,
    /// Whether the search index may be used for this root.
    pub use_index: bool,
    /// Set by `resolve_grep_scope` to the negation of `use_index`.
    pub is_external: bool,
}
55
56pub fn project_root(ctx: &AppContext) -> PathBuf {
57    let project_root = ctx
58        .config()
59        .project_root
60        .clone()
61        .unwrap_or_else(|| env::current_dir().unwrap_or_default());
62    std::fs::canonicalize(&project_root).unwrap_or(project_root)
63}
64
65pub fn resolve_grep_scope(
66    ctx: &AppContext,
67    paths: Option<&serde_json::Value>,
68    max_results: usize,
69    req_id: &str,
70) -> Result<GrepScope, Response> {
71    let project_root = project_root(ctx);
72    let search_roots = resolve_roots(ctx, paths, &project_root, req_id)?;
73
74    if let Some(missing_root) = search_roots.iter().find(|root| !root.exists()) {
75        return Err(Response::error(
76            req_id,
77            "path_not_found",
78            format!(
79                "grep: search path does not exist: {}",
80                missing_root.display()
81            ),
82        ));
83    }
84
85    let roots = search_roots
86        .into_iter()
87        .map(|search_root| {
88            let scope = resolve_search_scope(&project_root, Some(&search_root.to_string_lossy()));
89            let is_external = !scope.use_index;
90            let filter_root =
91                compute_filter_root(&project_root, &scope.root, scope.use_index, is_external);
92            ResolvedRoot {
93                search_root: scope.root,
94                filter_root,
95                use_index: scope.use_index,
96                is_external,
97            }
98        })
99        .collect::<Vec<_>>();
100
101    let multi_root = roots.len() > 1;
102    let per_root_max = if multi_root {
103        max_results.saturating_mul(2).max(max_results)
104    } else {
105        max_results
106    };
107
108    Ok(GrepScope {
109        roots,
110        multi_root,
111        per_root_max,
112    })
113}
114
/// Picks the root against which path filters are evaluated.
///
/// Returns `search_root` only for a root that is external and not served by
/// the index; every other combination uses `project_root`.
pub fn compute_filter_root(
    project_root: &Path,
    search_root: &Path,
    use_index: bool,
    is_external: bool,
) -> PathBuf {
    let anchor = match (is_external, use_index) {
        (true, false) => search_root,
        _ => project_root,
    };
    anchor.to_path_buf()
}
127
128pub fn scope_has_files(project_root: &Path, scope: &GrepScope) -> bool {
129    scope.roots.iter().any(|root| {
130        // An explicitly-named existing file is always in scope (it's searched
131        // directly even if gitignored / .aftignored), so don't report it as
132        // "no files matched scope".
133        if root.search_root.is_file() {
134            return true;
135        }
136        let catch_all =
137            build_path_filters(&["**/*".to_string()], &[]).expect("valid catch-all glob");
138        has_any_project_file_from(&root.filter_root, &root.search_root, &catch_all)
139            || has_any_project_file_from(project_root, &root.search_root, &catch_all)
140    })
141}
142
143pub fn execute(
144    ctx: &AppContext,
145    pattern: &CompiledPattern,
146    scope: &GrepScope,
147    params: &GrepParams,
148) -> GrepResult {
149    let project_root = project_root(ctx);
150    if scope.roots.len() == 1 {
151        return execute_root(
152            ctx,
153            pattern,
154            &scope.roots[0],
155            params,
156            params.max_results,
157            &project_root,
158        );
159    }
160
161    let mut results = Vec::new();
162    for root in &scope.roots {
163        results.push(execute_root(
164            ctx,
165            pattern,
166            root,
167            params,
168            scope.per_root_max,
169            &project_root,
170        ));
171    }
172    merge_grep_results(results, &project_root, params.max_results)
173}
174
175fn resolve_roots(
176    ctx: &AppContext,
177    paths: Option<&serde_json::Value>,
178    project_root: &Path,
179    req_id: &str,
180) -> Result<Vec<PathBuf>, Response> {
181    let Some(paths) = paths else {
182        return Ok(vec![resolve_search_scope(project_root, None).root]);
183    };
184    if paths.is_null() {
185        return Ok(vec![resolve_search_scope(project_root, None).root]);
186    }
187    if let Some(path) = paths.as_str() {
188        return match resolve_path_or_multi(
189            path,
190            project_root,
191            |candidate| ctx.validate_path(req_id, candidate),
192            req_id,
193        )? {
194            SearchPathResolution::Single(root) => Ok(vec![root]),
195            SearchPathResolution::Multi(roots) => Ok(roots),
196        };
197    }
198    if let Some(items) = paths.as_array() {
199        let mut roots = Vec::with_capacity(items.len());
200        for item in items {
201            let Some(path) = item.as_str() else {
202                return Err(Response::error(
203                    req_id,
204                    "invalid_request",
205                    "grep: path array entries must be strings",
206                ));
207            };
208            let validated = ctx.validate_path(req_id, Path::new(path))?;
209            let raw = validated.to_string_lossy();
210            roots.push(resolve_search_scope(project_root, Some(raw.as_ref())).root);
211        }
212        let roots = dedupe_nested_paths(roots);
213        if roots.is_empty() {
214            Ok(vec![resolve_search_scope(project_root, None).root])
215        } else {
216            Ok(roots)
217        }
218    } else {
219        Err(Response::error(
220            req_id,
221            "invalid_request",
222            "grep: path must be a string, array of strings, or null",
223        ))
224    }
225}
226
227fn execute_root(
228    ctx: &AppContext,
229    pattern: &CompiledPattern,
230    root: &ResolvedRoot,
231    params: &GrepParams,
232    max_results: usize,
233    project_root: &Path,
234) -> GrepResult {
235    // Explicit single-file scope: search the named file directly, bypassing the
236    // trigram index and the gitignore/.aftignore-aware walk. Matches ripgrep,
237    // where naming a file explicitly searches it even when it is gitignored,
238    // .aftignored, or not yet indexed. Binary + UTF-8 guards still apply.
239    if root.search_root.is_file() {
240        let index_status = if root.use_index {
241            current_index_status(ctx)
242        } else {
243            IndexStatus::Fallback
244        };
245        return grep_explicit_file(&root.search_root, pattern, max_results, index_status);
246    }
247
248    let indexed_snapshot = {
249        let search_index = ctx
250            .search_index()
251            .read()
252            .unwrap_or_else(std::sync::PoisonError::into_inner);
253        match search_index.as_ref() {
254            Some(index) if index.ready && root.use_index => Some(index.snapshot()),
255            _ => None,
256        }
257    };
258    let indexed = indexed_snapshot.map(|snapshot| {
259        snapshot.search_grep(
260            pattern,
261            &params.include,
262            &params.exclude,
263            &root.search_root,
264            max_results,
265        )
266    });
267
268    match indexed {
269        Some(result) => result,
270        None => {
271            let index_status = if root.use_index {
272                current_index_status(ctx)
273            } else {
274                IndexStatus::Fallback
275            };
276            fallback_grep(
277                project_root,
278                &root.search_root,
279                &root.filter_root,
280                pattern,
281                &params.include,
282                &params.exclude,
283                max_results,
284                index_status,
285            )
286        }
287    }
288}
289
290/// Grep a single explicitly-named file directly, bypassing the trigram index
291/// and the gitignore/.aftignore-aware walk. Used when the caller's `path`
292/// resolves to one existing file — ripgrep semantics: an explicitly-named file
293/// is searched even when it is gitignored, `.aftignore`d, or not yet indexed.
294/// Binary detection and UTF-8 guards still apply (via `read_searchable_text`
295/// inside `fallback_search_file`).
296fn grep_explicit_file(
297    file: &Path,
298    pattern: &CompiledPattern,
299    max_results: usize,
300    index_status: IndexStatus,
301) -> GrepResult {
302    let total_matches = AtomicUsize::new(0);
303    let files_searched = AtomicUsize::new(0);
304    let files_with_matches = AtomicUsize::new(0);
305    let truncated = AtomicBool::new(false);
306    let engine_capped = AtomicBool::new(false);
307    let stop_after = max_results.saturating_mul(2);
308
309    let matches = fallback_search_file(
310        &file.to_path_buf(),
311        pattern,
312        max_results,
313        stop_after,
314        &total_matches,
315        &files_searched,
316        &files_with_matches,
317        &truncated,
318        &engine_capped,
319    );
320
321    GrepResult {
322        total_matches: total_matches.load(Ordering::Relaxed),
323        matches,
324        files_searched: files_searched.load(Ordering::Relaxed),
325        files_with_matches: files_with_matches.load(Ordering::Relaxed),
326        index_status,
327        truncated: truncated.load(Ordering::Relaxed),
328        fully_degraded: false,
329        engine_capped: engine_capped.load(Ordering::Relaxed),
330        walk_truncated: false,
331    }
332}
333
334pub fn merge_grep_results(
335    results: Vec<GrepResult>,
336    project_root: &Path,
337    max_results: usize,
338) -> GrepResult {
339    let mut matches = Vec::new();
340    let mut total_matches = 0usize;
341    let mut files_searched = 0usize;
342    let mut files_with_matches = 0usize;
343    let mut index_status = IndexStatus::Ready;
344    let mut any_child_truncated = false;
345    let mut fully_degraded = false;
346    let mut engine_capped = false;
347    let mut walk_truncated = false;
348    let mut seen_match_keys = HashSet::new();
349
350    for result in results {
351        total_matches += result.total_matches;
352        files_searched += result.files_searched;
353        files_with_matches += result.files_with_matches;
354        index_status = weakest_index_status(index_status, result.index_status);
355        any_child_truncated |= result.truncated;
356        fully_degraded |= result.fully_degraded;
357        engine_capped |= result.engine_capped;
358        walk_truncated |= result.walk_truncated;
359
360        for grep_match in result.matches {
361            let file_key = canonical_key(&grep_match.file);
362            let match_key = (file_key, grep_match.line, grep_match.column);
363            if seen_match_keys.insert(match_key) {
364                matches.push(grep_match);
365            }
366        }
367    }
368
369    sort_grep_matches_by_mtime_desc(&mut matches, project_root);
370    if matches.len() > max_results {
371        matches.truncate(max_results);
372    }
373
374    GrepResult {
375        matches,
376        total_matches,
377        files_searched,
378        files_with_matches,
379        index_status,
380        truncated: any_child_truncated || total_matches > max_results,
381        fully_degraded,
382        engine_capped,
383        walk_truncated,
384    }
385}
386
387fn fallback_project_walk_builder(search_root: &Path) -> WalkBuilder {
388    let mut builder = WalkBuilder::new(search_root);
389    builder
390        .hidden(false)
391        .git_ignore(true)
392        .git_global(true)
393        .git_exclude(true)
394        .add_custom_ignore_filename(".aftignore")
395        .filter_entry(|entry| {
396            let name = entry.file_name().to_string_lossy();
397            if entry.file_type().map_or(false, |ft| ft.is_dir()) {
398                return !matches!(
399                    name.as_ref(),
400                    "node_modules"
401                        | "target"
402                        | "venv"
403                        | ".venv"
404                        | ".git"
405                        | "__pycache__"
406                        | ".tox"
407                        | "dist"
408                        | "build"
409                );
410            }
411            true
412        });
413    builder
414}
415
416/// Bounded project walk used when the trigram index is unavailable (grep/glob fallback).
417pub(crate) fn bounded_fallback_walk_files(
418    filter_root: &Path,
419    search_root: &Path,
420    filters: &PathFilters,
421) -> FallbackWalkOutcome {
422    bounded_fallback_walk_files_with_limits(
423        filter_root,
424        search_root,
425        filters,
426        MAX_FALLBACK_WALK_FILES,
427        FALLBACK_WALK_BUDGET,
428    )
429}
430
431fn bounded_fallback_walk_files_with_limits(
432    filter_root: &Path,
433    search_root: &Path,
434    filters: &PathFilters,
435    max_files: usize,
436    budget: Duration,
437) -> FallbackWalkOutcome {
438    let started = Instant::now();
439    let mut files = Vec::new();
440    let mut walk_truncated = false;
441    let builder = fallback_project_walk_builder(search_root);
442
443    for entry in builder.build().filter_map(|entry| entry.ok()) {
444        if started.elapsed() >= budget {
445            walk_truncated = true;
446            break;
447        }
448        if !entry
449            .file_type()
450            .map_or(false, |file_type| file_type.is_file())
451        {
452            continue;
453        }
454        let path = entry.into_path();
455        if filters.matches(filter_root, &path) {
456            files.push(path);
457            if files.len() > max_files {
458                walk_truncated = true;
459                files.truncate(max_files);
460                break;
461            }
462        }
463    }
464
465    sort_paths_by_mtime_desc(&mut files);
466    FallbackWalkOutcome {
467        files,
468        walk_truncated,
469    }
470}
471
472pub(crate) fn for_each_bounded_fallback_walk_file<F>(
473    filter_root: &Path,
474    search_root: &Path,
475    filters: &PathFilters,
476    mut on_file: F,
477) -> bool
478where
479    F: FnMut(&PathBuf),
480{
481    for_each_bounded_fallback_walk_file_with_limits(
482        filter_root,
483        search_root,
484        filters,
485        MAX_FALLBACK_WALK_FILES,
486        FALLBACK_WALK_BUDGET,
487        &mut on_file,
488    )
489}
490
491fn for_each_bounded_fallback_walk_file_with_limits<F>(
492    filter_root: &Path,
493    search_root: &Path,
494    filters: &PathFilters,
495    max_files: usize,
496    budget: Duration,
497    on_file: &mut F,
498) -> bool
499where
500    F: FnMut(&PathBuf),
501{
502    let started = Instant::now();
503    let mut files_seen = 0usize;
504    let builder = fallback_project_walk_builder(search_root);
505
506    for entry in builder.build().filter_map(|entry| entry.ok()) {
507        if started.elapsed() >= budget {
508            return true;
509        }
510        if !entry
511            .file_type()
512            .map_or(false, |file_type| file_type.is_file())
513        {
514            continue;
515        }
516        let path = entry.into_path();
517        if filters.matches(filter_root, &path) {
518            files_seen += 1;
519            if files_seen > max_files {
520                return true;
521            }
522            on_file(&path);
523        }
524    }
525    false
526}
527
528pub fn weakest_index_status(left: IndexStatus, right: IndexStatus) -> IndexStatus {
529    match (left, right) {
530        (IndexStatus::Disabled, _) | (_, IndexStatus::Disabled) => IndexStatus::Disabled,
531        (IndexStatus::Fallback, _) | (_, IndexStatus::Fallback) => IndexStatus::Fallback,
532        (IndexStatus::Building, _) | (_, IndexStatus::Building) => IndexStatus::Building,
533        (IndexStatus::Ready, IndexStatus::Ready) => IndexStatus::Ready,
534    }
535}
536
537/// Hidden entry for `search_startup_bench` timing (fallback grep path).
538#[doc(hidden)]
539pub fn fallback_grep_bench(
540    project_root: &Path,
541    search_root: &Path,
542    filter_root: &Path,
543    pattern: &CompiledPattern,
544    include: &[String],
545    exclude: &[String],
546    max_results: usize,
547) -> GrepResult {
548    fallback_grep(
549        project_root,
550        search_root,
551        filter_root,
552        pattern,
553        include,
554        exclude,
555        max_results,
556        IndexStatus::Fallback,
557    )
558}
559
/// Greps `search_root` without the index by walking the tree and searching
/// the files in parallel batches.
///
/// Files come from the bounded fallback walk, are grouped into batches of 256
/// and each batch is searched with rayon. Matches are sorted by modification
/// time (newest first). The result always has `fully_degraded` set and carries
/// the `index_status` supplied by the caller.
fn fallback_grep(
    project_root: &Path,
    search_root: &Path,
    filter_root: &Path,
    pattern: &CompiledPattern,
    include: &[String],
    exclude: &[String],
    max_results: usize,
    index_status: IndexStatus,
) -> GrepResult {
    // Filters that fail to build are replaced by `PathFilters::default()`
    // instead of failing the search.
    let filters = build_path_filters(include, exclude).unwrap_or_default();

    // Counters and flags shared by all worker threads.
    let total_matches = AtomicUsize::new(0);
    let files_searched = AtomicUsize::new(0);
    let files_with_matches = AtomicUsize::new(0);
    let truncated = AtomicBool::new(false);
    let engine_capped = AtomicBool::new(false);
    // Searching stops for good once the result is truncated and this many
    // matches have been counted.
    let stop_after = max_results.saturating_mul(2);
    let stop_scan = Arc::new(AtomicBool::new(false));

    let mut matches = Vec::new();
    let mut batch: Vec<PathBuf> = Vec::with_capacity(256);

    // Searches every file of the pending batch in parallel and appends the
    // matches found; the batch is left empty afterwards.
    let flush_batch = |batch: &mut Vec<PathBuf>, matches: &mut Vec<GrepMatch>| {
        if batch.is_empty() {
            return;
        }
        let chunk = std::mem::take(batch);
        let partial: Vec<GrepMatch> = chunk
            .par_iter()
            .filter_map(|file| {
                // Another worker already hit the stop condition.
                if stop_scan.load(Ordering::Relaxed) {
                    return None;
                }
                let file_matches = fallback_search_file(
                    file,
                    pattern,
                    max_results,
                    stop_after,
                    &total_matches,
                    &files_searched,
                    &files_with_matches,
                    &truncated,
                    &engine_capped,
                );
                if truncated.load(Ordering::Relaxed)
                    && total_matches.load(Ordering::Relaxed) >= stop_after
                {
                    stop_scan.store(true, Ordering::Relaxed);
                }
                (!file_matches.is_empty()).then_some(file_matches)
            })
            .flatten()
            .collect();
        matches.extend(partial);
    };

    // The walk callback cannot abort the walk; once `stop_scan` is set it
    // simply stops queueing files.
    let walk_truncated =
        for_each_bounded_fallback_walk_file(filter_root, search_root, &filters, |path| {
            if stop_scan.load(Ordering::Relaxed) {
                return;
            }
            batch.push(path.clone());
            if batch.len() >= 256 {
                flush_batch(&mut batch, &mut matches);
            }
        });
    // Search whatever is left in the final, partially filled batch.
    flush_batch(&mut batch, &mut matches);

    sort_grep_matches_by_mtime_desc(&mut matches, project_root);

    GrepResult {
        total_matches: total_matches.load(Ordering::Relaxed),
        matches,
        files_searched: files_searched.load(Ordering::Relaxed),
        files_with_matches: files_with_matches.load(Ordering::Relaxed),
        index_status,
        truncated: truncated.load(Ordering::Relaxed),
        fully_degraded: true,
        engine_capped: engine_capped.load(Ordering::Relaxed),
        walk_truncated,
    }
}
643
644fn fallback_search_file(
645    file: &PathBuf,
646    pattern: &CompiledPattern,
647    max_results: usize,
648    stop_after: usize,
649    total_matches: &AtomicUsize,
650    files_searched: &AtomicUsize,
651    files_with_matches: &AtomicUsize,
652    truncated: &AtomicBool,
653    engine_capped: &AtomicBool,
654) -> Vec<GrepMatch> {
655    if should_stop_fallback_search(truncated, total_matches, stop_after) {
656        engine_capped.store(true, Ordering::Relaxed);
657        return Vec::new();
658    }
659
660    let Some(content) = read_searchable_text(file) else {
661        return Vec::new();
662    };
663    files_searched.fetch_add(1, Ordering::Relaxed);
664
665    let line_starts = line_starts(&content);
666    let mut seen_lines = HashSet::new();
667    let mut matched_this_file = false;
668    let mut matches = Vec::new();
669
670    match pattern {
671        CompiledPattern::Literal(literal) => search_literal_in_text(
672            file,
673            &content,
674            &line_starts,
675            literal,
676            max_results,
677            stop_after,
678            total_matches,
679            &mut seen_lines,
680            truncated,
681            engine_capped,
682            &mut matched_this_file,
683            &mut matches,
684        ),
685        CompiledPattern::Regex { compiled, .. } => {
686            for matched in compiled.find_iter(content.as_bytes()) {
687                if should_stop_fallback_search(truncated, total_matches, stop_after) {
688                    engine_capped.store(true, Ordering::Relaxed);
689                    break;
690                }
691
692                let (line, column, line_text) =
693                    line_details(&content, &line_starts, matched.start());
694                if !seen_lines.insert(line) {
695                    continue;
696                }
697
698                matched_this_file = true;
699                let match_number = total_matches.fetch_add(1, Ordering::Relaxed) + 1;
700                if match_number > max_results {
701                    truncated.store(true, Ordering::Relaxed);
702                    break;
703                }
704
705                matches.push(GrepMatch {
706                    file: file.clone(),
707                    line,
708                    column,
709                    line_text,
710                    match_text: String::from_utf8_lossy(matched.as_bytes()).into_owned(),
711                });
712            }
713        }
714    }
715
716    if matched_this_file {
717        files_with_matches.fetch_add(1, Ordering::Relaxed);
718    }
719
720    matches
721}
722
723fn search_literal_in_text(
724    file: &Path,
725    content: &str,
726    line_starts: &[usize],
727    literal: &LiteralSearch,
728    max_results: usize,
729    stop_after: usize,
730    total_matches: &AtomicUsize,
731    seen_lines: &mut HashSet<u32>,
732    truncated: &AtomicBool,
733    engine_capped: &AtomicBool,
734    matched_this_file: &mut bool,
735    matches: &mut Vec<GrepMatch>,
736) {
737    let content_bytes = content.as_bytes();
738    let search_content;
739    let haystack = if literal.case_insensitive_ascii {
740        search_content = content_bytes.to_ascii_lowercase();
741        search_content.as_slice()
742    } else {
743        content_bytes
744    };
745    let finder = memchr::memmem::Finder::new(&literal.needle);
746    let mut start = 0usize;
747
748    while let Some(position) = finder.find(&haystack[start..]) {
749        if should_stop_fallback_search(truncated, total_matches, stop_after) {
750            engine_capped.store(true, Ordering::Relaxed);
751            break;
752        }
753
754        let offset = start + position;
755        start = offset + 1;
756        let (line, column, line_text) = line_details(content, line_starts, offset);
757        if !seen_lines.insert(line) {
758            continue;
759        }
760
761        *matched_this_file = true;
762        let match_number = total_matches.fetch_add(1, Ordering::Relaxed) + 1;
763        if match_number > max_results {
764            truncated.store(true, Ordering::Relaxed);
765            break;
766        }
767
768        let end = offset + literal.needle.len();
769        matches.push(GrepMatch {
770            file: file.to_path_buf(),
771            line,
772            column,
773            line_text,
774            match_text: String::from_utf8_lossy(&content_bytes[offset..end]).into_owned(),
775        });
776    }
777}
778
/// Returns `true` once the search has been truncated *and* at least
/// `stop_after` matches have been counted.
fn should_stop_fallback_search(
    truncated: &AtomicBool,
    total_matches: &AtomicUsize,
    stop_after: usize,
) -> bool {
    if !truncated.load(Ordering::Relaxed) {
        return false;
    }
    total_matches.load(Ordering::Relaxed) >= stop_after
}
786
787pub(crate) fn ripgrep_glob(
788    search_root: &Path,
789    pattern: &str,
790    max_results: usize,
791) -> Option<FallbackWalkOutcome> {
792    let filters = build_path_filters(&[pattern.to_string()], &[]).ok()?;
793    let mut outcome = bounded_fallback_walk_files(search_root, search_root, &filters);
794    outcome.files.truncate(max_results);
795    Some(outcome)
796}
797
798fn current_index_status(ctx: &AppContext) -> IndexStatus {
799    let index_ready = {
800        let search_index = ctx
801            .search_index()
802            .read()
803            .unwrap_or_else(std::sync::PoisonError::into_inner);
804        search_index.as_ref().is_some_and(|index| index.ready)
805    };
806    if index_ready {
807        return IndexStatus::Ready;
808    }
809
810    let build_in_progress = {
811        let search_index_rx = ctx
812            .search_index_rx()
813            .read()
814            .unwrap_or_else(std::sync::PoisonError::into_inner);
815        search_index_rx.is_some()
816    };
817    let has_index = {
818        let search_index = ctx
819            .search_index()
820            .read()
821            .unwrap_or_else(std::sync::PoisonError::into_inner);
822        search_index.is_some()
823    };
824    if build_in_progress || has_index {
825        IndexStatus::Building
826    } else {
827        IndexStatus::Fallback
828    }
829}
830
/// Returns the byte offset at which every line of `content` begins.
///
/// The first entry is always `0`; each `\n` adds the offset of the byte that
/// follows it, so a trailing newline yields a final entry equal to
/// `content.len()`.
pub fn line_starts(content: &str) -> Vec<usize> {
    std::iter::once(0)
        .chain(
            content
                .match_indices('\n')
                .map(|(newline_at, _)| newline_at + 1),
        )
        .collect()
}
840
/// Locates byte `offset` within `content` and describes the line it is on.
///
/// `line_starts` must hold the byte offset at which each line of `content`
/// begins, in ascending order (as produced by `line_starts`).
///
/// Returns `(line, column, line_text)` where `line` and `column` are 1-based,
/// `column` counts characters (not bytes) from the start of the line, and
/// `line_text` is the line without its `\n` terminator and without trailing
/// `\r` characters.
///
/// `offset` does not have to sit on a UTF-8 character boundary: it is clamped
/// to `content.len()` and snapped back to the start of the character that
/// contains it, so the column refers to that character.
pub fn line_details(content: &str, line_starts: &[usize], offset: usize) -> (u32, u32, String) {
    // An exact hit is the first byte of a line; otherwise the offset belongs
    // to the line that starts just before the insertion point.
    let line_index = match line_starts.binary_search(&offset) {
        Ok(index) => index,
        Err(index) => index.saturating_sub(1),
    };
    let line_start = line_starts.get(line_index).copied().unwrap_or(0);
    let line_end = content[line_start..]
        .find('\n')
        .map_or(content.len(), |length| line_start + length);
    let line_text = content[line_start..line_end]
        .trim_end_matches('\r')
        .to_string();

    // Offsets produced by byte-level searches may fall inside a multi-byte
    // character (or past the end); slicing the string there would panic.
    // Index 0 is always a boundary, so the loop terminates.
    let mut column_end = offset.min(content.len());
    while !content.is_char_boundary(column_end) {
        column_end -= 1;
    }
    let chars_before = content
        .get(line_start..column_end)
        .map_or(0, |prefix| prefix.chars().count());
    let column = chars_before as u32 + 1;

    (line_index as u32 + 1, column, line_text)
}
857
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an `AppContext` whose configured project root is `project`.
    fn context_for(project: &Path) -> AppContext {
        AppContext::new(
            Box::new(crate::parser::TreeSitterProvider::new()),
            crate::config::Config {
                project_root: Some(project.to_path_buf()),
                ..crate::config::Config::default()
            },
        )
    }

    /// Builds a match in `file` at the given position with fixed text.
    fn grep_match(file: &Path, line: u32, column: u32) -> GrepMatch {
        GrepMatch {
            file: file.to_path_buf(),
            line,
            column,
            line_text: "needle".to_string(),
            match_text: "needle".to_string(),
        }
    }

    /// Wraps `matches` in a result whose counters all equal the match count.
    fn result(matches: Vec<GrepMatch>, truncated: bool, status: IndexStatus) -> GrepResult {
        GrepResult {
            total_matches: matches.len(),
            files_searched: matches.len(),
            files_with_matches: matches.len(),
            matches,
            index_status: status,
            truncated,
            fully_degraded: false,
            engine_capped: false,
            walk_truncated: false,
        }
    }

    /// A single requested path resolves to a single root that keeps the
    /// requested cap unchanged.
    #[test]
    fn single_root_uses_requested_max() {
        let project = tempfile::tempdir().expect("project");
        let ctx = context_for(project.path());
        let only = project.path().join("only");
        std::fs::create_dir_all(&only).expect("only");
        let paths = serde_json::json!([only.display().to_string()]);

        let scope = resolve_grep_scope(&ctx, Some(&paths), 10, "test").expect("scope");

        assert!(!scope.multi_root);
        assert_eq!(scope.roots.len(), 1);
        assert_eq!(scope.per_root_max, 10);
    }

    /// Two sibling paths resolve to two roots, each with twice the cap.
    #[test]
    fn multi_root_uses_double_per_root_max() {
        let project = tempfile::tempdir().expect("project");
        let ctx = context_for(project.path());
        let left = project.path().join("left");
        let right = project.path().join("right");
        std::fs::create_dir_all(&left).expect("left");
        std::fs::create_dir_all(&right).expect("right");
        let paths = serde_json::json!([left.display().to_string(), right.display().to_string()]);

        let scope = resolve_grep_scope(&ctx, Some(&paths), 10, "test").expect("scope");

        assert!(scope.multi_root);
        assert_eq!(scope.per_root_max, 20);
    }

    /// More matching files than the cap: the walk is flagged as truncated and
    /// returns exactly the cap.
    #[test]
    fn bounded_fallback_walk_truncates_at_file_cap() {
        let dir = tempfile::tempdir().expect("tempdir");
        let root = dir.path();
        for i in 0..25 {
            let path = root.join(format!("file_{i:03}.txt"));
            std::fs::write(path, "needle\n").expect("write");
        }
        let filters = build_path_filters(&["**/*.txt".to_string()], &[]).expect("filters");
        let outcome = bounded_fallback_walk_files_with_limits(
            root,
            root,
            &filters,
            20,
            Duration::from_secs(60),
        );
        assert!(outcome.walk_truncated);
        assert_eq!(outcome.files.len(), 20);
    }

    /// A tree well below the limits is returned in full and not flagged.
    #[test]
    fn bounded_fallback_walk_small_tree_not_truncated() {
        let dir = tempfile::tempdir().expect("tempdir");
        let root = dir.path();
        std::fs::write(root.join("a.txt"), "x\n").expect("write");
        std::fs::write(root.join("b.txt"), "x\n").expect("write");
        let filters = build_path_filters(&["**/*.txt".to_string()], &[]).expect("filters");
        let outcome = bounded_fallback_walk_files(root, root, &filters);
        assert!(!outcome.walk_truncated);
        assert_eq!(outcome.files.len(), 2);
    }

    #[test]
    fn filter_root_is_project_for_in_project_and_search_root_for_external_unindexed() {
        let project = PathBuf::from("/project");
        let in_project = compute_filter_root(&project, Path::new("/project/src"), true, false);
        let external = compute_filter_root(&project, Path::new("/tmp/external"), false, true);
        assert_eq!(in_project, project);
        assert_eq!(external, PathBuf::from("/tmp/external"));
    }

    #[test]
    fn weakest_status_orders_disabled_fallback_building_ready() {
        assert_eq!(
            weakest_index_status(IndexStatus::Ready, IndexStatus::Building),
            IndexStatus::Building
        );
        assert_eq!(
            weakest_index_status(IndexStatus::Building, IndexStatus::Fallback),
            IndexStatus::Fallback
        );
        assert_eq!(
            weakest_index_status(IndexStatus::Fallback, IndexStatus::Disabled),
            IndexStatus::Disabled
        );
    }

    /// The same position reached through a file and through a symlink to it
    /// is reported once.
    #[test]
    fn merge_dedupes_by_canonical_file_line_column() {
        let temp = tempfile::tempdir().expect("temp");
        let file = temp.path().join("file.rs");
        std::fs::write(&file, "needle").expect("write");
        let symlink = temp.path().join("link.rs");
        #[cfg(unix)]
        std::os::unix::fs::symlink(&file, &symlink).expect("symlink");
        #[cfg(windows)]
        std::os::windows::fs::symlink_file(&file, &symlink).expect("symlink");

        let merged = merge_grep_results(
            vec![
                result(vec![grep_match(&file, 1, 1)], false, IndexStatus::Ready),
                result(vec![grep_match(&symlink, 1, 1)], false, IndexStatus::Ready),
            ],
            temp.path(),
            10,
        );

        assert_eq!(merged.matches.len(), 1);
    }

    #[test]
    fn merge_truncated_when_child_truncated_or_pre_merge_exceeds_max() {
        let root = Path::new("/project");
        // A truncated child makes the merged result truncated.
        let child = merge_grep_results(
            vec![result(
                vec![grep_match(Path::new("/project/a.rs"), 1, 1)],
                true,
                IndexStatus::Ready,
            )],
            root,
            10,
        );
        assert!(child.truncated);

        // Two untruncated children whose combined total exceeds the cap.
        let many = merge_grep_results(
            vec![
                result(
                    vec![grep_match(Path::new("/project/a.rs"), 1, 1)],
                    false,
                    IndexStatus::Ready,
                ),
                result(
                    vec![grep_match(Path::new("/project/b.rs"), 1, 1)],
                    false,
                    IndexStatus::Ready,
                ),
            ],
            root,
            1,
        );
        assert!(many.truncated);
    }
}