Skip to main content

aft/
grep_executor.rs

1use std::collections::HashSet;
2use std::env;
3use std::path::{Path, PathBuf};
4use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
5use std::sync::Arc;
6use std::time::{Duration, Instant};
7
8use ignore::WalkBuilder;
9use rayon::prelude::*;
10
11use crate::commands::multi_path::{
12    canonical_key, dedupe_nested_paths, resolve_path_or_multi, SearchPathResolution,
13};
14use crate::context::AppContext;
15use crate::pattern_compile::{CompiledPattern, LiteralSearch};
16use crate::protocol::Response;
17use crate::search_index::{
18    build_path_filters, has_any_project_file_from, read_searchable_text, resolve_search_scope,
19    sort_grep_matches_by_mtime_desc, sort_paths_by_mtime_desc, GrepMatch, GrepResult, IndexStatus,
20    PathFilters,
21};
22
/// Maximum files enumerated during grep/glob index-unavailable fallback walks.
///
/// Walks that would exceed this cap stop early and report themselves as truncated.
pub(crate) const MAX_FALLBACK_WALK_FILES: usize = 50_000;
/// Wall-clock budget for grep/glob index-unavailable fallback walks on the dispatch thread.
///
/// Walks that run past this budget stop early and report themselves as truncated.
pub(crate) const FALLBACK_WALK_BUDGET: Duration = Duration::from_secs(10);
27
/// Result of a bounded fallback walk over the file tree.
#[derive(Clone, Debug)]
pub struct FallbackWalkOutcome {
    /// Files that passed the path filters, capped at the walk's file limit.
    pub files: Vec<PathBuf>,
    /// `true` when the walk stopped early because it hit the file cap or the
    /// wall-clock budget, so `files` may be incomplete.
    pub walk_truncated: bool,
}
33
/// Caller-supplied grep options that are independent of the search roots.
#[derive(Clone, Debug)]
pub struct GrepParams {
    /// Include globs forwarded to the index search and the fallback path filters.
    pub include: Vec<String>,
    /// Exclude globs forwarded to the index search and the fallback path filters.
    pub exclude: Vec<String>,
    /// Maximum number of matches returned to the caller.
    pub max_results: usize,
}
40
/// The resolved set of roots a grep request will search.
#[derive(Clone, Debug)]
pub struct GrepScope {
    /// Every root to search, in resolution order.
    pub roots: Vec<ResolvedRoot>,
    /// `true` when more than one root was resolved.
    pub multi_root: bool,
    /// Match cap applied to each root individually; `resolve_grep_scope`
    /// doubles the requested maximum for multi-root scopes.
    pub per_root_max: usize,
}
47
/// One search root together with how it should be searched.
#[derive(Clone, Debug)]
pub struct ResolvedRoot {
    /// Directory or file that is actually searched.
    pub search_root: PathBuf,
    /// Root handed to the path filters when matching include/exclude globs
    /// (see `compute_filter_root`).
    pub filter_root: PathBuf,
    /// Whether the search index may serve this root.
    pub use_index: bool,
    /// Set by `resolve_grep_scope` to the negation of `use_index`.
    pub is_external: bool,
}
55
56pub fn project_root(ctx: &AppContext) -> PathBuf {
57    let project_root = ctx
58        .config()
59        .project_root
60        .clone()
61        .unwrap_or_else(|| env::current_dir().unwrap_or_default());
62    std::fs::canonicalize(&project_root).unwrap_or(project_root)
63}
64
65pub fn resolve_grep_scope(
66    ctx: &AppContext,
67    paths: Option<&serde_json::Value>,
68    max_results: usize,
69    req_id: &str,
70) -> Result<GrepScope, Response> {
71    let project_root = project_root(ctx);
72    let search_roots = resolve_roots(ctx, paths, &project_root, req_id)?;
73
74    if let Some(missing_root) = search_roots.iter().find(|root| !root.exists()) {
75        return Err(Response::error(
76            req_id,
77            "path_not_found",
78            format!(
79                "grep: search path does not exist: {}",
80                missing_root.display()
81            ),
82        ));
83    }
84
85    let roots = search_roots
86        .into_iter()
87        .map(|search_root| {
88            let scope = resolve_search_scope(&project_root, Some(&search_root.to_string_lossy()));
89            let is_external = !scope.use_index;
90            let filter_root =
91                compute_filter_root(&project_root, &scope.root, scope.use_index, is_external);
92            ResolvedRoot {
93                search_root: scope.root,
94                filter_root,
95                use_index: scope.use_index,
96                is_external,
97            }
98        })
99        .collect::<Vec<_>>();
100
101    let multi_root = roots.len() > 1;
102    let per_root_max = if multi_root {
103        max_results.saturating_mul(2).max(max_results)
104    } else {
105        max_results
106    };
107
108    Ok(GrepScope {
109        roots,
110        multi_root,
111        per_root_max,
112    })
113}
114
/// Chooses the root against which include/exclude globs are evaluated.
///
/// Returns `search_root` only for an external root that is not served by the
/// index; every other combination anchors filtering at `project_root`.
pub fn compute_filter_root(
    project_root: &Path,
    search_root: &Path,
    use_index: bool,
    is_external: bool,
) -> PathBuf {
    let anchor = match (is_external, use_index) {
        (true, false) => search_root,
        _ => project_root,
    };
    anchor.to_path_buf()
}
127
128pub fn scope_has_files(project_root: &Path, scope: &GrepScope) -> bool {
129    scope.roots.iter().any(|root| {
130        // An explicitly-named existing file is always in scope (it's searched
131        // directly even if gitignored / .aftignored), so don't report it as
132        // "no files matched scope".
133        if root.search_root.is_file() {
134            return true;
135        }
136        let catch_all =
137            build_path_filters(&["**/*".to_string()], &[]).expect("valid catch-all glob");
138        has_any_project_file_from(&root.filter_root, &root.search_root, &catch_all)
139            || has_any_project_file_from(project_root, &root.search_root, &catch_all)
140    })
141}
142
143pub fn execute(
144    ctx: &AppContext,
145    pattern: &CompiledPattern,
146    scope: &GrepScope,
147    params: &GrepParams,
148) -> GrepResult {
149    let project_root = project_root(ctx);
150    if scope.roots.len() == 1 {
151        return execute_root(
152            ctx,
153            pattern,
154            &scope.roots[0],
155            params,
156            params.max_results,
157            &project_root,
158        );
159    }
160
161    let mut results = Vec::new();
162    for root in &scope.roots {
163        results.push(execute_root(
164            ctx,
165            pattern,
166            root,
167            params,
168            scope.per_root_max,
169            &project_root,
170        ));
171    }
172    merge_grep_results(results, &project_root, params.max_results)
173}
174
175fn resolve_roots(
176    ctx: &AppContext,
177    paths: Option<&serde_json::Value>,
178    project_root: &Path,
179    req_id: &str,
180) -> Result<Vec<PathBuf>, Response> {
181    let Some(paths) = paths else {
182        return Ok(vec![resolve_search_scope(project_root, None).root]);
183    };
184    if paths.is_null() {
185        return Ok(vec![resolve_search_scope(project_root, None).root]);
186    }
187    if let Some(path) = paths.as_str() {
188        return match resolve_path_or_multi(
189            path,
190            project_root,
191            |candidate| ctx.validate_path(req_id, candidate),
192            req_id,
193        )? {
194            SearchPathResolution::Single(root) => Ok(vec![root]),
195            SearchPathResolution::Multi(roots) => Ok(roots),
196        };
197    }
198    if let Some(items) = paths.as_array() {
199        let mut roots = Vec::with_capacity(items.len());
200        for item in items {
201            let Some(path) = item.as_str() else {
202                return Err(Response::error(
203                    req_id,
204                    "invalid_request",
205                    "grep: path array entries must be strings",
206                ));
207            };
208            let validated = ctx.validate_path(req_id, Path::new(path))?;
209            let raw = validated.to_string_lossy();
210            roots.push(resolve_search_scope(project_root, Some(raw.as_ref())).root);
211        }
212        let roots = dedupe_nested_paths(roots);
213        if roots.is_empty() {
214            Ok(vec![resolve_search_scope(project_root, None).root])
215        } else {
216            Ok(roots)
217        }
218    } else {
219        Err(Response::error(
220            req_id,
221            "invalid_request",
222            "grep: path must be a string, array of strings, or null",
223        ))
224    }
225}
226
227fn execute_root(
228    ctx: &AppContext,
229    pattern: &CompiledPattern,
230    root: &ResolvedRoot,
231    params: &GrepParams,
232    max_results: usize,
233    project_root: &Path,
234) -> GrepResult {
235    // Explicit single-file scope: search the named file directly, bypassing the
236    // trigram index and the gitignore/.aftignore-aware walk. Matches ripgrep,
237    // where naming a file explicitly searches it even when it is gitignored,
238    // .aftignored, or not yet indexed. Binary + UTF-8 guards still apply.
239    if root.search_root.is_file() {
240        let index_status = if root.use_index {
241            current_index_status(ctx)
242        } else {
243            IndexStatus::Fallback
244        };
245        return grep_explicit_file(&root.search_root, pattern, max_results, index_status);
246    }
247
248    let indexed = {
249        let search_index = ctx
250            .search_index()
251            .read()
252            .unwrap_or_else(std::sync::PoisonError::into_inner);
253        match search_index.as_ref() {
254            Some(index) if index.ready && root.use_index => Some(index.search_grep(
255                pattern,
256                &params.include,
257                &params.exclude,
258                &root.search_root,
259                max_results,
260            )),
261            _ => None,
262        }
263    };
264
265    match indexed {
266        Some(result) => result,
267        None => {
268            let index_status = if root.use_index {
269                current_index_status(ctx)
270            } else {
271                IndexStatus::Fallback
272            };
273            fallback_grep(
274                project_root,
275                &root.search_root,
276                &root.filter_root,
277                pattern,
278                &params.include,
279                &params.exclude,
280                max_results,
281                index_status,
282            )
283        }
284    }
285}
286
287/// Grep a single explicitly-named file directly, bypassing the trigram index
288/// and the gitignore/.aftignore-aware walk. Used when the caller's `path`
289/// resolves to one existing file — ripgrep semantics: an explicitly-named file
290/// is searched even when it is gitignored, `.aftignore`d, or not yet indexed.
291/// Binary detection and UTF-8 guards still apply (via `read_searchable_text`
292/// inside `fallback_search_file`).
293fn grep_explicit_file(
294    file: &Path,
295    pattern: &CompiledPattern,
296    max_results: usize,
297    index_status: IndexStatus,
298) -> GrepResult {
299    let total_matches = AtomicUsize::new(0);
300    let files_searched = AtomicUsize::new(0);
301    let files_with_matches = AtomicUsize::new(0);
302    let truncated = AtomicBool::new(false);
303    let engine_capped = AtomicBool::new(false);
304    let stop_after = max_results.saturating_mul(2);
305
306    let matches = fallback_search_file(
307        &file.to_path_buf(),
308        pattern,
309        max_results,
310        stop_after,
311        &total_matches,
312        &files_searched,
313        &files_with_matches,
314        &truncated,
315        &engine_capped,
316    );
317
318    GrepResult {
319        total_matches: total_matches.load(Ordering::Relaxed),
320        matches,
321        files_searched: files_searched.load(Ordering::Relaxed),
322        files_with_matches: files_with_matches.load(Ordering::Relaxed),
323        index_status,
324        truncated: truncated.load(Ordering::Relaxed),
325        fully_degraded: false,
326        engine_capped: engine_capped.load(Ordering::Relaxed),
327        walk_truncated: false,
328    }
329}
330
331pub fn merge_grep_results(
332    results: Vec<GrepResult>,
333    project_root: &Path,
334    max_results: usize,
335) -> GrepResult {
336    let mut matches = Vec::new();
337    let mut total_matches = 0usize;
338    let mut files_searched = 0usize;
339    let mut files_with_matches = 0usize;
340    let mut index_status = IndexStatus::Ready;
341    let mut any_child_truncated = false;
342    let mut fully_degraded = false;
343    let mut engine_capped = false;
344    let mut walk_truncated = false;
345    let mut seen_match_keys = HashSet::new();
346
347    for result in results {
348        total_matches += result.total_matches;
349        files_searched += result.files_searched;
350        files_with_matches += result.files_with_matches;
351        index_status = weakest_index_status(index_status, result.index_status);
352        any_child_truncated |= result.truncated;
353        fully_degraded |= result.fully_degraded;
354        engine_capped |= result.engine_capped;
355        walk_truncated |= result.walk_truncated;
356
357        for grep_match in result.matches {
358            let file_key = canonical_key(&grep_match.file);
359            let match_key = (file_key, grep_match.line, grep_match.column);
360            if seen_match_keys.insert(match_key) {
361                matches.push(grep_match);
362            }
363        }
364    }
365
366    sort_grep_matches_by_mtime_desc(&mut matches, project_root);
367    if matches.len() > max_results {
368        matches.truncate(max_results);
369    }
370
371    GrepResult {
372        matches,
373        total_matches,
374        files_searched,
375        files_with_matches,
376        index_status,
377        truncated: any_child_truncated || total_matches > max_results,
378        fully_degraded,
379        engine_capped,
380        walk_truncated,
381    }
382}
383
384fn fallback_project_walk_builder(search_root: &Path) -> WalkBuilder {
385    let mut builder = WalkBuilder::new(search_root);
386    builder
387        .hidden(false)
388        .git_ignore(true)
389        .git_global(true)
390        .git_exclude(true)
391        .add_custom_ignore_filename(".aftignore")
392        .filter_entry(|entry| {
393            let name = entry.file_name().to_string_lossy();
394            if entry.file_type().map_or(false, |ft| ft.is_dir()) {
395                return !matches!(
396                    name.as_ref(),
397                    "node_modules"
398                        | "target"
399                        | "venv"
400                        | ".venv"
401                        | ".git"
402                        | "__pycache__"
403                        | ".tox"
404                        | "dist"
405                        | "build"
406                );
407            }
408            true
409        });
410    builder
411}
412
413/// Bounded project walk used when the trigram index is unavailable (grep/glob fallback).
414pub(crate) fn bounded_fallback_walk_files(
415    filter_root: &Path,
416    search_root: &Path,
417    filters: &PathFilters,
418) -> FallbackWalkOutcome {
419    bounded_fallback_walk_files_with_limits(
420        filter_root,
421        search_root,
422        filters,
423        MAX_FALLBACK_WALK_FILES,
424        FALLBACK_WALK_BUDGET,
425    )
426}
427
428fn bounded_fallback_walk_files_with_limits(
429    filter_root: &Path,
430    search_root: &Path,
431    filters: &PathFilters,
432    max_files: usize,
433    budget: Duration,
434) -> FallbackWalkOutcome {
435    let started = Instant::now();
436    let mut files = Vec::new();
437    let mut walk_truncated = false;
438    let builder = fallback_project_walk_builder(search_root);
439
440    for entry in builder.build().filter_map(|entry| entry.ok()) {
441        if started.elapsed() >= budget {
442            walk_truncated = true;
443            break;
444        }
445        if !entry
446            .file_type()
447            .map_or(false, |file_type| file_type.is_file())
448        {
449            continue;
450        }
451        let path = entry.into_path();
452        if filters.matches(filter_root, &path) {
453            files.push(path);
454            if files.len() > max_files {
455                walk_truncated = true;
456                files.truncate(max_files);
457                break;
458            }
459        }
460    }
461
462    sort_paths_by_mtime_desc(&mut files);
463    FallbackWalkOutcome {
464        files,
465        walk_truncated,
466    }
467}
468
469pub(crate) fn for_each_bounded_fallback_walk_file<F>(
470    filter_root: &Path,
471    search_root: &Path,
472    filters: &PathFilters,
473    mut on_file: F,
474) -> bool
475where
476    F: FnMut(&PathBuf),
477{
478    for_each_bounded_fallback_walk_file_with_limits(
479        filter_root,
480        search_root,
481        filters,
482        MAX_FALLBACK_WALK_FILES,
483        FALLBACK_WALK_BUDGET,
484        &mut on_file,
485    )
486}
487
488fn for_each_bounded_fallback_walk_file_with_limits<F>(
489    filter_root: &Path,
490    search_root: &Path,
491    filters: &PathFilters,
492    max_files: usize,
493    budget: Duration,
494    on_file: &mut F,
495) -> bool
496where
497    F: FnMut(&PathBuf),
498{
499    let started = Instant::now();
500    let mut files_seen = 0usize;
501    let builder = fallback_project_walk_builder(search_root);
502
503    for entry in builder.build().filter_map(|entry| entry.ok()) {
504        if started.elapsed() >= budget {
505            return true;
506        }
507        if !entry
508            .file_type()
509            .map_or(false, |file_type| file_type.is_file())
510        {
511            continue;
512        }
513        let path = entry.into_path();
514        if filters.matches(filter_root, &path) {
515            files_seen += 1;
516            if files_seen > max_files {
517                return true;
518            }
519            on_file(&path);
520        }
521    }
522    false
523}
524
525pub fn weakest_index_status(left: IndexStatus, right: IndexStatus) -> IndexStatus {
526    match (left, right) {
527        (IndexStatus::Disabled, _) | (_, IndexStatus::Disabled) => IndexStatus::Disabled,
528        (IndexStatus::Fallback, _) | (_, IndexStatus::Fallback) => IndexStatus::Fallback,
529        (IndexStatus::Building, _) | (_, IndexStatus::Building) => IndexStatus::Building,
530        (IndexStatus::Ready, IndexStatus::Ready) => IndexStatus::Ready,
531    }
532}
533
534/// Hidden entry for `search_startup_bench` timing (fallback grep path).
535#[doc(hidden)]
536pub fn fallback_grep_bench(
537    project_root: &Path,
538    search_root: &Path,
539    filter_root: &Path,
540    pattern: &CompiledPattern,
541    include: &[String],
542    exclude: &[String],
543    max_results: usize,
544) -> GrepResult {
545    fallback_grep(
546        project_root,
547        search_root,
548        filter_root,
549        pattern,
550        include,
551        exclude,
552        max_results,
553        IndexStatus::Fallback,
554    )
555}
556
557fn fallback_grep(
558    project_root: &Path,
559    search_root: &Path,
560    filter_root: &Path,
561    pattern: &CompiledPattern,
562    include: &[String],
563    exclude: &[String],
564    max_results: usize,
565    index_status: IndexStatus,
566) -> GrepResult {
567    let filters = build_path_filters(include, exclude).unwrap_or_default();
568
569    let total_matches = AtomicUsize::new(0);
570    let files_searched = AtomicUsize::new(0);
571    let files_with_matches = AtomicUsize::new(0);
572    let truncated = AtomicBool::new(false);
573    let engine_capped = AtomicBool::new(false);
574    let stop_after = max_results.saturating_mul(2);
575    let stop_scan = Arc::new(AtomicBool::new(false));
576
577    let mut matches = Vec::new();
578    let mut batch: Vec<PathBuf> = Vec::with_capacity(256);
579
580    let flush_batch = |batch: &mut Vec<PathBuf>, matches: &mut Vec<GrepMatch>| {
581        if batch.is_empty() {
582            return;
583        }
584        let chunk = std::mem::take(batch);
585        let partial: Vec<GrepMatch> = chunk
586            .par_iter()
587            .filter_map(|file| {
588                if stop_scan.load(Ordering::Relaxed) {
589                    return None;
590                }
591                let file_matches = fallback_search_file(
592                    file,
593                    pattern,
594                    max_results,
595                    stop_after,
596                    &total_matches,
597                    &files_searched,
598                    &files_with_matches,
599                    &truncated,
600                    &engine_capped,
601                );
602                if truncated.load(Ordering::Relaxed)
603                    && total_matches.load(Ordering::Relaxed) >= stop_after
604                {
605                    stop_scan.store(true, Ordering::Relaxed);
606                }
607                (!file_matches.is_empty()).then_some(file_matches)
608            })
609            .flatten()
610            .collect();
611        matches.extend(partial);
612    };
613
614    let walk_truncated =
615        for_each_bounded_fallback_walk_file(filter_root, search_root, &filters, |path| {
616            if stop_scan.load(Ordering::Relaxed) {
617                return;
618            }
619            batch.push(path.clone());
620            if batch.len() >= 256 {
621                flush_batch(&mut batch, &mut matches);
622            }
623        });
624    flush_batch(&mut batch, &mut matches);
625
626    sort_grep_matches_by_mtime_desc(&mut matches, project_root);
627
628    GrepResult {
629        total_matches: total_matches.load(Ordering::Relaxed),
630        matches,
631        files_searched: files_searched.load(Ordering::Relaxed),
632        files_with_matches: files_with_matches.load(Ordering::Relaxed),
633        index_status,
634        truncated: truncated.load(Ordering::Relaxed),
635        fully_degraded: true,
636        engine_capped: engine_capped.load(Ordering::Relaxed),
637        walk_truncated,
638    }
639}
640
641fn fallback_search_file(
642    file: &PathBuf,
643    pattern: &CompiledPattern,
644    max_results: usize,
645    stop_after: usize,
646    total_matches: &AtomicUsize,
647    files_searched: &AtomicUsize,
648    files_with_matches: &AtomicUsize,
649    truncated: &AtomicBool,
650    engine_capped: &AtomicBool,
651) -> Vec<GrepMatch> {
652    if should_stop_fallback_search(truncated, total_matches, stop_after) {
653        engine_capped.store(true, Ordering::Relaxed);
654        return Vec::new();
655    }
656
657    let Some(content) = read_searchable_text(file) else {
658        return Vec::new();
659    };
660    files_searched.fetch_add(1, Ordering::Relaxed);
661
662    let line_starts = line_starts(&content);
663    let mut seen_lines = HashSet::new();
664    let mut matched_this_file = false;
665    let mut matches = Vec::new();
666
667    match pattern {
668        CompiledPattern::Literal(literal) => search_literal_in_text(
669            file,
670            &content,
671            &line_starts,
672            literal,
673            max_results,
674            stop_after,
675            total_matches,
676            &mut seen_lines,
677            truncated,
678            engine_capped,
679            &mut matched_this_file,
680            &mut matches,
681        ),
682        CompiledPattern::Regex { compiled, .. } => {
683            for matched in compiled.find_iter(content.as_bytes()) {
684                if should_stop_fallback_search(truncated, total_matches, stop_after) {
685                    engine_capped.store(true, Ordering::Relaxed);
686                    break;
687                }
688
689                let (line, column, line_text) =
690                    line_details(&content, &line_starts, matched.start());
691                if !seen_lines.insert(line) {
692                    continue;
693                }
694
695                matched_this_file = true;
696                let match_number = total_matches.fetch_add(1, Ordering::Relaxed) + 1;
697                if match_number > max_results {
698                    truncated.store(true, Ordering::Relaxed);
699                    break;
700                }
701
702                matches.push(GrepMatch {
703                    file: file.clone(),
704                    line,
705                    column,
706                    line_text,
707                    match_text: String::from_utf8_lossy(matched.as_bytes()).into_owned(),
708                });
709            }
710        }
711    }
712
713    if matched_this_file {
714        files_with_matches.fetch_add(1, Ordering::Relaxed);
715    }
716
717    matches
718}
719
720fn search_literal_in_text(
721    file: &Path,
722    content: &str,
723    line_starts: &[usize],
724    literal: &LiteralSearch,
725    max_results: usize,
726    stop_after: usize,
727    total_matches: &AtomicUsize,
728    seen_lines: &mut HashSet<u32>,
729    truncated: &AtomicBool,
730    engine_capped: &AtomicBool,
731    matched_this_file: &mut bool,
732    matches: &mut Vec<GrepMatch>,
733) {
734    let content_bytes = content.as_bytes();
735    let search_content;
736    let haystack = if literal.case_insensitive_ascii {
737        search_content = content_bytes.to_ascii_lowercase();
738        search_content.as_slice()
739    } else {
740        content_bytes
741    };
742    let finder = memchr::memmem::Finder::new(&literal.needle);
743    let mut start = 0usize;
744
745    while let Some(position) = finder.find(&haystack[start..]) {
746        if should_stop_fallback_search(truncated, total_matches, stop_after) {
747            engine_capped.store(true, Ordering::Relaxed);
748            break;
749        }
750
751        let offset = start + position;
752        start = offset + 1;
753        let (line, column, line_text) = line_details(content, line_starts, offset);
754        if !seen_lines.insert(line) {
755            continue;
756        }
757
758        *matched_this_file = true;
759        let match_number = total_matches.fetch_add(1, Ordering::Relaxed) + 1;
760        if match_number > max_results {
761            truncated.store(true, Ordering::Relaxed);
762            break;
763        }
764
765        let end = offset + literal.needle.len();
766        matches.push(GrepMatch {
767            file: file.to_path_buf(),
768            line,
769            column,
770            line_text,
771            match_text: String::from_utf8_lossy(&content_bytes[offset..end]).into_owned(),
772        });
773    }
774}
775
/// Returns `true` once the search should be abandoned: the result is already
/// truncated *and* the shared match count has reached `stop_after`.
fn should_stop_fallback_search(
    truncated: &AtomicBool,
    total_matches: &AtomicUsize,
    stop_after: usize,
) -> bool {
    if !truncated.load(Ordering::Relaxed) {
        return false;
    }
    total_matches.load(Ordering::Relaxed) >= stop_after
}
783
784pub(crate) fn ripgrep_glob(
785    search_root: &Path,
786    pattern: &str,
787    max_results: usize,
788) -> Option<FallbackWalkOutcome> {
789    let filters = build_path_filters(&[pattern.to_string()], &[]).ok()?;
790    let mut outcome = bounded_fallback_walk_files(search_root, search_root, &filters);
791    outcome.files.truncate(max_results);
792    Some(outcome)
793}
794
795fn current_index_status(ctx: &AppContext) -> IndexStatus {
796    let index_ready = {
797        let search_index = ctx
798            .search_index()
799            .read()
800            .unwrap_or_else(std::sync::PoisonError::into_inner);
801        search_index.as_ref().is_some_and(|index| index.ready)
802    };
803    if index_ready {
804        return IndexStatus::Ready;
805    }
806
807    let build_in_progress = {
808        let search_index_rx = ctx
809            .search_index_rx()
810            .read()
811            .unwrap_or_else(std::sync::PoisonError::into_inner);
812        search_index_rx.is_some()
813    };
814    let has_index = {
815        let search_index = ctx
816            .search_index()
817            .read()
818            .unwrap_or_else(std::sync::PoisonError::into_inner);
819        search_index.is_some()
820    };
821    if build_in_progress || has_index {
822        IndexStatus::Building
823    } else {
824        IndexStatus::Fallback
825    }
826}
827
/// Returns the byte offset at which each line of `content` begins.
///
/// The first entry is always `0`; every `\n` contributes the offset of the
/// byte that follows it, so content ending in a newline yields a final entry
/// equal to `content.len()`.
pub fn line_starts(content: &str) -> Vec<usize> {
    std::iter::once(0)
        .chain(
            content
                .match_indices('\n')
                .map(|(newline_at, _)| newline_at + 1),
        )
        .collect()
}
837
/// Resolves a byte `offset` in `content` to its 1-based line number, 1-based
/// column (counted in characters from the start of the line), and the text of
/// that line without its trailing `\n` / `\r`.
///
/// `line_starts` must be the sorted line-start offsets of `content` (as
/// produced by `line_starts`).
///
/// Offsets that cannot be sliced safely are normalised instead of panicking:
/// an offset past the end is clamped to `content.len()`, and an offset inside
/// a multi-byte UTF-8 sequence (possible for byte-oriented regex matches) is
/// moved back to the start of that character.
pub fn line_details(content: &str, line_starts: &[usize], offset: usize) -> (u32, u32, String) {
    let mut offset = offset.min(content.len());
    // Offset 0 is always a boundary, so this loop terminates.
    while !content.is_char_boundary(offset) {
        offset -= 1;
    }

    let line_index = match line_starts.binary_search(&offset) {
        Ok(index) => index,
        Err(index) => index.saturating_sub(1),
    };
    // Never let the line start run past the offset, so the column slice below
    // is always a valid range.
    let line_start = line_starts
        .get(line_index)
        .copied()
        .unwrap_or(0)
        .min(offset);
    let line_end = content[line_start..]
        .find('\n')
        .map(|length| line_start + length)
        .unwrap_or(content.len());
    let line_text = content[line_start..line_end]
        .trim_end_matches('\r')
        .to_string();
    let column = content[line_start..offset].chars().count() as u32 + 1;
    (line_index as u32 + 1, column, line_text)
}
854
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a minimal match at `file:line:column` whose text is `needle`.
    fn grep_match(file: &Path, line: u32, column: u32) -> GrepMatch {
        GrepMatch {
            file: file.to_path_buf(),
            line,
            column,
            line_text: "needle".to_string(),
            match_text: "needle".to_string(),
        }
    }

    /// Wraps `matches` in a `GrepResult` whose counters all equal the number
    /// of matches and whose degradation flags are unset.
    fn result(matches: Vec<GrepMatch>, truncated: bool, status: IndexStatus) -> GrepResult {
        GrepResult {
            total_matches: matches.len(),
            files_searched: matches.len(),
            files_with_matches: matches.len(),
            matches,
            index_status: status,
            truncated,
            fully_degraded: false,
            engine_capped: false,
            walk_truncated: false,
        }
    }

    // Resolve a real scope rather than asserting on a hand-built struct, so
    // the single-root branch of `resolve_grep_scope` is actually exercised.
    #[test]
    fn single_root_uses_requested_max() {
        let project = tempfile::tempdir().expect("project");
        let ctx = AppContext::new(
            Box::new(crate::parser::TreeSitterProvider::new()),
            crate::config::Config {
                project_root: Some(project.path().to_path_buf()),
                ..crate::config::Config::default()
            },
        );

        let scope = resolve_grep_scope(&ctx, None, 10, "test").expect("scope");

        assert_eq!(scope.roots.len(), 1);
        assert!(!scope.multi_root);
        assert_eq!(scope.per_root_max, 10);
    }

    #[test]
    fn multi_root_uses_double_per_root_max() {
        let project = tempfile::tempdir().expect("project");
        let ctx = AppContext::new(
            Box::new(crate::parser::TreeSitterProvider::new()),
            crate::config::Config {
                project_root: Some(project.path().to_path_buf()),
                ..crate::config::Config::default()
            },
        );
        let left = project.path().join("left");
        let right = project.path().join("right");
        std::fs::create_dir_all(&left).expect("left");
        std::fs::create_dir_all(&right).expect("right");
        let paths = serde_json::json!([left.display().to_string(), right.display().to_string()]);

        let scope = resolve_grep_scope(&ctx, Some(&paths), 10, "test").expect("scope");

        assert!(scope.multi_root);
        assert_eq!(scope.per_root_max, 20);
    }

    #[test]
    fn bounded_fallback_walk_truncates_at_file_cap() {
        let dir = tempfile::tempdir().expect("tempdir");
        let root = dir.path();
        for i in 0..25 {
            let path = root.join(format!("file_{i:03}.txt"));
            std::fs::write(path, "needle\n").expect("write");
        }
        let filters = build_path_filters(&["**/*.txt".to_string()], &[]).expect("filters");
        let outcome = bounded_fallback_walk_files_with_limits(
            root,
            root,
            &filters,
            20,
            Duration::from_secs(60),
        );
        assert!(outcome.walk_truncated);
        assert_eq!(outcome.files.len(), 20);
    }

    #[test]
    fn bounded_fallback_walk_small_tree_not_truncated() {
        let dir = tempfile::tempdir().expect("tempdir");
        let root = dir.path();
        std::fs::write(root.join("a.txt"), "x\n").expect("write");
        std::fs::write(root.join("b.txt"), "x\n").expect("write");
        let filters = build_path_filters(&["**/*.txt".to_string()], &[]).expect("filters");
        let outcome = bounded_fallback_walk_files(root, root, &filters);
        assert!(!outcome.walk_truncated);
        assert_eq!(outcome.files.len(), 2);
    }

    #[test]
    fn filter_root_is_project_for_in_project_and_search_root_for_external_unindexed() {
        let project = PathBuf::from("/project");
        let in_project = compute_filter_root(&project, Path::new("/project/src"), true, false);
        let external = compute_filter_root(&project, Path::new("/tmp/external"), false, true);
        assert_eq!(in_project, project);
        assert_eq!(external, PathBuf::from("/tmp/external"));
    }

    #[test]
    fn weakest_status_orders_disabled_fallback_building_ready() {
        assert_eq!(
            weakest_index_status(IndexStatus::Ready, IndexStatus::Building),
            IndexStatus::Building
        );
        assert_eq!(
            weakest_index_status(IndexStatus::Building, IndexStatus::Fallback),
            IndexStatus::Fallback
        );
        assert_eq!(
            weakest_index_status(IndexStatus::Fallback, IndexStatus::Disabled),
            IndexStatus::Disabled
        );
    }

    #[test]
    fn merge_dedupes_by_canonical_file_line_column() {
        let temp = tempfile::tempdir().expect("temp");
        let file = temp.path().join("file.rs");
        std::fs::write(&file, "needle").expect("write");
        let symlink = temp.path().join("link.rs");
        #[cfg(unix)]
        std::os::unix::fs::symlink(&file, &symlink).expect("symlink");
        #[cfg(windows)]
        std::os::windows::fs::symlink_file(&file, &symlink).expect("symlink");

        let merged = merge_grep_results(
            vec![
                result(vec![grep_match(&file, 1, 1)], false, IndexStatus::Ready),
                result(vec![grep_match(&symlink, 1, 1)], false, IndexStatus::Ready),
            ],
            temp.path(),
            10,
        );

        assert_eq!(merged.matches.len(), 1);
    }

    #[test]
    fn merge_truncated_when_child_truncated_or_pre_merge_exceeds_max() {
        let root = Path::new("/project");
        let child = merge_grep_results(
            vec![result(
                vec![grep_match(Path::new("/project/a.rs"), 1, 1)],
                true,
                IndexStatus::Ready,
            )],
            root,
            10,
        );
        assert!(child.truncated);

        let many = merge_grep_results(
            vec![
                result(
                    vec![grep_match(Path::new("/project/a.rs"), 1, 1)],
                    false,
                    IndexStatus::Ready,
                ),
                result(
                    vec![grep_match(Path::new("/project/b.rs"), 1, 1)],
                    false,
                    IndexStatus::Ready,
                ),
            ],
            root,
            1,
        );
        assert!(many.truncated);
    }
}
1033}