Skip to main content

aft/
grep_executor.rs

1use std::collections::HashSet;
2use std::env;
3use std::path::{Path, PathBuf};
4use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
5
6use rayon::prelude::*;
7
8use crate::commands::multi_path::{
9    canonical_key, dedupe_nested_paths, resolve_path_or_multi, SearchPathResolution,
10};
11use crate::context::AppContext;
12use crate::pattern_compile::{CompiledPattern, LiteralSearch};
13use crate::protocol::Response;
14use crate::search_index::{
15    build_path_filters, has_any_project_file_from, read_searchable_text, resolve_search_scope,
16    sort_grep_matches_by_mtime_desc, walk_project_files_from, GrepMatch, GrepResult, IndexStatus,
17};
18
/// Caller-supplied options that apply to every root of a grep request.
#[derive(Debug, Clone)]
pub struct GrepParams {
    /// Path patterns a file must match to be searched; forwarded to the
    /// index search and to `build_path_filters` on the fallback path.
    pub include: Vec<String>,
    /// Path patterns that remove files from the search; forwarded alongside
    /// `include`.
    pub exclude: Vec<String>,
    /// Maximum number of matches returned to the caller.
    pub max_results: usize,
}
25
26#[derive(Clone, Debug)]
27pub struct GrepScope {
28    pub roots: Vec<ResolvedRoot>,
29    pub multi_root: bool,
30    pub per_root_max: usize,
31}
32
/// One search root together with the settings used to search it.
#[derive(Debug, Clone)]
pub struct ResolvedRoot {
    /// File or directory that is actually searched.
    pub search_root: PathBuf,
    /// Base directory handed to the ignore-aware file walk as its filter root.
    pub filter_root: PathBuf,
    /// Whether the search index may serve this root.
    pub use_index: bool,
    /// Set when the root cannot be served by the index (it is populated as
    /// the negation of `use_index` during scope resolution).
    pub is_external: bool,
}
40
41pub fn project_root(ctx: &AppContext) -> PathBuf {
42    let project_root = ctx
43        .config()
44        .project_root
45        .clone()
46        .unwrap_or_else(|| env::current_dir().unwrap_or_default());
47    std::fs::canonicalize(&project_root).unwrap_or(project_root)
48}
49
50pub fn resolve_grep_scope(
51    ctx: &AppContext,
52    paths: Option<&serde_json::Value>,
53    max_results: usize,
54    req_id: &str,
55) -> Result<GrepScope, Response> {
56    let project_root = project_root(ctx);
57    let search_roots = resolve_roots(ctx, paths, &project_root, req_id)?;
58
59    if let Some(missing_root) = search_roots.iter().find(|root| !root.exists()) {
60        return Err(Response::error(
61            req_id,
62            "path_not_found",
63            format!(
64                "grep: search path does not exist: {}",
65                missing_root.display()
66            ),
67        ));
68    }
69
70    let roots = search_roots
71        .into_iter()
72        .map(|search_root| {
73            let scope = resolve_search_scope(&project_root, Some(&search_root.to_string_lossy()));
74            let is_external = !scope.use_index;
75            let filter_root =
76                compute_filter_root(&project_root, &scope.root, scope.use_index, is_external);
77            ResolvedRoot {
78                search_root: scope.root,
79                filter_root,
80                use_index: scope.use_index,
81                is_external,
82            }
83        })
84        .collect::<Vec<_>>();
85
86    let multi_root = roots.len() > 1;
87    let per_root_max = if multi_root {
88        max_results.saturating_mul(2).max(max_results)
89    } else {
90        max_results
91    };
92
93    Ok(GrepScope {
94        roots,
95        multi_root,
96        per_root_max,
97    })
98}
99
/// Chooses the base directory for the file walk's filters.
///
/// Returns `search_root` only when the root is external *and* not served by
/// the index; in every other case the project root is used.
pub fn compute_filter_root(
    project_root: &Path,
    search_root: &Path,
    use_index: bool,
    is_external: bool,
) -> PathBuf {
    let anchor_at_search_root = is_external && !use_index;
    let chosen = if anchor_at_search_root {
        search_root
    } else {
        project_root
    };
    chosen.to_path_buf()
}
112
113pub fn scope_has_files(project_root: &Path, scope: &GrepScope) -> bool {
114    scope.roots.iter().any(|root| {
115        // An explicitly-named existing file is always in scope (it's searched
116        // directly even if gitignored / .aftignored), so don't report it as
117        // "no files matched scope".
118        if root.search_root.is_file() {
119            return true;
120        }
121        let catch_all =
122            build_path_filters(&["**/*".to_string()], &[]).expect("valid catch-all glob");
123        has_any_project_file_from(&root.filter_root, &root.search_root, &catch_all)
124            || has_any_project_file_from(project_root, &root.search_root, &catch_all)
125    })
126}
127
128pub fn execute(
129    ctx: &AppContext,
130    pattern: &CompiledPattern,
131    scope: &GrepScope,
132    params: &GrepParams,
133) -> GrepResult {
134    let project_root = project_root(ctx);
135    if scope.roots.len() == 1 {
136        return execute_root(
137            ctx,
138            pattern,
139            &scope.roots[0],
140            params,
141            params.max_results,
142            &project_root,
143        );
144    }
145
146    let mut results = Vec::new();
147    for root in &scope.roots {
148        results.push(execute_root(
149            ctx,
150            pattern,
151            root,
152            params,
153            scope.per_root_max,
154            &project_root,
155        ));
156    }
157    merge_grep_results(results, &project_root, params.max_results)
158}
159
160fn resolve_roots(
161    ctx: &AppContext,
162    paths: Option<&serde_json::Value>,
163    project_root: &Path,
164    req_id: &str,
165) -> Result<Vec<PathBuf>, Response> {
166    let Some(paths) = paths else {
167        return Ok(vec![resolve_search_scope(project_root, None).root]);
168    };
169    if paths.is_null() {
170        return Ok(vec![resolve_search_scope(project_root, None).root]);
171    }
172    if let Some(path) = paths.as_str() {
173        return match resolve_path_or_multi(
174            path,
175            project_root,
176            |candidate| ctx.validate_path(req_id, candidate),
177            req_id,
178        )? {
179            SearchPathResolution::Single(root) => Ok(vec![root]),
180            SearchPathResolution::Multi(roots) => Ok(roots),
181        };
182    }
183    if let Some(items) = paths.as_array() {
184        let mut roots = Vec::with_capacity(items.len());
185        for item in items {
186            let Some(path) = item.as_str() else {
187                return Err(Response::error(
188                    req_id,
189                    "invalid_request",
190                    "grep: path array entries must be strings",
191                ));
192            };
193            let validated = ctx.validate_path(req_id, Path::new(path))?;
194            let raw = validated.to_string_lossy();
195            roots.push(resolve_search_scope(project_root, Some(raw.as_ref())).root);
196        }
197        let roots = dedupe_nested_paths(roots);
198        if roots.is_empty() {
199            Ok(vec![resolve_search_scope(project_root, None).root])
200        } else {
201            Ok(roots)
202        }
203    } else {
204        Err(Response::error(
205            req_id,
206            "invalid_request",
207            "grep: path must be a string, array of strings, or null",
208        ))
209    }
210}
211
212fn execute_root(
213    ctx: &AppContext,
214    pattern: &CompiledPattern,
215    root: &ResolvedRoot,
216    params: &GrepParams,
217    max_results: usize,
218    project_root: &Path,
219) -> GrepResult {
220    // Explicit single-file scope: search the named file directly, bypassing the
221    // trigram index and the gitignore/.aftignore-aware walk. Matches ripgrep,
222    // where naming a file explicitly searches it even when it is gitignored,
223    // .aftignored, or not yet indexed. Binary + UTF-8 guards still apply.
224    if root.search_root.is_file() {
225        let index_status = if root.use_index {
226            current_index_status(ctx)
227        } else {
228            IndexStatus::Fallback
229        };
230        return grep_explicit_file(&root.search_root, pattern, max_results, index_status);
231    }
232
233    let search_index = ctx.search_index().borrow();
234    match search_index.as_ref() {
235        Some(index) if index.ready && root.use_index => index.search_grep(
236            pattern,
237            &params.include,
238            &params.exclude,
239            &root.search_root,
240            max_results,
241        ),
242        _ => {
243            let index_status = if root.use_index {
244                current_index_status(ctx)
245            } else {
246                IndexStatus::Fallback
247            };
248            fallback_grep(
249                project_root,
250                &root.search_root,
251                &root.filter_root,
252                pattern,
253                &params.include,
254                &params.exclude,
255                max_results,
256                index_status,
257            )
258        }
259    }
260}
261
262/// Grep a single explicitly-named file directly, bypassing the trigram index
263/// and the gitignore/.aftignore-aware walk. Used when the caller's `path`
264/// resolves to one existing file — ripgrep semantics: an explicitly-named file
265/// is searched even when it is gitignored, `.aftignore`d, or not yet indexed.
266/// Binary detection and UTF-8 guards still apply (via `read_searchable_text`
267/// inside `fallback_search_file`).
268fn grep_explicit_file(
269    file: &Path,
270    pattern: &CompiledPattern,
271    max_results: usize,
272    index_status: IndexStatus,
273) -> GrepResult {
274    let total_matches = AtomicUsize::new(0);
275    let files_searched = AtomicUsize::new(0);
276    let files_with_matches = AtomicUsize::new(0);
277    let truncated = AtomicBool::new(false);
278    let engine_capped = AtomicBool::new(false);
279    let stop_after = max_results.saturating_mul(2);
280
281    let matches = fallback_search_file(
282        &file.to_path_buf(),
283        pattern,
284        max_results,
285        stop_after,
286        &total_matches,
287        &files_searched,
288        &files_with_matches,
289        &truncated,
290        &engine_capped,
291    );
292
293    GrepResult {
294        total_matches: total_matches.load(Ordering::Relaxed),
295        matches,
296        files_searched: files_searched.load(Ordering::Relaxed),
297        files_with_matches: files_with_matches.load(Ordering::Relaxed),
298        index_status,
299        truncated: truncated.load(Ordering::Relaxed),
300        fully_degraded: false,
301        engine_capped: engine_capped.load(Ordering::Relaxed),
302    }
303}
304
305pub fn merge_grep_results(
306    results: Vec<GrepResult>,
307    project_root: &Path,
308    max_results: usize,
309) -> GrepResult {
310    let mut matches = Vec::new();
311    let mut total_matches = 0usize;
312    let mut files_searched = 0usize;
313    let mut files_with_matches = 0usize;
314    let mut index_status = IndexStatus::Ready;
315    let mut any_child_truncated = false;
316    let mut fully_degraded = false;
317    let mut engine_capped = false;
318    let mut seen_match_keys = HashSet::new();
319
320    for result in results {
321        total_matches += result.total_matches;
322        files_searched += result.files_searched;
323        files_with_matches += result.files_with_matches;
324        index_status = weakest_index_status(index_status, result.index_status);
325        any_child_truncated |= result.truncated;
326        fully_degraded |= result.fully_degraded;
327        engine_capped |= result.engine_capped;
328
329        for grep_match in result.matches {
330            let file_key = canonical_key(&grep_match.file);
331            let match_key = (file_key, grep_match.line, grep_match.column);
332            if seen_match_keys.insert(match_key) {
333                matches.push(grep_match);
334            }
335        }
336    }
337
338    sort_grep_matches_by_mtime_desc(&mut matches, project_root);
339    if matches.len() > max_results {
340        matches.truncate(max_results);
341    }
342
343    GrepResult {
344        matches,
345        total_matches,
346        files_searched,
347        files_with_matches,
348        index_status,
349        truncated: any_child_truncated || total_matches > max_results,
350        fully_degraded,
351        engine_capped,
352    }
353}
354
355pub fn weakest_index_status(left: IndexStatus, right: IndexStatus) -> IndexStatus {
356    match (left, right) {
357        (IndexStatus::Disabled, _) | (_, IndexStatus::Disabled) => IndexStatus::Disabled,
358        (IndexStatus::Fallback, _) | (_, IndexStatus::Fallback) => IndexStatus::Fallback,
359        (IndexStatus::Building, _) | (_, IndexStatus::Building) => IndexStatus::Building,
360        (IndexStatus::Ready, IndexStatus::Ready) => IndexStatus::Ready,
361    }
362}
363
364fn fallback_grep(
365    project_root: &Path,
366    search_root: &Path,
367    filter_root: &Path,
368    pattern: &CompiledPattern,
369    include: &[String],
370    exclude: &[String],
371    max_results: usize,
372    index_status: IndexStatus,
373) -> GrepResult {
374    let filters = build_path_filters(include, exclude).unwrap_or_default();
375    let files = walk_project_files_from(filter_root, search_root, &filters);
376
377    let total_matches = AtomicUsize::new(0);
378    let files_searched = AtomicUsize::new(0);
379    let files_with_matches = AtomicUsize::new(0);
380    let truncated = AtomicBool::new(false);
381    let engine_capped = AtomicBool::new(false);
382    let stop_after = max_results.saturating_mul(2);
383
384    let mut matches = files
385        .par_iter()
386        .map(|file| {
387            fallback_search_file(
388                file,
389                pattern,
390                max_results,
391                stop_after,
392                &total_matches,
393                &files_searched,
394                &files_with_matches,
395                &truncated,
396                &engine_capped,
397            )
398        })
399        .reduce(Vec::new, |mut left, mut right| {
400            left.append(&mut right);
401            left
402        });
403
404    sort_grep_matches_by_mtime_desc(&mut matches, project_root);
405
406    GrepResult {
407        total_matches: total_matches.load(Ordering::Relaxed),
408        matches,
409        files_searched: files_searched.load(Ordering::Relaxed),
410        files_with_matches: files_with_matches.load(Ordering::Relaxed),
411        index_status,
412        truncated: truncated.load(Ordering::Relaxed),
413        fully_degraded: true,
414        engine_capped: engine_capped.load(Ordering::Relaxed),
415    }
416}
417
418fn fallback_search_file(
419    file: &PathBuf,
420    pattern: &CompiledPattern,
421    max_results: usize,
422    stop_after: usize,
423    total_matches: &AtomicUsize,
424    files_searched: &AtomicUsize,
425    files_with_matches: &AtomicUsize,
426    truncated: &AtomicBool,
427    engine_capped: &AtomicBool,
428) -> Vec<GrepMatch> {
429    if should_stop_fallback_search(truncated, total_matches, stop_after) {
430        engine_capped.store(true, Ordering::Relaxed);
431        return Vec::new();
432    }
433
434    let Some(content) = read_searchable_text(file) else {
435        return Vec::new();
436    };
437    files_searched.fetch_add(1, Ordering::Relaxed);
438
439    let line_starts = line_starts(&content);
440    let mut seen_lines = HashSet::new();
441    let mut matched_this_file = false;
442    let mut matches = Vec::new();
443
444    match pattern {
445        CompiledPattern::Literal(literal) => search_literal_in_text(
446            file,
447            &content,
448            &line_starts,
449            literal,
450            max_results,
451            stop_after,
452            total_matches,
453            &mut seen_lines,
454            truncated,
455            engine_capped,
456            &mut matched_this_file,
457            &mut matches,
458        ),
459        CompiledPattern::Regex { compiled, .. } => {
460            for matched in compiled.find_iter(content.as_bytes()) {
461                if should_stop_fallback_search(truncated, total_matches, stop_after) {
462                    engine_capped.store(true, Ordering::Relaxed);
463                    break;
464                }
465
466                let (line, column, line_text) =
467                    line_details(&content, &line_starts, matched.start());
468                if !seen_lines.insert(line) {
469                    continue;
470                }
471
472                matched_this_file = true;
473                let match_number = total_matches.fetch_add(1, Ordering::Relaxed) + 1;
474                if match_number > max_results {
475                    truncated.store(true, Ordering::Relaxed);
476                    break;
477                }
478
479                matches.push(GrepMatch {
480                    file: file.clone(),
481                    line,
482                    column,
483                    line_text,
484                    match_text: String::from_utf8_lossy(matched.as_bytes()).into_owned(),
485                });
486            }
487        }
488    }
489
490    if matched_this_file {
491        files_with_matches.fetch_add(1, Ordering::Relaxed);
492    }
493
494    matches
495}
496
497fn search_literal_in_text(
498    file: &Path,
499    content: &str,
500    line_starts: &[usize],
501    literal: &LiteralSearch,
502    max_results: usize,
503    stop_after: usize,
504    total_matches: &AtomicUsize,
505    seen_lines: &mut HashSet<u32>,
506    truncated: &AtomicBool,
507    engine_capped: &AtomicBool,
508    matched_this_file: &mut bool,
509    matches: &mut Vec<GrepMatch>,
510) {
511    let content_bytes = content.as_bytes();
512    let search_content;
513    let haystack = if literal.case_insensitive_ascii {
514        search_content = content_bytes.to_ascii_lowercase();
515        search_content.as_slice()
516    } else {
517        content_bytes
518    };
519    let finder = memchr::memmem::Finder::new(&literal.needle);
520    let mut start = 0usize;
521
522    while let Some(position) = finder.find(&haystack[start..]) {
523        if should_stop_fallback_search(truncated, total_matches, stop_after) {
524            engine_capped.store(true, Ordering::Relaxed);
525            break;
526        }
527
528        let offset = start + position;
529        start = offset + 1;
530        let (line, column, line_text) = line_details(content, line_starts, offset);
531        if !seen_lines.insert(line) {
532            continue;
533        }
534
535        *matched_this_file = true;
536        let match_number = total_matches.fetch_add(1, Ordering::Relaxed) + 1;
537        if match_number > max_results {
538            truncated.store(true, Ordering::Relaxed);
539            break;
540        }
541
542        let end = offset + literal.needle.len();
543        matches.push(GrepMatch {
544            file: file.to_path_buf(),
545            line,
546            column,
547            line_text,
548            match_text: String::from_utf8_lossy(&content_bytes[offset..end]).into_owned(),
549        });
550    }
551}
552
/// Tells the fallback search whether to give up early.
///
/// The search stops only once the result has already been marked truncated
/// *and* the running match count has reached `stop_after`.
fn should_stop_fallback_search(
    truncated: &AtomicBool,
    total_matches: &AtomicUsize,
    stop_after: usize,
) -> bool {
    if !truncated.load(Ordering::Relaxed) {
        return false;
    }
    total_matches.load(Ordering::Relaxed) >= stop_after
}
560
561pub(crate) fn ripgrep_glob(
562    search_root: &Path,
563    pattern: &str,
564    max_results: usize,
565) -> Option<Vec<PathBuf>> {
566    let filters = build_path_filters(&[pattern.to_string()], &[]).ok()?;
567    let mut files = walk_project_files_from(search_root, search_root, &filters);
568    files.truncate(max_results);
569    Some(files)
570}
571
572fn current_index_status(ctx: &AppContext) -> IndexStatus {
573    if ctx
574        .search_index()
575        .borrow()
576        .as_ref()
577        .is_some_and(|index| index.ready)
578    {
579        IndexStatus::Ready
580    } else if ctx.search_index_rx().borrow().is_some() || ctx.search_index().borrow().is_some() {
581        IndexStatus::Building
582    } else {
583        IndexStatus::Fallback
584    }
585}
586
/// Returns the byte offset at which every line of `content` begins.
///
/// The first entry is always `0`; each `\n` contributes the offset just past
/// it, so content ending in a newline yields a final entry equal to
/// `content.len()`.
pub fn line_starts(content: &str) -> Vec<usize> {
    std::iter::once(0)
        .chain(
            content
                .match_indices('\n')
                .map(|(newline_at, _)| newline_at + 1),
        )
        .collect()
}
596
/// Locates a byte `offset` within `content` and describes its line.
///
/// Returns `(line, column, line_text)` where `line` and `column` are 1-based,
/// `column` counts characters (not bytes) from the start of the line, and
/// `line_text` is the full line without its trailing `\n` or `\r`s.
///
/// `line_starts` must be the sorted line-start offsets of `content`, as
/// produced by [`line_starts`].
///
/// `offset` does not have to be a valid string position: an offset past the
/// end is clamped to `content.len()`, and an offset that falls inside a
/// multi-byte character is moved back to the start of that character. Callers
/// that match on raw bytes can produce such offsets, and slicing the string
/// at them would panic.
pub fn line_details(content: &str, line_starts: &[usize], offset: usize) -> (u32, u32, String) {
    // Normalise the offset to a position where `content` may be sliced.
    // Offset 0 is always a boundary, so the loop terminates.
    let mut offset = offset.min(content.len());
    while !content.is_char_boundary(offset) {
        offset -= 1;
    }

    let line_index = match line_starts.binary_search(&offset) {
        // The offset is exactly the first byte of a line.
        Ok(index) => index,
        // Otherwise it belongs to the line that starts just before it.
        Err(index) => index.saturating_sub(1),
    };
    let line_start = line_starts.get(line_index).copied().unwrap_or(0);
    let line_end = content[line_start..]
        .find('\n')
        .map(|length| line_start + length)
        .unwrap_or(content.len());
    let line_text = content[line_start..line_end]
        .trim_end_matches('\r')
        .to_string();
    let column = content[line_start..offset].chars().count() as u32 + 1;
    (line_index as u32 + 1, column, line_text)
}
613
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a match at `file:line:column` with placeholder text.
    fn grep_match(file: &Path, line: u32, column: u32) -> GrepMatch {
        GrepMatch {
            file: file.to_path_buf(),
            line,
            column,
            line_text: "needle".to_string(),
            match_text: "needle".to_string(),
        }
    }

    /// Wraps `matches` in a result whose counters all equal the match count.
    fn result(matches: Vec<GrepMatch>, truncated: bool, status: IndexStatus) -> GrepResult {
        GrepResult {
            total_matches: matches.len(),
            files_searched: matches.len(),
            files_with_matches: matches.len(),
            matches,
            index_status: status,
            truncated,
            fully_degraded: false,
            engine_capped: false,
        }
    }

    /// Creates a context whose configured project root is `root`.
    fn context_for(root: &Path) -> AppContext {
        AppContext::new(
            Box::new(crate::parser::TreeSitterProvider::new()),
            crate::config::Config {
                project_root: Some(root.to_path_buf()),
                ..crate::config::Config::default()
            },
        )
    }

    #[test]
    fn single_root_uses_requested_max() {
        // Resolve a real single-entry scope instead of asserting on a
        // hand-built struct, so the per-root budget logic is exercised.
        let project = tempfile::tempdir().expect("project");
        let ctx = context_for(project.path());
        let only = project.path().join("only");
        std::fs::create_dir_all(&only).expect("only");
        let paths = serde_json::json!([only.display().to_string()]);

        let scope = resolve_grep_scope(&ctx, Some(&paths), 10, "test").expect("scope");

        assert_eq!(scope.roots.len(), 1);
        assert!(!scope.multi_root);
        assert_eq!(scope.per_root_max, 10);
    }

    #[test]
    fn multi_root_uses_double_per_root_max() {
        let project = tempfile::tempdir().expect("project");
        let ctx = context_for(project.path());
        let left = project.path().join("left");
        let right = project.path().join("right");
        std::fs::create_dir_all(&left).expect("left");
        std::fs::create_dir_all(&right).expect("right");
        let paths = serde_json::json!([left.display().to_string(), right.display().to_string()]);

        let scope = resolve_grep_scope(&ctx, Some(&paths), 10, "test").expect("scope");

        assert!(scope.multi_root);
        assert_eq!(scope.per_root_max, 20);
    }

    #[test]
    fn filter_root_is_project_for_in_project_and_search_root_for_external_unindexed() {
        let project = PathBuf::from("/project");
        let in_project = compute_filter_root(&project, Path::new("/project/src"), true, false);
        let external = compute_filter_root(&project, Path::new("/tmp/external"), false, true);
        assert_eq!(in_project, project);
        assert_eq!(external, PathBuf::from("/tmp/external"));
    }

    #[test]
    fn weakest_status_orders_disabled_fallback_building_ready() {
        assert_eq!(
            weakest_index_status(IndexStatus::Ready, IndexStatus::Building),
            IndexStatus::Building
        );
        assert_eq!(
            weakest_index_status(IndexStatus::Building, IndexStatus::Fallback),
            IndexStatus::Fallback
        );
        assert_eq!(
            weakest_index_status(IndexStatus::Fallback, IndexStatus::Disabled),
            IndexStatus::Disabled
        );
    }

    #[test]
    fn line_starts_and_details_report_one_based_positions() {
        let content = "a\nbc\n";
        let starts = line_starts(content);
        assert_eq!(starts, vec![0, 2, 5]);
        assert_eq!(line_details(content, &starts, 0), (1, 1, "a".to_string()));
        assert_eq!(line_details(content, &starts, 3), (2, 2, "bc".to_string()));
    }

    #[test]
    fn merge_dedupes_by_canonical_file_line_column() {
        let temp = tempfile::tempdir().expect("temp");
        let file = temp.path().join("file.rs");
        std::fs::write(&file, "needle").expect("write");
        let symlink = temp.path().join("link.rs");
        #[cfg(unix)]
        std::os::unix::fs::symlink(&file, &symlink).expect("symlink");
        #[cfg(windows)]
        std::os::windows::fs::symlink_file(&file, &symlink).expect("symlink");

        let merged = merge_grep_results(
            vec![
                result(vec![grep_match(&file, 1, 1)], false, IndexStatus::Ready),
                result(vec![grep_match(&symlink, 1, 1)], false, IndexStatus::Ready),
            ],
            temp.path(),
            10,
        );

        assert_eq!(merged.matches.len(), 1);
    }

    #[test]
    fn merge_truncated_when_child_truncated_or_pre_merge_exceeds_max() {
        let root = Path::new("/project");
        let child = merge_grep_results(
            vec![result(
                vec![grep_match(Path::new("/project/a.rs"), 1, 1)],
                true,
                IndexStatus::Ready,
            )],
            root,
            10,
        );
        assert!(child.truncated);

        let many = merge_grep_results(
            vec![
                result(
                    vec![grep_match(Path::new("/project/a.rs"), 1, 1)],
                    false,
                    IndexStatus::Ready,
                ),
                result(
                    vec![grep_match(Path::new("/project/b.rs"), 1, 1)],
                    false,
                    IndexStatus::Ready,
                ),
            ],
            root,
            1,
        );
        assert!(many.truncated);
    }
}