1use std::collections::HashSet;
2use std::env;
3use std::path::{Path, PathBuf};
4use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
5use std::sync::Arc;
6
7use rayon::prelude::*;
8
9use crate::commands::multi_path::{
10 canonical_key, dedupe_nested_paths, resolve_path_or_multi, SearchPathResolution,
11};
12use crate::context::AppContext;
13use crate::pattern_compile::{CompiledPattern, LiteralSearch};
14use crate::protocol::Response;
15use crate::search_index::{
16 build_path_filters, for_each_walk_project_file_from, has_any_project_file_from,
17 read_searchable_text, resolve_search_scope, sort_grep_matches_by_mtime_desc,
18 walk_project_files_from, GrepMatch, GrepResult, IndexStatus,
19};
20
/// Caller-supplied options that apply to every root of a grep request.
#[derive(Clone, Debug)]
pub struct GrepParams {
    /// Include glob patterns; forwarded unchanged to the index search and to
    /// `build_path_filters` on the fallback path.
    pub include: Vec<String>,
    /// Exclude glob patterns; forwarded alongside `include`.
    pub exclude: Vec<String>,
    /// Result limit: used directly for a single-root search and as the final
    /// cap when several per-root results are merged.
    pub max_results: usize,
}
27
/// The resolved set of roots a grep request will search.
#[derive(Clone, Debug)]
pub struct GrepScope {
    /// One entry per resolved search root.
    pub roots: Vec<ResolvedRoot>,
    /// `true` when more than one root was resolved.
    pub multi_root: bool,
    /// Result limit handed to each root when several roots are searched.
    /// `resolve_grep_scope` sets it to twice the requested limit (saturating)
    /// for multi-root scopes and to the requested limit otherwise.
    pub per_root_max: usize,
}
34
/// A single search root together with how it should be searched.
#[derive(Clone, Debug)]
pub struct ResolvedRoot {
    /// File or directory to search; a plain file is grepped directly by `execute_root`.
    pub search_root: PathBuf,
    /// Root handed to the fallback file walker as its first argument; see
    /// `compute_filter_root` for how it is chosen.
    pub filter_root: PathBuf,
    /// Whether the search index may serve this root.
    pub use_index: bool,
    /// Set by `resolve_grep_scope` to the negation of `use_index`.
    pub is_external: bool,
}
42
43pub fn project_root(ctx: &AppContext) -> PathBuf {
44 let project_root = ctx
45 .config()
46 .project_root
47 .clone()
48 .unwrap_or_else(|| env::current_dir().unwrap_or_default());
49 std::fs::canonicalize(&project_root).unwrap_or(project_root)
50}
51
52pub fn resolve_grep_scope(
53 ctx: &AppContext,
54 paths: Option<&serde_json::Value>,
55 max_results: usize,
56 req_id: &str,
57) -> Result<GrepScope, Response> {
58 let project_root = project_root(ctx);
59 let search_roots = resolve_roots(ctx, paths, &project_root, req_id)?;
60
61 if let Some(missing_root) = search_roots.iter().find(|root| !root.exists()) {
62 return Err(Response::error(
63 req_id,
64 "path_not_found",
65 format!(
66 "grep: search path does not exist: {}",
67 missing_root.display()
68 ),
69 ));
70 }
71
72 let roots = search_roots
73 .into_iter()
74 .map(|search_root| {
75 let scope = resolve_search_scope(&project_root, Some(&search_root.to_string_lossy()));
76 let is_external = !scope.use_index;
77 let filter_root =
78 compute_filter_root(&project_root, &scope.root, scope.use_index, is_external);
79 ResolvedRoot {
80 search_root: scope.root,
81 filter_root,
82 use_index: scope.use_index,
83 is_external,
84 }
85 })
86 .collect::<Vec<_>>();
87
88 let multi_root = roots.len() > 1;
89 let per_root_max = if multi_root {
90 max_results.saturating_mul(2).max(max_results)
91 } else {
92 max_results
93 };
94
95 Ok(GrepScope {
96 roots,
97 multi_root,
98 per_root_max,
99 })
100}
101
/// Chooses the root used as the filter base for a search.
///
/// Returns `search_root` only for an external root that is not index-backed;
/// every other combination yields `project_root`.
pub fn compute_filter_root(
    project_root: &Path,
    search_root: &Path,
    use_index: bool,
    is_external: bool,
) -> PathBuf {
    let chosen = match (is_external, use_index) {
        (true, false) => search_root,
        _ => project_root,
    };
    chosen.to_path_buf()
}
114
115pub fn scope_has_files(project_root: &Path, scope: &GrepScope) -> bool {
116 scope.roots.iter().any(|root| {
117 if root.search_root.is_file() {
121 return true;
122 }
123 let catch_all =
124 build_path_filters(&["**/*".to_string()], &[]).expect("valid catch-all glob");
125 has_any_project_file_from(&root.filter_root, &root.search_root, &catch_all)
126 || has_any_project_file_from(project_root, &root.search_root, &catch_all)
127 })
128}
129
130pub fn execute(
131 ctx: &AppContext,
132 pattern: &CompiledPattern,
133 scope: &GrepScope,
134 params: &GrepParams,
135) -> GrepResult {
136 let project_root = project_root(ctx);
137 if scope.roots.len() == 1 {
138 return execute_root(
139 ctx,
140 pattern,
141 &scope.roots[0],
142 params,
143 params.max_results,
144 &project_root,
145 );
146 }
147
148 let mut results = Vec::new();
149 for root in &scope.roots {
150 results.push(execute_root(
151 ctx,
152 pattern,
153 root,
154 params,
155 scope.per_root_max,
156 &project_root,
157 ));
158 }
159 merge_grep_results(results, &project_root, params.max_results)
160}
161
162fn resolve_roots(
163 ctx: &AppContext,
164 paths: Option<&serde_json::Value>,
165 project_root: &Path,
166 req_id: &str,
167) -> Result<Vec<PathBuf>, Response> {
168 let Some(paths) = paths else {
169 return Ok(vec![resolve_search_scope(project_root, None).root]);
170 };
171 if paths.is_null() {
172 return Ok(vec![resolve_search_scope(project_root, None).root]);
173 }
174 if let Some(path) = paths.as_str() {
175 return match resolve_path_or_multi(
176 path,
177 project_root,
178 |candidate| ctx.validate_path(req_id, candidate),
179 req_id,
180 )? {
181 SearchPathResolution::Single(root) => Ok(vec![root]),
182 SearchPathResolution::Multi(roots) => Ok(roots),
183 };
184 }
185 if let Some(items) = paths.as_array() {
186 let mut roots = Vec::with_capacity(items.len());
187 for item in items {
188 let Some(path) = item.as_str() else {
189 return Err(Response::error(
190 req_id,
191 "invalid_request",
192 "grep: path array entries must be strings",
193 ));
194 };
195 let validated = ctx.validate_path(req_id, Path::new(path))?;
196 let raw = validated.to_string_lossy();
197 roots.push(resolve_search_scope(project_root, Some(raw.as_ref())).root);
198 }
199 let roots = dedupe_nested_paths(roots);
200 if roots.is_empty() {
201 Ok(vec![resolve_search_scope(project_root, None).root])
202 } else {
203 Ok(roots)
204 }
205 } else {
206 Err(Response::error(
207 req_id,
208 "invalid_request",
209 "grep: path must be a string, array of strings, or null",
210 ))
211 }
212}
213
214fn execute_root(
215 ctx: &AppContext,
216 pattern: &CompiledPattern,
217 root: &ResolvedRoot,
218 params: &GrepParams,
219 max_results: usize,
220 project_root: &Path,
221) -> GrepResult {
222 if root.search_root.is_file() {
227 let index_status = if root.use_index {
228 current_index_status(ctx)
229 } else {
230 IndexStatus::Fallback
231 };
232 return grep_explicit_file(&root.search_root, pattern, max_results, index_status);
233 }
234
235 let search_index = ctx.search_index().borrow();
236 match search_index.as_ref() {
237 Some(index) if index.ready && root.use_index => index.search_grep(
238 pattern,
239 ¶ms.include,
240 ¶ms.exclude,
241 &root.search_root,
242 max_results,
243 ),
244 _ => {
245 let index_status = if root.use_index {
246 current_index_status(ctx)
247 } else {
248 IndexStatus::Fallback
249 };
250 fallback_grep(
251 project_root,
252 &root.search_root,
253 &root.filter_root,
254 pattern,
255 ¶ms.include,
256 ¶ms.exclude,
257 max_results,
258 index_status,
259 )
260 }
261 }
262}
263
264fn grep_explicit_file(
271 file: &Path,
272 pattern: &CompiledPattern,
273 max_results: usize,
274 index_status: IndexStatus,
275) -> GrepResult {
276 let total_matches = AtomicUsize::new(0);
277 let files_searched = AtomicUsize::new(0);
278 let files_with_matches = AtomicUsize::new(0);
279 let truncated = AtomicBool::new(false);
280 let engine_capped = AtomicBool::new(false);
281 let stop_after = max_results.saturating_mul(2);
282
283 let matches = fallback_search_file(
284 &file.to_path_buf(),
285 pattern,
286 max_results,
287 stop_after,
288 &total_matches,
289 &files_searched,
290 &files_with_matches,
291 &truncated,
292 &engine_capped,
293 );
294
295 GrepResult {
296 total_matches: total_matches.load(Ordering::Relaxed),
297 matches,
298 files_searched: files_searched.load(Ordering::Relaxed),
299 files_with_matches: files_with_matches.load(Ordering::Relaxed),
300 index_status,
301 truncated: truncated.load(Ordering::Relaxed),
302 fully_degraded: false,
303 engine_capped: engine_capped.load(Ordering::Relaxed),
304 }
305}
306
307pub fn merge_grep_results(
308 results: Vec<GrepResult>,
309 project_root: &Path,
310 max_results: usize,
311) -> GrepResult {
312 let mut matches = Vec::new();
313 let mut total_matches = 0usize;
314 let mut files_searched = 0usize;
315 let mut files_with_matches = 0usize;
316 let mut index_status = IndexStatus::Ready;
317 let mut any_child_truncated = false;
318 let mut fully_degraded = false;
319 let mut engine_capped = false;
320 let mut seen_match_keys = HashSet::new();
321
322 for result in results {
323 total_matches += result.total_matches;
324 files_searched += result.files_searched;
325 files_with_matches += result.files_with_matches;
326 index_status = weakest_index_status(index_status, result.index_status);
327 any_child_truncated |= result.truncated;
328 fully_degraded |= result.fully_degraded;
329 engine_capped |= result.engine_capped;
330
331 for grep_match in result.matches {
332 let file_key = canonical_key(&grep_match.file);
333 let match_key = (file_key, grep_match.line, grep_match.column);
334 if seen_match_keys.insert(match_key) {
335 matches.push(grep_match);
336 }
337 }
338 }
339
340 sort_grep_matches_by_mtime_desc(&mut matches, project_root);
341 if matches.len() > max_results {
342 matches.truncate(max_results);
343 }
344
345 GrepResult {
346 matches,
347 total_matches,
348 files_searched,
349 files_with_matches,
350 index_status,
351 truncated: any_child_truncated || total_matches > max_results,
352 fully_degraded,
353 engine_capped,
354 }
355}
356
357pub fn weakest_index_status(left: IndexStatus, right: IndexStatus) -> IndexStatus {
358 match (left, right) {
359 (IndexStatus::Disabled, _) | (_, IndexStatus::Disabled) => IndexStatus::Disabled,
360 (IndexStatus::Fallback, _) | (_, IndexStatus::Fallback) => IndexStatus::Fallback,
361 (IndexStatus::Building, _) | (_, IndexStatus::Building) => IndexStatus::Building,
362 (IndexStatus::Ready, IndexStatus::Ready) => IndexStatus::Ready,
363 }
364}
365
/// Public entry point to the private walker-based `fallback_grep`, always
/// reporting `IndexStatus::Fallback` as the index status.
///
/// All other arguments are forwarded unchanged. Hidden from rustdoc; the
/// `_bench` suffix suggests it exists for benchmarks (TODO confirm against callers).
#[doc(hidden)]
pub fn fallback_grep_bench(
    project_root: &Path,
    search_root: &Path,
    filter_root: &Path,
    pattern: &CompiledPattern,
    include: &[String],
    exclude: &[String],
    max_results: usize,
) -> GrepResult {
    fallback_grep(
        project_root,
        search_root,
        filter_root,
        pattern,
        include,
        exclude,
        max_results,
        IndexStatus::Fallback,
    )
}
388
389fn fallback_grep(
390 project_root: &Path,
391 search_root: &Path,
392 filter_root: &Path,
393 pattern: &CompiledPattern,
394 include: &[String],
395 exclude: &[String],
396 max_results: usize,
397 index_status: IndexStatus,
398) -> GrepResult {
399 let filters = build_path_filters(include, exclude).unwrap_or_default();
400
401 let total_matches = AtomicUsize::new(0);
402 let files_searched = AtomicUsize::new(0);
403 let files_with_matches = AtomicUsize::new(0);
404 let truncated = AtomicBool::new(false);
405 let engine_capped = AtomicBool::new(false);
406 let stop_after = max_results.saturating_mul(2);
407 let stop_scan = Arc::new(AtomicBool::new(false));
408
409 let mut matches = Vec::new();
410 let mut batch: Vec<PathBuf> = Vec::with_capacity(256);
411
412 let flush_batch = |batch: &mut Vec<PathBuf>, matches: &mut Vec<GrepMatch>| {
413 if batch.is_empty() {
414 return;
415 }
416 let chunk = std::mem::take(batch);
417 let partial: Vec<GrepMatch> = chunk
418 .par_iter()
419 .filter_map(|file| {
420 if stop_scan.load(Ordering::Relaxed) {
421 return None;
422 }
423 let file_matches = fallback_search_file(
424 file,
425 pattern,
426 max_results,
427 stop_after,
428 &total_matches,
429 &files_searched,
430 &files_with_matches,
431 &truncated,
432 &engine_capped,
433 );
434 if truncated.load(Ordering::Relaxed)
435 && total_matches.load(Ordering::Relaxed) >= stop_after
436 {
437 stop_scan.store(true, Ordering::Relaxed);
438 }
439 (!file_matches.is_empty()).then_some(file_matches)
440 })
441 .flatten()
442 .collect();
443 matches.extend(partial);
444 };
445
446 for_each_walk_project_file_from(filter_root, search_root, &filters, |path| {
447 if stop_scan.load(Ordering::Relaxed) {
448 return;
449 }
450 batch.push(path.clone());
451 if batch.len() >= 256 {
452 flush_batch(&mut batch, &mut matches);
453 }
454 });
455 flush_batch(&mut batch, &mut matches);
456
457 sort_grep_matches_by_mtime_desc(&mut matches, project_root);
458
459 GrepResult {
460 total_matches: total_matches.load(Ordering::Relaxed),
461 matches,
462 files_searched: files_searched.load(Ordering::Relaxed),
463 files_with_matches: files_with_matches.load(Ordering::Relaxed),
464 index_status,
465 truncated: truncated.load(Ordering::Relaxed),
466 fully_degraded: true,
467 engine_capped: engine_capped.load(Ordering::Relaxed),
468 }
469}
470
471fn fallback_search_file(
472 file: &PathBuf,
473 pattern: &CompiledPattern,
474 max_results: usize,
475 stop_after: usize,
476 total_matches: &AtomicUsize,
477 files_searched: &AtomicUsize,
478 files_with_matches: &AtomicUsize,
479 truncated: &AtomicBool,
480 engine_capped: &AtomicBool,
481) -> Vec<GrepMatch> {
482 if should_stop_fallback_search(truncated, total_matches, stop_after) {
483 engine_capped.store(true, Ordering::Relaxed);
484 return Vec::new();
485 }
486
487 let Some(content) = read_searchable_text(file) else {
488 return Vec::new();
489 };
490 files_searched.fetch_add(1, Ordering::Relaxed);
491
492 let line_starts = line_starts(&content);
493 let mut seen_lines = HashSet::new();
494 let mut matched_this_file = false;
495 let mut matches = Vec::new();
496
497 match pattern {
498 CompiledPattern::Literal(literal) => search_literal_in_text(
499 file,
500 &content,
501 &line_starts,
502 literal,
503 max_results,
504 stop_after,
505 total_matches,
506 &mut seen_lines,
507 truncated,
508 engine_capped,
509 &mut matched_this_file,
510 &mut matches,
511 ),
512 CompiledPattern::Regex { compiled, .. } => {
513 for matched in compiled.find_iter(content.as_bytes()) {
514 if should_stop_fallback_search(truncated, total_matches, stop_after) {
515 engine_capped.store(true, Ordering::Relaxed);
516 break;
517 }
518
519 let (line, column, line_text) =
520 line_details(&content, &line_starts, matched.start());
521 if !seen_lines.insert(line) {
522 continue;
523 }
524
525 matched_this_file = true;
526 let match_number = total_matches.fetch_add(1, Ordering::Relaxed) + 1;
527 if match_number > max_results {
528 truncated.store(true, Ordering::Relaxed);
529 break;
530 }
531
532 matches.push(GrepMatch {
533 file: file.clone(),
534 line,
535 column,
536 line_text,
537 match_text: String::from_utf8_lossy(matched.as_bytes()).into_owned(),
538 });
539 }
540 }
541 }
542
543 if matched_this_file {
544 files_with_matches.fetch_add(1, Ordering::Relaxed);
545 }
546
547 matches
548}
549
550fn search_literal_in_text(
551 file: &Path,
552 content: &str,
553 line_starts: &[usize],
554 literal: &LiteralSearch,
555 max_results: usize,
556 stop_after: usize,
557 total_matches: &AtomicUsize,
558 seen_lines: &mut HashSet<u32>,
559 truncated: &AtomicBool,
560 engine_capped: &AtomicBool,
561 matched_this_file: &mut bool,
562 matches: &mut Vec<GrepMatch>,
563) {
564 let content_bytes = content.as_bytes();
565 let search_content;
566 let haystack = if literal.case_insensitive_ascii {
567 search_content = content_bytes.to_ascii_lowercase();
568 search_content.as_slice()
569 } else {
570 content_bytes
571 };
572 let finder = memchr::memmem::Finder::new(&literal.needle);
573 let mut start = 0usize;
574
575 while let Some(position) = finder.find(&haystack[start..]) {
576 if should_stop_fallback_search(truncated, total_matches, stop_after) {
577 engine_capped.store(true, Ordering::Relaxed);
578 break;
579 }
580
581 let offset = start + position;
582 start = offset + 1;
583 let (line, column, line_text) = line_details(content, line_starts, offset);
584 if !seen_lines.insert(line) {
585 continue;
586 }
587
588 *matched_this_file = true;
589 let match_number = total_matches.fetch_add(1, Ordering::Relaxed) + 1;
590 if match_number > max_results {
591 truncated.store(true, Ordering::Relaxed);
592 break;
593 }
594
595 let end = offset + literal.needle.len();
596 matches.push(GrepMatch {
597 file: file.to_path_buf(),
598 line,
599 column,
600 line_text,
601 match_text: String::from_utf8_lossy(&content_bytes[offset..end]).into_owned(),
602 });
603 }
604}
605
/// Returns `true` once the search has been truncated *and* the shared match
/// counter has reached `stop_after`.
fn should_stop_fallback_search(
    truncated: &AtomicBool,
    total_matches: &AtomicUsize,
    stop_after: usize,
) -> bool {
    if !truncated.load(Ordering::Relaxed) {
        return false;
    }
    total_matches.load(Ordering::Relaxed) >= stop_after
}
613
614pub(crate) fn ripgrep_glob(
615 search_root: &Path,
616 pattern: &str,
617 max_results: usize,
618) -> Option<Vec<PathBuf>> {
619 let filters = build_path_filters(&[pattern.to_string()], &[]).ok()?;
620 let mut files = walk_project_files_from(search_root, search_root, &filters);
621 files.truncate(max_results);
622 Some(files)
623}
624
625fn current_index_status(ctx: &AppContext) -> IndexStatus {
626 if ctx
627 .search_index()
628 .borrow()
629 .as_ref()
630 .is_some_and(|index| index.ready)
631 {
632 IndexStatus::Ready
633 } else if ctx.search_index_rx().borrow().is_some() || ctx.search_index().borrow().is_some() {
634 IndexStatus::Building
635 } else {
636 IndexStatus::Fallback
637 }
638}
639
/// Returns the byte offset at which every line of `content` begins.
///
/// The first entry is always `0`; each `\n` adds the offset just past it, so
/// text ending in a newline gets a final entry equal to `content.len()`.
pub fn line_starts(content: &str) -> Vec<usize> {
    std::iter::once(0)
        .chain(
            content
                .match_indices('\n')
                .map(|(newline_at, _)| newline_at + 1),
        )
        .collect()
}
649
/// Locates byte `offset` within `content`.
///
/// Returns `(line, column, line_text)` where `line` and `column` are 1-based,
/// `column` counts characters (not bytes) from the start of the line, and
/// `line_text` is the whole line without its `\n` and without trailing `\r`.
///
/// `line_starts` is expected to be the line-start table for `content`.
///
/// The offset is made safe before use: values past the end are clamped to
/// `content.len()`, and an offset inside a multi-byte character is moved back
/// to that character's first byte. Byte-oriented matchers can report such
/// offsets, and slicing the string there would otherwise panic.
pub fn line_details(content: &str, line_starts: &[usize], offset: usize) -> (u32, u32, String) {
    let mut offset = offset.min(content.len());
    // Offset 0 is always a boundary, so this loop terminates.
    while !content.is_char_boundary(offset) {
        offset -= 1;
    }

    let line_index = match line_starts.binary_search(&offset) {
        Ok(index) => index,
        // `index` is the insertion point; the containing line is the one before it.
        Err(index) => index.saturating_sub(1),
    };
    // Never let a malformed table put the line start after the offset.
    let line_start = line_starts
        .get(line_index)
        .copied()
        .unwrap_or(0)
        .min(offset);
    let line_end = content[line_start..]
        .find('\n')
        .map(|length| line_start + length)
        .unwrap_or(content.len());
    let line_text = content[line_start..line_end]
        .trim_end_matches('\r')
        .to_string();
    let column = content[line_start..offset].chars().count() as u32 + 1;
    (line_index as u32 + 1, column, line_text)
}
666
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a match on `file` at the given position with placeholder text.
    fn grep_match(file: &Path, line: u32, column: u32) -> GrepMatch {
        GrepMatch {
            file: file.to_path_buf(),
            line,
            column,
            line_text: "needle".to_string(),
            match_text: "needle".to_string(),
        }
    }

    /// Wraps `matches` in a result whose counters all equal the match count.
    fn result(matches: Vec<GrepMatch>, truncated: bool, status: IndexStatus) -> GrepResult {
        GrepResult {
            total_matches: matches.len(),
            files_searched: matches.len(),
            files_with_matches: matches.len(),
            matches,
            index_status: status,
            truncated,
            fully_degraded: false,
            engine_capped: false,
        }
    }

    /// Creates an `AppContext` whose configured project root is `project_root`.
    fn context_for(project_root: &Path) -> AppContext {
        AppContext::new(
            Box::new(crate::parser::TreeSitterProvider::new()),
            crate::config::Config {
                project_root: Some(project_root.to_path_buf()),
                ..crate::config::Config::default()
            },
        )
    }

    #[test]
    fn single_root_uses_requested_max() {
        // Resolve a real single-root scope instead of asserting on a hand-built struct.
        let project = tempfile::tempdir().expect("project");
        let ctx = context_for(project.path());
        let only = project.path().join("only");
        std::fs::create_dir_all(&only).expect("only");
        let paths = serde_json::json!([only.display().to_string()]);

        let scope = resolve_grep_scope(&ctx, Some(&paths), 10, "test").expect("scope");

        assert_eq!(scope.roots.len(), 1);
        assert!(!scope.multi_root);
        assert_eq!(scope.per_root_max, 10);
    }

    #[test]
    fn multi_root_uses_double_per_root_max() {
        let project = tempfile::tempdir().expect("project");
        let ctx = context_for(project.path());
        let left = project.path().join("left");
        let right = project.path().join("right");
        std::fs::create_dir_all(&left).expect("left");
        std::fs::create_dir_all(&right).expect("right");
        let paths = serde_json::json!([left.display().to_string(), right.display().to_string()]);

        let scope = resolve_grep_scope(&ctx, Some(&paths), 10, "test").expect("scope");

        assert!(scope.multi_root);
        assert_eq!(scope.per_root_max, 20);
    }

    #[test]
    fn filter_root_is_project_for_in_project_and_search_root_for_external_unindexed() {
        let project = PathBuf::from("/project");
        let in_project = compute_filter_root(&project, Path::new("/project/src"), true, false);
        let external = compute_filter_root(&project, Path::new("/tmp/external"), false, true);
        assert_eq!(in_project, project);
        assert_eq!(external, PathBuf::from("/tmp/external"));
    }

    #[test]
    fn weakest_status_orders_disabled_fallback_building_ready() {
        assert_eq!(
            weakest_index_status(IndexStatus::Ready, IndexStatus::Ready),
            IndexStatus::Ready
        );
        assert_eq!(
            weakest_index_status(IndexStatus::Ready, IndexStatus::Building),
            IndexStatus::Building
        );
        assert_eq!(
            weakest_index_status(IndexStatus::Building, IndexStatus::Fallback),
            IndexStatus::Fallback
        );
        assert_eq!(
            weakest_index_status(IndexStatus::Fallback, IndexStatus::Disabled),
            IndexStatus::Disabled
        );
    }

    #[test]
    fn merge_dedupes_by_canonical_file_line_column() {
        let temp = tempfile::tempdir().expect("temp");
        let file = temp.path().join("file.rs");
        std::fs::write(&file, "needle").expect("write");
        // A symlink to the same file must collapse onto the same canonical key.
        let symlink = temp.path().join("link.rs");
        #[cfg(unix)]
        std::os::unix::fs::symlink(&file, &symlink).expect("symlink");
        #[cfg(windows)]
        std::os::windows::fs::symlink_file(&file, &symlink).expect("symlink");

        let merged = merge_grep_results(
            vec![
                result(vec![grep_match(&file, 1, 1)], false, IndexStatus::Ready),
                result(vec![grep_match(&symlink, 1, 1)], false, IndexStatus::Ready),
            ],
            temp.path(),
            10,
        );

        assert_eq!(merged.matches.len(), 1);
    }

    #[test]
    fn merge_truncated_when_child_truncated_or_pre_merge_exceeds_max() {
        let root = Path::new("/project");

        // A truncated child makes the merged result truncated.
        let child = merge_grep_results(
            vec![result(
                vec![grep_match(Path::new("/project/a.rs"), 1, 1)],
                true,
                IndexStatus::Ready,
            )],
            root,
            10,
        );
        assert!(child.truncated);

        // Two untruncated children whose combined total exceeds the limit.
        let many = merge_grep_results(
            vec![
                result(
                    vec![grep_match(Path::new("/project/a.rs"), 1, 1)],
                    false,
                    IndexStatus::Ready,
                ),
                result(
                    vec![grep_match(Path::new("/project/b.rs"), 1, 1)],
                    false,
                    IndexStatus::Ready,
                ),
            ],
            root,
            1,
        );
        assert!(many.truncated);
    }
}