1use std::collections::HashSet;
2use std::env;
3use std::path::{Path, PathBuf};
4use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
5
6use rayon::prelude::*;
7
8use crate::commands::multi_path::{
9 canonical_key, dedupe_nested_paths, resolve_path_or_multi, SearchPathResolution,
10};
11use crate::context::AppContext;
12use crate::pattern_compile::{CompiledPattern, LiteralSearch};
13use crate::protocol::Response;
14use crate::search_index::{
15 build_path_filters, has_any_project_file_from, read_searchable_text, resolve_search_scope,
16 sort_grep_matches_by_mtime_desc, walk_project_files_from, GrepMatch, GrepResult, IndexStatus,
17};
18
/// Per-request grep options that apply to every search root.
#[derive(Clone, Debug)]
pub struct GrepParams {
    /// Include patterns, forwarded unchanged to the index's `search_grep`
    /// or to `build_path_filters` in the fallback path.
    pub include: Vec<String>,
    /// Exclude patterns, forwarded alongside `include`.
    pub exclude: Vec<String>,
    /// Upper bound on the number of matches in the final result: used
    /// directly for a single root and as the merge cap for several roots.
    pub max_results: usize,
}
25
/// The resolved set of roots for one grep request, as built by
/// `resolve_grep_scope`.
#[derive(Clone, Debug)]
pub struct GrepScope {
    /// Roots to search; `execute` visits them in this order.
    pub roots: Vec<ResolvedRoot>,
    /// `resolve_grep_scope` sets this to `roots.len() > 1`.
    pub multi_root: bool,
    /// Match cap handed to each root when several roots are searched.
    /// `resolve_grep_scope` uses twice the requested maximum (saturating)
    /// for multi-root scopes and the requested maximum otherwise.
    pub per_root_max: usize,
}
32
/// One search root together with the settings `execute_root` needs for it.
#[derive(Clone, Debug)]
pub struct ResolvedRoot {
    /// File or directory to search; a regular file is searched directly
    /// without walking.
    pub search_root: PathBuf,
    /// Root passed to the file walker in the fallback path; chosen by
    /// `compute_filter_root`.
    pub filter_root: PathBuf,
    /// Copied from the scope returned by `resolve_search_scope`; the index is
    /// only consulted for this root when this is `true`.
    pub use_index: bool,
    /// Set by `resolve_grep_scope` to the negation of `use_index`.
    pub is_external: bool,
}
40
41pub fn project_root(ctx: &AppContext) -> PathBuf {
42 let project_root = ctx
43 .config()
44 .project_root
45 .clone()
46 .unwrap_or_else(|| env::current_dir().unwrap_or_default());
47 std::fs::canonicalize(&project_root).unwrap_or(project_root)
48}
49
50pub fn resolve_grep_scope(
51 ctx: &AppContext,
52 paths: Option<&serde_json::Value>,
53 max_results: usize,
54 req_id: &str,
55) -> Result<GrepScope, Response> {
56 let project_root = project_root(ctx);
57 let search_roots = resolve_roots(ctx, paths, &project_root, req_id)?;
58
59 if let Some(missing_root) = search_roots.iter().find(|root| !root.exists()) {
60 return Err(Response::error(
61 req_id,
62 "path_not_found",
63 format!(
64 "grep: search path does not exist: {}",
65 missing_root.display()
66 ),
67 ));
68 }
69
70 let roots = search_roots
71 .into_iter()
72 .map(|search_root| {
73 let scope = resolve_search_scope(&project_root, Some(&search_root.to_string_lossy()));
74 let is_external = !scope.use_index;
75 let filter_root =
76 compute_filter_root(&project_root, &scope.root, scope.use_index, is_external);
77 ResolvedRoot {
78 search_root: scope.root,
79 filter_root,
80 use_index: scope.use_index,
81 is_external,
82 }
83 })
84 .collect::<Vec<_>>();
85
86 let multi_root = roots.len() > 1;
87 let per_root_max = if multi_root {
88 max_results.saturating_mul(2).max(max_results)
89 } else {
90 max_results
91 };
92
93 Ok(GrepScope {
94 roots,
95 multi_root,
96 per_root_max,
97 })
98}
99
/// Chooses the root handed to the file walker for filtering.
///
/// Returns `search_root` only for an external root that does not use the
/// index; every other combination yields `project_root`.
pub fn compute_filter_root(
    project_root: &Path,
    search_root: &Path,
    use_index: bool,
    is_external: bool,
) -> PathBuf {
    let anchor = match (is_external, use_index) {
        (true, false) => search_root,
        _ => project_root,
    };
    anchor.to_path_buf()
}
112
113pub fn scope_has_files(project_root: &Path, scope: &GrepScope) -> bool {
114 scope.roots.iter().any(|root| {
115 if root.search_root.is_file() {
119 return true;
120 }
121 let catch_all =
122 build_path_filters(&["**/*".to_string()], &[]).expect("valid catch-all glob");
123 has_any_project_file_from(&root.filter_root, &root.search_root, &catch_all)
124 || has_any_project_file_from(project_root, &root.search_root, &catch_all)
125 })
126}
127
128pub fn execute(
129 ctx: &AppContext,
130 pattern: &CompiledPattern,
131 scope: &GrepScope,
132 params: &GrepParams,
133) -> GrepResult {
134 let project_root = project_root(ctx);
135 if scope.roots.len() == 1 {
136 return execute_root(
137 ctx,
138 pattern,
139 &scope.roots[0],
140 params,
141 params.max_results,
142 &project_root,
143 );
144 }
145
146 let mut results = Vec::new();
147 for root in &scope.roots {
148 results.push(execute_root(
149 ctx,
150 pattern,
151 root,
152 params,
153 scope.per_root_max,
154 &project_root,
155 ));
156 }
157 merge_grep_results(results, &project_root, params.max_results)
158}
159
160fn resolve_roots(
161 ctx: &AppContext,
162 paths: Option<&serde_json::Value>,
163 project_root: &Path,
164 req_id: &str,
165) -> Result<Vec<PathBuf>, Response> {
166 let Some(paths) = paths else {
167 return Ok(vec![resolve_search_scope(project_root, None).root]);
168 };
169 if paths.is_null() {
170 return Ok(vec![resolve_search_scope(project_root, None).root]);
171 }
172 if let Some(path) = paths.as_str() {
173 return match resolve_path_or_multi(
174 path,
175 project_root,
176 |candidate| ctx.validate_path(req_id, candidate),
177 req_id,
178 )? {
179 SearchPathResolution::Single(root) => Ok(vec![root]),
180 SearchPathResolution::Multi(roots) => Ok(roots),
181 };
182 }
183 if let Some(items) = paths.as_array() {
184 let mut roots = Vec::with_capacity(items.len());
185 for item in items {
186 let Some(path) = item.as_str() else {
187 return Err(Response::error(
188 req_id,
189 "invalid_request",
190 "grep: path array entries must be strings",
191 ));
192 };
193 let validated = ctx.validate_path(req_id, Path::new(path))?;
194 let raw = validated.to_string_lossy();
195 roots.push(resolve_search_scope(project_root, Some(raw.as_ref())).root);
196 }
197 let roots = dedupe_nested_paths(roots);
198 if roots.is_empty() {
199 Ok(vec![resolve_search_scope(project_root, None).root])
200 } else {
201 Ok(roots)
202 }
203 } else {
204 Err(Response::error(
205 req_id,
206 "invalid_request",
207 "grep: path must be a string, array of strings, or null",
208 ))
209 }
210}
211
212fn execute_root(
213 ctx: &AppContext,
214 pattern: &CompiledPattern,
215 root: &ResolvedRoot,
216 params: &GrepParams,
217 max_results: usize,
218 project_root: &Path,
219) -> GrepResult {
220 if root.search_root.is_file() {
225 let index_status = if root.use_index {
226 current_index_status(ctx)
227 } else {
228 IndexStatus::Fallback
229 };
230 return grep_explicit_file(&root.search_root, pattern, max_results, index_status);
231 }
232
233 let search_index = ctx.search_index().borrow();
234 match search_index.as_ref() {
235 Some(index) if index.ready && root.use_index => index.search_grep(
236 pattern,
237 ¶ms.include,
238 ¶ms.exclude,
239 &root.search_root,
240 max_results,
241 ),
242 _ => {
243 let index_status = if root.use_index {
244 current_index_status(ctx)
245 } else {
246 IndexStatus::Fallback
247 };
248 fallback_grep(
249 project_root,
250 &root.search_root,
251 &root.filter_root,
252 pattern,
253 ¶ms.include,
254 ¶ms.exclude,
255 max_results,
256 index_status,
257 )
258 }
259 }
260}
261
262fn grep_explicit_file(
269 file: &Path,
270 pattern: &CompiledPattern,
271 max_results: usize,
272 index_status: IndexStatus,
273) -> GrepResult {
274 let total_matches = AtomicUsize::new(0);
275 let files_searched = AtomicUsize::new(0);
276 let files_with_matches = AtomicUsize::new(0);
277 let truncated = AtomicBool::new(false);
278 let engine_capped = AtomicBool::new(false);
279 let stop_after = max_results.saturating_mul(2);
280
281 let matches = fallback_search_file(
282 &file.to_path_buf(),
283 pattern,
284 max_results,
285 stop_after,
286 &total_matches,
287 &files_searched,
288 &files_with_matches,
289 &truncated,
290 &engine_capped,
291 );
292
293 GrepResult {
294 total_matches: total_matches.load(Ordering::Relaxed),
295 matches,
296 files_searched: files_searched.load(Ordering::Relaxed),
297 files_with_matches: files_with_matches.load(Ordering::Relaxed),
298 index_status,
299 truncated: truncated.load(Ordering::Relaxed),
300 fully_degraded: false,
301 engine_capped: engine_capped.load(Ordering::Relaxed),
302 }
303}
304
305pub fn merge_grep_results(
306 results: Vec<GrepResult>,
307 project_root: &Path,
308 max_results: usize,
309) -> GrepResult {
310 let mut matches = Vec::new();
311 let mut total_matches = 0usize;
312 let mut files_searched = 0usize;
313 let mut files_with_matches = 0usize;
314 let mut index_status = IndexStatus::Ready;
315 let mut any_child_truncated = false;
316 let mut fully_degraded = false;
317 let mut engine_capped = false;
318 let mut seen_match_keys = HashSet::new();
319
320 for result in results {
321 total_matches += result.total_matches;
322 files_searched += result.files_searched;
323 files_with_matches += result.files_with_matches;
324 index_status = weakest_index_status(index_status, result.index_status);
325 any_child_truncated |= result.truncated;
326 fully_degraded |= result.fully_degraded;
327 engine_capped |= result.engine_capped;
328
329 for grep_match in result.matches {
330 let file_key = canonical_key(&grep_match.file);
331 let match_key = (file_key, grep_match.line, grep_match.column);
332 if seen_match_keys.insert(match_key) {
333 matches.push(grep_match);
334 }
335 }
336 }
337
338 sort_grep_matches_by_mtime_desc(&mut matches, project_root);
339 if matches.len() > max_results {
340 matches.truncate(max_results);
341 }
342
343 GrepResult {
344 matches,
345 total_matches,
346 files_searched,
347 files_with_matches,
348 index_status,
349 truncated: any_child_truncated || total_matches > max_results,
350 fully_degraded,
351 engine_capped,
352 }
353}
354
355pub fn weakest_index_status(left: IndexStatus, right: IndexStatus) -> IndexStatus {
356 match (left, right) {
357 (IndexStatus::Disabled, _) | (_, IndexStatus::Disabled) => IndexStatus::Disabled,
358 (IndexStatus::Fallback, _) | (_, IndexStatus::Fallback) => IndexStatus::Fallback,
359 (IndexStatus::Building, _) | (_, IndexStatus::Building) => IndexStatus::Building,
360 (IndexStatus::Ready, IndexStatus::Ready) => IndexStatus::Ready,
361 }
362}
363
364fn fallback_grep(
365 project_root: &Path,
366 search_root: &Path,
367 filter_root: &Path,
368 pattern: &CompiledPattern,
369 include: &[String],
370 exclude: &[String],
371 max_results: usize,
372 index_status: IndexStatus,
373) -> GrepResult {
374 let filters = build_path_filters(include, exclude).unwrap_or_default();
375 let files = walk_project_files_from(filter_root, search_root, &filters);
376
377 let total_matches = AtomicUsize::new(0);
378 let files_searched = AtomicUsize::new(0);
379 let files_with_matches = AtomicUsize::new(0);
380 let truncated = AtomicBool::new(false);
381 let engine_capped = AtomicBool::new(false);
382 let stop_after = max_results.saturating_mul(2);
383
384 let mut matches = files
385 .par_iter()
386 .map(|file| {
387 fallback_search_file(
388 file,
389 pattern,
390 max_results,
391 stop_after,
392 &total_matches,
393 &files_searched,
394 &files_with_matches,
395 &truncated,
396 &engine_capped,
397 )
398 })
399 .reduce(Vec::new, |mut left, mut right| {
400 left.append(&mut right);
401 left
402 });
403
404 sort_grep_matches_by_mtime_desc(&mut matches, project_root);
405
406 GrepResult {
407 total_matches: total_matches.load(Ordering::Relaxed),
408 matches,
409 files_searched: files_searched.load(Ordering::Relaxed),
410 files_with_matches: files_with_matches.load(Ordering::Relaxed),
411 index_status,
412 truncated: truncated.load(Ordering::Relaxed),
413 fully_degraded: true,
414 engine_capped: engine_capped.load(Ordering::Relaxed),
415 }
416}
417
418fn fallback_search_file(
419 file: &PathBuf,
420 pattern: &CompiledPattern,
421 max_results: usize,
422 stop_after: usize,
423 total_matches: &AtomicUsize,
424 files_searched: &AtomicUsize,
425 files_with_matches: &AtomicUsize,
426 truncated: &AtomicBool,
427 engine_capped: &AtomicBool,
428) -> Vec<GrepMatch> {
429 if should_stop_fallback_search(truncated, total_matches, stop_after) {
430 engine_capped.store(true, Ordering::Relaxed);
431 return Vec::new();
432 }
433
434 let Some(content) = read_searchable_text(file) else {
435 return Vec::new();
436 };
437 files_searched.fetch_add(1, Ordering::Relaxed);
438
439 let line_starts = line_starts(&content);
440 let mut seen_lines = HashSet::new();
441 let mut matched_this_file = false;
442 let mut matches = Vec::new();
443
444 match pattern {
445 CompiledPattern::Literal(literal) => search_literal_in_text(
446 file,
447 &content,
448 &line_starts,
449 literal,
450 max_results,
451 stop_after,
452 total_matches,
453 &mut seen_lines,
454 truncated,
455 engine_capped,
456 &mut matched_this_file,
457 &mut matches,
458 ),
459 CompiledPattern::Regex { compiled, .. } => {
460 for matched in compiled.find_iter(content.as_bytes()) {
461 if should_stop_fallback_search(truncated, total_matches, stop_after) {
462 engine_capped.store(true, Ordering::Relaxed);
463 break;
464 }
465
466 let (line, column, line_text) =
467 line_details(&content, &line_starts, matched.start());
468 if !seen_lines.insert(line) {
469 continue;
470 }
471
472 matched_this_file = true;
473 let match_number = total_matches.fetch_add(1, Ordering::Relaxed) + 1;
474 if match_number > max_results {
475 truncated.store(true, Ordering::Relaxed);
476 break;
477 }
478
479 matches.push(GrepMatch {
480 file: file.clone(),
481 line,
482 column,
483 line_text,
484 match_text: String::from_utf8_lossy(matched.as_bytes()).into_owned(),
485 });
486 }
487 }
488 }
489
490 if matched_this_file {
491 files_with_matches.fetch_add(1, Ordering::Relaxed);
492 }
493
494 matches
495}
496
497fn search_literal_in_text(
498 file: &Path,
499 content: &str,
500 line_starts: &[usize],
501 literal: &LiteralSearch,
502 max_results: usize,
503 stop_after: usize,
504 total_matches: &AtomicUsize,
505 seen_lines: &mut HashSet<u32>,
506 truncated: &AtomicBool,
507 engine_capped: &AtomicBool,
508 matched_this_file: &mut bool,
509 matches: &mut Vec<GrepMatch>,
510) {
511 let content_bytes = content.as_bytes();
512 let search_content;
513 let haystack = if literal.case_insensitive_ascii {
514 search_content = content_bytes.to_ascii_lowercase();
515 search_content.as_slice()
516 } else {
517 content_bytes
518 };
519 let finder = memchr::memmem::Finder::new(&literal.needle);
520 let mut start = 0usize;
521
522 while let Some(position) = finder.find(&haystack[start..]) {
523 if should_stop_fallback_search(truncated, total_matches, stop_after) {
524 engine_capped.store(true, Ordering::Relaxed);
525 break;
526 }
527
528 let offset = start + position;
529 start = offset + 1;
530 let (line, column, line_text) = line_details(content, line_starts, offset);
531 if !seen_lines.insert(line) {
532 continue;
533 }
534
535 *matched_this_file = true;
536 let match_number = total_matches.fetch_add(1, Ordering::Relaxed) + 1;
537 if match_number > max_results {
538 truncated.store(true, Ordering::Relaxed);
539 break;
540 }
541
542 let end = offset + literal.needle.len();
543 matches.push(GrepMatch {
544 file: file.to_path_buf(),
545 line,
546 column,
547 line_text,
548 match_text: String::from_utf8_lossy(&content_bytes[offset..end]).into_owned(),
549 });
550 }
551}
552
/// Tells a fallback worker whether to abandon the search.
///
/// Stopping requires both that truncation has already been flagged and that
/// the shared match count has reached `stop_after`. The count is only read
/// when the truncation flag is set.
fn should_stop_fallback_search(
    truncated: &AtomicBool,
    total_matches: &AtomicUsize,
    stop_after: usize,
) -> bool {
    if !truncated.load(Ordering::Relaxed) {
        return false;
    }
    total_matches.load(Ordering::Relaxed) >= stop_after
}
560
561pub(crate) fn ripgrep_glob(
562 search_root: &Path,
563 pattern: &str,
564 max_results: usize,
565) -> Option<Vec<PathBuf>> {
566 let filters = build_path_filters(&[pattern.to_string()], &[]).ok()?;
567 let mut files = walk_project_files_from(search_root, search_root, &filters);
568 files.truncate(max_results);
569 Some(files)
570}
571
572fn current_index_status(ctx: &AppContext) -> IndexStatus {
573 if ctx
574 .search_index()
575 .borrow()
576 .as_ref()
577 .is_some_and(|index| index.ready)
578 {
579 IndexStatus::Ready
580 } else if ctx.search_index_rx().borrow().is_some() || ctx.search_index().borrow().is_some() {
581 IndexStatus::Building
582 } else {
583 IndexStatus::Fallback
584 }
585}
586
/// Returns the byte offset at which every line of `content` begins.
///
/// The first entry is always `0`; each `\n` adds the offset just after it, so
/// text ending in a newline gets a final entry equal to `content.len()`.
pub fn line_starts(content: &str) -> Vec<usize> {
    let after_newlines = content
        .bytes()
        .enumerate()
        .filter(|&(_, byte)| byte == b'\n')
        .map(|(index, _)| index + 1);

    std::iter::once(0).chain(after_newlines).collect()
}
596
/// Maps a byte `offset` in `content` to its 1-based line number, 1-based
/// column (counted in characters), and the text of that line.
///
/// `line_starts` must be the table produced by [`line_starts`] for the same
/// `content`. The returned line text excludes the terminating `\n` and any
/// trailing `\r` characters.
///
/// An `offset` past the end of `content` is clamped to the end, and an
/// `offset` inside a multi-byte character is moved back to the start of that
/// character. Byte-oriented matchers can report such offsets, and slicing the
/// text there would otherwise panic.
pub fn line_details(content: &str, line_starts: &[usize], offset: usize) -> (u32, u32, String) {
    let mut offset = offset.min(content.len());
    // Offset 0 is always a boundary, so this terminates. Line starts directly
    // follow an ASCII `\n`, so stepping back never leaves the current line.
    while !content.is_char_boundary(offset) {
        offset -= 1;
    }

    let line_index = match line_starts.binary_search(&offset) {
        Ok(index) => index,
        // Not a line start: the enclosing line is the one just before the
        // insertion point.
        Err(index) => index.saturating_sub(1),
    };
    let line_start = line_starts.get(line_index).copied().unwrap_or(0);
    let line_end = content[line_start..]
        .find('\n')
        .map(|length| line_start + length)
        .unwrap_or(content.len());
    let line_text = content[line_start..line_end]
        .trim_end_matches('\r')
        .to_string();
    let column = content[line_start..offset].chars().count() as u32 + 1;
    (line_index as u32 + 1, column, line_text)
}
613
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a match at `file:line:column` with fixed placeholder text.
    fn grep_match(file: &Path, line: u32, column: u32) -> GrepMatch {
        GrepMatch {
            file: file.to_path_buf(),
            line,
            column,
            line_text: "needle".to_string(),
            match_text: "needle".to_string(),
        }
    }

    /// Builds a child result whose three counters all equal the number of
    /// matches supplied.
    fn result(matches: Vec<GrepMatch>, truncated: bool, status: IndexStatus) -> GrepResult {
        GrepResult {
            total_matches: matches.len(),
            files_searched: matches.len(),
            files_with_matches: matches.len(),
            matches,
            index_status: status,
            truncated,
            fully_degraded: false,
            engine_capped: false,
        }
    }

    // NOTE(review): this test only reads back the fields of a scope it builds
    // by hand; it does not call `resolve_grep_scope`, so it does not verify
    // how `per_root_max` is derived for a single root.
    #[test]
    fn single_root_uses_requested_max() {
        let scope = GrepScope {
            roots: vec![ResolvedRoot {
                search_root: PathBuf::from("/project"),
                filter_root: PathBuf::from("/project"),
                use_index: true,
                is_external: false,
            }],
            multi_root: false,
            per_root_max: 10,
        };
        assert!(!scope.multi_root);
        assert_eq!(scope.per_root_max, 10);
    }

    // Two existing sibling directories inside the project resolve to a
    // multi-root scope whose per-root cap is twice the requested maximum.
    #[test]
    fn multi_root_uses_double_per_root_max() {
        let project = tempfile::tempdir().expect("project");
        let ctx = AppContext::new(
            Box::new(crate::parser::TreeSitterProvider::new()),
            crate::config::Config {
                project_root: Some(project.path().to_path_buf()),
                ..crate::config::Config::default()
            },
        );
        let left = project.path().join("left");
        let right = project.path().join("right");
        std::fs::create_dir_all(&left).expect("left");
        std::fs::create_dir_all(&right).expect("right");
        let paths = serde_json::json!([left.display().to_string(), right.display().to_string()]);

        let scope = resolve_grep_scope(&ctx, Some(&paths), 10, "test").expect("scope");

        assert!(scope.multi_root);
        assert_eq!(scope.per_root_max, 20);
    }

    // Pure path logic: nothing here touches the file system.
    #[test]
    fn filter_root_is_project_for_in_project_and_search_root_for_external_unindexed() {
        let project = PathBuf::from("/project");
        let in_project = compute_filter_root(&project, Path::new("/project/src"), true, false);
        let external = compute_filter_root(&project, Path::new("/tmp/external"), false, true);
        assert_eq!(in_project, project);
        assert_eq!(external, PathBuf::from("/tmp/external"));
    }

    // Each pair checks that the weaker of two adjacent statuses wins.
    #[test]
    fn weakest_status_orders_disabled_fallback_building_ready() {
        assert_eq!(
            weakest_index_status(IndexStatus::Ready, IndexStatus::Building),
            IndexStatus::Building
        );
        assert_eq!(
            weakest_index_status(IndexStatus::Building, IndexStatus::Fallback),
            IndexStatus::Fallback
        );
        assert_eq!(
            weakest_index_status(IndexStatus::Fallback, IndexStatus::Disabled),
            IndexStatus::Disabled
        );
    }

    // The same match reached through a real path and through a symlink to it
    // must collapse to one entry after merging.
    #[test]
    fn merge_dedupes_by_canonical_file_line_column() {
        let temp = tempfile::tempdir().expect("temp");
        let file = temp.path().join("file.rs");
        std::fs::write(&file, "needle").expect("write");
        let symlink = temp.path().join("link.rs");
        // The symlink is only created on unix and windows targets.
        #[cfg(unix)]
        std::os::unix::fs::symlink(&file, &symlink).expect("symlink");
        #[cfg(windows)]
        std::os::windows::fs::symlink_file(&file, &symlink).expect("symlink");

        let merged = merge_grep_results(
            vec![
                result(vec![grep_match(&file, 1, 1)], false, IndexStatus::Ready),
                result(vec![grep_match(&symlink, 1, 1)], false, IndexStatus::Ready),
            ],
            temp.path(),
            10,
        );

        assert_eq!(merged.matches.len(), 1);
    }

    // Covers both truncation triggers: a child already flagged as truncated,
    // and a summed match total that exceeds the merge cap.
    #[test]
    fn merge_truncated_when_child_truncated_or_pre_merge_exceeds_max() {
        let root = Path::new("/project");
        let child = merge_grep_results(
            vec![result(
                vec![grep_match(Path::new("/project/a.rs"), 1, 1)],
                true,
                IndexStatus::Ready,
            )],
            root,
            10,
        );
        assert!(child.truncated);

        // Two matches in total against a cap of one.
        let many = merge_grep_results(
            vec![
                result(
                    vec![grep_match(Path::new("/project/a.rs"), 1, 1)],
                    false,
                    IndexStatus::Ready,
                ),
                result(
                    vec![grep_match(Path::new("/project/b.rs"), 1, 1)],
                    false,
                    IndexStatus::Ready,
                ),
            ],
            root,
            1,
        );
        assert!(many.truncated);
    }
}