1use super::file_search_bridge::{self, FileSearchConfig};
15use super::grep_cache::GrepSearchCache;
16use anyhow::{Context, Result};
17use serde_json::{self, Value};
18use std::num::NonZeroUsize;
19use std::path::PathBuf;
20use std::sync::Arc;
21use std::sync::Mutex;
22use std::sync::OnceLock;
23use std::sync::atomic::AtomicBool;
24use std::sync::atomic::Ordering;
25use std::thread;
26use std::time::Duration;
27use tokio::task::spawn_blocking;
28use tracing::warn;
29
/// Default cap on the number of results per search; used whenever
/// `GrepSearchInput::max_results` is `None`.
const MAX_SEARCH_RESULTS: NonZeroUsize = NonZeroUsize::new(5).unwrap();
32
33static OPTIMAL_SEARCH_THREADS: OnceLock<NonZeroUsize> = OnceLock::new();
35
36fn optimal_search_threads() -> NonZeroUsize {
39 *OPTIMAL_SEARCH_THREADS.get_or_init(|| {
40 let cpu_count = num_cpus::get();
41 let threads = (cpu_count * 3 / 4).clamp(2, 8);
43 NonZeroUsize::new(threads).unwrap_or(NonZeroUsize::new(2).unwrap())
44 })
45}
46
/// Default byte budget (32 KiB) for the serialized entries returned by a search;
/// used by `GrepSearchInput::with_defaults`.
const DEFAULT_MAX_RESULT_BYTES: usize = 32 * 1024;

/// Default time limit for one search; used when `GrepSearchInput::timeout` is `None`.
const DEFAULT_SEARCH_TIMEOUT: Duration = Duration::from_secs(5);
52
53use vtcode_commons::exclusions::DEFAULT_IGNORE_GLOBS;
54
/// How long the debounce worker sleeps before it starts the search for the latest query.
const SEARCH_DEBOUNCE: Duration = Duration::from_millis(150);

/// How often the debounce worker re-checks whether the previous search has finished.
const ACTIVE_SEARCH_COMPLETE_POLL_INTERVAL: Duration = Duration::from_millis(20);
61
62use serde::{Deserialize, Serialize};
63
/// Parameters for one ripgrep invocation.
///
/// Every `Option` field left as `None` falls back to the default noted on the field.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct GrepSearchInput {
    /// Pattern handed to ripgrep.
    pub pattern: String,
    /// File or directory handed to ripgrep as the search path.
    pub path: String,
    /// `Some(true)` => `--case-sensitive`, `Some(false)` => `--ignore-case`,
    /// `None` => `--smart-case`.
    pub case_sensitive: Option<bool>,
    /// `Some(true)` adds `--fixed-strings`.
    pub literal: Option<bool>,
    /// Forwarded as `--glob <pattern>`.
    pub glob_pattern: Option<String>,
    /// Forwarded as `--context <n>`.
    pub context_lines: Option<usize>,
    /// NOTE(review): not read by the ripgrep backend visible in this file;
    /// `search_hidden` is what controls `--hidden` — confirm whether this is intended.
    pub include_hidden: Option<bool>,
    /// Forwarded as `--max-count` and used as the cap on `match` entries kept
    /// afterwards; defaults to `MAX_SEARCH_RESULTS`.
    pub max_results: Option<usize>,
    /// `Some(false)` adds `--no-ignore`; otherwise (default `true`) the default and
    /// extra ignore globs are added as negated `--glob` arguments.
    pub respect_ignore_files: Option<bool>,
    /// Forwarded as `--max-filesize <n>B`.
    pub max_file_size: Option<usize>,
    /// `Some(true)` adds `--hidden`.
    pub search_hidden: Option<bool>,
    /// `Some(true)` adds `--binary`.
    pub search_binary: Option<bool>,
    /// `Some(true)` adds `--files-with-matches`.
    pub files_with_matches: Option<bool>,
    /// Forwarded as `--type <value>`.
    pub type_pattern: Option<String>,
    /// `Some(true)` adds `--invert-match`.
    pub invert_match: Option<bool>,
    /// `Some(true)` adds `--word-regexp`.
    pub word_boundaries: Option<bool>,
    /// Defaults to `true` (`--line-number`); `Some(false)` adds `--no-line-number`.
    pub line_number: Option<bool>,
    /// `Some(true)` adds `--column`.
    pub column: Option<bool>,
    /// `Some(true)` adds `--only-matching`.
    pub only_matching: Option<bool>,
    /// NOTE(review): read by the ripgrep backend, but no flag is emitted for it.
    pub trim: Option<bool>,
    /// Byte budget applied to the serialized entries after the result-count cap.
    pub max_result_bytes: Option<usize>,
    /// Time limit used by `perform_search`; defaults to `DEFAULT_SEARCH_TIMEOUT`.
    pub timeout: Option<Duration>,
    /// Additional globs excluded (as `!glob`) when ignore files are respected.
    pub extra_ignore_globs: Option<Vec<String>>,
}
91
92impl GrepSearchInput {
93 #[inline]
95 pub fn new(pattern: String, path: String) -> Self {
96 Self {
97 pattern,
98 path,
99 case_sensitive: None,
100 literal: None,
101 glob_pattern: None,
102 context_lines: None,
103 include_hidden: None,
104 max_results: None,
105 respect_ignore_files: None,
106 max_file_size: None,
107 search_hidden: None,
108 search_binary: None,
109 files_with_matches: None,
110 type_pattern: None,
111 invert_match: None,
112 word_boundaries: None,
113 line_number: None,
114 column: None,
115 only_matching: None,
116 trim: None,
117 max_result_bytes: None,
118 timeout: None,
119 extra_ignore_globs: None,
120 }
121 }
122
123 #[inline]
125 pub fn with_defaults(pattern: String, path: String) -> Self {
126 Self {
127 pattern,
128 path,
129 case_sensitive: Some(true),
130 literal: Some(false),
131 glob_pattern: None,
132 context_lines: None,
133 include_hidden: Some(false),
134 max_results: Some(MAX_SEARCH_RESULTS.get()),
135 respect_ignore_files: Some(true),
136 max_file_size: None,
137 search_hidden: Some(false),
138 search_binary: Some(false),
139 files_with_matches: Some(false),
140 type_pattern: None,
141 invert_match: Some(false),
142 word_boundaries: Some(false),
143 line_number: Some(true),
144 column: Some(false),
145 only_matching: Some(false),
146 trim: Some(false),
147 max_result_bytes: Some(DEFAULT_MAX_RESULT_BYTES),
148 timeout: Some(DEFAULT_SEARCH_TIMEOUT),
149 extra_ignore_globs: None,
150 }
151 }
152}
153
/// Outcome of one search: the parsed ripgrep JSON entries plus truncation metadata.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GrepSearchResult {
    /// The pattern this result was produced for.
    pub query: String,
    /// Entries kept after the result-count and byte-size limits were applied.
    pub matches: Vec<Value>,
    /// `true` when entries were dropped by either limit.
    pub truncated: bool,
    /// Number of `match` entries seen before limiting; populated only when
    /// `truncated` is `true`. Absent values deserialize as `None`.
    #[serde(default)]
    pub total_matches: Option<usize>,
}
166
/// Coordinates debounced background searches, direct searches, and file listing
/// for one directory.
pub struct GrepSearchManager {
    /// Mutable search bookkeeping, shared with the background workers.
    state: Arc<Mutex<SearchState>>,

    /// Directory used for debounced searches and file enumeration.
    search_dir: PathBuf,

    /// Result cache shared with the background workers.
    cache: Arc<GrepSearchCache>,
}
177
/// Bookkeeping for the debounced search pipeline, protected by the manager's mutex.
struct SearchState {
    /// Most recent query passed to `on_user_query`.
    latest_query: String,

    /// `true` while a debounce worker is pending and has not yet started its search.
    is_search_scheduled: bool,

    /// The search currently considered in flight, if any.
    active_search: Option<ActiveSearch>,
    /// Most recent result stored by a background search.
    last_result: Option<GrepSearchResult>,
}
189
/// Identifies the in-flight background search.
struct ActiveSearch {
    /// Query the search was started with.
    query: String,
    /// Set to `true` to ask the search to discard its result; also used (by pointer
    /// identity) to tell whether a worker still owns the active slot.
    cancellation_token: Arc<AtomicBool>,
}
194
195impl GrepSearchManager {
196 pub fn new(search_dir: PathBuf) -> Self {
197 Self {
198 state: Arc::new(Mutex::new(SearchState {
199 latest_query: String::new(),
200 is_search_scheduled: false,
201 active_search: None,
202 last_result: None,
203 })),
204 search_dir,
205 cache: Arc::new(GrepSearchCache::new(100)), }
207 }
208
209 fn cached_result(cache: &GrepSearchCache, input: &GrepSearchInput) -> Option<GrepSearchResult> {
210 cache.get(input).map(|cached| GrepSearchResult {
211 query: cached.query.clone(),
212 matches: cached.matches.clone(),
213 truncated: cached.truncated,
214 total_matches: cached.total_matches,
215 })
216 }
217
    /// Records a new user query and, if none is pending, schedules a debounced
    /// background search for it.
    ///
    /// Repeating the current query is a no-op. If the running search's query is not a
    /// prefix of the new one, that search is flagged as cancelled and its slot freed.
    /// Only one debounce worker is pending at a time; it picks up whatever
    /// `latest_query` holds when it wakes.
    ///
    /// NOTE(review): the worker is started with `tokio::task::spawn_blocking`, so this
    /// presumably must be called from within a Tokio runtime — confirm against callers.
    pub fn on_user_query(&self, query: &str) {
        // Scope the guard so the lock is released before the worker is spawned.
        {
            let mut st = match self.state.lock() {
                Ok(state) => state,
                Err(err) => {
                    warn!("grep search state lock poisoned while handling query update: {err}");
                    return;
                }
            };
            // Ignore repeats of the current query; otherwise remember the new one
            // (reusing the existing allocation).
            if query != st.latest_query {
                st.latest_query.clear();
                st.latest_query.push_str(query);
            } else {
                return;
            }

            // A running search stays relevant only while its query is a prefix of the
            // new one; otherwise flag it as cancelled and free the active slot.
            if let Some(active_search) = &st.active_search
                && !query.starts_with(&active_search.query)
            {
                active_search
                    .cancellation_token
                    .store(true, Ordering::Relaxed);
                st.active_search = None;
            }

            // At most one debounce worker may be pending; a pending worker will read
            // the updated `latest_query` when it wakes.
            if !st.is_search_scheduled {
                st.is_search_scheduled = true;
            } else {
                return;
            }
        }

        let state = self.state.clone();
        let search_dir = self.search_dir.clone();
        let cache = self.cache.clone();
        spawn_blocking(move || {
            // Debounce: give the user time to keep typing.
            thread::sleep(SEARCH_DEBOUNCE);
            // Wait until no search occupies the active slot.
            loop {
                let active_is_none = match state.lock() {
                    Ok(st) => st.active_search.is_none(),
                    Err(err) => {
                        warn!(
                            "grep search state lock poisoned while waiting for active search: {err}"
                        );
                        return;
                    }
                };
                if active_is_none {
                    break;
                }
                thread::sleep(ACTIVE_SEARCH_COMPLETE_POLL_INTERVAL);
            }

            let cancellation_token = Arc::new(AtomicBool::new(false));
            let token = cancellation_token.clone();
            // Under one lock: snapshot the latest query, clear the scheduled flag, and
            // claim the active slot for the search about to start.
            let query = {
                let mut st = match state.lock() {
                    Ok(state) => state,
                    Err(err) => {
                        warn!(
                            "grep search state lock poisoned while preparing debounced search: {err}"
                        );
                        return;
                    }
                };
                let query = st.latest_query.clone();
                st.is_search_scheduled = false;
                st.active_search = Some(ActiveSearch {
                    query: query.clone(),
                    cancellation_token: token,
                });
                query
            };

            GrepSearchManager::spawn_grep_file(
                query,
                search_dir,
                cancellation_token,
                state,
                Some(cache),
            );
        });
    }
314
315 pub fn last_result(&self) -> Option<GrepSearchResult> {
317 match self.state.lock() {
318 Ok(st) => st.last_result.clone(),
319 Err(err) => {
320 warn!("grep search state lock poisoned while reading last result: {err}");
321 None
322 }
323 }
324 }
325
    /// Runs the search for `input` and returns `(entries, truncated, total_match_count)`.
    ///
    /// Currently delegates directly to the ripgrep backend.
    fn execute_with_backends(input: &GrepSearchInput) -> Result<(Vec<Value>, bool, usize)> {
        Self::run_ripgrep_backend(input)
    }
329
330 fn run_ripgrep_backend(input: &GrepSearchInput) -> Result<(Vec<Value>, bool, usize)> {
331 use std::process::Command;
332
333 let mut cmd = Command::new("rg");
334 cmd.arg("-j")
335 .arg(optimal_search_threads().get().to_string());
336
337 if !input.respect_ignore_files.unwrap_or(true) {
339 cmd.arg("--no-ignore");
340 }
341
342 if input.search_hidden.unwrap_or(false) {
344 cmd.arg("--hidden");
345 }
346
347 if input.search_binary.unwrap_or(false) {
349 cmd.arg("--binary");
350 }
351
352 if input.files_with_matches.unwrap_or(false) {
354 cmd.arg("--files-with-matches");
355 }
356
357 if let Some(type_pattern) = &input.type_pattern {
359 cmd.arg("--type").arg(type_pattern);
360 }
361
362 if let Some(max_file_size) = input.max_file_size {
364 cmd.arg("--max-filesize").arg(format!("{}B", max_file_size));
365 }
366
367 if let Some(case_sensitive) = input.case_sensitive {
369 if case_sensitive {
370 cmd.arg("--case-sensitive");
371 } else {
372 cmd.arg("--ignore-case");
373 }
374 } else {
375 cmd.arg("--smart-case");
377 }
378
379 if input.invert_match.unwrap_or(false) {
381 cmd.arg("--invert-match");
382 }
383
384 if input.word_boundaries.unwrap_or(false) {
386 cmd.arg("--word-regexp");
387 }
388
389 if input.line_number.unwrap_or(true) {
391 cmd.arg("--line-number");
393 } else {
394 cmd.arg("--no-line-number");
395 }
396
397 if input.column.unwrap_or(false) {
399 cmd.arg("--column");
400 }
401
402 if input.only_matching.unwrap_or(false) {
404 cmd.arg("--only-matching");
405 }
406
407 if input.trim.unwrap_or(false) {
409 }
411
412 if let Some(literal) = input.literal
413 && literal
414 {
415 cmd.arg("--fixed-strings");
416 }
417
418 if let Some(glob_pattern) = &input.glob_pattern {
419 cmd.arg("--glob").arg(glob_pattern);
420 }
421
422 if input.respect_ignore_files.unwrap_or(true) {
423 for pattern in DEFAULT_IGNORE_GLOBS {
424 cmd.arg("--glob").arg(format!("!{}", pattern));
425 }
426 if let Some(extra) = &input.extra_ignore_globs {
427 for pattern in extra {
428 cmd.arg("--glob").arg(format!("!{}", pattern));
429 }
430 }
431 }
432
433 if let Some(context_lines) = input.context_lines {
434 cmd.arg("--context").arg(context_lines.to_string());
435 }
436
437 let max_results = input.max_results.unwrap_or(MAX_SEARCH_RESULTS.get());
438 cmd.arg("--max-count").arg(max_results.to_string());
439
440 cmd.arg("--json");
442
443 cmd.arg(&input.pattern);
444 cmd.arg(&input.path);
445
446 let output = cmd.output().with_context(|| {
447 format!("failed to execute ripgrep for pattern '{}'", input.pattern)
448 })?;
449
450 let output_str = String::from_utf8_lossy(&output.stdout);
451 let matches: Vec<Value> = output_str
452 .lines()
453 .filter_map(|line| serde_json::from_str::<Value>(line).ok())
454 .collect();
455
456 Ok(Self::finalize_matches(matches, input))
457 }
458
459 fn finalize_matches(
460 mut matches: Vec<Value>,
461 input: &GrepSearchInput,
462 ) -> (Vec<Value>, bool, usize) {
463 let mut truncated = false;
464 let max_results = input.max_results.unwrap_or(MAX_SEARCH_RESULTS.get());
465
466 if max_results == 0 {
467 return (Vec::new(), !matches.is_empty(), 0);
468 }
469
470 let total_match_count = matches
472 .iter()
473 .filter(|e| e.get("type").and_then(Value::as_str) == Some("match"))
474 .count();
475
476 let mut match_count = 0usize;
479 let mut cut_index = matches.len();
480 for (i, entry) in matches.iter().enumerate() {
481 let is_match = entry
482 .get("type")
483 .and_then(Value::as_str)
484 .is_some_and(|t| t == "match");
485 if is_match {
486 match_count += 1;
487 if match_count >= max_results {
488 cut_index = i + 1;
491 for rest in matches.iter().skip(i + 1) {
493 let tp = rest.get("type").and_then(Value::as_str);
494 if tp == Some("context") {
495 cut_index += 1;
496 } else {
497 break;
498 }
499 }
500 break;
501 }
502 }
503 }
504 if matches[cut_index..]
506 .iter()
507 .any(|e| e.get("type").and_then(Value::as_str) == Some("match"))
508 {
509 truncated = true;
510 }
511 if cut_index < matches.len() {
512 matches.truncate(cut_index);
513 }
514
515 if let Some(limit) = input.max_result_bytes {
516 let mut total = 0usize;
517 let mut kept_count = 0;
518 for entry in &matches {
519 let entry_bytes = entry.to_string().len();
520 if total + entry_bytes > limit {
521 truncated = true;
522 break;
523 }
524 total += entry_bytes;
525 kept_count += 1;
526 }
527 matches.truncate(kept_count);
528 }
529
530 (matches, truncated, total_match_count)
531 }
532
533 fn spawn_grep_file(
534 query: String,
535 search_dir: PathBuf,
536 cancellation_token: Arc<AtomicBool>,
537 search_state: Arc<Mutex<SearchState>>,
538 cache: Option<Arc<GrepSearchCache>>,
539 ) {
540 spawn_blocking(move || {
542 if cancellation_token.load(Ordering::Relaxed) {
544 {
546 let mut st = match search_state.lock() {
547 Ok(state) => state,
548 Err(err) => {
549 warn!("grep search state lock poisoned while cancelling search: {err}");
550 return;
551 }
552 };
553 if let Some(active_search) = &st.active_search
554 && Arc::ptr_eq(&active_search.cancellation_token, &cancellation_token)
555 {
556 st.active_search = None;
557 }
558 }
559 return;
560 }
561
562 let input = GrepSearchInput::with_defaults(
563 query.clone(),
564 search_dir.to_string_lossy().into_owned(),
565 );
566
567 if let Some(ref cache) = cache
569 && let Some(cached_result) = Self::cached_result(cache, &input)
570 {
571 let mut st = match search_state.lock() {
572 Ok(state) => state,
573 Err(err) => {
574 warn!("grep search state lock poisoned while loading cached result: {err}");
575 return;
576 }
577 };
578 st.last_result = Some(cached_result);
579 return;
580 }
581
582 let search_result = GrepSearchManager::execute_with_backends(&input);
583
584 let is_cancelled = cancellation_token.load(Ordering::Relaxed);
585 if !is_cancelled
586 && let Ok((matches, truncated, total_match_count)) = search_result
587 && !matches.is_empty()
588 {
589 let result = GrepSearchResult {
590 query,
591 matches,
592 truncated,
593 total_matches: if truncated {
594 Some(total_match_count)
595 } else {
596 None
597 },
598 };
599
600 if let Some(ref cache) = cache
602 && GrepSearchCache::should_cache(&result)
603 {
604 cache.put(&input, result.clone());
605 }
606
607 let mut st = match search_state.lock() {
608 Ok(state) => state,
609 Err(err) => {
610 warn!("grep search state lock poisoned while storing search result: {err}");
611 return;
612 }
613 };
614 st.last_result = Some(result);
615 }
616
617 {
619 let mut st = match search_state.lock() {
620 Ok(state) => state,
621 Err(err) => {
622 warn!(
623 "grep search state lock poisoned while clearing active search: {err}"
624 );
625 return;
626 }
627 };
628 if let Some(active_search) = &st.active_search
629 && Arc::ptr_eq(&active_search.cancellation_token, &cancellation_token)
630 {
631 st.active_search = None;
632 }
633 }
634 });
635 }
636
637 pub async fn perform_search(&self, input: GrepSearchInput) -> Result<GrepSearchResult> {
639 if let Some(cached_result) = Self::cached_result(&self.cache, &input) {
641 return Ok(cached_result);
642 }
643
644 let query = input.pattern.clone();
645 let input_clone = input.clone();
646
647 let timeout = input.timeout.unwrap_or(DEFAULT_SEARCH_TIMEOUT);
648 let (matches, truncated, total_match_count) = tokio::time::timeout(
649 timeout,
650 spawn_blocking(move || GrepSearchManager::execute_with_backends(&input_clone)),
651 )
652 .await
653 .context("ripgrep search timed out")?
654 .context("ripgrep search worker panicked")??;
655
656 let result = GrepSearchResult {
657 query,
658 matches,
659 truncated,
660 total_matches: if truncated {
661 Some(total_match_count)
662 } else {
663 None
664 },
665 };
666
667 if GrepSearchCache::should_cache(&result) {
669 self.cache.put(&input, result.clone());
670 }
671
672 Ok(result)
673 }
674
675 pub fn enumerate_files_with_pattern(
694 &self,
695 pattern: String,
696 max_results: usize,
697 cancel_flag: Option<Arc<AtomicBool>>,
698 ) -> Result<Vec<String>> {
699 let config = FileSearchConfig::new(pattern, self.search_dir.clone())
700 .with_limit(max_results)
701 .respect_gitignore(true);
702
703 let results = file_search_bridge::search_files(config, cancel_flag)?;
704
705 Ok(file_search_bridge::file_matches_only(results.matches)
706 .into_iter()
707 .map(|m| m.path)
708 .collect())
709 }
710
711 pub fn list_all_files(
725 &self,
726 max_results: usize,
727 exclude_patterns: Vec<String>,
728 ) -> Result<Vec<String>> {
729 let mut config = FileSearchConfig::new("".to_string(), self.search_dir.clone())
730 .with_limit(max_results)
731 .respect_gitignore(true);
732
733 for pattern in exclude_patterns {
734 config = config.exclude(pattern);
735 }
736
737 let results = file_search_bridge::search_files(config, None)?;
738
739 Ok(file_search_bridge::file_matches_only(results.matches)
740 .into_iter()
741 .map(|m| m.path)
742 .collect())
743 }
744}
745
/// Unit tests for result limiting and for the basic constructors.
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// The byte budget keeps whole entries only and reports truncation when one is dropped.
    #[test]
    fn finalize_matches_respects_max_bytes() {
        let mut input = GrepSearchInput::with_defaults("pat".into(), ".".into());
        input.max_result_bytes = Some(100);
        input.max_results = Some(5);

        let matches = vec![json!({"text": "12345"}), json!({"text": "6789"})];

        // Both entries fit comfortably within 100 bytes.
        let (kept, truncated, _total) = GrepSearchManager::finalize_matches(matches, &input);
        assert!(!truncated);
        assert_eq!(kept.len(), 2);

        // With 20 bytes only the first serialized entry (16 bytes) fits.
        input.max_result_bytes = Some(20);
        let matches = vec![json!({"text": "12345"}), json!({"text": "6789"})];
        let (kept, truncated, _total) = GrepSearchManager::finalize_matches(matches, &input);
        assert!(truncated);
        assert_eq!(kept.len(), 1);
    }

    /// Only `"type": "match"` entries count towards `max_results`; other entries do not.
    #[test]
    fn finalize_matches_counts_only_match_type_entries() {
        let mut input = GrepSearchInput::with_defaults("pat".into(), ".".into());
        input.max_results = Some(2);

        let matches = vec![
            json!({"type": "begin", "data": {"path": {"text": "Cargo.lock"}}}),
            json!({"type": "context", "data": {"line_number": 538, "lines": {"text": "ctx1"}}}),
            json!({"type": "context", "data": {"line_number": 539, "lines": {"text": "ctx2"}}}),
            json!({"type": "match", "data": {"line_number": 553, "lines": {"text": "match1"}}}),
            json!({"type": "context", "data": {"line_number": 554, "lines": {"text": "ctx3"}}}),
            json!({"type": "context", "data": {"line_number": 555, "lines": {"text": "ctx4"}}}),
            json!({"type": "context", "data": {"line_number": 560, "lines": {"text": "ctx5"}}}),
            json!({"type": "match", "data": {"line_number": 563, "lines": {"text": "match2"}}}),
            json!({"type": "context", "data": {"line_number": 564, "lines": {"text": "ctx6"}}}),
            json!({"type": "end", "data": {"path": {"text": "Cargo.lock"}}}),
        ];

        let (kept, truncated, total) = GrepSearchManager::finalize_matches(matches, &input);
        assert!(!truncated);
        // The cut falls after the second match and its trailing context entry,
        // so the final "end" entry is dropped.
        assert_eq!(kept.len(), 9);
        assert_eq!(kept[3]["type"], "match");
        assert_eq!(kept[7]["type"], "match");
        assert_eq!(total, 2);
    }

    /// Exceeding `max_results` cuts after the last allowed match and its context.
    #[test]
    fn finalize_matches_truncates_when_more_match_types_than_limit() {
        let mut input = GrepSearchInput::with_defaults("pat".into(), ".".into());
        input.max_results = Some(1);

        let matches = vec![
            json!({"type": "begin", "data": {"path": {"text": "f.txt"}}}),
            json!({"type": "match", "data": {"line_number": 1, "lines": {"text": "m1"}}}),
            json!({"type": "context", "data": {"line_number": 2, "lines": {"text": "c1"}}}),
            json!({"type": "match", "data": {"line_number": 10, "lines": {"text": "m2"}}}),
            json!({"type": "context", "data": {"line_number": 11, "lines": {"text": "c2"}}}),
        ];

        let (kept, truncated, total) = GrepSearchManager::finalize_matches(matches, &input);
        assert!(truncated);
        // begin + first match + its trailing context.
        assert_eq!(kept.len(), 3);
        assert_eq!(kept[1]["type"], "match");
        assert_eq!(kept[2]["type"], "context");
        // The total still reflects every match seen before limiting.
        assert_eq!(total, 2);
    }

    /// The manager stores the search directory it was created with.
    #[test]
    fn test_grep_search_manager_creation() {
        let manager = GrepSearchManager::new(PathBuf::from("."));
        assert_eq!(manager.search_dir, PathBuf::from("."));
    }

    /// `new` keeps pattern and path and leaves optional settings unset.
    #[test]
    fn test_grep_search_input_new() {
        let input = GrepSearchInput::new("pattern".to_string(), "/path/to/search".to_string());
        assert_eq!(input.pattern, "pattern");
        assert_eq!(input.path, "/path/to/search");
        assert!(input.case_sensitive.is_none());
    }

    /// `with_defaults` fills in the module defaults explicitly.
    #[test]
    fn test_grep_search_input_with_defaults() {
        let input = GrepSearchInput::with_defaults("pattern".to_string(), "/path".to_string());
        assert_eq!(input.pattern, "pattern");
        assert_eq!(input.path, "/path");
        assert_eq!(input.case_sensitive, Some(true));
        assert_eq!(input.include_hidden, Some(false));
        assert_eq!(input.max_results, Some(MAX_SEARCH_RESULTS.get()));
    }
}