Skip to main content

vtcode_core/tools/
grep_file.rs

1//! Helper that owns the debounce/cancellation logic for `grep_file` operations.
2//!
3//! This module manages the orchestration of ripgrep searches, implementing
4//! debounce and cancellation logic to ensure responsive and efficient searches.
5//!
6//! It works as follows:
7//! 1. First query starts a debounce timer.
8//! 2. While the timer is pending, the latest query from the user is stored.
9//! 3. When the timer fires, it is cleared, and a search is done for the most
10//!    recent query.
11//! 4. If there is an in-flight search that is not a prefix of the latest thing
12//!    the user typed, it is cancelled.
13
14use super::file_search_bridge::{self, FileSearchConfig};
15use super::grep_cache::GrepSearchCache;
16use anyhow::{Context, Result};
17use serde_json::{self, Value};
18use std::num::NonZeroUsize;
19use std::path::PathBuf;
20use std::sync::Arc;
21use std::sync::Mutex;
22use std::sync::OnceLock;
23use std::sync::atomic::AtomicBool;
24use std::sync::atomic::Ordering;
25use std::thread;
26use std::time::Duration;
27use tokio::task::spawn_blocking;
28use tracing::warn;
29
/// Maximum number of search results to return - AGENTS.md requires max 5 results.
///
/// Used as the fallback whenever `GrepSearchInput::max_results` is `None`.
/// The `unwrap` runs in const context on a non-zero literal.
const MAX_SEARCH_RESULTS: NonZeroUsize = NonZeroUsize::new(5).unwrap();

/// Optimal number of threads for searching, calculated based on CPU count.
///
/// Filled in lazily by the first call to `optimal_search_threads`.
static OPTIMAL_SEARCH_THREADS: OnceLock<NonZeroUsize> = OnceLock::new();
35
36/// Calculate optimal number of search threads based on available CPU cores
37/// Uses 75% of cores, clamped between 2 and 8 threads
38fn optimal_search_threads() -> NonZeroUsize {
39    *OPTIMAL_SEARCH_THREADS.get_or_init(|| {
40        let cpu_count = num_cpus::get();
41        // Use 75% of cores for better parallelism, min 2, max 8
42        let threads = (cpu_count * 3 / 4).clamp(2, 8);
43        NonZeroUsize::new(threads).unwrap_or(NonZeroUsize::new(2).unwrap())
44    })
45}
46
/// Maximum bytes to keep in a single grep response before truncation.
///
/// Installed as `max_result_bytes` by `GrepSearchInput::with_defaults`.
const DEFAULT_MAX_RESULT_BYTES: usize = 32 * 1024;

/// Default timeout for blocking grep invocations.
///
/// `perform_search` falls back to this when the input carries no `timeout`.
const DEFAULT_SEARCH_TIMEOUT: Duration = Duration::from_secs(5);
52
53use vtcode_commons::exclusions::DEFAULT_IGNORE_GLOBS;
54
/// How long to wait after a keystroke before firing the first search when none
/// is currently running. Keeps early queries more meaningful.
const SEARCH_DEBOUNCE: Duration = Duration::from_millis(150);

/// Poll interval when waiting for an active search to complete.
///
/// The debounce worker sleeps this long between checks of `active_search`.
const ACTIVE_SEARCH_COMPLETE_POLL_INTERVAL: Duration = Duration::from_millis(20);
61
62use serde::{Deserialize, Serialize};
63
/// Input parameters for ripgrep search.
///
/// Every optional field left as `None` falls back to the default chosen by
/// `run_ripgrep_backend` (or by ripgrep itself when no flag is emitted).
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct GrepSearchInput {
    /// Pattern handed to ripgrep (a regex unless `literal` is `Some(true)`).
    pub pattern: String,
    /// File or directory to search.
    pub path: String,
    /// `Some(true)` forces case-sensitive, `Some(false)` case-insensitive;
    /// `None` selects ripgrep's smart-case.
    pub case_sensitive: Option<bool>,
    /// Treat `pattern` as a fixed string instead of a regex.
    pub literal: Option<bool>,
    /// Glob forwarded to ripgrep's `--glob`.
    pub glob_pattern: Option<String>,
    /// Number of context lines around each match (`--context`).
    pub context_lines: Option<usize>,
    /// NOTE(review): not read by the ripgrep backend in this file; hidden-file
    /// handling is driven by `search_hidden` — confirm whether both are needed.
    pub include_hidden: Option<bool>,
    /// Cap on "match" entries kept in the result; also sent as `--max-count`.
    pub max_results: Option<usize>,
    /// Whether to respect .gitignore, .ignore files
    pub respect_ignore_files: Option<bool>,
    /// Maximum file size to search (in bytes)
    pub max_file_size: Option<usize>,
    /// Whether to search hidden files/directories
    pub search_hidden: Option<bool>,
    /// Whether to search binary files
    pub search_binary: Option<bool>,
    /// Only print filenames with matches
    pub files_with_matches: Option<bool>,
    /// Search files of a specific type (e.g., "rust", "python")
    pub type_pattern: Option<String>,
    /// Invert the matching
    pub invert_match: Option<bool>,
    /// Match only word boundaries (regexp \b)
    pub word_boundaries: Option<bool>,
    /// Show line numbers
    pub line_number: Option<bool>,
    /// Show column numbers
    pub column: Option<bool>,
    /// Show only matching parts
    pub only_matching: Option<bool>,
    /// Trim whitespace from matches.
    /// NOTE(review): the ripgrep backend in this file does not act on this flag.
    pub trim: Option<bool>,
    /// Optional truncation threshold (bytes)
    pub max_result_bytes: Option<usize>,
    /// Optional timeout for blocking grep
    pub timeout: Option<Duration>,
    /// Additional ignore globs; only applied while ignore files are respected.
    pub extra_ignore_globs: Option<Vec<String>>,
}
91
92impl GrepSearchInput {
93    /// Create a new search input with pattern and path, using sensible defaults
94    #[inline]
95    pub fn new(pattern: String, path: String) -> Self {
96        Self {
97            pattern,
98            path,
99            case_sensitive: None,
100            literal: None,
101            glob_pattern: None,
102            context_lines: None,
103            include_hidden: None,
104            max_results: None,
105            respect_ignore_files: None,
106            max_file_size: None,
107            search_hidden: None,
108            search_binary: None,
109            files_with_matches: None,
110            type_pattern: None,
111            invert_match: None,
112            word_boundaries: None,
113            line_number: None,
114            column: None,
115            only_matching: None,
116            trim: None,
117            max_result_bytes: None,
118            timeout: None,
119            extra_ignore_globs: None,
120        }
121    }
122
123    /// Create a search input with common defaults for internal grep searches
124    #[inline]
125    pub fn with_defaults(pattern: String, path: String) -> Self {
126        Self {
127            pattern,
128            path,
129            case_sensitive: Some(true),
130            literal: Some(false),
131            glob_pattern: None,
132            context_lines: None,
133            include_hidden: Some(false),
134            max_results: Some(MAX_SEARCH_RESULTS.get()),
135            respect_ignore_files: Some(true),
136            max_file_size: None,
137            search_hidden: Some(false),
138            search_binary: Some(false),
139            files_with_matches: Some(false),
140            type_pattern: None,
141            invert_match: Some(false),
142            word_boundaries: Some(false),
143            line_number: Some(true),
144            column: Some(false),
145            only_matching: Some(false),
146            trim: Some(false),
147            max_result_bytes: Some(DEFAULT_MAX_RESULT_BYTES),
148            timeout: Some(DEFAULT_SEARCH_TIMEOUT),
149            extra_ignore_globs: None,
150        }
151    }
152}
153
/// Result of a ripgrep search
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GrepSearchResult {
    /// The pattern that produced this result.
    pub query: String,
    /// Parsed ripgrep JSON entries kept after truncation.
    pub matches: Vec<Value>,
    /// True when entries were dropped because of the result or byte limit.
    pub truncated: bool,
    /// Total number of "match" type entries found before truncation.
    /// When `truncated` is true, this tells the agent how many matches exist
    /// vs how many are returned in `matches`.
    /// The producers in this file set it to `None` when nothing was truncated.
    #[serde(default)]
    pub total_matches: Option<usize>,
}
166
/// State machine for grep_file orchestration.
///
/// Bundles the debounce/cancellation state, the directory that debounced
/// queries search, and a shared result cache.
pub struct GrepSearchManager {
    /// Unified state guarded by one mutex.
    state: Arc<Mutex<SearchState>>,

    /// Directory used as the search path for debounced queries and as the
    /// root for file enumeration.
    search_dir: PathBuf,

    /// LRU cache for search results to avoid redundant searches
    cache: Arc<GrepSearchCache>,
}
177
/// Bookkeeping shared between `on_user_query` and the background workers.
struct SearchState {
    /// Latest query typed by user (updated every keystroke).
    latest_query: String,

    /// true if a search is currently scheduled.
    is_search_scheduled: bool,

    /// If there is an active search, this will be the query being searched.
    active_search: Option<ActiveSearch>,
    /// Most recent result stored by a search worker; exposed via `last_result`.
    last_result: Option<GrepSearchResult>,
}
189
/// Describes the search that currently occupies the single "in-flight" slot.
struct ActiveSearch {
    /// Query text the in-flight search was started with.
    query: String,
    /// Set to `true` to mark the search obsolete; workers also compare this
    /// `Arc` by pointer to check that the slot still belongs to them.
    cancellation_token: Arc<AtomicBool>,
}
194
195impl GrepSearchManager {
196    pub fn new(search_dir: PathBuf) -> Self {
197        Self {
198            state: Arc::new(Mutex::new(SearchState {
199                latest_query: String::new(),
200                is_search_scheduled: false,
201                active_search: None,
202                last_result: None,
203            })),
204            search_dir,
205            cache: Arc::new(GrepSearchCache::new(100)), // Cache up to 100 recent searches
206        }
207    }
208
209    fn cached_result(cache: &GrepSearchCache, input: &GrepSearchInput) -> Option<GrepSearchResult> {
210        cache.get(input).map(|cached| GrepSearchResult {
211            query: cached.query.clone(),
212            matches: cached.matches.clone(),
213            truncated: cached.truncated,
214            total_matches: cached.total_matches,
215        })
216    }
217
218    /// Call whenever the user edits the search query.
219    pub fn on_user_query(&self, query: &str) {
220        {
221            let mut st = match self.state.lock() {
222                Ok(state) => state,
223                Err(err) => {
224                    warn!("grep search state lock poisoned while handling query update: {err}");
225                    return;
226                }
227            };
228            if query != st.latest_query {
229                st.latest_query.clear();
230                st.latest_query.push_str(query);
231            } else {
232                return;
233            }
234
235            // If there is an in-flight search that is definitely obsolete,
236            // cancel it now.
237            if let Some(active_search) = &st.active_search
238                && !query.starts_with(&active_search.query)
239            {
240                active_search
241                    .cancellation_token
242                    .store(true, Ordering::Relaxed);
243                st.active_search = None;
244            }
245
246            // Schedule a search to run after debounce.
247            if !st.is_search_scheduled {
248                st.is_search_scheduled = true;
249            } else {
250                return;
251            }
252        }
253
254        // If we are here, we set `st.is_search_scheduled = true` before
255        // dropping the lock. This means we are the only thread that can spawn a
256        // debounce timer.
257        let state = self.state.clone();
258        let search_dir = self.search_dir.clone();
259        let cache = self.cache.clone();
260        // Run debounce and search spawn on a blocking thread to avoid
261        // blocking the async runtime or reader threads.
262        spawn_blocking(move || {
263            // Always do a minimum debounce, but then poll until the
264            // `active_search` is cleared.
265            thread::sleep(SEARCH_DEBOUNCE);
266            loop {
267                let active_is_none = match state.lock() {
268                    Ok(st) => st.active_search.is_none(),
269                    Err(err) => {
270                        warn!(
271                            "grep search state lock poisoned while waiting for active search: {err}"
272                        );
273                        return;
274                    }
275                };
276                if active_is_none {
277                    break;
278                }
279                thread::sleep(ACTIVE_SEARCH_COMPLETE_POLL_INTERVAL);
280            }
281
282            // The debounce timer has expired, so start a search using the
283            // latest query.
284            let cancellation_token = Arc::new(AtomicBool::new(false));
285            let token = cancellation_token.clone();
286            let query = {
287                let mut st = match state.lock() {
288                    Ok(state) => state,
289                    Err(err) => {
290                        warn!(
291                            "grep search state lock poisoned while preparing debounced search: {err}"
292                        );
293                        return;
294                    }
295                };
296                let query = st.latest_query.clone();
297                st.is_search_scheduled = false;
298                st.active_search = Some(ActiveSearch {
299                    query: query.clone(),
300                    cancellation_token: token,
301                });
302                query
303            };
304
305            GrepSearchManager::spawn_grep_file(
306                query,
307                search_dir,
308                cancellation_token,
309                state,
310                Some(cache),
311            );
312        });
313    }
314
315    /// Retrieve the last successful search result
316    pub fn last_result(&self) -> Option<GrepSearchResult> {
317        match self.state.lock() {
318            Ok(st) => st.last_result.clone(),
319            Err(err) => {
320                warn!("grep search state lock poisoned while reading last result: {err}");
321                None
322            }
323        }
324    }
325
    /// Run the search described by `input` and return
    /// `(entries, truncated, total_match_count)`.
    ///
    /// Delegates directly to `run_ripgrep_backend`, the only backend visible
    /// in this file.
    fn execute_with_backends(input: &GrepSearchInput) -> Result<(Vec<Value>, bool, usize)> {
        Self::run_ripgrep_backend(input)
    }
329
330    fn run_ripgrep_backend(input: &GrepSearchInput) -> Result<(Vec<Value>, bool, usize)> {
331        use std::process::Command;
332
333        let mut cmd = Command::new("rg");
334        cmd.arg("-j")
335            .arg(optimal_search_threads().get().to_string());
336
337        // Add support for respecting ignore files (default is to respect them)
338        if !input.respect_ignore_files.unwrap_or(true) {
339            cmd.arg("--no-ignore");
340        }
341
342        // Add support for searching hidden files (default is not to search hidden)
343        if input.search_hidden.unwrap_or(false) {
344            cmd.arg("--hidden");
345        }
346
347        // Add support for searching binary files
348        if input.search_binary.unwrap_or(false) {
349            cmd.arg("--binary");
350        }
351
352        // Add support for files with matches only
353        if input.files_with_matches.unwrap_or(false) {
354            cmd.arg("--files-with-matches");
355        }
356
357        // Add support for file type filtering
358        if let Some(type_pattern) = &input.type_pattern {
359            cmd.arg("--type").arg(type_pattern);
360        }
361
362        // Add support for max file size
363        if let Some(max_file_size) = input.max_file_size {
364            cmd.arg("--max-filesize").arg(format!("{}B", max_file_size));
365        }
366
367        // Case sensitivity
368        if let Some(case_sensitive) = input.case_sensitive {
369            if case_sensitive {
370                cmd.arg("--case-sensitive");
371            } else {
372                cmd.arg("--ignore-case");
373            }
374        } else {
375            // Default to smart case if not specified
376            cmd.arg("--smart-case");
377        }
378
379        // Invert match
380        if input.invert_match.unwrap_or(false) {
381            cmd.arg("--invert-match");
382        }
383
384        // Word boundaries
385        if input.word_boundaries.unwrap_or(false) {
386            cmd.arg("--word-regexp");
387        }
388
389        // Line numbers
390        if input.line_number.unwrap_or(true) {
391            // Default to true to maintain context
392            cmd.arg("--line-number");
393        } else {
394            cmd.arg("--no-line-number");
395        }
396
397        // Column numbers
398        if input.column.unwrap_or(false) {
399            cmd.arg("--column");
400        }
401
402        // Only matching parts
403        if input.only_matching.unwrap_or(false) {
404            cmd.arg("--only-matching");
405        }
406
407        // Trim whitespace (handled by not adding the --no-unicode flag, which is default)
408        if input.trim.unwrap_or(false) {
409            // This is handled in post-processing, not as a flag
410        }
411
412        if let Some(literal) = input.literal
413            && literal
414        {
415            cmd.arg("--fixed-strings");
416        }
417
418        if let Some(glob_pattern) = &input.glob_pattern {
419            cmd.arg("--glob").arg(glob_pattern);
420        }
421
422        if input.respect_ignore_files.unwrap_or(true) {
423            for pattern in DEFAULT_IGNORE_GLOBS {
424                cmd.arg("--glob").arg(format!("!{}", pattern));
425            }
426            if let Some(extra) = &input.extra_ignore_globs {
427                for pattern in extra {
428                    cmd.arg("--glob").arg(format!("!{}", pattern));
429                }
430            }
431        }
432
433        if let Some(context_lines) = input.context_lines {
434            cmd.arg("--context").arg(context_lines.to_string());
435        }
436
437        let max_results = input.max_results.unwrap_or(MAX_SEARCH_RESULTS.get());
438        cmd.arg("--max-count").arg(max_results.to_string());
439
440        // Use JSON output format for structured results
441        cmd.arg("--json");
442
443        cmd.arg(&input.pattern);
444        cmd.arg(&input.path);
445
446        let output = cmd.output().with_context(|| {
447            format!("failed to execute ripgrep for pattern '{}'", input.pattern)
448        })?;
449
450        let output_str = String::from_utf8_lossy(&output.stdout);
451        let matches: Vec<Value> = output_str
452            .lines()
453            .filter_map(|line| serde_json::from_str::<Value>(line).ok())
454            .collect();
455
456        Ok(Self::finalize_matches(matches, input))
457    }
458
459    fn finalize_matches(
460        mut matches: Vec<Value>,
461        input: &GrepSearchInput,
462    ) -> (Vec<Value>, bool, usize) {
463        let mut truncated = false;
464        let max_results = input.max_results.unwrap_or(MAX_SEARCH_RESULTS.get());
465
466        if max_results == 0 {
467            return (Vec::new(), !matches.is_empty(), 0);
468        }
469
470        // Count total "match" type entries before any truncation.
471        let total_match_count = matches
472            .iter()
473            .filter(|e| e.get("type").and_then(Value::as_str) == Some("match"))
474            .count();
475
476        // Count only "match" type entries (not "context", "begin", "end") so that
477        // context lines don't crowd out actual matches from the result set.
478        let mut match_count = 0usize;
479        let mut cut_index = matches.len();
480        for (i, entry) in matches.iter().enumerate() {
481            let is_match = entry
482                .get("type")
483                .and_then(Value::as_str)
484                .is_some_and(|t| t == "match");
485            if is_match {
486                match_count += 1;
487                if match_count >= max_results {
488                    // Keep everything up to and including this match, plus any
489                    // trailing context lines that belong to it.
490                    cut_index = i + 1;
491                    // Advance past trailing context lines for this match.
492                    for rest in matches.iter().skip(i + 1) {
493                        let tp = rest.get("type").and_then(Value::as_str);
494                        if tp == Some("context") {
495                            cut_index += 1;
496                        } else {
497                            break;
498                        }
499                    }
500                    break;
501                }
502            }
503        }
504        // Check if there are more match-type entries beyond our cut point.
505        if matches[cut_index..]
506            .iter()
507            .any(|e| e.get("type").and_then(Value::as_str) == Some("match"))
508        {
509            truncated = true;
510        }
511        if cut_index < matches.len() {
512            matches.truncate(cut_index);
513        }
514
515        if let Some(limit) = input.max_result_bytes {
516            let mut total = 0usize;
517            let mut kept_count = 0;
518            for entry in &matches {
519                let entry_bytes = entry.to_string().len();
520                if total + entry_bytes > limit {
521                    truncated = true;
522                    break;
523                }
524                total += entry_bytes;
525                kept_count += 1;
526            }
527            matches.truncate(kept_count);
528        }
529
530        (matches, truncated, total_match_count)
531    }
532
533    fn spawn_grep_file(
534        query: String,
535        search_dir: PathBuf,
536        cancellation_token: Arc<AtomicBool>,
537        search_state: Arc<Mutex<SearchState>>,
538        cache: Option<Arc<GrepSearchCache>>,
539    ) {
540        // Spawn grep worker on a blocking thread — searching and ripgrep are blocking.
541        spawn_blocking(move || {
542            // Check if cancelled before starting
543            if cancellation_token.load(Ordering::Relaxed) {
544                // Reset the active search state
545                {
546                    let mut st = match search_state.lock() {
547                        Ok(state) => state,
548                        Err(err) => {
549                            warn!("grep search state lock poisoned while cancelling search: {err}");
550                            return;
551                        }
552                    };
553                    if let Some(active_search) = &st.active_search
554                        && Arc::ptr_eq(&active_search.cancellation_token, &cancellation_token)
555                    {
556                        st.active_search = None;
557                    }
558                }
559                return;
560            }
561
562            let input = GrepSearchInput::with_defaults(
563                query.clone(),
564                search_dir.to_string_lossy().into_owned(),
565            );
566
567            // Check cache first if available
568            if let Some(ref cache) = cache
569                && let Some(cached_result) = Self::cached_result(cache, &input)
570            {
571                let mut st = match search_state.lock() {
572                    Ok(state) => state,
573                    Err(err) => {
574                        warn!("grep search state lock poisoned while loading cached result: {err}");
575                        return;
576                    }
577                };
578                st.last_result = Some(cached_result);
579                return;
580            }
581
582            let search_result = GrepSearchManager::execute_with_backends(&input);
583
584            let is_cancelled = cancellation_token.load(Ordering::Relaxed);
585            if !is_cancelled
586                && let Ok((matches, truncated, total_match_count)) = search_result
587                && !matches.is_empty()
588            {
589                let result = GrepSearchResult {
590                    query,
591                    matches,
592                    truncated,
593                    total_matches: if truncated {
594                        Some(total_match_count)
595                    } else {
596                        None
597                    },
598                };
599
600                // Cache the result if cache is available
601                if let Some(ref cache) = cache
602                    && GrepSearchCache::should_cache(&result)
603                {
604                    cache.put(&input, result.clone());
605                }
606
607                let mut st = match search_state.lock() {
608                    Ok(state) => state,
609                    Err(err) => {
610                        warn!("grep search state lock poisoned while storing search result: {err}");
611                        return;
612                    }
613                };
614                st.last_result = Some(result);
615            }
616
617            // Reset the active search state
618            {
619                let mut st = match search_state.lock() {
620                    Ok(state) => state,
621                    Err(err) => {
622                        warn!(
623                            "grep search state lock poisoned while clearing active search: {err}"
624                        );
625                        return;
626                    }
627                };
628                if let Some(active_search) = &st.active_search
629                    && Arc::ptr_eq(&active_search.cancellation_token, &cancellation_token)
630                {
631                    st.active_search = None;
632                }
633            }
634        });
635    }
636
637    /// Perform an actual ripgrep search with the given input parameters
638    pub async fn perform_search(&self, input: GrepSearchInput) -> Result<GrepSearchResult> {
639        // Check cache first
640        if let Some(cached_result) = Self::cached_result(&self.cache, &input) {
641            return Ok(cached_result);
642        }
643
644        let query = input.pattern.clone();
645        let input_clone = input.clone();
646
647        let timeout = input.timeout.unwrap_or(DEFAULT_SEARCH_TIMEOUT);
648        let (matches, truncated, total_match_count) = tokio::time::timeout(
649            timeout,
650            spawn_blocking(move || GrepSearchManager::execute_with_backends(&input_clone)),
651        )
652        .await
653        .context("ripgrep search timed out")?
654        .context("ripgrep search worker panicked")??;
655
656        let result = GrepSearchResult {
657            query,
658            matches,
659            truncated,
660            total_matches: if truncated {
661                Some(total_match_count)
662            } else {
663                None
664            },
665        };
666
667        // Cache the result if it's worth caching (non-empty, successful)
668        if GrepSearchCache::should_cache(&result) {
669            self.cache.put(&input, result.clone());
670        }
671
672        Ok(result)
673    }
674
675    /// Perform file enumeration using the optimized file search bridge
676    ///
677    /// This method uses the vtcode-file-search crate for parallel, fuzzy file discovery.
678    /// It's optimized for:
679    /// - Listing files in large directories
680    /// - Fuzzy filename matching
681    /// - Respecting .gitignore and .ignore files
682    /// - Parallel directory traversal
683    ///
684    /// # Arguments
685    ///
686    /// * `pattern` - Fuzzy search pattern for filenames (e.g., "main", "test.rs")
687    /// * `max_results` - Maximum number of files to return
688    /// * `cancel_flag` - Optional cancellation token for early termination
689    ///
690    /// # Returns
691    ///
692    /// A vector of file paths matching the pattern, sorted by match quality
693    pub fn enumerate_files_with_pattern(
694        &self,
695        pattern: String,
696        max_results: usize,
697        cancel_flag: Option<Arc<AtomicBool>>,
698    ) -> Result<Vec<String>> {
699        let config = FileSearchConfig::new(pattern, self.search_dir.clone())
700            .with_limit(max_results)
701            .respect_gitignore(true);
702
703        let results = file_search_bridge::search_files(config, cancel_flag)?;
704
705        Ok(file_search_bridge::file_matches_only(results.matches)
706            .into_iter()
707            .map(|m| m.path)
708            .collect())
709    }
710
711    /// List all files in the search directory using the file search bridge
712    ///
713    /// This is useful for operations that need to enumerate all discoverable files
714    /// without a specific pattern match.
715    ///
716    /// # Arguments
717    ///
718    /// * `max_results` - Maximum number of files to return
719    /// * `exclude_patterns` - Patterns to exclude from results (glob-style)
720    ///
721    /// # Returns
722    ///
723    /// A vector of file paths
724    pub fn list_all_files(
725        &self,
726        max_results: usize,
727        exclude_patterns: Vec<String>,
728    ) -> Result<Vec<String>> {
729        let mut config = FileSearchConfig::new("".to_string(), self.search_dir.clone())
730            .with_limit(max_results)
731            .respect_gitignore(true);
732
733        for pattern in exclude_patterns {
734            config = config.exclude(pattern);
735        }
736
737        let results = file_search_bridge::search_files(config, None)?;
738
739        Ok(file_search_bridge::file_matches_only(results.matches)
740            .into_iter()
741            .map(|m| m.path)
742            .collect())
743    }
744}
745
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Default input over "." with an explicit `max_results` cap.
    fn capped_input(max_results: usize) -> GrepSearchInput {
        let mut input = GrepSearchInput::with_defaults("pat".into(), ".".into());
        input.max_results = Some(max_results);
        input
    }

    /// A ripgrep-style "match" or "context" entry.
    fn line_entry(kind: &str, line_number: u64, text: &str) -> Value {
        json!({"type": kind, "data": {"line_number": line_number, "lines": {"text": text}}})
    }

    /// A ripgrep-style "begin" or "end" entry for `path`.
    fn file_marker(kind: &str, path: &str) -> Value {
        json!({"type": kind, "data": {"path": {"text": path}}})
    }

    #[test]
    fn finalize_matches_respects_max_bytes() {
        let untyped_entries = || vec![json!({"text": "12345"}), json!({"text": "6789"})];
        let mut input = capped_input(5);

        // Generous budget: both entries fit.
        input.max_result_bytes = Some(100);
        let (kept, truncated, _total) =
            GrepSearchManager::finalize_matches(untyped_entries(), &input);
        assert!(!truncated);
        assert_eq!(kept.len(), 2);

        // Tight budget: only the first entry fits in 20 bytes.
        input.max_result_bytes = Some(20);
        let (kept, truncated, _total) =
            GrepSearchManager::finalize_matches(untyped_entries(), &input);
        assert!(truncated);
        assert_eq!(kept.len(), 1);
    }

    #[test]
    fn finalize_matches_counts_only_match_type_entries() {
        let input = capped_input(2);

        // Shape of real ripgrep JSON output: begin, context, match, context, end.
        let raw = vec![
            file_marker("begin", "Cargo.lock"),
            line_entry("context", 538, "ctx1"),
            line_entry("context", 539, "ctx2"),
            line_entry("match", 553, "match1"),
            line_entry("context", 554, "ctx3"),
            line_entry("context", 555, "ctx4"),
            line_entry("context", 560, "ctx5"),
            line_entry("match", 563, "match2"),
            line_entry("context", 564, "ctx6"),
            file_marker("end", "Cargo.lock"),
        ];

        let (kept, truncated, total) = GrepSearchManager::finalize_matches(raw, &input);

        // The second match sits at index 7 and its trailing context at index 8,
        // so nine entries survive and no match is left behind.
        assert!(!truncated);
        assert_eq!(kept.len(), 9);
        assert_eq!(kept[3]["type"], "match");
        assert_eq!(kept[7]["type"], "match");
        assert_eq!(total, 2);
    }

    #[test]
    fn finalize_matches_truncates_when_more_match_types_than_limit() {
        let input = capped_input(1);

        let raw = vec![
            file_marker("begin", "f.txt"),
            line_entry("match", 1, "m1"),
            line_entry("context", 2, "c1"),
            line_entry("match", 10, "m2"),
            line_entry("context", 11, "c2"),
        ];

        let (kept, truncated, total) = GrepSearchManager::finalize_matches(raw, &input);

        assert!(truncated);
        // Survivors: begin, the first match, and the context right after it.
        assert_eq!(kept.len(), 3);
        assert_eq!(kept[1]["type"], "match");
        assert_eq!(kept[2]["type"], "context");
        // Both match entries of the raw input are counted.
        assert_eq!(total, 2);
    }

    #[test]
    fn test_grep_search_manager_creation() {
        let root = PathBuf::from(".");
        let manager = GrepSearchManager::new(root.clone());
        assert_eq!(manager.search_dir, root);
    }

    #[test]
    fn test_grep_search_input_new() {
        let input = GrepSearchInput::new(String::from("pattern"), String::from("/path/to/search"));
        assert_eq!(input.pattern, "pattern");
        assert_eq!(input.path, "/path/to/search");
        assert!(input.case_sensitive.is_none());
    }

    #[test]
    fn test_grep_search_input_with_defaults() {
        let input = GrepSearchInput::with_defaults(String::from("pattern"), String::from("/path"));
        assert_eq!(input.pattern, "pattern");
        assert_eq!(input.path, "/path");
        assert_eq!(input.case_sensitive, Some(true));
        assert_eq!(input.include_hidden, Some(false));
        assert_eq!(input.max_results, Some(MAX_SEARCH_RESULTS.get()));
    }
}