Skip to main content

vtcode_core/tools/summarizers/
search.rs

1//! Search result summarization
2//!
3//! Summarizes grep_file and list_files outputs from full match listings
4//! into concise summaries suitable for LLM context.
5//!
6//! ## Strategy
7//!
8//! Instead of sending all 127 matches across 2,500 tokens, send:
//! "Found 127 matches in 15 files. Key files: src/tools/grep.rs (3),
//! src/tools/list.rs (1). Pattern in: execute_grep(), grep_impl()"
11//!
12//! Target: ~50 tokens vs 2,500 tokens = 98% savings
13
14use super::{Summarizer, truncate_to_tokens};
15use anyhow::Result;
16use hashbrown::HashMap;
17
/// Summarizer for grep_file results.
///
/// Condenses a full match listing into a single line: the total match and
/// file counts, the files with the most matches, and any symbol names spotted
/// on the matching lines. The three limits below bound how much of each is
/// included.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GrepSummarizer {
    /// Maximum number of files to list in summary
    pub max_files: usize,
    /// Maximum number of functions/symbols to mention
    pub max_symbols: usize,
    /// Maximum tokens for entire summary
    pub max_tokens: usize,
}
27
28impl Default for GrepSummarizer {
29    fn default() -> Self {
30        Self {
31            max_files: 5,
32            max_symbols: 5,
33            max_tokens: 100,
34        }
35    }
36}
37
38impl Summarizer for GrepSummarizer {
39    fn summarize(
40        &self,
41        full_output: &str,
42        _metadata: Option<&serde_json::Value>,
43    ) -> Result<String> {
44        // Parse grep output to extract key information
45        let stats = parse_grep_output(full_output);
46
47        // Build concise summary
48        let mut summary = format!(
49            "Found {} matches in {} files",
50            stats.total_matches, stats.unique_files
51        );
52
53        // Add top files if available
54        if !stats.top_files.is_empty() {
55            let file_list: Vec<String> = stats
56                .top_files
57                .iter()
58                .take(self.max_files)
59                .map(|(file, count)| format!("{} ({})", file, count))
60                .collect();
61            summary.push_str(&format!(". Key files: {}", file_list.join(", ")));
62        }
63
64        // Add pattern context if available
65        if !stats.symbols.is_empty() {
66            let symbol_list: Vec<&str> = stats
67                .symbols
68                .iter()
69                .take(self.max_symbols)
70                .map(|s| s.as_str())
71                .collect();
72            summary.push_str(&format!(". Pattern in: {}", symbol_list.join(", ")));
73        }
74
75        // Truncate to token limit
76        Ok(truncate_to_tokens(&summary, self.max_tokens))
77    }
78}
79
/// Summarizer for list_files results.
///
/// Condenses a directory listing into item/file/directory counts followed by
/// a short sample of file names.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ListSummarizer {
    /// Maximum number of directories to mention.
    ///
    /// NOTE(review): the `summarize` implementation in this file does not
    /// read this field; confirm whether other code relies on it.
    pub max_dirs: usize,
    /// Maximum number of sample file names to list in summary
    pub max_files: usize,
    /// Maximum tokens for entire summary
    pub max_tokens: usize,
}
86
87impl Default for ListSummarizer {
88    fn default() -> Self {
89        Self {
90            max_dirs: 3,
91            max_files: 10,
92            max_tokens: 80,
93        }
94    }
95}
96
97impl Summarizer for ListSummarizer {
98    fn summarize(
99        &self,
100        full_output: &str,
101        _metadata: Option<&serde_json::Value>,
102    ) -> Result<String> {
103        let stats = parse_list_output(full_output);
104
105        let mut summary = format!(
106            "Listed {} items ({} files, {} directories)",
107            stats.total_items, stats.file_count, stats.dir_count
108        );
109
110        // Add sample files if available
111        if !stats.sample_files.is_empty() {
112            let files: Vec<&str> = stats
113                .sample_files
114                .iter()
115                .take(self.max_files)
116                .map(|s| s.as_str())
117                .collect();
118            summary.push_str(&format!(". Files: {}", files.join(", ")));
119        }
120
121        Ok(truncate_to_tokens(&summary, self.max_tokens))
122    }
123}
124
/// Aggregate figures pulled out of a grep listing.
#[derive(Default, Debug)]
struct GrepStats {
    /// Number of lines counted as matches.
    total_matches: usize,
    /// Number of distinct file keys seen.
    unique_files: usize,
    /// `(filename, match_count)` pairs, highest counts first.
    top_files: Vec<(String, usize)>,
    /// Function names and other identifiers spotted on matching lines.
    symbols: Vec<String>,
}
133
/// Aggregate figures pulled out of a file listing.
#[derive(Default, Debug)]
struct ListStats {
    /// Number of listing lines counted.
    total_items: usize,
    /// Lines classified as files.
    file_count: usize,
    /// Lines classified as directories.
    dir_count: usize,
    /// Final path component of each file line that passed the length check.
    sample_files: Vec<String>,
}
142
143/// Parse grep output to extract statistics
144fn parse_grep_output(output: &str) -> GrepStats {
145    let mut stats = GrepStats::default();
146    let mut file_matches: HashMap<String, usize> = HashMap::new();
147    let mut symbols_set: hashbrown::HashSet<String> = hashbrown::HashSet::new();
148
149    for line in output.lines() {
150        stats.total_matches += 1;
151
152        // Extract filename (format: "path/file.rs:42:content")
153        if let Some(colon_pos) = line.find(':') {
154            let file = &line[..colon_pos];
155            if !file.is_empty() {
156                *file_matches.entry(file.to_string()).or_insert(0) += 1;
157
158                // Extract simple filename for display
159                if let Some(slash_pos) = file.rfind('/') {
160                    let filename = &file[slash_pos + 1..];
161                    if filename.len() < 30 {
162                        // reasonable filename length
163                        *file_matches.entry(filename.to_string()).or_insert(0) += 1;
164                    }
165                }
166            }
167
168            // Extract potential symbols (functions, methods)
169            // Look for patterns like "fn name(", "impl Name", "pub struct"
170            let content = &line[colon_pos..];
171            extract_symbols(content, &mut symbols_set);
172        }
173    }
174
175    stats.unique_files = file_matches.len();
176
177    // Sort files by match count (descending)
178    let mut sorted_files: Vec<(String, usize)> = file_matches.into_iter().collect();
179    sorted_files.sort_by(|a, b| b.1.cmp(&a.1));
180    stats.top_files = sorted_files.into_iter().take(10).collect();
181
182    stats.symbols = symbols_set.into_iter().take(10).collect();
183
184    stats
185}
186
187/// Parse list output to extract statistics
188fn parse_list_output(output: &str) -> ListStats {
189    let mut stats = ListStats::default();
190
191    for line in output.lines() {
192        stats.total_items += 1;
193
194        // Detect directories (usually end with / or marked with [dir])
195        if line.ends_with('/') || line.contains("[dir]") || line.contains("DIR") {
196            stats.dir_count += 1;
197        } else {
198            stats.file_count += 1;
199            // Extract simple filename
200            if let Some(name) = line.split('/').next_back()
201                && !name.is_empty()
202                && name.len() < 50
203            {
204                stats.sample_files.push(name.to_string());
205            }
206        }
207    }
208
209    stats
210}
211
212/// Extract potential symbols (function names, types) from code line
213fn extract_symbols(line: &str, symbols: &mut hashbrown::HashSet<String>) {
214    // Look for function definitions: "fn name(" or "async fn name("
215    if let Some(fn_pos) = line.find("fn ") {
216        let after_fn = &line[fn_pos + 3..];
217        if let Some(paren_pos) = after_fn.find('(') {
218            let name = after_fn[..paren_pos].trim();
219            if !name.is_empty() && name.len() < 30 {
220                symbols.insert(format!("{}()", name));
221            }
222        }
223    }
224
225    // Look for struct/impl/trait definitions
226    for keyword in &["struct ", "impl ", "trait ", "enum "] {
227        if let Some(pos) = line.find(keyword) {
228            let after_kw = &line[pos + keyword.len()..];
229            if let Some(first_word) = after_kw.split_whitespace().next()
230                && first_word.len() < 30
231                && !first_word.contains('{')
232            {
233                symbols.insert(first_word.to_string());
234            }
235        }
236    }
237}
238
#[cfg(test)]
mod tests {
    use super::super::estimate_tokens;
    use super::*;

    /// End-to-end: a small grep listing is summarised within budget and
    /// reports a token saving.
    #[test]
    fn test_grep_summarizer() {
        let grep_output = "\
src/tools/grep.rs:45:    pub fn execute_grep(pattern: &str) -> Result<String> {
src/tools/grep.rs:67:        let matches = grep_impl(pattern)?;
src/tools/grep.rs:89:    fn grep_impl(pattern: &str) -> Result<Vec<Match>> {
src/tools/list.rs:23:    // Uses grep internally for filtering
src/main.rs:100:    grep.execute(\"test\")?;
";

        let grep = GrepSummarizer::default();
        let text = grep.summarize(grep_output, None).unwrap();

        assert!(text.contains("Found 5 matches"));
        assert!(text.contains("files"));
        assert!(estimate_tokens(&text) < 100);

        // The summary must be cheaper than the full listing.
        let (llm, ui, pct) = grep.estimate_savings(grep_output, &text);
        assert!(
            pct > 20.0,
            "Should save >20% (got {:.1}%, {} → {} tokens)",
            pct,
            ui,
            llm
        );
        assert!(llm < ui);
    }

    /// End-to-end: a small file listing is summarised within budget.
    #[test]
    fn test_list_summarizer() {
        let listing = "\
src/main.rs
src/lib.rs
src/tools/
src/tools/grep.rs
src/tools/list.rs
tests/
tests/integration.rs
README.md
";

        let lister = ListSummarizer::default();
        let text = lister.summarize(listing, None).unwrap();

        assert!(text.contains("Listed 8 items"));
        assert!(text.contains("files"));
        assert!(text.contains("directories"));
        assert!(estimate_tokens(&text) < 100);
    }

    /// The grep parser counts lines and records at least one file.
    #[test]
    fn test_grep_stats_parsing() {
        let grep_output = "\
src/tools/grep.rs:45:    pub fn execute_grep(pattern: &str) -> Result<String> {
src/tools/grep.rs:67:        let matches = grep_impl(pattern)?;
src/tools/list.rs:23:    // comment
";

        let parsed = parse_grep_output(grep_output);

        assert_eq!(parsed.total_matches, 3);
        assert!(parsed.unique_files > 0);
        assert!(!parsed.top_files.is_empty());
    }

    /// Function, impl and struct names are all picked up.
    #[test]
    fn test_symbol_extraction() {
        let mut found = hashbrown::HashSet::new();

        extract_symbols("    pub fn execute_grep(pattern: &str)", &mut found);
        assert!(found.contains("execute_grep()"));

        extract_symbols("impl GrepTool {", &mut found);
        assert!(found.contains("GrepTool"));

        extract_symbols("pub struct MyStruct {", &mut found);
        assert!(found.contains("MyStruct"));
    }

    /// Trailing-slash entries are directories; everything else is a file.
    #[test]
    fn test_list_stats_parsing() {
        let listing = "file1.rs\nfile2.rs\nsrc/\ntests/\nREADME.md";
        let parsed = parse_list_output(listing);

        assert_eq!(parsed.total_items, 5);
        assert_eq!(parsed.dir_count, 2); // src/ and tests/
        assert_eq!(parsed.file_count, 3);
    }

    /// A 200-line listing still collapses to a short summary.
    #[test]
    fn test_large_grep_output() {
        // 200 matches spread evenly over 20 files.
        let big_output: String = (0..200)
            .map(|i| format!("src/file{}.rs:{}:    match line\n", i % 20, i))
            .collect();

        let grep = GrepSummarizer::default();
        let text = grep.summarize(&big_output, None).unwrap();

        // Should be very concise despite 200 matches
        assert!(estimate_tokens(&text) < 150);
        assert!(text.contains("Found 200 matches"));

        // Verify massive savings
        let (_llm, _ui, pct) = grep.estimate_savings(&big_output, &text);
        assert!(pct > 95.0, "Should save >95% on large output");
    }
}