// vtcode_core/tools/search_metrics.rs

//! Search operation metrics and optimization.
//!
//! Tracks the token cost of search results to enable intelligent caching,
//! sampling, and summarization of expensive searches in large codebases.
5use hashbrown::HashMap;
6
/// Metrics for a single search operation.
///
/// Plain data record; all fields are public. `PartialEq`/`Eq` are derived so
/// two records can be compared directly (e.g. in assertions).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SearchMetric {
    /// Search pattern used.
    pub pattern: String,
    /// Number of matches found.
    pub match_count: usize,
    /// Estimated number of tokens in the results.
    pub result_tokens: usize,
    /// Time taken to execute the search, in milliseconds.
    pub duration_ms: u64,
    /// Number of files searched.
    pub files_searched: usize,
    /// Whether the search was classified as expensive (high token cost).
    pub is_expensive: bool,
}
23
24/// Tracks search operation metrics for optimization
25#[derive(Debug, Clone)]
26pub struct SearchMetrics {
27    /// All recorded searches indexed by pattern
28    searches: HashMap<String, SearchMetric>,
29    /// Total token cost of all searches
30    pub total_tokens: usize,
31    /// Total searches executed
32    pub total_searches: usize,
33    /// Token threshold for "expensive" searches (default: 10000)
34    expensive_threshold: usize,
35}
36
37impl Default for SearchMetrics {
38    fn default() -> Self {
39        Self::new()
40    }
41}
42
43impl SearchMetrics {
44    /// Create new search metrics tracker
45    pub fn new() -> Self {
46        Self {
47            searches: HashMap::new(),
48            total_tokens: 0,
49            total_searches: 0,
50            expensive_threshold: 10000,
51        }
52    }
53
54    /// Set token threshold for expensive searches
55    pub fn with_expensive_threshold(mut self, threshold: usize) -> Self {
56        self.expensive_threshold = threshold;
57        self
58    }
59
60    /// Record a search operation
61    pub fn record_search(
62        &mut self,
63        pattern: &str,
64        match_count: usize,
65        result_chars: usize,
66        duration_ms: u64,
67        files_searched: usize,
68    ) {
69        // Estimate tokens from character count (using default 4.0 chars/token)
70        let estimated_tokens = (result_chars as f64 / 4.0).ceil() as usize;
71        let is_expensive = estimated_tokens > self.expensive_threshold;
72
73        let metric = SearchMetric {
74            pattern: pattern.to_owned(),
75            match_count,
76            result_tokens: estimated_tokens,
77            duration_ms,
78            files_searched,
79            is_expensive,
80        };
81
82        self.total_tokens += estimated_tokens;
83        self.total_searches += 1;
84        self.searches.insert(pattern.to_owned(), metric);
85    }
86
87    /// Get metric for a specific pattern
88    pub fn get_search(&self, pattern: &str) -> Option<&SearchMetric> {
89        self.searches.get(pattern)
90    }
91
92    /// Find most expensive searches
93    pub fn expensive_searches(&self, limit: usize) -> Vec<&SearchMetric> {
94        let mut searches: Vec<_> = self.searches.values().filter(|s| s.is_expensive).collect();
95        searches.sort_by(|a, b| b.result_tokens.cmp(&a.result_tokens));
96        searches.into_iter().take(limit).collect()
97    }
98
99    /// Find slowest searches
100    pub fn slowest_searches(&self, limit: usize) -> Vec<&SearchMetric> {
101        let mut searches: Vec<_> = self.searches.values().collect();
102        searches.sort_by(|a, b| b.duration_ms.cmp(&a.duration_ms));
103        searches.into_iter().take(limit).collect()
104    }
105
106    /// Calculate average tokens per search
107    pub fn avg_tokens_per_search(&self) -> f64 {
108        if self.total_searches == 0 {
109            0.0
110        } else {
111            self.total_tokens as f64 / self.total_searches as f64
112        }
113    }
114
115    /// Check if search should be sampled (too many results)
116    pub fn should_sample_results(&self, pattern: &str) -> bool {
117        self.get_search(pattern)
118            .map(|m| m.is_expensive)
119            .unwrap_or(false)
120    }
121
122    /// Estimate sampling ratio for expensive search
123    ///
124    /// Returns a value between 0.1 (10% of results) and 1.0 (no sampling)
125    pub fn estimate_sampling_ratio(&self, pattern: &str) -> f64 {
126        if let Some(metric) = self.get_search(pattern) {
127            if !metric.is_expensive {
128                return 1.0;
129            }
130
131            // Linear interpolation: at threshold = 1.0, at 2x threshold = 0.1
132            let ratio = self.expensive_threshold as f64 / metric.result_tokens as f64;
133            (ratio * 0.9 + 0.1).clamp(0.1, 1.0)
134        } else {
135            1.0 // No sampling if not tracked
136        }
137    }
138
139    /// Format metrics for display
140    pub fn format_summary(&self) -> String {
141        use std::fmt::Write;
142        // Pre-allocate with reasonable estimate for metrics summary
143        let mut output = String::with_capacity(512);
144        output.push_str("[SEARCH] Metrics Summary\n");
145        let _ = writeln!(output, "  Total searches: {}", self.total_searches);
146        let _ = writeln!(output, "  Total tokens: {}", self.total_tokens);
147        let _ = writeln!(
148            output,
149            "  Avg tokens/search: {:.0}",
150            self.avg_tokens_per_search()
151        );
152        let _ = writeln!(
153            output,
154            "  Expensive searches: {}",
155            self.searches.values().filter(|s| s.is_expensive).count()
156        );
157
158        let expensive = self.expensive_searches(3);
159        if !expensive.is_empty() {
160            output.push_str("\n  Most expensive searches:\n");
161            for (i, metric) in expensive.iter().enumerate() {
162                let _ = writeln!(
163                    output,
164                    "    {}. '{}': {} tokens ({} matches)",
165                    i + 1,
166                    metric.pattern,
167                    metric.result_tokens,
168                    metric.match_count
169                );
170            }
171        }
172
173        output
174    }
175
176    /// Clear all metrics
177    pub fn reset(&mut self) {
178        self.searches.clear();
179        self.total_tokens = 0;
180        self.total_searches = 0;
181    }
182
183    /// Get stats for monitoring
184    pub fn stats(&self) -> SearchMetricsStats {
185        let expensive_count = self.searches.values().filter(|s| s.is_expensive).count();
186        SearchMetricsStats {
187            total_searches: self.total_searches,
188            total_tokens: self.total_tokens,
189            expensive_searches: expensive_count,
190            avg_tokens_per_search: self.avg_tokens_per_search(),
191        }
192    }
193}
194
/// Aggregate statistics snapshot for search metrics.
///
/// `PartialEq` is derived so snapshots can be compared directly; `Eq` is not
/// available because the average is a floating-point value.
#[derive(Debug, Clone, PartialEq)]
pub struct SearchMetricsStats {
    /// Total number of searches recorded.
    pub total_searches: usize,
    /// Total estimated token cost of all recorded searches.
    pub total_tokens: usize,
    /// Number of stored searches flagged as expensive.
    pub expensive_searches: usize,
    /// Average estimated tokens per recorded search.
    pub avg_tokens_per_search: f64,
}
203
#[cfg(test)]
mod tests {
    use super::*;

    /// A fresh tracker starts with zeroed totals.
    #[test]
    fn test_creates_metrics() {
        let metrics = SearchMetrics::new();
        assert_eq!(metrics.total_searches, 0);
        assert_eq!(metrics.total_tokens, 0);
    }

    /// Recording stores every field and updates the totals.
    #[test]
    fn test_records_search() {
        let mut metrics = SearchMetrics::new();
        metrics.record_search("fn main", 5, 1000, 10, 3);

        assert_eq!(metrics.total_searches, 1);
        // 1000 chars at 4 chars/token
        assert_eq!(metrics.total_tokens, 250);

        let metric = metrics.get_search("fn main").unwrap();
        assert_eq!(metric.pattern, "fn main");
        assert_eq!(metric.match_count, 5);
        assert_eq!(metric.result_tokens, 250);
        assert_eq!(metric.duration_ms, 10);
        assert_eq!(metric.files_searched, 3);
        assert!(!metric.is_expensive);
    }

    /// A search whose estimate exceeds the threshold is flagged expensive.
    #[test]
    fn test_identifies_expensive_searches() {
        let mut metrics = SearchMetrics::new().with_expensive_threshold(5000);
        // This will be expensive (12500 tokens estimated)
        metrics.record_search("common_pattern", 100, 50000, 50, 50);

        let metric = metrics.get_search("common_pattern").unwrap();
        assert_eq!(metric.result_tokens, 12500);
        assert!(metric.is_expensive);
    }

    /// Only expensive searches are returned, highest token cost first.
    #[test]
    fn test_expensive_searches() {
        let mut metrics = SearchMetrics::new().with_expensive_threshold(5000);
        metrics.record_search("pattern1", 10, 25000, 20, 5);
        metrics.record_search("pattern2", 5, 2000, 10, 2);
        metrics.record_search("pattern3", 50, 30000, 100, 20);

        let expensive = metrics.expensive_searches(2);
        assert_eq!(expensive.len(), 2);
        assert_eq!(expensive[0].pattern, "pattern3");
        assert_eq!(expensive[1].pattern, "pattern1");
        assert!(expensive[0].result_tokens > expensive[1].result_tokens);
    }

    /// Searches are ranked by duration, slowest first, and cut at the limit.
    #[test]
    fn test_slowest_searches() {
        let mut metrics = SearchMetrics::new();
        metrics.record_search("fast", 10, 1000, 5, 2);
        metrics.record_search("slow", 10, 1000, 100, 2);
        metrics.record_search("medium", 10, 1000, 50, 2);

        let slowest = metrics.slowest_searches(2);
        assert_eq!(slowest.len(), 2);
        assert_eq!(slowest[0].pattern, "slow");
        assert_eq!(slowest[1].pattern, "medium");
        assert!(slowest[0].duration_ms > slowest[1].duration_ms);
    }

    /// Cheap and unknown patterns are not sampled; expensive ones are.
    #[test]
    fn test_sampling_ratio() {
        let mut metrics = SearchMetrics::new().with_expensive_threshold(10000);

        // Non-expensive search should not be sampled
        metrics.record_search("cheap", 10, 5000, 10, 5);
        assert_eq!(metrics.estimate_sampling_ratio("cheap"), 1.0);

        // Untracked pattern should not be sampled either
        assert_eq!(metrics.estimate_sampling_ratio("unknown"), 1.0);

        // Expensive search should be sampled:
        // 12500 tokens vs. threshold 10000 -> 0.8 * 0.9 + 0.1 = 0.82
        metrics.record_search("expensive", 100, 50000, 100, 50);
        let ratio = metrics.estimate_sampling_ratio("expensive");
        assert!(ratio < 1.0);
        assert!(ratio >= 0.1);
        assert!((ratio - 0.82).abs() < 1e-9);
    }

    /// The average is total tokens divided by the number of searches.
    #[test]
    fn test_average_tokens() {
        let mut metrics = SearchMetrics::new();
        assert_eq!(metrics.avg_tokens_per_search(), 0.0);

        metrics.record_search("search1", 10, 4000, 10, 5);
        metrics.record_search("search2", 5, 8000, 20, 3);

        // (1000 + 2000) / 2 = 1500
        let avg = metrics.avg_tokens_per_search();
        assert!((avg - 1500.0).abs() < f64::EPSILON);
    }

    /// Sampling is requested only for patterns flagged expensive.
    #[test]
    fn test_should_sample_results() {
        let mut metrics = SearchMetrics::new().with_expensive_threshold(5000);
        metrics.record_search("cheap", 10, 2000, 10, 5);
        metrics.record_search("expensive", 100, 50000, 100, 50);

        assert!(!metrics.should_sample_results("cheap"));
        assert!(metrics.should_sample_results("expensive"));
        assert!(!metrics.should_sample_results("unknown"));
    }

    /// The summary contains the header and the totals.
    #[test]
    fn test_format_summary() {
        let mut metrics = SearchMetrics::new();
        metrics.record_search("pattern1", 10, 4000, 10, 5);

        let summary = metrics.format_summary();
        assert!(summary.contains("[SEARCH] Metrics Summary"));
        assert!(summary.contains("Total searches: 1"));
        assert!(summary.contains("Total tokens: 1000"));
        assert!(summary.contains("Expensive searches: 0"));
        // Nothing is expensive, so the ranking section is omitted.
        assert!(!summary.contains("Most expensive searches"));
    }

    /// Reset drops all recorded searches and zeroes the totals.
    #[test]
    fn test_reset() {
        let mut metrics = SearchMetrics::new();
        metrics.record_search("pattern1", 10, 4000, 10, 5);

        metrics.reset();
        assert_eq!(metrics.total_searches, 0);
        assert_eq!(metrics.total_tokens, 0);
        assert!(metrics.get_search("pattern1").is_none());
    }

    /// The stats snapshot mirrors the tracker's aggregate numbers.
    #[test]
    fn test_stats() {
        let mut metrics = SearchMetrics::new().with_expensive_threshold(5000);
        metrics.record_search("cheap", 10, 2000, 10, 5);
        metrics.record_search("expensive", 100, 50000, 100, 50);

        let stats = metrics.stats();
        assert_eq!(stats.total_searches, 2);
        // 500 + 12500
        assert_eq!(stats.total_tokens, 13000);
        assert_eq!(stats.expensive_searches, 1);
        assert!((stats.avg_tokens_per_search - 6500.0).abs() < f64::EPSILON);
    }
}