Skip to main content

vtcode_core/tools/
tool_effectiveness.rs

//! Tool effectiveness tracking and adaptive selection
//!
//! Tracks which tools are effective for given contexts and helps the agent
//! select the best tool based on prior success rates and result quality.
5
6use crate::types::CompactStr;
7use crate::utils::current_timestamp;
8use hashbrown::HashMap;
9use serde::{Deserialize, Serialize};
10
11use crate::tools::result_metadata::ResultMetadata;
12
13/// Tracks effectiveness of a tool
14#[derive(Debug, Clone, Serialize, Deserialize)]
15pub struct ToolEffectiveness {
16    pub tool_name: CompactStr,
17
18    /// Success rate (0.0-1.0)
19    pub success_rate: f32,
20
21    /// Average result quality (0.0-1.0)
22    pub avg_result_quality: f32,
23
24    /// Number of times tool was used
25    pub usage_count: usize,
26
27    /// Number of successful executions
28    pub success_count: usize,
29
30    /// Last time tool was used
31    pub last_used_timestamp: u64,
32
33    /// Common failure modes
34    #[serde(default)]
35    pub failure_modes: Vec<ToolFailureMode>,
36
37    /// Average execution time in milliseconds
38    #[serde(default)]
39    pub avg_execution_time_ms: f32,
40}
41
42#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
43pub enum ToolFailureMode {
44    Timeout,
45    NoResults,
46    InvalidArgs,
47    ParseError,
48    PermissionDenied,
49    Unknown,
50}
51
52impl ToolEffectiveness {
53    pub fn new(tool_name: impl Into<CompactStr>) -> Self {
54        Self {
55            tool_name: tool_name.into(),
56            success_rate: 0.0,
57            avg_result_quality: 0.0,
58            usage_count: 0,
59            success_count: 0,
60            last_used_timestamp: 0,
61            failure_modes: vec![],
62            avg_execution_time_ms: 0.0,
63        }
64    }
65
66    /// Record a successful tool execution
67    pub fn record_success(&mut self, quality: f32, execution_time_ms: f32) {
68        self.usage_count += 1;
69        self.success_count += 1;
70        self.last_used_timestamp = current_timestamp();
71
72        // Update rolling average of quality
73        self.avg_result_quality = (self.avg_result_quality * (self.success_count - 1) as f32
74            + quality)
75            / self.success_count as f32;
76
77        // Update rolling average of execution time
78        self.avg_execution_time_ms = (self.avg_execution_time_ms * (self.success_count - 1) as f32
79            + execution_time_ms)
80            / self.success_count as f32;
81
82        self.update_success_rate();
83    }
84
85    /// Record a failed tool execution
86    pub fn record_failure(&mut self, failure_mode: ToolFailureMode, execution_time_ms: f32) {
87        self.usage_count += 1;
88        self.last_used_timestamp = current_timestamp();
89
90        // Track failure mode
91        if !self.failure_modes.iter().any(|m| m == &failure_mode) {
92            self.failure_modes.push(failure_mode);
93        }
94
95        // Update rolling average of execution time
96        let success_count_f = (self.success_count + 1) as f32;
97        self.avg_execution_time_ms = (self.avg_execution_time_ms
98            * (self.success_count as f32 / success_count_f))
99            + (execution_time_ms / success_count_f);
100
101        self.update_success_rate();
102    }
103
104    fn update_success_rate(&mut self) {
105        if self.usage_count > 0 {
106            self.success_rate = self.success_count as f32 / self.usage_count as f32;
107        }
108    }
109
110    /// Get overall effectiveness score
111    pub fn effectiveness_score(&self) -> f32 {
112        if self.usage_count == 0 {
113            return 0.5; // Unknown
114        }
115
116        // Weight success rate (60%) and result quality (40%)
117        (self.success_rate * 0.6) + (self.avg_result_quality * 0.4)
118    }
119
120    /// Whether this tool is considered reliable
121    pub fn is_reliable(&self) -> bool {
122        self.usage_count >= 3 && self.success_rate > 0.7
123    }
124
125    /// Time since last use in seconds
126    pub fn time_since_last_use_seconds(&self) -> u64 {
127        if self.last_used_timestamp == 0 {
128            u64::MAX
129        } else {
130            current_timestamp().saturating_sub(self.last_used_timestamp)
131        }
132    }
133}
134
135/// Tool selection context
136#[derive(Debug, Clone)]
137pub struct ToolSelectionContext {
138    /// Description of current task
139    pub task_description: String,
140
141    /// Tools already used in current context
142    pub prior_tools_used: Vec<String>,
143
144    /// Quality scores of prior results
145    pub prior_result_qualities: Vec<f32>,
146
147    /// Current effectiveness snapshot
148    pub tool_effectiveness: HashMap<CompactStr, ToolEffectiveness>,
149}
150
151/// Trait for selecting which tool to use
152pub trait ToolSelector: Send + Sync {
153    fn select_tool(&self, context: &ToolSelectionContext, candidates: &[&str]) -> Option<String>;
154}
155
156/// Adaptive tool selector based on effectiveness
157pub struct AdaptiveToolSelector;
158
159impl ToolSelector for AdaptiveToolSelector {
160    fn select_tool(&self, context: &ToolSelectionContext, candidates: &[&str]) -> Option<String> {
161        if candidates.is_empty() {
162            return None;
163        }
164
165        // Score each candidate
166        let mut scored: Vec<(String, f32)> = candidates
167            .iter()
168            .map(|tool| {
169                let name = (*tool).to_owned();
170                let score = score_tool(&name, context);
171                (name, score)
172            })
173            .collect();
174
175        // Sort by score (highest first)
176        scored.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
177
178        scored.first().map(|(name, _)| name.clone())
179    }
180}
181
182/// Score a tool based on context
183fn score_tool(tool_name: &str, context: &ToolSelectionContext) -> f32 {
184    let mut score = 0.5; // Base score
185
186    // Get effectiveness for this tool
187    if let Some(eff) = context.tool_effectiveness.get(tool_name) {
188        // Factor 1: Tool effectiveness history (weight: 40%)
189        score += eff.effectiveness_score() * 0.4;
190
191        // Factor 2: Execution time (prefer faster tools, weight: 10%)
192        let time_score = 1.0 - (eff.avg_execution_time_ms / 10000.0).min(1.0);
193        score += time_score * 0.1;
194
195        // Factor 3: Reliability penalty for tools with recent failures
196        if !eff.failure_modes.is_empty() {
197            let failure_penalty = (eff.failure_modes.len() as f32) * 0.1;
198            score -= failure_penalty;
199        }
200    }
201
202    // Factor 4: Tool diversity - penalize recently used tools
203    if context.prior_tools_used.iter().any(|s| s == tool_name) {
204        score -= 0.15; // Avoid repeating same tool
205    }
206
207    // Normalize to 0.0-1.0 range
208    score.clamp(0.0, 1.0)
209}
210
211/// Tracker for tool effectiveness across a session
212pub struct ToolEffectivenessTracker {
213    effectiveness: HashMap<CompactStr, ToolEffectiveness>,
214}
215
216impl ToolEffectivenessTracker {
217    pub fn new() -> Self {
218        Self {
219            effectiveness: HashMap::new(),
220        }
221    }
222
223    /// Get or create effectiveness tracker for tool
224    fn get_or_create(&mut self, tool_name: &str) -> &mut ToolEffectiveness {
225        self.effectiveness
226            .entry(CompactStr::from(tool_name))
227            .or_insert_with(|| ToolEffectiveness::new(tool_name))
228    }
229
230    /// Record successful tool execution
231    pub fn record_success(
232        &mut self,
233        tool_name: &str,
234        metadata: &ResultMetadata,
235        execution_time_ms: f32,
236    ) {
237        let quality = metadata.quality_score();
238        self.get_or_create(tool_name)
239            .record_success(quality, execution_time_ms);
240    }
241
242    /// Record failed tool execution
243    pub fn record_failure(
244        &mut self,
245        tool_name: &str,
246        mode: ToolFailureMode,
247        execution_time_ms: f32,
248    ) {
249        self.get_or_create(tool_name)
250            .record_failure(mode, execution_time_ms);
251    }
252
253    /// Get effectiveness snapshot
254    pub fn snapshot(&self) -> HashMap<CompactStr, ToolEffectiveness> {
255        self.effectiveness.clone()
256    }
257
258    /// Get effectiveness for specific tool
259    pub fn get(&self, tool_name: &str) -> Option<&ToolEffectiveness> {
260        self.effectiveness.get(tool_name)
261    }
262
263    /// Get tools sorted by effectiveness
264    pub fn sorted_by_effectiveness(&self) -> Vec<(CompactStr, f32)> {
265        let mut tools: Vec<_> = self
266            .effectiveness
267            .iter()
268            .map(|(name, eff)| (name.clone(), eff.effectiveness_score()))
269            .collect();
270
271        tools.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
272        tools
273    }
274}
275
276impl Default for ToolEffectivenessTracker {
277    fn default() -> Self {
278        Self::new()
279    }
280}
281
#[cfg(test)]
mod tests {
    use super::*;

    /// Two successes advance both counters and give a 100% success rate.
    #[test]
    fn test_tool_effectiveness_success() {
        let mut record = ToolEffectiveness::new("grep".to_owned());
        record.record_success(0.9, 100.0);
        record.record_success(0.85, 110.0);

        assert_eq!(record.success_count, 2);
        assert_eq!(record.usage_count, 2);
        assert_eq!(record.success_rate, 1.0);
        assert!(record.avg_result_quality > 0.8);
    }

    /// A single failure counts as a use, not a success, and its mode is kept.
    #[test]
    fn test_tool_effectiveness_failure() {
        let mut record = ToolEffectiveness::new("find".to_owned());
        record.record_failure(ToolFailureMode::Timeout, 5000.0);

        assert_eq!(record.success_count, 0);
        assert_eq!(record.usage_count, 1);
        assert_eq!(record.success_rate, 0.0);
        assert!(record.failure_modes.contains(&ToolFailureMode::Timeout));
    }

    /// A tool with a good track record beats one that only ever failed.
    #[test]
    fn test_adaptive_selector() {
        let mut grep_stats = ToolEffectiveness::new("grep");
        grep_stats.record_success(0.9, 100.0);

        let mut find_stats = ToolEffectiveness::new("find");
        find_stats.record_failure(ToolFailureMode::Timeout, 5000.0);

        let mut stats_by_tool = HashMap::new();
        stats_by_tool.insert(CompactStr::from("grep"), grep_stats);
        stats_by_tool.insert(CompactStr::from("find"), find_stats);

        let context = ToolSelectionContext {
            task_description: "find error patterns".to_owned(),
            prior_tools_used: vec![],
            prior_result_qualities: vec![],
            tool_effectiveness: stats_by_tool,
        };

        let choice = AdaptiveToolSelector.select_tool(&context, &["grep", "find"]);
        assert_eq!(choice, Some("grep".to_owned()));
    }

    /// With no statistics at all, the tool that was already used loses.
    #[test]
    fn test_tool_diversity_penalty() {
        let context = ToolSelectionContext {
            task_description: "find files".to_owned(),
            prior_tools_used: vec!["grep".to_owned()],
            prior_result_qualities: vec![],
            tool_effectiveness: HashMap::new(),
        };

        let choice = AdaptiveToolSelector.select_tool(&context, &["grep", "find"]);
        // `grep` carries the repeat penalty, so `find` must come out on top.
        assert_eq!(choice, Some("find".to_owned()));
    }

    /// The tracker accumulates successes and reports the tool in its ranking.
    #[test]
    fn test_effectiveness_tracker() {
        let meta = ResultMetadata::success(0.8, 0.8);
        let mut tracker = ToolEffectivenessTracker::new();

        tracker.record_success("grep", &meta, 100.0);
        tracker.record_success("grep", &meta, 110.0);

        let ranking = tracker.sorted_by_effectiveness();
        assert_eq!(ranking[0].0, "grep");
        assert!(ranking[0].1 > 0.7);
    }
}