use hashbrown::HashMap;
use parking_lot::RwLock;
use smallvec::SmallVec;
use std::collections::VecDeque;
use std::sync::Arc;
use std::time::Duration;
/// Outcome of a single tool execution, as stored in a tool's recent-history
/// window.
#[derive(Debug, Clone, Copy, PartialEq)]
struct ExecutionResult {
/// Whether the execution was reported as successful.
success: bool,
/// Latency of the execution, in milliseconds.
latency_ms: f64,
}
/// Aggregated execution statistics for one tool.
///
/// The public counters are maintained by `ToolHealthTracker::record_execution`;
/// values handed out by the tracker are clones (snapshots), not live views.
#[derive(Debug, Clone, Default)]
pub struct ToolStats {
/// Number of executions recorded as successful.
pub success_count: u64,
/// Number of executions recorded as failed.
pub failure_count: u64,
/// Total number of executions recorded (successes plus failures).
pub total_count: u64,
/// Number of failures recorded since the most recent success; reset to 0
/// whenever a success is recorded.
pub consecutive_failures: u64,
/// Running mean latency, in milliseconds, over every recorded execution
/// (not just the recent window).
pub avg_latency_ms: f64,
/// Count of failed entries in `recent_history`; incremented when a failure
/// is pushed and decremented when a failed entry is evicted.
pub recent_failure_count: u64,
/// Sliding window of the most recent executions, oldest entry at the front.
recent_history: VecDeque<ExecutionResult>,
}
/// Tracks per-tool execution outcomes and latencies and derives a
/// healthy/unhealthy verdict from them.
pub struct ToolHealthTracker {
/// Per-tool statistics keyed by tool name, guarded by a read/write lock.
stats: Arc<RwLock<HashMap<String, ToolStats>>>,
/// A tool is reported unhealthy once its consecutive-failure count is
/// greater than or equal to this value.
failure_threshold: u64,
/// Number of entries above which the oldest item of a tool's
/// recent-history window is evicted.
window_size: usize,
}
impl ToolHealthTracker {
    /// Recent-history window length used by [`ToolHealthTracker::new`].
    const DEFAULT_WINDOW_SIZE: usize = 20;

    /// Minimum number of windowed samples required before the recent
    /// failure-rate check is applied.
    const MIN_RATE_SAMPLES: usize = 5;

    /// A recent failure rate strictly above this fraction marks a tool
    /// unhealthy.
    const MAX_RECENT_FAILURE_RATE: f64 = 0.6;

    /// Creates a tracker with no recorded tools and a recent-history window
    /// of 20 executions.
    ///
    /// A tool is reported unhealthy once its consecutive-failure count is
    /// `>= failure_threshold`. Note that with a threshold of `0` every tool
    /// that has any recorded execution compares as unhealthy.
    pub fn new(failure_threshold: u64) -> Self {
        Self {
            stats: Arc::new(RwLock::new(HashMap::new())),
            failure_threshold,
            window_size: Self::DEFAULT_WINDOW_SIZE,
        }
    }

    /// Sets the maximum number of executions kept in each tool's
    /// recent-history window.
    ///
    /// Histories that are already longer than `size` are trimmed right away
    /// (oldest entries first) so that health checks and latency percentiles
    /// immediately reflect the new window instead of a stale, larger one.
    pub fn set_window_size(&mut self, size: usize) {
        self.window_size = size;
        let mut stats_map = self.stats.write();
        for tool_stats in stats_map.values_mut() {
            Self::trim_to_window(tool_stats, size);
        }
    }

    /// Evicts the oldest history entries until at most `window_size` remain,
    /// keeping `recent_failure_count` in step with the evicted failures.
    fn trim_to_window(tool_stats: &mut ToolStats, window_size: usize) {
        while tool_stats.recent_history.len() > window_size {
            let evicted_failure = tool_stats
                .recent_history
                .pop_front()
                .is_some_and(|evicted| !evicted.success);
            if evicted_failure {
                tool_stats.recent_failure_count =
                    tool_stats.recent_failure_count.saturating_sub(1);
            }
        }
    }

    /// Records the outcome and latency of one execution of `tool_name`.
    ///
    /// Creates the tool's statistics entry on first use, then updates the
    /// lifetime counters, the running mean latency, the consecutive-failure
    /// streak and the recent-history window. Holds the write lock for the
    /// duration of the update.
    pub fn record_execution(&self, tool_name: &str, success: bool, latency: Duration) {
        let latency_ms = latency.as_secs_f64() * 1000.0;
        let mut stats_map = self.stats.write();

        // Look up by borrowed key first so that the common case (tool already
        // known) does not allocate an owned `String` for the entry API.
        let tool_stats = if let Some(existing) = stats_map.get_mut(tool_name) {
            existing
        } else {
            stats_map.entry(tool_name.to_string()).or_default()
        };

        tool_stats.total_count += 1;
        if tool_stats.total_count == 1 {
            tool_stats.avg_latency_ms = latency_ms;
        } else {
            // Incremental mean: new_avg = old_avg * (n - 1) / n + x / n.
            let n = tool_stats.total_count as f64;
            tool_stats.avg_latency_ms =
                tool_stats.avg_latency_ms * ((n - 1.0) / n) + latency_ms / n;
        }

        if success {
            tool_stats.success_count += 1;
            tool_stats.consecutive_failures = 0;
        } else {
            tool_stats.failure_count += 1;
            tool_stats.consecutive_failures += 1;
            tool_stats.recent_failure_count += 1;
        }

        tool_stats.recent_history.push_back(ExecutionResult {
            success,
            latency_ms,
        });
        // Loop (rather than a single pop) so a window that was shrunk after
        // entries were recorded is brought back within bounds.
        Self::trim_to_window(tool_stats, self.window_size);
    }

    /// Returns `true` when [`ToolHealthTracker::check_health`] considers the
    /// tool healthy.
    pub fn is_healthy(&self, tool_name: &str) -> bool {
        self.check_health(tool_name).0
    }

    /// Evaluates the health of `tool_name`.
    ///
    /// Returns `(healthy, reason)`; `reason` is `Some` only when the tool is
    /// unhealthy. A tool is unhealthy when either
    ///
    /// * its consecutive-failure count is `>=` the configured threshold, or
    /// * its recent window holds at least 5 executions and more than 60% of
    ///   them failed.
    ///
    /// Tools with no recorded executions are reported healthy.
    pub fn check_health(&self, tool_name: &str) -> (bool, Option<String>) {
        let stats_map = self.stats.read();
        let Some(stats) = stats_map.get(tool_name) else {
            return (true, None);
        };

        if stats.consecutive_failures >= self.failure_threshold {
            return (
                false,
                Some(format!(
                    "{} consecutive failures",
                    stats.consecutive_failures
                )),
            );
        }

        let history_len = stats.recent_history.len();
        if history_len >= Self::MIN_RATE_SAMPLES {
            let failure_rate = stats.recent_failure_count as f64 / history_len as f64;
            if failure_rate > Self::MAX_RECENT_FAILURE_RATE {
                return (
                    false,
                    Some(format!(
                        "High recent failure rate: {:.1}%",
                        failure_rate * 100.0
                    )),
                );
            }
        }

        (true, None)
    }

    /// Returns `(average, p95)` latency in milliseconds for `tool`, or `None`
    /// if nothing has been recorded for it.
    ///
    /// The average is the running mean over every recorded execution, while
    /// the 95th percentile (nearest-rank) is computed from the recent-history
    /// window only. When the window is empty the average is returned for
    /// both values.
    pub fn get_latency_stats(&self, tool: &str) -> Option<(f64, f64)> {
        let map = self.stats.read();
        let stats = map.get(tool)?;
        let avg = stats.avg_latency_ms;
        if stats.recent_history.is_empty() {
            return Some((avg, avg));
        }

        let mut sorted = SmallVec::<[f64; 32]>::new();
        sorted.extend(stats.recent_history.iter().map(|r| r.latency_ms));
        sorted.sort_unstable_by(f64::total_cmp);

        // Nearest-rank percentile: rank = ceil(0.95 * len), converted to a
        // zero-based index. Integer arithmetic avoids a float round-trip.
        let p95_idx = (sorted.len() * 95).div_ceil(100).saturating_sub(1);
        let p95 = sorted.get(p95_idx).copied().unwrap_or(avg);
        Some((avg, p95))
    }

    /// Returns a snapshot (deep clone) of the statistics of every tracked
    /// tool, paired with the tool name. The order of the entries is the
    /// map's iteration order and is not otherwise specified.
    pub fn get_all_tool_stats(&self) -> Vec<(String, ToolStats)> {
        self.stats
            .read()
            .iter()
            .map(|(name, stats)| (name.clone(), stats.clone()))
            .collect()
    }
}
impl Default for ToolHealthTracker {
fn default() -> Self {
Self::new(50)
}
}