use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Arc;
use std::time::Instant;
/// Counters describing client activity: request outcomes, accumulated latency,
/// token usage and cache lookups.
///
/// Every counter is an `AtomicU64` behind an `Arc`, so a `clone()` of this
/// struct refers to the same underlying counters as the value it was cloned
/// from rather than to an independent copy.
#[derive(Debug, Clone)]
pub struct ClientMetrics {
/// Incremented by one on every `record_request` call.
pub requests_total: Arc<AtomicU64>,
/// Incremented by `record_request` when it is called with `success == true`.
pub requests_successful: Arc<AtomicU64>,
/// Incremented by `record_request` when it is called with `success == false`.
pub requests_failed: Arc<AtomicU64>,
/// Running sum of the `latency_ms` values passed to `record_request`.
pub total_latency_ms: Arc<AtomicU64>,
/// Running sum of the token counts passed to `record_request` as `Some(..)`.
pub total_tokens_used: Arc<AtomicU64>,
/// Incremented by one on every `record_cache_hit` call.
pub cache_hits: Arc<AtomicU64>,
/// Incremented by one on every `record_cache_miss` call.
pub cache_misses: Arc<AtomicU64>,
}
impl Default for ClientMetrics {
fn default() -> Self {
Self::new()
}
}
impl ClientMetrics {
    /// Creates a metrics handle with every counter initialised to zero.
    pub fn new() -> Self {
        Self {
            requests_total: Arc::new(AtomicU64::new(0)),
            requests_successful: Arc::new(AtomicU64::new(0)),
            requests_failed: Arc::new(AtomicU64::new(0)),
            total_latency_ms: Arc::new(AtomicU64::new(0)),
            total_tokens_used: Arc::new(AtomicU64::new(0)),
            cache_hits: Arc::new(AtomicU64::new(0)),
            cache_misses: Arc::new(AtomicU64::new(0)),
        }
    }

    /// Records the outcome of one request.
    ///
    /// * `success` - selects whether `requests_successful` or
    ///   `requests_failed` is incremented.
    /// * `latency_ms` - added to `total_latency_ms`.
    /// * `tokens` - added to `total_tokens_used` when `Some`; ignored when
    ///   `None`.
    ///
    /// `requests_total` is incremented on every call.
    pub fn record_request(&self, success: bool, latency_ms: u64, tokens: Option<u32>) {
        self.requests_total.fetch_add(1, Ordering::Relaxed);
        self.total_latency_ms.fetch_add(latency_ms, Ordering::Relaxed);

        let outcome = if success {
            &self.requests_successful
        } else {
            &self.requests_failed
        };
        outcome.fetch_add(1, Ordering::Relaxed);

        if let Some(tokens) = tokens {
            // `u32 -> u64` can never lose information; `From` states that in the type system.
            self.total_tokens_used
                .fetch_add(u64::from(tokens), Ordering::Relaxed);
        }
    }

    /// Counts one cache hit.
    pub fn record_cache_hit(&self) {
        self.cache_hits.fetch_add(1, Ordering::Relaxed);
    }

    /// Counts one cache miss.
    pub fn record_cache_miss(&self) {
        self.cache_misses.fetch_add(1, Ordering::Relaxed);
    }

    /// Builds a [`MetricsSnapshot`] from the current counter values.
    ///
    /// Each counter is loaded exactly once, so the derived fields
    /// (`success_rate`, `average_latency_ms`, `cache_hit_rate`) are always
    /// computed from the same numbers that the snapshot reports. The loads
    /// are independent `Relaxed` reads, so a snapshot taken while other
    /// threads are recording is not a single point-in-time view across all
    /// counters.
    ///
    /// With no recorded requests `success_rate` is `0.0` and
    /// `average_latency_ms` is `None`; with no recorded cache lookups
    /// `cache_hit_rate` is `0.0`.
    pub fn get_stats(&self) -> MetricsSnapshot {
        /// `part / whole` as a fraction, or `0.0` when `whole` is zero.
        fn fraction(part: u64, whole: u64) -> f64 {
            if whole > 0 {
                part as f64 / whole as f64
            } else {
                0.0
            }
        }

        let requests_total = self.requests_total.load(Ordering::Relaxed);
        let requests_successful = self.requests_successful.load(Ordering::Relaxed);
        let requests_failed = self.requests_failed.load(Ordering::Relaxed);
        let total_latency_ms = self.total_latency_ms.load(Ordering::Relaxed);
        let total_tokens_used = self.total_tokens_used.load(Ordering::Relaxed);
        let cache_hits = self.cache_hits.load(Ordering::Relaxed);
        let cache_misses = self.cache_misses.load(Ordering::Relaxed);

        // Saturate rather than overflow if both counters are ever near `u64::MAX`.
        let cache_lookups = cache_hits.saturating_add(cache_misses);

        let average_latency_ms = if requests_total > 0 {
            Some(total_latency_ms as f64 / requests_total as f64)
        } else {
            None
        };

        MetricsSnapshot {
            requests_total,
            requests_successful,
            requests_failed,
            success_rate: fraction(requests_successful, requests_total),
            average_latency_ms,
            total_tokens_used,
            cache_hits,
            cache_misses,
            cache_hit_rate: fraction(cache_hits, cache_lookups),
        }
    }

    /// Alias for [`ClientMetrics::get_stats`].
    pub fn snapshot(&self) -> MetricsSnapshot {
        self.get_stats()
    }

    /// Stores zero into every counter.
    ///
    /// The counters are cleared one after another with separate `Relaxed`
    /// stores, so the reset as a whole is not atomic with respect to
    /// concurrent `record_*` calls.
    pub fn reset(&self) {
        let counters = [
            &self.requests_total,
            &self.requests_successful,
            &self.requests_failed,
            &self.total_latency_ms,
            &self.total_tokens_used,
            &self.cache_hits,
            &self.cache_misses,
        ];
        for counter in counters.iter() {
            counter.store(0, Ordering::Relaxed);
        }
    }
}
/// Plain-value view of the metrics counters together with rates derived from
/// them, as produced by `ClientMetrics::get_stats`.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct MetricsSnapshot {
/// Value of the total-request counter when the snapshot was built.
pub requests_total: u64,
/// Value of the successful-request counter when the snapshot was built.
pub requests_successful: u64,
/// Value of the failed-request counter when the snapshot was built.
pub requests_failed: u64,
/// Successful requests divided by total requests, as a fraction (`summary`
/// multiplies it by 100 for display); `get_stats` sets it to `0.0` when the
/// total is zero.
pub success_rate: f64,
/// Accumulated latency in milliseconds divided by total requests;
/// `get_stats` sets it to `None` when the total is zero.
pub average_latency_ms: Option<f64>,
/// Value of the token counter when the snapshot was built.
pub total_tokens_used: u64,
/// Value of the cache-hit counter when the snapshot was built.
pub cache_hits: u64,
/// Value of the cache-miss counter when the snapshot was built.
pub cache_misses: u64,
/// Cache hits divided by hits plus misses, as a fraction; `get_stats` sets
/// it to `0.0` when there were no cache lookups.
pub cache_hit_rate: f64,
}
impl MetricsSnapshot {
    /// Renders the snapshot as a single human-readable line.
    ///
    /// Rates are shown as percentages with one decimal place. The average
    /// latency is rounded to whole milliseconds, or shown as `N/A` when
    /// `average_latency_ms` is `None`.
    pub fn summary(&self) -> String {
        let avg_latency = match self.average_latency_ms {
            Some(ms) => format!("{:.0}ms", ms),
            None => String::from("N/A"),
        };
        let success_pct = self.success_rate * 100.0;
        let cache_hit_pct = self.cache_hit_rate * 100.0;

        format!(
            "Requests: {} (Success: {:.1}%), Avg Latency: {}, Tokens: {}, Cache Hit: {:.1}%",
            self.requests_total, success_pct, avg_latency, self.total_tokens_used, cache_hit_pct
        )
    }
}
/// Measures how long a request takes and reports the result to a
/// [`ClientMetrics`] handle when `complete` is called.
#[derive(Debug)]
pub struct RequestTimer {
    /// Moment the timer was created.
    start: Instant,
    /// Metrics handle that receives the measurement.
    metrics: ClientMetrics,
}
impl RequestTimer {
    /// Starts timing immediately; `metrics` receives the measurement when
    /// [`RequestTimer::complete`] is called.
    pub fn new(metrics: ClientMetrics) -> Self {
        Self {
            start: Instant::now(),
            metrics,
        }
    }

    /// Stops the timer and records the request on the wrapped metrics.
    ///
    /// The time elapsed since [`RequestTimer::new`], in whole milliseconds,
    /// is passed to `ClientMetrics::record_request` together with `success`
    /// and `tokens`. Consumes the timer.
    pub fn complete(self, success: bool, tokens: Option<u32>) {
        let elapsed = self.start.elapsed();
        // Assemble the millisecond count from the `u64` seconds and `u32`
        // sub-second parts: same value as `as_millis()`, but without a
        // truncating `u128 as u64` cast, and saturating instead of wrapping.
        let latency_ms = elapsed
            .as_secs()
            .saturating_mul(1_000)
            .saturating_add(u64::from(elapsed.subsec_millis()));
        self.metrics.record_request(success, latency_ms, tokens);
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Two successful requests and one failure must show up in the counts,
    /// the mean latency, the token total and the success rate.
    #[test]
    fn test_metrics_recording() {
        let metrics = ClientMetrics::new();
        metrics.record_request(true, 100, Some(50));
        metrics.record_request(true, 200, Some(75));
        metrics.record_request(false, 50, None);

        let stats = metrics.get_stats();
        assert_eq!(stats.requests_total, 3);
        assert_eq!(stats.requests_successful, 2);
        assert_eq!(stats.requests_failed, 1);

        // 350 ms over 3 requests is roughly 116.67 ms.
        let mean_latency = stats.average_latency_ms.unwrap();
        assert!(mean_latency > 116.0 && mean_latency < 117.0);

        assert_eq!(stats.total_tokens_used, 125);

        // 2 of 3 requests succeeded.
        let success_rate = stats.success_rate;
        assert!(success_rate > 0.66 && success_rate < 0.67);
    }

    /// Two hits and one miss give a hit rate of roughly two thirds.
    #[test]
    fn test_cache_metrics() {
        let metrics = ClientMetrics::new();
        metrics.record_cache_hit();
        metrics.record_cache_hit();
        metrics.record_cache_miss();

        let hit_rate = metrics.get_stats().cache_hit_rate;
        assert!(hit_rate > 0.66 && hit_rate < 0.67);
    }
}