use std::sync::atomic::{AtomicU64, Ordering};
use crate::config::LlmMetricsConfig;
/// Lock-free counters describing LLM call activity.
///
/// Every field is an `AtomicU64`, so the `record_*` methods can update the
/// counters through a shared `&self` reference. `Default` yields all zeros.
#[derive(Debug, Default)]
pub struct LlmMetrics {
/// Number of calls passed to `record_call`, regardless of outcome.
pub total_calls: AtomicU64,
/// Calls recorded with `success == true`.
pub successful_calls: AtomicU64,
/// Calls recorded with `success == false`.
pub failed_calls: AtomicU64,
/// Sum of input tokens; only updated when the config enables token tracking.
pub total_input_tokens: AtomicU64,
/// Sum of output tokens; only updated when the config enables token tracking.
pub total_output_tokens: AtomicU64,
/// Sum of call latencies in milliseconds; only updated when latency tracking is enabled.
pub total_latency_ms: AtomicU64,
/// Accumulated cost scaled by 1,000,000 and stored as an integer; only updated
/// when cost tracking is enabled. `generate_report` divides it by 1,000,000 and
/// exposes the result as `estimated_cost_usd`.
pub estimated_cost_micros: AtomicU64,
/// Incremented by `record_rate_limit`.
pub rate_limit_errors: AtomicU64,
/// Incremented by `record_timeout`.
pub timeout_errors: AtomicU64,
/// Incremented by `record_fallback`.
pub fallback_triggers: AtomicU64,
}
impl LlmMetrics {
pub fn new() -> Self {
Self::default()
}
pub fn record_call(
&self,
input_tokens: u64,
output_tokens: u64,
latency_ms: u64,
success: bool,
config: &LlmMetricsConfig,
) {
self.total_calls.fetch_add(1, Ordering::Relaxed);
if success {
self.successful_calls.fetch_add(1, Ordering::Relaxed);
} else {
self.failed_calls.fetch_add(1, Ordering::Relaxed);
}
if config.track_tokens {
self.total_input_tokens
.fetch_add(input_tokens, Ordering::Relaxed);
self.total_output_tokens
.fetch_add(output_tokens, Ordering::Relaxed);
}
if config.track_latency {
self.total_latency_ms
.fetch_add(latency_ms, Ordering::Relaxed);
}
if config.track_cost {
let cost = config.calculate_cost(input_tokens, output_tokens);
let cost_micros = (cost * 1_000_000.0) as u64;
self.estimated_cost_micros
.fetch_add(cost_micros, Ordering::Relaxed);
}
}
pub fn record_rate_limit(&self) {
self.rate_limit_errors.fetch_add(1, Ordering::Relaxed);
}
pub fn record_timeout(&self) {
self.timeout_errors.fetch_add(1, Ordering::Relaxed);
}
pub fn record_fallback(&self) {
self.fallback_triggers.fetch_add(1, Ordering::Relaxed);
}
pub fn reset(&self) {
self.total_calls.store(0, Ordering::Relaxed);
self.successful_calls.store(0, Ordering::Relaxed);
self.failed_calls.store(0, Ordering::Relaxed);
self.total_input_tokens.store(0, Ordering::Relaxed);
self.total_output_tokens.store(0, Ordering::Relaxed);
self.total_latency_ms.store(0, Ordering::Relaxed);
self.estimated_cost_micros.store(0, Ordering::Relaxed);
self.rate_limit_errors.store(0, Ordering::Relaxed);
self.timeout_errors.store(0, Ordering::Relaxed);
self.fallback_triggers.store(0, Ordering::Relaxed);
}
pub fn generate_report(&self) -> LlmMetricsReport {
let total_calls = self.total_calls.load(Ordering::Relaxed);
let successful = self.successful_calls.load(Ordering::Relaxed);
let failed = self.failed_calls.load(Ordering::Relaxed);
let total_latency = self.total_latency_ms.load(Ordering::Relaxed);
LlmMetricsReport {
total_calls,
successful_calls: successful,
failed_calls: failed,
success_rate: if total_calls > 0 {
successful as f64 / total_calls as f64
} else {
0.0
},
total_input_tokens: self.total_input_tokens.load(Ordering::Relaxed),
total_output_tokens: self.total_output_tokens.load(Ordering::Relaxed),
total_tokens: self.total_input_tokens.load(Ordering::Relaxed)
+ self.total_output_tokens.load(Ordering::Relaxed),
avg_latency_ms: if total_calls > 0 {
total_latency as f64 / total_calls as f64
} else {
0.0
},
total_latency_ms: total_latency,
estimated_cost_usd: self.estimated_cost_micros.load(Ordering::Relaxed) as f64
/ 1_000_000.0,
rate_limit_errors: self.rate_limit_errors.load(Ordering::Relaxed),
timeout_errors: self.timeout_errors.load(Ordering::Relaxed),
fallback_triggers: self.fallback_triggers.load(Ordering::Relaxed),
}
}
}
/// Plain-value report produced by `LlmMetrics::generate_report`.
///
/// Holds ordinary integers and floats copied or derived from the atomic
/// counters at the time the report was generated.
#[derive(Debug, Clone)]
pub struct LlmMetricsReport {
/// Value of the `total_calls` counter.
pub total_calls: u64,
/// Value of the `successful_calls` counter.
pub successful_calls: u64,
/// Value of the `failed_calls` counter.
pub failed_calls: u64,
/// `successful_calls / total_calls` as a fraction; `0.0` when no calls were recorded.
pub success_rate: f64,
/// Value of the `total_input_tokens` counter.
pub total_input_tokens: u64,
/// Value of the `total_output_tokens` counter.
pub total_output_tokens: u64,
/// Sum of the input and output token counters.
pub total_tokens: u64,
/// `total_latency_ms / total_calls`; `0.0` when no calls were recorded.
pub avg_latency_ms: f64,
/// Value of the `total_latency_ms` counter.
pub total_latency_ms: u64,
/// The `estimated_cost_micros` counter divided by 1,000,000.
pub estimated_cost_usd: f64,
/// Value of the `rate_limit_errors` counter.
pub rate_limit_errors: u64,
/// Value of the `timeout_errors` counter.
pub timeout_errors: u64,
/// Value of the `fallback_triggers` counter.
pub fallback_triggers: u64,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Records two successful calls and one failed call, then checks the call
    /// counts, the success rate and the token totals in the generated report.
    ///
    /// NOTE(review): the token assertions only hold if
    /// `LlmMetricsConfig::default()` enables token tracking; that type is
    /// defined outside this file, so confirm there.
    #[test]
    fn test_llm_metrics_recording() {
        let cfg = LlmMetricsConfig::default();
        let collector = LlmMetrics::new();

        // (input_tokens, output_tokens, latency_ms, success)
        let calls = [
            (100, 50, 150, true),
            (200, 100, 300, true),
            (100, 0, 0, false),
        ];
        for &(input, output, latency, ok) in &calls {
            collector.record_call(input, output, latency, ok, &cfg);
        }

        let snapshot = collector.generate_report();
        assert_eq!(snapshot.total_calls, 3);
        assert_eq!(snapshot.successful_calls, 2);
        assert_eq!(snapshot.failed_calls, 1);
        let rate_error = (snapshot.success_rate - 0.666666).abs();
        assert!(rate_error < 0.01);
        assert_eq!(snapshot.total_input_tokens, 400);
        assert_eq!(snapshot.total_output_tokens, 150);
    }

    /// After `reset`, a report must show zero recorded calls.
    #[test]
    fn test_llm_metrics_reset() {
        let cfg = LlmMetricsConfig::default();
        let collector = LlmMetrics::new();

        collector.record_call(100, 50, 150, true, &cfg);
        collector.reset();

        let snapshot = collector.generate_report();
        assert_eq!(snapshot.total_calls, 0);
    }
}