1use std::sync::atomic::{AtomicU64, Ordering};
7
8use crate::config::LlmMetricsConfig;
9
/// Lock-free counters describing LLM call activity.
///
/// Every field is an [`AtomicU64`], so the collector can be updated through a
/// shared reference. Each counter is updated independently of the others.
#[derive(Default, Debug)]
pub struct LlmMetrics {
    /// Number of calls recorded via `record_call`, regardless of outcome.
    pub total_calls: AtomicU64,
    /// Number of recorded calls flagged as successful.
    pub successful_calls: AtomicU64,
    /// Number of recorded calls flagged as failed.
    pub failed_calls: AtomicU64,
    /// Sum of input tokens over recorded calls (only when token tracking is enabled).
    pub total_input_tokens: AtomicU64,
    /// Sum of output tokens over recorded calls (only when token tracking is enabled).
    pub total_output_tokens: AtomicU64,
    /// Sum of call latencies in milliseconds (only when latency tracking is enabled).
    pub total_latency_ms: AtomicU64,
    /// Accumulated estimated cost in millionths of the configured cost unit;
    /// `generate_report` exposes it divided by 1,000,000 as `estimated_cost_usd`.
    pub estimated_cost_micros: AtomicU64,
    /// Number of rate-limit errors recorded via `record_rate_limit`.
    pub rate_limit_errors: AtomicU64,
    /// Number of timeouts recorded via `record_timeout`.
    pub timeout_errors: AtomicU64,
    /// Number of fallback activations recorded via `record_fallback`.
    pub fallback_triggers: AtomicU64,
}
34
35impl LlmMetrics {
36 pub fn new() -> Self {
38 Self::default()
39 }
40
41 pub fn record_call(
43 &self,
44 input_tokens: u64,
45 output_tokens: u64,
46 latency_ms: u64,
47 success: bool,
48 config: &LlmMetricsConfig,
49 ) {
50 self.total_calls.fetch_add(1, Ordering::Relaxed);
51
52 if success {
53 self.successful_calls.fetch_add(1, Ordering::Relaxed);
54 } else {
55 self.failed_calls.fetch_add(1, Ordering::Relaxed);
56 }
57
58 if config.track_tokens {
59 self.total_input_tokens
60 .fetch_add(input_tokens, Ordering::Relaxed);
61 self.total_output_tokens
62 .fetch_add(output_tokens, Ordering::Relaxed);
63 }
64
65 if config.track_latency {
66 self.total_latency_ms
67 .fetch_add(latency_ms, Ordering::Relaxed);
68 }
69
70 if config.track_cost {
71 let cost = config.calculate_cost(input_tokens, output_tokens);
72 let cost_micros = (cost * 1_000_000.0) as u64;
74 self.estimated_cost_micros
75 .fetch_add(cost_micros, Ordering::Relaxed);
76 }
77 }
78
79 pub fn record_rate_limit(&self) {
81 self.rate_limit_errors.fetch_add(1, Ordering::Relaxed);
82 }
83
84 pub fn record_timeout(&self) {
86 self.timeout_errors.fetch_add(1, Ordering::Relaxed);
87 }
88
89 pub fn record_fallback(&self) {
91 self.fallback_triggers.fetch_add(1, Ordering::Relaxed);
92 }
93
94 pub fn reset(&self) {
96 self.total_calls.store(0, Ordering::Relaxed);
97 self.successful_calls.store(0, Ordering::Relaxed);
98 self.failed_calls.store(0, Ordering::Relaxed);
99 self.total_input_tokens.store(0, Ordering::Relaxed);
100 self.total_output_tokens.store(0, Ordering::Relaxed);
101 self.total_latency_ms.store(0, Ordering::Relaxed);
102 self.estimated_cost_micros.store(0, Ordering::Relaxed);
103 self.rate_limit_errors.store(0, Ordering::Relaxed);
104 self.timeout_errors.store(0, Ordering::Relaxed);
105 self.fallback_triggers.store(0, Ordering::Relaxed);
106 }
107
108 pub fn generate_report(&self) -> LlmMetricsReport {
110 let total_calls = self.total_calls.load(Ordering::Relaxed);
111 let successful = self.successful_calls.load(Ordering::Relaxed);
112 let failed = self.failed_calls.load(Ordering::Relaxed);
113 let total_latency = self.total_latency_ms.load(Ordering::Relaxed);
114
115 LlmMetricsReport {
116 total_calls,
117 successful_calls: successful,
118 failed_calls: failed,
119 success_rate: if total_calls > 0 {
120 successful as f64 / total_calls as f64
121 } else {
122 0.0
123 },
124 total_input_tokens: self.total_input_tokens.load(Ordering::Relaxed),
125 total_output_tokens: self.total_output_tokens.load(Ordering::Relaxed),
126 total_tokens: self.total_input_tokens.load(Ordering::Relaxed)
127 + self.total_output_tokens.load(Ordering::Relaxed),
128 avg_latency_ms: if total_calls > 0 {
129 total_latency as f64 / total_calls as f64
130 } else {
131 0.0
132 },
133 total_latency_ms: total_latency,
134 estimated_cost_usd: self.estimated_cost_micros.load(Ordering::Relaxed) as f64
135 / 1_000_000.0,
136 rate_limit_errors: self.rate_limit_errors.load(Ordering::Relaxed),
137 timeout_errors: self.timeout_errors.load(Ordering::Relaxed),
138 fallback_triggers: self.fallback_triggers.load(Ordering::Relaxed),
139 }
140 }
141}
142
/// Plain-value snapshot of [`LlmMetrics`] produced by `generate_report`.
#[derive(Clone, Debug)]
pub struct LlmMetricsReport {
    /// Calls recorded, regardless of outcome.
    pub total_calls: u64,
    /// Calls recorded as successful.
    pub successful_calls: u64,
    /// Calls recorded as failed.
    pub failed_calls: u64,
    /// `successful_calls / total_calls`, or `0.0` when no call was recorded.
    pub success_rate: f64,
    /// Accumulated input tokens.
    pub total_input_tokens: u64,
    /// Accumulated output tokens.
    pub total_output_tokens: u64,
    /// Input plus output tokens.
    pub total_tokens: u64,
    /// `total_latency_ms / total_calls`, or `0.0` when no call was recorded.
    pub avg_latency_ms: f64,
    /// Accumulated latency in milliseconds.
    pub total_latency_ms: u64,
    /// Accumulated cost counter divided by 1,000,000.
    pub estimated_cost_usd: f64,
    /// Rate-limit errors recorded.
    pub rate_limit_errors: u64,
    /// Timeout errors recorded.
    pub timeout_errors: u64,
    /// Fallback activations recorded.
    pub fallback_triggers: u64,
}
173
#[cfg(test)]
mod tests {
    use super::*;

    /// Call counts, success rate and token totals are reflected in the report.
    #[test]
    fn test_llm_metrics_recording() {
        let config = LlmMetricsConfig::default();
        let metrics = LlmMetrics::new();

        metrics.record_call(100, 50, 150, true, &config);
        metrics.record_call(200, 100, 300, true, &config);
        metrics.record_call(100, 0, 0, false, &config);

        let report = metrics.generate_report();
        assert_eq!(report.total_calls, 3);
        assert_eq!(report.successful_calls, 2);
        assert_eq!(report.failed_calls, 1);
        assert!((report.success_rate - 0.666666).abs() < 0.01);
        assert_eq!(report.total_input_tokens, 400);
        assert_eq!(report.total_output_tokens, 150);
        assert_eq!(
            report.total_tokens,
            report.total_input_tokens + report.total_output_tokens
        );
    }

    /// `reset` must clear every counter, not just the call count.
    #[test]
    fn test_llm_metrics_reset() {
        let config = LlmMetricsConfig::default();
        let metrics = LlmMetrics::new();

        metrics.record_call(100, 50, 150, true, &config);
        metrics.record_call(10, 5, 15, false, &config);
        metrics.record_rate_limit();
        metrics.record_timeout();
        metrics.record_fallback();
        metrics.reset();

        let report = metrics.generate_report();
        assert_eq!(report.total_calls, 0);
        assert_eq!(report.successful_calls, 0);
        assert_eq!(report.failed_calls, 0);
        assert_eq!(report.total_input_tokens, 0);
        assert_eq!(report.total_output_tokens, 0);
        assert_eq!(report.total_tokens, 0);
        assert_eq!(report.total_latency_ms, 0);
        assert_eq!(report.estimated_cost_usd, 0.0);
        assert_eq!(report.rate_limit_errors, 0);
        assert_eq!(report.timeout_errors, 0);
        assert_eq!(report.fallback_triggers, 0);
    }

    /// The dedicated error/fallback recorders each bump only their own counter.
    #[test]
    fn test_llm_metrics_error_counters() {
        let metrics = LlmMetrics::new();

        metrics.record_rate_limit();
        metrics.record_timeout();
        metrics.record_timeout();
        metrics.record_fallback();
        metrics.record_fallback();
        metrics.record_fallback();

        let report = metrics.generate_report();
        assert_eq!(report.rate_limit_errors, 1);
        assert_eq!(report.timeout_errors, 2);
        assert_eq!(report.fallback_triggers, 3);
        assert_eq!(report.total_calls, 0);
    }

    /// With no recorded calls the ratio fields fall back to 0.0 instead of NaN.
    #[test]
    fn test_llm_metrics_empty_report() {
        let report = LlmMetrics::new().generate_report();

        assert_eq!(report.total_calls, 0);
        assert_eq!(report.success_rate, 0.0);
        assert_eq!(report.avg_latency_ms, 0.0);
        assert_eq!(report.estimated_cost_usd, 0.0);
    }
}