claude_agent/observability/
metrics.rs1use std::sync::atomic::{AtomicU64, Ordering};
7use std::time::Duration;
8
9#[cfg(feature = "otel")]
10use super::otel::{OtelConfig, OtelMetricsBridge, SERVICE_NAME_DEFAULT};
11#[cfg(feature = "otel")]
12use opentelemetry::global;
13
/// Configuration for in-process metrics collection.
///
/// The derived [`Default`] produces `enabled: false` and
/// `export_interval: None`, i.e. the same value as
/// [`MetricsConfig::disabled`]. Use [`MetricsConfig::new`] to obtain an
/// enabled configuration.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct MetricsConfig {
    /// Flag stating whether metrics are meant to be collected.
    // NOTE(review): nothing in this file reads this flag — confirm where it
    // is consumed.
    pub enabled: bool,
    /// Configured export interval, or `None` when no interval is set.
    pub export_interval: Option<Duration>,
}

impl MetricsConfig {
    /// Creates an enabled configuration with a 60-second export interval.
    pub fn new() -> Self {
        Self {
            enabled: true,
            export_interval: Some(Duration::from_secs(60)),
        }
    }

    /// Creates a configuration with metrics turned off and no export interval.
    pub fn disabled() -> Self {
        Self {
            enabled: false,
            export_interval: None,
        }
    }
}
36
/// Additive 64-bit counter backed by an [`AtomicU64`].
///
/// Every operation uses [`Ordering::Relaxed`]: individual updates are atomic,
/// but no ordering relative to other memory accesses is established.
#[derive(Debug, Default)]
pub struct Counter {
    value: AtomicU64,
}

impl Counter {
    /// Creates a counter starting at zero.
    ///
    /// This is a `const fn`, so a `Counter` can be placed in a `static`.
    pub const fn new() -> Self {
        Self {
            value: AtomicU64::new(0),
        }
    }

    /// Adds one to the counter.
    pub fn inc(&self) {
        self.add(1);
    }

    /// Adds `n` to the counter.
    ///
    /// The addition wraps around on overflow, which is the documented
    /// behavior of [`AtomicU64::fetch_add`].
    pub fn add(&self, n: u64) {
        self.value.fetch_add(n, Ordering::Relaxed);
    }

    /// Returns the current value.
    pub fn get(&self) -> u64 {
        self.value.load(Ordering::Relaxed)
    }
}
60
/// Unsigned 64-bit gauge backed by an [`AtomicU64`].
///
/// Every operation uses [`Ordering::Relaxed`]: individual updates are atomic,
/// but no ordering relative to other memory accesses is established.
#[derive(Debug, Default)]
pub struct Gauge {
    value: AtomicU64,
}

impl Gauge {
    /// Creates a gauge starting at zero.
    ///
    /// This is a `const fn`, so a `Gauge` can be placed in a `static`.
    pub const fn new() -> Self {
        Self {
            value: AtomicU64::new(0),
        }
    }

    /// Overwrites the gauge with `value`.
    pub fn set(&self, value: u64) {
        self.value.store(value, Ordering::Relaxed);
    }

    /// Raises the gauge by one.
    pub fn inc(&self) {
        self.value.fetch_add(1, Ordering::Relaxed);
    }

    /// Lowers the gauge by one, stopping at zero.
    ///
    /// A plain `fetch_sub` wraps, so an unmatched decrement at zero would
    /// turn the gauge into `u64::MAX`. Here the update is skipped instead.
    pub fn dec(&self) {
        // `checked_sub` yields `None` at zero, which makes `fetch_update`
        // leave the value untouched and return `Err`; that outcome is the
        // intended no-op, so the result is deliberately ignored.
        let _ = self
            .value
            .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |current| {
                current.checked_sub(1)
            });
    }

    /// Returns the current value.
    pub fn get(&self) -> u64 {
        self.value.load(Ordering::Relaxed)
    }
}
88
/// Fixed-bucket histogram with atomic counters.
///
/// For `k` upper bounds there are `k + 1` buckets: bucket `i` counts
/// observations `<= bucket_bounds[i]` that did not fit an earlier bucket, and
/// the last bucket counts everything that exceeds every bound.
#[derive(Debug)]
pub struct Histogram {
    // One counter per bound plus a trailing overflow bucket.
    buckets: Vec<AtomicU64>,
    // Upper bounds in ascending order, free of NaN (established by `new`).
    bucket_bounds: Vec<f64>,
    // Running total stored as the bit pattern of an `f64`, so fractional
    // observations are not truncated one by one.
    sum: AtomicU64,
    // Number of calls to `observe`.
    count: AtomicU64,
}

impl Histogram {
    /// Creates a histogram from the given upper bucket bounds.
    ///
    /// NaN bounds are discarded and the remaining bounds are sorted in
    /// ascending order, because bucket lookup picks the first bound that is
    /// `>=` the observed value and would mis-file values otherwise.
    pub fn new(mut bucket_bounds: Vec<f64>) -> Self {
        bucket_bounds.retain(|bound| !bound.is_nan());
        bucket_bounds.sort_by(f64::total_cmp);

        // `0..=len` yields `len + 1` items: one per bound plus the overflow
        // bucket.
        let buckets = (0..=bucket_bounds.len())
            .map(|_| AtomicU64::new(0))
            .collect();
        Self {
            buckets,
            bucket_bounds,
            sum: AtomicU64::new(0.0_f64.to_bits()),
            count: AtomicU64::new(0),
        }
    }

    /// Creates a histogram with the bounds
    /// `10, 25, 50, 100, 250, 500, 1000, 2500, 5000, 10000`.
    pub fn default_latency() -> Self {
        Self::new(vec![
            10.0, 25.0, 50.0, 100.0, 250.0, 500.0, 1000.0, 2500.0, 5000.0, 10000.0,
        ])
    }

    /// Records one observation.
    ///
    /// The observation is always counted and assigned to a bucket (values
    /// that are not `<=` any bound, including NaN, land in the overflow
    /// bucket). Only finite, positive values contribute to the running sum;
    /// negative, NaN and infinite values add nothing to it.
    pub fn observe(&self, value: f64) {
        let bucket_idx = self
            .bucket_bounds
            .iter()
            .position(|&bound| value <= bound)
            .unwrap_or(self.bucket_bounds.len());

        // In bounds: `buckets` always holds `bucket_bounds.len() + 1` entries.
        self.buckets[bucket_idx].fetch_add(1, Ordering::Relaxed);

        if value.is_finite() && value > 0.0 {
            // There is no atomic f64 add, so retry a compare-and-swap on the
            // bit pattern. The closure always returns `Some`, so the update
            // cannot fail and the result carries no information.
            let _ = self
                .sum
                .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |bits| {
                    Some((f64::from_bits(bits) + value).to_bits())
                });
        }
        self.count.fetch_add(1, Ordering::Relaxed);
    }

    /// Returns how many observations have been recorded.
    pub fn count(&self) -> u64 {
        self.count.load(Ordering::Relaxed)
    }

    /// Returns the running sum of observations, truncated toward zero.
    ///
    /// Truncation happens once on the accumulated total rather than on each
    /// observation, so fractional values are not lost individually.
    pub fn sum(&self) -> u64 {
        // Float-to-integer `as` casts saturate, and the stored total is never
        // negative, so this cannot wrap.
        f64::from_bits(self.sum.load(Ordering::Relaxed)) as u64
    }
}
136
137pub struct MetricsRegistry {
142 pub requests_total: Counter,
143 pub requests_success: Counter,
144 pub requests_error: Counter,
145 pub tokens_input: Counter,
146 pub tokens_output: Counter,
147 pub cache_read_tokens: Counter,
148 pub cache_creation_tokens: Counter,
149 pub tool_calls_total: Counter,
150 pub tool_errors: Counter,
151 pub active_sessions: Gauge,
152 pub request_latency_ms: Histogram,
153 pub cost_total_micros: Counter,
154 #[cfg(feature = "otel")]
155 otel_bridge: Option<OtelMetricsBridge>,
156}
157
158impl MetricsRegistry {
159 pub fn new(_config: &MetricsConfig) -> Self {
160 Self {
161 requests_total: Counter::new(),
162 requests_success: Counter::new(),
163 requests_error: Counter::new(),
164 tokens_input: Counter::new(),
165 tokens_output: Counter::new(),
166 cache_read_tokens: Counter::new(),
167 cache_creation_tokens: Counter::new(),
168 tool_calls_total: Counter::new(),
169 tool_errors: Counter::new(),
170 active_sessions: Gauge::new(),
171 request_latency_ms: Histogram::default_latency(),
172 cost_total_micros: Counter::new(),
173 #[cfg(feature = "otel")]
174 otel_bridge: None,
175 }
176 }
177
178 #[cfg(feature = "otel")]
179 pub fn with_otel(_config: &MetricsConfig, otel_config: &OtelConfig) -> Self {
180 let meter = global::meter(SERVICE_NAME_DEFAULT);
181 let bridge = OtelMetricsBridge::new(meter);
182 let _ = &otel_config.service_name; Self {
185 requests_total: Counter::new(),
186 requests_success: Counter::new(),
187 requests_error: Counter::new(),
188 tokens_input: Counter::new(),
189 tokens_output: Counter::new(),
190 cache_read_tokens: Counter::new(),
191 cache_creation_tokens: Counter::new(),
192 tool_calls_total: Counter::new(),
193 tool_errors: Counter::new(),
194 active_sessions: Gauge::new(),
195 request_latency_ms: Histogram::default_latency(),
196 cost_total_micros: Counter::new(),
197 otel_bridge: Some(bridge),
198 }
199 }
200
201 pub fn record_request_start(&self) {
202 self.requests_total.inc();
203 self.active_sessions.inc();
204
205 #[cfg(feature = "otel")]
206 if let Some(ref bridge) = self.otel_bridge {
207 bridge.record_request_start();
208 }
209 }
210
211 pub fn record_request_end(&self, success: bool, latency_ms: f64) {
212 self.active_sessions.dec();
213 self.request_latency_ms.observe(latency_ms);
214 if success {
215 self.requests_success.inc();
216 } else {
217 self.requests_error.inc();
218 }
219
220 #[cfg(feature = "otel")]
221 if let Some(ref bridge) = self.otel_bridge {
222 bridge.record_request_end(success, latency_ms);
223 }
224 }
225
226 pub fn record_tokens(&self, input: u32, output: u32) {
227 self.tokens_input.add(input as u64);
228 self.tokens_output.add(output as u64);
229
230 #[cfg(feature = "otel")]
231 if let Some(ref bridge) = self.otel_bridge {
232 bridge.record_tokens(input as u64, output as u64);
233 }
234 }
235
236 pub fn record_cache(&self, read: u32, creation: u32) {
237 self.cache_read_tokens.add(read as u64);
238 self.cache_creation_tokens.add(creation as u64);
239
240 #[cfg(feature = "otel")]
241 if let Some(ref bridge) = self.otel_bridge {
242 bridge.record_cache(read as u64, creation as u64);
243 }
244 }
245
246 pub fn record_tool_call(&self, success: bool) {
247 self.tool_calls_total.inc();
248 if !success {
249 self.tool_errors.inc();
250 }
251
252 #[cfg(feature = "otel")]
253 if let Some(ref bridge) = self.otel_bridge {
254 bridge.record_tool_call(success);
255 }
256 }
257
258 pub fn record_cost(&self, cost_usd: f64) {
259 let micros = (cost_usd * 1_000_000.0) as u64;
260 self.cost_total_micros.add(micros);
261
262 #[cfg(feature = "otel")]
263 if let Some(ref bridge) = self.otel_bridge {
264 bridge.record_cost(cost_usd);
265 }
266 }
267
268 pub fn total_cost_usd(&self) -> f64 {
269 self.cost_total_micros.get() as f64 / 1_000_000.0
270 }
271}
272
273impl Default for MetricsRegistry {
274 fn default() -> Self {
275 Self::new(&MetricsConfig::default())
276 }
277}
278
279#[derive(Debug, Clone, Default)]
281pub struct AgentMetrics {
282 pub total_requests: u64,
283 pub successful_requests: u64,
284 pub failed_requests: u64,
285 pub total_input_tokens: u64,
286 pub total_output_tokens: u64,
287 pub cache_read_tokens: u64,
288 pub cache_creation_tokens: u64,
289 pub total_tool_calls: u64,
290 pub failed_tool_calls: u64,
291 pub total_cost_usd: f64,
292 pub avg_latency_ms: f64,
293}
294
295impl AgentMetrics {
296 pub fn from_registry(registry: &MetricsRegistry) -> Self {
297 let count = registry.request_latency_ms.count();
298 let avg_latency = if count > 0 {
299 registry.request_latency_ms.sum() as f64 / count as f64
300 } else {
301 0.0
302 };
303
304 Self {
305 total_requests: registry.requests_total.get(),
306 successful_requests: registry.requests_success.get(),
307 failed_requests: registry.requests_error.get(),
308 total_input_tokens: registry.tokens_input.get(),
309 total_output_tokens: registry.tokens_output.get(),
310 cache_read_tokens: registry.cache_read_tokens.get(),
311 cache_creation_tokens: registry.cache_creation_tokens.get(),
312 total_tool_calls: registry.tool_calls_total.get(),
313 failed_tool_calls: registry.tool_errors.get(),
314 total_cost_usd: registry.total_cost_usd(),
315 avg_latency_ms: avg_latency,
316 }
317 }
318}
319
#[cfg(test)]
mod tests {
    use super::*;

    /// A counter starts at zero and accumulates `inc` and `add`.
    #[test]
    fn test_counter() {
        let counter = Counter::new();
        assert_eq!(counter.get(), 0);
        counter.inc();
        assert_eq!(counter.get(), 1);
        counter.add(5);
        assert_eq!(counter.get(), 6);
    }

    /// A gauge can be set, raised and lowered.
    #[test]
    fn test_gauge() {
        let gauge = Gauge::new();
        gauge.set(10);
        assert_eq!(gauge.get(), 10);
        gauge.inc();
        assert_eq!(gauge.get(), 11);
        gauge.dec();
        assert_eq!(gauge.get(), 10);
    }

    /// Every observation is counted and whole-number values are summed
    /// exactly.
    #[test]
    fn test_histogram() {
        let hist = Histogram::new(vec![10.0, 50.0, 100.0]);
        hist.observe(5.0);
        hist.observe(25.0);
        hist.observe(75.0);
        hist.observe(150.0);
        assert_eq!(hist.count(), 4);
        assert_eq!(hist.sum(), 255);
    }

    /// One full request cycle is reflected in the snapshot.
    #[test]
    fn test_metrics_registry() {
        let registry = MetricsRegistry::default();
        registry.record_request_start();
        registry.record_tokens(100, 50);
        registry.record_tool_call(true);
        registry.record_cost(0.001);
        registry.record_request_end(true, 250.0);

        let metrics = AgentMetrics::from_registry(&registry);
        assert_eq!(metrics.total_requests, 1);
        assert_eq!(metrics.successful_requests, 1);
        assert_eq!(metrics.failed_requests, 0);
        assert_eq!(metrics.total_input_tokens, 100);
        assert_eq!(metrics.total_output_tokens, 50);
        assert_eq!(metrics.total_tool_calls, 1);
        assert_eq!(metrics.failed_tool_calls, 0);
        assert_eq!(metrics.avg_latency_ms, 250.0);
        assert!((metrics.total_cost_usd - 0.001).abs() < 1e-9);
        // The start/end pair leaves no session open.
        assert_eq!(registry.active_sessions.get(), 0);
    }
}