Skip to main content

llm_agent_runtime/
metrics.rs

1//! # Module: Metrics
2//!
3//! Runtime observability counters for `AgentRuntime`.
4//! All global counters use atomics for lock-free, thread-safe increment/read.
5//! Per-tool counters use a `Mutex<HashMap>` to avoid requiring a concurrent
6//! map dependency.
7
8use std::collections::HashMap;
9use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
10use std::sync::{Arc, Mutex};
11
12// ── LatencyHistogram ──────────────────────────────────────────────────────────
13
/// Fixed-bucket histogram of latency samples, measured in milliseconds.
///
/// Each sample is counted in exactly one bucket: the first whose upper bound
/// is greater than or equal to the sample.  Upper bounds are therefore
/// **inclusive** (a sample of exactly 1 ms is counted in bucket 0).
///
/// | Index | Samples counted |
/// |-------|-----------------|
/// | 0     | ≤ 1 ms          |
/// | 1     | 2 – 5 ms        |
/// | 2     | 6 – 10 ms       |
/// | 3     | 11 – 50 ms      |
/// | 4     | 51 – 100 ms     |
/// | 5     | 101 – 500 ms    |
/// | 6     | > 500 ms        |
///
/// All state lives in atomics updated with `Ordering::Relaxed`, so recording
/// and reading never take a lock.  The default value has every counter at zero.
#[derive(Debug, Default)]
pub struct LatencyHistogram {
    /// Per-bucket sample counts; index `i` pairs with `Self::BOUNDS[i]`.
    buckets: [AtomicU64; 7],
    /// Number of samples recorded so far.
    total_count: AtomicU64,
    /// Sum of all recorded samples, in milliseconds.
    total_sum_ms: AtomicU64,
}

impl LatencyHistogram {
    /// Inclusive upper bound of each bucket, in milliseconds.
    ///
    /// The final entry is `u64::MAX`, so every possible sample matches some
    /// bucket.  The boundaries are meant to span the latencies seen in
    /// LLM-backed agent systems:
    ///
    /// | Bucket | Range      | Typical source                         |
    /// |--------|------------|----------------------------------------|
    /// | 0      | ≤ 1 ms     | In-process tool calls, cache hits      |
    /// | 1      | ≤ 5 ms     | Fast local I/O, simple calculations    |
    /// | 2      | ≤ 10 ms    | Network round-trips to local services  |
    /// | 3      | ≤ 50 ms    | p50 LLM token latency (streaming)      |
    /// | 4      | ≤ 100 ms   | p95 for small LLM completions          |
    /// | 5      | ≤ 500 ms   | p99 for medium LLM completions         |
    /// | 6      | > 500 ms   | Slow completions, network retries      |
    const BOUNDS: [u64; 7] = [1, 5, 10, 50, 100, 500, u64::MAX];

    /// Record one latency sample of `ms` milliseconds.
    ///
    /// Updates the running count and sum, then increments the first bucket
    /// whose inclusive upper bound is at least `ms`.
    pub fn record(&self, ms: u64) {
        self.total_count.fetch_add(1, Ordering::Relaxed);
        self.total_sum_ms.fetch_add(ms, Ordering::Relaxed);

        // The last bound is `u64::MAX`, so a matching slot always exists.
        let slot = Self::BOUNDS.iter().position(|&upper| ms <= upper);
        if let Some(index) = slot {
            self.buckets[index].fetch_add(1, Ordering::Relaxed);
        }
    }

    /// Mean of all recorded samples in milliseconds; `0.0` when empty.
    pub fn mean_ms(&self) -> f64 {
        match self.total_count.load(Ordering::Relaxed) {
            0 => 0.0,
            samples => {
                let sum_ms = self.total_sum_ms.load(Ordering::Relaxed);
                sum_ms as f64 / samples as f64
            }
        }
    }

    /// Number of samples recorded so far.
    pub fn count(&self) -> u64 {
        self.total_count.load(Ordering::Relaxed)
    }

    /// Current bucket contents as `(upper_bound_ms, count)` pairs, in bucket
    /// order.  The bound reported for the last bucket is `u64::MAX`.
    pub fn buckets(&self) -> Vec<(u64, u64)> {
        (0..Self::BOUNDS.len())
            .map(|i| (Self::BOUNDS[i], self.buckets[i].load(Ordering::Relaxed)))
            .collect()
    }
}
104
/// A point-in-time copy of the runtime counters.
///
/// Obtained by calling [`RuntimeMetrics::snapshot`].  Every field is a plain
/// owned value (integers, an `f64` mean, and owned maps / vectors), so a
/// snapshot can be logged, cloned, or compared with another snapshot without
/// holding any locks.
///
/// `Default` yields a snapshot with every counter at zero and every
/// collection empty.
///
/// If only the global counters are needed, as a bare tuple, see
/// [`RuntimeMetrics::to_snapshot`].
///
/// # Example
/// ```rust
/// use llm_agent_runtime::metrics::RuntimeMetrics;
///
/// let m = RuntimeMetrics::new();
/// let snap = m.snapshot();
/// assert_eq!(snap.active_sessions, 0);
/// assert_eq!(snap.total_sessions, 0);
/// ```
#[derive(Debug, Clone, PartialEq, Default)]
pub struct MetricsSnapshot {
    /// Number of agent sessions currently in progress.
    pub active_sessions: usize,
    /// Total number of sessions started since the runtime was created.
    pub total_sessions: u64,
    /// Total number of ReAct steps executed across all sessions.
    pub total_steps: u64,
    /// Total number of tool calls dispatched (across all tool names).
    pub total_tool_calls: u64,
    /// Total number of tool calls that returned an error observation.
    pub failed_tool_calls: u64,
    /// Total number of requests shed due to backpressure.
    pub backpressure_shed_count: u64,
    /// Total number of memory recall operations.
    pub memory_recall_count: u64,
    /// Per-tool call counts: `tool_name → total_calls`.
    pub per_tool_calls: HashMap<String, u64>,
    /// Per-tool failure counts: `tool_name → failed_calls`.
    pub per_tool_failures: HashMap<String, u64>,
    /// Step latency histogram bucket counts as `(upper_bound_ms_inclusive, count)`,
    /// in bucket order as returned by [`LatencyHistogram::buckets`].
    pub step_latency_buckets: Vec<(u64, u64)>,
    /// Mean step latency in milliseconds (`0.0` when no samples were recorded).
    pub step_latency_mean_ms: f64,
    /// Per-agent, per-tool call counts: `agent_id → tool_name → count`.
    pub per_agent_tool_calls: HashMap<String, HashMap<String, u64>>,
    /// Per-agent, per-tool failure counts: `agent_id → tool_name → count`.
    pub per_agent_tool_failures: HashMap<String, HashMap<String, u64>>,
}
153
/// Shared runtime metrics. Clone the `Arc` to share across threads.
///
/// [`RuntimeMetrics::new`] returns the value already wrapped in an `Arc`.
///
/// The global counters are atomics and are exposed as public fields, so
/// callers may update them directly (for example with `fetch_add`).  The
/// per-tool and per-agent breakdowns are private `Mutex<HashMap>`s; they are
/// updated only through the `record_*` methods and read through the
/// `*_snapshot` methods.
#[derive(Debug)]
pub struct RuntimeMetrics {
    /// Number of agent sessions currently in progress.
    pub active_sessions: AtomicUsize,
    /// Total number of sessions started since the runtime was created.
    pub total_sessions: AtomicU64,
    /// Total number of ReAct steps executed across all sessions.
    pub total_steps: AtomicU64,
    /// Total number of tool calls dispatched (across all tool names).
    pub total_tool_calls: AtomicU64,
    /// Total number of tool calls that returned an error observation.
    pub failed_tool_calls: AtomicU64,
    /// Total number of requests shed due to backpressure.
    pub backpressure_shed_count: AtomicU64,
    /// Total number of memory recall operations.
    pub memory_recall_count: AtomicU64,
    /// Per-tool call counts: `tool_name → total_calls`.
    per_tool_calls: Mutex<HashMap<String, u64>>,
    /// Per-tool failure counts: `tool_name → failed_calls`.
    per_tool_failures: Mutex<HashMap<String, u64>>,
    /// Per-step latency histogram (samples in milliseconds).
    pub step_latency: LatencyHistogram,
    /// Per-agent, per-tool call counts: `agent_id → tool_name → count`.
    per_agent_tool_calls: Mutex<HashMap<String, HashMap<String, u64>>>,
    /// Per-agent, per-tool failure counts: `agent_id → tool_name → count`.
    per_agent_tool_failures: Mutex<HashMap<String, HashMap<String, u64>>>,
}
182
183impl Default for RuntimeMetrics {
184    fn default() -> Self {
185        Self {
186            active_sessions: AtomicUsize::new(0),
187            total_sessions: AtomicU64::new(0),
188            total_steps: AtomicU64::new(0),
189            total_tool_calls: AtomicU64::new(0),
190            failed_tool_calls: AtomicU64::new(0),
191            backpressure_shed_count: AtomicU64::new(0),
192            memory_recall_count: AtomicU64::new(0),
193            per_tool_calls: Mutex::new(HashMap::new()),
194            per_tool_failures: Mutex::new(HashMap::new()),
195            step_latency: LatencyHistogram::default(),
196            per_agent_tool_calls: Mutex::new(HashMap::new()),
197            per_agent_tool_failures: Mutex::new(HashMap::new()),
198        }
199    }
200}
201
202impl RuntimeMetrics {
203    /// Allocate a new `RuntimeMetrics` instance wrapped in an `Arc`.
204    pub fn new() -> Arc<Self> {
205        Arc::new(Self::default())
206    }
207
208    /// Return the number of agent sessions currently in progress.
209    pub fn active_sessions(&self) -> usize {
210        self.active_sessions.load(Ordering::Relaxed)
211    }
212
213    /// Return the total number of sessions started since the runtime was created.
214    pub fn total_sessions(&self) -> u64 {
215        self.total_sessions.load(Ordering::Relaxed)
216    }
217
218    /// Return the total number of ReAct steps executed across all sessions.
219    pub fn total_steps(&self) -> u64 {
220        self.total_steps.load(Ordering::Relaxed)
221    }
222
223    /// Return the total number of tool calls dispatched.
224    pub fn total_tool_calls(&self) -> u64 {
225        self.total_tool_calls.load(Ordering::Relaxed)
226    }
227
228    /// Return the total number of tool calls that returned an error observation.
229    pub fn failed_tool_calls(&self) -> u64 {
230        self.failed_tool_calls.load(Ordering::Relaxed)
231    }
232
233    /// Return the total number of requests shed due to backpressure.
234    pub fn backpressure_shed_count(&self) -> u64 {
235        self.backpressure_shed_count.load(Ordering::Relaxed)
236    }
237
238    /// Return the total number of memory recall operations performed.
239    pub fn memory_recall_count(&self) -> u64 {
240        self.memory_recall_count.load(Ordering::Relaxed)
241    }
242
243    /// Increment the call counter for `tool_name` by 1.
244    ///
245    /// Called automatically by the agent loop when `with_metrics` is configured.
246    pub fn record_tool_call(&self, tool_name: &str) {
247        self.total_tool_calls.fetch_add(1, Ordering::Relaxed);
248        if let Ok(mut map) = self.per_tool_calls.lock() {
249            *map.entry(tool_name.to_owned()).or_insert(0) += 1;
250        }
251    }
252
253    /// Increment the failure counter for `tool_name` by 1.
254    ///
255    /// Called automatically by the agent loop when a tool returns an error.
256    pub fn record_tool_failure(&self, tool_name: &str) {
257        self.failed_tool_calls.fetch_add(1, Ordering::Relaxed);
258        if let Ok(mut map) = self.per_tool_failures.lock() {
259            *map.entry(tool_name.to_owned()).or_insert(0) += 1;
260        }
261    }
262
263    /// Return a snapshot of per-tool call counts as a `HashMap<tool_name, count>`.
264    pub fn per_tool_calls_snapshot(&self) -> HashMap<String, u64> {
265        self.per_tool_calls
266            .lock()
267            .map(|m| m.clone())
268            .unwrap_or_default()
269    }
270
271    /// Return a snapshot of per-tool failure counts as a `HashMap<tool_name, count>`.
272    pub fn per_tool_failures_snapshot(&self) -> HashMap<String, u64> {
273        self.per_tool_failures
274            .lock()
275            .map(|m| m.clone())
276            .unwrap_or_default()
277    }
278
279    /// Increment call counter for (agent_id, tool_name).
280    pub fn record_agent_tool_call(&self, agent_id: &str, tool_name: &str) {
281        if let Ok(mut map) = self.per_agent_tool_calls.lock() {
282            *map.entry(agent_id.to_owned())
283                .or_default()
284                .entry(tool_name.to_owned())
285                .or_insert(0) += 1;
286        }
287    }
288
289    /// Increment failure counter for (agent_id, tool_name).
290    pub fn record_agent_tool_failure(&self, agent_id: &str, tool_name: &str) {
291        if let Ok(mut map) = self.per_agent_tool_failures.lock() {
292            *map.entry(agent_id.to_owned())
293                .or_default()
294                .entry(tool_name.to_owned())
295                .or_insert(0) += 1;
296        }
297    }
298
299    /// Snapshot of per-agent, per-tool call counts.
300    pub fn per_agent_tool_calls_snapshot(&self) -> HashMap<String, HashMap<String, u64>> {
301        self.per_agent_tool_calls
302            .lock()
303            .map(|m| m.clone())
304            .unwrap_or_default()
305    }
306
307    /// Snapshot of per-agent, per-tool failure counts.
308    pub fn per_agent_tool_failures_snapshot(&self) -> HashMap<String, HashMap<String, u64>> {
309        self.per_agent_tool_failures
310            .lock()
311            .map(|m| m.clone())
312            .unwrap_or_default()
313    }
314
315    /// Capture a complete snapshot of all counters, including per-tool breakdowns.
316    ///
317    /// This is the preferred alternative to [`to_snapshot`] — it returns a
318    /// named [`MetricsSnapshot`] struct instead of an opaque tuple.
319    ///
320    /// [`to_snapshot`]: RuntimeMetrics::to_snapshot
321    pub fn snapshot(&self) -> MetricsSnapshot {
322        MetricsSnapshot {
323            active_sessions: self.active_sessions.load(Ordering::Relaxed),
324            total_sessions: self.total_sessions.load(Ordering::Relaxed),
325            total_steps: self.total_steps.load(Ordering::Relaxed),
326            total_tool_calls: self.total_tool_calls.load(Ordering::Relaxed),
327            failed_tool_calls: self.failed_tool_calls.load(Ordering::Relaxed),
328            backpressure_shed_count: self.backpressure_shed_count.load(Ordering::Relaxed),
329            memory_recall_count: self.memory_recall_count.load(Ordering::Relaxed),
330            per_tool_calls: self.per_tool_calls_snapshot(),
331            per_tool_failures: self.per_tool_failures_snapshot(),
332            step_latency_buckets: self.step_latency.buckets(),
333            step_latency_mean_ms: self.step_latency.mean_ms(),
334            per_agent_tool_calls: self.per_agent_tool_calls_snapshot(),
335            per_agent_tool_failures: self.per_agent_tool_failures_snapshot(),
336        }
337    }
338
339    /// Record a step latency sample.
340    pub fn record_step_latency(&self, ms: u64) {
341        self.step_latency.record(ms);
342    }
343
344    /// Reset all counters to zero.
345    ///
346    /// Intended for testing. In production, counters are monotonically increasing.
347    pub fn reset(&self) {
348        self.active_sessions.store(0, Ordering::Relaxed);
349        self.total_sessions.store(0, Ordering::Relaxed);
350        self.total_steps.store(0, Ordering::Relaxed);
351        self.total_tool_calls.store(0, Ordering::Relaxed);
352        self.failed_tool_calls.store(0, Ordering::Relaxed);
353        self.backpressure_shed_count.store(0, Ordering::Relaxed);
354        self.memory_recall_count.store(0, Ordering::Relaxed);
355        if let Ok(mut m) = self.per_tool_calls.lock() {
356            m.clear();
357        }
358        if let Ok(mut m) = self.per_tool_failures.lock() {
359            m.clear();
360        }
361        if let Ok(mut m) = self.per_agent_tool_calls.lock() {
362            m.clear();
363        }
364        if let Ok(mut m) = self.per_agent_tool_failures.lock() {
365            m.clear();
366        }
367        self.step_latency.total_count.store(0, Ordering::Relaxed);
368        self.step_latency.total_sum_ms.store(0, Ordering::Relaxed);
369        for b in &self.step_latency.buckets {
370            b.store(0, Ordering::Relaxed);
371        }
372    }
373
374    /// Capture a snapshot of global counters as plain integers.
375    ///
376    /// Returns `(active_sessions, total_sessions, total_steps,
377    ///           total_tool_calls, failed_tool_calls,
378    ///           backpressure_shed_count, memory_recall_count)`.
379    /// For per-tool breakdowns use [`per_tool_calls_snapshot`] and
380    /// [`per_tool_failures_snapshot`].
381    ///
382    /// [`per_tool_calls_snapshot`]: RuntimeMetrics::per_tool_calls_snapshot
383    /// [`per_tool_failures_snapshot`]: RuntimeMetrics::per_tool_failures_snapshot
384    pub fn to_snapshot(&self) -> (usize, u64, u64, u64, u64, u64, u64) {
385        (
386            self.active_sessions.load(Ordering::Relaxed),
387            self.total_sessions.load(Ordering::Relaxed),
388            self.total_steps.load(Ordering::Relaxed),
389            self.total_tool_calls.load(Ordering::Relaxed),
390            self.failed_tool_calls.load(Ordering::Relaxed),
391            self.backpressure_shed_count.load(Ordering::Relaxed),
392            self.memory_recall_count.load(Ordering::Relaxed),
393        )
394    }
395}
396
397// ── Tests ─────────────────────────────────────────────────────────────────────
398
#[cfg(test)]
mod tests {
    //! Unit tests for the metrics module: global atomic counters, per-tool
    //! and per-agent breakdowns, the latency histogram, and snapshots.

    use super::*;

    #[test]
    fn test_metrics_new_returns_arc_with_zero_counters() {
        let m = RuntimeMetrics::new();
        assert_eq!(m.active_sessions(), 0);
        assert_eq!(m.total_sessions(), 0);
        assert_eq!(m.total_steps(), 0);
        assert_eq!(m.total_tool_calls(), 0);
        assert_eq!(m.failed_tool_calls(), 0);
        assert_eq!(m.backpressure_shed_count(), 0);
        assert_eq!(m.memory_recall_count(), 0);
    }

    #[test]
    fn test_active_sessions_increments_and_decrements() {
        let m = RuntimeMetrics::new();
        m.active_sessions.fetch_add(1, Ordering::Relaxed);
        assert_eq!(m.active_sessions(), 1);
        m.active_sessions.fetch_sub(1, Ordering::Relaxed);
        assert_eq!(m.active_sessions(), 0);
    }

    #[test]
    fn test_total_sessions_increments() {
        let m = RuntimeMetrics::new();
        m.total_sessions.fetch_add(1, Ordering::Relaxed);
        m.total_sessions.fetch_add(1, Ordering::Relaxed);
        assert_eq!(m.total_sessions(), 2);
    }

    #[test]
    fn test_total_steps_increments() {
        let m = RuntimeMetrics::new();
        m.total_steps.fetch_add(5, Ordering::Relaxed);
        assert_eq!(m.total_steps(), 5);
    }

    #[test]
    fn test_total_tool_calls_increments() {
        let m = RuntimeMetrics::new();
        m.total_tool_calls.fetch_add(3, Ordering::Relaxed);
        assert_eq!(m.total_tool_calls(), 3);
    }

    #[test]
    fn test_failed_tool_calls_increments() {
        let m = RuntimeMetrics::new();
        m.failed_tool_calls.fetch_add(2, Ordering::Relaxed);
        assert_eq!(m.failed_tool_calls(), 2);
    }

    #[test]
    fn test_backpressure_shed_count_increments() {
        let m = RuntimeMetrics::new();
        m.backpressure_shed_count.fetch_add(7, Ordering::Relaxed);
        assert_eq!(m.backpressure_shed_count(), 7);
    }

    #[test]
    fn test_memory_recall_count_increments() {
        let m = RuntimeMetrics::new();
        m.memory_recall_count.fetch_add(4, Ordering::Relaxed);
        assert_eq!(m.memory_recall_count(), 4);
    }

    #[test]
    fn test_reset_zeroes_all_counters() {
        let m = RuntimeMetrics::new();
        m.active_sessions.store(3, Ordering::Relaxed);
        m.total_sessions.store(10, Ordering::Relaxed);
        m.total_steps.store(50, Ordering::Relaxed);
        m.total_tool_calls.store(20, Ordering::Relaxed);
        m.failed_tool_calls.store(2, Ordering::Relaxed);
        m.backpressure_shed_count.store(1, Ordering::Relaxed);
        m.memory_recall_count.store(8, Ordering::Relaxed);

        m.reset();

        assert_eq!(m.active_sessions(), 0);
        assert_eq!(m.total_sessions(), 0);
        assert_eq!(m.total_steps(), 0);
        assert_eq!(m.total_tool_calls(), 0);
        assert_eq!(m.failed_tool_calls(), 0);
        assert_eq!(m.backpressure_shed_count(), 0);
        assert_eq!(m.memory_recall_count(), 0);
    }

    #[test]
    fn test_reset_clears_step_latency_histogram() {
        let m = RuntimeMetrics::new();
        m.record_step_latency(3);
        m.record_step_latency(700);
        assert_eq!(m.step_latency.count(), 2);

        m.reset();

        assert_eq!(m.step_latency.count(), 0);
        assert_eq!(m.step_latency.mean_ms(), 0.0);
        assert!(m.step_latency.buckets().iter().all(|&(_, count)| count == 0));
    }

    #[test]
    fn test_to_snapshot_captures_correct_values() {
        let m = RuntimeMetrics::new();
        m.active_sessions.store(1, Ordering::Relaxed);
        m.total_sessions.store(2, Ordering::Relaxed);
        m.total_steps.store(3, Ordering::Relaxed);
        m.total_tool_calls.store(4, Ordering::Relaxed);
        m.failed_tool_calls.store(5, Ordering::Relaxed);
        m.backpressure_shed_count.store(6, Ordering::Relaxed);
        m.memory_recall_count.store(7, Ordering::Relaxed);

        let snap = m.to_snapshot();
        assert_eq!(snap, (1, 2, 3, 4, 5, 6, 7));
    }

    #[test]
    fn test_metrics_is_send_sync() {
        fn assert_send_sync<T: Send + Sync>() {}
        assert_send_sync::<RuntimeMetrics>();
    }

    #[test]
    fn test_multiple_increments_are_cumulative() {
        let m = RuntimeMetrics::new();
        for _ in 0..100 {
            m.total_sessions.fetch_add(1, Ordering::Relaxed);
        }
        assert_eq!(m.total_sessions(), 100);
    }

    #[test]
    fn test_arc_clone_shares_state() {
        let m = RuntimeMetrics::new();
        let m2 = Arc::clone(&m);
        m.total_sessions.fetch_add(1, Ordering::Relaxed);
        assert_eq!(m2.total_sessions(), 1);
    }

    // ── Per-tool metrics ──────────────────────────────────────────────────────

    #[test]
    fn test_record_tool_call_increments_global_and_per_tool() {
        let m = RuntimeMetrics::new();
        m.record_tool_call("search");
        m.record_tool_call("search");
        m.record_tool_call("lookup");
        assert_eq!(m.total_tool_calls(), 3);
        let snap = m.per_tool_calls_snapshot();
        assert_eq!(snap.get("search").copied(), Some(2));
        assert_eq!(snap.get("lookup").copied(), Some(1));
    }

    #[test]
    fn test_record_tool_failure_increments_global_and_per_tool() {
        let m = RuntimeMetrics::new();
        m.record_tool_failure("search");
        m.record_tool_failure("lookup");
        m.record_tool_failure("search");
        assert_eq!(m.failed_tool_calls(), 3);
        let snap = m.per_tool_failures_snapshot();
        assert_eq!(snap.get("search").copied(), Some(2));
        assert_eq!(snap.get("lookup").copied(), Some(1));
    }

    #[test]
    fn test_reset_clears_per_tool_counters() {
        let m = RuntimeMetrics::new();
        m.record_tool_call("foo");
        m.record_tool_failure("foo");
        m.reset();
        assert!(m.per_tool_calls_snapshot().is_empty());
        assert!(m.per_tool_failures_snapshot().is_empty());
    }

    #[test]
    fn test_per_tool_snapshot_is_independent_for_unknown_tools() {
        // With nothing recorded, the per-tool snapshot has no entries at all.
        let m = RuntimeMetrics::new();
        let snap = m.per_tool_calls_snapshot();
        assert!(snap.is_empty());
    }

    // ── LatencyHistogram ───────────────────────────────────────────────────────

    #[test]
    fn test_latency_histogram_records_sample() {
        let h = LatencyHistogram::default();
        h.record(10);
        assert_eq!(h.count(), 1);
    }

    #[test]
    fn test_latency_histogram_mean_ms() {
        let h = LatencyHistogram::default();
        h.record(10);
        h.record(20);
        assert!((h.mean_ms() - 15.0).abs() < 1e-5);
    }

    #[test]
    fn test_latency_histogram_empty_mean_is_zero() {
        let h = LatencyHistogram::default();
        assert_eq!(h.count(), 0);
        assert_eq!(h.mean_ms(), 0.0);
    }

    #[test]
    fn test_latency_histogram_buckets_correct_bucket() {
        let h = LatencyHistogram::default();
        h.record(3); // falls in ≤5ms bucket (index 1)
        let buckets = h.buckets();
        // bucket at index 1 is ≤5ms
        assert_eq!(buckets[1].1, 1, "3ms should land in ≤5ms bucket");
        // other buckets should be zero
        assert_eq!(buckets[0].1, 0);
        assert_eq!(buckets[2].1, 0);
    }

    #[test]
    fn test_latency_histogram_bucket_bounds_are_inclusive() {
        let h = LatencyHistogram::default();
        // 0 and 1 share bucket 0; each later value sits exactly on an upper
        // bound, except 501 which is the first value past the 500 ms bound.
        for &ms in &[0u64, 1, 5, 10, 50, 100, 500, 501] {
            h.record(ms);
        }
        let counts: Vec<u64> = h.buckets().into_iter().map(|(_, count)| count).collect();
        assert_eq!(counts, vec![2, 1, 1, 1, 1, 1, 1]);
    }

    #[test]
    fn test_latency_histogram_reports_upper_bounds_in_order() {
        let h = LatencyHistogram::default();
        let bounds: Vec<u64> = h.buckets().into_iter().map(|(bound, _)| bound).collect();
        assert_eq!(bounds, vec![1, 5, 10, 50, 100, 500, u64::MAX]);
    }

    // ── MetricsSnapshot ───────────────────────────────────────────────────────

    #[test]
    fn test_snapshot_returns_all_fields() {
        let m = RuntimeMetrics::new();
        m.active_sessions.store(1, Ordering::Relaxed);
        m.total_sessions.store(2, Ordering::Relaxed);
        m.total_steps.store(3, Ordering::Relaxed);
        m.backpressure_shed_count.store(6, Ordering::Relaxed);
        m.memory_recall_count.store(7, Ordering::Relaxed);
        // Use record_* methods so global and per-tool counters stay consistent.
        m.record_tool_call("my_tool");
        m.record_tool_call("my_tool");
        m.record_tool_failure("my_tool");

        let snap = m.snapshot();
        assert_eq!(snap.active_sessions, 1);
        assert_eq!(snap.total_sessions, 2);
        assert_eq!(snap.total_steps, 3);
        assert_eq!(snap.total_tool_calls, 2);
        assert_eq!(snap.failed_tool_calls, 1);
        assert_eq!(snap.backpressure_shed_count, 6);
        assert_eq!(snap.memory_recall_count, 7);
        assert_eq!(snap.per_tool_calls.get("my_tool").copied(), Some(2));
        assert_eq!(snap.per_tool_failures.get("my_tool").copied(), Some(1));
    }

    #[test]
    fn test_snapshot_default_is_zeroed() {
        let snap = MetricsSnapshot::default();
        assert_eq!(snap.active_sessions, 0);
        assert_eq!(snap.total_sessions, 0);
        assert_eq!(snap.total_steps, 0);
        assert!(snap.per_tool_calls.is_empty());
        assert!(snap.per_tool_failures.is_empty());
    }

    // ── #8 MetricsSnapshot histogram fields ───────────────────────────────────

    #[test]
    fn test_metrics_snapshot_contains_all_fields() {
        let m = RuntimeMetrics::new();
        m.record_step_latency(5);
        m.record_step_latency(200);
        let snap = m.snapshot();
        // Should have 7 buckets
        assert_eq!(snap.step_latency_buckets.len(), 7);
        assert!(snap.step_latency_mean_ms > 0.0);
    }

    // ── #9 per-agent tool call tracking ──────────────────────────────────────

    #[test]
    fn test_per_agent_tool_call_tracking() {
        let m = RuntimeMetrics::new();
        m.record_agent_tool_call("agent-1", "search");
        m.record_agent_tool_call("agent-1", "search");
        m.record_agent_tool_call("agent-2", "lookup");
        m.record_agent_tool_failure("agent-1", "search");

        let calls = m.per_agent_tool_calls_snapshot();
        assert_eq!(calls.get("agent-1").and_then(|t| t.get("search")).copied(), Some(2));
        assert_eq!(calls.get("agent-2").and_then(|t| t.get("lookup")).copied(), Some(1));

        let failures = m.per_agent_tool_failures_snapshot();
        assert_eq!(failures.get("agent-1").and_then(|t| t.get("search")).copied(), Some(1));

        // Also check snapshot includes them
        let snap = m.snapshot();
        assert_eq!(
            snap.per_agent_tool_calls.get("agent-1").and_then(|t| t.get("search")).copied(),
            Some(2)
        );
        assert_eq!(
            snap.per_agent_tool_failures.get("agent-1").and_then(|t| t.get("search")).copied(),
            Some(1)
        );

        // Reset clears them
        m.reset();
        assert!(m.per_agent_tool_calls_snapshot().is_empty());
        assert!(m.per_agent_tool_failures_snapshot().is_empty());
    }

    #[test]
    fn test_per_agent_tracking_does_not_touch_global_counters() {
        let m = RuntimeMetrics::new();
        m.record_agent_tool_call("agent-1", "search");
        m.record_agent_tool_failure("agent-1", "search");
        assert_eq!(m.total_tool_calls(), 0);
        assert_eq!(m.failed_tool_calls(), 0);
        assert!(m.per_tool_calls_snapshot().is_empty());
        assert!(m.per_tool_failures_snapshot().is_empty());
    }
}