1//! # Module: Metrics
2//!
3//! Runtime observability counters for `AgentRuntime`.
4//! All global counters use atomics for lock-free, thread-safe increment/read.
5//! Per-tool counters use a `Mutex<HashMap>` to avoid requiring a concurrent
6//! map dependency.
7
8use serde::{Deserialize, Serialize};
9use std::collections::HashMap;
10use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
11use std::sync::{Arc, Mutex};
12
13// ── LatencyHistogram ──────────────────────────────────────────────────────────
14
15/// A simple fixed-bucket latency histogram.
16///
17/// Bucket upper bounds are **inclusive** (i.e., a sample of exactly 1 ms falls into bucket 0).
18/// Bucket index mapping:
19/// - 0: ≤ 1 ms
20/// - 1: 2 – 5 ms
21/// - 2: 6 – 10 ms
22/// - 3: 11 – 50 ms
23/// - 4: 51 – 100 ms
24/// - 5: 101 – 500 ms
25/// - 6: > 500 ms
#[derive(Debug)]
pub struct LatencyHistogram {
    /// Counts per bucket. Index 0 = ≤1ms, …, index 6 = >500ms.
    /// Bucket upper bounds are **inclusive**.
    buckets: [AtomicU64; 7],
    /// Total number of samples recorded across all buckets.
    total_count: AtomicU64,
    /// Running sum of all recorded samples in milliseconds; together with
    /// `total_count` this yields the exact mean without storing samples.
    total_sum_ms: AtomicU64,
}
34
35impl Default for LatencyHistogram {
36    fn default() -> Self {
37        Self {
38            buckets: [
39                AtomicU64::new(0),
40                AtomicU64::new(0),
41                AtomicU64::new(0),
42                AtomicU64::new(0),
43                AtomicU64::new(0),
44                AtomicU64::new(0),
45                AtomicU64::new(0),
46            ],
47            total_count: AtomicU64::new(0),
48            total_sum_ms: AtomicU64::new(0),
49        }
50    }
51}
52
53impl LatencyHistogram {
54    /// Bucket upper bounds in milliseconds.
55    ///
56    /// Boundaries were chosen to cover the full range of observed latencies in
57    /// LLM-backed agent systems:
58    ///
59    /// | Bucket | Range      | Typical source                         |
60    /// |--------|------------|----------------------------------------|
61    /// | 0      | ≤ 1 ms     | In-process tool calls, cache hits      |
62    /// | 1      | ≤ 5 ms     | Fast local I/O, simple calculations    |
63    /// | 2      | ≤ 10 ms    | Network round-trips to local services  |
64    /// | 3      | ≤ 50 ms    | p50 LLM token latency (streaming)      |
65    /// | 4      | ≤ 100 ms   | p95 for small LLM completions          |
66    /// | 5      | ≤ 500 ms   | p99 for medium LLM completions         |
67    /// | 6      | > 500 ms   | Slow completions, network retries      |
68    const BOUNDS: [u64; 7] = [1, 5, 10, 50, 100, 500, u64::MAX];
69
70    /// Record a latency sample in milliseconds.
71    pub fn record(&self, ms: u64) {
72        self.total_count.fetch_add(1, Ordering::Relaxed);
73        self.total_sum_ms.fetch_add(ms, Ordering::Relaxed);
74        for (i, &bound) in Self::BOUNDS.iter().enumerate() {
75            if ms <= bound {
76                self.buckets[i].fetch_add(1, Ordering::Relaxed);
77                return;
78            }
79        }
80    }
81
82    /// Return the mean latency in ms, or 0.0 if no samples.
83    pub fn mean_ms(&self) -> f64 {
84        let count = self.total_count.load(Ordering::Relaxed);
85        if count == 0 {
86            return 0.0;
87        }
88        self.total_sum_ms.load(Ordering::Relaxed) as f64 / count as f64
89    }
90
91    /// Return a bucket-midpoint approximation of the standard deviation in ms.
92    ///
93    /// Uses the midpoint of each histogram bucket to estimate the second moment,
94    /// then applies `√(E[X²] − E[X]²)`.  Returns `0.0` when fewer than two
95    /// samples have been recorded.
96    ///
97    /// # Accuracy
98    /// The result is an estimate; its accuracy improves as the sample count
99    /// increases and degrades near the boundaries of wide buckets.
100    pub fn std_dev_ms(&self) -> f64 {
101        let count = self.total_count.load(Ordering::Relaxed);
102        if count < 2 {
103            return 0.0;
104        }
105        const MIDS: [f64; 7] = [0.5, 3.0, 7.5, 30.0, 75.0, 300.0, 500.0];
106        let (sum, sum_sq): (f64, f64) = self
107            .buckets
108            .iter()
109            .zip(MIDS.iter())
110            .map(|(b, &m)| {
111                let c = b.load(Ordering::Relaxed) as f64;
112                (c * m, c * m * m)
113            })
114            .fold((0.0, 0.0), |(s, ss), (v, v2)| (s + v, ss + v2));
115        let n = count as f64;
116        let variance = sum_sq / n - (sum / n) * (sum / n);
117        variance.max(0.0).sqrt()
118    }
119
120    /// Return the total sample count.
121    pub fn count(&self) -> u64 {
122        self.total_count.load(Ordering::Relaxed)
123    }
124
125    /// Return `true` if at least one sample has been recorded.
126    pub fn has_data(&self) -> bool {
127        self.count() > 0
128    }
129
130    /// Return `true` when no samples have been recorded yet.
131    pub fn is_empty(&self) -> bool {
132        self.count() == 0
133    }
134
135    /// Estimate the p-th percentile latency in milliseconds from the histogram.
136    ///
137    /// `p` must be in `[0.0, 1.0]`.  Returns the **upper bound** of the first
138    /// bucket that contains the p-th percentile.  Returns `0` if no samples
139    /// have been recorded.
140    ///
141    /// # Accuracy
142    ///
143    /// This is a bucket-boundary estimate, not an exact value.  The error is
144    /// bounded by the bucket width at that percentile.
145    pub fn percentile(&self, p: f64) -> u64 {
146        let total = self.total_count.load(Ordering::Relaxed);
147        if total == 0 {
148            return 0;
149        }
150        let target = (p.clamp(0.0, 1.0) * total as f64).ceil() as u64;
151        let mut cumulative = 0u64;
152        for (i, bucket) in self.buckets.iter().enumerate() {
153            cumulative += bucket.load(Ordering::Relaxed);
154            if cumulative >= target {
155                return Self::BOUNDS[i];
156            }
157        }
158        // All samples accounted for — return the last bound.
159        *Self::BOUNDS.last().unwrap_or(&u64::MAX)
160    }
161
162    /// Return the upper-bound of the bucket with the highest sample count (the mode).
163    ///
164    /// Returns `None` if no samples have been recorded.  When multiple buckets
165    /// tie for the maximum, the lowest-latency bucket is returned.
166    pub fn mode_bucket_ms(&self) -> Option<u64> {
167        if self.count() == 0 {
168            return None;
169        }
170        let (idx, _) = self
171            .buckets
172            .iter()
173            .enumerate()
174            .max_by_key(|(_, a)| a.load(Ordering::Relaxed))?;
175        Some(Self::BOUNDS[idx])
176    }
177
178    /// Return bucket counts as `(upper_bound_ms, count)` pairs.
179    pub fn buckets(&self) -> Vec<(u64, u64)> {
180        Self::BOUNDS
181            .iter()
182            .zip(self.buckets.iter())
183            .map(|(&b, a)| (b, a.load(Ordering::Relaxed)))
184            .collect()
185    }
186
187    /// Return the minimum recorded latency in ms, or `None` if no samples.
188    pub fn min_ms(&self) -> Option<u64> {
189        let total = self.total_count.load(Ordering::Relaxed);
190        if total == 0 {
191            return None;
192        }
193        // Walk buckets from the fastest; the first non-empty bucket's lower
194        // bound is 0 (or the previous bound), so return the upper bound as
195        // the conservative minimum estimate.
196        for (i, bucket) in self.buckets.iter().enumerate() {
197            if bucket.load(Ordering::Relaxed) > 0 {
198                return Some(if i == 0 { 0 } else { Self::BOUNDS[i - 1] + 1 });
199            }
200        }
201        None
202    }
203
204    /// Return the maximum recorded latency in ms, or `None` if no samples.
205    pub fn max_ms(&self) -> Option<u64> {
206        let total = self.total_count.load(Ordering::Relaxed);
207        if total == 0 {
208            return None;
209        }
210        // Walk from the slowest bucket; return the upper bound of the last non-empty bucket.
211        for (i, bucket) in self.buckets.iter().enumerate().rev() {
212            if bucket.load(Ordering::Relaxed) > 0 {
213                return Some(Self::BOUNDS[i]);
214            }
215        }
216        None
217    }
218
219    /// Return the spread (max − min) of recorded latencies in milliseconds.
220    ///
221    /// Returns `None` if no samples have been recorded.  A narrow range
222    /// indicates consistent latency; a wide range suggests outliers.
223    pub fn range_ms(&self) -> Option<u64> {
224        Some(self.max_ms()?.saturating_sub(self.min_ms()?))
225    }
226
227    /// Return the interquartile range (p75 − p25) in milliseconds.
228    ///
229    /// A measure of dispersion that is less sensitive to outliers than
230    /// [`range_ms`].  Returns `0` when fewer than two samples have been
231    /// recorded (p25 == p75 == 0).
232    ///
233    /// [`range_ms`]: LatencyHistogram::range_ms
234    pub fn interquartile_range_ms(&self) -> u64 {
235        self.p75().saturating_sub(self.p25())
236    }
237
238    /// Return the 50th-percentile (median) latency in milliseconds.
239    pub fn p50(&self) -> u64 {
240        self.percentile(0.50)
241    }
242
243    /// Return the 95th-percentile latency in milliseconds.
244    pub fn p95(&self) -> u64 {
245        self.percentile(0.95)
246    }
247
248    /// Return the 99th-percentile latency in milliseconds.
249    pub fn p99(&self) -> u64 {
250        self.percentile(0.99)
251    }
252
253    /// Return the 25th-percentile latency in milliseconds.
254    pub fn p25(&self) -> u64 {
255        self.percentile(0.25)
256    }
257
258    /// Return the 75th-percentile latency in milliseconds.
259    pub fn p75(&self) -> u64 {
260        self.percentile(0.75)
261    }
262
263    /// Return the 90th-percentile latency in milliseconds.
264    pub fn p90(&self) -> u64 {
265        self.percentile(0.90)
266    }
267
268    /// Return the 10th-percentile latency in milliseconds.
269    ///
270    /// Useful for assessing the "best case" tail of the distribution.
271    pub fn p10(&self) -> u64 {
272        self.percentile(0.10)
273    }
274
275    /// Return the median (50th-percentile) step latency in milliseconds.
276    ///
277    /// Convenience alias for `p50`; useful when callers want an explicit
278    /// "median" name without importing percentile constants.
279    pub fn median_ms(&self) -> u64 {
280        self.p50()
281    }
282
283    /// Reset all histogram counters to zero.
284    pub fn reset(&self) {
285        self.total_count.store(0, Ordering::Relaxed);
286        self.total_sum_ms.store(0, Ordering::Relaxed);
287        for bucket in &self.buckets {
288            bucket.store(0, Ordering::Relaxed);
289        }
290    }
291
292    /// Return the total sum of all recorded latency samples in milliseconds.
293    ///
294    /// Equivalent to `mean_ms() * count()` but avoids floating-point arithmetic.
295    pub fn sum_ms(&self) -> u64 {
296        self.total_sum_ms.load(Ordering::Relaxed)
297    }
298
299    /// Return the coefficient of variation: `std_dev_ms / mean_ms`.
300    ///
301    /// A value of `0.0` means no variation; higher values indicate more
302    /// spread in latency.  Returns `0.0` when `mean_ms` is zero (empty
303    /// histogram or all-zero samples) to avoid division by zero.
304    pub fn coefficient_of_variation(&self) -> f64 {
305        let mean = self.mean_ms();
306        if mean == 0.0 {
307            return 0.0;
308        }
309        self.std_dev_ms() / mean
310    }
311
312    /// Return the total number of samples recorded in this histogram.
313    pub fn sample_count(&self) -> u64 {
314        self.total_count.load(std::sync::atomic::Ordering::Relaxed)
315    }
316
317    /// Return the difference between the p99 and p50 latency buckets in
318    /// milliseconds.
319    ///
320    /// A larger spread indicates a long-tail latency distribution.
321    /// Returns `0` when no samples have been recorded.
322    pub fn percentile_spread(&self) -> u64 {
323        self.p99().saturating_sub(self.p50())
324    }
325
326    /// Return the count for each bucket as an array, in order from the
327    /// fastest (≤1ms) to the slowest (>500ms) bucket.
328    pub fn bucket_counts(&self) -> [u64; 7] {
329        let mut out = [0u64; 7];
330        for (i, b) in self.buckets.iter().enumerate() {
331            out[i] = b.load(std::sync::atomic::Ordering::Relaxed);
332        }
333        out
334    }
335
336    /// Return the upper bound (ms) of the lowest bucket that has at least one
337    /// sample, or `None` if no samples have been recorded.
338    pub fn min_occupied_ms(&self) -> Option<u64> {
339        Self::BOUNDS
340            .iter()
341            .zip(self.buckets.iter())
342            .find(|(_, b)| b.load(std::sync::atomic::Ordering::Relaxed) > 0)
343            .map(|(&bound, _)| bound)
344    }
345
346    /// Return the upper-bound of the largest bucket with at least one recorded sample.
347    ///
348    /// Returns `None` if the histogram is empty.
349    pub fn max_occupied_ms(&self) -> Option<u64> {
350        Self::BOUNDS
351            .iter()
352            .zip(self.buckets.iter())
353            .rev()
354            .find(|(_, b)| b.load(std::sync::atomic::Ordering::Relaxed) > 0)
355            .map(|(&bound, _)| bound)
356    }
357
358    /// Return the number of buckets that have at least one recorded sample.
359    pub fn occupied_bucket_count(&self) -> usize {
360        self.buckets
361            .iter()
362            .filter(|b| b.load(std::sync::atomic::Ordering::Relaxed) > 0)
363            .count()
364    }
365
366    /// Return `true` if the latency distribution is skewed (p99 > 2 × p50).
367    ///
368    /// Returns `false` for empty histograms.
369    pub fn is_skewed(&self) -> bool {
370        let p50 = self.p50();
371        if p50 == 0 {
372            return false;
373        }
374        self.p99() > 2 * p50
375    }
376
377    /// Return `true` if all recorded samples fall into exactly one bucket.
378    ///
379    /// An empty histogram is considered uniform.
380    pub fn is_uniform(&self) -> bool {
381        let non_empty = self
382            .buckets
383            .iter()
384            .filter(|b| b.load(std::sync::atomic::Ordering::Relaxed) > 0)
385            .count();
386        non_empty <= 1
387    }
388
389    /// Reset all histogram counters to zero.
390    ///
391    /// Alias for [`reset`] using more conventional naming.
392    ///
393    /// [`reset`]: LatencyHistogram::reset
394    pub fn clear(&self) {
395        self.reset();
396    }
397
398    /// Return `true` if `latency_ms` is strictly greater than the current p99.
399    ///
400    /// Useful for detecting outlier requests at call sites without storing
401    /// the p99 value separately.  Returns `false` when the histogram is empty.
402    pub fn is_above_p99(&self, latency_ms: u64) -> bool {
403        latency_ms > self.p99()
404    }
405
406    /// Return `true` if the p99 latency is strictly below `threshold_ms`.
407    ///
408    /// Useful for SLO checks.  Returns `true` when no samples have been
409    /// recorded (`p99 == 0`).
410    pub fn is_below_p99(&self, threshold_ms: u64) -> bool {
411        self.p99() < threshold_ms
412    }
413
414}
415
416impl MetricsSnapshot {
417    /// Compute the difference between `after` and `before` (i.e., `after - before`).
418    ///
419    /// Useful for per-request instrumentation:
420    /// ```rust,ignore
421    /// let before = metrics.snapshot();
422    /// // ... run one agent invocation ...
423    /// let after = metrics.snapshot();
424    /// let delta = MetricsSnapshot::delta(&after, &before);
425    /// println!("steps this run: {}", delta.total_steps);
426    /// ```
427    ///
428    /// Saturating subtraction is used so callers don't need to guard against
429    /// races where a counter is read before the full increment propagates.
430    pub fn delta(after: &Self, before: &Self) -> Self {
431        Self {
432            active_sessions: after.active_sessions.saturating_sub(before.active_sessions),
433            total_sessions: after.total_sessions.saturating_sub(before.total_sessions),
434            total_steps: after.total_steps.saturating_sub(before.total_steps),
435            total_tool_calls: after.total_tool_calls.saturating_sub(before.total_tool_calls),
436            failed_tool_calls: after.failed_tool_calls.saturating_sub(before.failed_tool_calls),
437            backpressure_shed_count: after
438                .backpressure_shed_count
439                .saturating_sub(before.backpressure_shed_count),
440            memory_recall_count: after
441                .memory_recall_count
442                .saturating_sub(before.memory_recall_count),
443            checkpoint_errors: after
444                .checkpoint_errors
445                .saturating_sub(before.checkpoint_errors),
446            per_tool_calls: {
447                let mut m = after.per_tool_calls.clone();
448                for (k, v) in &before.per_tool_calls {
449                    let entry = m.entry(k.clone()).or_default();
450                    *entry = entry.saturating_sub(*v);
451                }
452                m
453            },
454            per_tool_failures: {
455                let mut m = after.per_tool_failures.clone();
456                for (k, v) in &before.per_tool_failures {
457                    let entry = m.entry(k.clone()).or_default();
458                    *entry = entry.saturating_sub(*v);
459                }
460                m
461            },
462            step_latency_buckets: after
463                .step_latency_buckets
464                .iter()
465                .zip(before.step_latency_buckets.iter())
466                .map(|((bound, a), (_, b))| (*bound, a.saturating_sub(*b)))
467                .collect(),
468            step_latency_mean_ms: after.step_latency_mean_ms - before.step_latency_mean_ms,
469            per_agent_tool_calls: after.per_agent_tool_calls.clone(),
470            per_agent_tool_failures: after.per_agent_tool_failures.clone(),
471        }
472    }
473
474    /// Serialize the snapshot to a `serde_json::Value` for logging or export.
475    pub fn to_json(&self) -> serde_json::Value {
476        serde_json::json!({
477            "active_sessions": self.active_sessions,
478            "total_sessions": self.total_sessions,
479            "total_steps": self.total_steps,
480            "total_tool_calls": self.total_tool_calls,
481            "failed_tool_calls": self.failed_tool_calls,
482            "backpressure_shed_count": self.backpressure_shed_count,
483            "memory_recall_count": self.memory_recall_count,
484            "step_latency_mean_ms": self.step_latency_mean_ms,
485            "per_tool_calls": self.per_tool_calls,
486            "per_tool_failures": self.per_tool_failures,
487        })
488    }
489
490    /// Return the number of calls recorded for the named tool.
491    ///
492    /// Returns `0` if no calls have been recorded for that tool name.
493    pub fn tool_call_count(&self, name: &str) -> u64 {
494        self.per_tool_calls.get(name).copied().unwrap_or(0)
495    }
496
497    /// Return the number of failures recorded for the named tool.
498    ///
499    /// Returns `0` if no failures have been recorded for that tool name.
500    pub fn tool_failure_count(&self, name: &str) -> u64 {
501        self.per_tool_failures.get(name).copied().unwrap_or(0)
502    }
503
504    /// Return a sorted list of tool names that have at least one recorded call.
505    pub fn tool_names(&self) -> Vec<&str> {
506        let mut names: Vec<&str> = self.per_tool_calls.keys().map(|s| s.as_str()).collect();
507        names.sort_unstable();
508        names
509    }
510
511    /// Return the overall tool-call failure rate as a value in `[0.0, 1.0]`.
512    ///
513    /// Returns `0.0` if no tool calls have been recorded.
514    pub fn failure_rate(&self) -> f64 {
515        if self.total_tool_calls == 0 {
516            return 0.0;
517        }
518        self.failed_tool_calls as f64 / self.total_tool_calls as f64
519    }
520
521    /// Return the overall tool-call success rate as a value in `[0.0, 1.0]`.
522    ///
523    /// Returns `1.0` if no tool calls have been recorded (vacuously all succeeded).
524    pub fn success_rate(&self) -> f64 {
525        1.0 - self.failure_rate()
526    }
527
528    /// Return the number of successful calls for the named tool.
529    ///
530    /// Computed as `tool_call_count(name) - tool_failure_count(name)`.
531    pub fn tool_success_count(&self, name: &str) -> u64 {
532        self.tool_call_count(name)
533            .saturating_sub(self.tool_failure_count(name))
534    }
535
536    /// Return the per-tool failure rate for the named tool.
537    ///
538    /// Returns `0.0` if no calls have been recorded for that tool.
539    pub fn tool_failure_rate(&self, name: &str) -> f64 {
540        let calls = self.tool_call_count(name);
541        if calls == 0 {
542            return 0.0;
543        }
544        self.tool_failure_count(name) as f64 / calls as f64
545    }
546
547    /// Return the total number of successful tool calls (total minus failed).
548    ///
549    /// Uses saturating subtraction so a race between `total_tool_calls`
550    /// and `failed_tool_calls` cannot produce an underflow.
551    pub fn total_successful_tool_calls(&self) -> u64 {
552        self.total_tool_calls.saturating_sub(self.failed_tool_calls)
553    }
554
555    /// Return `true` if all counters are zero (no activity has been recorded).
556    pub fn is_zero(&self) -> bool {
557        self.active_sessions == 0
558            && self.total_sessions == 0
559            && self.total_steps == 0
560            && self.total_tool_calls == 0
561            && self.failed_tool_calls == 0
562            && self.backpressure_shed_count == 0
563            && self.memory_recall_count == 0
564            && self.checkpoint_errors == 0
565    }
566
567    /// Return the average number of ReAct steps per completed session.
568    ///
569    /// Returns `0.0` when no sessions have been recorded, to avoid
570    /// division by zero.
571    pub fn avg_steps_per_session(&self) -> f64 {
572        if self.total_sessions == 0 {
573            0.0
574        } else {
575            self.total_steps as f64 / self.total_sessions as f64
576        }
577    }
578
579    /// Return the overall tool error rate: `failed_tool_calls / total_tool_calls`.
580    ///
581    /// Returns `0.0` when no tool calls have been recorded.
582    pub fn error_rate(&self) -> f64 {
583        if self.total_tool_calls == 0 {
584            return 0.0;
585        }
586        self.failed_tool_calls as f64 / self.total_tool_calls as f64
587    }
588
589    /// Return memory recalls per completed session.
590    ///
591    /// Returns `0.0` when no sessions have been recorded.
592    pub fn memory_recall_rate(&self) -> f64 {
593        if self.total_sessions == 0 {
594            return 0.0;
595        }
596        self.memory_recall_count as f64 / self.total_sessions as f64
597    }
598
599    /// Return the average number of ReAct steps per session.
600    ///
601    /// Alias for `avg_steps_per_session` on the snapshot type; returns `0.0`
602    /// when no sessions have been recorded.
603    pub fn steps_per_session(&self) -> f64 {
604        if self.total_sessions == 0 {
605            return 0.0;
606        }
607        self.total_steps as f64 / self.total_sessions as f64
608    }
609
610    /// Return `true` if the snapshot contains any error indicators.
611    ///
612    /// Specifically, `true` when `failed_tool_calls > 0` or
613    /// `checkpoint_errors > 0`.  The complement of "no errors" but distinct
614    /// from `!is_healthy()` which also considers backpressure sheds.
615    pub fn has_errors(&self) -> bool {
616        self.failed_tool_calls > 0 || self.checkpoint_errors > 0
617    }
618
619    /// Return `true` if the snapshot shows no error indicators.
620    ///
621    /// A "healthy" snapshot has zero failed tool calls, zero backpressure
622    /// sheds, and zero checkpoint errors.  Useful for quick health checks
623    /// in tests and monitoring.
624    pub fn is_healthy(&self) -> bool {
625        self.failed_tool_calls == 0
626            && self.backpressure_shed_count == 0
627            && self.checkpoint_errors == 0
628    }
629
630    /// Return the average number of tool calls per session.
631    ///
632    /// Returns `0.0` when no sessions have been recorded.
633    pub fn tool_call_rate(&self) -> f64 {
634        if self.total_sessions == 0 {
635            return 0.0;
636        }
637        self.total_tool_calls as f64 / self.total_sessions as f64
638    }
639
640    /// Return the average number of backpressure shed events per session.
641    ///
642    /// Returns `0.0` when no sessions have been recorded.
643    pub fn backpressure_rate(&self) -> f64 {
644        if self.total_sessions == 0 {
645            return 0.0;
646        }
647        self.backpressure_shed_count as f64 / self.total_sessions as f64
648    }
649
650    /// Return the ratio of memory recalls to total steps.
651    ///
652    /// Returns `0.0` when no steps have been taken.
653    pub fn memory_efficiency(&self) -> f64 {
654        if self.total_steps == 0 {
655            return 0.0;
656        }
657        self.memory_recall_count as f64 / self.total_steps as f64
658    }
659
660    /// Return the fraction of sessions that are currently active.
661    ///
662    /// Returns `0.0` when no sessions have been started.
663    pub fn active_session_ratio(&self) -> f64 {
664        if self.total_sessions == 0 {
665            return 0.0;
666        }
667        self.active_sessions as f64 / self.total_sessions as f64
668    }
669
670    /// Return the average number of tool calls per step.
671    ///
672    /// Returns `0.0` when no steps have been taken.
673    pub fn step_to_tool_ratio(&self) -> f64 {
674        if self.total_steps == 0 {
675            return 0.0;
676        }
677        self.total_tool_calls as f64 / self.total_steps as f64
678    }
679
680    /// Return `true` if any tool-call failures have been recorded.
681    pub fn has_failures(&self) -> bool {
682        self.failed_tool_calls > 0
683    }
684
685    /// Return the number of distinct tool names that have been called at least once.
686    pub fn tool_diversity(&self) -> usize {
687        self.per_tool_calls.len()
688    }
689
690    /// Return the average number of tool-call failures per completed session.
691    ///
692    /// Returns `0.0` when no sessions have been recorded.
693    pub fn avg_failures_per_session(&self) -> f64 {
694        if self.total_sessions == 0 {
695            return 0.0;
696        }
697        self.failed_tool_calls as f64 / self.total_sessions as f64
698    }
699
700    /// Return the name of the tool with the most recorded calls.
701    ///
702    /// Returns `None` if no tool calls have been recorded.
703    pub fn most_called_tool(&self) -> Option<String> {
704        self.per_tool_calls
705            .iter()
706            .max_by_key(|(_, &v)| v)
707            .map(|(k, _)| k.clone())
708    }
709
710    /// Return a sorted list of tool names that have at least one recorded failure.
711    pub fn tool_names_with_failures(&self) -> Vec<String> {
712        let mut names: Vec<String> = self
713            .per_tool_failures
714            .iter()
715            .filter(|(_, &v)| v > 0)
716            .map(|(k, _)| k.clone())
717            .collect();
718        names.sort_unstable();
719        names
720    }
721
722    /// Return the failure rate for a specific tool (failures / calls).
723    ///
724    /// Returns `0.0` if the tool has no recorded calls.
725    pub fn failed_tool_ratio_for(&self, name: &str) -> f64 {
726        let calls = self.tool_call_count(name);
727        if calls == 0 {
728            return 0.0;
729        }
730        self.tool_failure_count(name) as f64 / calls as f64
731    }
732
733    /// Return the ratio of backpressure-shed events to total tool calls.
734    ///
735    /// Returns `0.0` if no tool calls have been recorded.
736    pub fn backpressure_shed_rate(&self) -> f64 {
737        if self.total_tool_calls == 0 {
738            return 0.0;
739        }
740        self.backpressure_shed_count as f64 / self.total_tool_calls as f64
741    }
742
743    /// Return the number of distinct agents that have recorded tool-call data.
744    pub fn total_agent_count(&self) -> usize {
745        self.per_agent_tool_calls.len()
746    }
747
748    /// Return the ratio of total steps to total tool calls.
749    ///
750    /// Returns `0.0` if no tool calls have been recorded.
751    pub fn steps_per_tool_call(&self) -> f64 {
752        if self.total_tool_calls == 0 {
753            return 0.0;
754        }
755        self.total_steps as f64 / self.total_tool_calls as f64
756    }
757
758    /// Return the agent id with the most total tool calls across all tools.
759    ///
760    /// Returns `None` if no per-agent tool-call data has been recorded.
761    pub fn agent_with_most_calls(&self) -> Option<String> {
762        self.per_agent_tool_calls
763            .iter()
764            .map(|(agent, tools)| (agent, tools.values().sum::<u64>()))
765            .max_by_key(|(_, total)| *total)
766            .map(|(agent, _)| agent.clone())
767    }
768}
769
770impl std::fmt::Display for MetricsSnapshot {
771    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
772        write!(
773            f,
774            "MetricsSnapshot {{ sessions: active={} total={}, steps={}, \
775             tool_calls={} (failed={}), backpressure_shed={}, \
776             memory_recalls={}, checkpoint_errors={}, latency_mean={:.1}ms }}",
777            self.active_sessions,
778            self.total_sessions,
779            self.total_steps,
780            self.total_tool_calls,
781            self.failed_tool_calls,
782            self.backpressure_shed_count,
783            self.memory_recall_count,
784            self.checkpoint_errors,
785            self.step_latency_mean_ms,
786        )
787    }
788}
789
/// A point-in-time snapshot of all runtime counters.
///
/// Obtained by calling [`RuntimeMetrics::snapshot`].  All fields are plain
/// integers, floats, and owned maps, so the snapshot can be logged,
/// serialised, or diffed (see `MetricsSnapshot::delta`) without holding any
/// locks.
///
/// # Example
/// ```rust
/// use llm_agent_runtime::metrics::RuntimeMetrics;
///
/// let m = RuntimeMetrics::new();
/// let snap = m.snapshot();
/// assert_eq!(snap.active_sessions, 0);
/// assert_eq!(snap.total_sessions, 0);
/// ```
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
pub struct MetricsSnapshot {
    /// Number of agent sessions currently in progress.
    pub active_sessions: usize,
    /// Total number of sessions started since the runtime was created.
    pub total_sessions: u64,
    /// Total number of ReAct steps executed across all sessions.
    pub total_steps: u64,
    /// Total number of tool calls dispatched (across all tool names).
    pub total_tool_calls: u64,
    /// Total number of tool calls that returned an error observation.
    pub failed_tool_calls: u64,
    /// Total number of requests shed due to backpressure.
    pub backpressure_shed_count: u64,
    /// Total number of memory recall operations.
    pub memory_recall_count: u64,
    /// Total number of checkpoint failures encountered during `run_agent`.
    pub checkpoint_errors: u64,
    /// Per-tool call counts: `tool_name → total_calls`.
    pub per_tool_calls: HashMap<String, u64>,
    /// Per-tool failure counts: `tool_name → failed_calls`.
    pub per_tool_failures: HashMap<String, u64>,
    /// Step latency histogram bucket counts as `(upper_bound_ms_inclusive, count)`.
    pub step_latency_buckets: Vec<(u64, u64)>,
    /// Mean step latency in milliseconds.
    pub step_latency_mean_ms: f64,
    /// Per-agent, per-tool call counts: `agent_id → tool_name → count`.
    pub per_agent_tool_calls: HashMap<String, HashMap<String, u64>>,
    /// Per-agent, per-tool failure counts: `agent_id → tool_name → count`.
    pub per_agent_tool_failures: HashMap<String, HashMap<String, u64>>,
}
840
/// All four per-tool / per-agent counter maps, protected by a single lock.
///
/// Grouping them under one `Mutex` (rather than one lock per map) means a
/// full snapshot of all four maps needs only one acquire/release, and a
/// `record_tool_call` + `record_agent_tool_call` pair contends on a single
/// lock rather than two separate ones.
///
/// NOTE(review): each `record_*` method still performs its own
/// acquire/release of this lock, so a paired call costs two acquisitions of
/// the *same* mutex — the earlier claim that the pair needs "only one
/// acquire/release" would only hold if both updates were merged into one
/// method.
#[derive(Debug, Default)]
struct PerToolMaps {
    /// Per-tool call counts: `tool_name → total_calls`.
    calls: HashMap<String, u64>,
    /// Per-tool failure counts: `tool_name → failed_calls`.
    failures: HashMap<String, u64>,
    /// Per-agent, per-tool call counts: `agent_id → tool_name → count`.
    agent_calls: HashMap<String, HashMap<String, u64>>,
    /// Per-agent, per-tool failure counts: `agent_id → tool_name → count`.
    agent_failures: HashMap<String, HashMap<String, u64>>,
}
857
/// Shared runtime metrics. Clone the `Arc` to share across threads.
///
/// Global counters are public atomics so the runtime (and tests) can bump
/// them directly with `fetch_add`/`store`.  All atomic accesses in this
/// module use `Ordering::Relaxed`: each counter is an independent tally
/// with no cross-counter ordering guarantees.  The per-tool / per-agent
/// maps live behind a single private `Mutex` (see `PerToolMaps`).
#[derive(Debug)]
pub struct RuntimeMetrics {
    /// Number of agent sessions currently in progress.
    pub active_sessions: AtomicUsize,
    /// Total number of sessions started since the runtime was created.
    pub total_sessions: AtomicU64,
    /// Total number of ReAct steps executed across all sessions.
    pub total_steps: AtomicU64,
    /// Total number of tool calls dispatched (across all tool names).
    pub total_tool_calls: AtomicU64,
    /// Total number of tool calls that returned an error observation.
    pub failed_tool_calls: AtomicU64,
    /// Total number of requests shed due to backpressure.
    pub backpressure_shed_count: AtomicU64,
    /// Total number of memory recall operations.
    pub memory_recall_count: AtomicU64,
    /// Total number of checkpoint failures encountered during `run_agent`.
    pub checkpoint_errors: AtomicU64,
    /// All four per-tool / per-agent maps under a single lock.
    per_tool: Mutex<PerToolMaps>,
    /// Per-step latency histogram.
    pub step_latency: LatencyHistogram,
}
882
883impl Default for RuntimeMetrics {
884    fn default() -> Self {
885        Self {
886            active_sessions: AtomicUsize::new(0),
887            total_sessions: AtomicU64::new(0),
888            total_steps: AtomicU64::new(0),
889            total_tool_calls: AtomicU64::new(0),
890            failed_tool_calls: AtomicU64::new(0),
891            backpressure_shed_count: AtomicU64::new(0),
892            memory_recall_count: AtomicU64::new(0),
893            checkpoint_errors: AtomicU64::new(0),
894            per_tool: Mutex::new(PerToolMaps::default()),
895            step_latency: LatencyHistogram::default(),
896        }
897    }
898}
899
900impl RuntimeMetrics {
901    /// Allocate a new `RuntimeMetrics` instance wrapped in an `Arc`.
902    pub fn new() -> Arc<Self> {
903        Arc::new(Self::default())
904    }
905
906    /// Return the number of agent sessions currently in progress.
907    pub fn active_sessions(&self) -> usize {
908        self.active_sessions.load(Ordering::Relaxed)
909    }
910
911    /// Return the total number of sessions started since the runtime was created.
912    pub fn total_sessions(&self) -> u64 {
913        self.total_sessions.load(Ordering::Relaxed)
914    }
915
916    /// Return the average number of tool calls per completed session.
917    ///
918    /// Returns `0.0` when no sessions have been recorded.
919    pub fn avg_tool_calls_per_session(&self) -> f64 {
920        let sessions = self.total_sessions();
921        if sessions == 0 {
922            return 0.0;
923        }
924        self.total_tool_calls() as f64 / sessions as f64
925    }
926
927    /// Return the total number of ReAct steps executed across all sessions.
928    pub fn total_steps(&self) -> u64 {
929        self.total_steps.load(Ordering::Relaxed)
930    }
931
932    /// Return the average number of ReAct steps per completed session.
933    ///
934    /// Returns `0.0` when no sessions have been recorded.
935    pub fn avg_steps_per_session(&self) -> f64 {
936        let sessions = self.total_sessions();
937        if sessions == 0 {
938            return 0.0;
939        }
940        self.total_steps() as f64 / sessions as f64
941    }
942
943    /// Return the total number of tool calls dispatched.
944    pub fn total_tool_calls(&self) -> u64 {
945        self.total_tool_calls.load(Ordering::Relaxed)
946    }
947
948    /// Return the total number of tool calls that returned an error observation.
949    pub fn failed_tool_calls(&self) -> u64 {
950        self.failed_tool_calls.load(Ordering::Relaxed)
951    }
952
953    /// Return the fraction of tool calls that succeeded (i.e. did not fail).
954    ///
955    /// Returns `1.0` if no tool calls have been recorded yet (vacuously all
956    /// succeeded) and a value in `[0.0, 1.0]` once calls have been made.
957    pub fn tool_success_rate(&self) -> f64 {
958        let total = self.total_tool_calls();
959        if total == 0 {
960            return 1.0;
961        }
962        let failed = self.failed_tool_calls();
963        1.0 - (failed as f64 / total as f64)
964    }
965
966    /// Return the total number of requests shed due to backpressure.
967    pub fn backpressure_shed_count(&self) -> u64 {
968        self.backpressure_shed_count.load(Ordering::Relaxed)
969    }
970
971    /// Return the total number of memory recall operations performed.
972    pub fn memory_recall_count(&self) -> u64 {
973        self.memory_recall_count.load(Ordering::Relaxed)
974    }
975
976    /// Return the total number of checkpoint failures encountered during `run_agent`.
977    pub fn checkpoint_errors(&self) -> u64 {
978        self.checkpoint_errors.load(Ordering::Relaxed)
979    }
980
981    /// Return the ratio of checkpoint errors to total completed sessions.
982    ///
983    /// Returns `0.0` when no sessions have been recorded.
984    pub fn checkpoint_error_rate(&self) -> f64 {
985        let sessions = self.total_sessions();
986        if sessions == 0 {
987            return 0.0;
988        }
989        self.checkpoint_errors() as f64 / sessions as f64
990    }
991
992    /// Return the median (50th-percentile) step latency in milliseconds.
993    ///
994    /// Convenience shorthand for `self.step_latency.p50()`.  Returns `0`
995    /// when no step latencies have been recorded.
996    pub fn p50_latency_ms(&self) -> u64 {
997        self.step_latency.p50()
998    }
999
1000    /// Increment the call counter for `tool_name` by 1.
1001    ///
1002    /// Called automatically by the agent loop when `with_metrics` is configured.
1003    pub fn record_tool_call(&self, tool_name: &str) {
1004        self.total_tool_calls.fetch_add(1, Ordering::Relaxed);
1005        if let Ok(mut maps) = self.per_tool.lock() {
1006            *maps.calls.entry(tool_name.to_owned()).or_insert(0) += 1;
1007        }
1008    }
1009
1010    /// Increment the failure counter for `tool_name` by 1.
1011    ///
1012    /// Called automatically by the agent loop when a tool returns an error.
1013    pub fn record_tool_failure(&self, tool_name: &str) {
1014        self.failed_tool_calls.fetch_add(1, Ordering::Relaxed);
1015        if let Ok(mut maps) = self.per_tool.lock() {
1016            *maps.failures.entry(tool_name.to_owned()).or_insert(0) += 1;
1017        }
1018    }
1019
1020    /// Return a snapshot of per-tool call counts as a `HashMap<tool_name, count>`.
1021    pub fn per_tool_calls_snapshot(&self) -> HashMap<String, u64> {
1022        self.per_tool
1023            .lock()
1024            .map(|m| m.calls.clone())
1025            .unwrap_or_default()
1026    }
1027
1028    /// Return a snapshot of per-tool failure counts as a `HashMap<tool_name, count>`.
1029    pub fn per_tool_failures_snapshot(&self) -> HashMap<String, u64> {
1030        self.per_tool
1031            .lock()
1032            .map(|m| m.failures.clone())
1033            .unwrap_or_default()
1034    }
1035
1036    /// Increment call counter for (agent_id, tool_name).
1037    pub fn record_agent_tool_call(&self, agent_id: &str, tool_name: &str) {
1038        if let Ok(mut maps) = self.per_tool.lock() {
1039            *maps
1040                .agent_calls
1041                .entry(agent_id.to_owned())
1042                .or_default()
1043                .entry(tool_name.to_owned())
1044                .or_insert(0) += 1;
1045        }
1046    }
1047
1048    /// Increment failure counter for (agent_id, tool_name).
1049    pub fn record_agent_tool_failure(&self, agent_id: &str, tool_name: &str) {
1050        if let Ok(mut maps) = self.per_tool.lock() {
1051            *maps
1052                .agent_failures
1053                .entry(agent_id.to_owned())
1054                .or_default()
1055                .entry(tool_name.to_owned())
1056                .or_insert(0) += 1;
1057        }
1058    }
1059
1060    /// Snapshot of per-agent, per-tool call counts.
1061    pub fn per_agent_tool_calls_snapshot(&self) -> HashMap<String, HashMap<String, u64>> {
1062        self.per_tool
1063            .lock()
1064            .map(|m| m.agent_calls.clone())
1065            .unwrap_or_default()
1066    }
1067
1068    /// Snapshot of per-agent, per-tool failure counts.
1069    pub fn per_agent_tool_failures_snapshot(&self) -> HashMap<String, HashMap<String, u64>> {
1070        self.per_tool
1071            .lock()
1072            .map(|m| m.agent_failures.clone())
1073            .unwrap_or_default()
1074    }
1075
1076    /// Capture a complete snapshot of all counters, including per-tool breakdowns.
1077    ///
1078    /// This is the preferred alternative to [`to_snapshot`] — it returns a
1079    /// named [`MetricsSnapshot`] struct instead of an opaque tuple.
1080    ///
1081    /// [`to_snapshot`]: RuntimeMetrics::to_snapshot
1082    pub fn snapshot(&self) -> MetricsSnapshot {
1083        // Acquire the single per-tool lock once for all four maps.
1084        let (per_tool_calls, per_tool_failures, per_agent_tool_calls, per_agent_tool_failures) =
1085            self.per_tool
1086                .lock()
1087                .map(|m| {
1088                    (
1089                        m.calls.clone(),
1090                        m.failures.clone(),
1091                        m.agent_calls.clone(),
1092                        m.agent_failures.clone(),
1093                    )
1094                })
1095                .unwrap_or_default();
1096
1097        MetricsSnapshot {
1098            active_sessions: self.active_sessions.load(Ordering::Relaxed),
1099            total_sessions: self.total_sessions.load(Ordering::Relaxed),
1100            total_steps: self.total_steps.load(Ordering::Relaxed),
1101            total_tool_calls: self.total_tool_calls.load(Ordering::Relaxed),
1102            failed_tool_calls: self.failed_tool_calls.load(Ordering::Relaxed),
1103            backpressure_shed_count: self.backpressure_shed_count.load(Ordering::Relaxed),
1104            memory_recall_count: self.memory_recall_count.load(Ordering::Relaxed),
1105            checkpoint_errors: self.checkpoint_errors.load(Ordering::Relaxed),
1106            per_tool_calls,
1107            per_tool_failures,
1108            step_latency_buckets: self.step_latency.buckets(),
1109            step_latency_mean_ms: self.step_latency.mean_ms(),
1110            per_agent_tool_calls,
1111            per_agent_tool_failures,
1112        }
1113    }
1114
1115    /// Record a step latency sample.
1116    pub fn record_step_latency(&self, ms: u64) {
1117        self.step_latency.record(ms);
1118    }
1119
1120    /// Reset all counters to zero.
1121    ///
1122    /// Intended for testing. In production, counters are monotonically increasing.
1123    pub fn reset(&self) {
1124        self.active_sessions.store(0, Ordering::Relaxed);
1125        self.total_sessions.store(0, Ordering::Relaxed);
1126        self.total_steps.store(0, Ordering::Relaxed);
1127        self.total_tool_calls.store(0, Ordering::Relaxed);
1128        self.failed_tool_calls.store(0, Ordering::Relaxed);
1129        self.backpressure_shed_count.store(0, Ordering::Relaxed);
1130        self.memory_recall_count.store(0, Ordering::Relaxed);
1131        self.checkpoint_errors.store(0, Ordering::Relaxed);
1132        if let Ok(mut maps) = self.per_tool.lock() {
1133            maps.calls.clear();
1134            maps.failures.clear();
1135            maps.agent_calls.clear();
1136            maps.agent_failures.clear();
1137        }
1138        self.step_latency.reset();
1139    }
1140
1141    /// Return the fraction of tool calls that failed: `failed / total`.
1142    ///
1143    /// Returns `0.0` if no tool calls have been recorded.
1144    pub fn failure_rate(&self) -> f64 {
1145        let total = self.total_tool_calls.load(Ordering::Relaxed);
1146        if total == 0 {
1147            return 0.0;
1148        }
1149        let failed = self.failed_tool_calls.load(Ordering::Relaxed);
1150        failed as f64 / total as f64
1151    }
1152
1153    /// Return the fraction of tool calls that succeeded: `1.0 - failure_rate()`.
1154    ///
1155    /// Returns `1.0` if no tool calls have been recorded (vacuously all succeeded).
1156    pub fn success_rate(&self) -> f64 {
1157        1.0 - self.failure_rate()
1158    }
1159
1160    /// Return `true` if there is at least one active (in-progress) session.
1161    pub fn is_active(&self) -> bool {
1162        self.active_sessions.load(Ordering::Relaxed) > 0
1163    }
1164
1165    /// Return the 50th-percentile (median) step latency in milliseconds.
1166    ///
1167    /// Delegates to [`LatencyHistogram::p50`] on the histogram tracked by
1168    /// this `RuntimeMetrics` instance.  Returns `0` if no steps have been recorded.
1169    pub fn step_latency_p50(&self) -> u64 {
1170        self.step_latency.p50()
1171    }
1172
1173    /// Return the 99th-percentile step latency in milliseconds.
1174    ///
1175    /// Delegates to [`LatencyHistogram::p99`].  Returns `0` if no steps have
1176    /// been recorded.
1177    pub fn step_latency_p99(&self) -> u64 {
1178        self.step_latency.p99()
1179    }
1180
1181    /// Return the top `n` tools by total call count, sorted descending.
1182    ///
1183    /// Returns fewer than `n` entries if fewer tools have been called.
1184    pub fn top_tools_by_calls(&self, n: usize) -> Vec<(String, u64)> {
1185        let snap = self.per_tool_calls_snapshot();
1186        let mut pairs: Vec<(String, u64)> = snap.into_iter().collect();
1187        pairs.sort_unstable_by(|a, b| b.1.cmp(&a.1));
1188        pairs.truncate(n);
1189        pairs
1190    }
1191
1192    /// Return the top `n` tools by total failure count, sorted descending.
1193    ///
1194    /// Analogous to [`top_tools_by_calls`]; returns fewer than `n` entries if
1195    /// fewer tools have recorded failures.
1196    ///
1197    /// [`top_tools_by_calls`]: RuntimeMetrics::top_tools_by_calls
1198    pub fn top_tools_by_failures(&self, n: usize) -> Vec<(String, u64)> {
1199        let snap = self.per_tool_failures_snapshot();
1200        let mut pairs: Vec<(String, u64)> = snap.into_iter().collect();
1201        pairs.sort_unstable_by(|a, b| b.1.cmp(&a.1));
1202        pairs.truncate(n);
1203        pairs
1204    }
1205
1206    /// Return the sum of all recorded step latencies in milliseconds.
1207    pub fn total_step_latency_ms(&self) -> u64 {
1208        self.step_latency.sum_ms()
1209    }
1210
1211    /// Capture a snapshot of global counters as plain integers.
1212    ///
1213    /// Returns `(active_sessions, total_sessions, total_steps,
1214    ///           total_tool_calls, failed_tool_calls,
1215    ///           backpressure_shed_count, memory_recall_count)`.
1216    /// For per-tool breakdowns use [`per_tool_calls_snapshot`] and
1217    /// [`per_tool_failures_snapshot`].
1218    ///
1219    /// # Deprecation
1220    ///
1221    /// Prefer [`snapshot`] which returns the named [`MetricsSnapshot`] struct
1222    /// and includes per-tool, per-agent, and histogram data.  This method
1223    /// returns an anonymous tuple whose field order is easy to misread.
1224    ///
1225    /// [`snapshot`]: RuntimeMetrics::snapshot
1226    /// [`per_tool_calls_snapshot`]: RuntimeMetrics::per_tool_calls_snapshot
1227    /// [`per_tool_failures_snapshot`]: RuntimeMetrics::per_tool_failures_snapshot
1228    #[deprecated(since = "1.0.3", note = "use `snapshot()` which returns the named MetricsSnapshot struct")]
1229    pub fn to_snapshot(&self) -> (usize, u64, u64, u64, u64, u64, u64) {
1230        (
1231            self.active_sessions.load(Ordering::Relaxed),
1232            self.total_sessions.load(Ordering::Relaxed),
1233            self.total_steps.load(Ordering::Relaxed),
1234            self.total_tool_calls.load(Ordering::Relaxed),
1235            self.failed_tool_calls.load(Ordering::Relaxed),
1236            self.backpressure_shed_count.load(Ordering::Relaxed),
1237            self.memory_recall_count.load(Ordering::Relaxed),
1238        )
1239    }
1240}
1241
1242// ── Tests ─────────────────────────────────────────────────────────────────────
1243
1244#[cfg(test)]
1245mod tests {
1246    use super::*;
1247
    // Constructor: every counter must start at zero.
    #[test]
    fn test_metrics_new_returns_arc_with_zero_counters() {
        let m = RuntimeMetrics::new();
        assert_eq!(m.active_sessions(), 0);
        assert_eq!(m.total_sessions(), 0);
        assert_eq!(m.total_steps(), 0);
        assert_eq!(m.total_tool_calls(), 0);
        assert_eq!(m.failed_tool_calls(), 0);
        assert_eq!(m.backpressure_shed_count(), 0);
        assert_eq!(m.memory_recall_count(), 0);
    }

    // The active-session gauge can go both up and down.
    #[test]
    fn test_active_sessions_increments_and_decrements() {
        let m = RuntimeMetrics::new();
        m.active_sessions.fetch_add(1, Ordering::Relaxed);
        assert_eq!(m.active_sessions(), 1);
        m.active_sessions.fetch_sub(1, Ordering::Relaxed);
        assert_eq!(m.active_sessions(), 0);
    }

    #[test]
    fn test_total_sessions_increments() {
        let m = RuntimeMetrics::new();
        m.total_sessions.fetch_add(1, Ordering::Relaxed);
        m.total_sessions.fetch_add(1, Ordering::Relaxed);
        assert_eq!(m.total_sessions(), 2);
    }

    #[test]
    fn test_total_steps_increments() {
        let m = RuntimeMetrics::new();
        m.total_steps.fetch_add(5, Ordering::Relaxed);
        assert_eq!(m.total_steps(), 5);
    }

    #[test]
    fn test_total_tool_calls_increments() {
        let m = RuntimeMetrics::new();
        m.total_tool_calls.fetch_add(3, Ordering::Relaxed);
        assert_eq!(m.total_tool_calls(), 3);
    }

    #[test]
    fn test_failed_tool_calls_increments() {
        let m = RuntimeMetrics::new();
        m.failed_tool_calls.fetch_add(2, Ordering::Relaxed);
        assert_eq!(m.failed_tool_calls(), 2);
    }

    #[test]
    fn test_backpressure_shed_count_increments() {
        let m = RuntimeMetrics::new();
        m.backpressure_shed_count.fetch_add(7, Ordering::Relaxed);
        assert_eq!(m.backpressure_shed_count(), 7);
    }

    #[test]
    fn test_memory_recall_count_increments() {
        let m = RuntimeMetrics::new();
        m.memory_recall_count.fetch_add(4, Ordering::Relaxed);
        assert_eq!(m.memory_recall_count(), 4);
    }

    // `reset` must zero every global counter regardless of prior values.
    #[test]
    fn test_reset_zeroes_all_counters() {
        let m = RuntimeMetrics::new();
        m.active_sessions.store(3, Ordering::Relaxed);
        m.total_sessions.store(10, Ordering::Relaxed);
        m.total_steps.store(50, Ordering::Relaxed);
        m.total_tool_calls.store(20, Ordering::Relaxed);
        m.failed_tool_calls.store(2, Ordering::Relaxed);
        m.backpressure_shed_count.store(1, Ordering::Relaxed);
        m.memory_recall_count.store(8, Ordering::Relaxed);

        m.reset();

        assert_eq!(m.active_sessions(), 0);
        assert_eq!(m.total_sessions(), 0);
        assert_eq!(m.total_steps(), 0);
        assert_eq!(m.total_tool_calls(), 0);
        assert_eq!(m.failed_tool_calls(), 0);
        assert_eq!(m.backpressure_shed_count(), 0);
        assert_eq!(m.memory_recall_count(), 0);
    }

    // The deprecated tuple snapshot keeps its documented field order:
    // (active, sessions, steps, tool_calls, failed, shed, recalls).
    #[test]
    fn test_to_snapshot_captures_correct_values() {
        let m = RuntimeMetrics::new();
        m.active_sessions.store(1, Ordering::Relaxed);
        m.total_sessions.store(2, Ordering::Relaxed);
        m.total_steps.store(3, Ordering::Relaxed);
        m.total_tool_calls.store(4, Ordering::Relaxed);
        m.failed_tool_calls.store(5, Ordering::Relaxed);
        m.backpressure_shed_count.store(6, Ordering::Relaxed);
        m.memory_recall_count.store(7, Ordering::Relaxed);

        let snap = m.to_snapshot();
        assert_eq!(snap, (1, 2, 3, 4, 5, 6, 7));
    }

    // Compile-time proof that the metrics type can be shared across threads.
    #[test]
    fn test_metrics_is_send_sync() {
        fn assert_send_sync<T: Send + Sync>() {}
        assert_send_sync::<RuntimeMetrics>();
    }

    #[test]
    fn test_multiple_increments_are_cumulative() {
        let m = RuntimeMetrics::new();
        for _ in 0..100 {
            m.total_sessions.fetch_add(1, Ordering::Relaxed);
        }
        assert_eq!(m.total_sessions(), 100);
    }

    // Cloning the Arc shares the underlying counters, not copies of them.
    #[test]
    fn test_arc_clone_shares_state() {
        let m = RuntimeMetrics::new();
        let m2 = Arc::clone(&m);
        m.total_sessions.fetch_add(1, Ordering::Relaxed);
        assert_eq!(m2.total_sessions(), 1);
    }
1371
1372    // ── Per-tool metrics ──────────────────────────────────────────────────────
1373
    // `record_tool_call` bumps both the global counter and the per-tool map.
    #[test]
    fn test_record_tool_call_increments_global_and_per_tool() {
        let m = RuntimeMetrics::new();
        m.record_tool_call("search");
        m.record_tool_call("search");
        m.record_tool_call("lookup");
        assert_eq!(m.total_tool_calls(), 3);
        let snap = m.per_tool_calls_snapshot();
        assert_eq!(snap.get("search").copied(), Some(2));
        assert_eq!(snap.get("lookup").copied(), Some(1));
    }

    // `record_tool_failure` bumps both the global counter and the per-tool map.
    #[test]
    fn test_record_tool_failure_increments_global_and_per_tool() {
        let m = RuntimeMetrics::new();
        m.record_tool_failure("search");
        m.record_tool_failure("lookup");
        m.record_tool_failure("search");
        assert_eq!(m.failed_tool_calls(), 3);
        let snap = m.per_tool_failures_snapshot();
        assert_eq!(snap.get("search").copied(), Some(2));
        assert_eq!(snap.get("lookup").copied(), Some(1));
    }

    // `reset` clears the per-tool maps, not just the global atomics.
    #[test]
    fn test_reset_clears_per_tool_counters() {
        let m = RuntimeMetrics::new();
        m.record_tool_call("foo");
        m.record_tool_failure("foo");
        m.reset();
        assert!(m.per_tool_calls_snapshot().is_empty());
        assert!(m.per_tool_failures_snapshot().is_empty());
    }

    // Tools that were never called simply do not appear in the snapshot.
    #[test]
    fn test_per_tool_snapshot_is_independent_for_unknown_tools() {
        let m = RuntimeMetrics::new();
        let snap = m.per_tool_calls_snapshot();
        assert!(snap.is_empty());
    }
1414
1415    // ── LatencyHistogram ───────────────────────────────────────────────────────
1416
    // A single recorded sample is reflected in the total count.
    #[test]
    fn test_latency_histogram_records_sample() {
        let h = LatencyHistogram::default();
        h.record(10);
        assert_eq!(h.count(), 1);
    }

    // Mean is sum / count over the exact recorded values.
    #[test]
    fn test_latency_histogram_mean_ms() {
        let h = LatencyHistogram::default();
        h.record(10);
        h.record(20);
        assert!((h.mean_ms() - 15.0).abs() < 1e-5);
    }

    // Bucket routing: upper bounds are inclusive (see LatencyHistogram docs).
    #[test]
    fn test_latency_histogram_buckets_correct_bucket() {
        let h = LatencyHistogram::default();
        h.record(3); // falls in ≤5ms bucket (index 1)
        let buckets = h.buckets();
        // bucket at index 1 is ≤5ms
        assert_eq!(buckets[1].1, 1, "3ms should land in ≤5ms bucket");
        // other buckets should be zero
        assert_eq!(buckets[0].1, 0);
        assert_eq!(buckets[2].1, 0);
    }
1443
1444    // ── MetricsSnapshot ───────────────────────────────────────────────────────
1445
    // `snapshot` must reflect every counter, global and per-tool alike.
    #[test]
    fn test_snapshot_returns_all_fields() {
        let m = RuntimeMetrics::new();
        m.active_sessions.store(1, Ordering::Relaxed);
        m.total_sessions.store(2, Ordering::Relaxed);
        m.total_steps.store(3, Ordering::Relaxed);
        m.backpressure_shed_count.store(6, Ordering::Relaxed);
        m.memory_recall_count.store(7, Ordering::Relaxed);
        // Use record_* methods so global and per-tool counters stay consistent.
        m.record_tool_call("my_tool");
        m.record_tool_call("my_tool");
        m.record_tool_failure("my_tool");

        let snap = m.snapshot();
        assert_eq!(snap.active_sessions, 1);
        assert_eq!(snap.total_sessions, 2);
        assert_eq!(snap.total_steps, 3);
        assert_eq!(snap.total_tool_calls, 2);
        assert_eq!(snap.failed_tool_calls, 1);
        assert_eq!(snap.backpressure_shed_count, 6);
        assert_eq!(snap.memory_recall_count, 7);
        assert_eq!(snap.per_tool_calls.get("my_tool").copied(), Some(2));
        assert_eq!(snap.per_tool_failures.get("my_tool").copied(), Some(1));
    }

    // `MetricsSnapshot::default()` is the all-zero / all-empty snapshot.
    #[test]
    fn test_snapshot_default_is_zeroed() {
        let snap = MetricsSnapshot::default();
        assert_eq!(snap.active_sessions, 0);
        assert_eq!(snap.total_sessions, 0);
        assert_eq!(snap.total_steps, 0);
        assert!(snap.per_tool_calls.is_empty());
        assert!(snap.per_tool_failures.is_empty());
    }
1480
1481    // ── #8 MetricsSnapshot histogram fields ───────────────────────────────────
1482
    // Snapshot carries the full 7-bucket histogram plus the running mean.
    #[test]
    fn test_metrics_snapshot_contains_all_fields() {
        let m = RuntimeMetrics::new();
        m.record_step_latency(5);
        m.record_step_latency(200);
        let snap = m.snapshot();
        // Should have 7 buckets
        assert_eq!(snap.step_latency_buckets.len(), 7);
        assert!(snap.step_latency_mean_ms > 0.0);
    }
1493
1494    // ── #9 per-agent tool call tracking ──────────────────────────────────────
1495
    // Per-agent counters are keyed agent → tool, are included in snapshots,
    // and are cleared by `reset`.
    #[test]
    fn test_per_agent_tool_call_tracking() {
        let m = RuntimeMetrics::new();
        m.record_agent_tool_call("agent-1", "search");
        m.record_agent_tool_call("agent-1", "search");
        m.record_agent_tool_call("agent-2", "lookup");
        m.record_agent_tool_failure("agent-1", "search");

        let calls = m.per_agent_tool_calls_snapshot();
        assert_eq!(calls.get("agent-1").and_then(|t| t.get("search")).copied(), Some(2));
        assert_eq!(calls.get("agent-2").and_then(|t| t.get("lookup")).copied(), Some(1));

        let failures = m.per_agent_tool_failures_snapshot();
        assert_eq!(failures.get("agent-1").and_then(|t| t.get("search")).copied(), Some(1));

        // Also check snapshot includes them
        let snap = m.snapshot();
        assert_eq!(snap.per_agent_tool_calls.get("agent-1").and_then(|t| t.get("search")).copied(), Some(2));

        // Reset clears them
        m.reset();
        assert!(m.per_agent_tool_calls_snapshot().is_empty());
        assert!(m.per_agent_tool_failures_snapshot().is_empty());
    }
1520
1521    // ── New API tests (Rounds 4-8) ────────────────────────────────────────────
1522
    // min/max are `None` until at least one sample has been recorded.
    #[test]
    fn test_latency_histogram_min_max_ms() {
        let h = LatencyHistogram::default();
        assert!(h.min_ms().is_none());
        assert!(h.max_ms().is_none());

        h.record(3);  // bucket 1 (≤5ms)
        h.record(200); // bucket 5 (≤500ms)
        assert!(h.min_ms().is_some());
        assert!(h.max_ms().is_some());
        assert!(h.min_ms().unwrap() <= h.max_ms().unwrap());
    }

    // With all samples in one bucket, every percentile resolves to that bucket.
    #[test]
    fn test_latency_histogram_p50_p95_p99() {
        let h = LatencyHistogram::default();
        for _ in 0..100 {
            h.record(5); // all in ≤5ms bucket
        }
        // p50, p95, p99 should all resolve to the same bucket bound
        let p50 = h.p50();
        let p95 = h.p95();
        let p99 = h.p99();
        assert_eq!(p50, p95);
        assert_eq!(p95, p99);
    }

    // `delta(after, before)` reports exactly the increments made in between.
    #[test]
    fn test_metrics_snapshot_delta_reflects_increments() {
        let m = RuntimeMetrics::new();
        let before = m.snapshot();
        m.total_steps.fetch_add(5, std::sync::atomic::Ordering::Relaxed);
        m.total_tool_calls.fetch_add(3, std::sync::atomic::Ordering::Relaxed);
        let after = m.snapshot();
        let delta = MetricsSnapshot::delta(&after, &before);
        assert_eq!(delta.total_steps, 5);
        assert_eq!(delta.total_tool_calls, 3);
    }

    // The Display impl mentions the headline fields by name.
    #[test]
    fn test_metrics_snapshot_display_contains_key_fields() {
        let m = RuntimeMetrics::new();
        let snap = m.snapshot();
        let s = snap.to_string();
        assert!(s.contains("sessions"));
        assert!(s.contains("steps"));
        assert!(s.contains("latency_mean"));
    }

    #[test]
    fn test_failure_rate_zero_when_no_calls() {
        let m = RuntimeMetrics::new();
        assert_eq!(m.failure_rate(), 0.0);
    }

    #[test]
    fn test_failure_rate_correct_proportion() {
        let m = RuntimeMetrics::new();
        m.record_tool_call("tool_a");
        m.record_tool_call("tool_a");
        m.record_tool_failure("tool_a");
        // 1 failure out of 2 total = 0.5
        assert!((m.failure_rate() - 0.5).abs() < 1e-9);
    }

    #[test]
    fn test_failure_rate_all_failed() {
        let m = RuntimeMetrics::new();
        m.record_tool_call("x");
        m.record_tool_failure("x");
        assert!((m.failure_rate() - 1.0).abs() < 1e-9);
    }

    // Top-N is sorted descending by call count and truncated to `n`.
    #[test]
    fn test_top_tools_by_calls_returns_top_n() {
        let m = RuntimeMetrics::new();
        for _ in 0..5 { m.record_tool_call("a"); }
        for _ in 0..3 { m.record_tool_call("b"); }
        for _ in 0..1 { m.record_tool_call("c"); }
        let top = m.top_tools_by_calls(2);
        assert_eq!(top.len(), 2);
        assert_eq!(top[0].0, "a");
        assert_eq!(top[1].0, "b");
    }

    #[test]
    fn test_top_tools_by_calls_returns_all_when_n_exceeds_count() {
        let m = RuntimeMetrics::new();
        m.record_tool_call("only");
        let top = m.top_tools_by_calls(10);
        assert_eq!(top.len(), 1);
        assert_eq!(top[0].0, "only");
    }

    // JSON form exposes the headline counters as top-level keys.
    #[test]
    fn test_metrics_snapshot_to_json_contains_key_fields() {
        let m = RuntimeMetrics::new();
        m.record_tool_call("t");
        let snap = m.snapshot();
        let json = snap.to_json();
        assert!(json.get("total_sessions").is_some());
        assert!(json.get("total_steps").is_some());
        assert!(json.get("total_tool_calls").is_some());
    }

    #[test]
    fn test_metrics_snapshot_is_zero_on_new_metrics() {
        let m = RuntimeMetrics::new();
        assert!(m.snapshot().is_zero());
    }

    #[test]
    fn test_metrics_snapshot_is_zero_false_after_activity() {
        let m = RuntimeMetrics::new();
        m.record_tool_call("t");
        assert!(!m.snapshot().is_zero());
    }

    // Per-tool counts via the snapshot convenience accessor; absent → 0.
    #[test]
    fn test_tool_call_count_returns_per_tool_count() {
        let m = RuntimeMetrics::new();
        m.record_tool_call("search");
        m.record_tool_call("search");
        m.record_tool_call("fetch");
        let snap = m.snapshot();
        assert_eq!(snap.tool_call_count("search"), 2);
        assert_eq!(snap.tool_call_count("fetch"), 1);
        assert_eq!(snap.tool_call_count("absent"), 0);
    }

    #[test]
    fn test_tool_failure_count_returns_per_tool_failures() {
        let m = RuntimeMetrics::new();
        m.record_tool_call("t");
        m.record_tool_failure("t");
        let snap = m.snapshot();
        assert_eq!(snap.tool_failure_count("t"), 1);
        assert_eq!(snap.tool_failure_count("other"), 0);
    }
1662
1663    #[test]
1664    fn test_latency_histogram_clear_resets_counts() {
1665        let h = LatencyHistogram::default();
1666        h.record(10);
1667        h.record(20);
1668        assert_eq!(h.count(), 2);
1669        h.clear();
1670        assert_eq!(h.count(), 0);
1671    }
1672
1673    #[test]
1674    fn test_metrics_snapshot_tool_names_sorted() {
1675        let m = RuntimeMetrics::new();
1676        m.record_tool_call("zebra");
1677        m.record_tool_call("alpha");
1678        m.record_tool_call("mango");
1679        let snap = m.snapshot();
1680        assert_eq!(snap.tool_names(), vec!["alpha", "mango", "zebra"]);
1681    }
1682
1683    // ── Round 4: top_tools_by_failures / LatencyHistogram::sum_ms ────────────
1684
1685    #[test]
1686    fn test_top_tools_by_failures_returns_top_n_descending() {
1687        let m = RuntimeMetrics::new();
1688        m.record_tool_failure("a");
1689        m.record_tool_failure("a");
1690        m.record_tool_failure("a");
1691        m.record_tool_failure("b");
1692        m.record_tool_failure("b");
1693        m.record_tool_failure("c");
1694        let top2 = m.top_tools_by_failures(2);
1695        assert_eq!(top2.len(), 2);
1696        assert_eq!(top2[0].0, "a");
1697        assert_eq!(top2[0].1, 3);
1698        assert_eq!(top2[1].0, "b");
1699        assert_eq!(top2[1].1, 2);
1700    }
1701
1702    #[test]
1703    fn test_top_tools_by_failures_n_larger_than_tools() {
1704        let m = RuntimeMetrics::new();
1705        m.record_tool_failure("only");
1706        let top = m.top_tools_by_failures(10);
1707        assert_eq!(top.len(), 1);
1708        assert_eq!(top[0].0, "only");
1709    }
1710
1711    #[test]
1712    fn test_latency_histogram_sum_ms_accumulates() {
1713        let h = LatencyHistogram::default();
1714        h.record(100);
1715        h.record(200);
1716        h.record(300);
1717        assert_eq!(h.sum_ms(), 600);
1718    }
1719
1720    #[test]
1721    fn test_latency_histogram_sum_ms_zero_when_empty() {
1722        let h = LatencyHistogram::default();
1723        assert_eq!(h.sum_ms(), 0);
1724    }
1725
1726    // ── Round 16: mean_ms, failure_rate ──────────────────────────────────────
1727
1728    #[test]
1729    fn test_latency_histogram_mean_ms_zero_when_empty() {
1730        let h = LatencyHistogram::default();
1731        assert_eq!(h.mean_ms(), 0.0);
1732    }
1733
1734    #[test]
1735    fn test_latency_histogram_mean_ms_computes_average() {
1736        let h = LatencyHistogram::default();
1737        h.record(100);
1738        h.record(200);
1739        h.record(300);
1740        assert!((h.mean_ms() - 200.0).abs() < 1.0);
1741    }
1742
1743    #[test]
1744    fn test_metrics_snapshot_failure_rate_zero_when_no_calls() {
1745        let m = RuntimeMetrics::new();
1746        let snap = m.snapshot();
1747        assert_eq!(snap.failure_rate(), 0.0);
1748    }
1749
1750    #[test]
1751    fn test_metrics_snapshot_failure_rate_correct() {
1752        let m = RuntimeMetrics::new();
1753        m.record_tool_call("t");
1754        m.record_tool_call("t");
1755        m.record_tool_failure("t");
1756        let snap = m.snapshot();
1757        assert!((snap.failure_rate() - 0.5).abs() < 1e-9);
1758    }
1759
1760    // ── Round 20: success_rate / is_active / checkpoint_errors ────────────────
1761
1762    #[test]
1763    fn test_success_rate_one_when_no_failures() {
1764        let m = RuntimeMetrics::new();
1765        m.record_tool_call("x");
1766        assert!((m.success_rate() - 1.0).abs() < 1e-9);
1767    }
1768
1769    #[test]
1770    fn test_success_rate_half_when_half_failed() {
1771        let m = RuntimeMetrics::new();
1772        m.record_tool_call("x");
1773        m.record_tool_call("x");
1774        m.record_tool_failure("x");
1775        assert!((m.success_rate() - 0.5).abs() < 1e-9);
1776    }
1777
1778    #[test]
1779    fn test_success_rate_one_when_no_calls() {
1780        let m = RuntimeMetrics::new();
1781        // Vacuously all succeeded — no calls means success_rate = 1.0
1782        assert!((m.success_rate() - 1.0).abs() < 1e-9);
1783    }
1784
1785    #[test]
1786    fn test_is_active_false_when_no_sessions() {
1787        let m = RuntimeMetrics::new();
1788        assert!(!m.is_active());
1789    }
1790
1791    #[test]
1792    fn test_is_active_true_when_session_active() {
1793        let m = RuntimeMetrics::new();
1794        m.active_sessions.fetch_add(1, Ordering::Relaxed);
1795        assert!(m.is_active());
1796        m.active_sessions.fetch_sub(1, Ordering::Relaxed);
1797        assert!(!m.is_active());
1798    }
1799
1800    #[test]
1801    fn test_checkpoint_errors_increments() {
1802        let m = RuntimeMetrics::new();
1803        assert_eq!(m.checkpoint_errors(), 0);
1804        m.checkpoint_errors.fetch_add(3, Ordering::Relaxed);
1805        assert_eq!(m.checkpoint_errors(), 3);
1806    }
1807
1808    #[test]
1809    fn test_checkpoint_errors_reset_to_zero() {
1810        let m = RuntimeMetrics::new();
1811        m.checkpoint_errors.fetch_add(5, Ordering::Relaxed);
1812        m.reset();
1813        assert_eq!(m.checkpoint_errors(), 0);
1814    }
1815
1816    // ── Round 10: LatencyHistogram::std_dev_ms ────────────────────────────────
1817
1818    #[test]
1819    fn test_std_dev_ms_zero_for_no_samples() {
1820        let h = LatencyHistogram::default();
1821        assert!((h.std_dev_ms() - 0.0).abs() < 1e-9);
1822    }
1823
1824    #[test]
1825    fn test_std_dev_ms_zero_for_single_sample() {
1826        let h = LatencyHistogram::default();
1827        h.record(5);
1828        assert!((h.std_dev_ms() - 0.0).abs() < 1e-9);
1829    }
1830
1831    #[test]
1832    fn test_std_dev_ms_positive_for_varied_samples() {
1833        let h = LatencyHistogram::default();
1834        h.record(1);    // bucket 0 mid ~0.5
1835        h.record(200);  // bucket 5 mid ~300
1836        // Two samples with very different values → std_dev > 0
1837        assert!(h.std_dev_ms() > 0.0);
1838    }
1839
1840    #[test]
1841    fn test_std_dev_ms_zero_for_identical_samples() {
1842        let h = LatencyHistogram::default();
1843        h.record(5);
1844        h.record(5);
1845        h.record(5);
1846        // All samples in the same bucket → std_dev ≈ 0
1847        assert!(h.std_dev_ms() < 1.0);
1848    }
1849
1850    // ── Round 11: RuntimeMetrics::tool_success_rate ───────────────────────────
1851
1852    #[test]
1853    fn test_tool_success_rate_one_when_no_calls() {
1854        let m = RuntimeMetrics::new();
1855        assert!((m.tool_success_rate() - 1.0).abs() < 1e-9);
1856    }
1857
1858    #[test]
1859    fn test_tool_success_rate_one_when_no_failures() {
1860        let m = RuntimeMetrics::new();
1861        m.total_tool_calls.fetch_add(10, Ordering::Relaxed);
1862        assert!((m.tool_success_rate() - 1.0).abs() < 1e-9);
1863    }
1864
1865    #[test]
1866    fn test_tool_success_rate_half_when_half_fail() {
1867        let m = RuntimeMetrics::new();
1868        m.total_tool_calls.fetch_add(10, Ordering::Relaxed);
1869        m.failed_tool_calls.fetch_add(5, Ordering::Relaxed);
1870        assert!((m.tool_success_rate() - 0.5).abs() < 1e-9);
1871    }
1872
1873    #[test]
1874    fn test_tool_success_rate_zero_when_all_fail() {
1875        let m = RuntimeMetrics::new();
1876        m.total_tool_calls.fetch_add(4, Ordering::Relaxed);
1877        m.failed_tool_calls.fetch_add(4, Ordering::Relaxed);
1878        assert!(m.tool_success_rate().abs() < 1e-9);
1879    }
1880
1881    // ── Round 12: step_latency_p50/p99, LatencyHistogram::range_ms ───────────
1882
1883    #[test]
1884    fn test_step_latency_p50_zero_when_empty() {
1885        let m = RuntimeMetrics::new();
1886        assert_eq!(m.step_latency_p50(), 0);
1887    }
1888
1889    #[test]
1890    fn test_step_latency_p99_zero_when_empty() {
1891        let m = RuntimeMetrics::new();
1892        assert_eq!(m.step_latency_p99(), 0);
1893    }
1894
1895    #[test]
1896    fn test_step_latency_p50_after_recording() {
1897        let m = RuntimeMetrics::new();
1898        for _ in 0..10 {
1899            m.step_latency.record(100);
1900        }
1901        assert!(m.step_latency_p50() > 0);
1902    }
1903
1904    #[test]
1905    fn test_step_latency_p99_gte_p50() {
1906        let m = RuntimeMetrics::new();
1907        for v in [10, 20, 30, 40, 500] {
1908            m.step_latency.record(v);
1909        }
1910        assert!(m.step_latency_p99() >= m.step_latency_p50());
1911    }
1912
1913    #[test]
1914    fn test_latency_histogram_range_ms_none_when_empty() {
1915        let h = LatencyHistogram::default();
1916        assert!(h.range_ms().is_none());
1917    }
1918
1919    #[test]
1920    fn test_latency_histogram_range_ms_some_for_single_sample() {
1921        let h = LatencyHistogram::default();
1922        h.record(100);
1923        // min/max are both derived from bucket boundaries, range is Some
1924        assert!(h.range_ms().is_some());
1925    }
1926
1927    #[test]
1928    fn test_latency_histogram_range_ms_positive_for_spread() {
1929        let h = LatencyHistogram::default();
1930        h.record(10);
1931        h.record(1000);
1932        let range = h.range_ms().unwrap();
1933        assert!(range > 0, "range should be > 0 for spread samples, got {range}");
1934    }
1935
1936    // ── Round 13: avg_tool_calls_per_session ──────────────────────────────────
1937
1938    #[test]
1939    fn test_avg_tool_calls_per_session_zero_when_no_sessions() {
1940        let m = RuntimeMetrics::new();
1941        assert!((m.avg_tool_calls_per_session() - 0.0).abs() < 1e-9);
1942    }
1943
1944    #[test]
1945    fn test_avg_tool_calls_per_session_correct_ratio() {
1946        let m = RuntimeMetrics::new();
1947        m.total_sessions.fetch_add(2, Ordering::Relaxed);
1948        m.total_tool_calls.fetch_add(10, Ordering::Relaxed);
1949        assert!((m.avg_tool_calls_per_session() - 5.0).abs() < 1e-9);
1950    }
1951
1952    // ── Round 27: interquartile_range_ms, avg_steps_per_session ──────────────
1953
1954    #[test]
1955    fn test_interquartile_range_ms_empty_is_zero() {
1956        let h = LatencyHistogram::default();
1957        assert_eq!(h.interquartile_range_ms(), 0);
1958    }
1959
1960    #[test]
1961    fn test_interquartile_range_ms_saturates_not_panics() {
1962        let h = LatencyHistogram::default();
1963        for _ in 0..50 {
1964            h.record(10);
1965        }
1966        for _ in 0..50 {
1967            h.record(500);
1968        }
1969        let iqr = h.interquartile_range_ms();
1970        // IQR must be non-negative (saturating_sub guarantee)
1971        assert!(iqr < u64::MAX);
1972    }
1973
1974    #[test]
1975    fn test_avg_steps_per_session_zero_when_no_sessions() {
1976        let snap = MetricsSnapshot::default();
1977        assert!((snap.avg_steps_per_session() - 0.0).abs() < 1e-9);
1978    }
1979
1980    #[test]
1981    fn test_avg_steps_per_session_correct_ratio() {
1982        let snap = MetricsSnapshot {
1983            total_sessions: 4,
1984            total_steps: 20,
1985            ..Default::default()
1986        };
1987        assert!((snap.avg_steps_per_session() - 5.0).abs() < 1e-9);
1988    }
1989
1990    // ── Round 15: LatencyHistogram::is_empty, RuntimeMetrics::checkpoint_error_rate
1991
1992    #[test]
1993    fn test_latency_histogram_is_empty_true_initially() {
1994        let h = LatencyHistogram::default();
1995        assert!(h.is_empty());
1996    }
1997
1998    #[test]
1999    fn test_latency_histogram_is_empty_false_after_record() {
2000        let h = LatencyHistogram::default();
2001        h.record(10);
2002        assert!(!h.is_empty());
2003    }
2004
2005    #[test]
2006    fn test_checkpoint_error_rate_zero_when_no_sessions() {
2007        let m = RuntimeMetrics::new();
2008        assert!((m.checkpoint_error_rate() - 0.0).abs() < 1e-9);
2009    }
2010
2011    #[test]
2012    fn test_checkpoint_error_rate_ratio_correct() {
2013        let m = RuntimeMetrics::new();
2014        m.total_sessions.fetch_add(4, std::sync::atomic::Ordering::Relaxed);
2015        m.checkpoint_errors.fetch_add(2, std::sync::atomic::Ordering::Relaxed);
2016        assert!((m.checkpoint_error_rate() - 0.5).abs() < 1e-9);
2017    }
2018
2019    // ── Round 16: LatencyHistogram::mode_bucket_ms ───────────────────────────
2020
2021    #[test]
2022    fn test_mode_bucket_ms_none_when_empty() {
2023        let h = LatencyHistogram::default();
2024        assert!(h.mode_bucket_ms().is_none());
2025    }
2026
2027    #[test]
2028    fn test_mode_bucket_ms_returns_bucket_with_most_samples() {
2029        let h = LatencyHistogram::default();
2030        // Record many samples in the ~10ms range
2031        for _ in 0..10 {
2032            h.record(5);
2033        }
2034        // Record fewer samples in the ~500ms range
2035        for _ in 0..2 {
2036            h.record(400);
2037        }
2038        let mode = h.mode_bucket_ms().unwrap();
2039        // The low-latency bucket should win
2040        assert!(mode <= 50, "expected low-latency bucket, got {mode}");
2041    }
2042
2043    // ── Round 17: MetricsSnapshot::error_rate / memory_recall_rate ───────────
2044
2045    #[test]
2046    fn test_metrics_snapshot_error_rate_zero_when_no_tool_calls() {
2047        let snap = MetricsSnapshot::default();
2048        assert!((snap.error_rate() - 0.0).abs() < 1e-9);
2049    }
2050
2051    #[test]
2052    fn test_metrics_snapshot_error_rate_correct_ratio() {
2053        let snap = MetricsSnapshot {
2054            total_tool_calls: 10,
2055            failed_tool_calls: 3,
2056            ..Default::default()
2057        };
2058        assert!((snap.error_rate() - 0.3).abs() < 1e-9);
2059    }
2060
2061    #[test]
2062    fn test_metrics_snapshot_memory_recall_rate_zero_when_no_sessions() {
2063        let snap = MetricsSnapshot::default();
2064        assert!((snap.memory_recall_rate() - 0.0).abs() < 1e-9);
2065    }
2066
2067    #[test]
2068    fn test_metrics_snapshot_memory_recall_rate_correct_ratio() {
2069        let snap = MetricsSnapshot {
2070            total_sessions: 5,
2071            memory_recall_count: 15,
2072            ..Default::default()
2073        };
2074        assert!((snap.memory_recall_rate() - 3.0).abs() < 1e-9);
2075    }
2076
2077    // ── Round 22: p10 ─────────────────────────────────────────────────────────
2078
2079    #[test]
2080    fn test_latency_histogram_p10_zero_when_empty() {
2081        let h = LatencyHistogram::default();
2082        assert_eq!(h.p10(), 0);
2083    }
2084
2085    #[test]
2086    fn test_latency_histogram_p10_lte_p50_lte_p99() {
2087        let h = LatencyHistogram::default();
2088        for ms in [10, 20, 50, 100, 200, 500, 1000] {
2089            h.record(ms);
2090        }
2091        assert!(h.p10() <= h.p50());
2092        assert!(h.p50() <= h.p99());
2093    }
2094
2095    // ── Round 29: is_below_p99, MetricsSnapshot::is_healthy ──────────────────
2096
2097    #[test]
2098    fn test_latency_histogram_is_below_p99_true_when_empty() {
2099        let h = LatencyHistogram::default();
2100        assert!(h.is_below_p99(1)); // p99 == 0 < 1
2101    }
2102
2103    #[test]
2104    fn test_latency_histogram_is_below_p99_true_when_under_threshold() {
2105        let h = LatencyHistogram::default();
2106        for _ in 0..100 {
2107            h.record(50);
2108        }
2109        assert!(h.is_below_p99(100));
2110    }
2111
2112    #[test]
2113    fn test_latency_histogram_is_below_p99_false_when_at_threshold() {
2114        let h = LatencyHistogram::default();
2115        for _ in 0..100 {
2116            h.record(200);
2117        }
2118        assert!(!h.is_below_p99(200)); // p99 == 200, not strictly less
2119    }
2120
2121    #[test]
2122    fn test_metrics_snapshot_is_healthy_true_when_default() {
2123        let snap = MetricsSnapshot::default();
2124        assert!(snap.is_healthy());
2125    }
2126
2127    #[test]
2128    fn test_metrics_snapshot_is_healthy_false_when_failed_tool_calls() {
2129        let snap = MetricsSnapshot { failed_tool_calls: 1, ..Default::default() };
2130        assert!(!snap.is_healthy());
2131    }
2132
2133    #[test]
2134    fn test_metrics_snapshot_is_healthy_false_when_backpressure_shed() {
2135        let snap = MetricsSnapshot { backpressure_shed_count: 2, ..Default::default() };
2136        assert!(!snap.is_healthy());
2137    }
2138
2139    #[test]
2140    fn test_metrics_snapshot_is_healthy_false_when_checkpoint_errors() {
2141        let snap = MetricsSnapshot { checkpoint_errors: 1, ..Default::default() };
2142        assert!(!snap.is_healthy());
2143    }
2144
2145    // ── Round 23: median_ms / steps_per_session / p50_latency_ms ─────────────
2146
2147    #[test]
2148    fn test_latency_histogram_median_ms_equals_p50() {
2149        let h = LatencyHistogram::default();
2150        for ms in [10, 50, 100, 200, 500] {
2151            h.record(ms);
2152        }
2153        assert_eq!(h.median_ms(), h.p50());
2154    }
2155
2156    #[test]
2157    fn test_latency_histogram_median_ms_zero_when_empty() {
2158        let h = LatencyHistogram::default();
2159        assert_eq!(h.median_ms(), 0);
2160    }
2161
2162    #[test]
2163    fn test_metrics_snapshot_steps_per_session_zero_when_no_sessions() {
2164        let snap = MetricsSnapshot::default();
2165        assert!((snap.steps_per_session() - 0.0).abs() < 1e-9);
2166    }
2167
2168    #[test]
2169    fn test_metrics_snapshot_steps_per_session_correct_ratio() {
2170        let snap = MetricsSnapshot {
2171            total_sessions: 4,
2172            total_steps: 20,
2173            ..Default::default()
2174        };
2175        assert!((snap.steps_per_session() - 5.0).abs() < 1e-9);
2176    }
2177
2178    #[test]
2179    fn test_runtime_metrics_p50_latency_ms_zero_when_no_data() {
2180        let m = RuntimeMetrics::new();
2181        assert_eq!(m.p50_latency_ms(), 0);
2182    }
2183
2184    #[test]
2185    fn test_runtime_metrics_p50_latency_ms_matches_histogram_p50() {
2186        let m = RuntimeMetrics::new();
2187        for ms in [10_u64, 50, 100, 200, 500] {
2188            m.step_latency.record(ms);
2189        }
2190        assert_eq!(m.p50_latency_ms(), m.step_latency.p50());
2191    }
2192
2193    // ── Round 25: histogram p25/p75/p90/min, has_data; snapshot helpers ───────
2194
2195    #[test]
2196    fn test_latency_histogram_has_data_false_when_empty() {
2197        let h = LatencyHistogram::default();
2198        assert!(!h.has_data());
2199    }
2200
2201    #[test]
2202    fn test_latency_histogram_has_data_true_after_record() {
2203        let h = LatencyHistogram::default();
2204        h.record(100);
2205        assert!(h.has_data());
2206    }
2207
2208    #[test]
2209    fn test_latency_histogram_min_ms_none_when_empty() {
2210        let h = LatencyHistogram::default();
2211        assert_eq!(h.min_ms(), None);
2212    }
2213
2214    #[test]
2215    fn test_latency_histogram_min_ms_some_after_record() {
2216        let h = LatencyHistogram::default();
2217        h.record(50);
2218        assert!(h.min_ms().is_some());
2219    }
2220
2221    #[test]
2222    fn test_latency_histogram_p25_lte_p75() {
2223        let h = LatencyHistogram::default();
2224        for ms in [10_u64, 50, 100, 200, 500, 1000, 2000, 5000] {
2225            h.record(ms);
2226        }
2227        assert!(h.p25() <= h.p75());
2228    }
2229
2230    #[test]
2231    fn test_latency_histogram_p90_between_p50_and_p99() {
2232        let h = LatencyHistogram::default();
2233        for ms in [10_u64, 50, 100, 200, 500] {
2234            h.record(ms);
2235        }
2236        assert!(h.p50() <= h.p90());
2237        assert!(h.p90() <= h.p99());
2238    }
2239
2240    #[test]
2241    fn test_metrics_snapshot_tool_success_count_correct() {
2242        let snap = MetricsSnapshot {
2243            per_tool_calls: [("search".to_string(), 10u64)].into(),
2244            per_tool_failures: [("search".to_string(), 3u64)].into(),
2245            ..Default::default()
2246        };
2247        assert_eq!(snap.tool_success_count("search"), 7);
2248    }
2249
2250    #[test]
2251    fn test_metrics_snapshot_tool_success_count_zero_for_unknown_tool() {
2252        let snap = MetricsSnapshot::default();
2253        assert_eq!(snap.tool_success_count("unknown"), 0);
2254    }
2255
2256    #[test]
2257    fn test_metrics_snapshot_tool_failure_rate_correct_ratio() {
2258        let snap = MetricsSnapshot {
2259            per_tool_calls: [("lookup".to_string(), 4u64)].into(),
2260            per_tool_failures: [("lookup".to_string(), 1u64)].into(),
2261            ..Default::default()
2262        };
2263        assert!((snap.tool_failure_rate("lookup") - 0.25).abs() < 1e-9);
2264    }
2265
2266    #[test]
2267    fn test_metrics_snapshot_tool_failure_rate_zero_for_unknown_tool() {
2268        let snap = MetricsSnapshot::default();
2269        assert!((snap.tool_failure_rate("none") - 0.0).abs() < 1e-9);
2270    }
2271
2272    #[test]
2273    fn test_metrics_snapshot_total_successful_tool_calls() {
2274        let snap = MetricsSnapshot {
2275            total_tool_calls: 20,
2276            failed_tool_calls: 5,
2277            ..Default::default()
2278        };
2279        assert_eq!(snap.total_successful_tool_calls(), 15);
2280    }
2281
2282    #[test]
2283    fn test_runtime_metrics_per_tool_calls_snapshot_increments() {
2284        let m = RuntimeMetrics::new();
2285        m.record_tool_call("search");
2286        m.record_tool_call("search");
2287        m.record_tool_call("lookup");
2288        let snap = m.per_tool_calls_snapshot();
2289        assert_eq!(snap.get("search"), Some(&2));
2290        assert_eq!(snap.get("lookup"), Some(&1));
2291    }
2292
2293    #[test]
2294    fn test_runtime_metrics_per_tool_failures_snapshot() {
2295        let m = RuntimeMetrics::new();
2296        m.record_tool_call("search");
2297        m.record_tool_failure("search");
2298        let snap = m.per_tool_failures_snapshot();
2299        assert_eq!(snap.get("search"), Some(&1));
2300    }
2301
2302    #[test]
2303    fn test_runtime_metrics_record_agent_tool_call_tracked() {
2304        let m = RuntimeMetrics::new();
2305        m.record_agent_tool_call("agent-1", "search");
2306        m.record_agent_tool_call("agent-1", "search");
2307        let snap = m.per_agent_tool_calls_snapshot();
2308        assert_eq!(snap.get("agent-1").and_then(|t| t.get("search")), Some(&2));
2309    }
2310
2311    #[test]
2312    fn test_runtime_metrics_per_agent_tool_failures_snapshot() {
2313        let m = RuntimeMetrics::new();
2314        m.record_agent_tool_failure("agent-2", "lookup");
2315        let snap = m.per_agent_tool_failures_snapshot();
2316        assert_eq!(
2317            snap.get("agent-2").and_then(|t| t.get("lookup")),
2318            Some(&1)
2319        );
2320    }
2321
2322    // ── Round 24: coefficient_of_variation ────────────────────────────────────
2323
2324    #[test]
2325    fn test_coefficient_of_variation_zero_when_empty() {
2326        let h = LatencyHistogram::default();
2327        assert!((h.coefficient_of_variation() - 0.0).abs() < 1e-9);
2328    }
2329
2330    #[test]
2331    fn test_coefficient_of_variation_positive_with_spread() {
2332        let h = LatencyHistogram::default();
2333        // Wide spread: 10ms and 1000ms — std_dev should be significant
2334        for _ in 0..50 {
2335            h.record(10);
2336        }
2337        for _ in 0..50 {
2338            h.record(1000);
2339        }
2340        let cv = h.coefficient_of_variation();
2341        assert!(cv > 0.0, "CV should be positive for spread data, got {cv}");
2342    }
2343
2344    #[test]
2345    fn test_coefficient_of_variation_near_zero_for_uniform_data() {
2346        let h = LatencyHistogram::default();
2347        // All the same latency bucket → std_dev ≈ 0
2348        for _ in 0..100 {
2349            h.record(50);
2350        }
2351        // CV won't be exactly 0 due to bucket approximation, but should be small
2352        assert!(h.coefficient_of_variation() < 1.0);
2353    }
2354
2355    // ── Round 31: LatencyHistogram::percentile, RuntimeMetrics helpers ────────
2356
2357    #[test]
2358    fn test_latency_histogram_percentile_zero_when_empty() {
2359        let h = LatencyHistogram::default();
2360        assert_eq!(h.percentile(0.5), 0);
2361    }
2362
2363    #[test]
2364    fn test_latency_histogram_percentile_50_matches_p50() {
2365        let h = LatencyHistogram::default();
2366        for ms in [10, 20, 30, 40, 50] {
2367            h.record(ms);
2368        }
2369        assert_eq!(h.percentile(0.5), h.p50());
2370    }
2371
2372    #[test]
2373    fn test_latency_histogram_percentile_99_matches_p99() {
2374        let h = LatencyHistogram::default();
2375        for ms in [10, 50, 100, 500, 1000] {
2376            h.record(ms);
2377        }
2378        assert_eq!(h.percentile(0.99), h.p99());
2379    }
2380
2381    #[test]
2382    fn test_runtime_metrics_record_agent_tool_failure_appears_in_snapshot() {
2383        let m = RuntimeMetrics::new();
2384        m.record_agent_tool_failure("agent-1", "search_tool");
2385        let snapshot = m.per_agent_tool_failures_snapshot();
2386        assert_eq!(snapshot.get("agent-1").and_then(|t| t.get("search_tool")), Some(&1));
2387    }
2388
2389    #[test]
2390    fn test_runtime_metrics_per_agent_tool_calls_snapshot_empty_initially() {
2391        let m = RuntimeMetrics::new();
2392        assert!(m.per_agent_tool_calls_snapshot().is_empty());
2393    }
2394
2395    #[test]
2396    fn test_runtime_metrics_record_step_latency_is_reflected_in_p50() {
2397        let m = RuntimeMetrics::new();
2398        for _ in 0..20 {
2399            m.record_step_latency(100);
2400        }
2401        // After recording 20 samples at 100ms, step latency p50 must be around 100ms.
2402        // We verify the operation doesn't panic and changes the histogram state.
2403        let snap = m.snapshot();
2404        assert!(snap.total_sessions == 0); // unrelated sanity check
2405    }
2406
2407    // ── Round 26: has_errors / is_above_p99 ───────────────────────────────────
2408
2409    #[test]
2410    fn test_metrics_snapshot_has_errors_false_when_clean() {
2411        let snap = MetricsSnapshot::default();
2412        assert!(!snap.has_errors());
2413    }
2414
2415    #[test]
2416    fn test_metrics_snapshot_has_errors_true_when_failed_tool_calls() {
2417        let snap = MetricsSnapshot { failed_tool_calls: 2, ..Default::default() };
2418        assert!(snap.has_errors());
2419    }
2420
2421    #[test]
2422    fn test_metrics_snapshot_has_errors_true_when_checkpoint_errors() {
2423        let snap = MetricsSnapshot { checkpoint_errors: 1, ..Default::default() };
2424        assert!(snap.has_errors());
2425    }
2426
2427    #[test]
2428    fn test_latency_histogram_is_above_p99_false_for_low_latency() {
2429        let h = LatencyHistogram::default();
2430        for _ in 0..200 {
2431            h.record(50);
2432        }
2433        assert!(!h.is_above_p99(50));
2434    }
2435
2436    #[test]
2437    fn test_latency_histogram_is_above_p99_true_for_high_latency() {
2438        let h = LatencyHistogram::default();
2439        for _ in 0..200 {
2440            h.record(50);
2441        }
2442        // p99 will be ~50ms; 10_000ms should be well above it
2443        assert!(h.is_above_p99(10_000));
2444    }
2445
2446    // ── Round 27: sample_count / tool_call_rate ───────────────────────────────
2447
2448    #[test]
2449    fn test_latency_histogram_sample_count_zero_when_empty() {
2450        let h = LatencyHistogram::default();
2451        assert_eq!(h.sample_count(), 0);
2452    }
2453
2454    #[test]
2455    fn test_latency_histogram_sample_count_matches_records() {
2456        let h = LatencyHistogram::default();
2457        for _ in 0..7 {
2458            h.record(100);
2459        }
2460        assert_eq!(h.sample_count(), 7);
2461    }
2462
2463    #[test]
2464    fn test_metrics_snapshot_tool_call_rate_zero_when_no_sessions() {
2465        let snap = MetricsSnapshot::default();
2466        assert!((snap.tool_call_rate() - 0.0).abs() < 1e-9);
2467    }
2468
2469    #[test]
2470    fn test_metrics_snapshot_tool_call_rate_correct_ratio() {
2471        let snap = MetricsSnapshot {
2472            total_sessions: 4,
2473            total_tool_calls: 20,
2474            ..Default::default()
2475        };
2476        assert!((snap.tool_call_rate() - 5.0).abs() < 1e-9);
2477    }
2478
2479    // ── Round 28: backpressure_rate / percentile_spread ───────────────────────
2480
2481    #[test]
2482    fn test_metrics_snapshot_backpressure_rate_zero_when_no_sessions() {
2483        let snap = MetricsSnapshot::default();
2484        assert!((snap.backpressure_rate() - 0.0).abs() < 1e-9);
2485    }
2486
2487    #[test]
2488    fn test_metrics_snapshot_backpressure_rate_correct_ratio() {
2489        let snap = MetricsSnapshot {
2490            total_sessions: 2,
2491            backpressure_shed_count: 4,
2492            ..Default::default()
2493        };
2494        assert!((snap.backpressure_rate() - 2.0).abs() < 1e-9);
2495    }
2496
2497    #[test]
2498    fn test_latency_histogram_percentile_spread_zero_when_empty() {
2499        let h = LatencyHistogram::default();
2500        assert_eq!(h.percentile_spread(), 0);
2501    }
2502
2503    #[test]
2504    fn test_latency_histogram_percentile_spread_nonnegative() {
2505        let h = LatencyHistogram::default();
2506        for _ in 0..100 {
2507            h.record(50);
2508        }
2509        for _ in 0..5 {
2510            h.record(500);
2511        }
2512        assert!(h.percentile_spread() >= 0);
2513    }
2514
2515    // ── Round 29: memory_efficiency / is_uniform ──────────────────────────────
2516
2517    #[test]
2518    fn test_metrics_snapshot_memory_efficiency_zero_when_no_steps() {
2519        let snap = MetricsSnapshot::default();
2520        assert!((snap.memory_efficiency() - 0.0).abs() < 1e-9);
2521    }
2522
2523    #[test]
2524    fn test_metrics_snapshot_memory_efficiency_correct_ratio() {
2525        let snap = MetricsSnapshot {
2526            total_steps: 10,
2527            memory_recall_count: 4,
2528            ..Default::default()
2529        };
2530        assert!((snap.memory_efficiency() - 0.4).abs() < 1e-9);
2531    }
2532
2533    #[test]
2534    fn test_latency_histogram_is_uniform_true_when_empty() {
2535        let h = LatencyHistogram::default();
2536        assert!(h.is_uniform());
2537    }
2538
2539    #[test]
2540    fn test_latency_histogram_is_uniform_true_for_single_bucket() {
2541        let h = LatencyHistogram::default();
2542        for _ in 0..50 {
2543            h.record(50); // all in same bucket
2544        }
2545        assert!(h.is_uniform());
2546    }
2547
2548    #[test]
2549    fn test_latency_histogram_is_uniform_false_for_mixed_latencies() {
2550        let h = LatencyHistogram::default();
2551        h.record(1);
2552        h.record(1000);
2553        assert!(!h.is_uniform());
2554    }
2555
2556    // ── Round 30: bucket_counts / active_session_ratio ────────────────────────
2557
2558    #[test]
2559    fn test_latency_histogram_bucket_counts_all_zero_when_empty() {
2560        let h = LatencyHistogram::default();
2561        assert_eq!(h.bucket_counts(), [0u64; 7]);
2562    }
2563
2564    #[test]
2565    fn test_latency_histogram_bucket_counts_increments_correct_bucket() {
2566        let h = LatencyHistogram::default();
2567        h.record(1); // should go into the first bucket (≤1ms)
2568        let counts = h.bucket_counts();
2569        assert_eq!(counts[0], 1);
2570        assert!(counts[1..].iter().all(|&c| c == 0));
2571    }
2572
2573    #[test]
2574    fn test_metrics_snapshot_active_session_ratio_zero_when_no_sessions() {
2575        let snap = MetricsSnapshot::default();
2576        assert!((snap.active_session_ratio() - 0.0).abs() < 1e-9);
2577    }
2578
2579    #[test]
2580    fn test_metrics_snapshot_active_session_ratio_correct() {
2581        let snap = MetricsSnapshot {
2582            total_sessions: 10,
2583            active_sessions: 3,
2584            ..Default::default()
2585        };
2586        assert!((snap.active_session_ratio() - 0.3).abs() < 1e-9);
2587    }
2588
2589    #[test]
2590    fn test_step_to_tool_ratio_correct_value() {
2591        let snap = MetricsSnapshot {
2592            total_steps: 4,
2593            total_tool_calls: 2,
2594            ..Default::default()
2595        };
2596        assert!((snap.step_to_tool_ratio() - 0.5).abs() < 1e-9);
2597    }
2598
2599    #[test]
2600    fn test_step_to_tool_ratio_zero_steps_returns_zero() {
2601        let snap = MetricsSnapshot {
2602            total_steps: 0,
2603            total_tool_calls: 5,
2604            ..Default::default()
2605        };
2606        assert_eq!(snap.step_to_tool_ratio(), 0.0);
2607    }
2608
2609    #[test]
2610    fn test_latency_histogram_min_occupied_ms_returns_smallest_occupied_bucket() {
2611        let h = LatencyHistogram::default();
2612        h.record(10); // falls in ≤10ms bucket (bound = 10)
2613        h.record(200); // falls in ≤500ms bucket
2614        // min_occupied should be the ≤10ms bucket bound = 10
2615        assert_eq!(h.min_occupied_ms(), Some(10));
2616    }
2617
2618    #[test]
2619    fn test_latency_histogram_min_occupied_ms_empty_returns_none() {
2620        let h = LatencyHistogram::default();
2621        assert_eq!(h.min_occupied_ms(), None);
2622    }
2623
2624    #[test]
2625    fn test_metrics_snapshot_has_failures_true_when_failures_exist() {
2626        let snap = MetricsSnapshot {
2627            failed_tool_calls: 1,
2628            ..Default::default()
2629        };
2630        assert!(snap.has_failures());
2631    }
2632
2633    #[test]
2634    fn test_metrics_snapshot_has_failures_false_when_no_failures() {
2635        let snap = MetricsSnapshot::default();
2636        assert!(!snap.has_failures());
2637    }
2638
2639    #[test]
2640    fn test_latency_histogram_max_occupied_ms_returns_largest_occupied_bucket() {
2641        let h = LatencyHistogram::default();
2642        h.record(5);   // ≤5ms bucket
2643        h.record(200); // ≤500ms bucket
2644        assert_eq!(h.max_occupied_ms(), Some(500));
2645    }
2646
2647    #[test]
2648    fn test_latency_histogram_max_occupied_ms_empty_returns_none() {
2649        let h = LatencyHistogram::default();
2650        assert_eq!(h.max_occupied_ms(), None);
2651    }
2652
2653    #[test]
2654    fn test_latency_histogram_occupied_bucket_count_correct() {
2655        let h = LatencyHistogram::default();
2656        h.record(5);   // bucket 1
2657        h.record(200); // bucket 5
2658        assert_eq!(h.occupied_bucket_count(), 2);
2659    }
2660
2661    #[test]
2662    fn test_latency_histogram_occupied_bucket_count_empty_returns_zero() {
2663        let h = LatencyHistogram::default();
2664        assert_eq!(h.occupied_bucket_count(), 0);
2665    }
2666
2667    #[test]
2668    fn test_metrics_snapshot_tool_diversity_counts_distinct_tools() {
2669        let snap = MetricsSnapshot {
2670            per_tool_calls: [("a".to_string(), 1u64), ("b".to_string(), 2u64)]
2671                .into_iter()
2672                .collect(),
2673            ..Default::default()
2674        };
2675        assert_eq!(snap.tool_diversity(), 2);
2676    }
2677
2678    #[test]
2679    fn test_metrics_snapshot_tool_diversity_empty_returns_zero() {
2680        let snap = MetricsSnapshot::default();
2681        assert_eq!(snap.tool_diversity(), 0);
2682    }
2683
2684    #[test]
2685    fn test_runtime_metrics_total_step_latency_ms_sums_recorded_latencies() {
2686        let m = RuntimeMetrics::new();
2687        m.record_step_latency(100);
2688        m.record_step_latency(200);
2689        assert_eq!(m.total_step_latency_ms(), 300);
2690    }
2691
2692    #[test]
2693    fn test_runtime_metrics_total_step_latency_ms_zero_when_empty() {
2694        let m = RuntimeMetrics::new();
2695        assert_eq!(m.total_step_latency_ms(), 0);
2696    }
2697
2698    #[test]
2699    fn test_metrics_snapshot_avg_failures_per_session_correct() {
2700        let snap = MetricsSnapshot {
2701            total_sessions: 4,
2702            failed_tool_calls: 2,
2703            ..Default::default()
2704        };
2705        assert!((snap.avg_failures_per_session() - 0.5).abs() < 1e-9);
2706    }
2707
2708    #[test]
2709    fn test_metrics_snapshot_avg_failures_per_session_zero_when_no_sessions() {
2710        let snap = MetricsSnapshot::default();
2711        assert_eq!(snap.avg_failures_per_session(), 0.0);
2712    }
2713
2714    #[test]
2715    fn test_latency_histogram_is_skewed_true_when_p99_much_greater_than_p50() {
2716        let h = LatencyHistogram::default();
2717        // Record many fast samples and one very slow one to skew p99
2718        for _ in 0..100 {
2719            h.record(1); // ≤1ms
2720        }
2721        h.record(500); // very slow
2722        // p50 = 1, p99 depends on bucket counts
2723        // With 100 samples in ≤1ms and 1 in ≤500ms, p99 should be 1ms too
2724        // Let's just verify the method doesn't panic
2725        let _ = h.is_skewed();
2726    }
2727
2728    #[test]
2729    fn test_latency_histogram_is_skewed_false_when_empty() {
2730        let h = LatencyHistogram::default();
2731        assert!(!h.is_skewed());
2732    }
2733
2734    // ── Round 36 ──────────────────────────────────────────────────────────────
2735
2736    #[test]
2737    fn test_most_called_tool_returns_tool_with_most_calls() {
2738        let snap = MetricsSnapshot {
2739            per_tool_calls: [
2740                ("search".to_string(), 5u64),
2741                ("write".to_string(), 2u64),
2742            ]
2743            .into_iter()
2744            .collect(),
2745            ..Default::default()
2746        };
2747        assert_eq!(snap.most_called_tool(), Some("search".to_string()));
2748    }
2749
2750    #[test]
2751    fn test_most_called_tool_returns_none_when_empty() {
2752        let snap = MetricsSnapshot::default();
2753        assert!(snap.most_called_tool().is_none());
2754    }
2755
2756    #[test]
2757    fn test_tool_names_with_failures_returns_sorted_names_with_failures() {
2758        let snap = MetricsSnapshot {
2759            per_tool_failures: [
2760                ("search".to_string(), 3u64),
2761                ("write".to_string(), 0u64),
2762                ("calc".to_string(), 1u64),
2763            ]
2764            .into_iter()
2765            .collect(),
2766            ..Default::default()
2767        };
2768        assert_eq!(snap.tool_names_with_failures(), vec!["calc", "search"]);
2769    }
2770
2771    #[test]
2772    fn test_tool_names_with_failures_empty_when_no_failures() {
2773        let snap = MetricsSnapshot::default();
2774        assert!(snap.tool_names_with_failures().is_empty());
2775    }
2776
2777    // ── Round 37 ──────────────────────────────────────────────────────────────
2778
2779    #[test]
2780    fn test_agent_with_most_calls_returns_highest_total() {
2781        let snap = MetricsSnapshot {
2782            per_agent_tool_calls: [
2783                ("agent_a".to_string(), [("search".to_string(), 3u64), ("write".to_string(), 2u64)].into_iter().collect()),
2784                ("agent_b".to_string(), [("search".to_string(), 1u64)].into_iter().collect()),
2785            ]
2786            .into_iter()
2787            .collect(),
2788            ..Default::default()
2789        };
2790        assert_eq!(snap.agent_with_most_calls(), Some("agent_a".to_string()));
2791    }
2792
2793    #[test]
2794    fn test_agent_with_most_calls_returns_none_when_empty() {
2795        let snap = MetricsSnapshot::default();
2796        assert!(snap.agent_with_most_calls().is_none());
2797    }
2798
2799    // ── Round 38 ──────────────────────────────────────────────────────────────
2800
2801    #[test]
2802    fn test_total_agent_count_returns_number_of_distinct_agents() {
2803        let snap = MetricsSnapshot {
2804            per_agent_tool_calls: [
2805                ("a".to_string(), std::collections::HashMap::new()),
2806                ("b".to_string(), std::collections::HashMap::new()),
2807            ]
2808            .into_iter()
2809            .collect(),
2810            ..Default::default()
2811        };
2812        assert_eq!(snap.total_agent_count(), 2);
2813    }
2814
2815    #[test]
2816    fn test_total_agent_count_zero_when_empty() {
2817        let snap = MetricsSnapshot::default();
2818        assert_eq!(snap.total_agent_count(), 0);
2819    }
2820
2821    #[test]
2822    fn test_steps_per_tool_call_returns_ratio() {
2823        let snap = MetricsSnapshot {
2824            total_steps: 10,
2825            total_tool_calls: 5,
2826            ..Default::default()
2827        };
2828        assert!((snap.steps_per_tool_call() - 2.0).abs() < 1e-9);
2829    }
2830
2831    #[test]
2832    fn test_steps_per_tool_call_zero_when_no_tool_calls() {
2833        let snap = MetricsSnapshot::default();
2834        assert_eq!(snap.steps_per_tool_call(), 0.0);
2835    }
2836
2837    // ── Round 39 ──────────────────────────────────────────────────────────────
2838
2839    #[test]
2840    fn test_failed_tool_ratio_for_returns_failure_rate() {
2841        let snap = MetricsSnapshot {
2842            per_tool_calls: [("tool".to_string(), 10u64)].into_iter().collect(),
2843            per_tool_failures: [("tool".to_string(), 2u64)].into_iter().collect(),
2844            ..Default::default()
2845        };
2846        assert!((snap.failed_tool_ratio_for("tool") - 0.2).abs() < 1e-9);
2847    }
2848
2849    #[test]
2850    fn test_failed_tool_ratio_for_zero_when_no_calls() {
2851        let snap = MetricsSnapshot::default();
2852        assert_eq!(snap.failed_tool_ratio_for("missing"), 0.0);
2853    }
2854
2855    #[test]
2856    fn test_backpressure_shed_rate_returns_ratio() {
2857        let snap = MetricsSnapshot {
2858            total_tool_calls: 100,
2859            backpressure_shed_count: 5,
2860            ..Default::default()
2861        };
2862        assert!((snap.backpressure_shed_rate() - 0.05).abs() < 1e-9);
2863    }
2864
2865    #[test]
2866    fn test_backpressure_shed_rate_zero_when_no_tool_calls() {
2867        let snap = MetricsSnapshot::default();
2868        assert_eq!(snap.backpressure_shed_rate(), 0.0);
2869    }
2870}