// File: llm_agent_runtime/metrics.rs
//! # Module: Metrics
//!
//! Runtime observability counters for `AgentRuntime`.
//! All global counters use atomics for lock-free, thread-safe increment/read.
//! Per-tool counters use a `Mutex<HashMap>` to avoid requiring a concurrent
//! map dependency.
8use serde::{Deserialize, Serialize};
9use std::collections::HashMap;
10use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
11use std::sync::{Arc, Mutex};
12
// ── LatencyHistogram ──────────────────────────────────────────────────────────

/// A simple fixed-bucket latency histogram.
///
/// Bucket upper bounds are **inclusive** (i.e., a sample of exactly 1 ms falls into bucket 0).
/// Bucket index mapping:
/// - 0: ≤ 1 ms
/// - 1: 2 – 5 ms
/// - 2: 6 – 10 ms
/// - 3: 11 – 50 ms
/// - 4: 51 – 100 ms
/// - 5: 101 – 500 ms
/// - 6: > 500 ms
///
/// All fields are atomics, so the histogram can be shared (`&self` methods
/// only) and updated concurrently without locks.
#[derive(Debug)]
pub struct LatencyHistogram {
    /// Counts per bucket. Index 0 = ≤1ms, …, index 6 = >500ms.
    /// Bucket upper bounds are **inclusive**.
    buckets: [AtomicU64; 7],
    /// Total number of samples recorded; denominator for mean/percentiles.
    total_count: AtomicU64,
    /// Running sum of all samples in ms; numerator for `mean_ms`.
    /// NOTE(review): `fetch_add` wraps on overflow — fine for realistic
    /// volumes, but worth confirming for very long-lived processes.
    total_sum_ms: AtomicU64,
}
34
35impl Default for LatencyHistogram {
36    fn default() -> Self {
37        Self {
38            buckets: [
39                AtomicU64::new(0),
40                AtomicU64::new(0),
41                AtomicU64::new(0),
42                AtomicU64::new(0),
43                AtomicU64::new(0),
44                AtomicU64::new(0),
45                AtomicU64::new(0),
46            ],
47            total_count: AtomicU64::new(0),
48            total_sum_ms: AtomicU64::new(0),
49        }
50    }
51}
52
53impl LatencyHistogram {
54    /// Bucket upper bounds in milliseconds.
55    ///
56    /// Boundaries were chosen to cover the full range of observed latencies in
57    /// LLM-backed agent systems:
58    ///
59    /// | Bucket | Range      | Typical source                         |
60    /// |--------|------------|----------------------------------------|
61    /// | 0      | ≤ 1 ms     | In-process tool calls, cache hits      |
62    /// | 1      | ≤ 5 ms     | Fast local I/O, simple calculations    |
63    /// | 2      | ≤ 10 ms    | Network round-trips to local services  |
64    /// | 3      | ≤ 50 ms    | p50 LLM token latency (streaming)      |
65    /// | 4      | ≤ 100 ms   | p95 for small LLM completions          |
66    /// | 5      | ≤ 500 ms   | p99 for medium LLM completions         |
67    /// | 6      | > 500 ms   | Slow completions, network retries      |
68    const BOUNDS: [u64; 7] = [1, 5, 10, 50, 100, 500, u64::MAX];
69
70    /// Record a latency sample in milliseconds.
71    pub fn record(&self, ms: u64) {
72        self.total_count.fetch_add(1, Ordering::Relaxed);
73        self.total_sum_ms.fetch_add(ms, Ordering::Relaxed);
74        for (i, &bound) in Self::BOUNDS.iter().enumerate() {
75            if ms <= bound {
76                self.buckets[i].fetch_add(1, Ordering::Relaxed);
77                return;
78            }
79        }
80    }
81
82    /// Return the mean latency in ms, or 0.0 if no samples.
83    pub fn mean_ms(&self) -> f64 {
84        let count = self.total_count.load(Ordering::Relaxed);
85        if count == 0 {
86            return 0.0;
87        }
88        self.total_sum_ms.load(Ordering::Relaxed) as f64 / count as f64
89    }
90
91    /// Return a bucket-midpoint approximation of the standard deviation in ms.
92    ///
93    /// Uses the midpoint of each histogram bucket to estimate the second moment,
94    /// then applies `√(E[X²] − E[X]²)`.  Returns `0.0` when fewer than two
95    /// samples have been recorded.
96    ///
97    /// # Accuracy
98    /// The result is an estimate; its accuracy improves as the sample count
99    /// increases and degrades near the boundaries of wide buckets.
100    pub fn std_dev_ms(&self) -> f64 {
101        let count = self.total_count.load(Ordering::Relaxed);
102        if count < 2 {
103            return 0.0;
104        }
105        const MIDS: [f64; 7] = [0.5, 3.0, 7.5, 30.0, 75.0, 300.0, 500.0];
106        let (sum, sum_sq): (f64, f64) = self
107            .buckets
108            .iter()
109            .zip(MIDS.iter())
110            .map(|(b, &m)| {
111                let c = b.load(Ordering::Relaxed) as f64;
112                (c * m, c * m * m)
113            })
114            .fold((0.0, 0.0), |(s, ss), (v, v2)| (s + v, ss + v2));
115        let n = count as f64;
116        let variance = sum_sq / n - (sum / n) * (sum / n);
117        variance.max(0.0).sqrt()
118    }
119
120    /// Return the total sample count.
121    pub fn count(&self) -> u64 {
122        self.total_count.load(Ordering::Relaxed)
123    }
124
125    /// Return `true` if at least one sample has been recorded.
126    pub fn has_data(&self) -> bool {
127        self.count() > 0
128    }
129
130    /// Return `true` when no samples have been recorded yet.
131    pub fn is_empty(&self) -> bool {
132        self.count() == 0
133    }
134
135    /// Estimate the p-th percentile latency in milliseconds from the histogram.
136    ///
137    /// `p` must be in `[0.0, 1.0]`.  Returns the **upper bound** of the first
138    /// bucket that contains the p-th percentile.  Returns `0` if no samples
139    /// have been recorded.
140    ///
141    /// # Accuracy
142    ///
143    /// This is a bucket-boundary estimate, not an exact value.  The error is
144    /// bounded by the bucket width at that percentile.
145    pub fn percentile(&self, p: f64) -> u64 {
146        let total = self.total_count.load(Ordering::Relaxed);
147        if total == 0 {
148            return 0;
149        }
150        let target = (p.clamp(0.0, 1.0) * total as f64).ceil() as u64;
151        let mut cumulative = 0u64;
152        for (i, bucket) in self.buckets.iter().enumerate() {
153            cumulative += bucket.load(Ordering::Relaxed);
154            if cumulative >= target {
155                return Self::BOUNDS[i];
156            }
157        }
158        // All samples accounted for — return the last bound.
159        *Self::BOUNDS.last().unwrap_or(&u64::MAX)
160    }
161
162    /// Return the upper-bound of the bucket with the highest sample count (the mode).
163    ///
164    /// Returns `None` if no samples have been recorded.  When multiple buckets
165    /// tie for the maximum, the lowest-latency bucket is returned.
166    pub fn mode_bucket_ms(&self) -> Option<u64> {
167        if self.count() == 0 {
168            return None;
169        }
170        let (idx, _) = self
171            .buckets
172            .iter()
173            .enumerate()
174            .max_by_key(|(_, a)| a.load(Ordering::Relaxed))?;
175        Some(Self::BOUNDS[idx])
176    }
177
178    /// Return bucket counts as `(upper_bound_ms, count)` pairs.
179    pub fn buckets(&self) -> Vec<(u64, u64)> {
180        Self::BOUNDS
181            .iter()
182            .zip(self.buckets.iter())
183            .map(|(&b, a)| (b, a.load(Ordering::Relaxed)))
184            .collect()
185    }
186
187    /// Return the minimum recorded latency in ms, or `None` if no samples.
188    pub fn min_ms(&self) -> Option<u64> {
189        let total = self.total_count.load(Ordering::Relaxed);
190        if total == 0 {
191            return None;
192        }
193        // Walk buckets from the fastest; the first non-empty bucket's lower
194        // bound is 0 (or the previous bound), so return the upper bound as
195        // the conservative minimum estimate.
196        for (i, bucket) in self.buckets.iter().enumerate() {
197            if bucket.load(Ordering::Relaxed) > 0 {
198                return Some(if i == 0 { 0 } else { Self::BOUNDS[i - 1] + 1 });
199            }
200        }
201        None
202    }
203
204    /// Return the maximum recorded latency in ms, or `None` if no samples.
205    pub fn max_ms(&self) -> Option<u64> {
206        let total = self.total_count.load(Ordering::Relaxed);
207        if total == 0 {
208            return None;
209        }
210        // Walk from the slowest bucket; return the upper bound of the last non-empty bucket.
211        for (i, bucket) in self.buckets.iter().enumerate().rev() {
212            if bucket.load(Ordering::Relaxed) > 0 {
213                return Some(Self::BOUNDS[i]);
214            }
215        }
216        None
217    }
218
219    /// Return the spread (max − min) of recorded latencies in milliseconds.
220    ///
221    /// Returns `None` if no samples have been recorded.  A narrow range
222    /// indicates consistent latency; a wide range suggests outliers.
223    pub fn range_ms(&self) -> Option<u64> {
224        Some(self.max_ms()?.saturating_sub(self.min_ms()?))
225    }
226
227    /// Return the interquartile range (p75 − p25) in milliseconds.
228    ///
229    /// A measure of dispersion that is less sensitive to outliers than
230    /// [`range_ms`].  Returns `0` when fewer than two samples have been
231    /// recorded (p25 == p75 == 0).
232    ///
233    /// [`range_ms`]: LatencyHistogram::range_ms
234    pub fn interquartile_range_ms(&self) -> u64 {
235        self.p75().saturating_sub(self.p25())
236    }
237
238    /// Return the 50th-percentile (median) latency in milliseconds.
239    pub fn p50(&self) -> u64 {
240        self.percentile(0.50)
241    }
242
243    /// Return the 95th-percentile latency in milliseconds.
244    pub fn p95(&self) -> u64 {
245        self.percentile(0.95)
246    }
247
248    /// Return the 99th-percentile latency in milliseconds.
249    pub fn p99(&self) -> u64 {
250        self.percentile(0.99)
251    }
252
253    /// Return the 25th-percentile latency in milliseconds.
254    pub fn p25(&self) -> u64 {
255        self.percentile(0.25)
256    }
257
258    /// Return the 75th-percentile latency in milliseconds.
259    pub fn p75(&self) -> u64 {
260        self.percentile(0.75)
261    }
262
263    /// Return the 90th-percentile latency in milliseconds.
264    pub fn p90(&self) -> u64 {
265        self.percentile(0.90)
266    }
267
268    /// Return the 10th-percentile latency in milliseconds.
269    ///
270    /// Useful for assessing the "best case" tail of the distribution.
271    pub fn p10(&self) -> u64 {
272        self.percentile(0.10)
273    }
274
275    /// Return the median (50th-percentile) step latency in milliseconds.
276    ///
277    /// Convenience alias for `p50`; useful when callers want an explicit
278    /// "median" name without importing percentile constants.
279    pub fn median_ms(&self) -> u64 {
280        self.p50()
281    }
282
283    /// Reset all histogram counters to zero.
284    pub fn reset(&self) {
285        self.total_count.store(0, Ordering::Relaxed);
286        self.total_sum_ms.store(0, Ordering::Relaxed);
287        for bucket in &self.buckets {
288            bucket.store(0, Ordering::Relaxed);
289        }
290    }
291
292    /// Return the total sum of all recorded latency samples in milliseconds.
293    ///
294    /// Equivalent to `mean_ms() * count()` but avoids floating-point arithmetic.
295    pub fn sum_ms(&self) -> u64 {
296        self.total_sum_ms.load(Ordering::Relaxed)
297    }
298
299    /// Return the coefficient of variation: `std_dev_ms / mean_ms`.
300    ///
301    /// A value of `0.0` means no variation; higher values indicate more
302    /// spread in latency.  Returns `0.0` when `mean_ms` is zero (empty
303    /// histogram or all-zero samples) to avoid division by zero.
304    pub fn coefficient_of_variation(&self) -> f64 {
305        let mean = self.mean_ms();
306        if mean == 0.0 {
307            return 0.0;
308        }
309        self.std_dev_ms() / mean
310    }
311
312    /// Return the total number of samples recorded in this histogram.
313    pub fn sample_count(&self) -> u64 {
314        self.total_count.load(std::sync::atomic::Ordering::Relaxed)
315    }
316
317    /// Return the difference between the p99 and p50 latency buckets in
318    /// milliseconds.
319    ///
320    /// A larger spread indicates a long-tail latency distribution.
321    /// Returns `0` when no samples have been recorded.
322    pub fn percentile_spread(&self) -> u64 {
323        self.p99().saturating_sub(self.p50())
324    }
325
326    /// Return the count for each bucket as an array, in order from the
327    /// fastest (≤1ms) to the slowest (>500ms) bucket.
328    pub fn bucket_counts(&self) -> [u64; 7] {
329        let mut out = [0u64; 7];
330        for (i, b) in self.buckets.iter().enumerate() {
331            out[i] = b.load(std::sync::atomic::Ordering::Relaxed);
332        }
333        out
334    }
335
336    /// Return the upper bound (ms) of the lowest bucket that has at least one
337    /// sample, or `None` if no samples have been recorded.
338    pub fn min_occupied_ms(&self) -> Option<u64> {
339        Self::BOUNDS
340            .iter()
341            .zip(self.buckets.iter())
342            .find(|(_, b)| b.load(std::sync::atomic::Ordering::Relaxed) > 0)
343            .map(|(&bound, _)| bound)
344    }
345
346    /// Return the upper-bound of the largest bucket with at least one recorded sample.
347    ///
348    /// Returns `None` if the histogram is empty.
349    pub fn max_occupied_ms(&self) -> Option<u64> {
350        Self::BOUNDS
351            .iter()
352            .zip(self.buckets.iter())
353            .rev()
354            .find(|(_, b)| b.load(std::sync::atomic::Ordering::Relaxed) > 0)
355            .map(|(&bound, _)| bound)
356    }
357
358    /// Return the number of buckets that have at least one recorded sample.
359    pub fn occupied_bucket_count(&self) -> usize {
360        self.buckets
361            .iter()
362            .filter(|b| b.load(std::sync::atomic::Ordering::Relaxed) > 0)
363            .count()
364    }
365
366    /// Return `true` if the latency distribution is skewed (p99 > 2 × p50).
367    ///
368    /// Returns `false` for empty histograms.
369    pub fn is_skewed(&self) -> bool {
370        let p50 = self.p50();
371        if p50 == 0 {
372            return false;
373        }
374        self.p99() > 2 * p50
375    }
376
377    /// Return `true` if all recorded samples fall into exactly one bucket.
378    ///
379    /// An empty histogram is considered uniform.
380    pub fn is_uniform(&self) -> bool {
381        let non_empty = self
382            .buckets
383            .iter()
384            .filter(|b| b.load(std::sync::atomic::Ordering::Relaxed) > 0)
385            .count();
386        non_empty <= 1
387    }
388
389    /// Reset all histogram counters to zero.
390    ///
391    /// Alias for [`reset`] using more conventional naming.
392    ///
393    /// [`reset`]: LatencyHistogram::reset
394    pub fn clear(&self) {
395        self.reset();
396    }
397
398    /// Return `true` if `latency_ms` is strictly greater than the current p99.
399    ///
400    /// Useful for detecting outlier requests at call sites without storing
401    /// the p99 value separately.  Returns `false` when the histogram is empty.
402    pub fn is_above_p99(&self, latency_ms: u64) -> bool {
403        latency_ms > self.p99()
404    }
405
406    /// Return `true` if the p99 latency is strictly below `threshold_ms`.
407    ///
408    /// Useful for SLO checks.  Returns `true` when no samples have been
409    /// recorded (`p99 == 0`).
410    pub fn is_below_p99(&self, threshold_ms: u64) -> bool {
411        self.p99() < threshold_ms
412    }
413
414}
415
416impl MetricsSnapshot {
    /// Compute the difference between `after` and `before` (i.e., `after - before`).
    ///
    /// Useful for per-request instrumentation:
    /// ```rust,ignore
    /// let before = metrics.snapshot();
    /// // ... run one agent invocation ...
    /// let after = metrics.snapshot();
    /// let delta = MetricsSnapshot::delta(&after, &before);
    /// println!("steps this run: {}", delta.total_steps);
    /// ```
    ///
    /// Saturating subtraction is used so callers don't need to guard against
    /// races where a counter is read before the full increment propagates.
    ///
    /// NOTE(review): two fields do not follow the saturating-diff pattern —
    /// `step_latency_mean_ms` uses plain `f64` subtraction (the result is the
    /// difference of two cumulative means, which can be negative and is *not*
    /// the mean over the delta window), and the `per_agent_*` maps are copied
    /// from `after` wholesale rather than diffed.  Confirm callers expect this.
    pub fn delta(after: &Self, before: &Self) -> Self {
        Self {
            active_sessions: after.active_sessions.saturating_sub(before.active_sessions),
            total_sessions: after.total_sessions.saturating_sub(before.total_sessions),
            total_steps: after.total_steps.saturating_sub(before.total_steps),
            total_tool_calls: after.total_tool_calls.saturating_sub(before.total_tool_calls),
            failed_tool_calls: after.failed_tool_calls.saturating_sub(before.failed_tool_calls),
            backpressure_shed_count: after
                .backpressure_shed_count
                .saturating_sub(before.backpressure_shed_count),
            memory_recall_count: after
                .memory_recall_count
                .saturating_sub(before.memory_recall_count),
            checkpoint_errors: after
                .checkpoint_errors
                .saturating_sub(before.checkpoint_errors),
            // Start from `after`'s counts and subtract `before`'s per key.
            // A tool present only in `before` ends up at 0 (0.saturating_sub).
            per_tool_calls: {
                let mut m = after.per_tool_calls.clone();
                for (k, v) in &before.per_tool_calls {
                    let entry = m.entry(k.clone()).or_default();
                    *entry = entry.saturating_sub(*v);
                }
                m
            },
            per_tool_failures: {
                let mut m = after.per_tool_failures.clone();
                for (k, v) in &before.per_tool_failures {
                    let entry = m.entry(k.clone()).or_default();
                    *entry = entry.saturating_sub(*v);
                }
                m
            },
            // Buckets are paired positionally; bounds are taken from `after`
            // (assumes both snapshots use the same bucket layout).
            step_latency_buckets: after
                .step_latency_buckets
                .iter()
                .zip(before.step_latency_buckets.iter())
                .map(|((bound, a), (_, b))| (*bound, a.saturating_sub(*b)))
                .collect(),
            step_latency_mean_ms: after.step_latency_mean_ms - before.step_latency_mean_ms,
            per_agent_tool_calls: after.per_agent_tool_calls.clone(),
            per_agent_tool_failures: after.per_agent_tool_failures.clone(),
        }
    }
473
474    /// Serialize the snapshot to a `serde_json::Value` for logging or export.
475    pub fn to_json(&self) -> serde_json::Value {
476        serde_json::json!({
477            "active_sessions": self.active_sessions,
478            "total_sessions": self.total_sessions,
479            "total_steps": self.total_steps,
480            "total_tool_calls": self.total_tool_calls,
481            "failed_tool_calls": self.failed_tool_calls,
482            "backpressure_shed_count": self.backpressure_shed_count,
483            "memory_recall_count": self.memory_recall_count,
484            "step_latency_mean_ms": self.step_latency_mean_ms,
485            "per_tool_calls": self.per_tool_calls,
486            "per_tool_failures": self.per_tool_failures,
487        })
488    }
489
490    /// Return the number of calls recorded for the named tool.
491    ///
492    /// Returns `0` if no calls have been recorded for that tool name.
493    pub fn tool_call_count(&self, name: &str) -> u64 {
494        self.per_tool_calls.get(name).copied().unwrap_or(0)
495    }
496
497    /// Return a concise single-line summary of this snapshot.
498    ///
499    /// Format: `"sessions={n}, steps={n}, tool_calls={n}, failures={n}, latency_mean={n}ms"`.
500    /// Intended for logging and debugging — not a stable serialization format.
501    pub fn summary_line(&self) -> String {
502        format!(
503            "sessions={s}, steps={st}, tool_calls={tc}, failures={f}, latency_mean={l}ms",
504            s = self.total_sessions,
505            st = self.total_steps,
506            tc = self.total_tool_calls,
507            f = self.failed_tool_calls,
508            l = self.step_latency_mean_ms as u64,
509        )
510    }
511
512    /// Return the number of failures recorded for the named tool.
513    ///
514    /// Returns `0` if no failures have been recorded for that tool name.
515    pub fn tool_failure_count(&self, name: &str) -> u64 {
516        self.per_tool_failures.get(name).copied().unwrap_or(0)
517    }
518
519    /// Return a sorted list of tool names that have at least one recorded call.
520    pub fn tool_names(&self) -> Vec<&str> {
521        let mut names: Vec<&str> = self.per_tool_calls.keys().map(|s| s.as_str()).collect();
522        names.sort_unstable();
523        names
524    }
525
526    /// Return the overall tool-call failure rate as a value in `[0.0, 1.0]`.
527    ///
528    /// Returns `0.0` if no tool calls have been recorded.
529    pub fn failure_rate(&self) -> f64 {
530        if self.total_tool_calls == 0 {
531            return 0.0;
532        }
533        self.failed_tool_calls as f64 / self.total_tool_calls as f64
534    }
535
536    /// Return the overall tool-call success rate as a value in `[0.0, 1.0]`.
537    ///
538    /// Returns `1.0` if no tool calls have been recorded (vacuously all succeeded).
539    pub fn success_rate(&self) -> f64 {
540        1.0 - self.failure_rate()
541    }
542
543    /// Return the number of successful calls for the named tool.
544    ///
545    /// Computed as `tool_call_count(name) - tool_failure_count(name)`.
546    pub fn tool_success_count(&self, name: &str) -> u64 {
547        self.tool_call_count(name)
548            .saturating_sub(self.tool_failure_count(name))
549    }
550
551    /// Return the per-tool failure rate for the named tool.
552    ///
553    /// Returns `0.0` if no calls have been recorded for that tool.
554    pub fn tool_failure_rate(&self, name: &str) -> f64 {
555        let calls = self.tool_call_count(name);
556        if calls == 0 {
557            return 0.0;
558        }
559        self.tool_failure_count(name) as f64 / calls as f64
560    }
561
562    /// Return the total number of successful tool calls (total minus failed).
563    ///
564    /// Uses saturating subtraction so a race between `total_tool_calls`
565    /// and `failed_tool_calls` cannot produce an underflow.
566    pub fn total_successful_tool_calls(&self) -> u64 {
567        self.total_tool_calls.saturating_sub(self.failed_tool_calls)
568    }
569
570    /// Return `true` if all counters are zero (no activity has been recorded).
571    pub fn is_zero(&self) -> bool {
572        self.active_sessions == 0
573            && self.total_sessions == 0
574            && self.total_steps == 0
575            && self.total_tool_calls == 0
576            && self.failed_tool_calls == 0
577            && self.backpressure_shed_count == 0
578            && self.memory_recall_count == 0
579            && self.checkpoint_errors == 0
580    }
581
582    /// Return the average number of ReAct steps per completed session.
583    ///
584    /// Returns `0.0` when no sessions have been recorded, to avoid
585    /// division by zero.
586    pub fn avg_steps_per_session(&self) -> f64 {
587        if self.total_sessions == 0 {
588            0.0
589        } else {
590            self.total_steps as f64 / self.total_sessions as f64
591        }
592    }
593
594    /// Return the overall tool error rate: `failed_tool_calls / total_tool_calls`.
595    ///
596    /// Returns `0.0` when no tool calls have been recorded.
597    pub fn error_rate(&self) -> f64 {
598        if self.total_tool_calls == 0 {
599            return 0.0;
600        }
601        self.failed_tool_calls as f64 / self.total_tool_calls as f64
602    }
603
604    /// Return memory recalls per completed session.
605    ///
606    /// Returns `0.0` when no sessions have been recorded.
607    pub fn memory_recall_rate(&self) -> f64 {
608        if self.total_sessions == 0 {
609            return 0.0;
610        }
611        self.memory_recall_count as f64 / self.total_sessions as f64
612    }
613
614    /// Return the average number of ReAct steps per session.
615    ///
616    /// Alias for `avg_steps_per_session` on the snapshot type; returns `0.0`
617    /// when no sessions have been recorded.
618    pub fn steps_per_session(&self) -> f64 {
619        if self.total_sessions == 0 {
620            return 0.0;
621        }
622        self.total_steps as f64 / self.total_sessions as f64
623    }
624
625    /// Return `true` if the snapshot contains any error indicators.
626    ///
627    /// Specifically, `true` when `failed_tool_calls > 0` or
628    /// `checkpoint_errors > 0`.  The complement of "no errors" but distinct
629    /// from `!is_healthy()` which also considers backpressure sheds.
630    pub fn has_errors(&self) -> bool {
631        self.failed_tool_calls > 0 || self.checkpoint_errors > 0
632    }
633
634    /// Return `true` if the snapshot shows no error indicators.
635    ///
636    /// A "healthy" snapshot has zero failed tool calls, zero backpressure
637    /// sheds, and zero checkpoint errors.  Useful for quick health checks
638    /// in tests and monitoring.
639    pub fn is_healthy(&self) -> bool {
640        self.failed_tool_calls == 0
641            && self.backpressure_shed_count == 0
642            && self.checkpoint_errors == 0
643    }
644
645    /// Return `true` if this snapshot passes a parameterised health check.
646    ///
647    /// The check passes when all of the following hold:
648    /// 1. `failed_tool_calls == 0`
649    /// 2. `backpressure_shed_count == 0`
650    /// 3. `checkpoint_errors == 0`
651    /// 4. `step_latency_mean_ms <= max_latency_ms`
652    ///
653    /// Use this variant instead of [`is_healthy`] when you need to enforce an
654    /// explicit latency SLO — for example in an alerting callback.
655    ///
656    /// [`is_healthy`]: MetricsSnapshot::is_healthy
657    pub fn is_healthy_with_latency(&self, max_latency_ms: f64) -> bool {
658        self.is_healthy() && self.step_latency_mean_ms <= max_latency_ms
659    }
660
661    /// Return `true` if no tool calls have been recorded yet.
662    ///
663    /// A fresh snapshot (e.g. right after construction or after [`RuntimeMetrics::reset`])
664    /// has all counters at zero.  This predicate makes that condition explicit at call sites.
665    ///
666    /// [`RuntimeMetrics::reset`]: crate::metrics::RuntimeMetrics::reset
667    pub fn is_empty(&self) -> bool {
668        self.total_sessions == 0 && self.total_tool_calls == 0 && self.total_steps == 0
669    }
670
671    /// Return `true` if the tool failure rate exceeds `threshold`.
672    ///
673    /// `threshold` should be in `[0.0, 1.0]` (e.g. `0.1` for 10%).  Returns
674    /// `false` when no tool calls have been recorded (`failure_rate` is 0.0 in
675    /// that case).
676    ///
677    /// This is a softer signal than [`is_healthy`], which only checks for zero
678    /// failures.  Use `is_degraded` in alerting logic that needs a configurable
679    /// SLO threshold.
680    ///
681    /// [`is_healthy`]: MetricsSnapshot::is_healthy
682    pub fn is_degraded(&self, threshold: f64) -> bool {
683        self.failure_rate() > threshold
684    }
685
686    /// Return the average number of tool calls per session.
687    ///
688    /// Returns `0.0` when no sessions have been recorded.
689    pub fn tool_call_rate(&self) -> f64 {
690        if self.total_sessions == 0 {
691            return 0.0;
692        }
693        self.total_tool_calls as f64 / self.total_sessions as f64
694    }
695
696    /// Return the average number of backpressure shed events per session.
697    ///
698    /// Returns `0.0` when no sessions have been recorded.
699    pub fn backpressure_rate(&self) -> f64 {
700        if self.total_sessions == 0 {
701            return 0.0;
702        }
703        self.backpressure_shed_count as f64 / self.total_sessions as f64
704    }
705
706    /// Return the ratio of memory recalls to total steps.
707    ///
708    /// Returns `0.0` when no steps have been taken.
709    pub fn memory_efficiency(&self) -> f64 {
710        if self.total_steps == 0 {
711            return 0.0;
712        }
713        self.memory_recall_count as f64 / self.total_steps as f64
714    }
715
716    /// Return the fraction of sessions that are currently active.
717    ///
718    /// Returns `0.0` when no sessions have been started.
719    pub fn active_session_ratio(&self) -> f64 {
720        if self.total_sessions == 0 {
721            return 0.0;
722        }
723        self.active_sessions as f64 / self.total_sessions as f64
724    }
725
726    /// Return the average number of tool calls per step.
727    ///
728    /// Returns `0.0` when no steps have been taken.
729    pub fn step_to_tool_ratio(&self) -> f64 {
730        if self.total_steps == 0 {
731            return 0.0;
732        }
733        self.total_tool_calls as f64 / self.total_steps as f64
734    }
735
736    /// Return `true` if any tool-call failures have been recorded.
737    pub fn has_failures(&self) -> bool {
738        self.failed_tool_calls > 0
739    }
740
741    /// Return the number of distinct tool names that have been called at least once.
742    pub fn tool_diversity(&self) -> usize {
743        self.per_tool_calls.len()
744    }
745
746    /// Return the average number of tool-call failures per completed session.
747    ///
748    /// Returns `0.0` when no sessions have been recorded.
749    pub fn avg_failures_per_session(&self) -> f64 {
750        if self.total_sessions == 0 {
751            return 0.0;
752        }
753        self.failed_tool_calls as f64 / self.total_sessions as f64
754    }
755
756    /// Return the name of the tool with the most recorded calls.
757    ///
758    /// Returns `None` if no tool calls have been recorded.
759    pub fn most_called_tool(&self) -> Option<String> {
760        self.per_tool_calls
761            .iter()
762            .max_by_key(|(_, &v)| v)
763            .map(|(k, _)| k.clone())
764    }
765
766    /// Return a sorted list of tool names that have at least one recorded failure.
767    pub fn tool_names_with_failures(&self) -> Vec<String> {
768        let mut names: Vec<String> = self
769            .per_tool_failures
770            .iter()
771            .filter(|(_, &v)| v > 0)
772            .map(|(k, _)| k.clone())
773            .collect();
774        names.sort_unstable();
775        names
776    }
777
778    /// Return `true` if at least one tool has a recorded failure.
779    pub fn has_any_tool_failures(&self) -> bool {
780        self.per_tool_failures.values().any(|&v| v > 0)
781    }
782
783    /// Return sorted names of all tracked tools that have zero recorded failures.
784    ///
785    /// A tool that has never been called is included if it appears in the
786    /// `per_tool_calls` map with a count of zero.
787    pub fn tools_with_zero_failures(&self) -> Vec<String> {
788        let mut names: Vec<String> = self
789            .per_tool_calls
790            .keys()
791            .filter(|name| {
792                self.per_tool_failures
793                    .get(*name)
794                    .copied()
795                    .unwrap_or(0)
796                    == 0
797            })
798            .cloned()
799            .collect();
800        names.sort_unstable();
801        names
802    }
803
804    /// Return the sum of call counts across all tracked tools.
805    ///
806    /// This is the per-tool sum, which may differ from `total_tool_calls` if
807    /// the snapshot was produced from multiple sources.
808    pub fn total_tool_calls_count(&self) -> u64 {
809        self.per_tool_calls.values().sum()
810    }
811
812    /// Return the ratio of the most-called tool's count to the least-called
813    /// tool's count.
814    ///
815    /// Returns `1.0` when fewer than two tools are tracked (no imbalance
816    /// measurable) or when the minimum is zero.  A high ratio indicates that
817    /// load is concentrated on a single tool.
818    pub fn tool_call_imbalance(&self) -> f64 {
819        let counts: Vec<u64> = self.per_tool_calls.values().copied().collect();
820        if counts.len() < 2 {
821            return 1.0;
822        }
823        let max = counts.iter().copied().max().unwrap_or(0);
824        let min = counts.iter().copied().min().unwrap_or(0);
825        if min == 0 {
826            return 1.0;
827        }
828        max as f64 / min as f64
829    }
830
831    /// Return the failure rate for a specific tool (failures / calls).
832    ///
833    /// Returns `0.0` if the tool has no recorded calls.
834    pub fn failed_tool_ratio_for(&self, name: &str) -> f64 {
835        let calls = self.tool_call_count(name);
836        if calls == 0 {
837            return 0.0;
838        }
839        self.tool_failure_count(name) as f64 / calls as f64
840    }
841
842    /// Return the ratio of backpressure-shed events to total tool calls.
843    ///
844    /// Returns `0.0` if no tool calls have been recorded.
845    pub fn backpressure_shed_rate(&self) -> f64 {
846        if self.total_tool_calls == 0 {
847            return 0.0;
848        }
849        self.backpressure_shed_count as f64 / self.total_tool_calls as f64
850    }
851
852    /// Return the number of distinct agents that have recorded tool-call data.
853    pub fn total_agent_count(&self) -> usize {
854        self.per_agent_tool_calls.len()
855    }
856
857    /// Return the ratio of total steps to total tool calls.
858    ///
859    /// Returns `0.0` if no tool calls have been recorded.
860    pub fn steps_per_tool_call(&self) -> f64 {
861        if self.total_tool_calls == 0 {
862            return 0.0;
863        }
864        self.total_steps as f64 / self.total_tool_calls as f64
865    }
866
867    /// Return the agent id with the most total tool calls across all tools.
868    ///
869    /// Returns `None` if no per-agent tool-call data has been recorded.
870    pub fn agent_with_most_calls(&self) -> Option<String> {
871        self.per_agent_tool_calls
872            .iter()
873            .map(|(agent, tools)| (agent, tools.values().sum::<u64>()))
874            .max_by_key(|(_, total)| *total)
875            .map(|(agent, _)| agent.clone())
876    }
877
878    /// Return the total number of tool failures summed across all tools.
879    ///
880    /// This is the sum of `per_tool_failures` values and equals
881    /// `failed_tool_calls` when per-tool tracking is complete.  Useful for
882    /// verifying that failure tracking is consistent with overall counters.
883    pub fn total_tool_failures(&self) -> u64 {
884        self.per_tool_failures.values().sum()
885    }
886
887    /// Return the name of the tool with the fewest recorded calls.
888    ///
889    /// Returns `None` if no tool-call data has been recorded.  When multiple
890    /// tools share the minimum call count, any one of them may be returned.
891    pub fn least_called_tool(&self) -> Option<String> {
892        self.per_tool_calls
893            .iter()
894            .min_by_key(|(_, &count)| count)
895            .map(|(name, _)| name.clone())
896    }
897
898    /// Return the mean number of calls per distinct tool name.
899    ///
900    /// Returns `0.0` when no tool-call data has been recorded.
901    pub fn avg_tool_calls_per_name(&self) -> f64 {
902        let n = self.per_tool_calls.len();
903        if n == 0 {
904            return 0.0;
905        }
906        let total: u64 = self.per_tool_calls.values().sum();
907        total as f64 / n as f64
908    }
909
910    /// Return the number of distinct tool names that have more than `n` recorded calls.
911    ///
912    /// Returns `0` when no tool-call data has been recorded.
913    pub fn tool_call_count_above(&self, n: u64) -> usize {
914        self.per_tool_calls.values().filter(|&&count| count > n).count()
915    }
916
917    /// Return the top `n` tool names sorted by call count (descending).
918    ///
919    /// Returns fewer than `n` entries if fewer tools have been called.
920    /// Ties are broken alphabetically (ascending) for deterministic output.
921    pub fn top_n_tools_by_calls(&self, n: usize) -> Vec<(&str, u64)> {
922        let mut pairs: Vec<(&str, u64)> = self
923            .per_tool_calls
924            .iter()
925            .map(|(name, &count)| (name.as_str(), count))
926            .collect();
927        pairs.sort_unstable_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(b.0)));
928        pairs.truncate(n);
929        pairs
930    }
931
932    /// Return the fraction of total tool calls accounted for by `name`.
933    ///
934    /// Returns `0.0` if `total_tool_calls` is zero or `name` has no recorded
935    /// calls.  Returns a value in `[0.0, 1.0]`.
936    pub fn tool_call_ratio(&self, name: &str) -> f64 {
937        if self.total_tool_calls == 0 {
938            return 0.0;
939        }
940        let count = self.per_tool_calls.get(name).copied().unwrap_or(0);
941        count as f64 / self.total_tool_calls as f64
942    }
943
944    /// Return all per-tool call counts sorted by count descending.
945    ///
946    /// Returns a `Vec` of `(tool_name, count)` pairs where the first entry is
947    /// the most-called tool.  Returns an empty `Vec` when no calls have been
948    /// recorded.  Ties are broken alphabetically (ascending).
949    pub fn per_tool_calls_sorted(&self) -> Vec<(String, u64)> {
950        let mut pairs: Vec<(String, u64)> = self
951            .per_tool_calls
952            .iter()
953            .map(|(k, &v)| (k.clone(), v))
954            .collect();
955        pairs.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
956        pairs
957    }
958
959    /// Return `true` if `name` appears in the per-tool call map (i.e., was
960    /// called at least once), `false` otherwise.
961    pub fn has_tool(&self, name: &str) -> bool {
962        self.per_tool_calls.contains_key(name)
963    }
964
965    /// Return the fraction of total tool calls attributable to `name`.
966    ///
967    /// Returns `0.0` when `total_tool_calls` is zero or when `name` has no
968    /// recorded calls.  The result is in `[0.0, 1.0]`.
969    pub fn tool_call_share(&self, name: &str) -> f64 {
970        if self.total_tool_calls == 0 {
971            return 0.0;
972        }
973        let count = self.per_tool_calls.get(name).copied().unwrap_or(0);
974        count as f64 / self.total_tool_calls as f64
975    }
976
977    /// Return the number of distinct tool names that have at least one
978    /// recorded call.
979    ///
980    /// Returns `0` when no tool calls have been recorded.
981    pub fn distinct_tool_count(&self) -> usize {
982        self.per_tool_calls.len()
983    }
984
985    /// Return `true` if at least one tool call has been recorded.
986    ///
987    /// Equivalent to `self.total_tool_calls > 0`, provided as a convenience
988    /// predicate for guard clauses.
989    pub fn has_any_tool_calls(&self) -> bool {
990        self.total_tool_calls > 0
991    }
992
993    /// Return tool names sorted alphabetically.
994    ///
995    /// Only names that appear in the `per_tool_calls` map are included.
996    /// Returns an empty `Vec` when no tool calls have been recorded.
997    pub fn tool_names_alphabetical(&self) -> Vec<String> {
998        let mut names: Vec<String> = self.per_tool_calls.keys().cloned().collect();
999        names.sort_unstable();
1000        names
1001    }
1002
1003    /// Return the average number of failures per distinct tool.
1004    ///
1005    /// Computed as total recorded failures divided by the number of distinct
1006    /// tool names in `per_tool_calls`.  Returns `0.0` when no tool calls have
1007    /// been recorded.
1008    pub fn avg_failures_per_tool(&self) -> f64 {
1009        let count = self.per_tool_calls.len();
1010        if count == 0 {
1011            return 0.0;
1012        }
1013        let total_failures: u64 = self.per_tool_failures.values().sum();
1014        total_failures as f64 / count as f64
1015    }
1016
1017    /// Return the names of tools whose failure ratio (failures / calls) exceeds
1018    /// `threshold`, sorted alphabetically.
1019    ///
1020    /// Returns an empty `Vec` when no tool exceeds the threshold or when no
1021    /// tool calls have been recorded.
1022    pub fn tools_above_failure_ratio(&self, threshold: f64) -> Vec<String> {
1023        let mut names: Vec<String> = self
1024            .per_tool_calls
1025            .keys()
1026            .filter(|name| {
1027                let calls = self.tool_call_count(name);
1028                if calls == 0 {
1029                    return false;
1030                }
1031                let failures = self.tool_failure_count(name);
1032                failures as f64 / calls as f64 > threshold
1033            })
1034            .cloned()
1035            .collect();
1036        names.sort_unstable();
1037        names
1038    }
1039
1040    /// Return the failure ratio for a specific tool: `failures / calls`.
1041    ///
1042    /// Returns `0.0` if the tool has never been called or is unknown, avoiding
1043    /// division-by-zero.  A ratio of `1.0` means every invocation failed.
1044    pub fn failure_ratio_for_tool(&self, name: &str) -> f64 {
1045        let calls = self.tool_call_count(name);
1046        if calls == 0 {
1047            return 0.0;
1048        }
1049        self.tool_failure_count(name) as f64 / calls as f64
1050    }
1051
1052    /// Return `true` if any registered tool has a call count strictly above
1053    /// `threshold`.
1054    ///
1055    /// Useful for detecting hotspot tools that may be responsible for
1056    /// disproportionate load.
1057    pub fn any_tool_exceeds_calls(&self, threshold: u64) -> bool {
1058        self.per_tool_calls.values().any(|&c| c > threshold)
1059    }
1060
1061    /// Return the number of distinct tools that have been tracked in this
1062    /// snapshot (i.e. tools with at least one call recorded).
1063    ///
1064    /// Equivalent to `per_tool_calls.len()` but exposed as a named method for
1065    /// readability.
1066    pub fn total_unique_tools(&self) -> usize {
1067        self.per_tool_calls.len()
1068    }
1069
1070    /// Return the fraction of all tool calls that were made by the named tool.
1071    ///
1072    /// Returns `0.0` when the tool is unknown or there have been no tool calls
1073    /// at all.  A value of `1.0` means this tool accounts for every call.
1074    pub fn tool_call_ratio_for(&self, name: &str) -> f64 {
1075        if self.total_tool_calls == 0 {
1076            return 0.0;
1077        }
1078        self.tool_call_count(name) as f64 / self.total_tool_calls as f64
1079    }
1080
1081    /// Return the sum of all per-tool failure counts across every tracked tool.
1082    ///
1083    /// This is the total number of error observations emitted by tool handlers,
1084    /// regardless of which tool generated them.  Returns `0` when no failures
1085    /// have been recorded.
1086    pub fn total_failures_across_all_tools(&self) -> u64 {
1087        self.per_tool_failures.values().sum()
1088    }
1089}
1090
1091impl std::fmt::Display for MetricsSnapshot {
1092    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1093        write!(
1094            f,
1095            "MetricsSnapshot {{ sessions: active={} total={}, steps={}, \
1096             tool_calls={} (failed={}), backpressure_shed={}, \
1097             memory_recalls={}, checkpoint_errors={}, latency_mean={:.1}ms }}",
1098            self.active_sessions,
1099            self.total_sessions,
1100            self.total_steps,
1101            self.total_tool_calls,
1102            self.failed_tool_calls,
1103            self.backpressure_shed_count,
1104            self.memory_recall_count,
1105            self.checkpoint_errors,
1106            self.step_latency_mean_ms,
1107        )
1108    }
1109}
1110
/// A point-in-time snapshot of all runtime counters.
///
/// Obtained by calling [`RuntimeMetrics::snapshot`].  All fields are plain
/// owned values (integers, floats, and maps) so the snapshot can be logged,
/// serialised, or diffed without holding any locks.
///
/// Includes per-tool, per-agent, and latency-histogram breakdowns alongside
/// the global counters.
///
/// # Example
/// ```rust
/// use llm_agent_runtime::metrics::RuntimeMetrics;
///
/// let m = RuntimeMetrics::new();
/// let snap = m.snapshot();
/// assert_eq!(snap.active_sessions, 0);
/// assert_eq!(snap.total_sessions, 0);
/// ```
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
pub struct MetricsSnapshot {
    /// Number of agent sessions currently in progress.
    pub active_sessions: usize,
    /// Total number of sessions started since the runtime was created.
    pub total_sessions: u64,
    /// Total number of ReAct steps executed across all sessions.
    pub total_steps: u64,
    /// Total number of tool calls dispatched (across all tool names).
    pub total_tool_calls: u64,
    /// Total number of tool calls that returned an error observation.
    pub failed_tool_calls: u64,
    /// Total number of requests shed due to backpressure.
    pub backpressure_shed_count: u64,
    /// Total number of memory recall operations.
    pub memory_recall_count: u64,
    /// Total number of checkpoint failures encountered during `run_agent`.
    pub checkpoint_errors: u64,
    /// Per-tool call counts: `tool_name → total_calls`.
    pub per_tool_calls: HashMap<String, u64>,
    /// Per-tool failure counts: `tool_name → failed_calls`.
    pub per_tool_failures: HashMap<String, u64>,
    /// Step latency histogram bucket counts as `(upper_bound_ms_inclusive, count)`.
    pub step_latency_buckets: Vec<(u64, u64)>,
    /// Mean step latency in milliseconds.
    pub step_latency_mean_ms: f64,
    /// Per-agent, per-tool call counts: `agent_id → tool_name → count`.
    pub per_agent_tool_calls: HashMap<String, HashMap<String, u64>>,
    /// Per-agent, per-tool failure counts: `agent_id → tool_name → count`.
    pub per_agent_tool_failures: HashMap<String, HashMap<String, u64>>,
}
1161
/// All four per-tool / per-agent counter maps, protected by a single lock.
///
/// Grouping them under one `Mutex` halves lock acquisitions on the hot path
/// (a single `record_tool_call` + `record_agent_tool_call` pair now requires
/// only one acquire/release) and makes snapshot reads cheaper too: `snapshot`
/// clones all four maps under one acquisition.
#[derive(Debug, Default)]
struct PerToolMaps {
    /// Per-tool call counts: `tool_name → total_calls`.
    calls: HashMap<String, u64>,
    /// Per-tool failure counts: `tool_name → failed_calls`.
    failures: HashMap<String, u64>,
    /// Per-agent, per-tool call counts: `agent_id → tool_name → count`.
    agent_calls: HashMap<String, HashMap<String, u64>>,
    /// Per-agent, per-tool failure counts: `agent_id → tool_name → count`.
    agent_failures: HashMap<String, HashMap<String, u64>>,
}
1178
/// Shared runtime metrics. Clone the `Arc` to share across threads.
///
/// Global counters are lock-free atomics updated with `Ordering::Relaxed`
/// (per-counter consistency only; no cross-counter ordering is guaranteed).
/// Per-tool breakdowns live behind a single `Mutex` (see [`PerToolMaps`]).
#[derive(Debug)]
pub struct RuntimeMetrics {
    /// Number of agent sessions currently in progress.
    pub active_sessions: AtomicUsize,
    /// Total number of sessions started since the runtime was created.
    pub total_sessions: AtomicU64,
    /// Total number of ReAct steps executed across all sessions.
    pub total_steps: AtomicU64,
    /// Total number of tool calls dispatched (across all tool names).
    pub total_tool_calls: AtomicU64,
    /// Total number of tool calls that returned an error observation.
    pub failed_tool_calls: AtomicU64,
    /// Total number of requests shed due to backpressure.
    pub backpressure_shed_count: AtomicU64,
    /// Total number of memory recall operations.
    pub memory_recall_count: AtomicU64,
    /// Total number of checkpoint failures encountered during `run_agent`.
    pub checkpoint_errors: AtomicU64,
    /// All four per-tool / per-agent maps under a single lock.
    per_tool: Mutex<PerToolMaps>,
    /// Per-step latency histogram.
    pub step_latency: LatencyHistogram,
}
1203
1204impl Default for RuntimeMetrics {
1205    fn default() -> Self {
1206        Self {
1207            active_sessions: AtomicUsize::new(0),
1208            total_sessions: AtomicU64::new(0),
1209            total_steps: AtomicU64::new(0),
1210            total_tool_calls: AtomicU64::new(0),
1211            failed_tool_calls: AtomicU64::new(0),
1212            backpressure_shed_count: AtomicU64::new(0),
1213            memory_recall_count: AtomicU64::new(0),
1214            checkpoint_errors: AtomicU64::new(0),
1215            per_tool: Mutex::new(PerToolMaps::default()),
1216            step_latency: LatencyHistogram::default(),
1217        }
1218    }
1219}
1220
1221impl RuntimeMetrics {
1222    /// Allocate a new `RuntimeMetrics` instance wrapped in an `Arc`.
1223    pub fn new() -> Arc<Self> {
1224        Arc::new(Self::default())
1225    }
1226
1227    /// Return the number of agent sessions currently in progress.
1228    pub fn active_sessions(&self) -> usize {
1229        self.active_sessions.load(Ordering::Relaxed)
1230    }
1231
1232    /// Return the total number of sessions started since the runtime was created.
1233    pub fn total_sessions(&self) -> u64 {
1234        self.total_sessions.load(Ordering::Relaxed)
1235    }
1236
1237    /// Return the average number of tool calls per completed session.
1238    ///
1239    /// Returns `0.0` when no sessions have been recorded.
1240    pub fn avg_tool_calls_per_session(&self) -> f64 {
1241        let sessions = self.total_sessions();
1242        if sessions == 0 {
1243            return 0.0;
1244        }
1245        self.total_tool_calls() as f64 / sessions as f64
1246    }
1247
1248    /// Return the total number of ReAct steps executed across all sessions.
1249    pub fn total_steps(&self) -> u64 {
1250        self.total_steps.load(Ordering::Relaxed)
1251    }
1252
1253    /// Return the average number of ReAct steps per completed session.
1254    ///
1255    /// Returns `0.0` when no sessions have been recorded.
1256    pub fn avg_steps_per_session(&self) -> f64 {
1257        let sessions = self.total_sessions();
1258        if sessions == 0 {
1259            return 0.0;
1260        }
1261        self.total_steps() as f64 / sessions as f64
1262    }
1263
1264    /// Return the total number of tool calls dispatched.
1265    pub fn total_tool_calls(&self) -> u64 {
1266        self.total_tool_calls.load(Ordering::Relaxed)
1267    }
1268
1269    /// Return the total number of tool calls that returned an error observation.
1270    pub fn failed_tool_calls(&self) -> u64 {
1271        self.failed_tool_calls.load(Ordering::Relaxed)
1272    }
1273
1274    /// Return the fraction of tool calls that succeeded (i.e. did not fail).
1275    ///
1276    /// Returns `1.0` if no tool calls have been recorded yet (vacuously all
1277    /// succeeded) and a value in `[0.0, 1.0]` once calls have been made.
1278    pub fn tool_success_rate(&self) -> f64 {
1279        let total = self.total_tool_calls();
1280        if total == 0 {
1281            return 1.0;
1282        }
1283        let failed = self.failed_tool_calls();
1284        1.0 - (failed as f64 / total as f64)
1285    }
1286
1287    /// Return the total number of requests shed due to backpressure.
1288    pub fn backpressure_shed_count(&self) -> u64 {
1289        self.backpressure_shed_count.load(Ordering::Relaxed)
1290    }
1291
1292    /// Return the total number of memory recall operations performed.
1293    pub fn memory_recall_count(&self) -> u64 {
1294        self.memory_recall_count.load(Ordering::Relaxed)
1295    }
1296
1297    /// Return the total number of checkpoint failures encountered during `run_agent`.
1298    pub fn checkpoint_errors(&self) -> u64 {
1299        self.checkpoint_errors.load(Ordering::Relaxed)
1300    }
1301
1302    /// Return the ratio of checkpoint errors to total completed sessions.
1303    ///
1304    /// Returns `0.0` when no sessions have been recorded.
1305    pub fn checkpoint_error_rate(&self) -> f64 {
1306        let sessions = self.total_sessions();
1307        if sessions == 0 {
1308            return 0.0;
1309        }
1310        self.checkpoint_errors() as f64 / sessions as f64
1311    }
1312
1313    /// Return the median (50th-percentile) step latency in milliseconds.
1314    ///
1315    /// Convenience shorthand for `self.step_latency.p50()`.  Returns `0`
1316    /// when no step latencies have been recorded.
1317    pub fn p50_latency_ms(&self) -> u64 {
1318        self.step_latency.p50()
1319    }
1320
1321    /// Increment the call counter for `tool_name` by 1.
1322    ///
1323    /// Called automatically by the agent loop when `with_metrics` is configured.
1324    pub fn record_tool_call(&self, tool_name: &str) {
1325        self.total_tool_calls.fetch_add(1, Ordering::Relaxed);
1326        if let Ok(mut maps) = self.per_tool.lock() {
1327            *maps.calls.entry(tool_name.to_owned()).or_insert(0) += 1;
1328        }
1329    }
1330
1331    /// Increment the failure counter for `tool_name` by 1.
1332    ///
1333    /// Called automatically by the agent loop when a tool returns an error.
1334    pub fn record_tool_failure(&self, tool_name: &str) {
1335        self.failed_tool_calls.fetch_add(1, Ordering::Relaxed);
1336        if let Ok(mut maps) = self.per_tool.lock() {
1337            *maps.failures.entry(tool_name.to_owned()).or_insert(0) += 1;
1338        }
1339    }
1340
1341    /// Return a snapshot of per-tool call counts as a `HashMap<tool_name, count>`.
1342    pub fn per_tool_calls_snapshot(&self) -> HashMap<String, u64> {
1343        self.per_tool
1344            .lock()
1345            .map(|m| m.calls.clone())
1346            .unwrap_or_default()
1347    }
1348
1349    /// Return a snapshot of per-tool failure counts as a `HashMap<tool_name, count>`.
1350    pub fn per_tool_failures_snapshot(&self) -> HashMap<String, u64> {
1351        self.per_tool
1352            .lock()
1353            .map(|m| m.failures.clone())
1354            .unwrap_or_default()
1355    }
1356
1357    /// Increment call counter for (agent_id, tool_name).
1358    pub fn record_agent_tool_call(&self, agent_id: &str, tool_name: &str) {
1359        if let Ok(mut maps) = self.per_tool.lock() {
1360            *maps
1361                .agent_calls
1362                .entry(agent_id.to_owned())
1363                .or_default()
1364                .entry(tool_name.to_owned())
1365                .or_insert(0) += 1;
1366        }
1367    }
1368
1369    /// Increment failure counter for (agent_id, tool_name).
1370    pub fn record_agent_tool_failure(&self, agent_id: &str, tool_name: &str) {
1371        if let Ok(mut maps) = self.per_tool.lock() {
1372            *maps
1373                .agent_failures
1374                .entry(agent_id.to_owned())
1375                .or_default()
1376                .entry(tool_name.to_owned())
1377                .or_insert(0) += 1;
1378        }
1379    }
1380
1381    /// Snapshot of per-agent, per-tool call counts.
1382    pub fn per_agent_tool_calls_snapshot(&self) -> HashMap<String, HashMap<String, u64>> {
1383        self.per_tool
1384            .lock()
1385            .map(|m| m.agent_calls.clone())
1386            .unwrap_or_default()
1387    }
1388
1389    /// Snapshot of per-agent, per-tool failure counts.
1390    pub fn per_agent_tool_failures_snapshot(&self) -> HashMap<String, HashMap<String, u64>> {
1391        self.per_tool
1392            .lock()
1393            .map(|m| m.agent_failures.clone())
1394            .unwrap_or_default()
1395    }
1396
1397    /// Capture a complete snapshot of all counters, including per-tool breakdowns.
1398    ///
1399    /// This is the preferred alternative to [`to_snapshot`] — it returns a
1400    /// named [`MetricsSnapshot`] struct instead of an opaque tuple.
1401    ///
1402    /// [`to_snapshot`]: RuntimeMetrics::to_snapshot
1403    pub fn snapshot(&self) -> MetricsSnapshot {
1404        // Acquire the single per-tool lock once for all four maps.
1405        let (per_tool_calls, per_tool_failures, per_agent_tool_calls, per_agent_tool_failures) =
1406            self.per_tool
1407                .lock()
1408                .map(|m| {
1409                    (
1410                        m.calls.clone(),
1411                        m.failures.clone(),
1412                        m.agent_calls.clone(),
1413                        m.agent_failures.clone(),
1414                    )
1415                })
1416                .unwrap_or_default();
1417
1418        MetricsSnapshot {
1419            active_sessions: self.active_sessions.load(Ordering::Relaxed),
1420            total_sessions: self.total_sessions.load(Ordering::Relaxed),
1421            total_steps: self.total_steps.load(Ordering::Relaxed),
1422            total_tool_calls: self.total_tool_calls.load(Ordering::Relaxed),
1423            failed_tool_calls: self.failed_tool_calls.load(Ordering::Relaxed),
1424            backpressure_shed_count: self.backpressure_shed_count.load(Ordering::Relaxed),
1425            memory_recall_count: self.memory_recall_count.load(Ordering::Relaxed),
1426            checkpoint_errors: self.checkpoint_errors.load(Ordering::Relaxed),
1427            per_tool_calls,
1428            per_tool_failures,
1429            step_latency_buckets: self.step_latency.buckets(),
1430            step_latency_mean_ms: self.step_latency.mean_ms(),
1431            per_agent_tool_calls,
1432            per_agent_tool_failures,
1433        }
1434    }
1435
1436    /// Record a step latency sample.
1437    pub fn record_step_latency(&self, ms: u64) {
1438        self.step_latency.record(ms);
1439    }
1440
1441    /// Reset all counters to zero.
1442    ///
1443    /// Intended for testing. In production, counters are monotonically increasing.
1444    pub fn reset(&self) {
1445        self.active_sessions.store(0, Ordering::Relaxed);
1446        self.total_sessions.store(0, Ordering::Relaxed);
1447        self.total_steps.store(0, Ordering::Relaxed);
1448        self.total_tool_calls.store(0, Ordering::Relaxed);
1449        self.failed_tool_calls.store(0, Ordering::Relaxed);
1450        self.backpressure_shed_count.store(0, Ordering::Relaxed);
1451        self.memory_recall_count.store(0, Ordering::Relaxed);
1452        self.checkpoint_errors.store(0, Ordering::Relaxed);
1453        if let Ok(mut maps) = self.per_tool.lock() {
1454            maps.calls.clear();
1455            maps.failures.clear();
1456            maps.agent_calls.clear();
1457            maps.agent_failures.clear();
1458        }
1459        self.step_latency.reset();
1460    }
1461
1462    /// Return the fraction of tool calls that failed: `failed / total`.
1463    ///
1464    /// Returns `0.0` if no tool calls have been recorded.
1465    pub fn failure_rate(&self) -> f64 {
1466        let total = self.total_tool_calls.load(Ordering::Relaxed);
1467        if total == 0 {
1468            return 0.0;
1469        }
1470        let failed = self.failed_tool_calls.load(Ordering::Relaxed);
1471        failed as f64 / total as f64
1472    }
1473
1474    /// Return the fraction of tool calls that succeeded: `1.0 - failure_rate()`.
1475    ///
1476    /// Returns `1.0` if no tool calls have been recorded (vacuously all succeeded).
1477    pub fn success_rate(&self) -> f64 {
1478        1.0 - self.failure_rate()
1479    }
1480
1481    /// Return `true` if there is at least one active (in-progress) session.
1482    pub fn is_active(&self) -> bool {
1483        self.active_sessions.load(Ordering::Relaxed) > 0
1484    }
1485
1486    /// Return the 50th-percentile (median) step latency in milliseconds.
1487    ///
1488    /// Delegates to [`LatencyHistogram::p50`] on the histogram tracked by
1489    /// this `RuntimeMetrics` instance.  Returns `0` if no steps have been recorded.
1490    pub fn step_latency_p50(&self) -> u64 {
1491        self.step_latency.p50()
1492    }
1493
1494    /// Return the 99th-percentile step latency in milliseconds.
1495    ///
1496    /// Delegates to [`LatencyHistogram::p99`].  Returns `0` if no steps have
1497    /// been recorded.
1498    pub fn step_latency_p99(&self) -> u64 {
1499        self.step_latency.p99()
1500    }
1501
1502    /// Return the 95th-percentile step latency in milliseconds.
1503    ///
1504    /// Delegates to [`LatencyHistogram::p95`].  Returns `0` if no steps have
1505    /// been recorded.
1506    pub fn step_latency_p95(&self) -> u64 {
1507        self.step_latency.p95()
1508    }
1509
1510    /// Return the 75th-percentile step latency in milliseconds.
1511    ///
1512    /// Delegates to [`LatencyHistogram::p75`].  Returns `0` if no steps have
1513    /// been recorded.
1514    pub fn step_latency_p75(&self) -> u64 {
1515        self.step_latency.p75()
1516    }
1517
1518    /// Return the standard deviation of recorded step latencies in milliseconds.
1519    ///
1520    /// Delegates to [`LatencyHistogram::std_dev_ms`].  Returns `0.0` when fewer
1521    /// than two samples have been recorded.
1522    pub fn step_latency_std_dev_ms(&self) -> f64 {
1523        self.step_latency.std_dev_ms()
1524    }
1525
1526    /// Return the name of the tool with the highest call count, or `None` if no
1527    /// tools have been called yet.
1528    ///
1529    /// When multiple tools share the maximum call count, the one that sorts
1530    /// earliest alphabetically is returned for deterministic output.
1531    pub fn most_used_tool(&self) -> Option<String> {
1532        let snap = self.per_tool_calls_snapshot();
1533        snap.into_iter()
1534            .max_by(|a, b| a.1.cmp(&b.1).then_with(|| b.0.cmp(&a.0)))
1535            .map(|(name, _)| name)
1536    }
1537
1538    /// Return the ratio of failed tool calls to total tool calls.
1539    ///
1540    /// Returns `0.0` when no tool calls have been recorded.  Unlike the
1541    /// per-tool [`tool_failure_rate`] on `MetricsSnapshot`, this operates on
1542    /// the live atomic counters for the current process without snapshotting.
1543    ///
1544    /// [`tool_failure_rate`]: MetricsSnapshot::tool_failure_rate
1545    pub fn tool_call_to_failure_ratio(&self) -> f64 {
1546        let total = self.total_tool_calls.load(Ordering::Relaxed);
1547        if total == 0 {
1548            return 0.0;
1549        }
1550        self.failed_tool_calls.load(Ordering::Relaxed) as f64 / total as f64
1551    }
1552
1553    /// Return the fraction of all sessions that are currently active.
1554    ///
1555    /// Computed as `active_sessions / total_sessions`.  Returns `0.0` when no
1556    /// sessions have been started.
1557    pub fn active_session_rate(&self) -> f64 {
1558        let total = self.total_sessions.load(Ordering::Relaxed);
1559        if total == 0 {
1560            return 0.0;
1561        }
1562        self.active_sessions.load(Ordering::Relaxed) as f64 / total as f64
1563    }
1564
1565    /// Return the average number of memory recall operations per session.
1566    ///
1567    /// Computed as `memory_recall_count / total_sessions`.  Returns `0.0`
1568    /// when no sessions have been started.
1569    pub fn memory_recall_per_session(&self) -> f64 {
1570        let total = self.total_sessions.load(Ordering::Relaxed);
1571        if total == 0 {
1572            return 0.0;
1573        }
1574        self.memory_recall_count.load(Ordering::Relaxed) as f64 / total as f64
1575    }
1576
1577    /// Return the fraction of all ReAct steps that resulted in a tool failure.
1578    ///
1579    /// Computed as `failed_tool_calls / total_steps`.  Returns `0.0` when
1580    /// no steps have been executed.
1581    pub fn step_error_rate(&self) -> f64 {
1582        let steps = self.total_steps.load(Ordering::Relaxed);
1583        if steps == 0 {
1584            return 0.0;
1585        }
1586        self.failed_tool_calls.load(Ordering::Relaxed) as f64 / steps as f64
1587    }
1588
1589    /// Return the combined count of all error events: failed tool calls plus
1590    /// checkpoint errors.
1591    ///
1592    /// Useful as a single "total errors" gauge for alerting.
1593    pub fn total_errors(&self) -> u64 {
1594        self.failed_tool_calls.load(Ordering::Relaxed)
1595            + self.checkpoint_errors.load(Ordering::Relaxed)
1596    }
1597
1598    /// Return all tool names recorded in the call counter that contain
1599    /// `substr` as a substring (case-sensitive).
1600    ///
1601    /// Returns an empty `Vec` when no matching tool names are found.
1602    pub fn tool_names_containing(&self, substr: &str) -> Vec<String> {
1603        let snap = self.per_tool_calls_snapshot();
1604        let mut names: Vec<String> = snap
1605            .into_keys()
1606            .filter(|name| name.contains(substr))
1607            .collect();
1608        names.sort_unstable();
1609        names
1610    }
1611
1612    /// Return `true` if any tool has recorded at least one failure.
1613    ///
1614    /// A convenience shorthand for `failed_tool_calls() > 0`.
1615    pub fn has_failed_tools(&self) -> bool {
1616        self.failed_tool_calls() > 0
1617    }
1618
1619    /// Return tool names sorted by total call count in descending order.
1620    ///
1621    /// The highest-called tool appears first.  Ties are broken alphabetically.
1622    /// Returns an empty `Vec` when no tools have been called.
1623    pub fn tool_names_by_call_count(&self) -> Vec<String> {
1624        let snap = self.per_tool_calls_snapshot();
1625        let mut pairs: Vec<(String, u64)> = snap.into_iter().collect();
1626        pairs.sort_unstable_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
1627        pairs.into_iter().map(|(name, _)| name).collect()
1628    }
1629
1630    /// Return the average number of memory recalls per recorded step.
1631    ///
1632    /// Computed as `memory_recall_count / total_steps`.  Returns `0.0`
1633    /// when no steps have been recorded to avoid division by zero.
1634    pub fn avg_memory_recalls_per_step(&self) -> f64 {
1635        let steps = self.total_steps();
1636        if steps == 0 {
1637            return 0.0;
1638        }
1639        self.memory_recall_count() as f64 / steps as f64
1640    }
1641
1642    /// Return the average number of tool failures per completed session.
1643    ///
1644    /// Computed as `failed_tool_calls / total_sessions`.  Returns `0.0`
1645    /// when no sessions have been recorded to avoid division by zero.
1646    pub fn avg_tool_failures_per_session(&self) -> f64 {
1647        let sessions = self.total_sessions();
1648        if sessions == 0 {
1649            return 0.0;
1650        }
1651        self.failed_tool_calls() as f64 / sessions as f64
1652    }
1653
1654    /// Return the ratio of total tool calls to total memory recalls.
1655    ///
1656    /// Returns `0.0` when no memory recalls have been recorded to avoid
1657    /// division by zero.
1658    pub fn tool_calls_per_memory_recall(&self) -> f64 {
1659        let recalls = self.memory_recall_count();
1660        if recalls == 0 {
1661            return 0.0;
1662        }
1663        self.total_tool_calls() as f64 / recalls as f64
1664    }
1665
1666    /// Return the ratio of memory recalls to total tool calls.
1667    ///
1668    /// Returns `0.0` when no tool calls have been recorded to avoid division
1669    /// by zero.
1670    pub fn memory_recalls_per_tool_call(&self) -> f64 {
1671        let calls = self.total_tool_calls();
1672        if calls == 0 {
1673            return 0.0;
1674        }
1675        self.memory_recall_count() as f64 / calls as f64
1676    }
1677
1678    /// Return the fraction of completed steps that recorded at least one tool
1679    /// failure.  Computed as `failed_tool_calls / total_steps`.
1680    ///
1681    /// Returns `0.0` when no steps have been recorded.
1682    pub fn step_failure_rate(&self) -> f64 {
1683        let steps = self.total_steps.load(std::sync::atomic::Ordering::Relaxed);
1684        if steps == 0 {
1685            return 0.0;
1686        }
1687        self.failed_tool_calls() as f64 / steps as f64
1688    }
1689
1690    /// Return the fraction of total tool calls that were shed due to
1691    /// backpressure.  Computed as `backpressure_shed / total_tool_calls`.
1692    ///
1693    /// Returns `0.0` when no tool calls have been made.
1694    pub fn total_backpressure_shed_pct(&self) -> f64 {
1695        let calls = self.total_tool_calls();
1696        if calls == 0 {
1697            return 0.0;
1698        }
1699        self.backpressure_shed_count() as f64 / calls as f64
1700    }
1701
1702    /// Return the name of the tool with the highest failure rate
1703    /// (`failures / calls`), or `None` when no tool has been called.
1704    ///
1705    /// Tools with zero calls are excluded.
1706    pub fn tool_with_highest_failure_rate(&self) -> Option<String> {
1707        let calls = self.per_tool_calls_snapshot();
1708        let fails = self.per_tool_failures_snapshot();
1709        calls
1710            .iter()
1711            .filter(|(_, &c)| c > 0)
1712            .map(|(name, &c)| {
1713                let f = fails.get(name).copied().unwrap_or(0);
1714                (name.clone(), f as f64 / c as f64)
1715            })
1716            .max_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal))
1717            .map(|(name, _)| name)
1718    }
1719
1720    /// Return the total number of times `name` has been called.
1721    ///
1722    /// Returns `0` when the tool has never been called.
1723    pub fn tool_call_count_for(&self, name: &str) -> u64 {
1724        self.per_tool_calls_snapshot()
1725            .get(name)
1726            .copied()
1727            .unwrap_or(0)
1728    }
1729
1730    /// Return the name of the most-called tool, or `None` if no tools have
1731    /// been called yet.
1732    pub fn top_called_tool(&self) -> Option<String> {
1733        self.per_tool_calls_snapshot()
1734            .into_iter()
1735            .max_by_key(|(_, c)| *c)
1736            .map(|(name, _)| name)
1737    }
1738
1739    /// Return the average step latency in milliseconds.
1740    ///
1741    /// Returns `0.0` when no step latencies have been recorded.
1742    pub fn avg_step_latency_ms(&self) -> f64 {
1743        self.step_latency.mean_ms()
1744    }
1745
1746    /// Return the number of distinct tool names that have been called at least
1747    /// once.
1748    pub fn distinct_tools_called(&self) -> usize {
1749        self.per_tool_calls_snapshot().len()
1750    }
1751
1752    /// Return the failure rate (failed / total) for the given tool `name`.
1753    ///
1754    /// Returns `0.0` when the tool has never been called or doesn't exist.
1755    pub fn failure_rate_for(&self, name: &str) -> f64 {
1756        let calls = self.tool_call_count_for(name);
1757        if calls == 0 {
1758            return 0.0;
1759        }
1760        let failures = self
1761            .per_tool_failures_snapshot()
1762            .get(name)
1763            .copied()
1764            .unwrap_or(0);
1765        failures as f64 / calls as f64
1766    }
1767
1768    /// Return the total number of checkpoint errors recorded since the runtime
1769    /// started.
1770    pub fn checkpoint_errors_count(&self) -> u64 {
1771        self.checkpoint_errors.load(std::sync::atomic::Ordering::Relaxed)
1772    }
1773
1774    /// Return the names of agents that have at least one per-agent tool
1775    /// failure recorded.
1776    pub fn agents_with_failures(&self) -> Vec<String> {
1777        self.per_agent_tool_failures_snapshot()
1778            .into_iter()
1779            .filter(|(_, tools)| tools.values().any(|&c| c > 0))
1780            .map(|(agent_id, _)| agent_id)
1781            .collect()
1782    }
1783
1784    /// Return the total number of per-agent tool failures recorded across all
1785    /// agents and all tools.
1786    pub fn total_agent_failures(&self) -> u64 {
1787        self.per_agent_tool_failures_snapshot()
1788            .values()
1789            .flat_map(|m| m.values())
1790            .sum()
1791    }
1792
1793    /// Return the average number of tool calls per recorded step, or `0.0`
1794    /// when no steps have been recorded.
1795    pub fn per_step_tool_call_rate(&self) -> f64 {
1796        let steps = self.total_steps();
1797        if steps == 0 {
1798            return 0.0;
1799        }
1800        let calls: u64 = self.per_tool_calls_snapshot().values().sum();
1801        calls as f64 / steps as f64
1802    }
1803
1804    /// Return agent IDs that have recorded tool calls but zero failures.
1805    pub fn agents_with_no_failures(&self) -> Vec<String> {
1806        let calls = self.per_agent_tool_calls_snapshot();
1807        let failures = self.per_agent_tool_failures_snapshot();
1808        let mut result: Vec<String> = calls
1809            .keys()
1810            .filter(|agent| {
1811                let total_failures: u64 = failures
1812                    .get(*agent)
1813                    .map(|m| m.values().sum())
1814                    .unwrap_or(0);
1815                total_failures == 0
1816            })
1817            .cloned()
1818            .collect();
1819        result.sort_unstable();
1820        result
1821    }
1822
1823    /// Return a sorted list of tool names whose total call count exceeds
1824    /// `threshold`.
1825    ///
1826    /// Useful for identifying heavily-exercised tools above a given activity
1827    /// level.  Returns an empty `Vec` when no tool meets the criterion.
1828    pub fn tools_with_calls_above(&self, threshold: u64) -> Vec<String> {
1829        let snap = self.per_tool_calls_snapshot();
1830        let mut names: Vec<String> = snap
1831            .into_iter()
1832            .filter(|(_, count)| *count > threshold)
1833            .map(|(name, _)| name)
1834            .collect();
1835        names.sort_unstable();
1836        names
1837    }
1838
1839    /// Return the total number of tool calls recorded for the given `agent_id`.
1840    ///
1841    /// Returns `0` when the agent has never called a tool.
1842    pub fn agent_tool_call_count(&self, agent_id: &str) -> u64 {
1843        let snap = self.per_agent_tool_calls_snapshot();
1844        snap.get(agent_id)
1845            .map(|m| m.values().sum())
1846            .unwrap_or(0)
1847    }
1848
1849    /// Return the average number of tool calls per total session.
1850    ///
1851    /// Returns `0.0` when no sessions have been started.
1852    pub fn tool_calls_per_session(&self) -> f64 {
1853        let sessions = self.total_sessions();
1854        if sessions == 0 {
1855            return 0.0;
1856        }
1857        self.total_tool_calls() as f64 / sessions as f64
1858    }
1859
1860    /// Return the names of all tools that have been called at least once but
1861    /// have recorded zero failures.
1862    pub fn failure_free_tools(&self) -> Vec<String> {
1863        let calls = self.per_tool_calls_snapshot();
1864        let failures = self.per_tool_failures_snapshot();
1865        calls
1866            .into_keys()
1867            .filter(|name| failures.get(name).copied().unwrap_or(0) == 0)
1868            .collect()
1869    }
1870
1871    /// Return the top `n` tools by total call count, sorted descending.
1872    ///
1873    /// Returns fewer than `n` entries if fewer tools have been called.
1874    pub fn top_tools_by_calls(&self, n: usize) -> Vec<(String, u64)> {
1875        let snap = self.per_tool_calls_snapshot();
1876        let mut pairs: Vec<(String, u64)> = snap.into_iter().collect();
1877        pairs.sort_unstable_by(|a, b| b.1.cmp(&a.1));
1878        pairs.truncate(n);
1879        pairs
1880    }
1881
1882    /// Return the top `n` tools by total failure count, sorted descending.
1883    ///
1884    /// Analogous to [`top_tools_by_calls`]; returns fewer than `n` entries if
1885    /// fewer tools have recorded failures.
1886    ///
1887    /// [`top_tools_by_calls`]: RuntimeMetrics::top_tools_by_calls
1888    pub fn top_tools_by_failures(&self, n: usize) -> Vec<(String, u64)> {
1889        let snap = self.per_tool_failures_snapshot();
1890        let mut pairs: Vec<(String, u64)> = snap.into_iter().collect();
1891        pairs.sort_unstable_by(|a, b| b.1.cmp(&a.1));
1892        pairs.truncate(n);
1893        pairs
1894    }
1895
1896    /// Return the sum of all recorded step latencies in milliseconds.
1897    pub fn total_step_latency_ms(&self) -> u64 {
1898        self.step_latency.sum_ms()
1899    }
1900
1901    /// Return the average number of tool calls per recorded step.
1902    ///
1903    /// Returns `0.0` when no steps have been recorded to avoid division by
1904    /// zero.
1905    pub fn avg_calls_per_step(&self) -> f64 {
1906        let steps = self.total_steps.load(Ordering::Relaxed);
1907        if steps == 0 {
1908            return 0.0;
1909        }
1910        self.total_tool_calls.load(Ordering::Relaxed) as f64 / steps as f64
1911    }
1912
1913    /// Return the ratio of memory recall events to total steps recorded.
1914    ///
1915    /// Indicates how memory-intensive the agent's operation is. Returns `0.0`
1916    /// when no steps have been recorded to avoid division by zero.
1917    pub fn memory_pressure_ratio(&self) -> f64 {
1918        let steps = self.total_steps.load(Ordering::Relaxed);
1919        if steps == 0 {
1920            return 0.0;
1921        }
1922        self.memory_recall_count.load(Ordering::Relaxed) as f64 / steps as f64
1923    }
1924
1925    /// Return the ratio of backpressure-shed events to total steps recorded.
1926    ///
1927    /// Higher values indicate significant load shedding. Returns `0.0` when no
1928    /// steps have been recorded to avoid division by zero.
1929    pub fn backpressure_ratio(&self) -> f64 {
1930        let steps = self.total_steps.load(Ordering::Relaxed);
1931        if steps == 0 {
1932            return 0.0;
1933        }
1934        self.backpressure_shed_count.load(Ordering::Relaxed) as f64 / steps as f64
1935    }
1936
1937    /// Return the ratio of total sessions to total steps recorded.
1938    ///
1939    /// Higher values indicate shorter average sessions. Returns `0.0` when no
1940    /// steps have been recorded to avoid division by zero.
1941    pub fn sessions_per_step(&self) -> f64 {
1942        let steps = self.total_steps.load(Ordering::Relaxed);
1943        if steps == 0 {
1944            return 0.0;
1945        }
1946        self.total_sessions.load(Ordering::Relaxed) as f64 / steps as f64
1947    }
1948
1949    /// Return `true` if any step-latency samples have been recorded.
1950    ///
1951    /// Useful for guard-checking before using latency percentile methods.
1952    pub fn has_latency_data(&self) -> bool {
1953        self.total_steps.load(Ordering::Relaxed) > 0
1954    }
1955
1956    /// Return the ratio of `failed_tool_calls` to `total_tool_calls`.
1957    ///
1958    /// Returns `0.0` when no tool calls have been recorded (avoids
1959    /// division-by-zero).
1960    pub fn global_failure_rate(&self) -> f64 {
1961        let total = self.total_tool_calls.load(Ordering::Relaxed);
1962        if total == 0 {
1963            return 0.0;
1964        }
1965        self.failed_tool_calls.load(Ordering::Relaxed) as f64 / total as f64
1966    }
1967
1968    /// Return the total number of tool calls recorded across all agents in
1969    /// the per-agent breakdown.
1970    ///
1971    /// This sums the per-agent, per-tool call counters and is independent of
1972    /// the global `total_tool_calls` counter, which is incremented by a
1973    /// different code path.
1974    pub fn total_agent_tool_calls(&self) -> u64 {
1975        self.per_agent_tool_calls_snapshot()
1976            .values()
1977            .flat_map(|tool_map| tool_map.values())
1978            .sum()
1979    }
1980
1981    /// Return the number of distinct agents recorded in the per-agent tool
1982    /// call tracking.
1983    ///
1984    /// Returns `0` when no per-agent calls have been recorded.
1985    pub fn agent_tool_count(&self) -> usize {
1986        self.per_agent_tool_calls_snapshot().len()
1987    }
1988
    /// Return `true` if any per-agent tool call has been recorded.
    ///
    /// Equivalent to `agent_tool_count() > 0`.  Note that this still builds
    /// the full per-agent snapshot map internally, so it is a convenience
    /// shorthand rather than a cheaper fast path.
    pub fn has_recorded_agent_calls(&self) -> bool {
        !self.per_agent_tool_calls_snapshot().is_empty()
    }
1996
1997    /// Return the current count of active (in-progress) sessions.
1998    pub fn active_session_count(&self) -> usize {
1999        self.active_sessions.load(Ordering::Relaxed)
2000    }
2001
2002    /// Return the ratio of `memory_recall_count` to `total_sessions`.
2003    ///
2004    /// Returns `0.0` when no sessions have been recorded (avoids
2005    /// division-by-zero).
2006    pub fn memory_to_session_ratio(&self) -> f64 {
2007        let sessions = self.total_sessions.load(Ordering::Relaxed);
2008        if sessions == 0 {
2009            return 0.0;
2010        }
2011        self.memory_recall_count.load(Ordering::Relaxed) as f64 / sessions as f64
2012    }
2013
2014    /// Return the total accumulated step latency in milliseconds divided by
2015    /// `total_sessions`.
2016    ///
2017    /// Returns `0.0` when no sessions have been recorded.
2018    pub fn total_latency_per_session(&self) -> f64 {
2019        let sessions = self.total_sessions.load(Ordering::Relaxed);
2020        if sessions == 0 {
2021            return 0.0;
2022        }
2023        self.step_latency.sum_ms() as f64 / sessions as f64
2024    }
2025
2026    /// Capture a snapshot of global counters as plain integers.
2027    ///
2028    /// Returns `(active_sessions, total_sessions, total_steps,
2029    ///           total_tool_calls, failed_tool_calls,
2030    ///           backpressure_shed_count, memory_recall_count)`.
2031    /// For per-tool breakdowns use [`per_tool_calls_snapshot`] and
2032    /// [`per_tool_failures_snapshot`].
2033    ///
2034    /// # Deprecation
2035    ///
2036    /// Prefer [`snapshot`] which returns the named [`MetricsSnapshot`] struct
2037    /// and includes per-tool, per-agent, and histogram data.  This method
2038    /// returns an anonymous tuple whose field order is easy to misread.
2039    ///
2040    /// [`snapshot`]: RuntimeMetrics::snapshot
2041    /// [`per_tool_calls_snapshot`]: RuntimeMetrics::per_tool_calls_snapshot
2042    /// [`per_tool_failures_snapshot`]: RuntimeMetrics::per_tool_failures_snapshot
2043    #[deprecated(since = "1.0.3", note = "use `snapshot()` which returns the named MetricsSnapshot struct")]
2044    pub fn to_snapshot(&self) -> (usize, u64, u64, u64, u64, u64, u64) {
2045        (
2046            self.active_sessions.load(Ordering::Relaxed),
2047            self.total_sessions.load(Ordering::Relaxed),
2048            self.total_steps.load(Ordering::Relaxed),
2049            self.total_tool_calls.load(Ordering::Relaxed),
2050            self.failed_tool_calls.load(Ordering::Relaxed),
2051            self.backpressure_shed_count.load(Ordering::Relaxed),
2052            self.memory_recall_count.load(Ordering::Relaxed),
2053        )
2054    }
2055}
2056
2057// ── Tests ─────────────────────────────────────────────────────────────────────
2058
2059#[cfg(test)]
2060mod tests {
2061    use super::*;
2062
2063    #[test]
2064    fn test_metrics_new_returns_arc_with_zero_counters() {
2065        let m = RuntimeMetrics::new();
2066        assert_eq!(m.active_sessions(), 0);
2067        assert_eq!(m.total_sessions(), 0);
2068        assert_eq!(m.total_steps(), 0);
2069        assert_eq!(m.total_tool_calls(), 0);
2070        assert_eq!(m.failed_tool_calls(), 0);
2071        assert_eq!(m.backpressure_shed_count(), 0);
2072        assert_eq!(m.memory_recall_count(), 0);
2073    }
2074
2075    #[test]
2076    fn test_active_sessions_increments_and_decrements() {
2077        let m = RuntimeMetrics::new();
2078        m.active_sessions.fetch_add(1, Ordering::Relaxed);
2079        assert_eq!(m.active_sessions(), 1);
2080        m.active_sessions.fetch_sub(1, Ordering::Relaxed);
2081        assert_eq!(m.active_sessions(), 0);
2082    }
2083
2084    #[test]
2085    fn test_total_sessions_increments() {
2086        let m = RuntimeMetrics::new();
2087        m.total_sessions.fetch_add(1, Ordering::Relaxed);
2088        m.total_sessions.fetch_add(1, Ordering::Relaxed);
2089        assert_eq!(m.total_sessions(), 2);
2090    }
2091
2092    #[test]
2093    fn test_total_steps_increments() {
2094        let m = RuntimeMetrics::new();
2095        m.total_steps.fetch_add(5, Ordering::Relaxed);
2096        assert_eq!(m.total_steps(), 5);
2097    }
2098
2099    #[test]
2100    fn test_total_tool_calls_increments() {
2101        let m = RuntimeMetrics::new();
2102        m.total_tool_calls.fetch_add(3, Ordering::Relaxed);
2103        assert_eq!(m.total_tool_calls(), 3);
2104    }
2105
2106    #[test]
2107    fn test_failed_tool_calls_increments() {
2108        let m = RuntimeMetrics::new();
2109        m.failed_tool_calls.fetch_add(2, Ordering::Relaxed);
2110        assert_eq!(m.failed_tool_calls(), 2);
2111    }
2112
2113    #[test]
2114    fn test_backpressure_shed_count_increments() {
2115        let m = RuntimeMetrics::new();
2116        m.backpressure_shed_count.fetch_add(7, Ordering::Relaxed);
2117        assert_eq!(m.backpressure_shed_count(), 7);
2118    }
2119
2120    #[test]
2121    fn test_memory_recall_count_increments() {
2122        let m = RuntimeMetrics::new();
2123        m.memory_recall_count.fetch_add(4, Ordering::Relaxed);
2124        assert_eq!(m.memory_recall_count(), 4);
2125    }
2126
2127    #[test]
2128    fn test_reset_zeroes_all_counters() {
2129        let m = RuntimeMetrics::new();
2130        m.active_sessions.store(3, Ordering::Relaxed);
2131        m.total_sessions.store(10, Ordering::Relaxed);
2132        m.total_steps.store(50, Ordering::Relaxed);
2133        m.total_tool_calls.store(20, Ordering::Relaxed);
2134        m.failed_tool_calls.store(2, Ordering::Relaxed);
2135        m.backpressure_shed_count.store(1, Ordering::Relaxed);
2136        m.memory_recall_count.store(8, Ordering::Relaxed);
2137
2138        m.reset();
2139
2140        assert_eq!(m.active_sessions(), 0);
2141        assert_eq!(m.total_sessions(), 0);
2142        assert_eq!(m.total_steps(), 0);
2143        assert_eq!(m.total_tool_calls(), 0);
2144        assert_eq!(m.failed_tool_calls(), 0);
2145        assert_eq!(m.backpressure_shed_count(), 0);
2146        assert_eq!(m.memory_recall_count(), 0);
2147    }
2148
2149    #[test]
2150    fn test_to_snapshot_captures_correct_values() {
2151        let m = RuntimeMetrics::new();
2152        m.active_sessions.store(1, Ordering::Relaxed);
2153        m.total_sessions.store(2, Ordering::Relaxed);
2154        m.total_steps.store(3, Ordering::Relaxed);
2155        m.total_tool_calls.store(4, Ordering::Relaxed);
2156        m.failed_tool_calls.store(5, Ordering::Relaxed);
2157        m.backpressure_shed_count.store(6, Ordering::Relaxed);
2158        m.memory_recall_count.store(7, Ordering::Relaxed);
2159
2160        let snap = m.to_snapshot();
2161        assert_eq!(snap, (1, 2, 3, 4, 5, 6, 7));
2162    }
2163
2164    #[test]
2165    fn test_metrics_is_send_sync() {
2166        fn assert_send_sync<T: Send + Sync>() {}
2167        assert_send_sync::<RuntimeMetrics>();
2168    }
2169
2170    #[test]
2171    fn test_multiple_increments_are_cumulative() {
2172        let m = RuntimeMetrics::new();
2173        for _ in 0..100 {
2174            m.total_sessions.fetch_add(1, Ordering::Relaxed);
2175        }
2176        assert_eq!(m.total_sessions(), 100);
2177    }
2178
2179    #[test]
2180    fn test_arc_clone_shares_state() {
2181        let m = RuntimeMetrics::new();
2182        let m2 = Arc::clone(&m);
2183        m.total_sessions.fetch_add(1, Ordering::Relaxed);
2184        assert_eq!(m2.total_sessions(), 1);
2185    }
2186
2187    // ── Per-tool metrics ──────────────────────────────────────────────────────
2188
2189    #[test]
2190    fn test_record_tool_call_increments_global_and_per_tool() {
2191        let m = RuntimeMetrics::new();
2192        m.record_tool_call("search");
2193        m.record_tool_call("search");
2194        m.record_tool_call("lookup");
2195        assert_eq!(m.total_tool_calls(), 3);
2196        let snap = m.per_tool_calls_snapshot();
2197        assert_eq!(snap.get("search").copied(), Some(2));
2198        assert_eq!(snap.get("lookup").copied(), Some(1));
2199    }
2200
2201    #[test]
2202    fn test_record_tool_failure_increments_global_and_per_tool() {
2203        let m = RuntimeMetrics::new();
2204        m.record_tool_failure("search");
2205        m.record_tool_failure("lookup");
2206        m.record_tool_failure("search");
2207        assert_eq!(m.failed_tool_calls(), 3);
2208        let snap = m.per_tool_failures_snapshot();
2209        assert_eq!(snap.get("search").copied(), Some(2));
2210        assert_eq!(snap.get("lookup").copied(), Some(1));
2211    }
2212
2213    #[test]
2214    fn test_reset_clears_per_tool_counters() {
2215        let m = RuntimeMetrics::new();
2216        m.record_tool_call("foo");
2217        m.record_tool_failure("foo");
2218        m.reset();
2219        assert!(m.per_tool_calls_snapshot().is_empty());
2220        assert!(m.per_tool_failures_snapshot().is_empty());
2221    }
2222
2223    #[test]
2224    fn test_per_tool_snapshot_is_independent_for_unknown_tools() {
2225        let m = RuntimeMetrics::new();
2226        let snap = m.per_tool_calls_snapshot();
2227        assert!(snap.is_empty());
2228    }
2229
2230    // ── LatencyHistogram ───────────────────────────────────────────────────────
2231
2232    #[test]
2233    fn test_latency_histogram_records_sample() {
2234        let h = LatencyHistogram::default();
2235        h.record(10);
2236        assert_eq!(h.count(), 1);
2237    }
2238
2239    #[test]
2240    fn test_latency_histogram_mean_ms() {
2241        let h = LatencyHistogram::default();
2242        h.record(10);
2243        h.record(20);
2244        assert!((h.mean_ms() - 15.0).abs() < 1e-5);
2245    }
2246
2247    #[test]
2248    fn test_latency_histogram_buckets_correct_bucket() {
2249        let h = LatencyHistogram::default();
2250        h.record(3); // falls in ≤5ms bucket (index 1)
2251        let buckets = h.buckets();
2252        // bucket at index 1 is ≤5ms
2253        assert_eq!(buckets[1].1, 1, "3ms should land in ≤5ms bucket");
2254        // other buckets should be zero
2255        assert_eq!(buckets[0].1, 0);
2256        assert_eq!(buckets[2].1, 0);
2257    }
2258
2259    // ── MetricsSnapshot ───────────────────────────────────────────────────────
2260
2261    #[test]
2262    fn test_snapshot_returns_all_fields() {
2263        let m = RuntimeMetrics::new();
2264        m.active_sessions.store(1, Ordering::Relaxed);
2265        m.total_sessions.store(2, Ordering::Relaxed);
2266        m.total_steps.store(3, Ordering::Relaxed);
2267        m.backpressure_shed_count.store(6, Ordering::Relaxed);
2268        m.memory_recall_count.store(7, Ordering::Relaxed);
2269        // Use record_* methods so global and per-tool counters stay consistent.
2270        m.record_tool_call("my_tool");
2271        m.record_tool_call("my_tool");
2272        m.record_tool_failure("my_tool");
2273
2274        let snap = m.snapshot();
2275        assert_eq!(snap.active_sessions, 1);
2276        assert_eq!(snap.total_sessions, 2);
2277        assert_eq!(snap.total_steps, 3);
2278        assert_eq!(snap.total_tool_calls, 2);
2279        assert_eq!(snap.failed_tool_calls, 1);
2280        assert_eq!(snap.backpressure_shed_count, 6);
2281        assert_eq!(snap.memory_recall_count, 7);
2282        assert_eq!(snap.per_tool_calls.get("my_tool").copied(), Some(2));
2283        assert_eq!(snap.per_tool_failures.get("my_tool").copied(), Some(1));
2284    }
2285
2286    #[test]
2287    fn test_snapshot_default_is_zeroed() {
2288        let snap = MetricsSnapshot::default();
2289        assert_eq!(snap.active_sessions, 0);
2290        assert_eq!(snap.total_sessions, 0);
2291        assert_eq!(snap.total_steps, 0);
2292        assert!(snap.per_tool_calls.is_empty());
2293        assert!(snap.per_tool_failures.is_empty());
2294    }
2295
2296    // ── #8 MetricsSnapshot histogram fields ───────────────────────────────────
2297
2298    #[test]
2299    fn test_metrics_snapshot_contains_all_fields() {
2300        let m = RuntimeMetrics::new();
2301        m.record_step_latency(5);
2302        m.record_step_latency(200);
2303        let snap = m.snapshot();
2304        // Should have 7 buckets
2305        assert_eq!(snap.step_latency_buckets.len(), 7);
2306        assert!(snap.step_latency_mean_ms > 0.0);
2307    }
2308
2309    // ── #9 per-agent tool call tracking ──────────────────────────────────────
2310
2311    #[test]
2312    fn test_per_agent_tool_call_tracking() {
2313        let m = RuntimeMetrics::new();
2314        m.record_agent_tool_call("agent-1", "search");
2315        m.record_agent_tool_call("agent-1", "search");
2316        m.record_agent_tool_call("agent-2", "lookup");
2317        m.record_agent_tool_failure("agent-1", "search");
2318
2319        let calls = m.per_agent_tool_calls_snapshot();
2320        assert_eq!(calls.get("agent-1").and_then(|t| t.get("search")).copied(), Some(2));
2321        assert_eq!(calls.get("agent-2").and_then(|t| t.get("lookup")).copied(), Some(1));
2322
2323        let failures = m.per_agent_tool_failures_snapshot();
2324        assert_eq!(failures.get("agent-1").and_then(|t| t.get("search")).copied(), Some(1));
2325
2326        // Also check snapshot includes them
2327        let snap = m.snapshot();
2328        assert_eq!(snap.per_agent_tool_calls.get("agent-1").and_then(|t| t.get("search")).copied(), Some(2));
2329
2330        // Reset clears them
2331        m.reset();
2332        assert!(m.per_agent_tool_calls_snapshot().is_empty());
2333        assert!(m.per_agent_tool_failures_snapshot().is_empty());
2334    }
2335
2336    // ── New API tests (Rounds 4-8) ────────────────────────────────────────────
2337
2338    #[test]
2339    fn test_latency_histogram_min_max_ms() {
2340        let h = LatencyHistogram::default();
2341        assert!(h.min_ms().is_none());
2342        assert!(h.max_ms().is_none());
2343
2344        h.record(3);  // bucket 1 (≤5ms)
2345        h.record(200); // bucket 5 (≤500ms)
2346        assert!(h.min_ms().is_some());
2347        assert!(h.max_ms().is_some());
2348        assert!(h.min_ms().unwrap() <= h.max_ms().unwrap());
2349    }
2350
2351    #[test]
2352    fn test_latency_histogram_p50_p95_p99() {
2353        let h = LatencyHistogram::default();
2354        for _ in 0..100 {
2355            h.record(5); // all in ≤5ms bucket
2356        }
2357        // p50, p95, p99 should all resolve to the same bucket bound
2358        let p50 = h.p50();
2359        let p95 = h.p95();
2360        let p99 = h.p99();
2361        assert_eq!(p50, p95);
2362        assert_eq!(p95, p99);
2363    }
2364
2365    #[test]
2366    fn test_metrics_snapshot_delta_reflects_increments() {
2367        let m = RuntimeMetrics::new();
2368        let before = m.snapshot();
2369        m.total_steps.fetch_add(5, std::sync::atomic::Ordering::Relaxed);
2370        m.total_tool_calls.fetch_add(3, std::sync::atomic::Ordering::Relaxed);
2371        let after = m.snapshot();
2372        let delta = MetricsSnapshot::delta(&after, &before);
2373        assert_eq!(delta.total_steps, 5);
2374        assert_eq!(delta.total_tool_calls, 3);
2375    }
2376
2377    #[test]
2378    fn test_metrics_snapshot_display_contains_key_fields() {
2379        let m = RuntimeMetrics::new();
2380        let snap = m.snapshot();
2381        let s = snap.to_string();
2382        assert!(s.contains("sessions"));
2383        assert!(s.contains("steps"));
2384        assert!(s.contains("latency_mean"));
2385    }
2386
2387    #[test]
2388    fn test_failure_rate_zero_when_no_calls() {
2389        let m = RuntimeMetrics::new();
2390        assert_eq!(m.failure_rate(), 0.0);
2391    }
2392
2393    #[test]
2394    fn test_failure_rate_correct_proportion() {
2395        let m = RuntimeMetrics::new();
2396        m.record_tool_call("tool_a");
2397        m.record_tool_call("tool_a");
2398        m.record_tool_failure("tool_a");
2399        // 1 failure out of 2 total = 0.5
2400        assert!((m.failure_rate() - 0.5).abs() < 1e-9);
2401    }
2402
2403    #[test]
2404    fn test_failure_rate_all_failed() {
2405        let m = RuntimeMetrics::new();
2406        m.record_tool_call("x");
2407        m.record_tool_failure("x");
2408        assert!((m.failure_rate() - 1.0).abs() < 1e-9);
2409    }
2410
2411    #[test]
2412    fn test_top_tools_by_calls_returns_top_n() {
2413        let m = RuntimeMetrics::new();
2414        for _ in 0..5 { m.record_tool_call("a"); }
2415        for _ in 0..3 { m.record_tool_call("b"); }
2416        for _ in 0..1 { m.record_tool_call("c"); }
2417        let top = m.top_tools_by_calls(2);
2418        assert_eq!(top.len(), 2);
2419        assert_eq!(top[0].0, "a");
2420        assert_eq!(top[1].0, "b");
2421    }
2422
2423    #[test]
2424    fn test_top_tools_by_calls_returns_all_when_n_exceeds_count() {
2425        let m = RuntimeMetrics::new();
2426        m.record_tool_call("only");
2427        let top = m.top_tools_by_calls(10);
2428        assert_eq!(top.len(), 1);
2429        assert_eq!(top[0].0, "only");
2430    }
2431
2432    #[test]
2433    fn test_metrics_snapshot_to_json_contains_key_fields() {
2434        let m = RuntimeMetrics::new();
2435        m.record_tool_call("t");
2436        let snap = m.snapshot();
2437        let json = snap.to_json();
2438        assert!(json.get("total_sessions").is_some());
2439        assert!(json.get("total_steps").is_some());
2440        assert!(json.get("total_tool_calls").is_some());
2441    }
2442
2443    #[test]
2444    fn test_metrics_snapshot_is_zero_on_new_metrics() {
2445        let m = RuntimeMetrics::new();
2446        assert!(m.snapshot().is_zero());
2447    }
2448
2449    #[test]
2450    fn test_metrics_snapshot_is_zero_false_after_activity() {
2451        let m = RuntimeMetrics::new();
2452        m.record_tool_call("t");
2453        assert!(!m.snapshot().is_zero());
2454    }
2455
2456    #[test]
2457    fn test_tool_call_count_returns_per_tool_count() {
2458        let m = RuntimeMetrics::new();
2459        m.record_tool_call("search");
2460        m.record_tool_call("search");
2461        m.record_tool_call("fetch");
2462        let snap = m.snapshot();
2463        assert_eq!(snap.tool_call_count("search"), 2);
2464        assert_eq!(snap.tool_call_count("fetch"), 1);
2465        assert_eq!(snap.tool_call_count("absent"), 0);
2466    }
2467
2468    #[test]
2469    fn test_tool_failure_count_returns_per_tool_failures() {
2470        let m = RuntimeMetrics::new();
2471        m.record_tool_call("t");
2472        m.record_tool_failure("t");
2473        let snap = m.snapshot();
2474        assert_eq!(snap.tool_failure_count("t"), 1);
2475        assert_eq!(snap.tool_failure_count("other"), 0);
2476    }
2477
2478    #[test]
2479    fn test_latency_histogram_clear_resets_counts() {
2480        let h = LatencyHistogram::default();
2481        h.record(10);
2482        h.record(20);
2483        assert_eq!(h.count(), 2);
2484        h.clear();
2485        assert_eq!(h.count(), 0);
2486    }
2487
2488    #[test]
2489    fn test_metrics_snapshot_tool_names_sorted() {
2490        let m = RuntimeMetrics::new();
2491        m.record_tool_call("zebra");
2492        m.record_tool_call("alpha");
2493        m.record_tool_call("mango");
2494        let snap = m.snapshot();
2495        assert_eq!(snap.tool_names(), vec!["alpha", "mango", "zebra"]);
2496    }
2497
2498    // ── Round 4: top_tools_by_failures / LatencyHistogram::sum_ms ────────────
2499
2500    #[test]
2501    fn test_top_tools_by_failures_returns_top_n_descending() {
2502        let m = RuntimeMetrics::new();
2503        m.record_tool_failure("a");
2504        m.record_tool_failure("a");
2505        m.record_tool_failure("a");
2506        m.record_tool_failure("b");
2507        m.record_tool_failure("b");
2508        m.record_tool_failure("c");
2509        let top2 = m.top_tools_by_failures(2);
2510        assert_eq!(top2.len(), 2);
2511        assert_eq!(top2[0].0, "a");
2512        assert_eq!(top2[0].1, 3);
2513        assert_eq!(top2[1].0, "b");
2514        assert_eq!(top2[1].1, 2);
2515    }
2516
2517    #[test]
2518    fn test_top_tools_by_failures_n_larger_than_tools() {
2519        let m = RuntimeMetrics::new();
2520        m.record_tool_failure("only");
2521        let top = m.top_tools_by_failures(10);
2522        assert_eq!(top.len(), 1);
2523        assert_eq!(top[0].0, "only");
2524    }
2525
2526    #[test]
2527    fn test_latency_histogram_sum_ms_accumulates() {
2528        let h = LatencyHistogram::default();
2529        h.record(100);
2530        h.record(200);
2531        h.record(300);
2532        assert_eq!(h.sum_ms(), 600);
2533    }
2534
2535    #[test]
2536    fn test_latency_histogram_sum_ms_zero_when_empty() {
2537        let h = LatencyHistogram::default();
2538        assert_eq!(h.sum_ms(), 0);
2539    }
2540
2541    // ── Round 16: mean_ms, failure_rate ──────────────────────────────────────
2542
2543    #[test]
2544    fn test_latency_histogram_mean_ms_zero_when_empty() {
2545        let h = LatencyHistogram::default();
2546        assert_eq!(h.mean_ms(), 0.0);
2547    }
2548
2549    #[test]
2550    fn test_latency_histogram_mean_ms_computes_average() {
2551        let h = LatencyHistogram::default();
2552        h.record(100);
2553        h.record(200);
2554        h.record(300);
2555        assert!((h.mean_ms() - 200.0).abs() < 1.0);
2556    }
2557
2558    #[test]
2559    fn test_metrics_snapshot_failure_rate_zero_when_no_calls() {
2560        let m = RuntimeMetrics::new();
2561        let snap = m.snapshot();
2562        assert_eq!(snap.failure_rate(), 0.0);
2563    }
2564
2565    #[test]
2566    fn test_metrics_snapshot_failure_rate_correct() {
2567        let m = RuntimeMetrics::new();
2568        m.record_tool_call("t");
2569        m.record_tool_call("t");
2570        m.record_tool_failure("t");
2571        let snap = m.snapshot();
2572        assert!((snap.failure_rate() - 0.5).abs() < 1e-9);
2573    }
2574
2575    // ── Round 20: success_rate / is_active / checkpoint_errors ────────────────
2576
2577    #[test]
2578    fn test_success_rate_one_when_no_failures() {
2579        let m = RuntimeMetrics::new();
2580        m.record_tool_call("x");
2581        assert!((m.success_rate() - 1.0).abs() < 1e-9);
2582    }
2583
2584    #[test]
2585    fn test_success_rate_half_when_half_failed() {
2586        let m = RuntimeMetrics::new();
2587        m.record_tool_call("x");
2588        m.record_tool_call("x");
2589        m.record_tool_failure("x");
2590        assert!((m.success_rate() - 0.5).abs() < 1e-9);
2591    }
2592
2593    #[test]
2594    fn test_success_rate_one_when_no_calls() {
2595        let m = RuntimeMetrics::new();
2596        // Vacuously all succeeded — no calls means success_rate = 1.0
2597        assert!((m.success_rate() - 1.0).abs() < 1e-9);
2598    }
2599
2600    #[test]
2601    fn test_is_active_false_when_no_sessions() {
2602        let m = RuntimeMetrics::new();
2603        assert!(!m.is_active());
2604    }
2605
2606    #[test]
2607    fn test_is_active_true_when_session_active() {
2608        let m = RuntimeMetrics::new();
2609        m.active_sessions.fetch_add(1, Ordering::Relaxed);
2610        assert!(m.is_active());
2611        m.active_sessions.fetch_sub(1, Ordering::Relaxed);
2612        assert!(!m.is_active());
2613    }
2614
2615    #[test]
2616    fn test_checkpoint_errors_increments() {
2617        let m = RuntimeMetrics::new();
2618        assert_eq!(m.checkpoint_errors(), 0);
2619        m.checkpoint_errors.fetch_add(3, Ordering::Relaxed);
2620        assert_eq!(m.checkpoint_errors(), 3);
2621    }
2622
2623    #[test]
2624    fn test_checkpoint_errors_reset_to_zero() {
2625        let m = RuntimeMetrics::new();
2626        m.checkpoint_errors.fetch_add(5, Ordering::Relaxed);
2627        m.reset();
2628        assert_eq!(m.checkpoint_errors(), 0);
2629    }
2630
2631    // ── Round 10: LatencyHistogram::std_dev_ms ────────────────────────────────
2632
2633    #[test]
2634    fn test_std_dev_ms_zero_for_no_samples() {
2635        let h = LatencyHistogram::default();
2636        assert!((h.std_dev_ms() - 0.0).abs() < 1e-9);
2637    }
2638
2639    #[test]
2640    fn test_std_dev_ms_zero_for_single_sample() {
2641        let h = LatencyHistogram::default();
2642        h.record(5);
2643        assert!((h.std_dev_ms() - 0.0).abs() < 1e-9);
2644    }
2645
2646    #[test]
2647    fn test_std_dev_ms_positive_for_varied_samples() {
2648        let h = LatencyHistogram::default();
2649        h.record(1);    // bucket 0 mid ~0.5
2650        h.record(200);  // bucket 5 mid ~300
2651        // Two samples with very different values → std_dev > 0
2652        assert!(h.std_dev_ms() > 0.0);
2653    }
2654
2655    #[test]
2656    fn test_std_dev_ms_zero_for_identical_samples() {
2657        let h = LatencyHistogram::default();
2658        h.record(5);
2659        h.record(5);
2660        h.record(5);
2661        // All samples in the same bucket → std_dev ≈ 0
2662        assert!(h.std_dev_ms() < 1.0);
2663    }
2664
2665    // ── Round 11: RuntimeMetrics::tool_success_rate ───────────────────────────
2666
2667    #[test]
2668    fn test_tool_success_rate_one_when_no_calls() {
2669        let m = RuntimeMetrics::new();
2670        assert!((m.tool_success_rate() - 1.0).abs() < 1e-9);
2671    }
2672
2673    #[test]
2674    fn test_tool_success_rate_one_when_no_failures() {
2675        let m = RuntimeMetrics::new();
2676        m.total_tool_calls.fetch_add(10, Ordering::Relaxed);
2677        assert!((m.tool_success_rate() - 1.0).abs() < 1e-9);
2678    }
2679
2680    #[test]
2681    fn test_tool_success_rate_half_when_half_fail() {
2682        let m = RuntimeMetrics::new();
2683        m.total_tool_calls.fetch_add(10, Ordering::Relaxed);
2684        m.failed_tool_calls.fetch_add(5, Ordering::Relaxed);
2685        assert!((m.tool_success_rate() - 0.5).abs() < 1e-9);
2686    }
2687
2688    #[test]
2689    fn test_tool_success_rate_zero_when_all_fail() {
2690        let m = RuntimeMetrics::new();
2691        m.total_tool_calls.fetch_add(4, Ordering::Relaxed);
2692        m.failed_tool_calls.fetch_add(4, Ordering::Relaxed);
2693        assert!(m.tool_success_rate().abs() < 1e-9);
2694    }
2695
2696    // ── Round 12: step_latency_p50/p99, LatencyHistogram::range_ms ───────────
2697
2698    #[test]
2699    fn test_step_latency_p50_zero_when_empty() {
2700        let m = RuntimeMetrics::new();
2701        assert_eq!(m.step_latency_p50(), 0);
2702    }
2703
2704    #[test]
2705    fn test_step_latency_p99_zero_when_empty() {
2706        let m = RuntimeMetrics::new();
2707        assert_eq!(m.step_latency_p99(), 0);
2708    }
2709
2710    #[test]
2711    fn test_step_latency_p50_after_recording() {
2712        let m = RuntimeMetrics::new();
2713        for _ in 0..10 {
2714            m.step_latency.record(100);
2715        }
2716        assert!(m.step_latency_p50() > 0);
2717    }
2718
2719    #[test]
2720    fn test_step_latency_p99_gte_p50() {
2721        let m = RuntimeMetrics::new();
2722        for v in [10, 20, 30, 40, 500] {
2723            m.step_latency.record(v);
2724        }
2725        assert!(m.step_latency_p99() >= m.step_latency_p50());
2726    }
2727
2728    #[test]
2729    fn test_latency_histogram_range_ms_none_when_empty() {
2730        let h = LatencyHistogram::default();
2731        assert!(h.range_ms().is_none());
2732    }
2733
2734    #[test]
2735    fn test_latency_histogram_range_ms_some_for_single_sample() {
2736        let h = LatencyHistogram::default();
2737        h.record(100);
2738        // min/max are both derived from bucket boundaries, range is Some
2739        assert!(h.range_ms().is_some());
2740    }
2741
2742    #[test]
2743    fn test_latency_histogram_range_ms_positive_for_spread() {
2744        let h = LatencyHistogram::default();
2745        h.record(10);
2746        h.record(1000);
2747        let range = h.range_ms().unwrap();
2748        assert!(range > 0, "range should be > 0 for spread samples, got {range}");
2749    }
2750
2751    // ── Round 13: avg_tool_calls_per_session ──────────────────────────────────
2752
2753    #[test]
2754    fn test_avg_tool_calls_per_session_zero_when_no_sessions() {
2755        let m = RuntimeMetrics::new();
2756        assert!((m.avg_tool_calls_per_session() - 0.0).abs() < 1e-9);
2757    }
2758
2759    #[test]
2760    fn test_avg_tool_calls_per_session_correct_ratio() {
2761        let m = RuntimeMetrics::new();
2762        m.total_sessions.fetch_add(2, Ordering::Relaxed);
2763        m.total_tool_calls.fetch_add(10, Ordering::Relaxed);
2764        assert!((m.avg_tool_calls_per_session() - 5.0).abs() < 1e-9);
2765    }
2766
2767    // ── Round 27: interquartile_range_ms, avg_steps_per_session ──────────────
2768
2769    #[test]
2770    fn test_interquartile_range_ms_empty_is_zero() {
2771        let h = LatencyHistogram::default();
2772        assert_eq!(h.interquartile_range_ms(), 0);
2773    }
2774
2775    #[test]
2776    fn test_interquartile_range_ms_saturates_not_panics() {
2777        let h = LatencyHistogram::default();
2778        for _ in 0..50 {
2779            h.record(10);
2780        }
2781        for _ in 0..50 {
2782            h.record(500);
2783        }
2784        let iqr = h.interquartile_range_ms();
2785        // IQR must be non-negative (saturating_sub guarantee)
2786        assert!(iqr < u64::MAX);
2787    }
2788
2789    #[test]
2790    fn test_avg_steps_per_session_zero_when_no_sessions() {
2791        let snap = MetricsSnapshot::default();
2792        assert!((snap.avg_steps_per_session() - 0.0).abs() < 1e-9);
2793    }
2794
2795    #[test]
2796    fn test_avg_steps_per_session_correct_ratio() {
2797        let snap = MetricsSnapshot {
2798            total_sessions: 4,
2799            total_steps: 20,
2800            ..Default::default()
2801        };
2802        assert!((snap.avg_steps_per_session() - 5.0).abs() < 1e-9);
2803    }
2804
2805    // ── Round 15: LatencyHistogram::is_empty, RuntimeMetrics::checkpoint_error_rate
2806
2807    #[test]
2808    fn test_latency_histogram_is_empty_true_initially() {
2809        let h = LatencyHistogram::default();
2810        assert!(h.is_empty());
2811    }
2812
2813    #[test]
2814    fn test_latency_histogram_is_empty_false_after_record() {
2815        let h = LatencyHistogram::default();
2816        h.record(10);
2817        assert!(!h.is_empty());
2818    }
2819
2820    #[test]
2821    fn test_checkpoint_error_rate_zero_when_no_sessions() {
2822        let m = RuntimeMetrics::new();
2823        assert!((m.checkpoint_error_rate() - 0.0).abs() < 1e-9);
2824    }
2825
2826    #[test]
2827    fn test_checkpoint_error_rate_ratio_correct() {
2828        let m = RuntimeMetrics::new();
2829        m.total_sessions.fetch_add(4, std::sync::atomic::Ordering::Relaxed);
2830        m.checkpoint_errors.fetch_add(2, std::sync::atomic::Ordering::Relaxed);
2831        assert!((m.checkpoint_error_rate() - 0.5).abs() < 1e-9);
2832    }
2833
2834    // ── Round 16: LatencyHistogram::mode_bucket_ms ───────────────────────────
2835
2836    #[test]
2837    fn test_mode_bucket_ms_none_when_empty() {
2838        let h = LatencyHistogram::default();
2839        assert!(h.mode_bucket_ms().is_none());
2840    }
2841
2842    #[test]
2843    fn test_mode_bucket_ms_returns_bucket_with_most_samples() {
2844        let h = LatencyHistogram::default();
2845        // Record many samples in the ~10ms range
2846        for _ in 0..10 {
2847            h.record(5);
2848        }
2849        // Record fewer samples in the ~500ms range
2850        for _ in 0..2 {
2851            h.record(400);
2852        }
2853        let mode = h.mode_bucket_ms().unwrap();
2854        // The low-latency bucket should win
2855        assert!(mode <= 50, "expected low-latency bucket, got {mode}");
2856    }
2857
2858    // ── Round 17: MetricsSnapshot::error_rate / memory_recall_rate ───────────
2859
2860    #[test]
2861    fn test_metrics_snapshot_error_rate_zero_when_no_tool_calls() {
2862        let snap = MetricsSnapshot::default();
2863        assert!((snap.error_rate() - 0.0).abs() < 1e-9);
2864    }
2865
2866    #[test]
2867    fn test_metrics_snapshot_error_rate_correct_ratio() {
2868        let snap = MetricsSnapshot {
2869            total_tool_calls: 10,
2870            failed_tool_calls: 3,
2871            ..Default::default()
2872        };
2873        assert!((snap.error_rate() - 0.3).abs() < 1e-9);
2874    }
2875
2876    #[test]
2877    fn test_metrics_snapshot_memory_recall_rate_zero_when_no_sessions() {
2878        let snap = MetricsSnapshot::default();
2879        assert!((snap.memory_recall_rate() - 0.0).abs() < 1e-9);
2880    }
2881
2882    #[test]
2883    fn test_metrics_snapshot_memory_recall_rate_correct_ratio() {
2884        let snap = MetricsSnapshot {
2885            total_sessions: 5,
2886            memory_recall_count: 15,
2887            ..Default::default()
2888        };
2889        assert!((snap.memory_recall_rate() - 3.0).abs() < 1e-9);
2890    }
2891
2892    // ── Round 22: p10 ─────────────────────────────────────────────────────────
2893
2894    #[test]
2895    fn test_latency_histogram_p10_zero_when_empty() {
2896        let h = LatencyHistogram::default();
2897        assert_eq!(h.p10(), 0);
2898    }
2899
2900    #[test]
2901    fn test_latency_histogram_p10_lte_p50_lte_p99() {
2902        let h = LatencyHistogram::default();
2903        for ms in [10, 20, 50, 100, 200, 500, 1000] {
2904            h.record(ms);
2905        }
2906        assert!(h.p10() <= h.p50());
2907        assert!(h.p50() <= h.p99());
2908    }
2909
2910    // ── Round 29: is_below_p99, MetricsSnapshot::is_healthy ──────────────────
2911
2912    #[test]
2913    fn test_latency_histogram_is_below_p99_true_when_empty() {
2914        let h = LatencyHistogram::default();
2915        assert!(h.is_below_p99(1)); // p99 == 0 < 1
2916    }
2917
2918    #[test]
2919    fn test_latency_histogram_is_below_p99_true_when_under_threshold() {
2920        let h = LatencyHistogram::default();
2921        for _ in 0..100 {
2922            h.record(50);
2923        }
2924        assert!(h.is_below_p99(100));
2925    }
2926
2927    #[test]
2928    fn test_latency_histogram_is_below_p99_false_when_at_threshold() {
2929        let h = LatencyHistogram::default();
2930        for _ in 0..100 {
2931            h.record(200);
2932        }
2933        assert!(!h.is_below_p99(200)); // p99 == 200, not strictly less
2934    }
2935
2936    #[test]
2937    fn test_metrics_snapshot_is_healthy_true_when_default() {
2938        let snap = MetricsSnapshot::default();
2939        assert!(snap.is_healthy());
2940    }
2941
2942    #[test]
2943    fn test_metrics_snapshot_is_healthy_false_when_failed_tool_calls() {
2944        let snap = MetricsSnapshot { failed_tool_calls: 1, ..Default::default() };
2945        assert!(!snap.is_healthy());
2946    }
2947
2948    #[test]
2949    fn test_metrics_snapshot_is_healthy_false_when_backpressure_shed() {
2950        let snap = MetricsSnapshot { backpressure_shed_count: 2, ..Default::default() };
2951        assert!(!snap.is_healthy());
2952    }
2953
2954    #[test]
2955    fn test_metrics_snapshot_is_healthy_false_when_checkpoint_errors() {
2956        let snap = MetricsSnapshot { checkpoint_errors: 1, ..Default::default() };
2957        assert!(!snap.is_healthy());
2958    }
2959
2960    // ── Round 23: median_ms / steps_per_session / p50_latency_ms ─────────────
2961
2962    #[test]
2963    fn test_latency_histogram_median_ms_equals_p50() {
2964        let h = LatencyHistogram::default();
2965        for ms in [10, 50, 100, 200, 500] {
2966            h.record(ms);
2967        }
2968        assert_eq!(h.median_ms(), h.p50());
2969    }
2970
2971    #[test]
2972    fn test_latency_histogram_median_ms_zero_when_empty() {
2973        let h = LatencyHistogram::default();
2974        assert_eq!(h.median_ms(), 0);
2975    }
2976
2977    #[test]
2978    fn test_metrics_snapshot_steps_per_session_zero_when_no_sessions() {
2979        let snap = MetricsSnapshot::default();
2980        assert!((snap.steps_per_session() - 0.0).abs() < 1e-9);
2981    }
2982
2983    #[test]
2984    fn test_metrics_snapshot_steps_per_session_correct_ratio() {
2985        let snap = MetricsSnapshot {
2986            total_sessions: 4,
2987            total_steps: 20,
2988            ..Default::default()
2989        };
2990        assert!((snap.steps_per_session() - 5.0).abs() < 1e-9);
2991    }
2992
2993    #[test]
2994    fn test_runtime_metrics_p50_latency_ms_zero_when_no_data() {
2995        let m = RuntimeMetrics::new();
2996        assert_eq!(m.p50_latency_ms(), 0);
2997    }
2998
2999    #[test]
3000    fn test_runtime_metrics_p50_latency_ms_matches_histogram_p50() {
3001        let m = RuntimeMetrics::new();
3002        for ms in [10_u64, 50, 100, 200, 500] {
3003            m.step_latency.record(ms);
3004        }
3005        assert_eq!(m.p50_latency_ms(), m.step_latency.p50());
3006    }
3007
3008    // ── Round 25: histogram p25/p75/p90/min, has_data; snapshot helpers ───────
3009
3010    #[test]
3011    fn test_latency_histogram_has_data_false_when_empty() {
3012        let h = LatencyHistogram::default();
3013        assert!(!h.has_data());
3014    }
3015
3016    #[test]
3017    fn test_latency_histogram_has_data_true_after_record() {
3018        let h = LatencyHistogram::default();
3019        h.record(100);
3020        assert!(h.has_data());
3021    }
3022
3023    #[test]
3024    fn test_latency_histogram_min_ms_none_when_empty() {
3025        let h = LatencyHistogram::default();
3026        assert_eq!(h.min_ms(), None);
3027    }
3028
3029    #[test]
3030    fn test_latency_histogram_min_ms_some_after_record() {
3031        let h = LatencyHistogram::default();
3032        h.record(50);
3033        assert!(h.min_ms().is_some());
3034    }
3035
3036    #[test]
3037    fn test_latency_histogram_p25_lte_p75() {
3038        let h = LatencyHistogram::default();
3039        for ms in [10_u64, 50, 100, 200, 500, 1000, 2000, 5000] {
3040            h.record(ms);
3041        }
3042        assert!(h.p25() <= h.p75());
3043    }
3044
3045    #[test]
3046    fn test_latency_histogram_p90_between_p50_and_p99() {
3047        let h = LatencyHistogram::default();
3048        for ms in [10_u64, 50, 100, 200, 500] {
3049            h.record(ms);
3050        }
3051        assert!(h.p50() <= h.p90());
3052        assert!(h.p90() <= h.p99());
3053    }
3054
3055    #[test]
3056    fn test_metrics_snapshot_tool_success_count_correct() {
3057        let snap = MetricsSnapshot {
3058            per_tool_calls: [("search".to_string(), 10u64)].into(),
3059            per_tool_failures: [("search".to_string(), 3u64)].into(),
3060            ..Default::default()
3061        };
3062        assert_eq!(snap.tool_success_count("search"), 7);
3063    }
3064
3065    #[test]
3066    fn test_metrics_snapshot_tool_success_count_zero_for_unknown_tool() {
3067        let snap = MetricsSnapshot::default();
3068        assert_eq!(snap.tool_success_count("unknown"), 0);
3069    }
3070
3071    #[test]
3072    fn test_metrics_snapshot_tool_failure_rate_correct_ratio() {
3073        let snap = MetricsSnapshot {
3074            per_tool_calls: [("lookup".to_string(), 4u64)].into(),
3075            per_tool_failures: [("lookup".to_string(), 1u64)].into(),
3076            ..Default::default()
3077        };
3078        assert!((snap.tool_failure_rate("lookup") - 0.25).abs() < 1e-9);
3079    }
3080
3081    #[test]
3082    fn test_metrics_snapshot_tool_failure_rate_zero_for_unknown_tool() {
3083        let snap = MetricsSnapshot::default();
3084        assert!((snap.tool_failure_rate("none") - 0.0).abs() < 1e-9);
3085    }
3086
3087    #[test]
3088    fn test_metrics_snapshot_total_successful_tool_calls() {
3089        let snap = MetricsSnapshot {
3090            total_tool_calls: 20,
3091            failed_tool_calls: 5,
3092            ..Default::default()
3093        };
3094        assert_eq!(snap.total_successful_tool_calls(), 15);
3095    }
3096
3097    #[test]
3098    fn test_runtime_metrics_per_tool_calls_snapshot_increments() {
3099        let m = RuntimeMetrics::new();
3100        m.record_tool_call("search");
3101        m.record_tool_call("search");
3102        m.record_tool_call("lookup");
3103        let snap = m.per_tool_calls_snapshot();
3104        assert_eq!(snap.get("search"), Some(&2));
3105        assert_eq!(snap.get("lookup"), Some(&1));
3106    }
3107
3108    #[test]
3109    fn test_runtime_metrics_per_tool_failures_snapshot() {
3110        let m = RuntimeMetrics::new();
3111        m.record_tool_call("search");
3112        m.record_tool_failure("search");
3113        let snap = m.per_tool_failures_snapshot();
3114        assert_eq!(snap.get("search"), Some(&1));
3115    }
3116
3117    #[test]
3118    fn test_runtime_metrics_record_agent_tool_call_tracked() {
3119        let m = RuntimeMetrics::new();
3120        m.record_agent_tool_call("agent-1", "search");
3121        m.record_agent_tool_call("agent-1", "search");
3122        let snap = m.per_agent_tool_calls_snapshot();
3123        assert_eq!(snap.get("agent-1").and_then(|t| t.get("search")), Some(&2));
3124    }
3125
3126    #[test]
3127    fn test_runtime_metrics_per_agent_tool_failures_snapshot() {
3128        let m = RuntimeMetrics::new();
3129        m.record_agent_tool_failure("agent-2", "lookup");
3130        let snap = m.per_agent_tool_failures_snapshot();
3131        assert_eq!(
3132            snap.get("agent-2").and_then(|t| t.get("lookup")),
3133            Some(&1)
3134        );
3135    }
3136
3137    // ── Round 24: coefficient_of_variation ────────────────────────────────────
3138
3139    #[test]
3140    fn test_coefficient_of_variation_zero_when_empty() {
3141        let h = LatencyHistogram::default();
3142        assert!((h.coefficient_of_variation() - 0.0).abs() < 1e-9);
3143    }
3144
3145    #[test]
3146    fn test_coefficient_of_variation_positive_with_spread() {
3147        let h = LatencyHistogram::default();
3148        // Wide spread: 10ms and 1000ms — std_dev should be significant
3149        for _ in 0..50 {
3150            h.record(10);
3151        }
3152        for _ in 0..50 {
3153            h.record(1000);
3154        }
3155        let cv = h.coefficient_of_variation();
3156        assert!(cv > 0.0, "CV should be positive for spread data, got {cv}");
3157    }
3158
3159    #[test]
3160    fn test_coefficient_of_variation_near_zero_for_uniform_data() {
3161        let h = LatencyHistogram::default();
3162        // All the same latency bucket → std_dev ≈ 0
3163        for _ in 0..100 {
3164            h.record(50);
3165        }
3166        // CV won't be exactly 0 due to bucket approximation, but should be small
3167        assert!(h.coefficient_of_variation() < 1.0);
3168    }
3169
3170    // ── Round 31: LatencyHistogram::percentile, RuntimeMetrics helpers ────────
3171
3172    #[test]
3173    fn test_latency_histogram_percentile_zero_when_empty() {
3174        let h = LatencyHistogram::default();
3175        assert_eq!(h.percentile(0.5), 0);
3176    }
3177
3178    #[test]
3179    fn test_latency_histogram_percentile_50_matches_p50() {
3180        let h = LatencyHistogram::default();
3181        for ms in [10, 20, 30, 40, 50] {
3182            h.record(ms);
3183        }
3184        assert_eq!(h.percentile(0.5), h.p50());
3185    }
3186
3187    #[test]
3188    fn test_latency_histogram_percentile_99_matches_p99() {
3189        let h = LatencyHistogram::default();
3190        for ms in [10, 50, 100, 500, 1000] {
3191            h.record(ms);
3192        }
3193        assert_eq!(h.percentile(0.99), h.p99());
3194    }
3195
3196    #[test]
3197    fn test_runtime_metrics_record_agent_tool_failure_appears_in_snapshot() {
3198        let m = RuntimeMetrics::new();
3199        m.record_agent_tool_failure("agent-1", "search_tool");
3200        let snapshot = m.per_agent_tool_failures_snapshot();
3201        assert_eq!(snapshot.get("agent-1").and_then(|t| t.get("search_tool")), Some(&1));
3202    }
3203
3204    #[test]
3205    fn test_runtime_metrics_per_agent_tool_calls_snapshot_empty_initially() {
3206        let m = RuntimeMetrics::new();
3207        assert!(m.per_agent_tool_calls_snapshot().is_empty());
3208    }
3209
3210    #[test]
3211    fn test_runtime_metrics_record_step_latency_is_reflected_in_p50() {
3212        let m = RuntimeMetrics::new();
3213        for _ in 0..20 {
3214            m.record_step_latency(100);
3215        }
3216        // After recording 20 samples at 100ms, step latency p50 must be around 100ms.
3217        // We verify the operation doesn't panic and changes the histogram state.
3218        let snap = m.snapshot();
3219        assert!(snap.total_sessions == 0); // unrelated sanity check
3220    }
3221
3222    // ── Round 26: has_errors / is_above_p99 ───────────────────────────────────
3223
3224    #[test]
3225    fn test_metrics_snapshot_has_errors_false_when_clean() {
3226        let snap = MetricsSnapshot::default();
3227        assert!(!snap.has_errors());
3228    }
3229
3230    #[test]
3231    fn test_metrics_snapshot_has_errors_true_when_failed_tool_calls() {
3232        let snap = MetricsSnapshot { failed_tool_calls: 2, ..Default::default() };
3233        assert!(snap.has_errors());
3234    }
3235
3236    #[test]
3237    fn test_metrics_snapshot_has_errors_true_when_checkpoint_errors() {
3238        let snap = MetricsSnapshot { checkpoint_errors: 1, ..Default::default() };
3239        assert!(snap.has_errors());
3240    }
3241
3242    #[test]
3243    fn test_latency_histogram_is_above_p99_false_for_low_latency() {
3244        let h = LatencyHistogram::default();
3245        for _ in 0..200 {
3246            h.record(50);
3247        }
3248        assert!(!h.is_above_p99(50));
3249    }
3250
3251    #[test]
3252    fn test_latency_histogram_is_above_p99_true_for_high_latency() {
3253        let h = LatencyHistogram::default();
3254        for _ in 0..200 {
3255            h.record(50);
3256        }
3257        // p99 will be ~50ms; 10_000ms should be well above it
3258        assert!(h.is_above_p99(10_000));
3259    }
3260
3261    // ── Round 27: sample_count / tool_call_rate ───────────────────────────────
3262
3263    #[test]
3264    fn test_latency_histogram_sample_count_zero_when_empty() {
3265        let h = LatencyHistogram::default();
3266        assert_eq!(h.sample_count(), 0);
3267    }
3268
3269    #[test]
3270    fn test_latency_histogram_sample_count_matches_records() {
3271        let h = LatencyHistogram::default();
3272        for _ in 0..7 {
3273            h.record(100);
3274        }
3275        assert_eq!(h.sample_count(), 7);
3276    }
3277
3278    #[test]
3279    fn test_metrics_snapshot_tool_call_rate_zero_when_no_sessions() {
3280        let snap = MetricsSnapshot::default();
3281        assert!((snap.tool_call_rate() - 0.0).abs() < 1e-9);
3282    }
3283
3284    #[test]
3285    fn test_metrics_snapshot_tool_call_rate_correct_ratio() {
3286        let snap = MetricsSnapshot {
3287            total_sessions: 4,
3288            total_tool_calls: 20,
3289            ..Default::default()
3290        };
3291        assert!((snap.tool_call_rate() - 5.0).abs() < 1e-9);
3292    }
3293
    // ── Round 28: backpressure_rate / percentile_spread ───────────────────────

    #[test]
    fn test_metrics_snapshot_backpressure_rate_zero_when_no_sessions() {
        // Division guard: no sessions must yield exactly 0.0, not NaN.
        let snap = MetricsSnapshot::default();
        assert!((snap.backpressure_rate() - 0.0).abs() < 1e-9);
    }

    #[test]
    fn test_metrics_snapshot_backpressure_rate_correct_ratio() {
        // 4 shed events over 2 sessions → 2.0 per session.
        let snap = MetricsSnapshot {
            total_sessions: 2,
            backpressure_shed_count: 4,
            ..Default::default()
        };
        assert!((snap.backpressure_rate() - 2.0).abs() < 1e-9);
    }

    #[test]
    fn test_latency_histogram_percentile_spread_zero_when_empty() {
        // With no samples there are no percentiles; the spread must be 0.
        let h = LatencyHistogram::default();
        assert_eq!(h.percentile_spread(), 0);
    }
3317
3318    #[test]
3319    fn test_latency_histogram_percentile_spread_nonnegative() {
3320        let h = LatencyHistogram::default();
3321        for _ in 0..100 {
3322            h.record(50);
3323        }
3324        for _ in 0..5 {
3325            h.record(500);
3326        }
3327        assert!(h.percentile_spread() >= 0);
3328    }
3329
    // ── Round 29: memory_efficiency / is_uniform ──────────────────────────────

    #[test]
    fn test_metrics_snapshot_memory_efficiency_zero_when_no_steps() {
        // Division guard: no steps must yield exactly 0.0, not NaN.
        let snap = MetricsSnapshot::default();
        assert!((snap.memory_efficiency() - 0.0).abs() < 1e-9);
    }

    #[test]
    fn test_metrics_snapshot_memory_efficiency_correct_ratio() {
        // 4 recalls over 10 steps → 0.4.
        let snap = MetricsSnapshot {
            total_steps: 10,
            memory_recall_count: 4,
            ..Default::default()
        };
        assert!((snap.memory_efficiency() - 0.4).abs() < 1e-9);
    }

    #[test]
    fn test_latency_histogram_is_uniform_true_when_empty() {
        // An empty histogram is treated as (vacuously) uniform.
        let h = LatencyHistogram::default();
        assert!(h.is_uniform());
    }

    #[test]
    fn test_latency_histogram_is_uniform_true_for_single_bucket() {
        let h = LatencyHistogram::default();
        for _ in 0..50 {
            h.record(50); // all in same bucket
        }
        assert!(h.is_uniform());
    }

    #[test]
    fn test_latency_histogram_is_uniform_false_for_mixed_latencies() {
        // 1ms and 1000ms fall into the lowest and highest buckets.
        let h = LatencyHistogram::default();
        h.record(1);
        h.record(1000);
        assert!(!h.is_uniform());
    }

    // ── Round 30: bucket_counts / active_session_ratio ────────────────────────

    #[test]
    fn test_latency_histogram_bucket_counts_all_zero_when_empty() {
        let h = LatencyHistogram::default();
        assert_eq!(h.bucket_counts(), [0u64; 7]);
    }

    #[test]
    fn test_latency_histogram_bucket_counts_increments_correct_bucket() {
        // Bucket upper bounds are inclusive, so a 1ms sample belongs to bucket 0.
        let h = LatencyHistogram::default();
        h.record(1); // should go into the first bucket (≤1ms)
        let counts = h.bucket_counts();
        assert_eq!(counts[0], 1);
        assert!(counts[1..].iter().all(|&c| c == 0));
    }

    #[test]
    fn test_metrics_snapshot_active_session_ratio_zero_when_no_sessions() {
        // Division guard: no sessions must yield exactly 0.0, not NaN.
        let snap = MetricsSnapshot::default();
        assert!((snap.active_session_ratio() - 0.0).abs() < 1e-9);
    }

    #[test]
    fn test_metrics_snapshot_active_session_ratio_correct() {
        // 3 active of 10 total → 0.3.
        let snap = MetricsSnapshot {
            total_sessions: 10,
            active_sessions: 3,
            ..Default::default()
        };
        assert!((snap.active_session_ratio() - 0.3).abs() < 1e-9);
    }
3403
3404    #[test]
3405    fn test_step_to_tool_ratio_correct_value() {
3406        let snap = MetricsSnapshot {
3407            total_steps: 4,
3408            total_tool_calls: 2,
3409            ..Default::default()
3410        };
3411        assert!((snap.step_to_tool_ratio() - 0.5).abs() < 1e-9);
3412    }
3413
3414    #[test]
3415    fn test_step_to_tool_ratio_zero_steps_returns_zero() {
3416        let snap = MetricsSnapshot {
3417            total_steps: 0,
3418            total_tool_calls: 5,
3419            ..Default::default()
3420        };
3421        assert_eq!(snap.step_to_tool_ratio(), 0.0);
3422    }
3423
    #[test]
    fn test_latency_histogram_min_occupied_ms_returns_smallest_occupied_bucket() {
        let h = LatencyHistogram::default();
        h.record(10); // falls in ≤10ms bucket (bound = 10)
        h.record(200); // falls in ≤500ms bucket
        // min_occupied should be the ≤10ms bucket bound = 10
        assert_eq!(h.min_occupied_ms(), Some(10));
    }

    #[test]
    fn test_latency_histogram_min_occupied_ms_empty_returns_none() {
        // No samples → no occupied bucket → None.
        let h = LatencyHistogram::default();
        assert_eq!(h.min_occupied_ms(), None);
    }

    #[test]
    fn test_metrics_snapshot_has_failures_true_when_failures_exist() {
        // A single failed tool call must register as a failure.
        let snap = MetricsSnapshot {
            failed_tool_calls: 1,
            ..Default::default()
        };
        assert!(snap.has_failures());
    }

    #[test]
    fn test_metrics_snapshot_has_failures_false_when_no_failures() {
        let snap = MetricsSnapshot::default();
        assert!(!snap.has_failures());
    }

    #[test]
    fn test_latency_histogram_max_occupied_ms_returns_largest_occupied_bucket() {
        // The largest occupied bucket's bound (500) is reported, not the
        // raw sample value (200).
        let h = LatencyHistogram::default();
        h.record(5);   // ≤5ms bucket
        h.record(200); // ≤500ms bucket
        assert_eq!(h.max_occupied_ms(), Some(500));
    }

    #[test]
    fn test_latency_histogram_max_occupied_ms_empty_returns_none() {
        let h = LatencyHistogram::default();
        assert_eq!(h.max_occupied_ms(), None);
    }

    #[test]
    fn test_latency_histogram_occupied_bucket_count_correct() {
        // Two samples in two distinct buckets → exactly 2 occupied buckets.
        let h = LatencyHistogram::default();
        h.record(5);   // bucket 1
        h.record(200); // bucket 5
        assert_eq!(h.occupied_bucket_count(), 2);
    }

    #[test]
    fn test_latency_histogram_occupied_bucket_count_empty_returns_zero() {
        let h = LatencyHistogram::default();
        assert_eq!(h.occupied_bucket_count(), 0);
    }

    #[test]
    fn test_metrics_snapshot_tool_diversity_counts_distinct_tools() {
        // Diversity counts distinct tool names, not call volumes.
        let snap = MetricsSnapshot {
            per_tool_calls: [("a".to_string(), 1u64), ("b".to_string(), 2u64)]
                .into_iter()
                .collect(),
            ..Default::default()
        };
        assert_eq!(snap.tool_diversity(), 2);
    }

    #[test]
    fn test_metrics_snapshot_tool_diversity_empty_returns_zero() {
        let snap = MetricsSnapshot::default();
        assert_eq!(snap.tool_diversity(), 0);
    }

    #[test]
    fn test_runtime_metrics_total_step_latency_ms_sums_recorded_latencies() {
        // 100ms + 200ms → 300ms cumulative.
        let m = RuntimeMetrics::new();
        m.record_step_latency(100);
        m.record_step_latency(200);
        assert_eq!(m.total_step_latency_ms(), 300);
    }

    #[test]
    fn test_runtime_metrics_total_step_latency_ms_zero_when_empty() {
        let m = RuntimeMetrics::new();
        assert_eq!(m.total_step_latency_ms(), 0);
    }

    #[test]
    fn test_metrics_snapshot_avg_failures_per_session_correct() {
        // 2 failures over 4 sessions → 0.5.
        let snap = MetricsSnapshot {
            total_sessions: 4,
            failed_tool_calls: 2,
            ..Default::default()
        };
        assert!((snap.avg_failures_per_session() - 0.5).abs() < 1e-9);
    }

    #[test]
    fn test_metrics_snapshot_avg_failures_per_session_zero_when_no_sessions() {
        // Division guard: no sessions must yield 0.0, not NaN.
        let snap = MetricsSnapshot::default();
        assert_eq!(snap.avg_failures_per_session(), 0.0);
    }

    #[test]
    fn test_latency_histogram_is_skewed_true_when_p99_much_greater_than_p50() {
        // NOTE(review): despite the name, this is only a smoke test — it never
        // asserts a skew value. Tightening it requires knowing is_skewed()'s
        // exact p99 rank and threshold; confirm before strengthening.
        let h = LatencyHistogram::default();
        // Record many fast samples and one very slow one to skew p99
        for _ in 0..100 {
            h.record(1); // ≤1ms
        }
        h.record(500); // very slow
        // p50 = 1, p99 depends on bucket counts
        // With 100 samples in ≤1ms and 1 in ≤500ms, p99 should be 1ms too
        // Let's just verify the method doesn't panic
        let _ = h.is_skewed();
    }

    #[test]
    fn test_latency_histogram_is_skewed_false_when_empty() {
        // An empty histogram has no percentiles and must not report skew.
        let h = LatencyHistogram::default();
        assert!(!h.is_skewed());
    }
3548
    // ── Round 36 ──────────────────────────────────────────────────────────────

    #[test]
    fn test_most_called_tool_returns_tool_with_most_calls() {
        // "search" (5 calls) beats "write" (2 calls).
        let snap = MetricsSnapshot {
            per_tool_calls: [
                ("search".to_string(), 5u64),
                ("write".to_string(), 2u64),
            ]
            .into_iter()
            .collect(),
            ..Default::default()
        };
        assert_eq!(snap.most_called_tool(), Some("search".to_string()));
    }

    #[test]
    fn test_most_called_tool_returns_none_when_empty() {
        let snap = MetricsSnapshot::default();
        assert!(snap.most_called_tool().is_none());
    }

    #[test]
    fn test_tool_names_with_failures_returns_sorted_names_with_failures() {
        // "write" has zero failures and must be excluded; the result is
        // expected in sorted order.
        let snap = MetricsSnapshot {
            per_tool_failures: [
                ("search".to_string(), 3u64),
                ("write".to_string(), 0u64),
                ("calc".to_string(), 1u64),
            ]
            .into_iter()
            .collect(),
            ..Default::default()
        };
        assert_eq!(snap.tool_names_with_failures(), vec!["calc", "search"]);
    }

    #[test]
    fn test_tool_names_with_failures_empty_when_no_failures() {
        let snap = MetricsSnapshot::default();
        assert!(snap.tool_names_with_failures().is_empty());
    }

    // ── Round 37 ──────────────────────────────────────────────────────────────

    #[test]
    fn test_agent_with_most_calls_returns_highest_total() {
        // agent_a totals 5 calls (3 + 2); agent_b only 1.
        let snap = MetricsSnapshot {
            per_agent_tool_calls: [
                ("agent_a".to_string(), [("search".to_string(), 3u64), ("write".to_string(), 2u64)].into_iter().collect()),
                ("agent_b".to_string(), [("search".to_string(), 1u64)].into_iter().collect()),
            ]
            .into_iter()
            .collect(),
            ..Default::default()
        };
        assert_eq!(snap.agent_with_most_calls(), Some("agent_a".to_string()));
    }

    #[test]
    fn test_agent_with_most_calls_returns_none_when_empty() {
        let snap = MetricsSnapshot::default();
        assert!(snap.agent_with_most_calls().is_none());
    }

    // ── Round 38 ──────────────────────────────────────────────────────────────

    #[test]
    fn test_total_agent_count_returns_number_of_distinct_agents() {
        // Two distinct agent keys count, regardless of their per-tool maps.
        let snap = MetricsSnapshot {
            per_agent_tool_calls: [
                ("a".to_string(), std::collections::HashMap::new()),
                ("b".to_string(), std::collections::HashMap::new()),
            ]
            .into_iter()
            .collect(),
            ..Default::default()
        };
        assert_eq!(snap.total_agent_count(), 2);
    }

    #[test]
    fn test_total_agent_count_zero_when_empty() {
        let snap = MetricsSnapshot::default();
        assert_eq!(snap.total_agent_count(), 0);
    }

    #[test]
    fn test_steps_per_tool_call_returns_ratio() {
        // 10 steps over 5 tool calls → 2.0.
        let snap = MetricsSnapshot {
            total_steps: 10,
            total_tool_calls: 5,
            ..Default::default()
        };
        assert!((snap.steps_per_tool_call() - 2.0).abs() < 1e-9);
    }

    #[test]
    fn test_steps_per_tool_call_zero_when_no_tool_calls() {
        // Division guard: zero tool calls must yield 0.0, not NaN.
        let snap = MetricsSnapshot::default();
        assert_eq!(snap.steps_per_tool_call(), 0.0);
    }

    // ── Round 39 ──────────────────────────────────────────────────────────────

    #[test]
    fn test_failed_tool_ratio_for_returns_failure_rate() {
        // 2 failures out of 10 calls for "tool" → 0.2.
        let snap = MetricsSnapshot {
            per_tool_calls: [("tool".to_string(), 10u64)].into_iter().collect(),
            per_tool_failures: [("tool".to_string(), 2u64)].into_iter().collect(),
            ..Default::default()
        };
        assert!((snap.failed_tool_ratio_for("tool") - 0.2).abs() < 1e-9);
    }

    #[test]
    fn test_failed_tool_ratio_for_zero_when_no_calls() {
        // Unknown tool name → 0.0 rather than a panic or NaN.
        let snap = MetricsSnapshot::default();
        assert_eq!(snap.failed_tool_ratio_for("missing"), 0.0);
    }

    #[test]
    fn test_backpressure_shed_rate_returns_ratio() {
        // 5 shed events out of 100 tool calls → 0.05.
        let snap = MetricsSnapshot {
            total_tool_calls: 100,
            backpressure_shed_count: 5,
            ..Default::default()
        };
        assert!((snap.backpressure_shed_rate() - 0.05).abs() < 1e-9);
    }

    #[test]
    fn test_backpressure_shed_rate_zero_when_no_tool_calls() {
        // Division guard: zero tool calls must yield 0.0, not NaN.
        let snap = MetricsSnapshot::default();
        assert_eq!(snap.backpressure_shed_rate(), 0.0);
    }

    // ── Round 40 ──────────────────────────────────────────────────────────────

    #[test]
    fn test_step_latency_p95_zero_when_empty() {
        let m = RuntimeMetrics::new();
        assert_eq!(m.step_latency_p95(), 0);
    }

    #[test]
    fn test_step_latency_p75_zero_when_empty() {
        let m = RuntimeMetrics::new();
        assert_eq!(m.step_latency_p75(), 0);
    }

    #[test]
    fn test_step_latency_p95_gte_p75_after_recording() {
        // Percentiles must be monotone: p95 ≥ p75 for any sample set.
        let m = RuntimeMetrics::new();
        for ms in [1, 5, 10, 50, 100, 500, 1000] {
            m.record_step_latency(ms);
        }
        assert!(m.step_latency_p95() >= m.step_latency_p75());
    }

    #[test]
    fn test_step_latency_p99_gte_p95_after_recording() {
        // Percentiles must be monotone: p99 ≥ p95 for any sample set.
        let m = RuntimeMetrics::new();
        for ms in [1, 5, 10, 50, 100, 500, 1000] {
            m.record_step_latency(ms);
        }
        assert!(m.step_latency_p99() >= m.step_latency_p95());
    }
3717
    // ── Round 41: MetricsSnapshot::is_healthy_with_latency, is_empty ─────────

    #[test]
    fn test_snapshot_is_empty_true_for_fresh_snapshot() {
        // A snapshot of a brand-new RuntimeMetrics has no recorded activity.
        let m = RuntimeMetrics::new();
        let snap = m.snapshot();
        assert!(snap.is_empty());
    }

    #[test]
    fn test_snapshot_is_healthy_with_latency_true_when_below_threshold() {
        let m = RuntimeMetrics::new();
        let snap = m.snapshot();
        // A fresh snapshot has 0 mean latency — well below any threshold
        assert!(snap.is_healthy_with_latency(1000.0));
    }

    #[test]
    fn test_snapshot_is_healthy_with_latency_false_when_has_failures() {
        // Any recorded tool failure makes the snapshot unhealthy even with a
        // very permissive latency threshold.
        let m = RuntimeMetrics::new();
        m.record_tool_failure("search");
        let snap = m.snapshot();
        assert!(!snap.is_healthy_with_latency(9999.0));
    }

    #[test]
    fn test_snapshot_is_empty_false_after_recording_step() {
        // NOTE(review): despite the name, this is only a smoke test — it never
        // asserts is_empty() == false (see the original comment below).
        let m = RuntimeMetrics::new();
        m.record_step_latency(5);
        // total_steps increments in run_agent; we can observe via snapshot
        // that at least the latency histogram changed (mean > 0 check skipped;
        // just confirm the predicate doesn't panic).
        let _ = m.snapshot().is_empty();
    }

    // ── Round 41: step_latency_std_dev_ms, most_used_tool ─────────────────────

    #[test]
    fn test_step_latency_std_dev_ms_zero_when_empty() {
        // No samples → zero variance by definition.
        let m = RuntimeMetrics::new();
        assert_eq!(m.step_latency_std_dev_ms(), 0.0);
    }

    #[test]
    fn test_step_latency_std_dev_ms_positive_after_diverse_recording() {
        // Two widely separated samples guarantee nonzero variance.
        let m = RuntimeMetrics::new();
        m.record_step_latency(1);
        m.record_step_latency(1000);
        assert!(m.step_latency_std_dev_ms() > 0.0);
    }

    #[test]
    fn test_most_used_tool_returns_tool_with_most_calls() {
        // "search" (2 calls) beats "lookup" (1 call).
        let m = RuntimeMetrics::new();
        m.record_tool_call("search");
        m.record_tool_call("search");
        m.record_tool_call("lookup");
        assert_eq!(m.most_used_tool(), Some("search".to_string()));
    }

    #[test]
    fn test_most_used_tool_returns_none_when_no_calls() {
        let m = RuntimeMetrics::new();
        assert_eq!(m.most_used_tool(), None);
    }

    // ── Round 42 ──────────────────────────────────────────────────────────────

    #[test]
    fn test_tool_call_to_failure_ratio_zero_when_no_calls() {
        // Division guard: no calls must yield 0.0, not NaN.
        let m = RuntimeMetrics::new();
        assert_eq!(m.tool_call_to_failure_ratio(), 0.0);
    }

    #[test]
    fn test_tool_call_to_failure_ratio_computed_correctly() {
        let m = RuntimeMetrics::new();
        m.record_tool_call("t");
        m.record_tool_call("t");
        m.record_tool_failure("t");
        // 1 failure out of 2 calls (total_tool_calls = 2)
        assert!((m.tool_call_to_failure_ratio() - 0.5).abs() < 1e-9);
    }

    // ── Round 41: MetricsSnapshot::total_tool_failures, least_called_tool ─────

    #[test]
    fn test_metrics_snapshot_total_tool_failures_sums_all_failures() {
        // 3 + 2 failures across two tools → 5 total.
        let snap = MetricsSnapshot {
            per_tool_failures: [
                ("search".to_string(), 3u64),
                ("write".to_string(), 2u64),
            ].into_iter().collect(),
            ..Default::default()
        };
        assert_eq!(snap.total_tool_failures(), 5);
    }

    #[test]
    fn test_metrics_snapshot_total_tool_failures_zero_when_empty() {
        let snap = MetricsSnapshot::default();
        assert_eq!(snap.total_tool_failures(), 0);
    }

    #[test]
    fn test_metrics_snapshot_least_called_tool_returns_tool_with_fewest_calls() {
        // "lookup" holds the minimum call count (2).
        let snap = MetricsSnapshot {
            per_tool_calls: [
                ("search".to_string(), 10u64),
                ("lookup".to_string(), 2u64),
                ("write".to_string(), 5u64),
            ].into_iter().collect(),
            ..Default::default()
        };
        assert_eq!(snap.least_called_tool(), Some("lookup".to_string()));
    }

    #[test]
    fn test_metrics_snapshot_least_called_tool_returns_none_when_empty() {
        let snap = MetricsSnapshot::default();
        assert_eq!(snap.least_called_tool(), None);
    }
3840
    // ── Round 42: summary_line ────────────────────────────────────────────────

    #[test]
    fn test_metrics_snapshot_summary_line_format() {
        // The summary line must carry every key= field; order is not asserted.
        let m = RuntimeMetrics::new();
        let snap = m.snapshot();
        let line = snap.summary_line();
        assert!(line.contains("sessions="));
        assert!(line.contains("steps="));
        assert!(line.contains("tool_calls="));
        assert!(line.contains("failures="));
        assert!(line.contains("latency_mean="));
    }

    #[test]
    fn test_metrics_snapshot_summary_line_reflects_zero_values() {
        // Default counters must render as literal zeros in the line.
        let snap = MetricsSnapshot::default();
        let line = snap.summary_line();
        assert!(line.contains("sessions=0"));
        assert!(line.contains("failures=0"));
    }

    // ── Round 43 ──────────────────────────────────────────────────────────────

    #[test]
    fn test_active_session_rate_zero_when_no_sessions() {
        // Division guard: no sessions must yield 0.0, not NaN.
        let m = RuntimeMetrics::new();
        assert_eq!(m.active_session_rate(), 0.0);
    }

    #[test]
    fn test_active_session_rate_one_when_all_sessions_active() {
        // 2 active of 2 total → a rate of exactly 1.0.
        let m = RuntimeMetrics::new();
        m.active_sessions.fetch_add(2, Ordering::Relaxed);
        m.total_sessions.fetch_add(2, Ordering::Relaxed);
        assert!((m.active_session_rate() - 1.0).abs() < 1e-9);
    }

    // ── Round 42: MetricsSnapshot::avg_tool_calls_per_name ────────────────────

    #[test]
    fn test_avg_tool_calls_per_name_computed_correctly() {
        let snap = MetricsSnapshot {
            per_tool_calls: [
                ("search".to_string(), 6u64),
                ("write".to_string(), 4u64),
            ].into_iter().collect(),
            ..Default::default()
        };
        // (6 + 4) / 2 = 5.0
        assert!((snap.avg_tool_calls_per_name() - 5.0).abs() < 1e-9);
    }

    #[test]
    fn test_avg_tool_calls_per_name_zero_when_no_tools() {
        // Division guard: no tools must yield 0.0, not NaN.
        let snap = MetricsSnapshot::default();
        assert_eq!(snap.avg_tool_calls_per_name(), 0.0);
    }

    // ── Round 43: MetricsSnapshot::tool_call_count_above ──────────────────────

    #[test]
    fn test_tool_call_count_above_counts_tools_exceeding_threshold() {
        let snap = MetricsSnapshot {
            per_tool_calls: [
                ("search".to_string(), 10u64),
                ("write".to_string(), 2u64),
                ("read".to_string(), 5u64),
            ].into_iter().collect(),
            ..Default::default()
        };
        assert_eq!(snap.tool_call_count_above(4), 2); // search(10) and read(5)
    }

    #[test]
    fn test_tool_call_count_above_returns_zero_when_none_exceed() {
        // A single tool with 3 calls cannot exceed a threshold of 10.
        let snap = MetricsSnapshot {
            per_tool_calls: [("t".to_string(), 3u64)].into_iter().collect(),
            ..Default::default()
        };
        assert_eq!(snap.tool_call_count_above(10), 0);
    }

    #[test]
    fn test_tool_call_count_above_zero_for_empty_snapshot() {
        let snap = MetricsSnapshot::default();
        assert_eq!(snap.tool_call_count_above(0), 0);
    }
3929
3930    // ── Round 44: memory_recall_per_session ────────────────────────────────────
3931
3932    #[test]
3933    fn test_memory_recall_per_session_returns_ratio() {
3934        use std::sync::atomic::Ordering;
3935        let m = RuntimeMetrics::default();
3936        m.total_sessions.store(4, Ordering::Relaxed);
3937        m.memory_recall_count.store(8, Ordering::Relaxed);
3938        assert!((m.memory_recall_per_session() - 2.0).abs() < 1e-9);
3939    }
3940
3941    #[test]
3942    fn test_memory_recall_per_session_zero_when_no_sessions() {
3943        let m = RuntimeMetrics::default();
3944        assert_eq!(m.memory_recall_per_session(), 0.0);
3945    }
3946
    // ── Round 44: tool_call_ratio ─────────────────────────────────────────────

    #[test]
    fn test_tool_call_ratio_returns_fraction_for_named_tool() {
        // 4/10 and 6/10 of all recorded calls respectively.
        let snap = MetricsSnapshot {
            total_tool_calls: 10,
            per_tool_calls: [
                ("search".to_string(), 4u64),
                ("write".to_string(), 6u64),
            ].into_iter().collect(),
            ..Default::default()
        };
        assert!((snap.tool_call_ratio("search") - 0.4).abs() < 1e-9);
        assert!((snap.tool_call_ratio("write") - 0.6).abs() < 1e-9);
    }

    #[test]
    fn test_tool_call_ratio_returns_zero_for_unknown_tool() {
        // A name absent from per_tool_calls contributes nothing.
        let snap = MetricsSnapshot {
            total_tool_calls: 5,
            per_tool_calls: [("a".to_string(), 5u64)].into_iter().collect(),
            ..Default::default()
        };
        assert_eq!(snap.tool_call_ratio("unknown"), 0.0);
    }

    #[test]
    fn test_tool_call_ratio_returns_zero_when_no_calls_recorded() {
        // Division guard: zero total calls must yield 0.0, not NaN.
        let snap = MetricsSnapshot::default();
        assert_eq!(snap.tool_call_ratio("any"), 0.0);
    }

    // ── Round 44: top_n_tools_by_calls ────────────────────────────────────────

    #[test]
    fn test_top_n_tools_by_calls_returns_n_descending() {
        // Of {a:10, b:5, c:20}, the top 2 by call count are c, then a.
        let snap = MetricsSnapshot {
            per_tool_calls: [
                ("a".to_string(), 10),
                ("b".to_string(), 5),
                ("c".to_string(), 20),
            ]
            .into_iter()
            .collect(),
            ..Default::default()
        };
        let top = snap.top_n_tools_by_calls(2);
        assert_eq!(top.len(), 2);
        assert_eq!(top[0], ("c", 20));
        assert_eq!(top[1], ("a", 10));
    }

    #[test]
    fn test_top_n_tools_by_calls_empty_for_empty_snapshot() {
        let snap = MetricsSnapshot::default();
        assert!(snap.top_n_tools_by_calls(5).is_empty());
    }

    #[test]
    fn test_top_n_tools_by_calls_returns_all_when_n_exceeds_count() {
        // Requesting more entries than exist must not panic or pad the result.
        let snap = MetricsSnapshot {
            per_tool_calls: [("only".to_string(), 3)].into_iter().collect(),
            ..Default::default()
        };
        assert_eq!(snap.top_n_tools_by_calls(100).len(), 1);
    }
4013
4014    // ── Round 45: step_error_rate ──────────────────────────────────────────────
4015
4016    #[test]
4017    fn test_step_error_rate_returns_ratio() {
4018        use std::sync::atomic::Ordering;
4019        let m = RuntimeMetrics::default();
4020        m.total_steps.store(10, Ordering::Relaxed);
4021        m.failed_tool_calls.store(2, Ordering::Relaxed);
4022        assert!((m.step_error_rate() - 0.2).abs() < 1e-9);
4023    }
4024
4025    #[test]
4026    fn test_step_error_rate_zero_when_no_steps() {
4027        let m = RuntimeMetrics::default();
4028        assert_eq!(m.step_error_rate(), 0.0);
4029    }
4030
    // ── Round 45: is_degraded ─────────────────────────────────────────────────

    #[test]
    fn test_is_degraded_true_when_failure_rate_exceeds_threshold() {
        let snap = MetricsSnapshot {
            total_tool_calls: 10,
            failed_tool_calls: 3,
            ..Default::default()
        };
        assert!(snap.is_degraded(0.2)); // failure_rate = 0.3 > 0.2
    }

    #[test]
    fn test_is_degraded_false_when_failure_rate_at_or_below_threshold() {
        // Boundary case: the comparison is strict, so rate == threshold is OK.
        let snap = MetricsSnapshot {
            total_tool_calls: 10,
            failed_tool_calls: 2,
            ..Default::default()
        };
        assert!(!snap.is_degraded(0.2)); // failure_rate = 0.2, not strictly greater
    }

    #[test]
    fn test_is_degraded_false_for_zero_failures() {
        // Successful calls alone can never exceed any positive threshold.
        let snap = MetricsSnapshot {
            total_tool_calls: 5,
            failed_tool_calls: 0,
            ..Default::default()
        };
        assert!(!snap.is_degraded(0.05));
    }

    #[test]
    fn test_is_degraded_false_for_empty_snapshot() {
        // No calls at all → no failure rate → never degraded.
        let snap = MetricsSnapshot::default();
        assert!(!snap.is_degraded(0.1));
    }
4068
4069    // ── Round 46: total_errors ─────────────────────────────────────────────────
4070
4071    #[test]
4072    fn test_total_errors_sums_failed_tool_calls_and_checkpoint_errors() {
4073        use std::sync::atomic::Ordering;
4074        let m = RuntimeMetrics::default();
4075        m.failed_tool_calls.store(5, Ordering::Relaxed);
4076        m.checkpoint_errors.store(3, Ordering::Relaxed);
4077        assert_eq!(m.total_errors(), 8);
4078    }
4079
4080    #[test]
4081    fn test_total_errors_zero_when_no_errors() {
4082        let m = RuntimeMetrics::default();
4083        assert_eq!(m.total_errors(), 0);
4084    }
4085
    // ── Round 45: has_tool, tool_call_share ────────────────────────────────────

    #[test]
    fn test_has_tool_true_for_recorded_tool() {
        let snap = MetricsSnapshot {
            per_tool_calls: [("my_tool".to_string(), 3)].into_iter().collect(),
            ..Default::default()
        };
        assert!(snap.has_tool("my_tool"));
    }

    #[test]
    fn test_has_tool_false_for_unrecorded_tool() {
        let snap = MetricsSnapshot::default();
        assert!(!snap.has_tool("anything"));
    }

    #[test]
    fn test_tool_call_share_returns_fraction() {
        // 4 of 10 total calls → 0.4.
        let snap = MetricsSnapshot {
            total_tool_calls: 10,
            per_tool_calls: [("a".to_string(), 4)].into_iter().collect(),
            ..Default::default()
        };
        assert!((snap.tool_call_share("a") - 0.4).abs() < 1e-9);
    }

    #[test]
    fn test_tool_call_share_zero_when_no_calls() {
        // Division guard: zero total calls must yield 0.0, not NaN.
        let snap = MetricsSnapshot::default();
        assert_eq!(snap.tool_call_share("any"), 0.0);
    }

    // ── Round 47: tool_names_containing ───────────────────────────────────────

    #[test]
    fn test_tool_names_containing_returns_matching_names() {
        // Substring match; sort locally because the per-tool map's iteration
        // order is arbitrary.
        let m = RuntimeMetrics::default();
        m.record_tool_call("search_web");
        m.record_tool_call("search_db");
        m.record_tool_call("write_file");
        let mut names = m.tool_names_containing("search");
        names.sort_unstable();
        assert_eq!(names, vec!["search_db", "search_web"]);
    }

    #[test]
    fn test_tool_names_containing_empty_when_no_match() {
        let m = RuntimeMetrics::default();
        m.record_tool_call("read");
        assert!(m.tool_names_containing("write").is_empty());
    }
4138
4139    // ── Round 48: avg_memory_recalls_per_step ──────────────────────────────────
4140
4141    #[test]
4142    fn test_avg_memory_recalls_per_step_computes_ratio() {
4143        use std::sync::atomic::Ordering;
4144        let m = RuntimeMetrics::default();
4145        m.total_steps.store(2, Ordering::Relaxed);
4146        m.memory_recall_count.store(1, Ordering::Relaxed);
4147        // 1 recall / 2 steps = 0.5
4148        assert!((m.avg_memory_recalls_per_step() - 0.5).abs() < 1e-9);
4149    }
4150
4151    #[test]
4152    fn test_avg_memory_recalls_per_step_zero_when_no_steps() {
4153        let m = RuntimeMetrics::default();
4154        assert_eq!(m.avg_memory_recalls_per_step(), 0.0);
4155    }
4156
4157    // ── Round 49: avg_tool_failures_per_session ────────────────────────────────
4158
4159    #[test]
4160    fn test_avg_tool_failures_per_session_computes_ratio() {
4161        use std::sync::atomic::Ordering;
4162        let m = RuntimeMetrics::default();
4163        m.total_sessions.store(4, Ordering::Relaxed);
4164        m.failed_tool_calls.store(2, Ordering::Relaxed);
4165        assert!((m.avg_tool_failures_per_session() - 0.5).abs() < 1e-9);
4166    }
4167
4168    #[test]
4169    fn test_avg_tool_failures_per_session_zero_when_no_sessions() {
4170        let m = RuntimeMetrics::default();
4171        assert_eq!(m.avg_tool_failures_per_session(), 0.0);
4172    }
4173
4174    // ── Round 47: has_any_tool_failures, total_tool_calls_count ───────────────
4175
4176    #[test]
4177    fn test_has_any_tool_failures_false_when_no_failures() {
4178        let m = RuntimeMetrics::new();
4179        m.record_tool_call("search");
4180        let snap = m.snapshot();
4181        assert!(!snap.has_any_tool_failures());
4182    }
4183
4184    #[test]
4185    fn test_has_any_tool_failures_true_when_failure_recorded() {
4186        let m = RuntimeMetrics::new();
4187        m.record_tool_call("search");
4188        m.record_tool_failure("search");
4189        let snap = m.snapshot();
4190        assert!(snap.has_any_tool_failures());
4191    }
4192
4193    #[test]
4194    fn test_total_tool_calls_count_sums_all_per_tool_calls() {
4195        let m = RuntimeMetrics::new();
4196        m.record_tool_call("search");
4197        m.record_tool_call("search");
4198        m.record_tool_call("lookup");
4199        let snap = m.snapshot();
4200        assert_eq!(snap.total_tool_calls_count(), 3);
4201    }
4202
4203    #[test]
4204    fn test_total_tool_calls_count_zero_for_no_calls() {
4205        let m = RuntimeMetrics::new();
4206        let snap = m.snapshot();
4207        assert_eq!(snap.total_tool_calls_count(), 0);
4208    }
4209
4210    // ── Round 49: tool_call_imbalance ─────────────────────────────────────────
4211
4212    #[test]
4213    fn test_tool_call_imbalance_one_for_single_tool() {
4214        let m = RuntimeMetrics::new();
4215        m.record_tool_call("search");
4216        m.record_tool_call("search");
4217        let snap = m.snapshot();
4218        assert!((snap.tool_call_imbalance() - 1.0).abs() < 1e-9);
4219    }
4220
4221    #[test]
4222    fn test_tool_call_imbalance_computes_max_over_min() {
4223        let m = RuntimeMetrics::new();
4224        m.record_tool_call("a");
4225        m.record_tool_call("a");
4226        m.record_tool_call("a");
4227        m.record_tool_call("b");
4228        let snap = m.snapshot();
4229        // max=3, min=1 → ratio=3.0
4230        assert!((snap.tool_call_imbalance() - 3.0).abs() < 1e-9);
4231    }
4232
4233    #[test]
4234    fn test_tool_call_imbalance_one_for_empty_snapshot() {
4235        let m = RuntimeMetrics::new();
4236        let snap = m.snapshot();
4237        assert!((snap.tool_call_imbalance() - 1.0).abs() < 1e-9);
4238    }
4239
4240    // ── Round 50: has_failed_tools ────────────────────────────────────────────
4241
4242    #[test]
4243    fn test_has_failed_tools_true_when_failure_recorded() {
4244        let m = RuntimeMetrics::new();
4245        m.record_tool_failure("search");
4246        assert!(m.has_failed_tools());
4247    }
4248
4249    #[test]
4250    fn test_has_failed_tools_false_when_no_failures() {
4251        let m = RuntimeMetrics::new();
4252        m.record_tool_call("search");
4253        assert!(!m.has_failed_tools());
4254    }
4255
4256    // ── Round 47: distinct_tool_count ─────────────────────────────────────────
4257
4258    #[test]
4259    fn test_distinct_tool_count_reflects_unique_tools() {
4260        let snap = MetricsSnapshot {
4261            per_tool_calls: [
4262                ("tool_a".to_string(), 3),
4263                ("tool_b".to_string(), 1),
4264            ]
4265            .into_iter()
4266            .collect(),
4267            ..Default::default()
4268        };
4269        assert_eq!(snap.distinct_tool_count(), 2);
4270    }
4271
4272    #[test]
4273    fn test_distinct_tool_count_zero_for_empty_snapshot() {
4274        let snap = MetricsSnapshot::default();
4275        assert_eq!(snap.distinct_tool_count(), 0);
4276    }
4277
4278    // ── Round 50: tools_with_zero_failures, tool_call_imbalance ───────────────
4279
4280    #[test]
4281    fn test_tools_with_zero_failures_returns_tools_without_failures() {
4282        let m = RuntimeMetrics::new();
4283        m.record_tool_call("search");
4284        m.record_tool_call("lookup");
4285        m.record_tool_failure("search");
4286        let snap = m.snapshot();
4287        let zero_fail = snap.tools_with_zero_failures();
4288        assert_eq!(zero_fail, vec!["lookup"]);
4289    }
4290
4291    #[test]
4292    fn test_tools_with_zero_failures_empty_when_all_have_failures() {
4293        let m = RuntimeMetrics::new();
4294        m.record_tool_call("a");
4295        m.record_tool_failure("a");
4296        let snap = m.snapshot();
4297        assert!(snap.tools_with_zero_failures().is_empty());
4298    }
4299
4300    // ── Round 51: tool_names_by_call_count ────────────────────────────────────
4301
4302    #[test]
4303    fn test_tool_names_by_call_count_orders_highest_first() {
4304        let m = RuntimeMetrics::new();
4305        m.record_tool_call("alpha");
4306        m.record_tool_call("beta");
4307        m.record_tool_call("beta");
4308        m.record_tool_call("gamma");
4309        m.record_tool_call("gamma");
4310        m.record_tool_call("gamma");
4311        let names = m.tool_names_by_call_count();
4312        assert_eq!(names[0], "gamma");
4313        assert_eq!(names[1], "beta");
4314        assert_eq!(names[2], "alpha");
4315    }
4316
4317    #[test]
4318    fn test_tool_names_by_call_count_empty_when_no_calls() {
4319        let m = RuntimeMetrics::new();
4320        assert!(m.tool_names_by_call_count().is_empty());
4321    }
4322
4323    // ── Round 52 ──────────────────────────────────────────────────────────────
4324
4325    #[test]
4326    fn test_has_any_tool_calls_false_when_no_calls() {
4327        let m = RuntimeMetrics::new();
4328        assert!(!m.snapshot().has_any_tool_calls());
4329    }
4330
4331    #[test]
4332    fn test_has_any_tool_calls_true_after_recording() {
4333        let m = RuntimeMetrics::new();
4334        m.record_tool_call("search");
4335        assert!(m.snapshot().has_any_tool_calls());
4336    }
4337
4338    #[test]
4339    fn test_tool_names_alphabetical_sorted() {
4340        let m = RuntimeMetrics::new();
4341        m.record_tool_call("zebra");
4342        m.record_tool_call("alpha");
4343        m.record_tool_call("mango");
4344        let names = m.snapshot().tool_names_alphabetical();
4345        assert_eq!(names, vec!["alpha", "mango", "zebra"]);
4346    }
4347
4348    #[test]
4349    fn test_tool_names_alphabetical_empty_when_no_calls() {
4350        let m = RuntimeMetrics::new();
4351        assert!(m.snapshot().tool_names_alphabetical().is_empty());
4352    }
4353
4354    // ── Round 52: tool_calls_per_memory_recall ─────────────────────────────────
4355
4356    #[test]
4357    fn test_tool_calls_per_memory_recall_returns_ratio() {
4358        let m = RuntimeMetrics::new();
4359        m.memory_recall_count.store(2, std::sync::atomic::Ordering::Relaxed);
4360        m.record_tool_call("a");
4361        m.record_tool_call("b");
4362        m.record_tool_call("c");
4363        m.record_tool_call("d");
4364        assert_eq!(m.tool_calls_per_memory_recall(), 2.0);
4365    }
4366
4367    #[test]
4368    fn test_tool_calls_per_memory_recall_zero_when_no_recalls() {
4369        let m = RuntimeMetrics::new();
4370        m.record_tool_call("a");
4371        assert_eq!(m.tool_calls_per_memory_recall(), 0.0);
4372    }
4373
4374    #[test]
4375    fn test_tool_calls_per_memory_recall_zero_for_empty_metrics() {
4376        let m = RuntimeMetrics::new();
4377        assert_eq!(m.tool_calls_per_memory_recall(), 0.0);
4378    }
4379
4380    // ── Round 53: memory_recalls_per_tool_call ─────────────────────────────────
4381
4382    #[test]
4383    fn test_memory_recalls_per_tool_call_returns_ratio() {
4384        let m = RuntimeMetrics::new();
4385        m.record_tool_call("a");
4386        m.record_tool_call("b");
4387        m.memory_recall_count.store(4, std::sync::atomic::Ordering::Relaxed);
4388        assert_eq!(m.memory_recalls_per_tool_call(), 2.0);
4389    }
4390
4391    #[test]
4392    fn test_memory_recalls_per_tool_call_zero_when_no_tool_calls() {
4393        let m = RuntimeMetrics::new();
4394        m.memory_recall_count.store(5, std::sync::atomic::Ordering::Relaxed);
4395        assert_eq!(m.memory_recalls_per_tool_call(), 0.0);
4396    }
4397
4398    #[test]
4399    fn test_memory_recalls_per_tool_call_zero_for_empty_metrics() {
4400        let m = RuntimeMetrics::new();
4401        assert_eq!(m.memory_recalls_per_tool_call(), 0.0);
4402    }
4403
4404    // ── Round 53 ──────────────────────────────────────────────────────────────
4405
4406    #[test]
4407    fn test_avg_failures_per_tool_zero_when_no_calls() {
4408        let m = RuntimeMetrics::new();
4409        assert_eq!(m.snapshot().avg_failures_per_tool(), 0.0);
4410    }
4411
4412    #[test]
4413    fn test_avg_failures_per_tool_correct_value() {
4414        let m = RuntimeMetrics::new();
4415        m.record_tool_call("search");
4416        m.record_tool_failure("search");
4417        m.record_tool_call("write");
4418        // search: 1 failure, write: 0 failures; avg = 0.5
4419        let avg = m.snapshot().avg_failures_per_tool();
4420        assert!((avg - 0.5).abs() < 1e-9);
4421    }
4422
4423    // ── Round 48 ──────────────────────────────────────────────────────────────
4424
4425    #[test]
4426    fn test_memory_pressure_ratio_correct_ratio() {
4427        use std::sync::atomic::Ordering;
4428        let m = RuntimeMetrics::new();
4429        m.total_steps.store(4, Ordering::Relaxed);
4430        m.memory_recall_count.store(2, Ordering::Relaxed);
4431        assert!((m.memory_pressure_ratio() - 0.5).abs() < 1e-9);
4432    }
4433
4434    #[test]
4435    fn test_memory_pressure_ratio_zero_when_no_steps() {
4436        let m = RuntimeMetrics::new();
4437        assert_eq!(m.memory_pressure_ratio(), 0.0);
4438    }
4439
4440    #[test]
4441    fn test_sessions_per_step_correct_ratio() {
4442        use std::sync::atomic::Ordering;
4443        let m = RuntimeMetrics::new();
4444        m.total_steps.store(10, Ordering::Relaxed);
4445        m.total_sessions.store(2, Ordering::Relaxed);
4446        assert!((m.sessions_per_step() - 0.2).abs() < 1e-9);
4447    }
4448
4449    #[test]
4450    fn test_sessions_per_step_zero_when_no_steps() {
4451        let m = RuntimeMetrics::new();
4452        assert_eq!(m.sessions_per_step(), 0.0);
4453    }
4454
4455    // ── Round 55: step_failure_rate ────────────────────────────────────────────
4456
4457    #[test]
4458    fn test_step_failure_rate_returns_ratio() {
4459        let m = RuntimeMetrics::new();
4460        m.total_steps.store(4, std::sync::atomic::Ordering::Relaxed);
4461        m.record_tool_failure("a");
4462        m.record_tool_failure("b");
4463        assert_eq!(m.step_failure_rate(), 0.5);
4464    }
4465
4466    #[test]
4467    fn test_step_failure_rate_zero_when_no_steps() {
4468        let m = RuntimeMetrics::new();
4469        assert_eq!(m.step_failure_rate(), 0.0);
4470    }
4471
4472    #[test]
4473    fn test_step_failure_rate_zero_when_no_failures() {
4474        let m = RuntimeMetrics::new();
4475        m.total_steps.store(3, std::sync::atomic::Ordering::Relaxed);
4476        assert_eq!(m.step_failure_rate(), 0.0);
4477    }
4478
4479    // ── Round 49 ──────────────────────────────────────────────────────────────
4480
4481    #[test]
4482    fn test_avg_calls_per_step_correct_ratio() {
4483        use std::sync::atomic::Ordering;
4484        let m = RuntimeMetrics::new();
4485        m.total_steps.store(4, Ordering::Relaxed);
4486        m.total_tool_calls.store(8, Ordering::Relaxed);
4487        assert!((m.avg_calls_per_step() - 2.0).abs() < 1e-9);
4488    }
4489
4490    #[test]
4491    fn test_avg_calls_per_step_zero_when_no_steps() {
4492        let m = RuntimeMetrics::new();
4493        assert_eq!(m.avg_calls_per_step(), 0.0);
4494    }
4495
4496    // ── Round 54 ──────────────────────────────────────────────────────────────
4497
4498    #[test]
4499    fn test_tools_above_failure_ratio_returns_failing_tools() {
4500        let m = RuntimeMetrics::new();
4501        m.record_tool_call("search");
4502        m.record_tool_failure("search");
4503        m.record_tool_call("write");
4504        // search ratio = 1.0, write ratio = 0.0
4505        let above = m.snapshot().tools_above_failure_ratio(0.5);
4506        assert_eq!(above, vec!["search"]);
4507    }
4508
4509    #[test]
4510    fn test_tools_above_failure_ratio_empty_when_no_calls() {
4511        let m = RuntimeMetrics::new();
4512        assert!(m.snapshot().tools_above_failure_ratio(0.1).is_empty());
4513    }
4514
4515    // ── Round 56: total_backpressure_shed_pct ──────────────────────────────────
4516
4517    #[test]
4518    fn test_total_backpressure_shed_pct_returns_ratio() {
4519        let m = RuntimeMetrics::new();
4520        m.record_tool_call("a");
4521        m.record_tool_call("b");
4522        m.record_tool_call("c");
4523        m.record_tool_call("d");
4524        m.backpressure_shed_count.store(1, std::sync::atomic::Ordering::Relaxed);
4525        assert_eq!(m.total_backpressure_shed_pct(), 0.25);
4526    }
4527
4528    #[test]
4529    fn test_total_backpressure_shed_pct_zero_when_no_calls() {
4530        let m = RuntimeMetrics::new();
4531        assert_eq!(m.total_backpressure_shed_pct(), 0.0);
4532    }
4533
4534    // ── Round 50 ──────────────────────────────────────────────────────────────
4535
4536    #[test]
4537    fn test_backpressure_ratio_correct() {
4538        use std::sync::atomic::Ordering;
4539        let m = RuntimeMetrics::new();
4540        m.total_steps.store(4, Ordering::Relaxed);
4541        m.backpressure_shed_count.store(1, Ordering::Relaxed);
4542        assert!((m.backpressure_ratio() - 0.25).abs() < 1e-9);
4543    }
4544
4545    #[test]
4546    fn test_backpressure_ratio_zero_when_no_steps() {
4547        let m = RuntimeMetrics::new();
4548        assert_eq!(m.backpressure_ratio(), 0.0);
4549    }
4550
4551    // ── Round 57: tool_with_highest_failure_rate ───────────────────────────────
4552
4553    #[test]
4554    fn test_tool_with_highest_failure_rate_returns_most_failing_tool() {
4555        let m = RuntimeMetrics::new();
4556        m.record_tool_call("a");
4557        m.record_tool_failure("a");
4558        m.record_tool_call("b");
4559        m.record_tool_call("b");
4560        m.record_tool_failure("b");
4561        // a: 1/1 = 1.0, b: 1/2 = 0.5 → highest is "a"
4562        assert_eq!(m.tool_with_highest_failure_rate().as_deref(), Some("a"));
4563    }
4564
4565    #[test]
4566    fn test_tool_with_highest_failure_rate_none_when_no_calls() {
4567        let m = RuntimeMetrics::new();
4568        assert!(m.tool_with_highest_failure_rate().is_none());
4569    }
4570
4571    // ── Round 51 ──────────────────────────────────────────────────────────────
4572
4573    #[test]
4574    fn test_has_latency_data_true_after_step() {
4575        use std::sync::atomic::Ordering;
4576        let m = RuntimeMetrics::new();
4577        m.total_steps.store(1, Ordering::Relaxed);
4578        assert!(m.has_latency_data());
4579    }
4580
4581    #[test]
4582    fn test_has_latency_data_false_for_new_metrics() {
4583        let m = RuntimeMetrics::new();
4584        assert!(!m.has_latency_data());
4585    }
4586
4587    // ── Round 59: global_failure_rate ─────────────────────────────────────────
4588
4589    #[test]
4590    fn test_global_failure_rate_correct() {
4591        let m = RuntimeMetrics::new();
4592        m.total_tool_calls.store(10, Ordering::Relaxed);
4593        m.failed_tool_calls.store(2, Ordering::Relaxed);
4594        assert!((m.global_failure_rate() - 0.2).abs() < 1e-9);
4595    }
4596
4597    #[test]
4598    fn test_global_failure_rate_zero_when_no_calls() {
4599        let m = RuntimeMetrics::new();
4600        assert_eq!(m.global_failure_rate(), 0.0);
4601    }
4602
4603    // ── Round 60: agent_tool_count ────────────────────────────────────────────
4604
4605    #[test]
4606    fn test_agent_tool_count_correct() {
4607        let m = RuntimeMetrics::new();
4608        m.record_agent_tool_call("agent-A", "tool1");
4609        m.record_agent_tool_call("agent-B", "tool2");
4610        m.record_agent_tool_call("agent-A", "tool3");
4611        assert_eq!(m.agent_tool_count(), 2);
4612    }
4613
4614    #[test]
4615    fn test_agent_tool_count_zero_when_no_calls() {
4616        let m = RuntimeMetrics::new();
4617        assert_eq!(m.agent_tool_count(), 0);
4618    }
4619
4620    // ── Round 61: active_session_count ────────────────────────────────────────
4621
4622    #[test]
4623    fn test_active_session_count_correct() {
4624        let m = RuntimeMetrics::new();
4625        m.active_sessions.store(3, Ordering::Relaxed);
4626        assert_eq!(m.active_session_count(), 3);
4627    }
4628
4629    #[test]
4630    fn test_active_session_count_zero_initially() {
4631        let m = RuntimeMetrics::new();
4632        assert_eq!(m.active_session_count(), 0);
4633    }
4634
4635    // ── Round 62: memory_to_session_ratio ────────────────────────────────────
4636
4637    #[test]
4638    fn test_memory_to_session_ratio_correct() {
4639        let m = RuntimeMetrics::new();
4640        m.total_sessions.store(4, Ordering::Relaxed);
4641        m.memory_recall_count.store(8, Ordering::Relaxed);
4642        assert!((m.memory_to_session_ratio() - 2.0).abs() < 1e-9);
4643    }
4644
4645    #[test]
4646    fn test_memory_to_session_ratio_zero_when_no_sessions() {
4647        let m = RuntimeMetrics::new();
4648        assert_eq!(m.memory_to_session_ratio(), 0.0);
4649    }
4650
4651    // ── Round 63: total_latency_per_session ───────────────────────────────────
4652
4653    #[test]
4654    fn test_total_latency_per_session_correct() {
4655        let m = RuntimeMetrics::new();
4656        m.record_step_latency(100);
4657        m.record_step_latency(200);
4658        m.total_sessions.store(2, Ordering::Relaxed);
4659        // total latency = 300ms, sessions = 2 → 150.0
4660        assert!((m.total_latency_per_session() - 150.0).abs() < 1e-9);
4661    }
4662
4663    #[test]
4664    fn test_total_latency_per_session_zero_when_no_sessions() {
4665        let m = RuntimeMetrics::new();
4666        assert_eq!(m.total_latency_per_session(), 0.0);
4667    }
4668
4669    // ── Round 57: failure_ratio_for_tool, any_tool_exceeds_calls ─────────────
4670
4671    #[test]
4672    fn test_failure_ratio_for_tool_correct_ratio() {
4673        let m = RuntimeMetrics::new();
4674        m.record_tool_call("search");
4675        m.record_tool_call("search");
4676        m.record_tool_failure("search");
4677        let snap = m.snapshot();
4678        assert!((snap.failure_ratio_for_tool("search") - 0.5).abs() < 1e-9);
4679    }
4680
4681    #[test]
4682    fn test_failure_ratio_for_tool_zero_for_unknown_tool() {
4683        let m = RuntimeMetrics::new();
4684        let snap = m.snapshot();
4685        assert_eq!(snap.failure_ratio_for_tool("unknown"), 0.0);
4686    }
4687
4688    #[test]
4689    fn test_any_tool_exceeds_calls_true_when_above_threshold() {
4690        let m = RuntimeMetrics::new();
4691        m.record_tool_call("a");
4692        m.record_tool_call("a");
4693        m.record_tool_call("a");
4694        let snap = m.snapshot();
4695        assert!(snap.any_tool_exceeds_calls(2));
4696    }
4697
4698    #[test]
4699    fn test_any_tool_exceeds_calls_false_when_all_at_or_below_threshold() {
4700        let m = RuntimeMetrics::new();
4701        m.record_tool_call("a");
4702        m.record_tool_call("a");
4703        let snap = m.snapshot();
4704        assert!(!snap.any_tool_exceeds_calls(2));
4705    }
4706
4707    // ── Round 58: tool_call_count_for ─────────────────────────────────────
4708    #[test]
4709    fn test_tool_call_count_for_returns_correct_count() {
4710        let m = RuntimeMetrics::new();
4711        m.record_tool_call("grep");
4712        m.record_tool_call("grep");
4713        m.record_tool_call("grep");
4714        assert_eq!(m.tool_call_count_for("grep"), 3);
4715    }
4716
4717    #[test]
4718    fn test_tool_call_count_for_returns_zero_for_unknown_tool() {
4719        let m = RuntimeMetrics::new();
4720        assert_eq!(m.tool_call_count_for("nonexistent"), 0);
4721    }
4722
4723    // ── Round 58: total_unique_tools, total_agent_tool_calls ──────────────────
4724
4725    #[test]
4726    fn test_total_unique_tools_counts_distinct_tools() {
4727        let m = RuntimeMetrics::new();
4728        m.record_tool_call("search");
4729        m.record_tool_call("search");
4730        m.record_tool_call("browse");
4731        let snap = m.snapshot();
4732        assert_eq!(snap.total_unique_tools(), 2);
4733    }
4734
4735    #[test]
4736    fn test_total_unique_tools_zero_for_no_calls() {
4737        let m = RuntimeMetrics::new();
4738        let snap = m.snapshot();
4739        assert_eq!(snap.total_unique_tools(), 0);
4740    }
4741
4742    #[test]
4743    fn test_total_agent_tool_calls_sums_all_agents() {
4744        let m = RuntimeMetrics::new();
4745        m.record_agent_tool_call("agent-1", "search");
4746        m.record_agent_tool_call("agent-1", "browse");
4747        m.record_agent_tool_call("agent-2", "search");
4748        assert_eq!(m.total_agent_tool_calls(), 3);
4749    }
4750
4751    #[test]
4752    fn test_total_agent_tool_calls_zero_for_new_metrics() {
4753        let m = RuntimeMetrics::new();
4754        assert_eq!(m.total_agent_tool_calls(), 0);
4755    }
4756
4757    // ── Round 59: top_called_tool ──────────────────────────────────────────────
4758
4759    #[test]
4760    fn test_top_called_tool_returns_most_called() {
4761        let m = RuntimeMetrics::new();
4762        m.record_tool_call("search");
4763        m.record_tool_call("search");
4764        m.record_tool_call("browse");
4765        assert_eq!(m.top_called_tool().as_deref(), Some("search"));
4766    }
4767
4768    #[test]
4769    fn test_top_called_tool_none_for_new_metrics() {
4770        let m = RuntimeMetrics::new();
4771        assert!(m.top_called_tool().is_none());
4772    }
4773
4774    // ── Round 60: avg_step_latency_ms, distinct_tools_called, agent_tool_call_count ──
4775
4776    #[test]
4777    fn test_avg_step_latency_ms_correct() {
4778        let m = RuntimeMetrics::new();
4779        m.record_step_latency(100);
4780        m.record_step_latency(200);
4781        // avg_step_latency_ms divides by total_steps, not latency sample count
4782        m.total_steps.store(2, Ordering::Relaxed);
4783        assert!((m.avg_step_latency_ms() - 150.0).abs() < 1e-9);
4784    }
4785
4786    #[test]
4787    fn test_avg_step_latency_ms_zero_for_new_metrics() {
4788        let m = RuntimeMetrics::new();
4789        assert_eq!(m.avg_step_latency_ms(), 0.0);
4790    }
4791
4792    #[test]
4793    fn test_distinct_tools_called_counts_unique_tools() {
4794        let m = RuntimeMetrics::new();
4795        m.record_tool_call("search");
4796        m.record_tool_call("search");
4797        m.record_tool_call("browse");
4798        assert_eq!(m.distinct_tools_called(), 2);
4799    }
4800
4801    #[test]
4802    fn test_distinct_tools_called_zero_for_new_metrics() {
4803        let m = RuntimeMetrics::new();
4804        assert_eq!(m.distinct_tools_called(), 0);
4805    }
4806
4807    #[test]
4808    fn test_agent_tool_call_count_sums_correctly() {
4809        let m = RuntimeMetrics::new();
4810        m.record_agent_tool_call("agent-1", "search");
4811        m.record_agent_tool_call("agent-1", "browse");
4812        m.record_agent_tool_call("agent-2", "search");
4813        assert_eq!(m.agent_tool_call_count("agent-1"), 2);
4814    }
4815
4816    #[test]
4817    fn test_agent_tool_call_count_zero_for_unknown_agent() {
4818        let m = RuntimeMetrics::new();
4819        assert_eq!(m.agent_tool_call_count("nobody"), 0);
4820    }
4821
4822    // ── Round 59: tool_call_ratio_for, has_recorded_agent_calls ──────────────
4823
4824    #[test]
4825    fn test_tool_call_ratio_for_returns_correct_fraction() {
4826        let m = RuntimeMetrics::new();
4827        m.record_tool_call("a");
4828        m.record_tool_call("a");
4829        m.record_tool_call("b");
4830        let snap = m.snapshot();
4831        assert!((snap.tool_call_ratio_for("a") - 2.0 / 3.0).abs() < 1e-9);
4832    }
4833
4834    #[test]
4835    fn test_tool_call_ratio_for_zero_when_no_calls() {
4836        let m = RuntimeMetrics::new();
4837        let snap = m.snapshot();
4838        assert_eq!(snap.tool_call_ratio_for("search"), 0.0);
4839    }
4840
4841    #[test]
4842    fn test_has_recorded_agent_calls_true_after_recording() {
4843        let m = RuntimeMetrics::new();
4844        m.record_agent_tool_call("agent-1", "search");
4845        assert!(m.has_recorded_agent_calls());
4846    }
4847
4848    #[test]
4849    fn test_has_recorded_agent_calls_false_for_new_metrics() {
4850        let m = RuntimeMetrics::new();
4851        assert!(!m.has_recorded_agent_calls());
4852    }
4853
4854    // ── Round 61: failure_rate_for ─────────────────────────────────────────────
4855
4856    #[test]
4857    fn test_failure_rate_for_returns_correct_ratio() {
4858        let m = RuntimeMetrics::new();
4859        m.record_tool_call("search");
4860        m.record_tool_call("search");
4861        m.record_tool_failure("search");
4862        assert!((m.failure_rate_for("search") - 0.5).abs() < 1e-9);
4863    }
4864
4865    #[test]
4866    fn test_failure_rate_for_zero_for_unknown_tool() {
4867        let m = RuntimeMetrics::new();
4868        assert_eq!(m.failure_rate_for("unknown"), 0.0);
4869    }
4870
4871    #[test]
4872    fn test_failure_rate_for_zero_when_no_failures() {
4873        let m = RuntimeMetrics::new();
4874        m.record_tool_call("browse");
4875        assert_eq!(m.failure_rate_for("browse"), 0.0);
4876    }
4877
4878    // ── Round 62: tool_calls_per_session, failure_free_tools ──────────────────
4879
4880    #[test]
4881    fn test_tool_calls_per_session_returns_correct_ratio() {
4882        let m = RuntimeMetrics::new();
4883        m.total_sessions
4884            .fetch_add(2, std::sync::atomic::Ordering::Relaxed);
4885        m.record_tool_call("search");
4886        m.record_tool_call("browse");
4887        m.record_tool_call("search");
4888        assert!((m.tool_calls_per_session() - 1.5).abs() < 1e-9);
4889    }
4890
4891    #[test]
4892    fn test_tool_calls_per_session_zero_when_no_sessions() {
4893        let m = RuntimeMetrics::new();
4894        assert_eq!(m.tool_calls_per_session(), 0.0);
4895    }
4896
4897    #[test]
4898    fn test_failure_free_tools_returns_tools_without_failures() {
4899        let m = RuntimeMetrics::new();
4900        m.record_tool_call("search");
4901        m.record_tool_call("browse");
4902        m.record_tool_failure("search");
4903        let tools = m.failure_free_tools();
4904        assert!(tools.contains(&"browse".to_string()));
4905        assert!(!tools.contains(&"search".to_string()));
4906    }
4907
4908    #[test]
4909    fn test_failure_free_tools_empty_when_all_failed() {
4910        let m = RuntimeMetrics::new();
4911        m.record_tool_call("a");
4912        m.record_tool_failure("a");
4913        let tools = m.failure_free_tools();
4914        assert!(!tools.contains(&"a".to_string()));
4915    }
4916
4917    // ── Round 62: total_failures_across_all_tools, tools_with_calls_above ────
4918
4919    #[test]
4920    fn test_total_failures_across_all_tools_sums_all_failures() {
4921        let m = RuntimeMetrics::new();
4922        m.record_tool_call("a");
4923        m.record_tool_failure("a");
4924        m.record_tool_call("b");
4925        m.record_tool_failure("b");
4926        m.record_tool_failure("b");
4927        let snap = m.snapshot();
4928        assert_eq!(snap.total_failures_across_all_tools(), 3);
4929    }
4930
4931    #[test]
4932    fn test_total_failures_across_all_tools_zero_when_none() {
4933        let m = RuntimeMetrics::new();
4934        m.record_tool_call("a");
4935        let snap = m.snapshot();
4936        assert_eq!(snap.total_failures_across_all_tools(), 0);
4937    }
4938
4939    #[test]
4940    fn test_tools_with_calls_above_returns_tools_exceeding_threshold() {
4941        let m = RuntimeMetrics::new();
4942        for _ in 0..5 { m.record_tool_call("busy"); }
4943        m.record_tool_call("idle");
4944        let result = m.tools_with_calls_above(3);
4945        assert!(result.contains(&"busy".to_string()));
4946        assert!(!result.contains(&"idle".to_string()));
4947    }
4948
4949    #[test]
4950    fn test_tools_with_calls_above_empty_when_none_qualify() {
4951        let m = RuntimeMetrics::new();
4952        m.record_tool_call("once");
4953        assert!(m.tools_with_calls_above(5).is_empty());
4954    }
4955
4956    #[test]
4957    fn test_tools_with_calls_above_returns_sorted_names() {
4958        let m = RuntimeMetrics::new();
4959        for _ in 0..3 { m.record_tool_call("zebra"); }
4960        for _ in 0..3 { m.record_tool_call("apple"); }
4961        let result = m.tools_with_calls_above(2);
4962        assert_eq!(result, vec!["apple", "zebra"]);
4963    }
4964
4965    // ── Round 63: checkpoint_errors_count, agents_with_failures, total_agent_failures ──
4966
4967    #[test]
4968    fn test_checkpoint_errors_count_zero_for_new_metrics() {
4969        let m = RuntimeMetrics::new();
4970        assert_eq!(m.checkpoint_errors_count(), 0);
4971    }
4972
4973    #[test]
4974    fn test_checkpoint_errors_count_reflects_incremented_value() {
4975        let m = RuntimeMetrics::new();
4976        m.checkpoint_errors
4977            .fetch_add(3, std::sync::atomic::Ordering::Relaxed);
4978        assert_eq!(m.checkpoint_errors_count(), 3);
4979    }
4980
4981    #[test]
4982    fn test_agents_with_failures_returns_agents_with_failures() {
4983        let m = RuntimeMetrics::new();
4984        m.record_agent_tool_call("agent-x", "search");
4985        m.record_agent_tool_failure("agent-x", "search");
4986        m.record_agent_tool_call("agent-y", "browse");
4987        let agents = m.agents_with_failures();
4988        assert!(agents.contains(&"agent-x".to_string()));
4989        assert!(!agents.contains(&"agent-y".to_string()));
4990    }
4991
4992    #[test]
4993    fn test_total_agent_failures_sums_all_failures() {
4994        let m = RuntimeMetrics::new();
4995        m.record_agent_tool_failure("a", "tool1");
4996        m.record_agent_tool_failure("a", "tool2");
4997        m.record_agent_tool_failure("b", "tool1");
4998        assert_eq!(m.total_agent_failures(), 3);
4999    }
5000
5001    #[test]
5002    fn test_total_agent_failures_zero_for_new_metrics() {
5003        let m = RuntimeMetrics::new();
5004        assert_eq!(m.total_agent_failures(), 0);
5005    }
5006
5007    // ── Round 64: per_step_tool_call_rate, agents_with_no_failures ───────────
5008
5009    #[test]
5010    fn test_per_step_tool_call_rate_zero_when_no_steps() {
5011        let m = RuntimeMetrics::new();
5012        assert_eq!(m.per_step_tool_call_rate(), 0.0);
5013    }
5014
5015    #[test]
5016    fn test_per_step_tool_call_rate_computed_correctly() {
5017        let m = RuntimeMetrics::new();
5018        m.total_steps.store(2, Ordering::Relaxed);
5019        m.record_tool_call("search");
5020        m.record_tool_call("browse");
5021        m.record_tool_call("search");
5022        // 3 calls / 2 steps = 1.5
5023        assert!((m.per_step_tool_call_rate() - 1.5).abs() < 1e-9);
5024    }
5025
5026    #[test]
5027    fn test_agents_with_no_failures_returns_clean_agents() {
5028        let m = RuntimeMetrics::new();
5029        m.record_agent_tool_call("agent-clean", "search");
5030        m.record_agent_tool_call("agent-fail", "search");
5031        m.record_agent_tool_failure("agent-fail", "search");
5032        let clean = m.agents_with_no_failures();
5033        assert!(clean.contains(&"agent-clean".to_string()));
5034        assert!(!clean.contains(&"agent-fail".to_string()));
5035    }
5036
5037    #[test]
5038    fn test_agents_with_no_failures_empty_for_new_metrics() {
5039        let m = RuntimeMetrics::new();
5040        assert!(m.agents_with_no_failures().is_empty());
5041    }
5042}