Skip to main content

shadow_core/diff/
verbosity.rs

1//! Axis 4: verbosity (output-token count) from
2//! `chat_response.usage.output_tokens`.
3
4use crate::agentlog::Record;
5use crate::diff::axes::{Axis, AxisStat};
6use crate::diff::bootstrap::{median, paired_ci};
7
8fn output_tokens(r: &Record) -> Option<f64> {
9    r.payload
10        .get("usage")
11        .and_then(|u| u.get("output_tokens"))
12        .and_then(|v| v.as_f64())
13}
14
15/// Compute the verbosity axis from paired response records.
16pub fn compute(pairs: &[(&Record, &Record)], seed: Option<u64>) -> AxisStat {
17    let mut b = Vec::with_capacity(pairs.len());
18    let mut c = Vec::with_capacity(pairs.len());
19    for (br, cr) in pairs {
20        if let (Some(bv), Some(cv)) = (output_tokens(br), output_tokens(cr)) {
21            b.push(bv);
22            c.push(cv);
23        }
24    }
25    if b.is_empty() {
26        return AxisStat::empty(Axis::Verbosity);
27    }
28    let bm = median(&b);
29    let cm = median(&c);
30    let delta = cm - bm;
31    let ci = paired_ci(&b, &c, |bs, cs| median(cs) - median(bs), 0, seed);
32    AxisStat::new_value(Axis::Verbosity, bm, cm, delta, ci.low, ci.high, b.len())
33}
34
#[cfg(test)]
mod tests {
    use super::*;
    use crate::agentlog::Kind;
    use crate::diff::axes::Severity;
    use serde_json::json;

    /// Build a chat-response record whose `usage.output_tokens` is `output`;
    /// every other payload field is a fixed placeholder.
    fn response(output: u64) -> Record {
        let payload = json!({
            "model": "x",
            "content": [],
            "stop_reason": "end_turn",
            "latency_ms": 0,
            "usage": {"input_tokens": 1, "output_tokens": output, "thinking_tokens": 0},
        });
        Record::new(Kind::ChatResponse, payload, "2026-04-21T10:00:00Z", None)
    }

    /// Twenty responses with output-token counts `first, first + 1, ..., first + 19`.
    fn responses_from(first: u64) -> Vec<Record> {
        (first..first + 20).map(response).collect()
    }

    #[test]
    fn candidate_half_as_verbose_is_moderate_or_severe() {
        // Baseline counts run 100..=119, candidate counts 50..=69.
        let baseline = responses_from(100);
        let candidate = responses_from(50);
        let pairs: Vec<(&Record, &Record)> = baseline.iter().zip(&candidate).collect();

        let stat = compute(&pairs, Some(7));

        assert!(stat.delta < 0.0);
        assert!(matches!(
            stat.severity,
            Severity::Moderate | Severity::Severe
        ));
    }
}
69}