shadow_core/diff/
verbosity.rs1use crate::agentlog::Record;
5use crate::diff::axes::{Axis, AxisStat};
6use crate::diff::bootstrap::{median, paired_ci};
7
8fn output_tokens(r: &Record) -> Option<f64> {
9 r.payload
10 .get("usage")
11 .and_then(|u| u.get("output_tokens"))
12 .and_then(|v| v.as_f64())
13}
14
15pub fn compute(pairs: &[(&Record, &Record)], seed: Option<u64>) -> AxisStat {
17 let mut b = Vec::with_capacity(pairs.len());
18 let mut c = Vec::with_capacity(pairs.len());
19 for (br, cr) in pairs {
20 if let (Some(bv), Some(cv)) = (output_tokens(br), output_tokens(cr)) {
21 b.push(bv);
22 c.push(cv);
23 }
24 }
25 if b.is_empty() {
26 return AxisStat::empty(Axis::Verbosity);
27 }
28 let bm = median(&b);
29 let cm = median(&c);
30 let delta = cm - bm;
31 let ci = paired_ci(&b, &c, |bs, cs| median(cs) - median(bs), 0, seed);
32 AxisStat::new_value(Axis::Verbosity, bm, cm, delta, ci.low, ci.high, b.len())
33}
34
#[cfg(test)]
mod tests {
    use super::*;
    use crate::agentlog::Kind;
    use crate::diff::axes::Severity;
    use serde_json::json;

    /// Builds a chat-response record whose usage reports `output` output tokens.
    fn response(output: u64) -> Record {
        let payload = json!({
            "model": "x",
            "content": [],
            "stop_reason": "end_turn",
            "latency_ms": 0,
            "usage": {"input_tokens": 1, "output_tokens": output, "thinking_tokens": 0},
        });
        Record::new(Kind::ChatResponse, payload, "2026-04-21T10:00:00Z", None)
    }

    /// Twenty pairs where the candidate emits 50 fewer output tokens than the
    /// baseline (100..=119 vs 50..=69) must yield a negative delta that is
    /// classified as at least moderate.
    #[test]
    fn candidate_half_as_verbose_is_moderate_or_severe() {
        let baseline: Vec<Record> = (100..120).map(response).collect();
        let candidate: Vec<Record> = (50..70).map(response).collect();
        let pairs: Vec<(&Record, &Record)> = baseline.iter().zip(&candidate).collect();

        let stat = compute(&pairs, Some(7));

        assert!(stat.delta < 0.0);
        assert!(matches!(
            stat.severity,
            Severity::Moderate | Severity::Severe
        ));
    }
}