1use crate::agentlog::{Kind, Record};
18
19pub mod alignment;
20pub mod axes;
21pub mod bootstrap;
22pub mod conformance;
23pub mod cost;
24pub mod drill_down;
25pub mod embedder;
26pub mod judge;
27pub mod latency;
28pub mod reasoning;
29pub mod recommendations;
30pub mod report;
31pub mod safety;
32pub mod semantic;
33pub mod trajectory;
34pub mod verbosity;
35
36pub use alignment::{DivergenceKind, FirstDivergence};
37pub use axes::{Axis, AxisStat, Severity};
38pub use bootstrap::{paired_ci, CiResult};
39pub use drill_down::{PairAxisScore, PairDrilldown};
40pub use recommendations::{ActionKind, Recommendation, RecommendationSeverity};
41pub use report::DiffReport;
42
43pub fn extract_response_pairs<'a>(
50 baseline: &'a [Record],
51 candidate: &'a [Record],
52) -> Vec<(&'a Record, &'a Record)> {
53 let b: Vec<&Record> = baseline
54 .iter()
55 .filter(|r| r.kind == Kind::ChatResponse)
56 .collect();
57 let c: Vec<&Record> = candidate
58 .iter()
59 .filter(|r| r.kind == Kind::ChatResponse)
60 .collect();
61 b.into_iter().zip(c).collect()
62}
63
64pub fn compute_report(
69 baseline: &[Record],
70 candidate: &[Record],
71 pricing: &cost::Pricing,
72 seed: Option<u64>,
73) -> DiffReport {
74 let pairs = extract_response_pairs(baseline, candidate);
75 let rows = vec![
76 semantic::compute(&pairs, seed),
77 trajectory::compute(&pairs, seed),
78 safety::compute(&pairs, seed),
79 verbosity::compute(&pairs, seed),
80 latency::compute(&pairs, seed),
81 cost::compute(&pairs, pricing, seed),
82 reasoning::compute(&pairs, seed),
83 AxisStat::empty(Axis::Judge),
84 conformance::compute(&pairs, seed),
85 ];
86 let first_divergence = alignment::detect(baseline, candidate);
87 let divergences = alignment::detect_top_k(baseline, candidate, alignment::DEFAULT_K);
88 let drill_down = drill_down::compute(&pairs, pricing, drill_down::DEFAULT_K);
89 let mut report = DiffReport {
90 rows,
91 baseline_trace_id: baseline.first().map(|r| r.id.clone()).unwrap_or_default(),
92 candidate_trace_id: candidate.first().map(|r| r.id.clone()).unwrap_or_default(),
93 pair_count: pairs.len(),
94 first_divergence,
95 divergences,
96 drill_down,
97 recommendations: Vec::new(),
98 };
99 report.recommendations = recommendations::generate(&report);
103 report
104}
105
#[cfg(test)]
mod tests {
    use super::*;
    use crate::agentlog::Kind;
    use serde_json::json;

    /// Build a synthetic trace: one metadata record followed by one
    /// request/response pair per `(latency_ms, text)` entry.
    ///
    /// Each request is parented to the record that precedes it in the trace
    /// (the metadata record or the previous response), and each response is
    /// parented to its own request.
    fn make_trace(responses: Vec<(u64, &str)>) -> Vec<Record> {
        let mut trace = vec![Record::new(
            Kind::Metadata,
            json!({"sdk": {"name": "shadow"}}),
            "2026-04-21T10:00:00Z",
            None,
        )];
        for (i, (latency_ms, reply)) in responses.iter().enumerate() {
            let parent_id = trace.last().map(|previous| previous.id.clone());
            let request = Record::new(
                Kind::ChatRequest,
                json!({"model": "x", "messages": [{"role": "user", "content": format!("q{i}")}], "params": {}}),
                format!("2026-04-21T10:00:{:02}.000Z", i),
                parent_id,
            );
            let response = Record::new(
                Kind::ChatResponse,
                json!({
                    "model": "x",
                    "content": [{"type": "text", "text": reply}],
                    "stop_reason": "end_turn",
                    "latency_ms": latency_ms,
                    "usage": {"input_tokens": 10, "output_tokens": 5, "thinking_tokens": 0},
                }),
                format!("2026-04-21T10:00:{:02}.500Z", i),
                Some(request.id.clone()),
            );
            trace.extend([request, response]);
        }
        trace
    }

    /// The report carries one row per axis (nine in total), counts the
    /// response pairs, and shows a positive latency delta when every
    /// candidate response is slower than its baseline counterpart.
    #[test]
    fn compute_report_shapes_to_nine_axes() {
        let baseline = make_trace(vec![(100, "yes"), (110, "ok"), (90, "sure")]);
        let candidate = make_trace(vec![(200, "yes"), (220, "ok"), (180, "sure")]);

        let report = compute_report(&baseline, &candidate, &cost::Pricing::new(), Some(42));

        assert_eq!(report.rows.len(), 9);
        assert_eq!(report.pair_count, 3);
        let latency_delta = report
            .rows
            .iter()
            .find(|row| row.axis == Axis::Latency)
            .map(|row| row.delta)
            .unwrap();
        assert!(latency_delta > 0.0);
    }

    /// With three baseline responses and two candidate responses, only two
    /// pairs are produced.
    #[test]
    fn extract_response_pairs_truncates_to_shorter() {
        let longer = make_trace(vec![(1, "a"), (2, "b"), (3, "c")]);
        let shorter = make_trace(vec![(1, "a"), (2, "b")]);

        let pair_total = extract_response_pairs(&longer, &shorter).len();

        assert_eq!(pair_total, 2);
    }
}