1use crate::agentlog::{Kind, Record};
18
19pub mod alignment;
20pub mod axes;
21pub mod bootstrap;
22pub mod conformance;
23pub mod cost;
24pub mod drill_down;
25pub mod embedder;
26pub mod judge;
27pub mod latency;
28pub mod reasoning;
29pub mod recommendations;
30pub mod report;
31pub mod safety;
32pub mod semantic;
33pub mod trajectory;
34pub mod verbosity;
35
36pub use alignment::{DivergenceKind, FirstDivergence};
37pub use axes::{Axis, AxisStat, Severity};
38pub use bootstrap::{paired_ci, CiResult};
39pub use drill_down::{PairAxisScore, PairDrilldown};
40pub use recommendations::{ActionKind, Recommendation, RecommendationSeverity};
41pub use report::DiffReport;
42
43pub fn extract_response_pairs<'a>(
50 baseline: &'a [Record],
51 candidate: &'a [Record],
52) -> Vec<(&'a Record, &'a Record)> {
53 let b: Vec<&Record> = baseline
54 .iter()
55 .filter(|r| r.kind == Kind::ChatResponse)
56 .collect();
57 let c: Vec<&Record> = candidate
58 .iter()
59 .filter(|r| r.kind == Kind::ChatResponse)
60 .collect();
61 b.into_iter().zip(c).collect()
62}
63
64fn trace_id_for(records: &[Record]) -> String {
86 records
87 .iter()
88 .find_map(|r| {
89 r.meta
90 .as_ref()
91 .and_then(|m| m.get("trace_id"))
92 .and_then(|v| v.as_str())
93 .map(str::to_string)
94 })
95 .or_else(|| records.first().map(|r| r.id.clone()))
96 .unwrap_or_default()
97}
98
99pub fn compute_report(
104 baseline: &[Record],
105 candidate: &[Record],
106 pricing: &cost::Pricing,
107 seed: Option<u64>,
108) -> DiffReport {
109 let pairs = extract_response_pairs(baseline, candidate);
110 let rows = vec![
111 semantic::compute(&pairs, seed),
112 trajectory::compute(&pairs, seed),
113 safety::compute(&pairs, seed),
114 verbosity::compute(&pairs, seed),
115 latency::compute(&pairs, seed),
116 cost::compute(&pairs, pricing, seed),
117 reasoning::compute(&pairs, seed),
118 AxisStat::empty(Axis::Judge),
119 conformance::compute(&pairs, seed),
120 ];
121 let first_divergence = alignment::detect(baseline, candidate);
122 let divergences = alignment::detect_top_k(baseline, candidate, alignment::DEFAULT_K);
123 let drill_down = drill_down::compute(&pairs, pricing, drill_down::DEFAULT_K);
124 let mut report = DiffReport {
125 rows,
126 baseline_trace_id: trace_id_for(baseline),
127 candidate_trace_id: trace_id_for(candidate),
128 pair_count: pairs.len(),
129 first_divergence,
130 divergences,
131 drill_down,
132 recommendations: Vec::new(),
133 };
134 report.recommendations = recommendations::generate(&report);
138 report
139}
140
#[cfg(test)]
mod tests {
    use super::*;
    use crate::agentlog::Kind;
    use serde_json::json;

    /// Builds a synthetic trace: one metadata record followed by a chat
    /// request/response pair for every `(latency_ms, text)` entry.
    ///
    /// Each request is parented to the record emitted just before it, and
    /// each response is parented to its own request.
    fn make_trace(responses: Vec<(u64, &str)>) -> Vec<Record> {
        let mut trace = vec![Record::new(
            Kind::Metadata,
            json!({"sdk": {"name": "shadow"}}),
            "2026-04-21T10:00:00Z",
            None,
        )];
        for (i, (latency_ms, reply)) in responses.iter().enumerate() {
            let parent_id = trace.last().map(|prev| prev.id.clone());
            let request = Record::new(
                Kind::ChatRequest,
                json!({"model": "x", "messages": [{"role": "user", "content": format!("q{i}")}], "params": {}}),
                format!("2026-04-21T10:00:{:02}.000Z", i),
                parent_id,
            );
            let response = Record::new(
                Kind::ChatResponse,
                json!({
                    "model": "x",
                    "content": [{"type": "text", "text": reply}],
                    "stop_reason": "end_turn",
                    "latency_ms": latency_ms,
                    "usage": {"input_tokens": 10, "output_tokens": 5, "thinking_tokens": 0},
                }),
                format!("2026-04-21T10:00:{:02}.500Z", i),
                Some(request.id.clone()),
            );
            trace.extend([request, response]);
        }
        trace
    }

    /// Returns `trace` with every record's `meta` replaced by a map holding
    /// only `"trace_id": trace_id`.
    fn with_trace_id(trace: Vec<Record>, trace_id: &str) -> Vec<Record> {
        trace
            .into_iter()
            .map(|mut record| {
                let mut meta = serde_json::Map::new();
                meta.insert("trace_id".to_owned(), json!(trace_id));
                record.meta = Some(meta);
                record
            })
            .collect()
    }

    /// The report always carries nine axis rows, counts the response pairs,
    /// and reports a positive latency delta when the candidate is slower.
    #[test]
    fn compute_report_shapes_to_nine_axes() {
        let baseline = make_trace(vec![(100, "yes"), (110, "ok"), (90, "sure")]);
        let candidate = make_trace(vec![(200, "yes"), (220, "ok"), (180, "sure")]);
        let pricing = cost::Pricing::new();

        let report = compute_report(&baseline, &candidate, &pricing, Some(42));

        assert_eq!(report.rows.len(), 9);
        assert_eq!(report.pair_count, 3);
        let latency_row = report
            .rows
            .iter()
            .find(|row| row.axis == Axis::Latency)
            .unwrap();
        assert!(latency_row.delta > 0.0);
    }

    /// Surplus responses on the longer side are dropped when pairing.
    #[test]
    fn extract_response_pairs_truncates_to_shorter() {
        let longer = make_trace(vec![(1, "a"), (2, "b"), (3, "c")]);
        let shorter = make_trace(vec![(1, "a"), (2, "b")]);

        let pairs = extract_response_pairs(&longer, &shorter);

        assert_eq!(pairs.len(), 2);
    }

    /// A `trace_id` stamped in record `meta` wins over the first record's id,
    /// so two traces whose first records share an id stay distinguishable.
    #[test]
    fn trace_ids_use_envelope_meta_to_avoid_payload_collisions() {
        let b = with_trace_id(make_trace(vec![(1, "hello")]), "trace-aaaa");
        let c = with_trace_id(make_trace(vec![(2, "hello")]), "trace-bbbb");

        // Precondition: the leading metadata records are built from identical
        // inputs and end up with the same id, so the first-record fallback
        // alone could not tell these traces apart.
        assert_eq!(b[0].id, c[0].id);

        let pricing = cost::Pricing::new();
        let report = compute_report(&b, &c, &pricing, Some(42));

        assert_eq!(report.baseline_trace_id, "trace-aaaa");
        assert_eq!(report.candidate_trace_id, "trace-bbbb");
        assert_ne!(report.baseline_trace_id, report.candidate_trace_id);
    }

    /// Without any `trace_id` in `meta`, the report falls back to the id of
    /// each trace's first record.
    #[test]
    fn trace_id_falls_back_to_first_record_id_when_meta_missing() {
        let b = make_trace(vec![(1, "hello")]);
        let c = make_trace(vec![(2, "world")]);
        let pricing = cost::Pricing::new();

        let report = compute_report(&b, &c, &pricing, Some(42));

        assert_eq!(report.baseline_trace_id, b[0].id);
        assert_eq!(report.candidate_trace_id, c[0].id);
    }
}