1use sha2::{Digest, Sha256};
46
47use crate::agentlog::Record;
48use crate::diff::axes::{Axis, AxisStat};
49use crate::diff::bootstrap::{median, paired_ci};
50
// Number of hex characters retained from the SHA-256 digest of a tool
// call's canonicalised input (two hex chars per digest byte, so 8 bytes).
const ARG_VALUE_DIGEST_HEX_LEN: usize = 16;
54
55fn arg_value_digest(input: &serde_json::Value) -> String {
56 let canonical = canonicalise(input);
61 let bytes = serde_json::to_vec(&canonical).unwrap_or_default();
62 let mut h = Sha256::new();
63 h.update(&bytes);
64 let digest = h.finalize();
65 let hex: String = digest
66 .iter()
67 .take(ARG_VALUE_DIGEST_HEX_LEN / 2)
68 .map(|b| format!("{b:02x}"))
69 .collect();
70 hex
71}
72
73fn canonicalise(value: &serde_json::Value) -> serde_json::Value {
74 match value {
75 serde_json::Value::Object(map) => {
76 let mut sorted: std::collections::BTreeMap<String, serde_json::Value> =
77 std::collections::BTreeMap::new();
78 for (k, v) in map {
79 sorted.insert(k.clone(), canonicalise(v));
80 }
81 serde_json::Value::Object(sorted.into_iter().collect())
82 }
83 serde_json::Value::Array(arr) => {
84 serde_json::Value::Array(arr.iter().map(canonicalise).collect())
85 }
86 other => other.clone(),
87 }
88}
89
90fn tool_shape(r: &Record) -> Vec<String> {
91 let content = match r.payload.get("content").and_then(|c| c.as_array()) {
92 Some(arr) => arr,
93 None => return Vec::new(),
94 };
95 let mut out = Vec::new();
96 for part in content {
97 if part.get("type").and_then(|t| t.as_str()) == Some("tool_use") {
98 let name = part
99 .get("name")
100 .and_then(|n| n.as_str())
101 .unwrap_or("_")
102 .to_string();
103 let input = part
104 .get("input")
105 .cloned()
106 .unwrap_or(serde_json::Value::Null);
107 let mut keys: Vec<String> = input
108 .as_object()
109 .map(|o| o.keys().cloned().collect())
110 .unwrap_or_default();
111 keys.sort();
112 let value_digest = arg_value_digest(&input);
113 out.push(format!("{name}({}|{value_digest})", keys.join(",")));
114 }
115 }
116 out
117}
118
/// Edit distance between two token sequences, counting insertions,
/// deletions, and substitutions at unit cost each.
///
/// Classic two-row dynamic programme: O(m*n) time, O(n) extra space.
fn levenshtein(a: &[String], b: &[String]) -> usize {
    if a.is_empty() {
        return b.len();
    }
    if b.is_empty() {
        return a.len();
    }
    let n = b.len();
    // `prev` holds DP row i-1; `curr` is filled in as row i.
    let mut prev: Vec<usize> = (0..=n).collect();
    let mut curr = vec![0usize; n + 1];
    for (i, token_a) in a.iter().enumerate() {
        curr[0] = i + 1;
        for (j, token_b) in b.iter().enumerate() {
            let substitution = prev[j] + usize::from(token_a != token_b);
            let deletion = prev[j + 1] + 1;
            let insertion = curr[j] + 1;
            curr[j + 1] = substitution.min(deletion).min(insertion);
        }
        std::mem::swap(&mut prev, &mut curr);
    }
    prev[n]
}
139
140fn normalized_divergence(b: &[String], c: &[String]) -> f64 {
141 let denom = b.len().max(c.len());
142 if denom == 0 {
143 0.0
144 } else {
145 levenshtein(b, c) as f64 / denom as f64
146 }
147}
148
149pub fn compute(pairs: &[(&Record, &Record)], seed: Option<u64>) -> AxisStat {
151 if pairs.is_empty() {
152 return AxisStat::empty(Axis::Trajectory);
153 }
154 let baseline_zero: Vec<f64> = (0..pairs.len()).map(|_| 0.0).collect();
155 let divergence: Vec<f64> = pairs
156 .iter()
157 .map(|(b, c)| normalized_divergence(&tool_shape(b), &tool_shape(c)))
158 .collect();
159 let bm = median(&baseline_zero);
160 let cm = median(&divergence);
161 let delta = cm - bm;
162 let ci = paired_ci(
163 &baseline_zero,
164 &divergence,
165 |bs, cs| median(cs) - median(bs),
166 0,
167 seed,
168 );
169 AxisStat::new_rate(
175 Axis::Trajectory,
176 bm,
177 cm,
178 delta,
179 ci.low,
180 ci.high,
181 pairs.len(),
182 )
183}
184
#[cfg(test)]
mod tests {
    use super::*;
    use crate::agentlog::Kind;
    use serde_json::json;

    /// Build a ChatResponse record whose `content` holds one `tool_use`
    /// part per `(name, arg_keys)` entry; every argument value is "v",
    /// so these fixtures vary shape (names/keys) but not values.
    fn response_with_tools(tools: &[(&str, &[&str])]) -> Record {
        let content: Vec<serde_json::Value> = tools
            .iter()
            .map(|(name, keys)| {
                let input: serde_json::Map<String, serde_json::Value> = keys
                    .iter()
                    .map(|k| ((*k).to_string(), json!("v")))
                    .collect();
                json!({
                    "type": "tool_use",
                    "id": format!("t_{name}"),
                    "name": name,
                    "input": input,
                })
            })
            .collect();
        Record::new(
            Kind::ChatResponse,
            json!({
                "model": "x",
                "content": content,
                "stop_reason": "tool_use",
                "latency_ms": 0,
                "usage": {"input_tokens": 1, "output_tokens": 1, "thinking_tokens": 0},
            }),
            "2026-04-21T10:00:00Z",
            None,
        )
    }

    // Pairing a record with itself must produce zero divergence.
    #[test]
    fn identical_tool_shapes_produce_zero_divergence() {
        let r = response_with_tools(&[("search_files", &["query"])]);
        let pairs = [(&r, &r)];
        let stat = compute(&pairs, Some(1));
        assert_eq!(stat.candidate_median, 0.0);
    }

    // Adding an argument key to the same tool changes its shape entry,
    // so the axis must report non-zero divergence.
    #[test]
    fn schema_change_on_same_tool_is_divergence() {
        let baseline = response_with_tools(&[("search_files", &["query"])]);
        let candidate = response_with_tools(&[("search_files", &["query", "limit"])]);
        let pairs = [(&baseline, &candidate); 10];
        let stat = compute(&pairs, Some(2));
        assert!(stat.candidate_median > 0.0);
    }

    // Same tool, same argument keys, different argument VALUE: only the
    // value digest distinguishes the two shapes, so this exercises
    // arg_value_digest end to end.
    #[test]
    fn arg_value_change_on_same_tool_is_divergence() {
        let baseline = Record::new(
            Kind::ChatResponse,
            json!({
                "model": "x",
                "content": [{
                    "type": "tool_use",
                    "id": "t1",
                    "name": "delete_user",
                    "input": {"id": "alice"},
                }],
                "stop_reason": "tool_use",
                "latency_ms": 0,
                "usage": {"input_tokens": 1, "output_tokens": 1, "thinking_tokens": 0},
            }),
            "2026-04-21T10:00:00Z",
            None,
        );
        let candidate = Record::new(
            Kind::ChatResponse,
            json!({
                "model": "x",
                "content": [{
                    "type": "tool_use",
                    "id": "t1",
                    "name": "delete_user",
                    "input": {"id": "bob"},
                }],
                "stop_reason": "tool_use",
                "latency_ms": 0,
                "usage": {"input_tokens": 1, "output_tokens": 1, "thinking_tokens": 0},
            }),
            "2026-04-21T10:00:00Z",
            None,
        );
        let pairs = [(&baseline, &candidate); 10];
        let stat = compute(&pairs, Some(3));
        assert!(
            stat.candidate_median > 0.0,
            "trajectory axis must register a value change as divergence; \
             got candidate_median = {}",
            stat.candidate_median,
        );
    }

    // Nested inputs with identical values must digest identically and
    // therefore score zero divergence.
    #[test]
    fn identical_arg_values_score_zero_even_with_complex_inputs() {
        let r = Record::new(
            Kind::ChatResponse,
            json!({
                "model": "x",
                "content": [{
                    "type": "tool_use",
                    "id": "t1",
                    "name": "execute",
                    "input": {
                        "query": "SELECT * FROM users",
                        "params": {"limit": 10, "offset": 0},
                    },
                }],
                "stop_reason": "tool_use",
                "latency_ms": 0,
                "usage": {"input_tokens": 1, "output_tokens": 1, "thinking_tokens": 0},
            }),
            "2026-04-21T10:00:00Z",
            None,
        );
        let pairs = [(&r, &r)];
        let stat = compute(&pairs, Some(4));
        assert_eq!(stat.candidate_median, 0.0);
    }

    // Key order inside `input` must not affect the digest — this pins
    // the canonicalise() behaviour.
    #[test]
    fn arg_key_order_is_canonicalised() {
        let baseline = Record::new(
            Kind::ChatResponse,
            json!({
                "model": "x",
                "content": [{
                    "type": "tool_use",
                    "id": "t1",
                    "name": "log",
                    "input": {"level": "info", "msg": "hello"},
                }],
                "stop_reason": "tool_use",
                "latency_ms": 0,
                "usage": {"input_tokens": 1, "output_tokens": 1, "thinking_tokens": 0},
            }),
            "2026-04-21T10:00:00Z",
            None,
        );
        let candidate = Record::new(
            Kind::ChatResponse,
            json!({
                "model": "x",
                "content": [{
                    "type": "tool_use",
                    "id": "t1",
                    "name": "log",
                    "input": {"msg": "hello", "level": "info"},
                }],
                "stop_reason": "tool_use",
                "latency_ms": 0,
                "usage": {"input_tokens": 1, "output_tokens": 1, "thinking_tokens": 0},
            }),
            "2026-04-21T10:00:00Z",
            None,
        );
        let pairs = [(&baseline, &candidate); 5];
        let stat = compute(&pairs, Some(5));
        assert_eq!(stat.candidate_median, 0.0);
    }

    // Sanity check on the distance primitive itself: one substitution.
    #[test]
    fn levenshtein_basic() {
        let a = vec!["a".to_string(), "b".to_string(), "c".to_string()];
        let b = vec!["a".to_string(), "x".to_string(), "c".to_string()];
        assert_eq!(levenshtein(&a, &b), 1);
    }
}