// shadow_core/diff/trajectory.rs
//! Axis 2: tool-call trajectory divergence.
//!
//! For each response, extract the sequence of tool-call tokens that
//! capture both the **structural shape** (tool name + sorted arg keys)
//! AND the **argument values** (8-byte digest of canonical-JSON
//! input). Compare baseline vs candidate sequences with Levenshtein
//! edit distance. Normalize by max(len(baseline_seq), len(candidate_seq))
//! so the metric is in [0, 1].
//!
//! Why include the value digest: a sequence that calls the same tools
//! in the same order with different argument values is a real
//! behavioural change (e.g. `delete_user(id="alice")` vs
//! `delete_user(id="bob")`). Without the value digest the per-axis
//! trajectory metric reports zero divergence on this case — even
//! though the alignment-based first-divergence detector picks it up
//! via its W_ARGS component. The value digest brings the per-axis
//! number in line with the alignment finding.
//!
//! The digest is the leading 8 bytes (16 hex chars) of SHA-256 over
//! the canonical-JSON serialisation of the `input` object. Birthday-
//! paradox collision probability at 16 hex chars (64 bits) is
//! ~n²/2⁶⁵ ≈ 2.7e-14 for 1000 tool calls — negligible for any
//! realistic agent trace.
//!
//! ## Coverage cross-references
//!
//! What this axis catches:
//! - Tool added / dropped / reordered (structural)
//! - Tool argument keys added / dropped (schema)
//! - Tool argument values changed (digest mismatch, v2.7+)
//!
//! What it does NOT catch:
//! - **Same tool sequence + same arg values + different RESPONSE
//!   text** — that's a content regression visible on the semantic
//!   axis (axis 1) and via the v2.7+ `text_chars_log` /
//!   `numeric_token_density` / `error_token_flag` dimensions of
//!   `shadow.statistical.fingerprint` (Hotelling T²).
//! - **Tool sequence policy violations** ("verify before refund",
//!   "no execute_sql without preview") — the LTLf checker
//!   (`shadow.ltl`) with `must_call_before` / `no_call` rules.
//! - **First moment of regression** — the alignment module
//!   (`shadow_core::diff::alignment`) walks both traces and points
//!   to the exact turn where divergence began, with kind
//!   classification (Structural / Decision / Style).

use std::fmt::Write as _;

use sha2::{Digest, Sha256};

use crate::agentlog::Record;
use crate::diff::axes::{Axis, AxisStat};
use crate::diff::bootstrap::{median, paired_ci};
50
51/// Length of the argument-value digest, in hex characters. 16 hex
52/// chars = 64 bits = ~1.8e-10 birthday collision probability at n=1000.
53const ARG_VALUE_DIGEST_HEX_LEN: usize = 16;
54
55fn arg_value_digest(input: &serde_json::Value) -> String {
56    // Canonical JSON via the `agentlog::canonical` writer would be
57    // ideal; for a per-tool-call digest the simpler `serde_json::to_vec`
58    // is sufficient as long as we sort keys first. We do that by walking
59    // the value into a BTreeMap-backed structure before serialising.
60    let canonical = canonicalise(input);
61    let bytes = serde_json::to_vec(&canonical).unwrap_or_default();
62    let mut h = Sha256::new();
63    h.update(&bytes);
64    let digest = h.finalize();
65    let hex: String = digest
66        .iter()
67        .take(ARG_VALUE_DIGEST_HEX_LEN / 2)
68        .map(|b| format!("{b:02x}"))
69        .collect();
70    hex
71}
72
73fn canonicalise(value: &serde_json::Value) -> serde_json::Value {
74    match value {
75        serde_json::Value::Object(map) => {
76            let mut sorted: std::collections::BTreeMap<String, serde_json::Value> =
77                std::collections::BTreeMap::new();
78            for (k, v) in map {
79                sorted.insert(k.clone(), canonicalise(v));
80            }
81            serde_json::Value::Object(sorted.into_iter().collect())
82        }
83        serde_json::Value::Array(arr) => {
84            serde_json::Value::Array(arr.iter().map(canonicalise).collect())
85        }
86        other => other.clone(),
87    }
88}
89
90fn tool_shape(r: &Record) -> Vec<String> {
91    let content = match r.payload.get("content").and_then(|c| c.as_array()) {
92        Some(arr) => arr,
93        None => return Vec::new(),
94    };
95    let mut out = Vec::new();
96    for part in content {
97        if part.get("type").and_then(|t| t.as_str()) == Some("tool_use") {
98            let name = part
99                .get("name")
100                .and_then(|n| n.as_str())
101                .unwrap_or("_")
102                .to_string();
103            let input = part
104                .get("input")
105                .cloned()
106                .unwrap_or(serde_json::Value::Null);
107            let mut keys: Vec<String> = input
108                .as_object()
109                .map(|o| o.keys().cloned().collect())
110                .unwrap_or_default();
111            keys.sort();
112            let value_digest = arg_value_digest(&input);
113            out.push(format!("{name}({}|{value_digest})", keys.join(",")));
114        }
115    }
116    out
117}
118
/// Levenshtein edit distance between two token sequences, computed
/// with a single-row dynamic-programming buffer (O(n) memory,
/// O(m·n) time).
fn levenshtein(a: &[String], b: &[String]) -> usize {
    if a.is_empty() {
        return b.len();
    }
    if b.is_empty() {
        return a.len();
    }
    // `row[j]` holds the distance between the prefixes a[..i] and
    // b[..j]; `diag` carries the value that sat at `row[j]` before the
    // current pass overwrote it (the i-1/j-1 cell).
    let mut row: Vec<usize> = (0..=b.len()).collect();
    for (i, tok_a) in a.iter().enumerate() {
        let mut diag = row[0];
        row[0] = i + 1;
        for (j, tok_b) in b.iter().enumerate() {
            let substitute = diag + usize::from(tok_a != tok_b);
            diag = row[j + 1];
            row[j + 1] = substitute.min(row[j] + 1).min(diag + 1);
        }
    }
    row[b.len()]
}

/// Edit distance normalised by the longer sequence, so the result is
/// in [0, 1]; two empty trajectories are identical by definition.
fn normalized_divergence(b: &[String], c: &[String]) -> f64 {
    match b.len().max(c.len()) {
        0 => 0.0,
        denom => levenshtein(b, c) as f64 / denom as f64,
    }
}
148
149/// Compute the tool-trajectory axis.
150pub fn compute(pairs: &[(&Record, &Record)], seed: Option<u64>) -> AxisStat {
151    if pairs.is_empty() {
152        return AxisStat::empty(Axis::Trajectory);
153    }
154    let baseline_zero: Vec<f64> = (0..pairs.len()).map(|_| 0.0).collect();
155    let divergence: Vec<f64> = pairs
156        .iter()
157        .map(|(b, c)| normalized_divergence(&tool_shape(b), &tool_shape(c)))
158        .collect();
159    let bm = median(&baseline_zero);
160    let cm = median(&divergence);
161    let delta = cm - bm;
162    let ci = paired_ci(
163        &baseline_zero,
164        &divergence,
165        |bs, cs| median(cs) - median(bs),
166        0,
167        seed,
168    );
169    // Trajectory is a rate in [0, 1] measured *from* zero (identical
170    // sequences → 0 divergence). The relative-delta severity used by
171    // `new_value` divides by baseline_median=0.0 and always returns
172    // Minor, regardless of magnitude. `new_rate` uses absolute-delta
173    // thresholds, which is the honest classification for this axis.
174    AxisStat::new_rate(
175        Axis::Trajectory,
176        bm,
177        cm,
178        delta,
179        ci.low,
180        ci.high,
181        pairs.len(),
182    )
183}
184
#[cfg(test)]
mod tests {
    use super::*;
    use crate::agentlog::Kind;
    use serde_json::json;

    /// Build a `ChatResponse` record containing one `tool_use` content
    /// part per `(name, arg_keys)` entry; every argument value is the
    /// placeholder string "v", so these fixtures exercise shape/schema
    /// divergence but not value-digest divergence.
    fn response_with_tools(tools: &[(&str, &[&str])]) -> Record {
        let content: Vec<serde_json::Value> = tools
            .iter()
            .map(|(name, keys)| {
                let input: serde_json::Map<String, serde_json::Value> = keys
                    .iter()
                    .map(|k| ((*k).to_string(), json!("v")))
                    .collect();
                json!({
                    "type": "tool_use",
                    "id": format!("t_{name}"),
                    "name": name,
                    "input": input,
                })
            })
            .collect();
        Record::new(
            Kind::ChatResponse,
            json!({
                "model": "x",
                "content": content,
                "stop_reason": "tool_use",
                "latency_ms": 0,
                "usage": {"input_tokens": 1, "output_tokens": 1, "thinking_tokens": 0},
            }),
            "2026-04-21T10:00:00Z",
            None,
        )
    }

    #[test]
    fn identical_tool_shapes_produce_zero_divergence() {
        let r = response_with_tools(&[("search_files", &["query"])]);
        let pairs = [(&r, &r)];
        let stat = compute(&pairs, Some(1));
        assert_eq!(stat.candidate_median, 0.0);
    }

    #[test]
    fn schema_change_on_same_tool_is_divergence() {
        let baseline = response_with_tools(&[("search_files", &["query"])]);
        // Candidate adds a `limit` key.
        let candidate = response_with_tools(&[("search_files", &["query", "limit"])]);
        let pairs = [(&baseline, &candidate); 10];
        let stat = compute(&pairs, Some(2));
        assert!(stat.candidate_median > 0.0);
    }

    /// Pre-fix bug: same tool, same arg KEYS, different arg VALUES
    /// produced 0 divergence on the trajectory axis. The alignment
    /// module caught it via W_ARGS, but the per-axis number lied.
    #[test]
    fn arg_value_change_on_same_tool_is_divergence() {
        // Built inline (not via `response_with_tools`) because the
        // fixture helper cannot vary argument values.
        let baseline = Record::new(
            Kind::ChatResponse,
            json!({
                "model": "x",
                "content": [{
                    "type": "tool_use",
                    "id": "t1",
                    "name": "delete_user",
                    "input": {"id": "alice"},
                }],
                "stop_reason": "tool_use",
                "latency_ms": 0,
                "usage": {"input_tokens": 1, "output_tokens": 1, "thinking_tokens": 0},
            }),
            "2026-04-21T10:00:00Z",
            None,
        );
        let candidate = Record::new(
            Kind::ChatResponse,
            json!({
                "model": "x",
                "content": [{
                    "type": "tool_use",
                    "id": "t1",
                    "name": "delete_user",
                    "input": {"id": "bob"},
                }],
                "stop_reason": "tool_use",
                "latency_ms": 0,
                "usage": {"input_tokens": 1, "output_tokens": 1, "thinking_tokens": 0},
            }),
            "2026-04-21T10:00:00Z",
            None,
        );
        let pairs = [(&baseline, &candidate); 10];
        let stat = compute(&pairs, Some(3));
        assert!(
            stat.candidate_median > 0.0,
            "trajectory axis must register a value change as divergence; \
             got candidate_median = {}",
            stat.candidate_median,
        );
    }

    #[test]
    fn identical_arg_values_score_zero_even_with_complex_inputs() {
        // Same tool, same nested structured input → zero divergence.
        let r = Record::new(
            Kind::ChatResponse,
            json!({
                "model": "x",
                "content": [{
                    "type": "tool_use",
                    "id": "t1",
                    "name": "execute",
                    "input": {
                        "query": "SELECT * FROM users",
                        "params": {"limit": 10, "offset": 0},
                    },
                }],
                "stop_reason": "tool_use",
                "latency_ms": 0,
                "usage": {"input_tokens": 1, "output_tokens": 1, "thinking_tokens": 0},
            }),
            "2026-04-21T10:00:00Z",
            None,
        );
        let pairs = [(&r, &r)];
        let stat = compute(&pairs, Some(4));
        assert_eq!(stat.candidate_median, 0.0);
    }

    /// Canonicalisation: object key order in the input must NOT cause
    /// a spurious value-change divergence.
    #[test]
    fn arg_key_order_is_canonicalised() {
        let baseline = Record::new(
            Kind::ChatResponse,
            json!({
                "model": "x",
                "content": [{
                    "type": "tool_use",
                    "id": "t1",
                    "name": "log",
                    "input": {"level": "info", "msg": "hello"},
                }],
                "stop_reason": "tool_use",
                "latency_ms": 0,
                "usage": {"input_tokens": 1, "output_tokens": 1, "thinking_tokens": 0},
            }),
            "2026-04-21T10:00:00Z",
            None,
        );
        let candidate = Record::new(
            Kind::ChatResponse,
            json!({
                "model": "x",
                "content": [{
                    "type": "tool_use",
                    "id": "t1",
                    "name": "log",
                    // Same content, different key order.
                    "input": {"msg": "hello", "level": "info"},
                }],
                "stop_reason": "tool_use",
                "latency_ms": 0,
                "usage": {"input_tokens": 1, "output_tokens": 1, "thinking_tokens": 0},
            }),
            "2026-04-21T10:00:00Z",
            None,
        );
        let pairs = [(&baseline, &candidate); 5];
        let stat = compute(&pairs, Some(5));
        assert_eq!(stat.candidate_median, 0.0);
    }

    #[test]
    fn levenshtein_basic() {
        let a = vec!["a".to_string(), "b".to_string(), "c".to_string()];
        let b = vec!["a".to_string(), "x".to_string(), "c".to_string()];
        assert_eq!(levenshtein(&a, &b), 1);
    }
}
366}