1use serde::{Deserialize, Serialize};
40use std::collections::BTreeSet;
41
42use crate::agentlog::{Kind, Record};
43use crate::diff::axes::Axis;
44
/// Severity class of a divergence between a baseline turn and a candidate turn.
///
/// `kind_rank` orders these `Structural` > `Decision` > `Style` when ranking results.
/// Each variant serialises under the same string that `label` returns.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum DivergenceKind {
    /// Cosmetic wording change with the tool sequence, stop reason and arguments intact.
    #[serde(rename = "style_drift")]
    Style,
    /// Same tool shape, but the stop reason, a tool argument value or the response text changed.
    #[serde(rename = "decision_drift")]
    Decision,
    /// Tool set or tool-call count changed, or a whole turn was inserted or dropped.
    #[serde(rename = "structural_drift")]
    Structural,
}
65
66impl DivergenceKind {
67 pub fn label(&self) -> &'static str {
69 match self {
70 DivergenceKind::Style => "style_drift",
71 DivergenceKind::Decision => "decision_drift",
72 DivergenceKind::Structural => "structural_drift",
73 }
74 }
75}
76
/// One divergence found while walking the alignment of baseline and candidate responses.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct FirstDivergence {
    /// Index into the baseline's chat-response-only sequence (other record kinds are
    /// filtered out before alignment). For an inserted candidate turn this is the
    /// insertion point, i.e. the number of baseline responses consumed so far.
    pub baseline_turn: usize,
    /// Index into the candidate's chat-response-only sequence. For a dropped baseline
    /// turn this is the number of candidate responses consumed so far.
    pub candidate_turn: usize,
    /// Severity class of the divergence.
    pub kind: DivergenceKind,
    /// Axis the divergence is attributed to (`Trajectory`, `Safety` or `Semantic` in this file).
    pub primary_axis: Axis,
    /// Human-readable description of what changed.
    pub explanation: String,
    /// Value in `[0, 1]`: `1.0` for inserted/dropped turns, otherwise the pair cost
    /// rescaled above `NOISE_FLOOR` and clamped.
    pub confidence: f64,
}
100
/// Default result count.
///
/// NOTE(review): not referenced anywhere in this file; presumably the default `k` that
/// callers pass to `detect_top_k` — confirm against the call sites.
pub const DEFAULT_K: usize = 5;
109
110pub fn detect(baseline: &[Record], candidate: &[Record]) -> Option<FirstDivergence> {
121 let baseline_responses: Vec<&Record> = baseline
127 .iter()
128 .filter(|r| r.kind == Kind::ChatResponse)
129 .collect();
130 let candidate_responses: Vec<&Record> = candidate
131 .iter()
132 .filter(|r| r.kind == Kind::ChatResponse)
133 .collect();
134 if baseline_responses.is_empty() || candidate_responses.is_empty() {
135 return None;
136 }
137 let alignment = align(&baseline_responses, &candidate_responses);
138 walk_collecting(&alignment, &baseline_responses, &candidate_responses, 1)
139 .into_iter()
140 .next()
141}
142
143pub fn detect_top_k(baseline: &[Record], candidate: &[Record], k: usize) -> Vec<FirstDivergence> {
156 if k == 0 {
157 return Vec::new();
158 }
159 let baseline_responses: Vec<&Record> = baseline
160 .iter()
161 .filter(|r| r.kind == Kind::ChatResponse)
162 .collect();
163 let candidate_responses: Vec<&Record> = candidate
164 .iter()
165 .filter(|r| r.kind == Kind::ChatResponse)
166 .collect();
167 if baseline_responses.is_empty() || candidate_responses.is_empty() {
168 return Vec::new();
169 }
170 let alignment = align(&baseline_responses, &candidate_responses);
171 let max_possible = baseline_responses.len() + candidate_responses.len();
174 let mut all = walk_collecting(
175 &alignment,
176 &baseline_responses,
177 &candidate_responses,
178 max_possible,
179 );
180 all.sort_by(|a, b| {
184 kind_rank(b.kind).cmp(&kind_rank(a.kind)).then_with(|| {
185 b.confidence
186 .partial_cmp(&a.confidence)
187 .unwrap_or(std::cmp::Ordering::Equal)
188 })
189 });
190 all.truncate(k);
191 all
192}
193
194fn kind_rank(k: DivergenceKind) -> u8 {
199 match k {
200 DivergenceKind::Structural => 3,
201 DivergenceKind::Decision => 2,
202 DivergenceKind::Style => 1,
203 }
204}
205
// Component weights of `pair_cost`; the four weights sum to 1.0, so a pair cost lies in [0, 1].
/// Weight of the structural term (tool-shape / tool-count distance).
const W_STRUCT: f64 = 0.40;
/// Weight of the semantic term (1 - text similarity).
const W_SEM: f64 = 0.25;
/// Weight of the stop-reason mismatch term.
const W_STOP: f64 = 0.15;
/// Weight of the tool-argument-value mismatch term.
const W_ARGS: f64 = 0.20;
/// Cost charged for the first turn of a gap (an inserted or dropped turn).
const GAP_OPEN: f64 = 0.60;
/// Cost charged for each further consecutive turn of the same gap.
const GAP_EXTEND: f64 = 0.15;

/// Matched pairs whose cost is at or below this value are not reported.
const NOISE_FLOOR: f64 = 0.12;

/// Highest pair cost at which a text-only difference can still be classified as `Style`.
const STYLE_MAX_COST: f64 = 0.25;
233
/// One step of an alignment between the baseline and candidate response sequences.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Step {
    /// Baseline response `.0` is paired with candidate response `.1`.
    Match(usize, usize),
    /// Candidate response at this index has no baseline counterpart.
    InsertCandidate(usize),
    /// Baseline response at this index has no candidate counterpart.
    DeleteBaseline(usize),
}
243
/// Result of aligning two response sequences.
struct Alignment {
    /// Alignment steps ordered from the first turn to the last.
    steps: Vec<Step>,
}
248
/// When the longer sequence has more responses than this, `align` uses the banded DP
/// instead of the full one.
const SCALE_BAND_THRESHOLD: usize = 1000;
254
/// Smallest slack added on top of the length difference when sizing the band.
const MIN_BAND_HALF_WIDTH: usize = 100;

/// Computes the half-width of the diagonal band for the banded alignment.
///
/// The result is `|n - m|` plus the larger of `MIN_BAND_HALF_WIDTH` and
/// `floor(sqrt(max(n, m)))`, so the end cell `(n, m)` always lies inside the band.
fn band_half_width(n: usize, m: usize) -> usize {
    let longer = n.max(m);
    let sqrt_radius = (longer as f64).sqrt() as usize;
    let slack = if sqrt_radius > MIN_BAND_HALF_WIDTH {
        sqrt_radius
    } else {
        MIN_BAND_HALF_WIDTH
    };
    n.abs_diff(m) + slack
}
270
271fn align(baseline: &[&Record], candidate: &[&Record]) -> Alignment {
272 let n = baseline.len();
273 let m = candidate.len();
274 if n.max(m) > SCALE_BAND_THRESHOLD {
275 align_banded(baseline, candidate, band_half_width(n, m))
276 } else {
277 align_full(baseline, candidate)
278 }
279}
280
/// Global alignment of two response sequences with affine gap costs, over the full
/// `(n + 1) x (m + 1)` grid.
///
/// Three cost tables are kept per cell `(i, j)` (prefixes of length `i` and `j`):
/// `mat` for alignments ending in a match, `xg` for those ending in a candidate
/// insertion, `yg` for those ending in a baseline deletion. A gap costs `GAP_OPEN`
/// for its first turn and `GAP_EXTEND` for each further one; a match costs `pair_cost`.
///
/// Returns the steps in front-to-back order.
fn align_full(baseline: &[&Record], candidate: &[&Record]) -> Alignment {
    let n = baseline.len();
    let m = candidate.len();
    // Finite "unreachable" sentinel: keeps `best - m_cost` below well-defined
    // (inf - inf would be NaN).
    const INF: f64 = 1e18;
    let mut mat = vec![vec![INF; m + 1]; n + 1];
    let mut xg = vec![vec![INF; m + 1]; n + 1];
    let mut yg = vec![vec![INF; m + 1]; n + 1];
    // `back[i][j]` records the step taken to arrive at cell (i, j).
    let mut back = vec![vec![Step::Match(0, 0); m + 1]; n + 1];

    mat[0][0] = 0.0;
    // First column: the only way to reach (i, 0) is to delete i baseline turns.
    for i in 1..=n {
        yg[i][0] = GAP_OPEN + (i as f64 - 1.0) * GAP_EXTEND;
        mat[i][0] = yg[i][0];
        back[i][0] = Step::DeleteBaseline(i - 1);
    }
    // First row: the only way to reach (0, j) is to insert j candidate turns.
    for j in 1..=m {
        xg[0][j] = GAP_OPEN + (j as f64 - 1.0) * GAP_EXTEND;
        mat[0][j] = xg[0][j];
        back[0][j] = Step::InsertCandidate(j - 1);
    }

    for i in 1..=n {
        for j in 1..=m {
            let c = pair_cost(baseline[i - 1], candidate[j - 1]);
            // A match may follow any of the three states at the diagonal predecessor.
            let m_cost = mat[i - 1][j - 1]
                .min(xg[i - 1][j - 1])
                .min(yg[i - 1][j - 1])
                + c;
            // Open a new gap after a match, or extend a gap of the same kind.
            let xg_cost = (mat[i][j - 1] + GAP_OPEN).min(xg[i][j - 1] + GAP_EXTEND);
            let yg_cost = (mat[i - 1][j] + GAP_OPEN).min(yg[i - 1][j] + GAP_EXTEND);
            mat[i][j] = m_cost;
            xg[i][j] = xg_cost;
            yg[i][j] = yg_cost;
            let best = m_cost.min(xg_cost).min(yg_cost);
            // Ties resolve in the order match, insertion, deletion.
            // NOTE(review): one back pointer per cell (not one per state) means the
            // traceback re-picks the cheapest state at each cell it lands on.
            back[i][j] = if (best - m_cost).abs() < 1e-12 {
                Step::Match(i - 1, j - 1)
            } else if (best - xg_cost).abs() < 1e-12 {
                Step::InsertCandidate(j - 1)
            } else {
                Step::DeleteBaseline(i - 1)
            };
        }
    }

    // Trace back from (n, m) to (0, 0); steps come out in reverse order.
    let mut steps = Vec::new();
    let mut i = n;
    let mut j = m;
    while i > 0 || j > 0 {
        let s = back[i][j];
        steps.push(s);
        match s {
            Step::Match(_, _) => {
                i -= 1;
                j -= 1;
            }
            Step::InsertCandidate(_) => {
                j -= 1;
            }
            Step::DeleteBaseline(_) => {
                i -= 1;
            }
        }
    }
    steps.reverse();
    Alignment { steps }
}
358
/// Banded variant of `align_full`: only cells with `|i - j| <= band` are evaluated.
///
/// Cells outside the band keep the `INF` sentinel and are never chosen while a finite
/// alternative exists. The recurrence and tie-breaking are the same as in `align_full`.
///
/// NOTE(review): the four tables are still allocated at full `(n + 1) x (m + 1)` size,
/// so banding reduces the number of `pair_cost` evaluations but not memory use.
fn align_banded(baseline: &[&Record], candidate: &[&Record], band: usize) -> Alignment {
    let n = baseline.len();
    let m = candidate.len();
    // Finite "unreachable" sentinel (see the tie tests on `best` below).
    const INF: f64 = 1e18;
    let mut mat = vec![vec![INF; m + 1]; n + 1];
    let mut xg = vec![vec![INF; m + 1]; n + 1];
    let mut yg = vec![vec![INF; m + 1]; n + 1];
    let mut back = vec![vec![Step::Match(0, 0); m + 1]; n + 1];

    mat[0][0] = 0.0;
    // Border cells are initialised only as far as the band reaches.
    for i in 1..=n.min(band) {
        yg[i][0] = GAP_OPEN + (i as f64 - 1.0) * GAP_EXTEND;
        mat[i][0] = yg[i][0];
        back[i][0] = Step::DeleteBaseline(i - 1);
    }
    for j in 1..=m.min(band) {
        xg[0][j] = GAP_OPEN + (j as f64 - 1.0) * GAP_EXTEND;
        mat[0][j] = xg[0][j];
        back[0][j] = Step::InsertCandidate(j - 1);
    }

    for i in 1..=n {
        // Column range of row i that lies inside the band (and inside the grid).
        let j_lo = i.saturating_sub(band).max(1);
        let j_hi = (i + band).min(m);
        for j in j_lo..=j_hi {
            let c = pair_cost(baseline[i - 1], candidate[j - 1]);
            let m_cost = mat[i - 1][j - 1]
                .min(xg[i - 1][j - 1])
                .min(yg[i - 1][j - 1])
                + c;
            let xg_cost = (mat[i][j - 1] + GAP_OPEN).min(xg[i][j - 1] + GAP_EXTEND);
            let yg_cost = (mat[i - 1][j] + GAP_OPEN).min(yg[i - 1][j] + GAP_EXTEND);
            mat[i][j] = m_cost;
            xg[i][j] = xg_cost;
            yg[i][j] = yg_cost;
            let best = m_cost.min(xg_cost).min(yg_cost);
            // Ties resolve in the order match, insertion, deletion.
            back[i][j] = if (best - m_cost).abs() < 1e-12 {
                Step::Match(i - 1, j - 1)
            } else if (best - xg_cost).abs() < 1e-12 {
                Step::InsertCandidate(j - 1)
            } else {
                Step::DeleteBaseline(i - 1)
            };
        }
    }

    // Trace back from (n, m); steps come out in reverse order.
    let mut steps = Vec::new();
    let mut i = n;
    let mut j = m;
    while i > 0 || j > 0 {
        // On the grid border the step is forced, so `back` is not consulted there
        // (border cells beyond the band were never initialised).
        let s = if i > 0 && j > 0 {
            back[i][j]
        } else if j == 0 {
            Step::DeleteBaseline(i - 1)
        } else {
            Step::InsertCandidate(j - 1)
        };
        steps.push(s);
        match s {
            Step::Match(_, _) => {
                i -= 1;
                j -= 1;
            }
            Step::InsertCandidate(_) => {
                j -= 1;
            }
            Step::DeleteBaseline(_) => {
                i -= 1;
            }
        }
    }
    steps.reverse();
    Alignment { steps }
}
450
451fn pair_cost(a: &Record, b: &Record) -> f64 {
458 let tool_shape_a = tool_shape(a);
459 let tool_shape_b = tool_shape(b);
460 let shape_dist = 1.0 - jaccard(&tool_shape_a, &tool_shape_b);
465 let count_a = count_tool_use(a);
466 let count_b = count_tool_use(b);
467 let count_dist = if count_a == count_b {
468 0.0
469 } else {
470 let diff = (count_a as f64 - count_b as f64).abs();
471 let denom = count_a.max(count_b) as f64;
472 if denom == 0.0 {
473 0.0
474 } else {
475 (diff / denom).min(1.0)
476 }
477 };
478 let structural = shape_dist.max(count_dist);
479
480 let text_a = response_text(a);
481 let text_b = response_text(b);
482 let semantic = 1.0 - text_similarity(&text_a, &text_b);
483
484 let stop_a = stop_reason(a);
485 let stop_b = stop_reason(b);
486 let stop = if stop_a != stop_b { 1.0 } else { 0.0 };
487
488 let args = if tool_shape_a == tool_shape_b && !tool_shape_a.is_empty() {
493 if arg_value_diff(a, b).is_some() {
494 1.0
495 } else {
496 0.0
497 }
498 } else {
499 0.0
500 };
501
502 W_STRUCT * structural + W_SEM * semantic + W_STOP * stop + W_ARGS * args
503}
504
505fn tool_shape(r: &Record) -> BTreeSet<String> {
510 let mut out = BTreeSet::new();
511 let Some(arr) = r.payload.get("content").and_then(|c| c.as_array()) else {
512 return out;
513 };
514 for part in arr {
515 if part.get("type").and_then(|t| t.as_str()) != Some("tool_use") {
516 continue;
517 }
518 let name = part.get("name").and_then(|n| n.as_str()).unwrap_or("_");
519 let mut keys: Vec<String> = part
520 .get("input")
521 .and_then(|i| i.as_object())
522 .map(|o| o.keys().cloned().collect())
523 .unwrap_or_default();
524 keys.sort();
525 out.insert(format!("{name}({})", keys.join(",")));
526 }
527 out
528}
529
530fn count_tool_use(r: &Record) -> usize {
535 let Some(arr) = r.payload.get("content").and_then(|c| c.as_array()) else {
536 return 0;
537 };
538 arr.iter()
539 .filter(|p| p.get("type").and_then(|t| t.as_str()) == Some("tool_use"))
540 .count()
541}
542
543fn response_text(r: &Record) -> String {
544 let Some(arr) = r.payload.get("content").and_then(|c| c.as_array()) else {
545 return String::new();
546 };
547 arr.iter()
548 .filter_map(|p| {
549 if p.get("type").and_then(|t| t.as_str()) == Some("text") {
550 p.get("text")
551 .and_then(|t| t.as_str())
552 .map(ToString::to_string)
553 } else {
554 None
555 }
556 })
557 .collect::<Vec<_>>()
558 .join(" ")
559}
560
561fn stop_reason(r: &Record) -> String {
562 r.payload
563 .get("stop_reason")
564 .and_then(|v| v.as_str())
565 .unwrap_or("")
566 .to_string()
567}
568
/// Jaccard similarity `|A ∩ B| / |A ∪ B|` of two string sets, in `[0, 1]`.
///
/// Two empty sets are defined to be identical and yield `1.0`.
fn jaccard(a: &BTreeSet<String>, b: &BTreeSet<String>) -> f64 {
    if a.is_empty() && b.is_empty() {
        return 1.0;
    }
    let shared = a.intersection(b).count();
    // |A ∪ B| = |A| + |B| - |A ∩ B|. It is non-zero here because at least one set
    // is non-empty, so no separate zero-division guard is needed.
    let total = a.len() + b.len() - shared;
    shared as f64 / total as f64
}
583
584fn text_similarity(a: &str, b: &str) -> f64 {
597 let na = normalise_whitespace(a);
598 let nb = normalise_whitespace(b);
599 if na.is_empty() && nb.is_empty() {
600 return 1.0;
601 }
602 if na == nb {
603 return 1.0;
604 }
605 let sa = shingles(&na, 4);
606 let sb = shingles(&nb, 4);
607 jaccard(&sa, &sb)
608}
609
/// Collapses every run of whitespace into one ASCII space and trims both ends.
fn normalise_whitespace(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    // `split_whitespace` yields the non-whitespace runs and skips leading and
    // trailing whitespace.
    for word in s.split_whitespace() {
        if !out.is_empty() {
            out.push(' ');
        }
        out.push_str(word);
    }
    out
}
632
/// Returns the set of all `k`-character windows (shingles) of `s`.
///
/// Windows are taken over `char`s, not bytes. A non-empty string shorter than `k`
/// contributes itself as its only shingle; an empty string yields an empty set.
/// `k == 0` yields an empty set rather than panicking in `windows(0)`.
fn shingles(s: &str, k: usize) -> BTreeSet<String> {
    let mut out = BTreeSet::new();
    if k == 0 {
        // There are no zero-length shingles, and `slice::windows(0)` would panic.
        return out;
    }
    let chars: Vec<char> = s.chars().collect();
    if chars.len() < k {
        if !s.is_empty() {
            out.insert(s.to_string());
        }
        return out;
    }
    out.extend(chars.windows(k).map(|w| w.iter().collect::<String>()));
    out
}
647
648fn walk_collecting(
657 alignment: &Alignment,
658 baseline: &[&Record],
659 candidate: &[&Record],
660 limit: usize,
661) -> Vec<FirstDivergence> {
662 let mut out: Vec<FirstDivergence> = Vec::new();
663 if limit == 0 {
664 return out;
665 }
666 let mut b_cursor: usize = 0;
671 let mut c_cursor: usize = 0;
672 for step in &alignment.steps {
673 if out.len() >= limit {
674 return out;
675 }
676 match *step {
677 Step::InsertCandidate(j) => {
678 let cand = candidate[j];
682 let insertion_point = b_cursor;
683 let n_tools = tool_shape(cand).len();
684 let detail = if n_tools == 0 {
685 "an extra response turn with no tool calls".to_string()
686 } else if n_tools == 1 {
687 "an extra turn with 1 tool call".to_string()
688 } else {
689 format!("an extra turn with {n_tools} tool calls")
690 };
691 out.push(FirstDivergence {
692 baseline_turn: insertion_point,
693 candidate_turn: j,
694 kind: DivergenceKind::Structural,
695 primary_axis: Axis::Trajectory,
696 explanation: format!(
697 "candidate inserted {detail} between baseline turns #{prev} and #{insertion_point}",
698 prev = insertion_point.saturating_sub(1),
699 ),
700 confidence: 1.0,
701 });
702 c_cursor = c_cursor.saturating_add(1);
703 }
704 Step::DeleteBaseline(i) => {
705 let b = baseline[i];
706 let deletion_point = c_cursor;
707 let n_tools = tool_shape(b).len();
708 let detail = if n_tools == 0 {
709 "a response turn with no tool calls".to_string()
710 } else if n_tools == 1 {
711 "a turn with 1 tool call".to_string()
712 } else {
713 format!("a turn with {n_tools} tool calls")
714 };
715 out.push(FirstDivergence {
716 baseline_turn: i,
717 candidate_turn: deletion_point,
718 kind: DivergenceKind::Structural,
719 primary_axis: Axis::Trajectory,
720 explanation: format!(
721 "candidate dropped {detail} (baseline turn #{i} has no counterpart)",
722 ),
723 confidence: 1.0,
724 });
725 b_cursor = b_cursor.saturating_add(1);
726 }
727 Step::Match(i, j) => {
728 let b = baseline[i];
729 let c = candidate[j];
730 let cost = pair_cost(b, c);
731 b_cursor = i.saturating_add(1);
732 c_cursor = j.saturating_add(1);
733 if cost <= NOISE_FLOOR {
734 continue;
735 }
736 let (kind, axis, explanation) = classify(b, c, cost);
738 let confidence = ((cost - NOISE_FLOOR) / (1.0 - NOISE_FLOOR)).clamp(0.0, 1.0);
739 out.push(FirstDivergence {
740 baseline_turn: i,
741 candidate_turn: j,
742 kind,
743 primary_axis: axis,
744 explanation,
745 confidence,
746 });
747 }
748 }
749 }
750 out
751}
752
753fn classify(b: &Record, c: &Record, cost: f64) -> (DivergenceKind, Axis, String) {
755 let shape_b = tool_shape(b);
756 let shape_c = tool_shape(c);
757 let text_b = response_text(b);
758 let text_c = response_text(c);
759 let stop_b = stop_reason(b);
760 let stop_c = stop_reason(c);
761 let sem_sim = text_similarity(&text_b, &text_c);
762
763 if shape_b != shape_c {
765 let explanation = describe_tool_diff(&shape_b, &shape_c);
766 return (DivergenceKind::Structural, Axis::Trajectory, explanation);
767 }
768 let count_b = count_tool_use(b);
772 let count_c = count_tool_use(c);
773 if count_b != count_c {
774 let tool_names: Vec<&String> = shape_b.iter().collect();
775 let tools_summary = if tool_names.len() == 1 {
776 format!("`{}`", tool_names[0])
777 } else {
778 format!("{} tool(s)", tool_names.len())
779 };
780 let explanation = if count_c > count_b {
781 format!(
782 "candidate called {tools_summary} {count_c} time(s) vs baseline's {count_b} \
783 — duplicate tool invocation"
784 )
785 } else {
786 format!(
787 "candidate called {tools_summary} {count_c} time(s) vs baseline's {count_b} \
788 — dropped one or more repeat invocations"
789 )
790 };
791 return (DivergenceKind::Structural, Axis::Trajectory, explanation);
792 }
793
794 if stop_b != stop_c {
796 return (
797 DivergenceKind::Decision,
798 Axis::Safety,
799 format!("stop_reason changed: `{stop_b}` → `{stop_c}`"),
800 );
801 }
802
803 if let Some(arg_diff) = arg_value_diff(b, c) {
808 return (
809 DivergenceKind::Decision,
810 Axis::Trajectory,
811 format!("tool arg value changed: {arg_diff}"),
812 );
813 }
814
815 if sem_sim >= 0.90 && cost <= STYLE_MAX_COST {
817 (
818 DivergenceKind::Style,
819 Axis::Semantic,
820 "cosmetic wording change — tool sequence and semantics preserved".to_string(),
821 )
822 } else {
823 (
824 DivergenceKind::Decision,
825 Axis::Semantic,
826 format!(
827 "response text diverged (text similarity {:.2}); same tool sequence",
828 sem_sim
829 ),
830 )
831 }
832}
833
834fn describe_tool_diff(a: &BTreeSet<String>, b: &BTreeSet<String>) -> String {
835 let only_a: Vec<&String> = a.difference(b).collect();
836 let only_b: Vec<&String> = b.difference(a).collect();
837 if !only_a.is_empty() && only_b.is_empty() {
838 format!("candidate dropped tool call(s): {}", list(&only_a))
839 } else if !only_b.is_empty() && only_a.is_empty() {
840 format!("candidate added tool call(s): {}", list(&only_b))
841 } else if !only_a.is_empty() && !only_b.is_empty() {
842 format!(
843 "tool set changed: removed {}, added {}",
844 list(&only_a),
845 list(&only_b)
846 )
847 } else {
848 "tool ordering differs".to_string()
849 }
850}
851
/// Renders items as a comma-separated list with each item wrapped in backticks.
///
/// An empty slice renders as an empty string.
fn list(items: &[&String]) -> String {
    let mut rendered = String::new();
    for (idx, item) in items.iter().enumerate() {
        if idx > 0 {
            rendered.push_str(", ");
        }
        rendered.push('`');
        rendered.push_str(item.as_str());
        rendered.push('`');
    }
    rendered
}
859
860fn arg_value_diff(a: &Record, b: &Record) -> Option<String> {
864 let ta = tool_use_inputs(a);
865 let tb = tool_use_inputs(b);
866 for (name, va) in &ta {
867 if let Some(vb) = tb.get(name) {
868 if va != vb {
869 if let (Some(oa), Some(ob)) = (va.as_object(), vb.as_object()) {
871 for (k, v) in oa {
872 if ob.get(k) != Some(v) {
873 let other = ob
874 .get(k)
875 .map(|x| x.to_string())
876 .unwrap_or("<missing>".to_string());
877 return Some(format!("`{name}({k})`: `{v}` → `{other}`"));
878 }
879 }
880 }
881 return Some(format!("`{name}`: input changed"));
882 }
883 }
884 }
885 None
886}
887
888fn tool_use_inputs(r: &Record) -> std::collections::BTreeMap<String, serde_json::Value> {
891 let mut out = std::collections::BTreeMap::new();
892 let Some(arr) = r.payload.get("content").and_then(|c| c.as_array()) else {
893 return out;
894 };
895 for part in arr {
896 if part.get("type").and_then(|t| t.as_str()) != Some("tool_use") {
897 continue;
898 }
899 let name = part
900 .get("name")
901 .and_then(|n| n.as_str())
902 .unwrap_or("_")
903 .to_string();
904 let input = part
905 .get("input")
906 .cloned()
907 .unwrap_or(serde_json::Value::Null);
908 out.entry(name).or_insert(input);
909 }
910 out
911}
912
#[cfg(test)]
mod tests {
    use super::*;
    use crate::agentlog::Kind;
    use serde_json::json;

    // Builds a chat response that has a single text part.
    fn response_text_only(text: &str, stop: &str) -> Record {
        Record::new(
            Kind::ChatResponse,
            json!({
                "model": "x",
                "content": [{"type": "text", "text": text}],
                "stop_reason": stop,
                "latency_ms": 0,
                "usage": {"input_tokens": 1, "output_tokens": 1, "thinking_tokens": 0},
            }),
            "2026-04-23T00:00:00Z",
            None,
        )
    }

    // Builds a chat response that has a single `tool_use` part.
    fn response_with_tool(name: &str, input: serde_json::Value, stop: &str) -> Record {
        Record::new(
            Kind::ChatResponse,
            json!({
                "model": "x",
                "content": [{
                    "type": "tool_use",
                    "id": "t1",
                    "name": name,
                    "input": input,
                }],
                "stop_reason": stop,
                "latency_ms": 0,
                "usage": {"input_tokens": 1, "output_tokens": 1, "thinking_tokens": 0},
            }),
            "2026-04-23T00:00:00Z",
            None,
        )
    }

    // Builds a metadata record, which the detector must ignore.
    fn meta() -> Record {
        Record::new(
            Kind::Metadata,
            json!({"sdk": {"name": "shadow"}}),
            "2026-04-23T00:00:00Z",
            None,
        )
    }

    #[test]
    fn identical_traces_return_none() {
        let r = response_text_only("Paris is the capital of France.", "end_turn");
        let baseline = vec![meta(), r.clone(), r.clone()];
        let candidate = vec![meta(), r.clone(), r.clone()];
        assert_eq!(detect(&baseline, &candidate), None);
    }

    #[test]
    fn whitespace_only_diff_is_style() {
        let b = response_text_only("Paris is the capital of France.", "end_turn");
        // Same words; only the whitespace runs differ from the baseline.
        let c = response_text_only("Paris  is the\tcapital of   France.\n", "end_turn");
        let baseline = vec![meta(), b];
        let candidate = vec![meta(), c];
        // Whitespace is normalised before comparison, so no divergence is also an
        // acceptable outcome; if one is reported it must be cosmetic.
        if let Some(d) = detect(&baseline, &candidate) {
            assert_eq!(d.kind, DivergenceKind::Style);
            assert_eq!(d.primary_axis, Axis::Semantic);
        }
    }

    #[test]
    fn different_tool_name_is_structural_on_trajectory_axis() {
        let b = response_with_tool("search", json!({"q": "cats"}), "tool_use");
        let c = response_with_tool("lookup", json!({"q": "cats"}), "tool_use");
        let baseline = vec![meta(), b];
        let candidate = vec![meta(), c];
        let d = detect(&baseline, &candidate).expect("divergence expected");
        assert_eq!(d.kind, DivergenceKind::Structural);
        assert_eq!(d.primary_axis, Axis::Trajectory);
        assert_eq!(d.baseline_turn, 0);
        assert_eq!(d.candidate_turn, 0);
        assert!(d.explanation.contains("search") || d.explanation.contains("lookup"));
    }

    #[test]
    fn same_tool_different_arg_value_is_decision() {
        let b = response_with_tool("search", json!({"q": "cats", "limit": 10}), "tool_use");
        let c = response_with_tool("search", json!({"q": "cats", "limit": 50}), "tool_use");
        let baseline = vec![meta(), b];
        let candidate = vec![meta(), c];
        let d = detect(&baseline, &candidate).expect("divergence expected");
        assert_eq!(d.kind, DivergenceKind::Decision);
        assert_eq!(d.primary_axis, Axis::Trajectory);
        assert!(d.explanation.contains("limit"));
    }

    #[test]
    fn stop_reason_flip_is_decision_on_safety() {
        let b = response_text_only("Here is the answer.", "end_turn");
        let c = response_text_only("I can't help with that.", "content_filter");
        let baseline = vec![meta(), b];
        let candidate = vec![meta(), c];
        let d = detect(&baseline, &candidate).expect("divergence expected");
        assert_eq!(d.kind, DivergenceKind::Decision);
        assert_eq!(d.primary_axis, Axis::Safety);
        assert!(d.explanation.contains("end_turn"));
        assert!(d.explanation.contains("content_filter"));
    }

    #[test]
    fn candidate_drops_a_turn_is_structural() {
        let r1 = response_text_only("first turn", "end_turn");
        let r2 = response_text_only("second turn", "end_turn");
        let baseline = vec![meta(), r1.clone(), r2];
        let candidate = vec![meta(), r1];
        let d = detect(&baseline, &candidate).expect("divergence expected");
        assert_eq!(d.kind, DivergenceKind::Structural);
        assert_eq!(d.primary_axis, Axis::Trajectory);
    }

    #[test]
    fn candidate_inserts_a_turn_is_structural() {
        let r1 = response_text_only("turn one", "end_turn");
        let r2 = response_text_only("inserted", "end_turn");
        let r3 = response_text_only("turn two", "end_turn");
        let baseline = vec![meta(), r1.clone(), r3.clone()];
        let candidate = vec![meta(), r1, r2, r3];
        let d = detect(&baseline, &candidate).expect("divergence expected");
        assert_eq!(d.kind, DivergenceKind::Structural);
    }

    #[test]
    fn significant_text_shift_is_decision_on_semantic() {
        let b = response_text_only(
            "Photosynthesis is the process by which plants convert sunlight.",
            "end_turn",
        );
        let c = response_text_only(
            "The stock market closed higher on Thursday after strong earnings.",
            "end_turn",
        );
        let baseline = vec![meta(), b];
        let candidate = vec![meta(), c];
        let d = detect(&baseline, &candidate).expect("divergence expected");
        assert_eq!(d.kind, DivergenceKind::Decision);
        assert_eq!(d.primary_axis, Axis::Semantic);
    }

    #[test]
    fn empty_traces_return_none() {
        assert_eq!(detect(&[meta()], &[meta()]), None);
        assert_eq!(detect(&[], &[]), None);
    }

    #[test]
    fn first_divergence_is_truly_first() {
        let r1 = response_text_only("same", "end_turn");
        let r2b = response_text_only("baseline version of turn two with lots of text", "end_turn");
        let r2c = response_text_only(
            "CANDIDATE SAID SOMETHING COMPLETELY DIFFERENT HERE",
            "end_turn",
        );
        let r3 = response_text_only("also same", "end_turn");
        let baseline = vec![meta(), r1.clone(), r2b, r3.clone()];
        let candidate = vec![meta(), r1, r2c, r3];
        let d = detect(&baseline, &candidate).expect("divergence expected");
        assert_eq!(d.baseline_turn, 1);
        assert_eq!(d.candidate_turn, 1);
    }

    #[test]
    fn confidence_is_in_valid_range() {
        let b = response_with_tool("search", json!({"q": "a"}), "tool_use");
        let c = response_with_tool("other", json!({"q": "a"}), "tool_use");
        let baseline = vec![meta(), b];
        let candidate = vec![meta(), c];
        let d = detect(&baseline, &candidate).unwrap();
        assert!((0.0..=1.0).contains(&d.confidence));
    }

    #[test]
    fn tool_shape_captures_name_and_arg_keys() {
        let r = response_with_tool("search", json!({"q": "a", "limit": 10}), "tool_use");
        let shape = tool_shape(&r);
        assert_eq!(shape.len(), 1);
        let entry = shape.iter().next().unwrap();
        assert!(entry.starts_with("search("));
        assert!(entry.contains("limit"));
        assert!(entry.contains("q"));
    }

    #[test]
    fn jaccard_on_empty_sets_is_one() {
        let empty = BTreeSet::new();
        assert_eq!(jaccard(&empty, &empty), 1.0);
    }

    #[test]
    fn alignment_prefers_matches_over_gaps_when_both_cheap() {
        let r = response_text_only("same", "end_turn");
        let alignment = align(&[&r, &r], &[&r, &r]);
        let matches = alignment
            .steps
            .iter()
            .filter(|s| matches!(s, Step::Match(..)))
            .count();
        assert_eq!(matches, 2);
        let gaps = alignment.steps.len() - matches;
        assert_eq!(gaps, 0);
    }

    #[test]
    fn top_k_with_zero_returns_empty() {
        let r1 = response_text_only("same", "end_turn");
        let r2 = response_text_only("different", "end_turn");
        let out = detect_top_k(&[meta(), r1], &[meta(), r2], 0);
        assert_eq!(out.len(), 0);
    }

    #[test]
    fn top_k_with_identical_returns_empty() {
        let r = response_text_only("same", "end_turn");
        let out = detect_top_k(&[meta(), r.clone(), r.clone()], &[meta(), r.clone(), r], 3);
        assert_eq!(out.len(), 0);
    }

    #[test]
    fn top_k_orders_structural_before_decision_before_style() {
        // Turn 0: punctuation-only change. Turn 1: stop-reason flip. Turn 2: tool swap.
        let b0 = response_text_only(
            "Hello, here is a detailed answer explaining the topic in full.",
            "end_turn",
        );
        let b1 = response_text_only("The answer is 42.", "end_turn");
        let b2 = response_with_tool("search", json!({"q": "x"}), "tool_use");
        let c0 = response_text_only(
            "Hello, here is a detailed answer explaining the topic in full!",
            "end_turn",
        );
        let c1 = response_text_only("I cannot answer that.", "content_filter");
        let c2 = response_with_tool("lookup", json!({"q": "x"}), "tool_use");
        let baseline = vec![meta(), b0, b1, b2];
        let candidate = vec![meta(), c0, c1, c2];
        let out = detect_top_k(&baseline, &candidate, 5);
        assert!(
            out.len() >= 2,
            "expected at least 2 divergences, got {}",
            out.len()
        );
        assert_eq!(
            out[0].kind,
            DivergenceKind::Structural,
            "rank 1 should be Structural, got {:?}",
            out[0].kind
        );
        // The length assertion above guarantees that `out[1]` exists.
        assert_eq!(
            out[1].kind,
            DivergenceKind::Decision,
            "rank 2 should be Decision, got {:?}",
            out[1].kind
        );
    }

    #[test]
    fn top_k_truncates_at_k() {
        let baseline = vec![
            meta(),
            response_with_tool("a", json!({}), "tool_use"),
            response_with_tool("b", json!({}), "tool_use"),
            response_with_tool("c", json!({}), "tool_use"),
            response_with_tool("d", json!({}), "tool_use"),
            response_with_tool("e", json!({}), "tool_use"),
        ];
        let candidate = vec![
            meta(),
            response_with_tool("A", json!({}), "tool_use"),
            response_with_tool("B", json!({}), "tool_use"),
            response_with_tool("C", json!({}), "tool_use"),
            response_with_tool("D", json!({}), "tool_use"),
            response_with_tool("E", json!({}), "tool_use"),
        ];
        let out = detect_top_k(&baseline, &candidate, 2);
        assert_eq!(out.len(), 2);
        for dv in &out {
            assert_eq!(dv.kind, DivergenceKind::Structural);
        }
    }

    #[test]
    fn top_k_preserves_walk_order_within_same_severity_and_confidence() {
        let baseline = vec![
            meta(),
            response_with_tool("a", json!({}), "tool_use"),
            response_with_tool("b", json!({}), "tool_use"),
            response_with_tool("c", json!({}), "tool_use"),
        ];
        let candidate = vec![
            meta(),
            response_with_tool("A", json!({}), "tool_use"),
            response_with_tool("B", json!({}), "tool_use"),
            response_with_tool("C", json!({}), "tool_use"),
        ];
        let out = detect_top_k(&baseline, &candidate, 3);
        assert_eq!(out.len(), 3);
        assert_eq!(out[0].baseline_turn, 0);
        assert_eq!(out[1].baseline_turn, 1);
        assert_eq!(out[2].baseline_turn, 2);
    }

    #[test]
    fn top_k_of_1_matches_first_divergence_classifier() {
        let b = response_with_tool("search", json!({"q": "x"}), "tool_use");
        let c = response_with_tool("search", json!({"q": "y"}), "tool_use");
        let first = detect(&[meta(), b.clone()], &[meta(), c.clone()]).unwrap();
        let top = detect_top_k(&[meta(), b], &[meta(), c], 1);
        assert_eq!(top.len(), 1);
        assert_eq!(top[0].kind, first.kind);
        assert_eq!(top[0].baseline_turn, first.baseline_turn);
    }

    #[test]
    fn first_divergence_is_alignment_order_not_importance_rank() {
        // Turn 0 differs in text only; turn 1 swaps the tool. `detect` reports the
        // earlier one, `detect_top_k` ranks the more severe one first.
        let b0 = response_text_only("same across both", "end_turn");
        let b1 = response_with_tool("search", json!({"q": "x"}), "tool_use");
        let c0 = response_text_only("completely different response here", "end_turn");
        let c1 = response_with_tool("lookup", json!({"q": "x"}), "tool_use");
        let baseline = vec![meta(), b0, b1];
        let candidate = vec![meta(), c0, c1];
        let first = detect(&baseline, &candidate).unwrap();
        let top = detect_top_k(&baseline, &candidate, 3);
        assert_eq!(first.baseline_turn, 0);
        assert_eq!(top[0].baseline_turn, 1);
        assert_eq!(top[0].kind, DivergenceKind::Structural);
    }
}