1use serde::{Deserialize, Serialize};
40use std::collections::BTreeSet;
41
42use crate::agentlog::{Kind, Record};
43use crate::diff::axes::Axis;
44
/// Category assigned to a divergence between a baseline and a candidate trace.
///
/// Each variant serialises (via serde) under the same string that
/// [`DivergenceKind::label`] returns for it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum DivergenceKind {
    /// Wording-only change; serialised as `"style_drift"`.
    #[serde(rename = "style_drift")]
    Style,
    /// Change in stop reason, tool argument values or response text;
    /// serialised as `"decision_drift"`.
    #[serde(rename = "decision_drift")]
    Decision,
    /// Change in the tool-call shape/count of a turn, or an inserted/dropped
    /// turn; serialised as `"structural_drift"`.
    #[serde(rename = "structural_drift")]
    Structural,
}
65
66impl DivergenceKind {
67 pub fn label(&self) -> &'static str {
69 match self {
70 DivergenceKind::Style => "style_drift",
71 DivergenceKind::Decision => "decision_drift",
72 DivergenceKind::Structural => "structural_drift",
73 }
74 }
75}
76
/// One reported point at which the candidate trace departs from the baseline.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct FirstDivergence {
    /// Zero-based index into the baseline's chat-response turns (other record
    /// kinds are filtered out before alignment). For a turn the candidate
    /// inserted, this is the baseline position at which the insertion falls.
    pub baseline_turn: usize,
    /// Zero-based index into the candidate's chat-response turns. For a
    /// baseline turn the candidate dropped, this is the candidate position at
    /// which the drop falls.
    pub candidate_turn: usize,
    /// Classification of the divergence.
    pub kind: DivergenceKind,
    /// Diff axis the divergence is attributed to.
    pub primary_axis: Axis,
    /// Human-readable sentence describing what changed.
    pub explanation: String,
    /// Score in `[0.0, 1.0]`: `1.0` for inserted/dropped turns; for matched
    /// turns, the pair cost rescaled from the noise floor up to `1.0`.
    pub confidence: f64,
}
100
/// Default `k`.
// NOTE(review): presumably the default number of divergences callers pass to
// `detect_top_k`; it is not referenced in this file — confirm at call sites.
pub const DEFAULT_K: usize = 5;
109
110pub fn detect(baseline: &[Record], candidate: &[Record]) -> Option<FirstDivergence> {
121 let baseline_responses: Vec<&Record> = baseline
127 .iter()
128 .filter(|r| r.kind == Kind::ChatResponse)
129 .collect();
130 let candidate_responses: Vec<&Record> = candidate
131 .iter()
132 .filter(|r| r.kind == Kind::ChatResponse)
133 .collect();
134 if baseline_responses.is_empty() || candidate_responses.is_empty() {
135 return None;
136 }
137 let alignment = align(&baseline_responses, &candidate_responses);
138 walk_collecting(&alignment, &baseline_responses, &candidate_responses, 1)
139 .into_iter()
140 .next()
141}
142
143pub fn detect_top_k(baseline: &[Record], candidate: &[Record], k: usize) -> Vec<FirstDivergence> {
156 if k == 0 {
157 return Vec::new();
158 }
159 let baseline_responses: Vec<&Record> = baseline
160 .iter()
161 .filter(|r| r.kind == Kind::ChatResponse)
162 .collect();
163 let candidate_responses: Vec<&Record> = candidate
164 .iter()
165 .filter(|r| r.kind == Kind::ChatResponse)
166 .collect();
167 if baseline_responses.is_empty() || candidate_responses.is_empty() {
168 return Vec::new();
169 }
170 let alignment = align(&baseline_responses, &candidate_responses);
171 let max_possible = baseline_responses.len() + candidate_responses.len();
174 let mut all = walk_collecting(
175 &alignment,
176 &baseline_responses,
177 &candidate_responses,
178 max_possible,
179 );
180 all.sort_by(|a, b| {
184 kind_rank(b.kind).cmp(&kind_rank(a.kind)).then_with(|| {
185 b.confidence
186 .partial_cmp(&a.confidence)
187 .unwrap_or(std::cmp::Ordering::Equal)
188 })
189 });
190 all.truncate(k);
191 all
192}
193
194fn kind_rank(k: DivergenceKind) -> u8 {
199 match k {
200 DivergenceKind::Structural => 3,
201 DivergenceKind::Decision => 2,
202 DivergenceKind::Style => 1,
203 }
204}
205
// Weights of the four terms summed by `pair_cost`; together they add up to 1.0.
/// Weight of the structural term (tool shape / tool-call count distance).
const W_STRUCT: f64 = 0.40;
/// Weight of the response-text dissimilarity term.
const W_SEM: f64 = 0.25;
/// Weight of the stop-reason mismatch term.
const W_STOP: f64 = 0.15;
/// Weight of the tool-argument-value mismatch term.
const W_ARGS: f64 = 0.20;
/// Alignment cost of the first turn in a run of inserted or dropped turns.
const GAP_OPEN: f64 = 0.60;
/// Alignment cost of each further turn in an already-open run.
const GAP_EXTEND: f64 = 0.15;

/// Matched turns whose pair cost is at or below this value are not reported.
const NOISE_FLOOR: f64 = 0.12;

/// Largest pair cost at which a text-only change can still be classified as
/// `DivergenceKind::Style` (it must also have text similarity >= 0.90).
const STYLE_MAX_COST: f64 = 0.25;
233
/// One step of an alignment between the baseline and candidate response lists.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Step {
    /// Baseline response at the first index is paired with the candidate
    /// response at the second index.
    Match(usize, usize),
    /// The candidate response at this index has no baseline counterpart.
    InsertCandidate(usize),
    /// The baseline response at this index has no candidate counterpart.
    DeleteBaseline(usize),
}
243
/// Result of aligning two response lists.
struct Alignment {
    /// Alignment steps in forward order (first turn first).
    steps: Vec<Step>,
}
248
/// When the longer of the two response lists exceeds this length, `align`
/// switches from the full table to the banded one.
const SCALE_BAND_THRESHOLD: usize = 1000;

/// Lower bound on the radius part of the band half-width computed by
/// `band_half_width` (the length difference is added on top of it).
const MIN_BAND_HALF_WIDTH: usize = 100;
259
260fn band_half_width(n: usize, m: usize) -> usize {
266 let length_diff = n.abs_diff(m);
267 let radius = (n.max(m) as f64).sqrt() as usize;
268 length_diff + MIN_BAND_HALF_WIDTH.max(radius)
269}
270
271fn align(baseline: &[&Record], candidate: &[&Record]) -> Alignment {
272 let n = baseline.len();
273 let m = candidate.len();
274 if n.max(m) > SCALE_BAND_THRESHOLD {
275 align_banded(baseline, candidate, band_half_width(n, m))
276 } else {
277 align_full(baseline, candidate)
278 }
279}
280
/// Aligns the two response lists with a full dynamic-programming table and
/// affine gap costs (`GAP_OPEN` for the first skipped turn of a run,
/// `GAP_EXTEND` for each further one).
///
/// Four `(n + 1) x (m + 1)` tables are allocated, where `n` and `m` are the
/// list lengths:
/// - `mat[i][j]`: cost of the best alignment of the first `i` baseline and
///   first `j` candidate turns whose last step pairs baseline `i - 1` with
///   candidate `j - 1` (on the borders it holds the pure-gap cost instead);
/// - `xg[i][j]`: same, but the last step skips candidate turn `j - 1`;
/// - `yg[i][j]`: same, but the last step skips baseline turn `i - 1`;
/// - `back[i][j]`: the step recorded for the cheapest of the three.
///
/// Returns the steps in forward order.
fn align_full(baseline: &[&Record], candidate: &[&Record]) -> Alignment {
    let n = baseline.len();
    let m = candidate.len();
    // Stand-in for "unreachable"; large enough that adding costs never wins a `min`.
    const INF: f64 = 1e18;
    let mut mat = vec![vec![INF; m + 1]; n + 1];
    let mut xg = vec![vec![INF; m + 1]; n + 1];
    let mut yg = vec![vec![INF; m + 1]; n + 1];
    let mut back = vec![vec![Step::Match(0, 0); m + 1]; n + 1];

    // Borders: aligning a prefix against nothing is a single gap run.
    mat[0][0] = 0.0;
    for i in 1..=n {
        yg[i][0] = GAP_OPEN + (i as f64 - 1.0) * GAP_EXTEND;
        mat[i][0] = yg[i][0];
        back[i][0] = Step::DeleteBaseline(i - 1);
    }
    for j in 1..=m {
        xg[0][j] = GAP_OPEN + (j as f64 - 1.0) * GAP_EXTEND;
        mat[0][j] = xg[0][j];
        back[0][j] = Step::InsertCandidate(j - 1);
    }

    for i in 1..=n {
        for j in 1..=m {
            let c = pair_cost(baseline[i - 1], candidate[j - 1]);
            // A pairing may follow any of the three states of the diagonal cell.
            let m_cost = mat[i - 1][j - 1]
                .min(xg[i - 1][j - 1])
                .min(yg[i - 1][j - 1])
                + c;
            // A gap is either opened after a pairing or extended from the same gap state.
            let xg_cost = (mat[i][j - 1] + GAP_OPEN).min(xg[i][j - 1] + GAP_EXTEND);
            let yg_cost = (mat[i - 1][j] + GAP_OPEN).min(yg[i - 1][j] + GAP_EXTEND);
            mat[i][j] = m_cost;
            xg[i][j] = xg_cost;
            yg[i][j] = yg_cost;
            let best = m_cost.min(xg_cost).min(yg_cost);
            // Ties (within 1e-12) prefer a match, then an insertion, then a deletion.
            back[i][j] = if (best - m_cost).abs() < 1e-12 {
                Step::Match(i - 1, j - 1)
            } else if (best - xg_cost).abs() < 1e-12 {
                Step::InsertCandidate(j - 1)
            } else {
                Step::DeleteBaseline(i - 1)
            };
        }
    }

    // Trace back from (n, m) to (0, 0). The border cells hold the only step
    // that is valid there, so neither index is decremented below zero.
    // NOTE(review): a single back-pointer per cell is followed without
    // tracking which of the three states the path is in, so the recovered
    // path may not always be the exact optimum of the affine recurrence —
    // confirm this approximation is intended.
    let mut steps = Vec::new();
    let mut i = n;
    let mut j = m;
    while i > 0 || j > 0 {
        let s = back[i][j];
        steps.push(s);
        match s {
            Step::Match(_, _) => {
                i -= 1;
                j -= 1;
            }
            Step::InsertCandidate(_) => {
                j -= 1;
            }
            Step::DeleteBaseline(_) => {
                i -= 1;
            }
        }
    }
    steps.reverse();
    Alignment { steps }
}
358
/// Returns the inclusive column window `(lo, hi)` of row `i` for a diagonal
/// band of half-width `band` in a table with columns `0..=m`.
///
/// `lo` is `i - band` clamped at 0 and `hi` is `i + band` clamped at `m`
/// (the addition saturates instead of overflowing). When `i - band > m` the
/// window is empty and `lo > hi`.
#[inline]
fn band_window(i: usize, m: usize, band: usize) -> (usize, usize) {
    (i.saturating_sub(band), i.saturating_add(band).min(m))
}

/// Table with rows `0..=n` that only stores, per row, the cells inside a
/// diagonal band; everything outside the band reads as `default_val`.
struct Banded<T: Copy> {
    /// `rows[i][k]` holds column `lo + k`, where `lo` is the window start of row `i`.
    rows: Vec<Vec<T>>,
    /// Half-width of the band.
    band: usize,
    /// Index of the last column of the logical table.
    m: usize,
    /// Value stored initially and returned for out-of-band reads.
    default_val: T,
}

impl<T: Copy> Banded<T> {
    /// Creates a table for rows `0..=n` and columns `0..=m`, with every
    /// in-band cell set to `default_val`.
    ///
    /// Rows whose window is empty (possible when `n > m + band`) get no
    /// storage instead of underflowing the width computation.
    fn new(n: usize, m: usize, band: usize, default_val: T) -> Self {
        let rows = (0..=n)
            .map(|i| {
                let (j_lo, j_hi) = band_window(i, m, band);
                let width = if j_lo > j_hi { 0 } else { j_hi - j_lo + 1 };
                vec![default_val; width]
            })
            .collect();
        Self {
            rows,
            band,
            m,
            default_val,
        }
    }

    /// Reports whether column `j` lies inside the band window of row `i`.
    #[inline]
    fn in_band(&self, i: usize, j: usize) -> bool {
        let (j_lo, j_hi) = band_window(i, self.m, self.band);
        j >= j_lo && j <= j_hi
    }

    /// Reads cell `(i, j)`; out-of-band cells yield `default_val`.
    ///
    /// # Panics
    /// Panics if the cell is in band but `i` is not a row of the table.
    #[inline]
    fn get(&self, i: usize, j: usize) -> T {
        let (j_lo, j_hi) = band_window(i, self.m, self.band);
        if j < j_lo || j > j_hi {
            return self.default_val;
        }
        self.rows[i][j - j_lo]
    }

    /// Writes cell `(i, j)`, which must be inside the band.
    ///
    /// # Panics
    /// Panics if `(i, j)` is outside the band or `i` is not a row of the table.
    #[inline]
    fn set(&mut self, i: usize, j: usize, v: T) {
        debug_assert!(self.in_band(i, j), "Banded::set outside the band");
        let (j_lo, _) = band_window(i, self.m, self.band);
        self.rows[i][j - j_lo] = v;
    }
}
428
/// Aligns the two response lists like `align_full`, but only fills the cells
/// within `band` columns of the main diagonal.
///
/// The three cost tables and the back-pointer table are `Banded`, so cells
/// outside the band are never stored and read as `INF` (costs) or
/// `Step::Match(0, 0)` (back-pointers). The recurrences and tie-breaking
/// (match, then insertion, then deletion) are the same as in `align_full`.
///
/// Returns the steps in forward order.
fn align_banded(baseline: &[&Record], candidate: &[&Record], band: usize) -> Alignment {
    let n = baseline.len();
    let m = candidate.len();
    // Stand-in for "unreachable"; also what out-of-band cost reads return.
    const INF: f64 = 1e18;
    let mut mat = Banded::new(n, m, band, INF);
    let mut xg = Banded::new(n, m, band, INF);
    let mut yg = Banded::new(n, m, band, INF);
    let mut back = Banded::new(n, m, band, Step::Match(0, 0));

    // Borders: only the part of column 0 / row 0 that lies inside the band
    // (indices up to `band`) is initialised.
    mat.set(0, 0, 0.0);
    for i in 1..=n.min(band) {
        let v = GAP_OPEN + (i as f64 - 1.0) * GAP_EXTEND;
        yg.set(i, 0, v);
        mat.set(i, 0, v);
        back.set(i, 0, Step::DeleteBaseline(i - 1));
    }
    for j in 1..=m.min(band) {
        let v = GAP_OPEN + (j as f64 - 1.0) * GAP_EXTEND;
        xg.set(0, j, v);
        mat.set(0, j, v);
        back.set(0, j, Step::InsertCandidate(j - 1));
    }

    for i in 1..=n {
        // In-band columns of this row, excluding the border column 0.
        let j_lo = i.saturating_sub(band).max(1);
        let j_hi = (i + band).min(m);
        for j in j_lo..=j_hi {
            let c = pair_cost(baseline[i - 1], candidate[j - 1]);
            // Neighbours outside the band read as INF and so never win a `min`.
            let m_cost = mat
                .get(i - 1, j - 1)
                .min(xg.get(i - 1, j - 1))
                .min(yg.get(i - 1, j - 1))
                + c;
            let xg_cost = (mat.get(i, j - 1) + GAP_OPEN).min(xg.get(i, j - 1) + GAP_EXTEND);
            let yg_cost = (mat.get(i - 1, j) + GAP_OPEN).min(yg.get(i - 1, j) + GAP_EXTEND);
            mat.set(i, j, m_cost);
            xg.set(i, j, xg_cost);
            yg.set(i, j, yg_cost);
            let best = m_cost.min(xg_cost).min(yg_cost);
            let step = if (best - m_cost).abs() < 1e-12 {
                Step::Match(i - 1, j - 1)
            } else if (best - xg_cost).abs() < 1e-12 {
                Step::InsertCandidate(j - 1)
            } else {
                Step::DeleteBaseline(i - 1)
            };
            back.set(i, j, step);
        }
    }

    // Trace back from (n, m). Interior in-band cells use the stored step;
    // on the borders the only valid step is forced; an interior cell outside
    // the band falls back to a diagonal match so the walk keeps moving
    // towards the origin. With a band of at least |n - m| (as produced by
    // `band_half_width`) the start cell (n, m) is itself inside the band.
    let mut steps = Vec::new();
    let mut i = n;
    let mut j = m;
    while i > 0 || j > 0 {
        let s = if i > 0 && j > 0 && back.in_band(i, j) {
            back.get(i, j)
        } else if j == 0 {
            Step::DeleteBaseline(i - 1)
        } else if i == 0 {
            Step::InsertCandidate(j - 1)
        } else {
            Step::Match(i - 1, j - 1)
        };
        steps.push(s);
        match s {
            Step::Match(_, _) => {
                i -= 1;
                j -= 1;
            }
            Step::InsertCandidate(_) => {
                j -= 1;
            }
            Step::DeleteBaseline(_) => {
                i -= 1;
            }
        }
    }
    steps.reverse();
    Alignment { steps }
}
516
517fn pair_cost(a: &Record, b: &Record) -> f64 {
524 let tool_shape_a = tool_shape(a);
525 let tool_shape_b = tool_shape(b);
526 let shape_dist = 1.0 - jaccard(&tool_shape_a, &tool_shape_b);
531 let count_a = count_tool_use(a);
532 let count_b = count_tool_use(b);
533 let count_dist = if count_a == count_b {
534 0.0
535 } else {
536 let diff = (count_a as f64 - count_b as f64).abs();
537 let denom = count_a.max(count_b) as f64;
538 if denom == 0.0 {
539 0.0
540 } else {
541 (diff / denom).min(1.0)
542 }
543 };
544 let structural = shape_dist.max(count_dist);
545
546 let text_a = response_text(a);
547 let text_b = response_text(b);
548 let semantic = 1.0 - text_similarity(&text_a, &text_b);
549
550 let stop_a = stop_reason(a);
551 let stop_b = stop_reason(b);
552 let stop = if stop_a != stop_b { 1.0 } else { 0.0 };
553
554 let args = if tool_shape_a == tool_shape_b && !tool_shape_a.is_empty() {
559 if arg_value_diff(a, b).is_some() {
560 1.0
561 } else {
562 0.0
563 }
564 } else {
565 0.0
566 };
567
568 W_STRUCT * structural + W_SEM * semantic + W_STOP * stop + W_ARGS * args
569}
570
571fn tool_shape(r: &Record) -> BTreeSet<String> {
576 let mut out = BTreeSet::new();
577 let Some(arr) = r.payload.get("content").and_then(|c| c.as_array()) else {
578 return out;
579 };
580 for part in arr {
581 if part.get("type").and_then(|t| t.as_str()) != Some("tool_use") {
582 continue;
583 }
584 let name = part.get("name").and_then(|n| n.as_str()).unwrap_or("_");
585 let mut keys: Vec<String> = part
586 .get("input")
587 .and_then(|i| i.as_object())
588 .map(|o| o.keys().cloned().collect())
589 .unwrap_or_default();
590 keys.sort();
591 out.insert(format!("{name}({})", keys.join(",")));
592 }
593 out
594}
595
596fn count_tool_use(r: &Record) -> usize {
601 let Some(arr) = r.payload.get("content").and_then(|c| c.as_array()) else {
602 return 0;
603 };
604 arr.iter()
605 .filter(|p| p.get("type").and_then(|t| t.as_str()) == Some("tool_use"))
606 .count()
607}
608
609fn response_text(r: &Record) -> String {
610 let Some(arr) = r.payload.get("content").and_then(|c| c.as_array()) else {
611 return String::new();
612 };
613 arr.iter()
614 .filter_map(|p| {
615 if p.get("type").and_then(|t| t.as_str()) == Some("text") {
616 p.get("text")
617 .and_then(|t| t.as_str())
618 .map(ToString::to_string)
619 } else {
620 None
621 }
622 })
623 .collect::<Vec<_>>()
624 .join(" ")
625}
626
627fn stop_reason(r: &Record) -> String {
628 r.payload
629 .get("stop_reason")
630 .and_then(|v| v.as_str())
631 .unwrap_or("")
632 .to_string()
633}
634
/// Jaccard similarity `|a ∩ b| / |a ∪ b|` of two string sets, in `[0.0, 1.0]`.
///
/// Two empty sets are defined to be identical (`1.0`).
fn jaccard(a: &BTreeSet<String>, b: &BTreeSet<String>) -> f64 {
    if a.is_empty() && b.is_empty() {
        return 1.0;
    }
    let shared = a.intersection(b).count();
    // |a ∪ b| = |a| + |b| - |a ∩ b|; non-zero here because at least one set
    // is non-empty, so no separate zero-division guard is needed.
    let combined = a.len() + b.len() - shared;
    shared as f64 / combined as f64
}
649
650fn text_similarity(a: &str, b: &str) -> f64 {
663 let na = normalise_whitespace(a);
664 let nb = normalise_whitespace(b);
665 if na.is_empty() && nb.is_empty() {
666 return 1.0;
667 }
668 if na == nb {
669 return 1.0;
670 }
671 let sa = shingles(&na, 4);
672 let sb = shingles(&nb, 4);
673 jaccard(&sa, &sb)
674}
675
/// Collapses every run of whitespace into a single ASCII space and strips
/// leading and trailing whitespace.
fn normalise_whitespace(s: &str) -> String {
    // `split_whitespace` yields the maximal non-whitespace runs, so joining
    // them with one space drops leading/trailing runs and collapses inner ones.
    let words: Vec<&str> = s.split_whitespace().collect();
    words.join(" ")
}
698
/// Returns the set of all `k`-character substrings (shingles) of `s`.
///
/// Windows are taken over `char`s, not bytes. A non-empty string shorter
/// than `k` characters yields itself as its only shingle; an empty string
/// yields the empty set.
fn shingles(s: &str, k: usize) -> BTreeSet<String> {
    let symbols: Vec<char> = s.chars().collect();
    if symbols.len() < k {
        // Too short for a full window: fall back to the whole string, if any.
        return (!s.is_empty())
            .then(|| s.to_string())
            .into_iter()
            .collect();
    }
    symbols
        .windows(k)
        .map(|window| window.iter().collect::<String>())
        .collect()
}
713
714fn walk_collecting(
723 alignment: &Alignment,
724 baseline: &[&Record],
725 candidate: &[&Record],
726 limit: usize,
727) -> Vec<FirstDivergence> {
728 let mut out: Vec<FirstDivergence> = Vec::new();
729 if limit == 0 {
730 return out;
731 }
732 let mut b_cursor: usize = 0;
737 let mut c_cursor: usize = 0;
738 for step in &alignment.steps {
739 if out.len() >= limit {
740 return out;
741 }
742 match *step {
743 Step::InsertCandidate(j) => {
744 let cand = candidate[j];
748 let insertion_point = b_cursor;
749 let n_tools = tool_shape(cand).len();
750 let detail = if n_tools == 0 {
751 "an extra response turn with no tool calls".to_string()
752 } else if n_tools == 1 {
753 "an extra turn with 1 tool call".to_string()
754 } else {
755 format!("an extra turn with {n_tools} tool calls")
756 };
757 out.push(FirstDivergence {
758 baseline_turn: insertion_point,
759 candidate_turn: j,
760 kind: DivergenceKind::Structural,
761 primary_axis: Axis::Trajectory,
762 explanation: format!(
763 "candidate inserted {detail} between baseline turns #{prev} and #{insertion_point}",
764 prev = insertion_point.saturating_sub(1),
765 ),
766 confidence: 1.0,
767 });
768 c_cursor = c_cursor.saturating_add(1);
769 }
770 Step::DeleteBaseline(i) => {
771 let b = baseline[i];
772 let deletion_point = c_cursor;
773 let n_tools = tool_shape(b).len();
774 let detail = if n_tools == 0 {
775 "a response turn with no tool calls".to_string()
776 } else if n_tools == 1 {
777 "a turn with 1 tool call".to_string()
778 } else {
779 format!("a turn with {n_tools} tool calls")
780 };
781 out.push(FirstDivergence {
782 baseline_turn: i,
783 candidate_turn: deletion_point,
784 kind: DivergenceKind::Structural,
785 primary_axis: Axis::Trajectory,
786 explanation: format!(
787 "candidate dropped {detail} (baseline turn #{i} has no counterpart)",
788 ),
789 confidence: 1.0,
790 });
791 b_cursor = b_cursor.saturating_add(1);
792 }
793 Step::Match(i, j) => {
794 let b = baseline[i];
795 let c = candidate[j];
796 let cost = pair_cost(b, c);
797 b_cursor = i.saturating_add(1);
798 c_cursor = j.saturating_add(1);
799 if cost <= NOISE_FLOOR {
800 continue;
801 }
802 let (kind, axis, explanation) = classify(b, c, cost);
804 let confidence = ((cost - NOISE_FLOOR) / (1.0 - NOISE_FLOOR)).clamp(0.0, 1.0);
805 out.push(FirstDivergence {
806 baseline_turn: i,
807 candidate_turn: j,
808 kind,
809 primary_axis: axis,
810 explanation,
811 confidence,
812 });
813 }
814 }
815 }
816 out
817}
818
819fn classify(b: &Record, c: &Record, cost: f64) -> (DivergenceKind, Axis, String) {
821 let shape_b = tool_shape(b);
822 let shape_c = tool_shape(c);
823 let text_b = response_text(b);
824 let text_c = response_text(c);
825 let stop_b = stop_reason(b);
826 let stop_c = stop_reason(c);
827 let sem_sim = text_similarity(&text_b, &text_c);
828
829 if shape_b != shape_c {
831 let explanation = describe_tool_diff(&shape_b, &shape_c);
832 return (DivergenceKind::Structural, Axis::Trajectory, explanation);
833 }
834 let count_b = count_tool_use(b);
838 let count_c = count_tool_use(c);
839 if count_b != count_c {
840 let tool_names: Vec<&String> = shape_b.iter().collect();
841 let tools_summary = if tool_names.len() == 1 {
842 format!("`{}`", tool_names[0])
843 } else {
844 format!("{} tool(s)", tool_names.len())
845 };
846 let explanation = if count_c > count_b {
847 format!(
848 "candidate called {tools_summary} {count_c} time(s) vs baseline's {count_b} \
849 — duplicate tool invocation"
850 )
851 } else {
852 format!(
853 "candidate called {tools_summary} {count_c} time(s) vs baseline's {count_b} \
854 — dropped one or more repeat invocations"
855 )
856 };
857 return (DivergenceKind::Structural, Axis::Trajectory, explanation);
858 }
859
860 if stop_b != stop_c {
862 return (
863 DivergenceKind::Decision,
864 Axis::Safety,
865 format!("stop_reason changed: `{stop_b}` → `{stop_c}`"),
866 );
867 }
868
869 if let Some(arg_diff) = arg_value_diff(b, c) {
874 return (
875 DivergenceKind::Decision,
876 Axis::Trajectory,
877 format!("tool arg value changed: {arg_diff}"),
878 );
879 }
880
881 if sem_sim >= 0.90 && cost <= STYLE_MAX_COST {
883 (
884 DivergenceKind::Style,
885 Axis::Semantic,
886 "cosmetic wording change — tool sequence and semantics preserved".to_string(),
887 )
888 } else {
889 (
890 DivergenceKind::Decision,
891 Axis::Semantic,
892 format!(
893 "response text diverged (text similarity {:.2}); same tool sequence",
894 sem_sim
895 ),
896 )
897 }
898}
899
900fn describe_tool_diff(a: &BTreeSet<String>, b: &BTreeSet<String>) -> String {
901 let only_a: Vec<&String> = a.difference(b).collect();
902 let only_b: Vec<&String> = b.difference(a).collect();
903 if !only_a.is_empty() && only_b.is_empty() {
904 format!("candidate dropped tool call(s): {}", list(&only_a))
905 } else if !only_b.is_empty() && only_a.is_empty() {
906 format!("candidate added tool call(s): {}", list(&only_b))
907 } else if !only_a.is_empty() && !only_b.is_empty() {
908 format!(
909 "tool set changed: removed {}, added {}",
910 list(&only_a),
911 list(&only_b)
912 )
913 } else {
914 "tool ordering differs".to_string()
915 }
916}
917
/// Renders the items as a comma-separated list with each item wrapped in
/// backticks, e.g. `` `a`, `b` ``. An empty slice renders as an empty string.
fn list(items: &[&String]) -> String {
    let mut rendered = String::new();
    for (idx, item) in items.iter().enumerate() {
        if idx > 0 {
            rendered.push_str(", ");
        }
        rendered.push('`');
        rendered.push_str(item.as_str());
        rendered.push('`');
    }
    rendered
}
925
926fn arg_value_diff(a: &Record, b: &Record) -> Option<String> {
930 let ta = tool_use_inputs(a);
931 let tb = tool_use_inputs(b);
932 for (name, va) in &ta {
933 if let Some(vb) = tb.get(name) {
934 if va != vb {
935 if let (Some(oa), Some(ob)) = (va.as_object(), vb.as_object()) {
937 for (k, v) in oa {
938 if ob.get(k) != Some(v) {
939 let other = ob
940 .get(k)
941 .map(|x| x.to_string())
942 .unwrap_or("<missing>".to_string());
943 return Some(format!("`{name}({k})`: `{v}` → `{other}`"));
944 }
945 }
946 }
947 return Some(format!("`{name}`: input changed"));
948 }
949 }
950 }
951 None
952}
953
954fn tool_use_inputs(r: &Record) -> std::collections::BTreeMap<String, serde_json::Value> {
957 let mut out = std::collections::BTreeMap::new();
958 let Some(arr) = r.payload.get("content").and_then(|c| c.as_array()) else {
959 return out;
960 };
961 for part in arr {
962 if part.get("type").and_then(|t| t.as_str()) != Some("tool_use") {
963 continue;
964 }
965 let name = part
966 .get("name")
967 .and_then(|n| n.as_str())
968 .unwrap_or("_")
969 .to_string();
970 let input = part
971 .get("input")
972 .cloned()
973 .unwrap_or(serde_json::Value::Null);
974 out.entry(name).or_insert(input);
975 }
976 out
977}
978
/// Unit tests for divergence detection, classification, ranking and the
/// banded alignment storage.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::agentlog::Kind;
    use serde_json::json;

    /// Builds a chat response holding a single text part.
    fn response_text_only(text: &str, stop: &str) -> Record {
        Record::new(
            Kind::ChatResponse,
            json!({
                "model": "x",
                "content": [{"type": "text", "text": text}],
                "stop_reason": stop,
                "latency_ms": 0,
                "usage": {"input_tokens": 1, "output_tokens": 1, "thinking_tokens": 0},
            }),
            "2026-04-23T00:00:00Z",
            None,
        )
    }

    /// Builds a chat response holding a single `tool_use` part.
    fn response_with_tool(name: &str, input: serde_json::Value, stop: &str) -> Record {
        Record::new(
            Kind::ChatResponse,
            json!({
                "model": "x",
                "content": [{
                    "type": "tool_use",
                    "id": "t1",
                    "name": name,
                    "input": input,
                }],
                "stop_reason": stop,
                "latency_ms": 0,
                "usage": {"input_tokens": 1, "output_tokens": 1, "thinking_tokens": 0},
            }),
            "2026-04-23T00:00:00Z",
            None,
        )
    }

    /// Builds a metadata record, which the detector must ignore.
    fn meta() -> Record {
        Record::new(
            Kind::Metadata,
            json!({"sdk": {"name": "shadow"}}),
            "2026-04-23T00:00:00Z",
            None,
        )
    }

    #[test]
    fn identical_traces_return_none() {
        let r = response_text_only("Paris is the capital of France.", "end_turn");
        let baseline = vec![meta(), r.clone(), r.clone()];
        let candidate = vec![meta(), r.clone(), r.clone()];
        assert_eq!(detect(&baseline, &candidate), None);
    }

    #[test]
    fn whitespace_only_diff_is_style() {
        let b = response_text_only("Paris is the capital of France.", "end_turn");
        // Same words, different whitespace layout.
        let c = response_text_only("Paris  is   the capital\tof France.", "end_turn");
        let baseline = vec![meta(), b];
        let candidate = vec![meta(), c];
        // A whitespace-only change may fall under the noise floor and not be
        // reported at all; if it is reported, it must be a style drift.
        if let Some(d) = detect(&baseline, &candidate) {
            assert_eq!(d.kind, DivergenceKind::Style);
            assert_eq!(d.primary_axis, Axis::Semantic);
        }
    }

    #[test]
    fn different_tool_name_is_structural_on_trajectory_axis() {
        let b = response_with_tool("search", json!({"q": "cats"}), "tool_use");
        let c = response_with_tool("lookup", json!({"q": "cats"}), "tool_use");
        let baseline = vec![meta(), b];
        let candidate = vec![meta(), c];
        let d = detect(&baseline, &candidate).expect("divergence expected");
        assert_eq!(d.kind, DivergenceKind::Structural);
        assert_eq!(d.primary_axis, Axis::Trajectory);
        assert_eq!(d.baseline_turn, 0);
        assert_eq!(d.candidate_turn, 0);
        assert!(d.explanation.contains("search") || d.explanation.contains("lookup"));
    }

    #[test]
    fn same_tool_different_arg_value_is_decision() {
        let b = response_with_tool("search", json!({"q": "cats", "limit": 10}), "tool_use");
        let c = response_with_tool("search", json!({"q": "cats", "limit": 50}), "tool_use");
        let baseline = vec![meta(), b];
        let candidate = vec![meta(), c];
        let d = detect(&baseline, &candidate).expect("divergence expected");
        assert_eq!(d.kind, DivergenceKind::Decision);
        assert_eq!(d.primary_axis, Axis::Trajectory);
        assert!(d.explanation.contains("limit"));
    }

    #[test]
    fn stop_reason_flip_is_decision_on_safety() {
        let b = response_text_only("Here is the answer.", "end_turn");
        let c = response_text_only("I can't help with that.", "content_filter");
        let baseline = vec![meta(), b];
        let candidate = vec![meta(), c];
        let d = detect(&baseline, &candidate).expect("divergence expected");
        assert_eq!(d.kind, DivergenceKind::Decision);
        assert_eq!(d.primary_axis, Axis::Safety);
        assert!(d.explanation.contains("end_turn"));
        assert!(d.explanation.contains("content_filter"));
    }

    #[test]
    fn candidate_drops_a_turn_is_structural() {
        let r1 = response_text_only("first turn", "end_turn");
        let r2 = response_text_only("second turn", "end_turn");
        let baseline = vec![meta(), r1.clone(), r2];
        let candidate = vec![meta(), r1];
        let d = detect(&baseline, &candidate).expect("divergence expected");
        assert_eq!(d.kind, DivergenceKind::Structural);
        assert_eq!(d.primary_axis, Axis::Trajectory);
    }

    #[test]
    fn candidate_inserts_a_turn_is_structural() {
        let r1 = response_text_only("turn one", "end_turn");
        let r2 = response_text_only("inserted", "end_turn");
        let r3 = response_text_only("turn two", "end_turn");
        let baseline = vec![meta(), r1.clone(), r3.clone()];
        let candidate = vec![meta(), r1, r2, r3];
        let d = detect(&baseline, &candidate).expect("divergence expected");
        assert_eq!(d.kind, DivergenceKind::Structural);
    }

    #[test]
    fn significant_text_shift_is_decision_on_semantic() {
        let b = response_text_only(
            "Photosynthesis is the process by which plants convert sunlight.",
            "end_turn",
        );
        let c = response_text_only(
            "The stock market closed higher on Thursday after strong earnings.",
            "end_turn",
        );
        let baseline = vec![meta(), b];
        let candidate = vec![meta(), c];
        let d = detect(&baseline, &candidate).expect("divergence expected");
        assert_eq!(d.kind, DivergenceKind::Decision);
        assert_eq!(d.primary_axis, Axis::Semantic);
    }

    #[test]
    fn empty_traces_return_none() {
        assert_eq!(detect(&[meta()], &[meta()]), None);
        assert_eq!(detect(&[], &[]), None);
    }

    #[test]
    fn first_divergence_is_truly_first() {
        let r1 = response_text_only("same", "end_turn");
        let r2b = response_text_only("baseline version of turn two with lots of text", "end_turn");
        let r2c = response_text_only(
            "CANDIDATE SAID SOMETHING COMPLETELY DIFFERENT HERE",
            "end_turn",
        );
        let r3 = response_text_only("also same", "end_turn");
        let baseline = vec![meta(), r1.clone(), r2b, r3.clone()];
        let candidate = vec![meta(), r1, r2c, r3];
        let d = detect(&baseline, &candidate).expect("divergence expected");
        assert_eq!(d.baseline_turn, 1);
        assert_eq!(d.candidate_turn, 1);
    }

    #[test]
    fn confidence_is_in_valid_range() {
        let b = response_with_tool("search", json!({"q": "a"}), "tool_use");
        let c = response_with_tool("other", json!({"q": "a"}), "tool_use");
        let baseline = vec![meta(), b];
        let candidate = vec![meta(), c];
        let d = detect(&baseline, &candidate).unwrap();
        assert!((0.0..=1.0).contains(&d.confidence));
    }

    #[test]
    fn tool_shape_captures_name_and_arg_keys() {
        let r = response_with_tool("search", json!({"q": "a", "limit": 10}), "tool_use");
        let shape = tool_shape(&r);
        assert_eq!(shape.len(), 1);
        let entry = shape.iter().next().unwrap();
        assert!(entry.starts_with("search("));
        assert!(entry.contains("limit"));
        assert!(entry.contains("q"));
    }

    #[test]
    fn jaccard_on_empty_sets_is_one() {
        let empty = BTreeSet::new();
        assert_eq!(jaccard(&empty, &empty), 1.0);
    }

    #[test]
    fn alignment_prefers_matches_over_gaps_when_both_cheap() {
        let r = response_text_only("same", "end_turn");
        let alignment = align(&[&r, &r], &[&r, &r]);
        let matches = alignment
            .steps
            .iter()
            .filter(|s| matches!(s, Step::Match(..)))
            .count();
        assert_eq!(matches, 2);
        let gaps = alignment.steps.len() - matches;
        assert_eq!(gaps, 0);
    }

    #[test]
    fn top_k_with_zero_returns_empty() {
        let r1 = response_text_only("same", "end_turn");
        let r2 = response_text_only("different", "end_turn");
        let out = detect_top_k(&[meta(), r1], &[meta(), r2], 0);
        assert_eq!(out.len(), 0);
    }

    #[test]
    fn top_k_with_identical_returns_empty() {
        let r = response_text_only("same", "end_turn");
        let out = detect_top_k(&[meta(), r.clone(), r.clone()], &[meta(), r.clone(), r], 3);
        assert_eq!(out.len(), 0);
    }

    #[test]
    fn top_k_orders_structural_before_decision_before_style() {
        // Turn 0: near-identical wording; turn 1: stop-reason flip;
        // turn 2: different tool.
        let b0 = response_text_only(
            "Hello, here is a detailed answer explaining the topic in full.",
            "end_turn",
        );
        let b1 = response_text_only("The answer is 42.", "end_turn");
        let b2 = response_with_tool("search", json!({"q": "x"}), "tool_use");
        let c0 = response_text_only(
            "Hello, here is a detailed answer explaining the topic in full!",
            "end_turn",
        );
        let c1 = response_text_only("I cannot answer that.", "content_filter");
        let c2 = response_with_tool("lookup", json!({"q": "x"}), "tool_use");
        let baseline = vec![meta(), b0, b1, b2];
        let candidate = vec![meta(), c0, c1, c2];
        let out = detect_top_k(&baseline, &candidate, 5);
        assert!(
            out.len() >= 2,
            "expected at least 2 divergences, got {}",
            out.len()
        );
        assert_eq!(
            out[0].kind,
            DivergenceKind::Structural,
            "rank 1 should be Structural, got {:?}",
            out[0].kind
        );
        assert_eq!(
            out[1].kind,
            DivergenceKind::Decision,
            "rank 2 should be Decision, got {:?}",
            out[1].kind
        );
    }

    #[test]
    fn top_k_truncates_at_k() {
        let baseline = vec![
            meta(),
            response_with_tool("a", json!({}), "tool_use"),
            response_with_tool("b", json!({}), "tool_use"),
            response_with_tool("c", json!({}), "tool_use"),
            response_with_tool("d", json!({}), "tool_use"),
            response_with_tool("e", json!({}), "tool_use"),
        ];
        let candidate = vec![
            meta(),
            response_with_tool("A", json!({}), "tool_use"),
            response_with_tool("B", json!({}), "tool_use"),
            response_with_tool("C", json!({}), "tool_use"),
            response_with_tool("D", json!({}), "tool_use"),
            response_with_tool("E", json!({}), "tool_use"),
        ];
        let out = detect_top_k(&baseline, &candidate, 2);
        assert_eq!(out.len(), 2);
        for dv in &out {
            assert_eq!(dv.kind, DivergenceKind::Structural);
        }
    }

    #[test]
    fn top_k_preserves_walk_order_within_same_severity_and_confidence() {
        let baseline = vec![
            meta(),
            response_with_tool("a", json!({}), "tool_use"),
            response_with_tool("b", json!({}), "tool_use"),
            response_with_tool("c", json!({}), "tool_use"),
        ];
        let candidate = vec![
            meta(),
            response_with_tool("A", json!({}), "tool_use"),
            response_with_tool("B", json!({}), "tool_use"),
            response_with_tool("C", json!({}), "tool_use"),
        ];
        let out = detect_top_k(&baseline, &candidate, 3);
        assert_eq!(out.len(), 3);
        assert_eq!(out[0].baseline_turn, 0);
        assert_eq!(out[1].baseline_turn, 1);
        assert_eq!(out[2].baseline_turn, 2);
    }

    #[test]
    fn top_k_of_1_matches_first_divergence_classifier() {
        let b = response_with_tool("search", json!({"q": "x"}), "tool_use");
        let c = response_with_tool("search", json!({"q": "y"}), "tool_use");
        let first = detect(&[meta(), b.clone()], &[meta(), c.clone()]).unwrap();
        let top = detect_top_k(&[meta(), b], &[meta(), c], 1);
        assert_eq!(top.len(), 1);
        assert_eq!(top[0].kind, first.kind);
        assert_eq!(top[0].baseline_turn, first.baseline_turn);
    }

    #[test]
    fn banded_alignment_storage_is_banded_not_full_matrix() {
        let n = 2_000usize;
        let m = 2_000usize;
        let band = band_half_width(n, m);
        let banded: Banded<f64> = Banded::new(n, m, band, 0.0);
        let total_cells: usize = banded.rows.iter().map(|r| r.len()).sum();
        let full_cells = (n + 1) * (m + 1);
        assert!(
            total_cells < full_cells / 4,
            "banded storage size {total_cells} not meaningfully smaller than \
             full-matrix size {full_cells}; the storage-is-banded fix has regressed"
        );
        let middle_row_len = banded.rows[n / 2].len();
        assert!(
            middle_row_len <= 2 * band + 1,
            "middle row has {middle_row_len} cells; expected at most {} \
             (2 * band + 1). align_banded is allocating wider than the band.",
            2 * band + 1
        );
        assert!(
            middle_row_len < m + 1,
            "middle row has {middle_row_len} cells = m + 1; align_banded \
             has regressed to full-matrix storage"
        );
    }

    #[test]
    fn first_divergence_is_alignment_order_not_importance_rank() {
        // Turn 0 differs in text only; turn 1 differs in tool. `detect`
        // reports the earlier one, `detect_top_k` ranks the structural one first.
        let b0 = response_text_only("same across both", "end_turn");
        let b1 = response_with_tool("search", json!({"q": "x"}), "tool_use");
        let c0 = response_text_only("completely different response here", "end_turn");
        let c1 = response_with_tool("lookup", json!({"q": "x"}), "tool_use");
        let baseline = vec![meta(), b0, b1];
        let candidate = vec![meta(), c0, c1];
        let first = detect(&baseline, &candidate).unwrap();
        let top = detect_top_k(&baseline, &candidate, 3);
        assert_eq!(first.baseline_turn, 0);
        assert_eq!(top[0].baseline_turn, 1);
        assert_eq!(top[0].kind, DivergenceKind::Structural);
    }
}