agentic_eval/
determinism.rs1use std::collections::BTreeSet;
10
/// Summary of repeated invocations of one producer, as built by
/// [`assess_determinism`].
///
/// Derives `PartialEq`/`Eq` so two reports can be compared directly (for
/// example with `assert_eq!`).
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DeterminismReport {
    /// Number of producer invocations the report covers.
    pub runs: usize,
    /// Number of distinct outputs observed across those invocations.
    pub distinct: usize,
    /// Whether all invocations agreed; [`assess_determinism`] sets this to
    /// `distinct == 1`.
    pub deterministic: bool,
    /// Output of the first invocation, kept as a sample.
    pub first: String,
}
24
25pub fn assess_determinism(runs: usize, mut produce: impl FnMut() -> String) -> DeterminismReport {
28 let runs = runs.max(2);
29 let mut seen: BTreeSet<String> = BTreeSet::new();
30 let mut first = String::new();
31 for i in 0..runs {
32 let out = produce();
33 if i == 0 {
34 first = out.clone();
35 }
36 seen.insert(out);
37 }
38 DeterminismReport {
39 runs,
40 distinct: seen.len(),
41 deterministic: seen.len() == 1,
42 first,
43 }
44}
45
46impl std::fmt::Display for DeterminismReport {
47 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
48 write!(
49 f,
50 "{} ({} distinct / {} runs)",
51 if self.deterministic {
52 "deterministic"
53 } else {
54 "NON-deterministic"
55 },
56 self.distinct,
57 self.runs
58 )
59 }
60}
61
/// Returns `true` when `a` and `b` are byte-for-byte identical.
///
/// No normalization is applied: two strings that differ only in ordering or
/// whitespace are reported as not stable.
pub fn stable_across(a: &str, b: &str) -> bool {
    a.as_bytes() == b.as_bytes()
}
68
#[cfg(test)]
mod tests {
    //! Unit tests for the determinism helpers: verdict and counts, the run
    //! clamp, the captured first sample, and the `Display` summary.

    use super::*;
    use std::cell::Cell;

    #[test]
    fn constant_output_is_deterministic() {
        let r = assess_determinism(5, || "name\tsize\nfoo\t10".to_string());
        assert!(r.deterministic);
        assert_eq!(r.distinct, 1);
        assert_eq!(r.runs, 5);
    }

    #[test]
    fn varying_output_is_flagged_nondeterministic() {
        let n = Cell::new(0);
        let r = assess_determinism(4, || {
            // Embed a per-call counter so every output differs.
            let v = n.get();
            n.set(v + 1);
            format!("rows=3 generated_at={v}")
        });
        assert!(!r.deterministic);
        assert_eq!(r.distinct, 4);
    }

    #[test]
    fn single_run_is_clamped_to_two() {
        let r = assess_determinism(1, || "x".to_string());
        assert_eq!(r.runs, 2);
        assert!(r.deterministic);
    }

    #[test]
    fn zero_runs_is_clamped_to_two() {
        let r = assess_determinism(0, || "x".to_string());
        assert_eq!(r.runs, 2);
        assert_eq!(r.distinct, 1);
        assert!(r.deterministic);
    }

    #[test]
    fn producer_is_called_once_per_run() {
        let calls = Cell::new(0usize);
        let r = assess_determinism(3, || {
            calls.set(calls.get() + 1);
            "same".to_string()
        });
        assert_eq!(r.runs, 3);
        assert_eq!(calls.get(), 3);
    }

    #[test]
    fn first_holds_the_initial_output() {
        let n = Cell::new(0);
        let r = assess_determinism(3, || {
            let v = n.get();
            n.set(v + 1);
            // Descending values: the first output is not the smallest one,
            // so this also catches picking the sample from sorted order.
            format!("out-{}", 9 - v)
        });
        assert_eq!(r.first, "out-9");
        assert_eq!(r.distinct, 3);
    }

    #[test]
    fn display_summarizes_verdict_and_counts() {
        let stable = assess_determinism(5, || "x".to_string());
        assert_eq!(stable.to_string(), "deterministic (1 distinct / 5 runs)");

        let n = Cell::new(0);
        let flaky = assess_determinism(4, || {
            let v = n.get();
            n.set(v + 1);
            v.to_string()
        });
        assert_eq!(flaky.to_string(), "NON-deterministic (4 distinct / 4 runs)");
    }

    #[test]
    fn stable_across_compares_bytes() {
        assert!(stable_across(r#"{"a":1,"b":2}"#, r#"{"a":1,"b":2}"#));
        // Same JSON content, different key order: still a byte mismatch.
        assert!(!stable_across(r#"{"a":1,"b":2}"#, r#"{"b":2,"a":1}"#));
    }
}