// datasynth_audit_optimizer/benchmark_comparison.rs
use rand::SeedableRng;
8use rand_chacha::ChaCha8Rng;
9use serde::{Deserialize, Serialize};
10
11use datasynth_audit_fsm::{
12 context::EngagementContext,
13 dispatch::infer_judgment_level,
14 engine::AuditFsmEngine,
15 error::AuditFsmError,
16 loader::{default_overlay, BlueprintWithPreconditions},
17};
18
/// Signature shared by all built-in blueprint loader functions, letting the
/// firm table in `run_comparison` hold them in one homogeneous slice.
type BlueprintLoader = fn() -> Result<BlueprintWithPreconditions, AuditFsmError>;
21
/// Metrics collected from one simulated engagement run of a single firm's
/// methodology blueprint.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FirmBenchmark {
    /// Human-readable firm / methodology name (e.g. "KPMG Clara").
    pub firm: String,
    /// Blueprint identifier used by the loader (e.g. "kpmg").
    pub blueprint: String,
    /// Number of phases defined in the blueprint.
    pub phases: usize,
    /// Total procedures across all phases.
    pub procedures: usize,
    /// Total steps across all procedures in all phases.
    pub steps: usize,
    /// Number of events recorded in the run's event log.
    pub events: usize,
    /// Total artifacts produced by the run.
    pub artifacts: usize,
    /// Simulated engagement duration, in hours.
    pub duration_hours: f64,
    /// Number of anomalies reported by the run.
    pub anomalies: usize,
    /// Fraction (0.0–1.0) of procedures that ended "completed" or "closed".
    pub completion_rate: f64,
    /// Breakdown of blueprint steps by judgment level.
    pub judgment_distribution: JudgmentDistribution,
    /// Number of standards referenced by the blueprint.
    pub standards_count: usize,
}
54
/// Counts and percentages of blueprint steps grouped by judgment level.
///
/// Each `*_pct` field is `count / total_steps * 100`, so the three
/// percentages sum to ~100 for a non-empty blueprint.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JudgmentDistribution {
    /// Steps classified "data_only".
    pub data_only: usize,
    /// Steps classified neither "data_only" nor "human_required".
    pub ai_assistable: usize,
    /// Steps classified "human_required".
    pub human_required: usize,
    /// `data_only` as a percentage of all steps.
    pub data_only_pct: f64,
    /// `ai_assistable` as a percentage of all steps.
    pub ai_assistable_pct: f64,
    /// `human_required` as a percentage of all steps.
    pub human_required_pct: f64,
}
71
/// Aggregated benchmarks for every firm blueprint that loaded and ran,
/// plus the parameters needed to reproduce the run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComparisonReport {
    /// One entry per firm whose blueprint loaded and ran successfully.
    pub benchmarks: Vec<FirmBenchmark>,
    /// RNG seed shared by every firm's run (same seed => same report).
    pub seed: u64,
    /// Overlay label; currently always "default".
    pub overlay: String,
}
82
83pub fn run_comparison(seed: u64, context: Option<&EngagementContext>) -> ComparisonReport {
94 let overlay = default_overlay();
95 let default_ctx = EngagementContext::demo();
96 let ctx = context.unwrap_or(&default_ctx);
97 let mut benchmarks = Vec::new();
98
99 let loaders: &[(&str, &str, BlueprintLoader)] = &[
101 (
102 "Generic ISA",
103 "fsa",
104 BlueprintWithPreconditions::load_builtin_fsa,
105 ),
106 (
107 "KPMG Clara",
108 "kpmg",
109 BlueprintWithPreconditions::load_builtin_kpmg,
110 ),
111 (
112 "PwC Aura",
113 "pwc",
114 BlueprintWithPreconditions::load_builtin_pwc,
115 ),
116 (
117 "Deloitte Omnia",
118 "deloitte",
119 BlueprintWithPreconditions::load_builtin_deloitte,
120 ),
121 (
122 "EY GAM Lite",
123 "ey_gam_lite",
124 BlueprintWithPreconditions::load_builtin_ey_gam_lite,
125 ),
126 (
127 "IIA-GIAS",
128 "ia",
129 BlueprintWithPreconditions::load_builtin_ia,
130 ),
131 ];
132
133 for (firm_name, bp_name, loader) in loaders {
134 let bwp = match loader() {
135 Ok(b) => b,
136 Err(_) => continue,
137 };
138
139 let phases = bwp.blueprint.phases.len();
143 let procedures: usize = bwp
144 .blueprint
145 .phases
146 .iter()
147 .map(|p| p.procedures.len())
148 .sum();
149 let steps: usize = bwp
150 .blueprint
151 .phases
152 .iter()
153 .flat_map(|p| p.procedures.iter())
154 .map(|proc| proc.steps.len())
155 .sum();
156
157 let mut data_only = 0usize;
161 let mut ai_assistable = 0usize;
162 let mut human_required = 0usize;
163
164 for phase in &bwp.blueprint.phases {
165 for proc in &phase.procedures {
166 for step in &proc.steps {
167 let level = step.judgment_level.as_deref().unwrap_or_else(|| {
168 infer_judgment_level(step.command.as_deref().unwrap_or(""))
169 });
170 match level {
171 "data_only" => data_only += 1,
172 "human_required" => human_required += 1,
173 _ => ai_assistable += 1,
174 }
175 }
176 }
177 }
178
179 let total_steps_f = (data_only + ai_assistable + human_required).max(1) as f64;
180
181 let mut engine = AuditFsmEngine::new(
185 bwp.clone(),
186 overlay.clone(),
187 ChaCha8Rng::seed_from_u64(seed),
188 );
189 let result = engine.run_engagement(ctx).unwrap();
190
191 let completed = result
192 .procedure_states
193 .values()
194 .filter(|s| s.as_str() == "completed" || s.as_str() == "closed")
195 .count();
196
197 let standards_count = bwp.blueprint.standards.len();
198
199 benchmarks.push(FirmBenchmark {
200 firm: firm_name.to_string(),
201 blueprint: bp_name.to_string(),
202 phases,
203 procedures,
204 steps,
205 events: result.event_log.len(),
206 artifacts: result.artifacts.total_artifacts(),
207 duration_hours: result.total_duration_hours,
208 anomalies: result.anomalies.len(),
209 completion_rate: completed as f64 / result.procedure_states.len().max(1) as f64,
210 judgment_distribution: JudgmentDistribution {
211 data_only,
212 ai_assistable,
213 human_required,
214 data_only_pct: data_only as f64 / total_steps_f * 100.0,
215 ai_assistable_pct: ai_assistable as f64 / total_steps_f * 100.0,
216 human_required_pct: human_required as f64 / total_steps_f * 100.0,
217 },
218 standards_count,
219 });
220 }
221
222 ComparisonReport {
223 benchmarks,
224 seed,
225 overlay: "default".to_string(),
226 }
227}
228
229pub fn format_comparison_report(report: &ComparisonReport) -> String {
231 let mut out = String::new();
232 out.push_str("Cross-Firm Methodology Benchmark\n");
233 out.push_str(&format!(
234 "Seed: {}, Overlay: {}\n\n",
235 report.seed, report.overlay
236 ));
237
238 out.push_str(&format!(
240 "{:20} {:>6} {:>6} {:>6} {:>7} {:>9} {:>8} {:>6} {:>7} {:>6} {:>6} {:>6}\n",
241 "Firm",
242 "Phases",
243 "Procs",
244 "Steps",
245 "Events",
246 "Artifacts",
247 "Hours",
248 "Anom",
249 "Compl%",
250 "Data%",
251 "AI%",
252 "Human%"
253 ));
254 out.push_str(&"-".repeat(110));
255 out.push('\n');
256
257 for b in &report.benchmarks {
258 out.push_str(&format!(
259 "{:20} {:>6} {:>6} {:>6} {:>7} {:>9} {:>8.0} {:>6} {:>6.0}% {:>5.0}% {:>5.0}% {:>5.0}%\n",
260 b.firm,
261 b.phases,
262 b.procedures,
263 b.steps,
264 b.events,
265 b.artifacts,
266 b.duration_hours,
267 b.anomalies,
268 b.completion_rate * 100.0,
269 b.judgment_distribution.data_only_pct,
270 b.judgment_distribution.ai_assistable_pct,
271 b.judgment_distribution.human_required_pct,
272 ));
273 }
274 out
275}
276
#[cfg(test)]
mod tests {
    use super::*;

    /// Every built-in blueprint should yield a benchmark entry.
    #[test]
    fn test_comparison_runs_all_firms() {
        let report = run_comparison(42, None);
        let count = report.benchmarks.len();
        assert!(count >= 5, "Expected >= 5 benchmarks, got {}", count);
    }

    /// Methodologies should differ structurally in phases or procedures.
    #[test]
    fn test_comparison_shows_differences() {
        let report = run_comparison(42, None);
        // True when every adjacent pair of values is equal.
        let uniform = |xs: &[usize]| xs.windows(2).all(|w| w[0] == w[1]);
        let phases: Vec<usize> = report.benchmarks.iter().map(|b| b.phases).collect();
        let procedures: Vec<usize> = report.benchmarks.iter().map(|b| b.procedures).collect();
        assert!(
            !(uniform(&phases) && uniform(&procedures)),
            "All blueprints have identical phases AND procedures — expected some structural differences"
        );
    }

    /// A report must survive a JSON round-trip with key fields intact.
    #[test]
    fn test_comparison_report_serializes() {
        let original = run_comparison(42, None);
        let json = serde_json::to_string(&original).expect("serialization failed");
        let roundtrip: ComparisonReport =
            serde_json::from_str(&json).expect("deserialization failed");
        assert_eq!(original.benchmarks.len(), roundtrip.benchmarks.len());
        for (before, after) in original.benchmarks.iter().zip(roundtrip.benchmarks.iter()) {
            assert_eq!(before.firm, after.firm);
            assert_eq!(before.events, after.events);
            assert_eq!(before.artifacts, after.artifacts);
        }
    }

    /// Identical seeds must produce bit-identical reports.
    #[test]
    fn test_comparison_deterministic() {
        let first = run_comparison(99, None);
        let second = run_comparison(99, None);
        assert_eq!(first.benchmarks.len(), second.benchmarks.len());
        for (a, b) in first.benchmarks.iter().zip(second.benchmarks.iter()) {
            assert_eq!(a.firm, b.firm);
            assert_eq!(a.events, b.events);
            assert_eq!(a.artifacts, b.artifacts);
            // to_bits compares floats exactly, including NaN payloads.
            assert_eq!(a.duration_hours.to_bits(), b.duration_hours.to_bits());
            assert_eq!(a.anomalies, b.anomalies);
        }
    }
}