Skip to main content

datasynth_audit_optimizer/
blueprint_testing.rs

//! Blueprint testing framework.
//!
//! Provides automated validation that a given audit blueprint produces expected
//! artifact types, event counts, phase progression, and timing constraints.
//! The [`test_blueprint`] function runs a single suite; [`test_all_builtins`]
//! exercises every built-in blueprint against reasonable default expectations.
7
8use std::path::PathBuf;
9
10use datasynth_audit_fsm::context::EngagementContext;
11use datasynth_audit_fsm::engine::AuditFsmEngine;
12use datasynth_audit_fsm::error::AuditFsmError;
13use datasynth_audit_fsm::loader::*;
14use rand::SeedableRng;
15use rand_chacha::ChaCha8Rng;
16use serde::{Deserialize, Serialize};
17
// ---------------------------------------------------------------------------
// Suite and expectation types
// ---------------------------------------------------------------------------
21
22/// A test suite for validating a single blueprint.
23#[derive(Debug, Clone, Serialize, Deserialize)]
24pub struct BlueprintTestSuite {
25    /// Blueprint selector (e.g. `"fsa"`, `"ia"`, or a file path).
26    pub blueprint: String,
27    /// Overlay selector (e.g. `"default"`, `"thorough"`, or a file path).
28    pub overlay: String,
29    /// Expected metric thresholds.
30    pub expectations: BlueprintExpectations,
31}
32
33/// Metric thresholds that the blueprint engagement must satisfy.
34#[derive(Debug, Clone, Serialize, Deserialize)]
35pub struct BlueprintExpectations {
36    /// Minimum total FSM events.
37    pub min_events: usize,
38    /// Minimum total typed artifacts.
39    pub min_artifacts: usize,
40    /// Minimum number of procedures that reach a terminal state.
41    pub min_procedures: usize,
42    /// Phase IDs that must appear in the completed-phases list.
43    pub expected_phases: Vec<String>,
44    /// Minimum fraction of procedures completed (0.0..=1.0).
45    pub min_completion_rate: f64,
46    /// Maximum engagement duration in hours.
47    pub max_duration_hours: f64,
48    /// Artifact category names that must be non-empty (e.g. `"engagements"`, `"workpapers"`).
49    pub required_artifact_types: Vec<String>,
50}
51
// ---------------------------------------------------------------------------
// Result types
// ---------------------------------------------------------------------------
55
56/// Outcome of running a blueprint test suite.
57#[derive(Debug, Clone, Serialize)]
58pub struct BlueprintTestResult {
59    /// Whether all expectations were met.
60    pub passed: bool,
61    /// Human-readable descriptions of each failed expectation.
62    pub failures: Vec<String>,
63    /// Actual metrics observed during the engagement.
64    pub metrics: BlueprintMetrics,
65}
66
67/// Measured metrics from the engagement run.
68#[derive(Debug, Clone, Serialize)]
69pub struct BlueprintMetrics {
70    /// Total FSM events.
71    pub events: usize,
72    /// Total typed artifacts.
73    pub artifacts: usize,
74    /// Number of procedures that reached a terminal state.
75    pub procedures: usize,
76    /// Phase IDs that were completed.
77    pub phases_completed: Vec<String>,
78    /// Fraction of procedures completed.
79    pub completion_rate: f64,
80    /// Engagement duration in hours.
81    pub duration_hours: f64,
82    /// Artifact category names that contained at least one item.
83    pub artifact_types_present: Vec<String>,
84}
85
// ---------------------------------------------------------------------------
// Blueprint / overlay resolution
// ---------------------------------------------------------------------------
89
90fn resolve_blueprint(name: &str) -> Result<BlueprintWithPreconditions, AuditFsmError> {
91    match name {
92        "fsa" | "builtin:fsa" => BlueprintWithPreconditions::load_builtin_fsa(),
93        "ia" | "builtin:ia" => BlueprintWithPreconditions::load_builtin_ia(),
94        "kpmg" | "builtin:kpmg" => BlueprintWithPreconditions::load_builtin_kpmg(),
95        "pwc" | "builtin:pwc" => BlueprintWithPreconditions::load_builtin_pwc(),
96        "deloitte" | "builtin:deloitte" => BlueprintWithPreconditions::load_builtin_deloitte(),
97        "ey_gam_lite" | "builtin:ey_gam_lite" => {
98            BlueprintWithPreconditions::load_builtin_ey_gam_lite()
99        }
100        path => BlueprintWithPreconditions::load_from_file(PathBuf::from(path)),
101    }
102}
103
104fn resolve_overlay(
105    name: &str,
106) -> Result<datasynth_audit_fsm::schema::GenerationOverlay, AuditFsmError> {
107    match name {
108        "default" | "builtin:default" => {
109            load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Default))
110        }
111        "thorough" | "builtin:thorough" => {
112            load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Thorough))
113        }
114        "rushed" | "builtin:rushed" => {
115            load_overlay(&OverlaySource::Builtin(BuiltinOverlay::Rushed))
116        }
117        "retail" | "builtin:retail" => {
118            load_overlay(&OverlaySource::Builtin(BuiltinOverlay::IndustryRetail))
119        }
120        "manufacturing" | "builtin:manufacturing" => load_overlay(&OverlaySource::Builtin(
121            BuiltinOverlay::IndustryManufacturing,
122        )),
123        "financial_services" | "builtin:financial_services" => load_overlay(
124            &OverlaySource::Builtin(BuiltinOverlay::IndustryFinancialServices),
125        ),
126        path => load_overlay(&OverlaySource::Custom(PathBuf::from(path))),
127    }
128}
129
// ---------------------------------------------------------------------------
// Artifact type introspection
// ---------------------------------------------------------------------------
133
134/// Return the names of artifact categories that have at least one item.
135fn present_artifact_types(bag: &datasynth_audit_fsm::artifact::ArtifactBag) -> Vec<String> {
136    let mut types = Vec::new();
137    if !bag.engagements.is_empty() {
138        types.push("engagements".into());
139    }
140    if !bag.engagement_letters.is_empty() {
141        types.push("engagement_letters".into());
142    }
143    if !bag.materiality_calculations.is_empty() {
144        types.push("materiality_calculations".into());
145    }
146    if !bag.risk_assessments.is_empty() {
147        types.push("risk_assessments".into());
148    }
149    if !bag.combined_risk_assessments.is_empty() {
150        types.push("combined_risk_assessments".into());
151    }
152    if !bag.workpapers.is_empty() {
153        types.push("workpapers".into());
154    }
155    if !bag.evidence.is_empty() {
156        types.push("evidence".into());
157    }
158    if !bag.findings.is_empty() {
159        types.push("findings".into());
160    }
161    if !bag.judgments.is_empty() {
162        types.push("judgments".into());
163    }
164    if !bag.sampling_plans.is_empty() {
165        types.push("sampling_plans".into());
166    }
167    if !bag.sampled_items.is_empty() {
168        types.push("sampled_items".into());
169    }
170    if !bag.analytical_results.is_empty() {
171        types.push("analytical_results".into());
172    }
173    if !bag.going_concern_assessments.is_empty() {
174        types.push("going_concern_assessments".into());
175    }
176    if !bag.subsequent_events.is_empty() {
177        types.push("subsequent_events".into());
178    }
179    if !bag.audit_opinions.is_empty() {
180        types.push("audit_opinions".into());
181    }
182    if !bag.key_audit_matters.is_empty() {
183        types.push("key_audit_matters".into());
184    }
185    if !bag.procedure_steps.is_empty() {
186        types.push("procedure_steps".into());
187    }
188    if !bag.samples.is_empty() {
189        types.push("samples".into());
190    }
191    if !bag.confirmations.is_empty() {
192        types.push("confirmations".into());
193    }
194    if !bag.confirmation_responses.is_empty() {
195        types.push("confirmation_responses".into());
196    }
197    types
198}
199
// ---------------------------------------------------------------------------
// Main entry points
// ---------------------------------------------------------------------------
203
204/// Run a single blueprint test suite and return the result.
205///
206/// Loads the blueprint and overlay, executes an engagement with the given
207/// `seed`, then checks each expectation against the observed metrics.
208pub fn test_blueprint(suite: &BlueprintTestSuite, seed: u64) -> BlueprintTestResult {
209    let run = || -> Result<BlueprintTestResult, AuditFsmError> {
210        let bwp = resolve_blueprint(&suite.blueprint)?;
211        let overlay = resolve_overlay(&suite.overlay)?;
212        let rng = ChaCha8Rng::seed_from_u64(seed);
213
214        let mut engine = AuditFsmEngine::new(bwp, overlay, rng);
215        let ctx = EngagementContext::demo();
216        let result = engine.run_engagement(&ctx)?;
217
218        let total_procs = result.procedure_states.len();
219        let completed = result
220            .procedure_states
221            .values()
222            .filter(|s| s.as_str() == "completed" || s.as_str() == "closed")
223            .count();
224        let completion_rate = if total_procs > 0 {
225            completed as f64 / total_procs as f64
226        } else {
227            0.0
228        };
229
230        let artifact_types = present_artifact_types(&result.artifacts);
231
232        let metrics = BlueprintMetrics {
233            events: result.event_log.len(),
234            artifacts: result.artifacts.total_artifacts(),
235            procedures: completed,
236            phases_completed: result.phases_completed.clone(),
237            completion_rate,
238            duration_hours: result.total_duration_hours,
239            artifact_types_present: artifact_types.clone(),
240        };
241
242        // Check expectations.
243        let exp = &suite.expectations;
244        let mut failures = Vec::new();
245
246        if metrics.events < exp.min_events {
247            failures.push(format!(
248                "events: expected >= {}, got {}",
249                exp.min_events, metrics.events
250            ));
251        }
252        if metrics.artifacts < exp.min_artifacts {
253            failures.push(format!(
254                "artifacts: expected >= {}, got {}",
255                exp.min_artifacts, metrics.artifacts
256            ));
257        }
258        if metrics.procedures < exp.min_procedures {
259            failures.push(format!(
260                "procedures completed: expected >= {}, got {}",
261                exp.min_procedures, metrics.procedures
262            ));
263        }
264        if metrics.completion_rate < exp.min_completion_rate {
265            failures.push(format!(
266                "completion_rate: expected >= {:.2}, got {:.2}",
267                exp.min_completion_rate, metrics.completion_rate
268            ));
269        }
270        if metrics.duration_hours > exp.max_duration_hours {
271            failures.push(format!(
272                "duration_hours: expected <= {:.1}, got {:.1}",
273                exp.max_duration_hours, metrics.duration_hours
274            ));
275        }
276        for phase in &exp.expected_phases {
277            if !metrics.phases_completed.contains(phase) {
278                failures.push(format!(
279                    "expected phase '{}' to be completed, but it was not",
280                    phase
281                ));
282            }
283        }
284        for art_type in &exp.required_artifact_types {
285            if !artifact_types.contains(art_type) {
286                failures.push(format!(
287                    "required artifact type '{}' not present (present: {:?})",
288                    art_type, artifact_types
289                ));
290            }
291        }
292
293        let passed = failures.is_empty();
294        Ok(BlueprintTestResult {
295            passed,
296            failures,
297            metrics,
298        })
299    };
300
301    match run() {
302        Ok(result) => result,
303        Err(e) => BlueprintTestResult {
304            passed: false,
305            failures: vec![format!("engine error: {}", e)],
306            metrics: BlueprintMetrics {
307                events: 0,
308                artifacts: 0,
309                procedures: 0,
310                phases_completed: vec![],
311                completion_rate: 0.0,
312                duration_hours: 0.0,
313                artifact_types_present: vec![],
314            },
315        },
316    }
317}
318
319/// Test all built-in blueprints with reasonable default expectations.
320///
321/// Returns a vec of `(blueprint_name, BlueprintTestResult)`.  Each blueprint
322/// is tested with the default overlay and lenient expectations suitable for
323/// regression testing.
324pub fn test_all_builtins() -> Vec<(String, BlueprintTestResult)> {
325    // (name, min_events, min_artifacts, min_procedures, max_duration_hours)
326    let builtins: Vec<(&str, usize, usize, usize, f64)> = vec![
327        ("fsa", 10, 5, 3, 50_000.0),
328        ("ia", 10, 1, 3, 50_000.0),
329        ("kpmg", 10, 5, 3, 50_000.0),
330        ("pwc", 10, 5, 3, 50_000.0),
331        ("deloitte", 10, 5, 3, 50_000.0),
332        ("ey_gam_lite", 10, 5, 3, 50_000.0),
333    ];
334
335    builtins
336        .into_iter()
337        .map(|(name, min_events, min_artifacts, min_procs, max_hours)| {
338            let suite = BlueprintTestSuite {
339                blueprint: name.to_string(),
340                overlay: "default".to_string(),
341                expectations: BlueprintExpectations {
342                    min_events,
343                    min_artifacts,
344                    min_procedures: min_procs,
345                    expected_phases: vec![], // lenient: don't require specific phases
346                    min_completion_rate: 0.3,
347                    max_duration_hours: max_hours,
348                    required_artifact_types: vec!["engagements".into()],
349                },
350            };
351            let result = test_blueprint(&suite, 42);
352            (name.to_string(), result)
353        })
354        .collect()
355}
356
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
360
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a suite for the built-in `fsa` blueprint with the default overlay.
    fn fsa_suite(expectations: BlueprintExpectations) -> BlueprintTestSuite {
        BlueprintTestSuite {
            blueprint: "fsa".into(),
            overlay: "default".into(),
            expectations,
        }
    }

    /// Easily satisfiable thresholds must yield a passing result with metrics.
    #[test]
    fn test_passing_suite() {
        let suite = fsa_suite(BlueprintExpectations {
            min_events: 1,
            min_artifacts: 1,
            min_procedures: 1,
            expected_phases: vec![],
            min_completion_rate: 0.5,
            max_duration_hours: 100_000.0,
            required_artifact_types: vec!["engagements".into()],
        });

        let outcome = test_blueprint(&suite, 42);

        assert!(
            outcome.passed,
            "expected suite to pass, failures: {:?}",
            outcome.failures
        );
        assert!(outcome.failures.is_empty());
        assert!(outcome.metrics.events > 0);
        assert!(outcome.metrics.artifacts > 0);
    }

    /// Unreachable thresholds must fail and report several distinct problems.
    #[test]
    fn test_failing_suite_impossible_expectations() {
        let suite = fsa_suite(BlueprintExpectations {
            min_events: 999_999,
            min_artifacts: 999_999,
            min_procedures: 999,
            expected_phases: vec!["nonexistent_phase".into()],
            min_completion_rate: 1.0,
            max_duration_hours: 0.001,
            required_artifact_types: vec!["nonexistent_artifact_type".into()],
        });

        let outcome = test_blueprint(&suite, 42);

        assert!(!outcome.passed, "expected suite to fail");
        assert!(
            !outcome.failures.is_empty(),
            "expected at least one failure message"
        );
        // Should report multiple distinct failures.
        let failure_count = outcome.failures.len();
        assert!(
            failure_count >= 3,
            "expected >= 3 failures, got {}: {:?}",
            failure_count,
            outcome.failures
        );
    }

    /// Every built-in blueprint must pass its default expectations.
    #[test]
    fn test_all_builtins_pass() {
        let outcomes = test_all_builtins();

        assert!(
            !outcomes.is_empty(),
            "should have at least one builtin blueprint"
        );

        for (name, outcome) in outcomes.iter() {
            assert!(
                outcome.passed,
                "builtin '{}' failed: {:?}",
                name, outcome.failures
            );
            assert!(
                outcome.metrics.events > 0,
                "builtin '{}' produced 0 events",
                name
            );
        }
    }
}