Skip to main content

datasynth_audit_optimizer/
risk_scoping.rs

1//! Risk-based audit scoping with coverage analysis and what-if impact reports.
2//!
3//! Provides functions to measure how well a set of included procedures covers
4//! the standards and risk dimensions defined in a blueprint, and to estimate
5//! the impact of removing a single procedure from the plan.
6
7use std::collections::{HashMap, HashSet};
8
9use serde::Serialize;
10
11use datasynth_audit_fsm::schema::AuditBlueprint;
12
13// ---------------------------------------------------------------------------
14// Types
15// ---------------------------------------------------------------------------
16
/// Coverage metrics for a set of included procedures against a blueprint.
///
/// Produced by [`analyze_coverage`]. The two standards lists are sorted, so
/// they are stable across runs; `risk_coverage` is a `HashMap`, whose
/// iteration order is unspecified.
#[derive(Debug, Clone, Serialize)]
pub struct CoverageReport {
    /// Fraction of distinct standards covered (0.0 to 1.0).
    ///
    /// Reported as 1.0 when the blueprint references no standards at all.
    pub standards_coverage: f64,
    /// Standards ref_ids that are covered by included procedures (sorted).
    pub standards_covered: Vec<String>,
    /// Standards ref_ids that are *not* covered by any included procedure
    /// (sorted).
    pub standards_uncovered: Vec<String>,
    /// Per-discriminator-category coverage fraction.
    ///
    /// For each category: distinct values found on included procedures divided
    /// by distinct values found on any blueprint procedure. A category that
    /// has no values is reported as 1.0.
    pub risk_coverage: HashMap<String, f64>,
    /// Total number of procedures in the blueprint.
    pub total_procedures: usize,
    /// Number of procedures in the included set that exist in the blueprint.
    ///
    /// Included ids that match no blueprint procedure are not counted.
    pub included_procedures: usize,
}
33
/// What-if impact report for removing a single procedure from the plan.
///
/// Produced by [`impact_of_removing`]. All deltas are computed as
/// "coverage after removal" minus "coverage before removal".
#[derive(Debug, Clone, Serialize)]
pub struct ImpactReport {
    /// The procedure being hypothetically removed.
    pub removed_procedure: String,
    /// Standards that would become uncovered after removal (sorted).
    pub standards_lost: Vec<String>,
    /// Change in standards_coverage (negative means coverage decreases).
    pub standards_coverage_delta: f64,
    /// Per-category change in risk coverage (negative means coverage
    /// decreases for that category).
    pub risk_coverage_delta: HashMap<String, f64>,
    /// Procedures whose preconditions list the removed procedure.
    ///
    /// Only procedures that are part of the current plan are listed; the
    /// list is sorted.
    pub dependent_procedures_affected: Vec<String>,
}
48
49// ---------------------------------------------------------------------------
50// Public API
51// ---------------------------------------------------------------------------
52
53/// Analyse the coverage of a set of included procedures against the blueprint.
54///
55/// Standards are identified via `step.standards[].ref_id` across all procedures.
56/// Risk coverage is computed per discriminator category: the fraction of values
57/// that appear in at least one included procedure.
58pub fn analyze_coverage(
59    blueprint: &AuditBlueprint,
60    included_procedures: &[String],
61) -> CoverageReport {
62    let included_set: HashSet<&str> = included_procedures.iter().map(|s| s.as_str()).collect();
63
64    // Collect all procedure ids.
65    let all_proc_ids: Vec<&str> = blueprint
66        .phases
67        .iter()
68        .flat_map(|ph| ph.procedures.iter())
69        .map(|p| p.id.as_str())
70        .collect();
71
72    let total_procedures = all_proc_ids.len();
73    let included_count = all_proc_ids
74        .iter()
75        .filter(|id| included_set.contains(**id))
76        .count();
77
78    // --- Standards coverage ---
79    let mut total_standards: HashSet<String> = HashSet::new();
80    let mut covered_standards: HashSet<String> = HashSet::new();
81
82    for phase in &blueprint.phases {
83        for proc in &phase.procedures {
84            for step in &proc.steps {
85                for std_ref in &step.standards {
86                    total_standards.insert(std_ref.ref_id.clone());
87                    if included_set.contains(proc.id.as_str()) {
88                        covered_standards.insert(std_ref.ref_id.clone());
89                    }
90                }
91            }
92        }
93    }
94
95    let standards_coverage = if total_standards.is_empty() {
96        1.0
97    } else {
98        covered_standards.len() as f64 / total_standards.len() as f64
99    };
100
101    let mut standards_covered: Vec<String> = covered_standards.iter().cloned().collect();
102    standards_covered.sort();
103
104    let mut standards_uncovered: Vec<String> = total_standards
105        .difference(&covered_standards)
106        .cloned()
107        .collect();
108    standards_uncovered.sort();
109
110    // --- Risk (discriminator) coverage per category ---
111    // For each category, collect the total set of values and the included set.
112    let mut cat_total: HashMap<String, HashSet<String>> = HashMap::new();
113    let mut cat_included: HashMap<String, HashSet<String>> = HashMap::new();
114
115    for phase in &blueprint.phases {
116        for proc in &phase.procedures {
117            for (cat, vals) in &proc.discriminators {
118                let total_entry = cat_total.entry(cat.clone()).or_default();
119                let inc_entry = cat_included.entry(cat.clone()).or_default();
120                for v in vals {
121                    total_entry.insert(v.clone());
122                    if included_set.contains(proc.id.as_str()) {
123                        inc_entry.insert(v.clone());
124                    }
125                }
126            }
127        }
128    }
129
130    let mut risk_coverage: HashMap<String, f64> = HashMap::new();
131    for (cat, total_vals) in &cat_total {
132        let inc_vals = cat_included.get(cat).map(|s| s.len()).unwrap_or(0);
133        let frac = if total_vals.is_empty() {
134            1.0
135        } else {
136            inc_vals as f64 / total_vals.len() as f64
137        };
138        risk_coverage.insert(cat.clone(), frac);
139    }
140
141    CoverageReport {
142        standards_coverage,
143        standards_covered,
144        standards_uncovered,
145        risk_coverage,
146        total_procedures,
147        included_procedures: included_count,
148    }
149}
150
151/// Estimate the impact of removing a single procedure from the current plan.
152///
153/// Returns the delta in standards coverage and risk coverage, as well as which
154/// procedures depend on the removed one via preconditions.
155pub fn impact_of_removing(
156    blueprint: &AuditBlueprint,
157    preconditions: &HashMap<String, Vec<String>>,
158    current_plan: &[String],
159    remove_procedure: &str,
160) -> ImpactReport {
161    // Compute coverage with the full plan.
162    let before = analyze_coverage(blueprint, current_plan);
163
164    // Compute coverage without the removed procedure.
165    let after_plan: Vec<String> = current_plan
166        .iter()
167        .filter(|id| id.as_str() != remove_procedure)
168        .cloned()
169        .collect();
170    let after = analyze_coverage(blueprint, &after_plan);
171
172    // Standards that become uncovered.
173    let after_covered: HashSet<&str> = after.standards_covered.iter().map(|s| s.as_str()).collect();
174    let mut standards_lost: Vec<String> = before
175        .standards_covered
176        .iter()
177        .filter(|s| !after_covered.contains(s.as_str()))
178        .cloned()
179        .collect();
180    standards_lost.sort();
181
182    let standards_coverage_delta = after.standards_coverage - before.standards_coverage;
183
184    // Risk coverage delta per category.
185    let mut risk_coverage_delta: HashMap<String, f64> = HashMap::new();
186    for (cat, before_val) in &before.risk_coverage {
187        let after_val = after.risk_coverage.get(cat).copied().unwrap_or(0.0);
188        risk_coverage_delta.insert(cat.clone(), after_val - before_val);
189    }
190    // Include categories that only appear in after (unlikely, but complete).
191    for (cat, after_val) in &after.risk_coverage {
192        risk_coverage_delta
193            .entry(cat.clone())
194            .or_insert_with(|| after_val - 0.0);
195    }
196
197    // Dependent procedures: those whose preconditions include the removed one.
198    let mut dependent_procedures_affected: Vec<String> = preconditions
199        .iter()
200        .filter(|(proc_id, deps)| {
201            current_plan.contains(proc_id) && deps.iter().any(|d| d == remove_procedure)
202        })
203        .map(|(proc_id, _)| proc_id.clone())
204        .collect();
205    dependent_procedures_affected.sort();
206
207    ImpactReport {
208        removed_procedure: remove_procedure.to_string(),
209        standards_lost,
210        standards_coverage_delta,
211        risk_coverage_delta,
212        dependent_procedures_affected,
213    }
214}
215
216// ---------------------------------------------------------------------------
217// Tests
218// ---------------------------------------------------------------------------
219
#[cfg(test)]
mod tests {
    use super::*;
    use datasynth_audit_fsm::loader::BlueprintWithPreconditions;

    /// Load the built-in FSA blueprint together with its precondition map.
    fn load_fsa() -> BlueprintWithPreconditions {
        BlueprintWithPreconditions::load_builtin_fsa().expect("builtin FSA blueprint should load")
    }

    /// Ids of every procedure in the blueprint, walking the phases in order.
    fn every_procedure_id(fsa: &BlueprintWithPreconditions) -> Vec<String> {
        let mut ids = Vec::new();
        for phase in &fsa.blueprint.phases {
            ids.extend(phase.procedures.iter().map(|procedure| procedure.id.clone()));
        }
        ids
    }

    /// Scoping in every procedure must cover every standard.
    #[test]
    fn test_full_scope_100_percent() {
        let fsa = load_fsa();
        let full_plan = every_procedure_id(&fsa);

        let report = analyze_coverage(&fsa.blueprint, &full_plan);

        let deviation = (report.standards_coverage - 1.0).abs();
        assert!(
            deviation < f64::EPSILON,
            "full scope should give 100% standards coverage, got {}",
            report.standards_coverage
        );
        assert!(
            report.standards_uncovered.is_empty(),
            "full scope should have no uncovered standards"
        );
        assert_eq!(report.included_procedures, report.total_procedures);
    }

    /// Scoping in nothing must cover nothing.
    #[test]
    fn test_empty_scope_zero_percent() {
        let fsa = load_fsa();
        let nothing: Vec<String> = Vec::new();

        let report = analyze_coverage(&fsa.blueprint, &nothing);

        assert!(
            report.standards_coverage.abs() < f64::EPSILON,
            "empty scope should give 0% standards coverage, got {}",
            report.standards_coverage
        );
        assert!(
            report.standards_covered.is_empty(),
            "empty scope should cover no standards"
        );
        assert_eq!(report.included_procedures, 0);
    }

    /// A single-procedure scope lands strictly between 0% and 100%.
    #[test]
    fn test_partial_scope_coverage() {
        let fsa = load_fsa();
        // Scope in only the first procedure of the first phase.
        let scope = vec![fsa.blueprint.phases[0].procedures[0].id.clone()];

        let report = analyze_coverage(&fsa.blueprint, &scope);

        assert!(
            report.standards_coverage > 0.0,
            "partial scope should have > 0% coverage"
        );
        assert!(
            report.standards_coverage < 1.0,
            "partial scope should have < 100% coverage"
        );
        assert_eq!(report.included_procedures, 1);
    }

    /// Removing a procedure must list the procedures that depend on it.
    #[test]
    fn test_removal_impact_reports_dependents() {
        let fsa = load_fsa();
        let full_plan = every_procedure_id(&fsa);

        // substantive_testing is a precondition of going_concern and subsequent_events
        let impact = impact_of_removing(
            &fsa.blueprint,
            &fsa.preconditions,
            &full_plan,
            "substantive_testing",
        );

        assert_eq!(impact.removed_procedure, "substantive_testing");
        for dependent in ["going_concern", "subsequent_events"].iter() {
            let reported = impact
                .dependent_procedures_affected
                .iter()
                .any(|id| id.as_str() == *dependent);
            assert!(reported, "{} depends on substantive_testing", dependent);
        }
    }

    /// Both report types must round-trip through `serde_json::to_string`.
    #[test]
    fn test_reports_serialize() {
        let fsa = load_fsa();
        let full_plan = every_procedure_id(&fsa);

        let coverage_json = serde_json::to_string(&analyze_coverage(&fsa.blueprint, &full_plan))
            .expect("CoverageReport should serialize");
        assert!(coverage_json.contains("standards_coverage"));
        assert!(coverage_json.contains("risk_coverage"));

        let impact = impact_of_removing(
            &fsa.blueprint,
            &fsa.preconditions,
            &full_plan,
            "accept_engagement",
        );
        let impact_json = serde_json::to_string(&impact).expect("ImpactReport should serialize");
        assert!(impact_json.contains("removed_procedure"));
        assert!(impact_json.contains("standards_lost"));
        assert!(impact_json.contains("dependent_procedures_affected"));
    }
}