datasynth_generators/audit/
cra_generator.rs

1//! Combined Risk Assessment (CRA) generator per ISA 315.
2//!
3//! For each entity the generator produces one `CombinedRiskAssessment` per
4//! (account area, assertion) combination drawn from a set of 12 standard
5//! account areas.  Inherent risk is driven by the economic nature of each
6//! account area; control risk can be overridden from external control-
7//! effectiveness data (e.g. from `InternalControl` records).
8//!
9//! # Significant risk rules (ISA 315.28 / ISA 240)
10//!
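//! The following are always flagged as significant risks, regardless of CRA level:
//! - Revenue / Occurrence (presumed fraud risk per ISA 240.26)
//! - Related Party / Occurrence (related-party transactions)
//!
//! The following are flagged as significant only when inherent risk is assessed as High:
//! - Inventory / Existence (physical observation per ISA 501)
//! - Valuation of estimate-heavy areas — Provisions, Accruals, Trade Receivables and
//!   Inventory (high estimation uncertainty)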
15
16use std::collections::HashMap;
17
18use datasynth_core::models::audit::risk_assessment_cra::{
19    AuditAssertion, CombinedRiskAssessment, RiskRating,
20};
21use datasynth_core::utils::seeded_rng;
22use rand::Rng;
23use rand_chacha::ChaCha8Rng;
24use tracing::{debug, info};
25
26// ---------------------------------------------------------------------------
27// Account area definition
28// ---------------------------------------------------------------------------
29
/// An account area with its default inherent risk and the assertions to assess.
///
/// Instances are only built as entries of the static [`ACCOUNT_AREAS`] table.
#[derive(Debug, Clone)]
struct AccountAreaSpec {
    /// Human-readable name (e.g. "Revenue").  The same string is used as the
    /// key for control-risk overrides, GL-prefix lookup and risk-factor text.
    name: &'static str,
    /// Default inherent risk when no other information is available.
    default_ir: RiskRating,
    /// Assertions to generate CRAs for.
    assertions: &'static [AuditAssertion],
    /// Whether the `Occurrence` assertion of this area is always a significant
    /// risk, whatever the assessed risk levels (set for Revenue and Related
    /// Parties in the table below).
    always_significant_occurrence: bool,
}
42
43/// Standard account areas per ISA 315 / typical audit scope.
44static ACCOUNT_AREAS: &[AccountAreaSpec] = &[
45    AccountAreaSpec {
46        name: "Revenue",
47        default_ir: RiskRating::High,
48        assertions: &[
49            AuditAssertion::Occurrence,
50            AuditAssertion::Cutoff,
51            AuditAssertion::Accuracy,
52        ],
53        always_significant_occurrence: true,
54    },
55    AccountAreaSpec {
56        name: "Cost of Sales",
57        default_ir: RiskRating::Medium,
58        assertions: &[AuditAssertion::Occurrence, AuditAssertion::Accuracy],
59        always_significant_occurrence: false,
60    },
61    AccountAreaSpec {
62        name: "Trade Receivables",
63        default_ir: RiskRating::High,
64        assertions: &[
65            AuditAssertion::Existence,
66            AuditAssertion::ValuationAndAllocation,
67        ],
68        always_significant_occurrence: false,
69    },
70    AccountAreaSpec {
71        name: "Inventory",
72        default_ir: RiskRating::High,
73        assertions: &[
74            AuditAssertion::Existence,
75            AuditAssertion::ValuationAndAllocation,
76        ],
77        always_significant_occurrence: false,
78    },
79    AccountAreaSpec {
80        name: "Fixed Assets",
81        default_ir: RiskRating::Medium,
82        assertions: &[
83            AuditAssertion::Existence,
84            AuditAssertion::ValuationAndAllocation,
85        ],
86        always_significant_occurrence: false,
87    },
88    AccountAreaSpec {
89        name: "Trade Payables",
90        default_ir: RiskRating::Low,
91        assertions: &[
92            AuditAssertion::CompletenessBalance,
93            AuditAssertion::Accuracy,
94        ],
95        always_significant_occurrence: false,
96    },
97    AccountAreaSpec {
98        name: "Accruals",
99        default_ir: RiskRating::Medium,
100        assertions: &[
101            AuditAssertion::CompletenessBalance,
102            AuditAssertion::ValuationAndAllocation,
103        ],
104        always_significant_occurrence: false,
105    },
106    AccountAreaSpec {
107        name: "Cash",
108        default_ir: RiskRating::Low,
109        assertions: &[
110            AuditAssertion::Existence,
111            AuditAssertion::CompletenessBalance,
112        ],
113        always_significant_occurrence: false,
114    },
115    AccountAreaSpec {
116        name: "Tax",
117        default_ir: RiskRating::Medium,
118        assertions: &[
119            AuditAssertion::Accuracy,
120            AuditAssertion::ValuationAndAllocation,
121        ],
122        always_significant_occurrence: false,
123    },
124    AccountAreaSpec {
125        name: "Equity",
126        default_ir: RiskRating::Low,
127        assertions: &[
128            AuditAssertion::Existence,
129            AuditAssertion::PresentationAndDisclosure,
130        ],
131        always_significant_occurrence: false,
132    },
133    AccountAreaSpec {
134        name: "Provisions",
135        default_ir: RiskRating::High,
136        assertions: &[
137            AuditAssertion::CompletenessBalance,
138            AuditAssertion::ValuationAndAllocation,
139        ],
140        always_significant_occurrence: false,
141    },
142    AccountAreaSpec {
143        name: "Related Parties",
144        default_ir: RiskRating::High,
145        assertions: &[AuditAssertion::Occurrence, AuditAssertion::Completeness],
146        always_significant_occurrence: true,
147    },
148];
149
150// ---------------------------------------------------------------------------
151// Risk factors by account area
152// ---------------------------------------------------------------------------
153
154fn risk_factors_for(area: &str, assertion: AuditAssertion) -> Vec<String> {
155    let mut factors: Vec<String> = Vec::new();
156
157    match area {
158        "Revenue" => {
159            factors.push(
160                "Revenue recognition involves judgment in identifying performance obligations"
161                    .into(),
162            );
163            if assertion == AuditAssertion::Occurrence {
164                factors.push(
165                    "Presumed fraud risk per ISA 240 — incentive to overstate revenue".into(),
166                );
167            }
168            if assertion == AuditAssertion::Cutoff {
169                factors.push(
170                    "Cut-off risk heightened near period-end due to shipping arrangements".into(),
171                );
172            }
173        }
174        "Trade Receivables" => {
175            factors
176                .push("Collectability assessment involves significant management judgment".into());
177            if assertion == AuditAssertion::ValuationAndAllocation {
178                factors.push(
179                    "ECL provisioning methodology may be complex under IFRS 9 / ASC 310".into(),
180                );
181            }
182        }
183        "Inventory" => {
184            factors.push("Physical quantities require verification through observation".into());
185            if assertion == AuditAssertion::ValuationAndAllocation {
186                factors
187                    .push("NRV impairment requires management's forward-looking estimates".into());
188            }
189        }
190        "Fixed Assets" => {
191            factors
192                .push("Capitalisation vs. expensing judgments affect reported asset values".into());
193            if assertion == AuditAssertion::ValuationAndAllocation {
194                factors
195                    .push("Depreciation method and useful life estimates involve judgment".into());
196            }
197        }
198        "Provisions" => {
199            factors.push("Provisions are inherently uncertain and require estimation".into());
200            factors.push("Completeness depends on management identifying all obligations".into());
201        }
202        "Related Parties" => {
203            factors.push("Related party transactions may not be conducted at arm's length".into());
204            factors.push(
205                "Completeness depends on management disclosing all related party relationships"
206                    .into(),
207            );
208        }
209        "Accruals" => {
210            factors.push(
211                "Accrual completeness relies on management's identification of liabilities".into(),
212            );
213        }
214        "Tax" => {
215            factors
216                .push("Tax provisions involve complex legislation and management judgment".into());
217            factors.push(
218                "Deferred tax calculation depends on timing difference identification".into(),
219            );
220        }
221        _ => {
222            factors.push(format!("{area} — standard inherent risk factors apply"));
223        }
224    }
225
226    factors
227}
228
229// ---------------------------------------------------------------------------
230// GL prefix mapping for balance-weighted risk
231// ---------------------------------------------------------------------------
232
/// Look up the GL account-code prefixes that belong to an account area.
///
/// `area` is a name as used in [`ACCOUNT_AREAS`].  The table mirrors the one in
/// `sampling_plan_generator`; it is duplicated here rather than shared so this
/// module does not depend on a private function of another module.
///
/// Returns an empty vector for "Related Parties" (no direct GL mapping) and
/// for any unrecognised area name.
fn account_area_to_gl_prefixes(area: &str) -> Vec<&'static str> {
    let prefixes: &[&'static str] = match area {
        "Cash" => &["10"],
        "Trade Receivables" => &["11"],
        "Inventory" => &["12", "13"],
        "Fixed Assets" => &["14", "15", "16"],
        "Tax" => &["17", "25"],
        "Trade Payables" => &["20"],
        "Accruals" => &["21", "22"],
        "Provisions" => &["26"],
        "Equity" => &["3"],
        "Revenue" => &["4"],
        "Cost of Sales" => &["5", "6"],
        // "Related Parties" has no direct GL mapping; unknown areas map to nothing.
        _ => &[],
    };
    prefixes.to_vec()
}
253
254/// Bump a [`RiskRating`] up one level (Low -> Medium, Medium -> High).
255/// High stays High.
256fn bump_risk_up(rating: RiskRating) -> RiskRating {
257    match rating {
258        RiskRating::Low => RiskRating::Medium,
259        RiskRating::Medium => RiskRating::High,
260        RiskRating::High => RiskRating::High,
261    }
262}
263
264/// Bump a [`RiskRating`] down one level (High -> Medium, Medium -> Low).
265/// Low stays Low.
266fn bump_risk_down(rating: RiskRating) -> RiskRating {
267    match rating {
268        RiskRating::Low => RiskRating::Low,
269        RiskRating::Medium => RiskRating::Low,
270        RiskRating::High => RiskRating::Medium,
271    }
272}
273
274// ---------------------------------------------------------------------------
275// Configuration
276// ---------------------------------------------------------------------------
277
/// Configuration for the CRA generator.
///
/// The two probabilities act as cumulative thresholds on a single uniform
/// draw whenever no control-risk override is supplied for an account area;
/// whatever probability mass remains is assigned to `RiskRating::High`.
#[derive(Debug, Clone)]
pub struct CraGeneratorConfig {
    /// Probability that control risk is Low (effective controls in place).
    pub effective_controls_probability: f64,
    /// Probability that control risk is Medium (partially effective).
    pub partial_controls_probability: f64,
    // Note: no_controls_probability = 1 - effective - partial, i.e. the chance
    // that control risk is High.  The values are not validated in this module,
    // so callers should keep `effective + partial <= 1.0`.
}
287
288impl Default for CraGeneratorConfig {
289    fn default() -> Self {
290        Self {
291            effective_controls_probability: 0.40,
292            partial_controls_probability: 0.45,
293        }
294    }
295}
296
297// ---------------------------------------------------------------------------
298// Generator
299// ---------------------------------------------------------------------------
300
/// Generator for Combined Risk Assessments per ISA 315.
///
/// Owns a seeded RNG, so the generating methods take `&mut self` and advance
/// the random stream on every call.
pub struct CraGenerator {
    /// Seeded random source used for inherent-risk jitter and control-risk draws.
    rng: ChaCha8Rng,
    /// Control-risk probabilities applied when no override is supplied.
    config: CraGeneratorConfig,
}
306
307impl CraGenerator {
308    /// Create a new generator with the given seed and default configuration.
309    pub fn new(seed: u64) -> Self {
310        Self {
311            rng: seeded_rng(seed, 0x315), // discriminator for ISA 315
312            config: CraGeneratorConfig::default(),
313        }
314    }
315
316    /// Create a new generator with custom configuration.
317    pub fn with_config(seed: u64, config: CraGeneratorConfig) -> Self {
318        Self {
319            rng: seeded_rng(seed, 0x315),
320            config,
321        }
322    }
323
324    /// Generate CRAs for all standard account areas for a single entity.
325    ///
326    /// # Arguments
327    /// * `entity_code` — The entity being assessed.
328    /// * `control_effectiveness` — Optional map from account area name to
329    ///   control risk override.  When `None` for an area the generator picks
330    ///   control risk randomly using the configured probabilities.
331    pub fn generate_for_entity(
332        &mut self,
333        entity_code: &str,
334        control_effectiveness: Option<&std::collections::HashMap<String, RiskRating>>,
335    ) -> Vec<CombinedRiskAssessment> {
336        info!("Generating CRAs for entity {}", entity_code);
337        let mut results = Vec::new();
338
339        for spec in ACCOUNT_AREAS {
340            for &assertion in spec.assertions {
341                let ir = self.jitter_inherent_risk(spec.default_ir);
342                let cr = self.assess_control_risk(spec.name, control_effectiveness);
343
344                // Determine significant risk flag
345                let is_significant = self.is_significant_risk(spec, assertion, ir, cr);
346
347                debug!(
348                    "CRA: {} {:?} -> IR={:?} CR={:?} significant={}",
349                    spec.name, assertion, ir, cr, is_significant
350                );
351
352                let risk_factors = risk_factors_for(spec.name, assertion);
353
354                let cra = CombinedRiskAssessment::new(
355                    entity_code,
356                    spec.name,
357                    assertion,
358                    ir,
359                    cr,
360                    is_significant,
361                    risk_factors,
362                );
363
364                results.push(cra);
365            }
366        }
367
368        info!(
369            "Generated {} CRAs for entity {}",
370            results.len(),
371            entity_code
372        );
373        results
374    }
375
376    /// Generate CRAs with inherent risk influenced by real account balances.
377    ///
378    /// Account areas whose balance exceeds 15% of total absolute balances
379    /// have their inherent risk bumped up one level; areas below 2% are
380    /// bumped down one level.  This ensures CRA risk ratings are coherent
381    /// with the financial data generated by the broader pipeline.
382    ///
383    /// # Arguments
384    /// * `entity_code` — The entity being assessed.
385    /// * `control_effectiveness` — Optional control risk override map.
386    /// * `account_balances` — GL account code to absolute balance mapping
387    ///   (e.g. `{"1100": 1_250_000.0, "4000": 5_000_000.0}`).
388    pub fn generate_for_entity_with_balances(
389        &mut self,
390        entity_code: &str,
391        control_effectiveness: Option<&HashMap<String, RiskRating>>,
392        account_balances: &HashMap<String, f64>,
393    ) -> Vec<CombinedRiskAssessment> {
394        info!(
395            "Generating balance-weighted CRAs for entity {} ({} accounts)",
396            entity_code,
397            account_balances.len()
398        );
399
400        let total_balance: f64 = account_balances.values().map(|b| b.abs()).sum();
401        let mut results = Vec::new();
402
403        for spec in ACCOUNT_AREAS {
404            // Compute this area's proportion of total balances.
405            let prefixes = account_area_to_gl_prefixes(spec.name);
406            let area_balance: f64 = if prefixes.is_empty() {
407                0.0
408            } else {
409                account_balances
410                    .iter()
411                    .filter(|(code, _)| prefixes.iter().any(|p| code.starts_with(p)))
412                    .map(|(_, bal)| bal.abs())
413                    .sum()
414            };
415            let proportion = if total_balance > 0.0 {
416                area_balance / total_balance
417            } else {
418                0.0
419            };
420
421            for &assertion in spec.assertions {
422                let mut ir = self.jitter_inherent_risk(spec.default_ir);
423
424                // Adjust inherent risk based on materiality proportion.
425                if proportion > 0.15 {
426                    ir = bump_risk_up(ir);
427                    debug!(
428                        "CRA balance bump-up: {} proportion={:.2} -> IR={:?}",
429                        spec.name, proportion, ir
430                    );
431                } else if proportion > 0.0 && proportion < 0.02 {
432                    ir = bump_risk_down(ir);
433                    debug!(
434                        "CRA balance bump-down: {} proportion={:.2} -> IR={:?}",
435                        spec.name, proportion, ir
436                    );
437                }
438
439                let cr = self.assess_control_risk(spec.name, control_effectiveness);
440                let is_significant = self.is_significant_risk(spec, assertion, ir, cr);
441
442                debug!(
443                    "CRA: {} {:?} -> IR={:?} CR={:?} significant={} (proportion={:.3})",
444                    spec.name, assertion, ir, cr, is_significant, proportion
445                );
446
447                let risk_factors = risk_factors_for(spec.name, assertion);
448
449                let cra = CombinedRiskAssessment::new(
450                    entity_code,
451                    spec.name,
452                    assertion,
453                    ir,
454                    cr,
455                    is_significant,
456                    risk_factors,
457                );
458
459                results.push(cra);
460            }
461        }
462
463        info!(
464            "Generated {} balance-weighted CRAs for entity {}",
465            results.len(),
466            entity_code
467        );
468        results
469    }
470
471    /// Apply small random jitter to the default inherent risk so outputs vary.
472    ///
473    /// There is a 15% chance of moving one step up/down from the default,
474    /// ensuring most assessments reflect the expected risk profile while
475    /// allowing realistic variation.
476    fn jitter_inherent_risk(&mut self, default: RiskRating) -> RiskRating {
477        let roll: f64 = self.rng.random();
478        match default {
479            RiskRating::Low => {
480                if roll > 0.85 {
481                    RiskRating::Medium
482                } else {
483                    RiskRating::Low
484                }
485            }
486            RiskRating::Medium => {
487                if roll < 0.10 {
488                    RiskRating::Low
489                } else if roll > 0.85 {
490                    RiskRating::High
491                } else {
492                    RiskRating::Medium
493                }
494            }
495            RiskRating::High => {
496                if roll > 0.85 {
497                    RiskRating::Medium
498                } else {
499                    RiskRating::High
500                }
501            }
502        }
503    }
504
505    /// Determine control risk for an account area.
506    ///
507    /// Uses the supplied override map if present, otherwise draws randomly
508    /// according to the configured probabilities.
509    fn assess_control_risk(
510        &mut self,
511        area: &str,
512        overrides: Option<&std::collections::HashMap<String, RiskRating>>,
513    ) -> RiskRating {
514        if let Some(map) = overrides {
515            if let Some(&cr) = map.get(area) {
516                return cr;
517            }
518        }
519        let roll: f64 = self.rng.random();
520        if roll < self.config.effective_controls_probability {
521            RiskRating::Low
522        } else if roll
523            < self.config.effective_controls_probability + self.config.partial_controls_probability
524        {
525            RiskRating::Medium
526        } else {
527            RiskRating::High
528        }
529    }
530
531    /// Apply the significant risk rules per ISA 315.28, ISA 240, and ISA 501.
532    fn is_significant_risk(
533        &self,
534        spec: &AccountAreaSpec,
535        assertion: AuditAssertion,
536        ir: RiskRating,
537        _cr: RiskRating,
538    ) -> bool {
539        // Per ISA 240.26 — revenue occurrence is always presumed fraud risk
540        if spec.always_significant_occurrence && assertion == AuditAssertion::Occurrence {
541            return true;
542        }
543        // Per ISA 501 — inventory existence requires physical observation (always significant
544        // when inherent risk is High, as quantities cannot be confirmed by other means).
545        if spec.name == "Inventory"
546            && assertion == AuditAssertion::Existence
547            && ir == RiskRating::High
548        {
549            return true;
550        }
551        // High IR on high-judgment areas (Provisions, Estimates) is significant
552        if ir == RiskRating::High
553            && matches!(
554                spec.name,
555                "Provisions" | "Accruals" | "Trade Receivables" | "Inventory"
556            )
557            && assertion == AuditAssertion::ValuationAndAllocation
558        {
559            return true;
560        }
561        false
562    }
563}
564
565// ---------------------------------------------------------------------------
566// Tests
567// ---------------------------------------------------------------------------
568
#[cfg(test)]
// Tests unwrap freely: a panic is exactly the failure signal we want here.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Number of CRAs one generation pass must yield: one per
    /// (area, assertion) pair of the static table.
    fn table_cra_count() -> usize {
        ACCOUNT_AREAS.iter().map(|spec| spec.assertions.len()).sum()
    }

    #[test]
    fn generates_cras_for_entity() {
        let mut gen = CraGenerator::new(42);
        let cras = gen.generate_for_entity("C001", None);
        // Every area carries at least two assertions, so 2 × 12 is the floor...
        assert!(cras.len() >= 2 * ACCOUNT_AREAS.len());
        // ...and the exact count is fixed by the table.
        assert_eq!(cras.len(), table_cra_count());
    }

    #[test]
    fn revenue_occurrence_always_significant() {
        let mut gen = CraGenerator::new(42);
        let cras = gen.generate_for_entity("C001", None);
        let rev_occurrence = cras
            .iter()
            .find(|c| c.account_area == "Revenue" && c.assertion == AuditAssertion::Occurrence);
        assert!(
            rev_occurrence.is_some(),
            "Revenue/Occurrence CRA should exist"
        );
        assert!(
            rev_occurrence.unwrap().significant_risk,
            "Revenue/Occurrence must always be significant per ISA 240"
        );
    }

    #[test]
    fn related_party_occurrence_is_significant() {
        let mut gen = CraGenerator::new(42);
        let cras = gen.generate_for_entity("C001", None);
        let rp = cras.iter().find(|c| {
            c.account_area == "Related Parties" && c.assertion == AuditAssertion::Occurrence
        });
        assert!(rp.is_some(), "Related Parties/Occurrence CRA should exist");
        assert!(rp.unwrap().significant_risk);
    }

    #[test]
    fn cra_ids_are_unique() {
        let mut gen = CraGenerator::new(42);
        let cras = gen.generate_for_entity("C001", None);
        let ids: std::collections::HashSet<&str> = cras.iter().map(|c| c.id.as_str()).collect();
        assert_eq!(ids.len(), cras.len(), "CRA IDs should be unique");
    }

    #[test]
    fn control_override_respected() {
        let mut overrides = HashMap::new();
        overrides.insert("Cash".into(), RiskRating::Low);
        let mut gen = CraGenerator::new(42);
        let cras = gen.generate_for_entity("C001", Some(&overrides));
        let cash_cras: Vec<_> = cras.iter().filter(|c| c.account_area == "Cash").collect();
        // Guard against the loop below passing vacuously.
        assert!(!cash_cras.is_empty(), "Cash CRAs should exist");
        for c in &cash_cras {
            assert_eq!(
                c.control_risk,
                RiskRating::Low,
                "Control override should apply"
            );
        }
    }

    #[test]
    fn same_seed_produces_same_ratings() {
        let mut gen1 = CraGenerator::new(7);
        let mut gen2 = CraGenerator::new(7);
        let first = gen1.generate_for_entity("C001", None);
        let second = gen2.generate_for_entity("C001", None);

        assert_eq!(first.len(), second.len());
        for (a, b) in first.iter().zip(second.iter()) {
            assert_eq!(a.account_area, b.account_area);
            assert_eq!(a.assertion, b.assertion);
            assert_eq!(a.inherent_risk, b.inherent_risk);
            assert_eq!(a.control_risk, b.control_risk);
            assert_eq!(a.significant_risk, b.significant_risk);
        }
    }

    #[test]
    fn balance_weighted_bumps_high_proportion_areas() {
        // Revenue accounts dominate (>15%) — any jitter down to Medium is bumped back up,
        // so IR is always High.  Cash is tiny (<2%) — any jitter up to Medium is bumped
        // back down, so IR is always Low.
        let balances = HashMap::from([
            ("4000".into(), 8_000_000.0), // Revenue — huge proportion
            ("1100".into(), 500_000.0),   // Trade Receivables
            ("1010".into(), 50_000.0),    // Cash — tiny proportion (<2%)
        ]);

        let mut gen = CraGenerator::new(42);
        let cras = gen.generate_for_entity_with_balances("C001", None, &balances);

        // Same number of CRAs as the non-weighted version.
        assert_eq!(cras.len(), table_cra_count());

        // Revenue is >15% of total — IR must end up High.
        let rev = cras
            .iter()
            .filter(|c| c.account_area == "Revenue")
            .collect::<Vec<_>>();
        assert!(!rev.is_empty(), "Revenue CRAs should exist");
        for c in &rev {
            assert_eq!(
                c.inherent_risk,
                RiskRating::High,
                "Revenue with huge balance should have High IR"
            );
        }

        // Cash is <2% of total — IR must end up Low.
        let cash = cras
            .iter()
            .filter(|c| c.account_area == "Cash")
            .collect::<Vec<_>>();
        assert!(!cash.is_empty(), "Cash CRAs should exist");
        for c in &cash {
            assert_eq!(
                c.inherent_risk,
                RiskRating::Low,
                "Cash with tiny balance should have Low IR"
            );
        }
    }

    #[test]
    fn balance_weighted_same_count_as_unweighted() {
        let balances = HashMap::from([("4000".into(), 5_000_000.0), ("1100".into(), 1_250_000.0)]);
        let mut gen1 = CraGenerator::new(99);
        let cras_unweighted = gen1.generate_for_entity("C001", None);

        let mut gen2 = CraGenerator::new(99);
        let cras_weighted = gen2.generate_for_entity_with_balances("C001", None, &balances);

        assert_eq!(
            cras_unweighted.len(),
            cras_weighted.len(),
            "Weighted and unweighted should produce the same number of CRAs"
        );
    }

    #[test]
    fn balance_weighted_empty_balances_same_as_unweighted() {
        let empty: HashMap<String, f64> = HashMap::new();
        let mut gen1 = CraGenerator::new(55);
        let cras_unweighted = gen1.generate_for_entity("C001", None);

        let mut gen2 = CraGenerator::new(55);
        let cras_weighted = gen2.generate_for_entity_with_balances("C001", None, &empty);

        // With empty balances, proportion is 0.0 for all areas — no bumps applied.
        // Since the same seed is used and both paths draw from the RNG in the same
        // order, results should match the unweighted version.
        assert_eq!(cras_unweighted.len(), cras_weighted.len());
        for (a, b) in cras_unweighted.iter().zip(cras_weighted.iter()) {
            assert_eq!(a.account_area, b.account_area);
            assert_eq!(a.assertion, b.assertion);
            assert_eq!(
                a.inherent_risk, b.inherent_risk,
                "With empty balances, IR should match unweighted for {}//{:?}",
                a.account_area, a.assertion
            );
            assert_eq!(
                a.control_risk, b.control_risk,
                "With empty balances, CR should match unweighted for {}//{:?}",
                a.account_area, a.assertion
            );
            assert_eq!(a.significant_risk, b.significant_risk);
        }
    }
}
datasynth_generators/audit/cra_generator.rs

datasynth_generators/audit/
cra_generator.rs