Skip to main content

datasynth_generators/
external_expectation_generator.rs

1//! External-expectation generator — the ISA-520 substantive-analytics layer (Phase 2).
2//!
3//! Emits, per material GL account, an [`ExternalExpectation`]: an expected period total derived from
4//! an exogenous driver (prior-year / market / macro / budget) plus a materiality tolerance band, the
5//! realized deviation, and the ground-truth fraud contribution.
6//!
7//! The expectation is anchored to the account's *legitimate* level (the sum of its non-fraud
8//! postings) perturbed by a forecast error — modelling a real auditor's prior-year/benchmark
9//! expectation, which tracks the legitimate economics but is imprecise. A mimetic fraud preserves the
10//! per-entry ledger distribution (so the per-JE residual arms are blind) yet inflates the account's
11//! *aggregate* above this expectation: a band exceedance is the ISA-520 "investigate" trigger, and is
12//! a true positive iff the account was in fact fraud-inflated (the engine knows). This is the
13//! engine-side realization of the perfect-crime countermeasure (`docs/phase2-ledger-evidence-assurance.md`).
14
15use datasynth_config::schema::ExternalExpectationsConfig;
16use datasynth_core::models::{AccountType, ExpectationDriver, ExternalExpectation};
17use datasynth_core::utils::seeded_rng;
18use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
19use rand_chacha::ChaCha8Rng;
20use rand_distr::{Distribution, Normal};
21use rust_decimal::prelude::{FromPrimitive, ToPrimitive};
22use rust_decimal::Decimal;
23
24/// Per-account input to the expectation generator: the realized totals split into legitimate vs actual.
25#[derive(Debug, Clone)]
26pub struct AccountActuals {
27    /// GL account number.
28    pub account_code: String,
29    /// GL account description.
30    pub account_description: String,
31    /// GL account type.
32    pub account_type: AccountType,
33    /// All postings to the account (legitimate + fraud).
34    pub actual_total: Decimal,
35    /// Postings to the account from non-fraud journal entries only.
36    pub legit_total: Decimal,
37}
38
39/// Generates [`ExternalExpectation`] records for a company's material accounts.
40pub struct ExternalExpectationsGenerator {
41    rng: ChaCha8Rng,
42    uuid_factory: DeterministicUuidFactory,
43}
44
45impl ExternalExpectationsGenerator {
46    /// Create a new generator with the given seed.
47    pub fn new(seed: u64) -> Self {
48        Self {
49            rng: seeded_rng(seed, 0),
50            uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::ExternalExpectation),
51        }
52    }
53
54    /// Generate expectations for the material accounts among `accounts`.
55    ///
56    /// # Arguments
57    /// * `company_code` — the company the accounts belong to.
58    /// * `fiscal_year` — the fiscal year the expectations apply to.
59    /// * `accounts` — per-account realized totals (actual + legitimate).
60    /// * `config` — driver, tolerance band, forecast noise, growth, and materiality threshold.
61    pub fn generate(
62        &mut self,
63        company_code: &str,
64        fiscal_year: i32,
65        accounts: &[AccountActuals],
66        config: &ExternalExpectationsConfig,
67    ) -> Vec<ExternalExpectation> {
68        let grand_legit: Decimal = accounts.iter().map(|a| a.legit_total).sum();
69        if grand_legit <= Decimal::ZERO {
70            return Vec::new();
71        }
72        let min_legit = grand_legit
73            * Decimal::from_f64(config.min_materiality_share.max(0.0)).unwrap_or(Decimal::ZERO);
74        let noise = Normal::new(0.0, config.forecast_noise.max(1e-9)).expect("valid normal params");
75        let tol = config.tolerance_pct.max(1e-9);
76
77        let mut out = Vec::new();
78        for a in accounts {
79            // substantive analytics targets material balances; skip the rest (pure sampling noise).
80            if a.legit_total < min_legit {
81                continue;
82            }
83            let legit = a.legit_total.to_f64().unwrap_or(0.0);
84            // expectation = the legitimate level perturbed by the auditor's forecast error.
85            let forecast_err: f64 = noise.sample(&mut self.rng);
86            let expected_f = (legit * (1.0 + forecast_err)).max(0.0);
87            let expected = Decimal::from_f64(expected_f)
88                .unwrap_or(Decimal::ZERO)
89                .round_dp(2);
90            let band = (expected_f * tol).max(1.0);
91
92            let actual = a.actual_total;
93            let actual_f = actual.to_f64().unwrap_or(0.0);
94            let deviation_f = actual_f - expected_f;
95            let fraud_inflation = a.actual_total - a.legit_total;
96            let (driver_value, basis) =
97                driver_view(config.driver, legit, config.growth_rate, expected);
98
99            out.push(ExternalExpectation {
100                expectation_id: self.uuid_factory.next().to_string(),
101                company_code: company_code.to_string(),
102                account_code: a.account_code.clone(),
103                account_description: a.account_description.clone(),
104                account_type: a.account_type,
105                fiscal_year,
106                driver: config.driver,
107                basis,
108                driver_value,
109                expected_value: expected,
110                tolerance_pct: config.tolerance_pct,
111                lower_bound: Decimal::from_f64(expected_f - band)
112                    .unwrap_or(Decimal::ZERO)
113                    .round_dp(2),
114                upper_bound: Decimal::from_f64(expected_f + band)
115                    .unwrap_or(Decimal::ZERO)
116                    .round_dp(2),
117                actual_value: actual,
118                deviation: Decimal::from_f64(deviation_f)
119                    .unwrap_or(Decimal::ZERO)
120                    .round_dp(2),
121                deviation_ratio: deviation_f / band,
122                exceeds_band: deviation_f.abs() > band,
123                fraud_inflation,
124                is_fraud_inflated: fraud_inflation > Decimal::ZERO,
125            });
126        }
127        out
128    }
129}
130
131/// The driver's reported value + a human-readable ISA-520 basis note.
132fn driver_view(
133    driver: ExpectationDriver,
134    legit: f64,
135    growth: f64,
136    expected: Decimal,
137) -> (Decimal, String) {
138    match driver {
139        ExpectationDriver::PriorYear => {
140            let prior = if (1.0 + growth).abs() > 1e-9 {
141                legit / (1.0 + growth)
142            } else {
143                legit
144            };
145            let pv = Decimal::from_f64(prior)
146                .unwrap_or(Decimal::ZERO)
147                .round_dp(2);
148            (
149                pv,
150                format!("prior-year actual {pv} grown at {:.1}%", growth * 100.0),
151            )
152        }
153        ExpectationDriver::MarketIndex => (
154            expected,
155            "market/industry index, sensitivity calibrated to the legitimate level".to_string(),
156        ),
157        ExpectationDriver::MacroSeries => (
158            expected,
159            "macroeconomic series, sensitivity calibrated to the legitimate level".to_string(),
160        ),
161        ExpectationDriver::Budget => (expected, "budgeted amount for the account".to_string()),
162    }
163}
164
#[cfg(test)]
mod tests {
    use super::*;

    /// Shared configuration: prior-year driver, 10% tolerance, 4% forecast noise, 5% growth and a
    /// 0.5% materiality share.
    fn cfg() -> ExternalExpectationsConfig {
        ExternalExpectationsConfig {
            enabled: true,
            driver: ExpectationDriver::PriorYear,
            tolerance_pct: 0.10,
            forecast_noise: 0.04,
            growth_rate: 0.05,
            min_materiality_share: 0.005,
        }
    }

    /// Builds a revenue account with whole-unit actual and legitimate totals.
    fn acct(code: &str, actual: i64, legit: i64) -> AccountActuals {
        let account_code = code.to_string();
        let account_description = format!("Account {code}");
        AccountActuals {
            account_code,
            account_description,
            account_type: AccountType::Revenue,
            actual_total: Decimal::from(actual),
            legit_total: Decimal::from(legit),
        }
    }

    /// Finds the expectation emitted for `code`, if any.
    fn lookup<'a>(
        produced: &'a [ExternalExpectation],
        code: &str,
    ) -> Option<&'a ExternalExpectation> {
        produced.iter().find(|e| e.account_code == code)
    }

    /// An account with no fraud stays within its band, whereas an inflated account breaks out of
    /// it and is flagged with the right ground truth.
    #[test]
    fn fraud_inflation_breaches_band_clean_does_not() {
        let inputs = [
            acct("4000", 1_000_000, 1_000_000), // no fraud
            acct("4010", 3_000_000, 1_000_000), // tripled: 2M of fraud on top of 1M legitimate
        ];
        let mut generator = ExternalExpectationsGenerator::new(7);
        let produced = generator.generate("1000", 2024, &inputs, &cfg());
        assert_eq!(produced.len(), 2);

        let clean = lookup(&produced, "4000").unwrap();
        assert!(!clean.is_fraud_inflated);
        assert!(
            !clean.exceeds_band,
            "clean account should sit in band, dev_ratio={}",
            clean.deviation_ratio
        );

        let fraud = lookup(&produced, "4010").unwrap();
        assert!(fraud.is_fraud_inflated);
        assert_eq!(fraud.fraud_inflation, Decimal::from(2_000_000));
        assert!(
            fraud.exceeds_band,
            "fraud-inflated account must breach the band"
        );
        assert!(fraud.deviation_ratio > 1.0);
        assert!(fraud.expected_value > Decimal::ZERO);
        assert!(fraud.lower_bound < fraud.upper_bound);
    }

    /// Accounts under the materiality share produce no expectation.
    #[test]
    fn immaterial_accounts_are_skipped() {
        let inputs = [
            acct("4000", 10_000_000, 10_000_000),
            acct("9999", 100, 100), // roughly 0.001% of the total, i.e. immaterial
        ];
        let mut generator = ExternalExpectationsGenerator::new(1);
        let produced = generator.generate("1000", 2024, &inputs, &cfg());
        assert!(lookup(&produced, "9999").is_none());
        assert!(lookup(&produced, "4000").is_some());
    }

    /// Two runs with an identical seed and identical inputs agree record by record.
    #[test]
    fn deterministic() {
        let inputs = [
            acct("4000", 2_000_000, 1_000_000),
            acct("4010", 900_000, 900_000),
        ];
        let run = || ExternalExpectationsGenerator::new(42).generate("1000", 2024, &inputs, &cfg());
        let first = run();
        let second = run();
        assert_eq!(first.len(), second.len());
        for (lhs, rhs) in first.iter().zip(second.iter()) {
            assert_eq!(lhs.expected_value, rhs.expected_value);
            assert_eq!(lhs.deviation, rhs.deviation);
            assert_eq!(lhs.exceeds_band, rhs.exceeds_band);
        }
    }

    /// No accounts, or accounts without any activity, give an empty result and do not panic.
    #[test]
    fn empty_is_safe() {
        let mut generator = ExternalExpectationsGenerator::new(3);
        let none = generator.generate("1000", 2024, &[], &cfg());
        assert!(none.is_empty());

        let inactive = [acct("4000", 0, 0)];
        let still_none = generator.generate("1000", 2024, &inactive, &cfg());
        assert!(still_none.is_empty());
    }
}