datasynth-generators 5.34.0

50+ data generators covering GL, P2P, O2C, S2C, HR, manufacturing, audit, tax, treasury, and ESG
Documentation
//! Evidence-anchor generator — the ISA-505 external-corroboration layer (Phase 2).
//!
//! Emits, per material GL account, an [`EvidenceAnchor`]: whether the account's activity is
//! corroborated by evidence *exogenous* to the ledger, and — if material and uncorroborated — a
//! **dangling node** (the ISA-505 existence/occurrence lead).
//!
//! Corroboration is modelled, not sampled from the engine's (sparse) confirmation population:
//! genuine accounts are corroborated at `corroboration_rate`; fraud-linked accounts only at
//! `fabrication_evade_rate` (the adversary who forged the external evidence — the expensive, fragile
//! "perfect audit crime" of `prop:counter`). A material, uncorroborated account is a true positive
//! iff it was fraud-linked; the `(1 − corroboration_rate)` share of genuine accounts that goes
//! uncorroborated yields realistic false positives (legitimate-but-unconfirmed balances). The
//! engine knows the ground truth (`is_fraud_linked`), so the dangling-node detector can be scored
//! against it.

use datasynth_config::schema::EvidenceAnchorsConfig;
use datasynth_core::models::{AccountType, CorroborationMethod, EvidenceAnchor};
use datasynth_core::utils::seeded_rng;
use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
use rand::prelude::*;
use rand_chacha::ChaCha8Rng;
use rust_decimal::Decimal;

/// Per-account activity input to the evidence-anchor generator.
///
/// One value describes the postings of a single GL account.
/// [`EvidenceAnchorGenerator::generate`] compares `total_activity` against the materiality
/// threshold, treats any positive `fraud_activity` as fraud-linked, and copies every field onto
/// the anchor it emits for the account.
#[derive(Debug, Clone)]
pub struct AccountActivity {
    /// GL account number.
    pub account_code: String,
    /// GL account description.
    pub account_description: String,
    /// GL account type.
    pub account_type: AccountType,
    /// Total posting activity (legitimate + any fraud).
    ///
    /// Summed over all input accounts to form the base the materiality share is applied to.
    pub total_activity: Decimal,
    /// Posting activity attributable to fraud journal entries.
    ///
    /// Any value above zero marks the account as fraud-linked.
    pub fraud_activity: Decimal,
    /// Number of journal entries touching the account.
    pub transaction_count: u32,
}

/// Generates [`EvidenceAnchor`] records for a company's material accounts.
///
/// Both fields are initialised from the seed passed to [`EvidenceAnchorGenerator::new`].
pub struct EvidenceAnchorGenerator {
    /// Random source for the per-account corroboration roll.
    rng: ChaCha8Rng,
    /// Supplies the `anchor_id` of each emitted anchor.
    uuid_factory: DeterministicUuidFactory,
}

impl EvidenceAnchorGenerator {
    /// Build a generator whose random stream and anchor-id factory are both derived from `seed`.
    pub fn new(seed: u64) -> Self {
        let rng = seeded_rng(seed, 0);
        let uuid_factory = DeterministicUuidFactory::new(seed, GeneratorType::EvidenceAnchor);
        Self { rng, uuid_factory }
    }

    /// Emit one [`EvidenceAnchor`] per material account in `accounts`, preserving input order.
    ///
    /// An account is material when its `total_activity` is at least `min_materiality_share`
    /// (negative shares are clamped to zero) of the activity summed over all accounts. When that
    /// sum is not positive, no anchors are produced.
    ///
    /// Each material account consumes one random draw: fraud-linked accounts (positive
    /// `fraud_activity`) are corroborated when the draw falls below `fabrication_evade_rate`,
    /// all others when it falls below `corroboration_rate`. Every emitted anchor is flagged
    /// material, and it dangles exactly when it is not corroborated.
    pub fn generate(
        &mut self,
        company_code: &str,
        fiscal_year: i32,
        accounts: &[AccountActivity],
        config: &EvidenceAnchorsConfig,
    ) -> Vec<EvidenceAnchor> {
        let activity_sum = accounts
            .iter()
            .map(|account| account.total_activity)
            .sum::<Decimal>();
        if activity_sum <= Decimal::ZERO {
            return Vec::new();
        }

        // A share that cannot be represented as a `Decimal` falls back to a zero cutoff.
        let share = config.min_materiality_share.max(0.0);
        let cutoff = activity_sum * Decimal::try_from(share).unwrap_or(Decimal::ZERO);

        accounts
            .iter()
            // Anything below the cutoff is immaterial — out of substantive scope.
            .filter(|account| account.total_activity >= cutoff)
            .map(|account| {
                let is_fraud_linked = account.fraud_activity > Decimal::ZERO;
                // Genuine accounts corroborate at `corroboration_rate`; fraud-linked ones only
                // at the (small) `fabrication_evade_rate` — the forged-evidence case.
                let success_rate = if is_fraud_linked {
                    config.fabrication_evade_rate
                } else {
                    config.corroboration_rate
                };
                let draw: f64 = self.rng.random();
                let corroborated = draw < success_rate;
                let corroboration_method = match corroborated {
                    true => CorroborationMethod::Confirmation,
                    false => CorroborationMethod::None,
                };
                let anchor_id = self.uuid_factory.next().to_string();

                EvidenceAnchor {
                    anchor_id,
                    company_code: company_code.to_string(),
                    account_code: account.account_code.clone(),
                    account_description: account.account_description.clone(),
                    account_type: account.account_type,
                    fiscal_year,
                    total_activity: account.total_activity,
                    transaction_count: account.transaction_count,
                    is_material: true,
                    corroborated,
                    corroboration_method,
                    is_dangling: !corroborated,
                    fraud_activity: account.fraud_activity,
                    is_fraud_linked,
                }
            })
            .collect()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Crisp configuration: clean accounts always corroborate, fraud-linked ones never do.
    fn cfg_crisp() -> EvidenceAnchorsConfig {
        EvidenceAnchorsConfig {
            enabled: true,
            min_materiality_share: 0.005,
            corroboration_rate: 1.0,
            fabrication_evade_rate: 0.0,
        }
    }

    /// Build an expense account with the given code, total activity and fraud activity.
    fn acct(code: &str, total: i64, fraud: i64) -> AccountActivity {
        let total_activity = Decimal::from(total);
        let fraud_activity = Decimal::from(fraud);
        AccountActivity {
            account_code: code.to_owned(),
            account_description: format!("Account {code}"),
            account_type: AccountType::Expense,
            total_activity,
            fraud_activity,
            transaction_count: 10,
        }
    }

    /// Return the anchor emitted for `code`, panicking when there is none.
    fn anchor_for<'a>(anchors: &'a [EvidenceAnchor], code: &str) -> &'a EvidenceAnchor {
        anchors
            .iter()
            .find(|anchor| anchor.account_code == code)
            .unwrap()
    }

    /// Collect the `is_dangling` flag of every anchor, in order.
    fn dangling_flags(anchors: &[EvidenceAnchor]) -> Vec<bool> {
        anchors.iter().map(|anchor| anchor.is_dangling).collect()
    }

    /// Under the crisp config the fraud-linked account dangles and the clean one is confirmed.
    #[test]
    fn fraud_linked_account_is_dangling() {
        let accounts = [acct("4000", 1_000_000, 0), acct("2100", 800_000, 300_000)];
        let mut generator = EvidenceAnchorGenerator::new(5);
        let anchors = generator.generate("1000", 2024, &accounts, &cfg_crisp());
        assert_eq!(anchors.len(), 2);

        let clean = anchor_for(&anchors, "4000");
        assert!(!clean.is_fraud_linked);
        assert!(clean.corroborated && !clean.is_dangling);
        assert_eq!(
            clean.corroboration_method,
            CorroborationMethod::Confirmation
        );

        let fraud = anchor_for(&anchors, "2100");
        assert!(fraud.is_fraud_linked);
        assert!(
            !fraud.corroborated && fraud.is_dangling,
            "fraud-linked account must dangle when evidence isn't forged"
        );
        assert_eq!(fraud.fraud_activity, Decimal::from(300_000));
        assert_eq!(fraud.corroboration_method, CorroborationMethod::None);
    }

    /// Accounts below the materiality share produce no anchor.
    #[test]
    fn immaterial_accounts_skipped() {
        let accounts = [acct("4000", 10_000_000, 0), acct("9999", 100, 0)];
        let mut generator = EvidenceAnchorGenerator::new(1);
        let anchors = generator.generate("1000", 2024, &accounts, &cfg_crisp());

        let emitted = |code: &str| anchors.iter().any(|anchor| anchor.account_code == code);
        assert!(!emitted("9999"));
        assert!(emitted("4000"));
    }

    /// With realistic rates, fraud accounts dangle far more often than clean ones, and two
    /// generators built from the same seed agree on the dangling pattern.
    #[test]
    fn dangling_rate_separates_fraud_from_clean_and_is_deterministic() {
        let cfg = EvidenceAnchorsConfig {
            enabled: true,
            min_materiality_share: 0.0,
            corroboration_rate: 0.9,
            fabrication_evade_rate: 0.1,
        };
        // 200 clean accounts followed by 200 fraud-linked ones, all with equal total activity.
        let all: Vec<AccountActivity> = (0..200)
            .map(|i| acct(&format!("C{i}"), 100_000, 0))
            .chain((0..200).map(|i| acct(&format!("F{i}"), 100_000, 50_000)))
            .collect();

        let first_run = EvidenceAnchorGenerator::new(99).generate("1000", 2024, &all, &cfg);
        let second_run = EvidenceAnchorGenerator::new(99).generate("1000", 2024, &all, &cfg);
        assert_eq!(
            dangling_flags(&first_run),
            dangling_flags(&second_run),
            "same seed ⇒ identical dangling pattern"
        );

        // Tally dangling anchors separately for clean and fraud-linked accounts.
        let (clean_dangle, fraud_dangle) = first_run
            .iter()
            .filter(|anchor| anchor.is_dangling)
            .fold((0usize, 0usize), |(clean, fraud), anchor| {
                if anchor.is_fraud_linked {
                    (clean, fraud + 1)
                } else {
                    (clean + 1, fraud)
                }
            });
        assert!(
            fraud_dangle > 150,
            "most fraud accounts dangle (got {fraud_dangle}/200)"
        );
        assert!(
            clean_dangle < 40,
            "few clean accounts dangle (got {clean_dangle}/200)"
        );
    }
}