use datasynth_config::schema::EvidenceAnchorsConfig;
use datasynth_core::models::{AccountType, CorroborationMethod, EvidenceAnchor};
use datasynth_core::utils::seeded_rng;
use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
use rand::prelude::*;
use rand_chacha::ChaCha8Rng;
use rust_decimal::Decimal;
/// Per-account activity summary consumed by [`EvidenceAnchorGenerator::generate`].
///
/// One value describes a single account; the generator reads these fields to
/// decide materiality and fraud linkage and copies most of them onto the
/// resulting `EvidenceAnchor`.
#[derive(Debug, Clone)]
pub struct AccountActivity {
/// Account identifier; copied verbatim onto the generated anchor.
pub account_code: String,
/// Human-readable account label; copied verbatim onto the generated anchor.
pub account_description: String,
/// Account classification; copied onto the generated anchor.
pub account_type: AccountType,
/// Activity amount for the account. The generator sums this across all
/// accounts and compares each account's value against a share of that sum
/// to decide whether the account is material.
pub total_activity: Decimal,
/// Portion of the activity attributed to fraud. Any value greater than
/// zero makes the generator treat the account as fraud-linked.
pub fraud_activity: Decimal,
/// Number of transactions behind the activity; copied onto the generated anchor.
pub transaction_count: u32,
}
/// Seeded generator that turns [`AccountActivity`] records into `EvidenceAnchor`s.
///
/// Both fields are initialised from the same seed in `new`; the unit tests in
/// this file check that two generators built with the same seed produce the
/// same dangling pattern for the same input.
pub struct EvidenceAnchorGenerator {
/// Random source used for the per-account corroboration draw.
rng: ChaCha8Rng,
/// Factory that supplies the `anchor_id` of each generated anchor.
uuid_factory: DeterministicUuidFactory,
}
impl EvidenceAnchorGenerator {
    /// Creates a generator whose random stream and anchor-id factory are both
    /// derived from `seed`.
    pub fn new(seed: u64) -> Self {
        let rng = seeded_rng(seed, 0);
        let uuid_factory = DeterministicUuidFactory::new(seed, GeneratorType::EvidenceAnchor);
        Self { rng, uuid_factory }
    }

    /// Produces one evidence anchor per material account.
    ///
    /// * `company_code` / `fiscal_year` are stamped onto every anchor.
    /// * `accounts` supplies the per-account activity; output order follows
    ///   input order.
    /// * `config` provides the materiality share and the two corroboration
    ///   rates.
    ///
    /// Returns an empty vector when the summed `total_activity` is zero or
    /// negative. Otherwise an account is kept when its `total_activity` is at
    /// least `sum * min_materiality_share`; a negative share is clamped to
    /// zero and a share that cannot be converted to `Decimal` is treated as
    /// zero. Each kept account consumes exactly one random draw and one id
    /// from the UUID factory.
    pub fn generate(
        &mut self,
        company_code: &str,
        fiscal_year: i32,
        accounts: &[AccountActivity],
        config: &EvidenceAnchorsConfig,
    ) -> Vec<EvidenceAnchor> {
        let overall_activity = accounts
            .iter()
            .map(|account| account.total_activity)
            .sum::<Decimal>();
        if overall_activity <= Decimal::ZERO {
            return Vec::new();
        }

        let materiality_share =
            Decimal::try_from(config.min_materiality_share.max(0.0)).unwrap_or(Decimal::ZERO);
        let materiality_floor = overall_activity * materiality_share;

        accounts
            .iter()
            // Immaterial accounts are dropped before any randomness is consumed.
            .filter(|account| account.total_activity >= materiality_floor)
            .map(|account| {
                let is_fraud_linked = account.fraud_activity > Decimal::ZERO;

                // Fraud-linked accounts are only corroborated at the evade
                // rate; all other accounts use the regular corroboration rate.
                let acceptance_rate = if is_fraud_linked {
                    config.fabrication_evade_rate
                } else {
                    config.corroboration_rate
                };
                let draw: f64 = self.rng.random();
                let corroborated = draw < acceptance_rate;

                // An anchor without corroboration has no method and dangles.
                let (corroboration_method, is_dangling) = if corroborated {
                    (CorroborationMethod::Confirmation, false)
                } else {
                    (CorroborationMethod::None, true)
                };

                EvidenceAnchor {
                    anchor_id: self.uuid_factory.next().to_string(),
                    company_code: company_code.to_string(),
                    account_code: account.account_code.clone(),
                    account_description: account.account_description.clone(),
                    account_type: account.account_type,
                    fiscal_year,
                    total_activity: account.total_activity,
                    transaction_count: account.transaction_count,
                    is_material: true,
                    corroborated,
                    corroboration_method,
                    is_dangling,
                    fraud_activity: account.fraud_activity,
                    is_fraud_linked,
                }
            })
            .collect()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Config with extreme rates: a corroboration rate of 1.0 and an evade
    /// rate of 0.0, which the tests below rely on for clear-cut outcomes.
    fn cfg_crisp() -> EvidenceAnchorsConfig {
        EvidenceAnchorsConfig {
            enabled: true,
            min_materiality_share: 0.005,
            corroboration_rate: 1.0,
            fabrication_evade_rate: 0.0,
        }
    }

    /// Builds an expense account with ten transactions and the given totals.
    fn acct(code: &str, total: i64, fraud: i64) -> AccountActivity {
        let account_code = code.to_string();
        let account_description = format!("Account {code}");
        AccountActivity {
            account_code,
            account_description,
            account_type: AccountType::Expense,
            total_activity: Decimal::from(total),
            fraud_activity: Decimal::from(fraud),
            transaction_count: 10,
        }
    }

    /// Returns the anchor generated for `code`, panicking when it is missing.
    fn anchor_for<'a>(anchors: &'a [EvidenceAnchor], code: &str) -> &'a EvidenceAnchor {
        anchors
            .iter()
            .find(|anchor| anchor.account_code == code)
            .unwrap()
    }

    #[test]
    fn fraud_linked_account_is_dangling() {
        let accounts = vec![acct("4000", 1_000_000, 0), acct("2100", 800_000, 300_000)];
        let mut generator = EvidenceAnchorGenerator::new(5);
        let anchors = generator.generate("1000", 2024, &accounts, &cfg_crisp());
        assert_eq!(anchors.len(), 2);

        // The clean account is corroborated by confirmation.
        let clean = anchor_for(&anchors, "4000");
        assert!(!clean.is_fraud_linked);
        assert!(clean.corroborated);
        assert!(!clean.is_dangling);
        assert_eq!(
            clean.corroboration_method,
            CorroborationMethod::Confirmation
        );

        // The fraud-linked account is left without corroboration.
        let fraud = anchor_for(&anchors, "2100");
        assert!(fraud.is_fraud_linked);
        assert!(
            !fraud.corroborated && fraud.is_dangling,
            "fraud-linked account must dangle when evidence isn't forged"
        );
        assert_eq!(fraud.fraud_activity, Decimal::from(300_000));
        assert_eq!(fraud.corroboration_method, CorroborationMethod::None);
    }

    #[test]
    fn immaterial_accounts_skipped() {
        let accounts = vec![acct("4000", 10_000_000, 0), acct("9999", 100, 0)];
        let mut generator = EvidenceAnchorGenerator::new(1);
        let anchors = generator.generate("1000", 2024, &accounts, &cfg_crisp());

        let codes: Vec<&str> = anchors
            .iter()
            .map(|anchor| anchor.account_code.as_str())
            .collect();
        assert!(!codes.contains(&"9999"));
        assert!(codes.contains(&"4000"));
    }

    #[test]
    fn dangling_rate_separates_fraud_from_clean_and_is_deterministic() {
        let cfg = EvidenceAnchorsConfig {
            enabled: true,
            min_materiality_share: 0.0,
            corroboration_rate: 0.9,
            fabrication_evade_rate: 0.1,
        };

        // 200 clean accounts followed by 200 fraud-linked ones.
        let all: Vec<AccountActivity> = (0..200)
            .map(|i| acct(&format!("C{i}"), 100_000, 0))
            .chain((0..200).map(|i| acct(&format!("F{i}"), 100_000, 50_000)))
            .collect();

        let first_run = EvidenceAnchorGenerator::new(99).generate("1000", 2024, &all, &cfg);
        let second_run = EvidenceAnchorGenerator::new(99).generate("1000", 2024, &all, &cfg);

        let dangling_flags = |anchors: &[EvidenceAnchor]| -> Vec<bool> {
            anchors.iter().map(|anchor| anchor.is_dangling).collect()
        };
        assert_eq!(
            dangling_flags(&first_run),
            dangling_flags(&second_run),
            "same seed ⇒ identical dangling pattern"
        );

        let dangling_where = |fraud_linked: bool| {
            first_run
                .iter()
                .filter(|anchor| anchor.is_fraud_linked == fraud_linked && anchor.is_dangling)
                .count()
        };
        let clean_dangle = dangling_where(false);
        let fraud_dangle = dangling_where(true);

        assert!(
            fraud_dangle > 150,
            "most fraud accounts dangle (got {fraud_dangle}/200)"
        );
        assert!(
            clean_dangle < 40,
            "few clean accounts dangle (got {clean_dangle}/200)"
        );
    }
}