use crate::models::{
Decimal128, FraudPatternType, GaapViolationType, JournalEntry, JournalLineItem,
};
use rand::prelude::*;
use std::collections::HashMap;
/// Top-level settings controlling how often and in what form anomalies are injected.
#[derive(Debug, Clone)]
pub struct AnomalyInjectionConfig {
/// Per-entry injection threshold: `AnomalyInjector::process` injects only when a
/// uniform `f64` draw does not exceed this value, and never when it is `<= 0.0`.
pub injection_rate: f64,
/// Weighted fraud patterns; `validate` expects the weights to sum to 1.0 (within 0.01).
pub fraud_patterns: Vec<FraudPatternConfig>,
/// Weighted GAAP violations; `validate` expects the weights to sum to 1.0 (within 0.01).
pub gaap_violations: Vec<GaapViolationConfig>,
/// Toggles for timing anomalies.
pub timing_anomalies: TimingAnomalyConfig,
/// Toggles and scaling factor for amount anomalies.
pub amount_anomalies: AmountAnomalyConfig,
/// When `false`, results still report `anomaly_injected` but carry no `anomaly_label`.
pub label_anomalies: bool,
}
impl Default for AnomalyInjectionConfig {
    /// Baseline configuration: an `injection_rate` of 0.02, six weighted fraud
    /// patterns, five weighted GAAP violations, default timing/amount settings
    /// and labelling switched on. Both weight lists sum to 1.0.
    fn default() -> Self {
        let fraud_patterns = vec![
            FraudPatternConfig::circular_flow(0.25),
            FraudPatternConfig::threshold_clustering(0.20),
            FraudPatternConfig::round_amounts(0.15),
            FraudPatternConfig::velocity(0.15),
            FraudPatternConfig::dormant_activation(0.10),
            FraudPatternConfig::unusual_pairing(0.15),
        ];

        let gaap_violations = vec![
            GaapViolationConfig::new(GaapViolationType::RevenueToCashDirect, 0.30),
            GaapViolationConfig::new(GaapViolationType::ExpenseToAsset, 0.25),
            GaapViolationConfig::new(GaapViolationType::CashToRevenue, 0.20),
            GaapViolationConfig::new(GaapViolationType::RevenueToExpense, 0.10),
            GaapViolationConfig::new(GaapViolationType::UnbalancedEntry, 0.15),
        ];

        Self {
            injection_rate: 0.02,
            fraud_patterns,
            gaap_violations,
            timing_anomalies: TimingAnomalyConfig::default(),
            amount_anomalies: AmountAnomalyConfig::default(),
            label_anomalies: true,
        }
    }
}
impl AnomalyInjectionConfig {
    /// Returns the default configuration with injection switched off
    /// (`injection_rate` of 0.0).
    pub fn disabled() -> Self {
        Self {
            injection_rate: 0.0,
            ..Default::default()
        }
    }

    /// Returns the default configuration with `injection_rate` raised to 0.10.
    pub fn high_rate() -> Self {
        Self {
            injection_rate: 0.10,
            ..Default::default()
        }
    }

    /// Checks that the configuration describes usable probabilities.
    ///
    /// # Errors
    ///
    /// Returns a human-readable message when
    /// * `injection_rate` lies outside `[0.0, 1.0]` or is NaN,
    /// * any fraud-pattern or GAAP-violation weight is NaN, infinite or negative,
    /// * the fraud-pattern or GAAP-violation weights do not sum to 1.0 within 0.01.
    pub fn validate(&self) -> Result<(), String> {
        // `contains` is false for NaN, so a NaN rate is rejected here as well.
        if !(0.0..=1.0).contains(&self.injection_rate) {
            return Err(format!(
                "Injection rate must be between 0.0 and 1.0, got {}",
                self.injection_rate
            ));
        }
        Self::check_weights(
            "Fraud pattern",
            self.fraud_patterns.iter().map(|p| p.probability),
        )?;
        Self::check_weights(
            "GAAP violation",
            self.gaap_violations.iter().map(|v| v.probability),
        )
    }

    /// Verifies one weight list: every weight finite and non-negative, total about 1.0.
    fn check_weights(label: &str, weights: impl Iterator<Item = f64>) -> Result<(), String> {
        let mut total: f64 = 0.0;
        for weight in weights {
            // Checked per weight because a NaN total would slip past the
            // `abs() > 0.01` comparison below (every comparison with NaN is false).
            if !weight.is_finite() || weight < 0.0 {
                return Err(format!(
                    "{} probabilities must be finite and non-negative, got {}",
                    label, weight
                ));
            }
            total += weight;
        }
        if (total - 1.0).abs() > 0.01 {
            return Err(format!(
                "{} probabilities must sum to 1.0, got {}",
                label, total
            ));
        }
        Ok(())
    }
}
/// One weighted fraud pattern together with its shape parameters.
#[derive(Debug, Clone)]
pub struct FraudPatternConfig {
/// Which fraud pattern this entry configures.
pub pattern_type: FraudPatternType,
/// Selection weight, accumulated by `AnomalyInjector::select_fraud_pattern`.
pub probability: f64,
/// NOTE(review): presumably the (min, max) number of accounts involved; no reader
/// was found in this module — confirm.
pub account_count: (u8, u8),
/// NOTE(review): presumably the (min, max) amount for the pattern; no reader
/// was found in this module — confirm.
pub amount_range: (f64, f64),
}
impl FraudPatternConfig {
pub fn circular_flow(probability: f64) -> Self {
Self {
pattern_type: FraudPatternType::CircularFlow,
probability,
account_count: (3, 5),
amount_range: (10000.0, 100000.0),
}
}
pub fn threshold_clustering(probability: f64) -> Self {
Self {
pattern_type: FraudPatternType::ThresholdClustering,
probability,
account_count: (2, 2),
amount_range: (9000.0, 9999.0), }
}
pub fn round_amounts(probability: f64) -> Self {
Self {
pattern_type: FraudPatternType::RoundAmounts,
probability,
account_count: (2, 2),
amount_range: (1000.0, 50000.0),
}
}
pub fn velocity(probability: f64) -> Self {
Self {
pattern_type: FraudPatternType::HighVelocity,
probability,
account_count: (3, 6),
amount_range: (5000.0, 50000.0),
}
}
pub fn dormant_activation(probability: f64) -> Self {
Self {
pattern_type: FraudPatternType::DormantActivation,
probability,
account_count: (2, 2),
amount_range: (10000.0, 500000.0),
}
}
pub fn unusual_pairing(probability: f64) -> Self {
Self {
pattern_type: FraudPatternType::UnusualPairing,
probability,
account_count: (2, 2),
amount_range: (5000.0, 100000.0),
}
}
}
/// One weighted GAAP violation type.
#[derive(Debug, Clone)]
pub struct GaapViolationConfig {
/// Which violation this entry configures.
pub violation_type: GaapViolationType,
/// Selection weight, accumulated by `AnomalyInjector::select_gaap_violation`.
pub probability: f64,
}
impl GaapViolationConfig {
pub fn new(violation_type: GaapViolationType, probability: f64) -> Self {
Self {
violation_type,
probability,
}
}
}
/// Switches for the kinds of timing anomaly.
///
/// NOTE(review): `AnomalyInjector::inject_timing_anomaly` in this module does not
/// consult these flags and always produces an "after_hours" anomaly — confirm
/// whether they are read elsewhere.
#[derive(Debug, Clone)]
pub struct TimingAnomalyConfig {
    /// Toggle for after-hours postings.
    pub after_hours: bool,
    /// Toggle for weekend postings.
    pub weekend_entries: bool,
    /// Toggle for holiday postings.
    pub holiday_entries: bool,
    /// Toggle for month-end manipulation.
    pub month_end_manipulation: bool,
}

impl Default for TimingAnomalyConfig {
    /// Everything enabled except holiday entries.
    fn default() -> Self {
        TimingAnomalyConfig {
            // The single switch that starts out disabled.
            holiday_entries: false,
            after_hours: true,
            weekend_entries: true,
            month_end_manipulation: true,
        }
    }
}
/// Switches and scaling factor for amount anomalies.
///
/// NOTE(review): only `outlier_multiplier` is read by
/// `AnomalyInjector::inject_amount_anomaly` in this module; the boolean toggles
/// are not consulted there — confirm whether they are read elsewhere.
#[derive(Debug, Clone)]
pub struct AmountAnomalyConfig {
    /// Toggle for round-amount anomalies.
    pub round_amounts: bool,
    /// Toggle for Benford's-law violations.
    pub benford_violations: bool,
    /// Toggle for outlier amounts.
    pub outliers: bool,
    /// Factor applied to every line amount and to the entry totals when an
    /// outlier is injected.
    pub outlier_multiplier: f64,
}

impl Default for AmountAnomalyConfig {
    /// All toggles enabled, outliers scaled tenfold.
    fn default() -> Self {
        AmountAnomalyConfig {
            outlier_multiplier: 10.0,
            outliers: true,
            benford_violations: true,
            round_amounts: true,
        }
    }
}
/// Stateful injector that passes journal entries through, occasionally altering them.
pub struct AnomalyInjector {
/// Injection settings supplied at construction.
config: AnomalyInjectionConfig,
/// Random source for every draw; seeded in `new`.
rng: StdRng,
/// Account classification keyed by account index, filled via `register_account`.
account_types: HashMap<u16, AccountTypeInfo>,
/// Running counters, exposed through `stats` and cleared by `reset_stats`.
stats: InjectionStats,
/// Initialised empty in `new` and not otherwise used in this module, hence the
/// `dead_code` allowance.
#[allow(dead_code)]
pending_circular_flows: Vec<CircularFlowState>,
/// Account indices recorded by `mark_dormant`, without duplicates.
dormant_accounts: Vec<u16>,
}
/// Classification flags for one account; every flag defaults to `false`.
///
/// Within this module the injector reads `is_revenue`, `is_expense` and `is_cash`
/// when it looks for accounts to swap into an entry.
#[derive(Debug, Clone, Copy, Default)]
pub struct AccountTypeInfo {
pub is_asset: bool,
pub is_liability: bool,
pub is_revenue: bool,
pub is_expense: bool,
pub is_equity: bool,
pub is_cash: bool,
pub is_suspense: bool,
}
/// Bookkeeping record for a circular flow.
///
/// NOTE(review): never constructed or read in this module (hence `dead_code`);
/// the field meanings below are inferred from their names — confirm before use.
#[derive(Debug, Clone)]
#[allow(dead_code)]
struct CircularFlowState {
// Presumably the account indices forming the cycle.
accounts: Vec<u16>,
// Presumably the index into `accounts` of the next hop.
current_position: usize,
// Presumably the amount moved on each hop.
amount: Decimal128,
// Presumably the number of hops still to emit.
remaining: usize,
}
/// Counters describing what the injector has done so far; all start at zero/empty.
#[derive(Debug, Clone, Default)]
pub struct InjectionStats {
/// Incremented once per `AnomalyInjector::process` call, injected or not.
pub entries_processed: u64,
/// Incremented each time an injection actually produced an anomaly.
pub anomalies_injected: u64,
/// Number of injected fraud patterns, per pattern type.
pub fraud_patterns: HashMap<FraudPatternType, u32>,
/// Number of injected GAAP violations, per violation type.
pub gaap_violations: HashMap<GaapViolationType, u32>,
/// Number of injected timing anomalies.
pub timing_anomalies: u32,
/// Number of injected amount anomalies.
pub amount_anomalies: u32,
}
/// Outcome of passing one journal entry through `AnomalyInjector::process`.
#[derive(Debug, Clone)]
pub struct InjectionResult {
/// The entry header, possibly modified.
pub entry: JournalEntry,
/// The debit lines, possibly modified.
pub debit_lines: Vec<JournalLineItem>,
/// The credit lines, possibly modified.
pub credit_lines: Vec<JournalLineItem>,
/// `true` when an anomaly was injected into this entry.
pub anomaly_injected: bool,
/// Ground-truth label; `None` when nothing was injected or when
/// `AnomalyInjectionConfig::label_anomalies` is `false`.
pub anomaly_label: Option<AnomalyLabel>,
}
/// Ground-truth description of the anomaly injected into an entry.
#[derive(Debug, Clone)]
pub enum AnomalyLabel {
/// A fraud pattern of the given type.
FraudPattern(FraudPatternType),
/// A GAAP violation of the given type.
GaapViolation(GaapViolationType),
/// A timing anomaly; the injector in this module emits `"after_hours"`.
TimingAnomaly(String),
/// An amount anomaly; the injector in this module emits `"outlier"`.
AmountAnomaly(String),
}
impl AnomalyInjector {
/// Creates an injector with an empty account registry and zeroed statistics.
///
/// With `seed = Some(s)` the generator is seeded from `s`; with `None` a seed is
/// drawn from the thread-local generator.
pub fn new(config: AnomalyInjectionConfig, seed: Option<u64>) -> Self {
    let rng = match seed {
        Some(fixed) => StdRng::seed_from_u64(fixed),
        None => StdRng::seed_from_u64(rand::thread_rng().gen()),
    };

    Self {
        config,
        rng,
        account_types: HashMap::new(),
        stats: InjectionStats::default(),
        pending_circular_flows: Vec::new(),
        dormant_accounts: Vec::new(),
    }
}
/// Records the classification for account `index`, replacing any earlier
/// registration for the same index.
pub fn register_account(&mut self, index: u16, info: AccountTypeInfo) {
    // The previous value (if any) is intentionally discarded.
    let _replaced = self.account_types.insert(index, info);
}
/// Adds `index` to the dormant-account list unless it is already present.
pub fn mark_dormant(&mut self, index: u16) {
    if self.dormant_accounts.contains(&index) {
        return;
    }
    self.dormant_accounts.push(index);
}
/// Passes one journal entry through the injector.
///
/// Always counts the entry in `entries_processed`. The entry is returned
/// untouched when `injection_rate <= 0.0` or when a uniform draw exceeds the
/// rate. Otherwise a second draw picks the anomaly family: fraud pattern
/// (below 0.5), GAAP violation (below 0.8), timing anomaly (below 0.9) or
/// amount anomaly (the rest).
pub fn process(
    &mut self,
    entry: JournalEntry,
    debit_lines: Vec<JournalLineItem>,
    credit_lines: Vec<JournalLineItem>,
) -> InjectionResult {
    self.stats.entries_processed += 1;

    let rate = self.config.injection_rate;
    // `||` short-circuits, so no random number is consumed while the rate is non-positive.
    let pass_through = rate <= 0.0 || self.rng.gen::<f64>() > rate;
    if pass_through {
        return InjectionResult {
            entry,
            debit_lines,
            credit_lines,
            anomaly_injected: false,
            anomaly_label: None,
        };
    }

    // NOTE(review): the per-kind toggles in `config.timing_anomalies` and
    // `config.amount_anomalies` are not consulted when choosing a family here.
    let roll: f64 = self.rng.gen();
    match roll {
        r if r < 0.5 => self.inject_fraud_pattern(entry, debit_lines, credit_lines),
        r if r < 0.8 => self.inject_gaap_violation(entry, debit_lines, credit_lines),
        r if r < 0.9 => self.inject_timing_anomaly(entry, debit_lines, credit_lines),
        _ => self.inject_amount_anomaly(entry, debit_lines, credit_lines),
    }
}
fn inject_fraud_pattern(
&mut self,
mut entry: JournalEntry,
mut debit_lines: Vec<JournalLineItem>,
mut credit_lines: Vec<JournalLineItem>,
) -> InjectionResult {
let pattern_type = self.select_fraud_pattern();
let label = match pattern_type {
FraudPatternType::ThresholdClustering => {
let threshold = 10000.0;
let new_amount = Decimal128::from_f64(threshold - self.rng.gen_range(1.0..999.0));
for line in &mut debit_lines {
line.amount = new_amount;
}
for line in &mut credit_lines {
line.amount = new_amount;
}
entry.total_debits = new_amount;
entry.total_credits = new_amount;
Some(AnomalyLabel::FraudPattern(
FraudPatternType::ThresholdClustering,
))
}
FraudPatternType::RoundAmounts => {
let round_amounts = [1000.0, 5000.0, 10000.0, 25000.0, 50000.0, 100000.0];
let new_amount =
Decimal128::from_f64(round_amounts[self.rng.gen_range(0..round_amounts.len())]);
for line in &mut debit_lines {
line.amount = new_amount;
}
for line in &mut credit_lines {
line.amount = new_amount;
}
entry.total_debits = new_amount;
entry.total_credits = new_amount;
Some(AnomalyLabel::FraudPattern(FraudPatternType::RoundAmounts))
}
FraudPatternType::UnusualPairing => {
if let (Some(revenue_idx), Some(expense_idx)) = self.find_unusual_pair() {
if !debit_lines.is_empty() {
debit_lines[0].account_index = revenue_idx; }
if !credit_lines.is_empty() {
credit_lines[0].account_index = expense_idx; }
Some(AnomalyLabel::FraudPattern(FraudPatternType::UnusualPairing))
} else {
None
}
}
_ => {
Some(AnomalyLabel::FraudPattern(pattern_type))
}
};
if label.is_some() {
self.stats.anomalies_injected += 1;
*self.stats.fraud_patterns.entry(pattern_type).or_insert(0) += 1;
}
InjectionResult {
entry,
debit_lines,
credit_lines,
anomaly_injected: label.is_some(),
anomaly_label: if self.config.label_anomalies {
label
} else {
None
},
}
}
/// Applies a randomly selected GAAP violation to the entry.
///
/// * `UnbalancedEntry`: adds an amount drawn from `100.0..1000.0` to the first
///   credit line and to `total_credits`, and clears the `IS_BALANCED` flag.
///   When there is no credit line to skew, nothing is injected, so an
///   untouched entry is never reported or counted as unbalanced.
/// * `RevenueToCashDirect`: points the first debit line at a cash account and
///   the first credit line at a revenue account; nothing is injected when
///   either kind of account is not registered.
/// * Any other violation: the entry is left unchanged but still labelled and counted.
///
/// Statistics are updated only when a label was produced; the label is
/// withheld from the result when `label_anomalies` is off.
fn inject_gaap_violation(
    &mut self,
    mut entry: JournalEntry,
    mut debit_lines: Vec<JournalLineItem>,
    mut credit_lines: Vec<JournalLineItem>,
) -> InjectionResult {
    let violation_type = self.select_gaap_violation();

    let label = match violation_type {
        GaapViolationType::UnbalancedEntry => match credit_lines.first_mut() {
            Some(first_credit) => {
                let adjustment = Decimal128::from_f64(self.rng.gen_range(100.0..1000.0));
                first_credit.amount = first_credit.amount + adjustment;
                entry.total_credits = entry.total_credits + adjustment;
                entry.flags.0 &= !crate::models::JournalEntryFlags::IS_BALANCED;
                Some(AnomalyLabel::GaapViolation(
                    GaapViolationType::UnbalancedEntry,
                ))
            }
            // No credit line means the entry stays balanced; reporting an
            // anomaly here would contradict the entry's own flags and totals.
            None => None,
        },
        GaapViolationType::RevenueToCashDirect => match self.find_revenue_cash_pair() {
            (Some(revenue_idx), Some(cash_idx)) => {
                if let Some(first_debit) = debit_lines.first_mut() {
                    first_debit.account_index = cash_idx;
                }
                if let Some(first_credit) = credit_lines.first_mut() {
                    first_credit.account_index = revenue_idx;
                }
                Some(AnomalyLabel::GaapViolation(
                    GaapViolationType::RevenueToCashDirect,
                ))
            }
            _ => None,
        },
        other => Some(AnomalyLabel::GaapViolation(other)),
    };

    let injected = label.is_some();
    if injected {
        self.stats.anomalies_injected += 1;
        *self
            .stats
            .gaap_violations
            .entry(violation_type)
            .or_insert(0) += 1;
    }

    InjectionResult {
        entry,
        debit_lines,
        credit_lines,
        anomaly_injected: injected,
        anomaly_label: if self.config.label_anomalies {
            label
        } else {
            None
        },
    }
}
/// Moves the posting time into the last hour of its day.
///
/// `posting_date.physical` is truncated to a multiple of 86,400,000, then
/// 23 x 3,600,000 plus a random offset below 3,600,000 is added. Lines and
/// amounts are untouched. Always counts as an injected timing anomaly
/// labelled `"after_hours"`.
///
/// NOTE(review): the constants imply `physical` is in milliseconds and that day
/// boundaries are multiples of a day since its epoch — confirm the time zone.
fn inject_timing_anomaly(
    &mut self,
    mut entry: JournalEntry,
    debit_lines: Vec<JournalLineItem>,
    credit_lines: Vec<JournalLineItem>,
) -> InjectionResult {
    const MS_PER_HOUR: u64 = 3_600_000;
    const MS_PER_DAY: u64 = 86_400_000;

    let midnight = (entry.posting_date.physical / MS_PER_DAY) * MS_PER_DAY;
    let last_hour_start = midnight + 23 * MS_PER_HOUR;
    let offset_in_hour = self.rng.gen_range(0..MS_PER_HOUR);
    entry.posting_date.physical = last_hour_start + offset_in_hour;

    self.stats.anomalies_injected += 1;
    self.stats.timing_anomalies += 1;

    let anomaly_label = if self.config.label_anomalies {
        Some(AnomalyLabel::TimingAnomaly("after_hours".to_string()))
    } else {
        None
    };

    InjectionResult {
        entry,
        debit_lines,
        credit_lines,
        anomaly_injected: true,
        anomaly_label,
    }
}
/// Scales the entry into an outlier.
///
/// Every line amount is multiplied by `amount_anomalies.outlier_multiplier`
/// (via an `f64` round trip), and both entry totals are set to the scaled
/// value of the previous `total_debits`. Always counts as an injected amount
/// anomaly labelled `"outlier"`.
fn inject_amount_anomaly(
    &mut self,
    mut entry: JournalEntry,
    mut debit_lines: Vec<JournalLineItem>,
    mut credit_lines: Vec<JournalLineItem>,
) -> InjectionResult {
    let factor = self.config.amount_anomalies.outlier_multiplier;

    // Both totals derive from the debit total, mirroring a balanced entry.
    let scaled_total = Decimal128::from_f64(entry.total_debits.to_f64() * factor);

    for line in debit_lines.iter_mut().chain(credit_lines.iter_mut()) {
        line.amount = Decimal128::from_f64(line.amount.to_f64() * factor);
    }
    entry.total_debits = scaled_total;
    entry.total_credits = scaled_total;

    self.stats.anomalies_injected += 1;
    self.stats.amount_anomalies += 1;

    let anomaly_label = if self.config.label_anomalies {
        Some(AnomalyLabel::AmountAnomaly("outlier".to_string()))
    } else {
        None
    };

    InjectionResult {
        entry,
        debit_lines,
        credit_lines,
        anomaly_injected: true,
        anomaly_label,
    }
}
/// Draws a fraud pattern according to the configured weights.
///
/// Walks the list in order, accumulating weights, and returns the first
/// pattern whose running total exceeds a uniform draw. Falls back to
/// `RoundAmounts` when no pattern qualifies (empty list, or weights that
/// do not reach the draw).
fn select_fraud_pattern(&mut self) -> FraudPatternType {
    let draw: f64 = self.rng.gen();
    let mut running_total = 0.0;
    self.config
        .fraud_patterns
        .iter()
        .find(|candidate| {
            running_total += candidate.probability;
            draw < running_total
        })
        .map_or(FraudPatternType::RoundAmounts, |chosen| chosen.pattern_type)
}
/// Draws a GAAP violation according to the configured weights.
///
/// Walks the list in order, accumulating weights, and returns the first
/// violation whose running total exceeds a uniform draw. Falls back to
/// `UnbalancedEntry` when no violation qualifies (empty list, or weights
/// that do not reach the draw).
fn select_gaap_violation(&mut self) -> GaapViolationType {
    let draw: f64 = self.rng.gen();
    let mut running_total = 0.0;
    self.config
        .gaap_violations
        .iter()
        .find(|candidate| {
            running_total += candidate.probability;
            draw < running_total
        })
        .map_or(GaapViolationType::UnbalancedEntry, |chosen| {
            chosen.violation_type
        })
}
fn find_unusual_pair(&self) -> (Option<u16>, Option<u16>) {
let revenue = self
.account_types
.iter()
.find(|(_, info)| info.is_revenue)
.map(|(&idx, _)| idx);
let expense = self
.account_types
.iter()
.find(|(_, info)| info.is_expense)
.map(|(&idx, _)| idx);
(revenue, expense)
}
fn find_revenue_cash_pair(&self) -> (Option<u16>, Option<u16>) {
let revenue = self
.account_types
.iter()
.find(|(_, info)| info.is_revenue)
.map(|(&idx, _)| idx);
let cash = self
.account_types
.iter()
.find(|(_, info)| info.is_cash)
.map(|(&idx, _)| idx);
(revenue, cash)
}
/// Returns a shared reference to the counters accumulated so far.
pub fn stats(&self) -> &InjectionStats {
&self.stats
}
/// Replaces all counters with their default (zero/empty) values; the random
/// generator, registered accounts and dormant list are left as they are.
pub fn reset_stats(&mut self) {
    let cleared = InjectionStats::default();
    self.stats = cleared;
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::HybridTimestamp;
    use uuid::Uuid;

    /// Builds a fresh journal entry with random identifiers and the current timestamp.
    fn blank_entry() -> JournalEntry {
        JournalEntry::new(Uuid::new_v4(), Uuid::new_v4(), HybridTimestamp::now())
    }

    /// The default configuration validates and has injection enabled.
    #[test]
    fn test_config_default() {
        let defaults = AnomalyInjectionConfig::default();
        assert!(defaults.validate().is_ok());
        assert!(defaults.injection_rate > 0.0);
    }

    /// A new injector starts with no processed entries.
    #[test]
    fn test_injector_creation() {
        let injector = AnomalyInjector::new(AnomalyInjectionConfig::default(), Some(42));
        assert_eq!(injector.stats().entries_processed, 0);
    }

    /// With injection disabled, entries pass through unmarked.
    #[test]
    fn test_disabled_injection() {
        let mut injector = AnomalyInjector::new(AnomalyInjectionConfig::disabled(), Some(42));
        let outcome = injector.process(blank_entry(), Vec::new(), Vec::new());
        assert!(!outcome.anomaly_injected);
    }

    /// With a rate of 1.0, a hundred entries yield at least one counted anomaly.
    #[test]
    fn test_fraud_pattern_selection() {
        let always_inject = AnomalyInjectionConfig {
            injection_rate: 1.0,
            ..Default::default()
        };
        let mut injector = AnomalyInjector::new(always_inject, Some(42));

        for _ in 0..100 {
            let entry = blank_entry();
            let debits = vec![JournalLineItem::debit(0, Decimal128::from_f64(1000.0), 1)];
            let credits = vec![JournalLineItem::credit(1, Decimal128::from_f64(1000.0), 2)];
            injector.process(entry, debits, credits);
        }

        assert!(injector.stats().anomalies_injected > 0);
    }
}