1use crate::models::{
7 Decimal128, FraudPatternType, GaapViolationType, JournalEntry, JournalLineItem,
8};
9use rand::prelude::*;
10use std::collections::HashMap;
11
/// Top-level settings that control how `AnomalyInjector` tampers with journal entries.
#[derive(Debug, Clone)]
pub struct AnomalyInjectionConfig {
    /// Per-entry injection chance. `AnomalyInjector::process` skips injection
    /// outright when this is `<= 0.0` and otherwise compares it against a
    /// uniform random draw.
    pub injection_rate: f64,

    /// Weighted fraud pattern presets; `validate` requires the weights to sum
    /// to 1.0 (within 0.01).
    pub fraud_patterns: Vec<FraudPatternConfig>,

    /// Weighted GAAP violation kinds; `validate` requires the weights to sum
    /// to 1.0 (within 0.01).
    pub gaap_violations: Vec<GaapViolationConfig>,

    /// Timing anomaly toggles.
    /// NOTE(review): none of these flags is read by the injector code visible
    /// alongside this struct — confirm where they are consumed.
    pub timing_anomalies: TimingAnomalyConfig,

    /// Amount anomaly settings. Only `outlier_multiplier` is read by the
    /// injector code visible alongside this struct.
    pub amount_anomalies: AmountAnomalyConfig,

    /// When `false`, results carry `anomaly_label: None` even for entries
    /// whose `anomaly_injected` flag is set.
    pub label_anomalies: bool,
}
33
34impl Default for AnomalyInjectionConfig {
35 fn default() -> Self {
36 Self {
37 injection_rate: 0.02, fraud_patterns: vec![
39 FraudPatternConfig::circular_flow(0.25),
40 FraudPatternConfig::threshold_clustering(0.20),
41 FraudPatternConfig::round_amounts(0.15),
42 FraudPatternConfig::velocity(0.15),
43 FraudPatternConfig::dormant_activation(0.10),
44 FraudPatternConfig::unusual_pairing(0.15),
45 ],
46 gaap_violations: vec![
47 GaapViolationConfig::new(GaapViolationType::RevenueToCashDirect, 0.30),
48 GaapViolationConfig::new(GaapViolationType::ExpenseToAsset, 0.25),
49 GaapViolationConfig::new(GaapViolationType::CashToRevenue, 0.20),
50 GaapViolationConfig::new(GaapViolationType::RevenueToExpense, 0.10),
51 GaapViolationConfig::new(GaapViolationType::UnbalancedEntry, 0.15),
52 ],
53 timing_anomalies: TimingAnomalyConfig::default(),
54 amount_anomalies: AmountAnomalyConfig::default(),
55 label_anomalies: true,
56 }
57 }
58}
59
60impl AnomalyInjectionConfig {
61 pub fn disabled() -> Self {
63 Self {
64 injection_rate: 0.0,
65 ..Default::default()
66 }
67 }
68
69 pub fn high_rate() -> Self {
71 Self {
72 injection_rate: 0.10,
73 ..Default::default()
74 }
75 }
76
77 pub fn validate(&self) -> Result<(), String> {
79 let fraud_total: f64 = self.fraud_patterns.iter().map(|p| p.probability).sum();
80 if (fraud_total - 1.0).abs() > 0.01 {
81 return Err(format!(
82 "Fraud pattern probabilities must sum to 1.0, got {}",
83 fraud_total
84 ));
85 }
86
87 let gaap_total: f64 = self.gaap_violations.iter().map(|v| v.probability).sum();
88 if (gaap_total - 1.0).abs() > 0.01 {
89 return Err(format!(
90 "GAAP violation probabilities must sum to 1.0, got {}",
91 gaap_total
92 ));
93 }
94
95 Ok(())
96 }
97}
98
/// One weighted fraud pattern preset.
#[derive(Debug, Clone)]
pub struct FraudPatternConfig {
    /// Which fraud pattern this preset describes.
    pub pattern_type: FraudPatternType,
    /// Selection weight; the injector accumulates these weights in list order
    /// and picks the first preset whose running total exceeds a random draw.
    pub probability: f64,
    /// Pair of account counts — presumably (min, max) accounts involved.
    /// NOTE(review): not read by the injector code visible here; confirm.
    pub account_count: (u8, u8),
    /// Pair of amounts — presumably the (min, max) amount for the pattern.
    /// NOTE(review): not read by the injector code visible here; confirm.
    pub amount_range: (f64, f64),
}
111
112impl FraudPatternConfig {
113 pub fn circular_flow(probability: f64) -> Self {
115 Self {
116 pattern_type: FraudPatternType::CircularFlow,
117 probability,
118 account_count: (3, 5),
119 amount_range: (10000.0, 100000.0),
120 }
121 }
122
123 pub fn threshold_clustering(probability: f64) -> Self {
125 Self {
126 pattern_type: FraudPatternType::ThresholdClustering,
127 probability,
128 account_count: (2, 2),
129 amount_range: (9000.0, 9999.0), }
131 }
132
133 pub fn round_amounts(probability: f64) -> Self {
135 Self {
136 pattern_type: FraudPatternType::RoundAmounts,
137 probability,
138 account_count: (2, 2),
139 amount_range: (1000.0, 50000.0),
140 }
141 }
142
143 pub fn velocity(probability: f64) -> Self {
145 Self {
146 pattern_type: FraudPatternType::HighVelocity,
147 probability,
148 account_count: (3, 6),
149 amount_range: (5000.0, 50000.0),
150 }
151 }
152
153 pub fn dormant_activation(probability: f64) -> Self {
155 Self {
156 pattern_type: FraudPatternType::DormantActivation,
157 probability,
158 account_count: (2, 2),
159 amount_range: (10000.0, 500000.0),
160 }
161 }
162
163 pub fn unusual_pairing(probability: f64) -> Self {
165 Self {
166 pattern_type: FraudPatternType::UnusualPairing,
167 probability,
168 account_count: (2, 2),
169 amount_range: (5000.0, 100000.0),
170 }
171 }
172}
173
/// One weighted GAAP violation kind.
#[derive(Debug, Clone)]
pub struct GaapViolationConfig {
    /// Which violation this entry of the weight table describes.
    pub violation_type: GaapViolationType,
    /// Selection weight; the injector accumulates these weights in list order
    /// and picks the first kind whose running total exceeds a random draw.
    pub probability: f64,
}
182
183impl GaapViolationConfig {
184 pub fn new(violation_type: GaapViolationType, probability: f64) -> Self {
186 Self {
187 violation_type,
188 probability,
189 }
190 }
191}
192
/// Toggles for the kinds of timing anomalies.
///
/// NOTE(review): the injector code visible alongside this struct never reads
/// these flags (it always produces an "after_hours" anomaly) — confirm where
/// they are meant to be honoured.
#[derive(Debug, Clone)]
pub struct TimingAnomalyConfig {
    /// Toggle for after-hours postings.
    pub after_hours: bool,
    /// Toggle for weekend postings.
    pub weekend_entries: bool,
    /// Toggle for holiday postings.
    pub holiday_entries: bool,
    /// Toggle for month-end manipulation.
    pub month_end_manipulation: bool,
}
205
206impl Default for TimingAnomalyConfig {
207 fn default() -> Self {
208 Self {
209 after_hours: true,
210 weekend_entries: true,
211 holiday_entries: false,
212 month_end_manipulation: true,
213 }
214 }
215}
216
/// Settings for amount-based anomalies.
///
/// NOTE(review): of these fields, only `outlier_multiplier` is read by the
/// injector code visible alongside this struct — confirm where the boolean
/// toggles are meant to be honoured.
#[derive(Debug, Clone)]
pub struct AmountAnomalyConfig {
    /// Toggle for suspiciously round amounts.
    pub round_amounts: bool,
    /// Toggle for Benford's-law violations.
    pub benford_violations: bool,
    /// Toggle for outlier amounts.
    pub outliers: bool,
    /// Factor by which the outlier injection scales every line amount and the
    /// entry totals.
    pub outlier_multiplier: f64,
}
229
230impl Default for AmountAnomalyConfig {
231 fn default() -> Self {
232 Self {
233 round_amounts: true,
234 benford_violations: true,
235 outliers: true,
236 outlier_multiplier: 10.0,
237 }
238 }
239}
240
/// Stateful injector that randomly tampers with journal entries according to
/// an `AnomalyInjectionConfig` and records what it did.
pub struct AnomalyInjector {
    /// Injection settings supplied at construction.
    config: AnomalyInjectionConfig,
    /// Random source; seeded in `new`, so a fixed seed fixes the draw sequence.
    rng: StdRng,
    /// Account index → classification flags, populated by `register_account`.
    account_types: HashMap<u16, AccountTypeInfo>,
    /// Running counters, exposed through `stats` and cleared by `reset_stats`.
    stats: InjectionStats,
    /// Circular-flow bookkeeping; initialised empty and not otherwise used by
    /// the visible code, hence the `dead_code` allowance.
    #[allow(dead_code)]
    pending_circular_flows: Vec<CircularFlowState>,
    /// Account indices recorded through `mark_dormant` (kept free of duplicates).
    dormant_accounts: Vec<u16>,
}
257
/// Classification flags for a single account; all flags default to `false`.
///
/// The injector code visible alongside this struct consults only
/// `is_revenue`, `is_expense` and `is_cash`.
#[derive(Debug, Clone, Copy, Default)]
pub struct AccountTypeInfo {
    /// Account is an asset account.
    pub is_asset: bool,
    /// Account is a liability account.
    pub is_liability: bool,
    /// Account is a revenue account.
    pub is_revenue: bool,
    /// Account is an expense account.
    pub is_expense: bool,
    /// Account is an equity account.
    pub is_equity: bool,
    /// Account is a cash account.
    pub is_cash: bool,
    /// Account is a suspense account.
    pub is_suspense: bool,
}
276
/// State record for a circular flow.
///
/// NOTE(review): never constructed or read by the visible code (hence the
/// `dead_code` allowance); the field meanings below are inferred from their
/// names — confirm before relying on them.
#[derive(Debug, Clone)]
#[allow(dead_code)]
struct CircularFlowState {
    /// Account indices taking part in the flow.
    accounts: Vec<u16>,
    /// Presumably the position within `accounts` reached so far.
    current_position: usize,
    /// Amount associated with the flow.
    amount: Decimal128,
    /// Presumably the number of steps still to be emitted.
    remaining: usize,
}
290
/// Counters describing what an `AnomalyInjector` has done so far.
#[derive(Debug, Clone, Default)]
pub struct InjectionStats {
    /// Number of entries passed to `AnomalyInjector::process`.
    pub entries_processed: u64,
    /// Number of entries counted as carrying an injected anomaly of any kind.
    pub anomalies_injected: u64,
    /// Injection count per fraud pattern.
    pub fraud_patterns: HashMap<FraudPatternType, u32>,
    /// Injection count per GAAP violation kind.
    pub gaap_violations: HashMap<GaapViolationType, u32>,
    /// Number of timing anomalies injected.
    pub timing_anomalies: u32,
    /// Number of amount anomalies injected.
    pub amount_anomalies: u32,
}
307
/// Outcome of passing one journal entry through `AnomalyInjector::process`.
#[derive(Debug, Clone)]
pub struct InjectionResult {
    /// The entry header, possibly modified.
    pub entry: JournalEntry,
    /// The debit lines, possibly modified.
    pub debit_lines: Vec<JournalLineItem>,
    /// The credit lines, possibly modified.
    pub credit_lines: Vec<JournalLineItem>,
    /// Whether the injector counted this entry as anomalous.
    pub anomaly_injected: bool,
    /// What was injected; always `None` when labelling is disabled in the
    /// configuration, even if `anomaly_injected` is `true`.
    pub anomaly_label: Option<AnomalyLabel>,
}
322
/// Ground-truth label describing which anomaly was injected into an entry.
#[derive(Debug, Clone)]
pub enum AnomalyLabel {
    /// A fraud pattern of the given type.
    FraudPattern(FraudPatternType),
    /// A GAAP violation of the given type.
    GaapViolation(GaapViolationType),
    /// A timing anomaly; the visible injector emits `"after_hours"`.
    TimingAnomaly(String),
    /// An amount anomaly; the visible injector emits `"outlier"`.
    AmountAnomaly(String),
}
335
336impl AnomalyInjector {
337 pub fn new(config: AnomalyInjectionConfig, seed: Option<u64>) -> Self {
339 let seed = seed.unwrap_or_else(|| rand::thread_rng().gen());
340 Self {
341 config,
342 rng: StdRng::seed_from_u64(seed),
343 account_types: HashMap::new(),
344 stats: InjectionStats::default(),
345 pending_circular_flows: Vec::new(),
346 dormant_accounts: Vec::new(),
347 }
348 }
349
350 pub fn register_account(&mut self, index: u16, info: AccountTypeInfo) {
352 self.account_types.insert(index, info);
353 }
354
355 pub fn mark_dormant(&mut self, index: u16) {
357 if !self.dormant_accounts.contains(&index) {
358 self.dormant_accounts.push(index);
359 }
360 }
361
362 pub fn process(
364 &mut self,
365 entry: JournalEntry,
366 debit_lines: Vec<JournalLineItem>,
367 credit_lines: Vec<JournalLineItem>,
368 ) -> InjectionResult {
369 self.stats.entries_processed += 1;
370
371 if self.config.injection_rate <= 0.0 || self.rng.gen::<f64>() > self.config.injection_rate {
373 return InjectionResult {
374 entry,
375 debit_lines,
376 credit_lines,
377 anomaly_injected: false,
378 anomaly_label: None,
379 };
380 }
381
382 let anomaly_type: f64 = self.rng.gen();
384
385 if anomaly_type < 0.5 {
386 self.inject_fraud_pattern(entry, debit_lines, credit_lines)
388 } else if anomaly_type < 0.8 {
389 self.inject_gaap_violation(entry, debit_lines, credit_lines)
391 } else if anomaly_type < 0.9 {
392 self.inject_timing_anomaly(entry, debit_lines, credit_lines)
394 } else {
395 self.inject_amount_anomaly(entry, debit_lines, credit_lines)
397 }
398 }
399
400 fn inject_fraud_pattern(
402 &mut self,
403 mut entry: JournalEntry,
404 mut debit_lines: Vec<JournalLineItem>,
405 mut credit_lines: Vec<JournalLineItem>,
406 ) -> InjectionResult {
407 let pattern_type = self.select_fraud_pattern();
409
410 let label = match pattern_type {
411 FraudPatternType::ThresholdClustering => {
412 let threshold = 10000.0;
414 let new_amount = Decimal128::from_f64(threshold - self.rng.gen_range(1.0..999.0));
415
416 for line in &mut debit_lines {
417 line.amount = new_amount;
418 }
419 for line in &mut credit_lines {
420 line.amount = new_amount;
421 }
422 entry.total_debits = new_amount;
423 entry.total_credits = new_amount;
424
425 Some(AnomalyLabel::FraudPattern(
426 FraudPatternType::ThresholdClustering,
427 ))
428 }
429
430 FraudPatternType::RoundAmounts => {
431 let round_amounts = [1000.0, 5000.0, 10000.0, 25000.0, 50000.0, 100000.0];
433 let new_amount =
434 Decimal128::from_f64(round_amounts[self.rng.gen_range(0..round_amounts.len())]);
435
436 for line in &mut debit_lines {
437 line.amount = new_amount;
438 }
439 for line in &mut credit_lines {
440 line.amount = new_amount;
441 }
442 entry.total_debits = new_amount;
443 entry.total_credits = new_amount;
444
445 Some(AnomalyLabel::FraudPattern(FraudPatternType::RoundAmounts))
446 }
447
448 FraudPatternType::UnusualPairing => {
449 if let (Some(revenue_idx), Some(expense_idx)) = self.find_unusual_pair() {
452 if !debit_lines.is_empty() {
453 debit_lines[0].account_index = revenue_idx; }
455 if !credit_lines.is_empty() {
456 credit_lines[0].account_index = expense_idx; }
458 Some(AnomalyLabel::FraudPattern(FraudPatternType::UnusualPairing))
459 } else {
460 None
461 }
462 }
463
464 _ => {
465 Some(AnomalyLabel::FraudPattern(pattern_type))
467 }
468 };
469
470 if label.is_some() {
471 self.stats.anomalies_injected += 1;
472 *self.stats.fraud_patterns.entry(pattern_type).or_insert(0) += 1;
473 }
474
475 InjectionResult {
476 entry,
477 debit_lines,
478 credit_lines,
479 anomaly_injected: label.is_some(),
480 anomaly_label: if self.config.label_anomalies {
481 label
482 } else {
483 None
484 },
485 }
486 }
487
488 fn inject_gaap_violation(
490 &mut self,
491 mut entry: JournalEntry,
492 mut debit_lines: Vec<JournalLineItem>,
493 mut credit_lines: Vec<JournalLineItem>,
494 ) -> InjectionResult {
495 let violation_type = self.select_gaap_violation();
496
497 let label = match violation_type {
498 GaapViolationType::UnbalancedEntry => {
499 if !credit_lines.is_empty() {
501 let adjustment = Decimal128::from_f64(self.rng.gen_range(100.0..1000.0));
502 credit_lines[0].amount = credit_lines[0].amount + adjustment;
503 entry.total_credits = entry.total_credits + adjustment;
504 entry.flags.0 &= !crate::models::JournalEntryFlags::IS_BALANCED;
505 }
506 Some(AnomalyLabel::GaapViolation(
507 GaapViolationType::UnbalancedEntry,
508 ))
509 }
510
511 GaapViolationType::RevenueToCashDirect => {
512 if let (Some(revenue_idx), Some(cash_idx)) = self.find_revenue_cash_pair() {
514 if !debit_lines.is_empty() {
515 debit_lines[0].account_index = cash_idx;
516 }
517 if !credit_lines.is_empty() {
518 credit_lines[0].account_index = revenue_idx;
519 }
520 Some(AnomalyLabel::GaapViolation(
521 GaapViolationType::RevenueToCashDirect,
522 ))
523 } else {
524 None
525 }
526 }
527
528 _ => {
529 Some(AnomalyLabel::GaapViolation(violation_type))
531 }
532 };
533
534 if label.is_some() {
535 self.stats.anomalies_injected += 1;
536 *self
537 .stats
538 .gaap_violations
539 .entry(violation_type)
540 .or_insert(0) += 1;
541 }
542
543 InjectionResult {
544 entry,
545 debit_lines,
546 credit_lines,
547 anomaly_injected: label.is_some(),
548 anomaly_label: if self.config.label_anomalies {
549 label
550 } else {
551 None
552 },
553 }
554 }
555
556 fn inject_timing_anomaly(
558 &mut self,
559 mut entry: JournalEntry,
560 debit_lines: Vec<JournalLineItem>,
561 credit_lines: Vec<JournalLineItem>,
562 ) -> InjectionResult {
563 let ms_per_day = 86_400_000u64;
566 let ms_per_hour = 3_600_000u64;
567 let day_start = (entry.posting_date.physical / ms_per_day) * ms_per_day;
568 entry.posting_date.physical =
569 day_start + 23 * ms_per_hour + self.rng.gen_range(0..ms_per_hour);
570
571 self.stats.anomalies_injected += 1;
572 self.stats.timing_anomalies += 1;
573
574 InjectionResult {
575 entry,
576 debit_lines,
577 credit_lines,
578 anomaly_injected: true,
579 anomaly_label: if self.config.label_anomalies {
580 Some(AnomalyLabel::TimingAnomaly("after_hours".to_string()))
581 } else {
582 None
583 },
584 }
585 }
586
587 fn inject_amount_anomaly(
589 &mut self,
590 mut entry: JournalEntry,
591 mut debit_lines: Vec<JournalLineItem>,
592 mut credit_lines: Vec<JournalLineItem>,
593 ) -> InjectionResult {
594 let multiplier = self.config.amount_anomalies.outlier_multiplier;
596 let current = entry.total_debits.to_f64();
597 let new_amount = Decimal128::from_f64(current * multiplier);
598
599 for line in &mut debit_lines {
600 line.amount = Decimal128::from_f64(line.amount.to_f64() * multiplier);
601 }
602 for line in &mut credit_lines {
603 line.amount = Decimal128::from_f64(line.amount.to_f64() * multiplier);
604 }
605 entry.total_debits = new_amount;
606 entry.total_credits = new_amount;
607
608 self.stats.anomalies_injected += 1;
609 self.stats.amount_anomalies += 1;
610
611 InjectionResult {
612 entry,
613 debit_lines,
614 credit_lines,
615 anomaly_injected: true,
616 anomaly_label: if self.config.label_anomalies {
617 Some(AnomalyLabel::AmountAnomaly("outlier".to_string()))
618 } else {
619 None
620 },
621 }
622 }
623
624 fn select_fraud_pattern(&mut self) -> FraudPatternType {
626 let r: f64 = self.rng.gen();
627 let mut cumulative = 0.0;
628
629 for config in &self.config.fraud_patterns {
630 cumulative += config.probability;
631 if r < cumulative {
632 return config.pattern_type;
633 }
634 }
635
636 FraudPatternType::RoundAmounts }
638
639 fn select_gaap_violation(&mut self) -> GaapViolationType {
641 let r: f64 = self.rng.gen();
642 let mut cumulative = 0.0;
643
644 for config in &self.config.gaap_violations {
645 cumulative += config.probability;
646 if r < cumulative {
647 return config.violation_type;
648 }
649 }
650
651 GaapViolationType::UnbalancedEntry }
653
654 fn find_unusual_pair(&self) -> (Option<u16>, Option<u16>) {
656 let revenue = self
657 .account_types
658 .iter()
659 .find(|(_, info)| info.is_revenue)
660 .map(|(&idx, _)| idx);
661 let expense = self
662 .account_types
663 .iter()
664 .find(|(_, info)| info.is_expense)
665 .map(|(&idx, _)| idx);
666 (revenue, expense)
667 }
668
669 fn find_revenue_cash_pair(&self) -> (Option<u16>, Option<u16>) {
671 let revenue = self
672 .account_types
673 .iter()
674 .find(|(_, info)| info.is_revenue)
675 .map(|(&idx, _)| idx);
676 let cash = self
677 .account_types
678 .iter()
679 .find(|(_, info)| info.is_cash)
680 .map(|(&idx, _)| idx);
681 (revenue, cash)
682 }
683
684 pub fn stats(&self) -> &InjectionStats {
686 &self.stats
687 }
688
689 pub fn reset_stats(&mut self) {
691 self.stats = InjectionStats::default();
692 }
693}
694
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::HybridTimestamp;
    use uuid::Uuid;

    /// Builds a journal entry with fresh random identifiers, stamped "now".
    fn sample_entry() -> JournalEntry {
        JournalEntry::new(Uuid::new_v4(), Uuid::new_v4(), HybridTimestamp::now())
    }

    /// The default configuration validates and has a positive injection rate.
    #[test]
    fn test_config_default() {
        let defaults = AnomalyInjectionConfig::default();
        assert!(defaults.validate().is_ok());
        assert!(defaults.injection_rate > 0.0);
    }

    /// A freshly built injector has processed nothing.
    #[test]
    fn test_injector_creation() {
        let injector = AnomalyInjector::new(AnomalyInjectionConfig::default(), Some(42));
        assert_eq!(injector.stats().entries_processed, 0);
    }

    /// With injection disabled, entries pass through unflagged.
    #[test]
    fn test_disabled_injection() {
        let mut injector = AnomalyInjector::new(AnomalyInjectionConfig::disabled(), Some(42));

        let outcome = injector.process(sample_entry(), Vec::new(), Vec::new());
        assert!(!outcome.anomaly_injected);
    }

    /// With a 100% injection rate, a batch of entries yields anomalies.
    #[test]
    fn test_fraud_pattern_selection() {
        let always_inject = AnomalyInjectionConfig {
            injection_rate: 1.0,
            ..Default::default()
        };
        let mut injector = AnomalyInjector::new(always_inject, Some(42));

        for _ in 0..100 {
            let entry = sample_entry();
            let debit_lines = vec![JournalLineItem::debit(0, Decimal128::from_f64(1000.0), 1)];
            let credit_lines = vec![JournalLineItem::credit(1, Decimal128::from_f64(1000.0), 2)];

            injector.process(entry, debit_lines, credit_lines);
        }

        assert!(injector.stats().anomalies_injected > 0);
    }
}