ringkernel_accnet/fabric/
anomaly_injection.rs

1//! Anomaly injection for testing fraud and violation detection.
2//!
3//! This module allows configurable injection of various anomaly patterns
4//! into the synthetic data stream for testing detection algorithms.
5
6use crate::models::{
7    Decimal128, FraudPatternType, GaapViolationType, JournalEntry, JournalLineItem,
8};
9use rand::prelude::*;
10use std::collections::HashMap;
11
/// Configuration for anomaly injection.
///
/// Controls how often entries are selected for tampering and which anomaly
/// settings are available to the injector.
#[derive(Debug, Clone)]
pub struct AnomalyInjectionConfig {
    /// Overall injection rate (0.0 - 1.0). A value of `0.0` or below turns
    /// injection off entirely.
    pub injection_rate: f64,

    /// Fraud pattern injection settings. `validate` expects the
    /// probabilities of all entries to sum to 1.0.
    pub fraud_patterns: Vec<FraudPatternConfig>,

    /// GAAP violation injection settings. `validate` expects the
    /// probabilities of all entries to sum to 1.0.
    pub gaap_violations: Vec<GaapViolationConfig>,

    /// Timing anomaly injection settings
    pub timing_anomalies: TimingAnomalyConfig,

    /// Amount anomaly injection settings
    pub amount_anomalies: AmountAnomalyConfig,

    /// Whether injected anomalies should be labeled (for training/evaluation).
    /// When `false`, results still report that an anomaly was injected but
    /// carry no label.
    pub label_anomalies: bool,
}
33
34impl Default for AnomalyInjectionConfig {
35    fn default() -> Self {
36        Self {
37            injection_rate: 0.02, // 2% of transactions
38            fraud_patterns: vec![
39                FraudPatternConfig::circular_flow(0.25),
40                FraudPatternConfig::threshold_clustering(0.20),
41                FraudPatternConfig::round_amounts(0.15),
42                FraudPatternConfig::velocity(0.15),
43                FraudPatternConfig::dormant_activation(0.10),
44                FraudPatternConfig::unusual_pairing(0.15),
45            ],
46            gaap_violations: vec![
47                GaapViolationConfig::new(GaapViolationType::RevenueToCashDirect, 0.30),
48                GaapViolationConfig::new(GaapViolationType::ExpenseToAsset, 0.25),
49                GaapViolationConfig::new(GaapViolationType::CashToRevenue, 0.20),
50                GaapViolationConfig::new(GaapViolationType::RevenueToExpense, 0.10),
51                GaapViolationConfig::new(GaapViolationType::UnbalancedEntry, 0.15),
52            ],
53            timing_anomalies: TimingAnomalyConfig::default(),
54            amount_anomalies: AmountAnomalyConfig::default(),
55            label_anomalies: true,
56        }
57    }
58}
59
60impl AnomalyInjectionConfig {
61    /// Create a configuration with no anomaly injection.
62    pub fn disabled() -> Self {
63        Self {
64            injection_rate: 0.0,
65            ..Default::default()
66        }
67    }
68
69    /// Create a high anomaly rate for testing (10%).
70    pub fn high_rate() -> Self {
71        Self {
72            injection_rate: 0.10,
73            ..Default::default()
74        }
75    }
76
77    /// Validate that probability distributions sum correctly.
78    pub fn validate(&self) -> Result<(), String> {
79        let fraud_total: f64 = self.fraud_patterns.iter().map(|p| p.probability).sum();
80        if (fraud_total - 1.0).abs() > 0.01 {
81            return Err(format!(
82                "Fraud pattern probabilities must sum to 1.0, got {}",
83                fraud_total
84            ));
85        }
86
87        let gaap_total: f64 = self.gaap_violations.iter().map(|v| v.probability).sum();
88        if (gaap_total - 1.0).abs() > 0.01 {
89            return Err(format!(
90                "GAAP violation probabilities must sum to 1.0, got {}",
91                gaap_total
92            ));
93        }
94
95        Ok(())
96    }
97}
98
/// Configuration for a specific fraud pattern.
///
/// Instances are normally built through the preset constructors
/// (`circular_flow`, `threshold_clustering`, ...), which fill in the account
/// count and amount range for the pattern.
#[derive(Debug, Clone)]
pub struct FraudPatternConfig {
    /// Type of fraud pattern
    pub pattern_type: FraudPatternType,
    /// Probability of this pattern (within fraud injections); used as a
    /// weight when a pattern is drawn.
    pub probability: f64,
    /// Number of accounts involved (min, max)
    pub account_count: (u8, u8),
    /// Amount range for fraudulent transactions, as (low, high)
    pub amount_range: (f64, f64),
}
111
112impl FraudPatternConfig {
113    /// Create a circular flow pattern configuration.
114    pub fn circular_flow(probability: f64) -> Self {
115        Self {
116            pattern_type: FraudPatternType::CircularFlow,
117            probability,
118            account_count: (3, 5),
119            amount_range: (10000.0, 100000.0),
120        }
121    }
122
123    /// Create a threshold clustering pattern configuration.
124    pub fn threshold_clustering(probability: f64) -> Self {
125        Self {
126            pattern_type: FraudPatternType::ThresholdClustering,
127            probability,
128            account_count: (2, 2),
129            amount_range: (9000.0, 9999.0), // Just below $10k threshold
130        }
131    }
132
133    /// Create a round amounts pattern configuration.
134    pub fn round_amounts(probability: f64) -> Self {
135        Self {
136            pattern_type: FraudPatternType::RoundAmounts,
137            probability,
138            account_count: (2, 2),
139            amount_range: (1000.0, 50000.0),
140        }
141    }
142
143    /// Create a high velocity pattern configuration.
144    pub fn velocity(probability: f64) -> Self {
145        Self {
146            pattern_type: FraudPatternType::HighVelocity,
147            probability,
148            account_count: (3, 6),
149            amount_range: (5000.0, 50000.0),
150        }
151    }
152
153    /// Create a dormant activation pattern configuration.
154    pub fn dormant_activation(probability: f64) -> Self {
155        Self {
156            pattern_type: FraudPatternType::DormantActivation,
157            probability,
158            account_count: (2, 2),
159            amount_range: (10000.0, 500000.0),
160        }
161    }
162
163    /// Create an unusual pairing pattern configuration.
164    pub fn unusual_pairing(probability: f64) -> Self {
165        Self {
166            pattern_type: FraudPatternType::UnusualPairing,
167            probability,
168            account_count: (2, 2),
169            amount_range: (5000.0, 100000.0),
170        }
171    }
172}
173
/// Configuration for a GAAP violation: which violation to inject and how
/// heavily it is weighted against the other configured violations.
#[derive(Debug, Clone)]
pub struct GaapViolationConfig {
    /// Type of violation
    pub violation_type: GaapViolationType,
    /// Probability of this violation (within GAAP injections); used as a
    /// weight when a violation is drawn.
    pub probability: f64,
}
182
183impl GaapViolationConfig {
184    /// Create a new GAAP violation configuration.
185    pub fn new(violation_type: GaapViolationType, probability: f64) -> Self {
186        Self {
187            violation_type,
188            probability,
189        }
190    }
191}
192
/// Switches for the timing-based anomaly kinds.
#[derive(Debug, Clone)]
pub struct TimingAnomalyConfig {
    /// Allow entries posted after hours
    pub after_hours: bool,
    /// Allow entries posted on weekends
    pub weekend_entries: bool,
    /// Allow entries posted on holidays
    pub holiday_entries: bool,
    /// Allow month-end manipulation
    pub month_end_manipulation: bool,
}

impl Default for TimingAnomalyConfig {
    /// Everything is enabled by default except holiday entries.
    fn default() -> Self {
        let holiday_entries = false;
        let (after_hours, weekend_entries, month_end_manipulation) = (true, true, true);

        Self {
            after_hours,
            weekend_entries,
            holiday_entries,
            month_end_manipulation,
        }
    }
}
216
/// Switches and parameters for the amount-based anomaly kinds.
#[derive(Debug, Clone)]
pub struct AmountAnomalyConfig {
    /// Allow round amount anomalies
    pub round_amounts: bool,
    /// Allow Benford's Law violations
    pub benford_violations: bool,
    /// Allow outlier amounts
    pub outliers: bool,
    /// Factor applied to amounts to produce an outlier (e.g., 10x normal)
    pub outlier_multiplier: f64,
}

impl Default for AmountAnomalyConfig {
    /// All amount anomaly kinds enabled, with outliers at ten times the
    /// original amount.
    fn default() -> Self {
        const DEFAULT_OUTLIER_MULTIPLIER: f64 = 10.0;

        Self {
            outlier_multiplier: DEFAULT_OUTLIER_MULTIPLIER,
            outliers: true,
            benford_violations: true,
            round_amounts: true,
        }
    }
}
240
/// Injector that modifies transactions to create anomalies.
///
/// Holds the configuration, the random number generator behind every
/// injection decision, the registered account metadata and running
/// statistics.
pub struct AnomalyInjector {
    /// Configuration
    config: AnomalyInjectionConfig,
    /// Random number generator (seeded in `new`)
    rng: StdRng,
    /// Account type mapping (index -> is_asset, is_revenue, etc.), filled
    /// through `register_account`
    account_types: HashMap<u16, AccountTypeInfo>,
    /// Injection statistics
    stats: InjectionStats,
    /// Pending circular flow entries (for future multi-entry injection)
    // Only initialised in `new` by the code in this file, hence the lint
    // suppression.
    #[allow(dead_code)]
    pending_circular_flows: Vec<CircularFlowState>,
    /// Dormant accounts (haven't been used recently), filled through
    /// `mark_dormant` without duplicates
    dormant_accounts: Vec<u16>,
}
257
/// Information about an account's type for violation detection.
///
/// A plain set of independent flags; the derived `Default` leaves every flag
/// `false`.
#[derive(Debug, Clone, Copy, Default)]
pub struct AccountTypeInfo {
    /// Whether this is an asset account.
    pub is_asset: bool,
    /// Whether this is a liability account.
    pub is_liability: bool,
    /// Whether this is a revenue account.
    pub is_revenue: bool,
    /// Whether this is an expense account.
    pub is_expense: bool,
    /// Whether this is an equity account.
    pub is_equity: bool,
    /// Whether this is a cash account.
    pub is_cash: bool,
    /// Whether this is a suspense account.
    pub is_suspense: bool,
}
276
/// State for multi-entry circular flow injection.
///
/// Reserved for a future feature: nothing in this file constructs or reads
/// it yet, which is why dead-code warnings are silenced.
#[derive(Debug, Clone)]
#[allow(dead_code)]
struct CircularFlowState {
    /// Accounts in the circle (account indices)
    accounts: Vec<u16>,
    /// Current position in the circle
    // NOTE(review): presumably an index into `accounts` — confirm once used.
    current_position: usize,
    /// Amount being circulated
    amount: Decimal128,
    /// Entries remaining
    remaining: usize,
}
290
/// Statistics about injected anomalies.
///
/// The derived `Default` starts every counter at zero with empty maps.
#[derive(Debug, Clone, Default)]
pub struct InjectionStats {
    /// Total entries processed (one per `process` call, injected or not)
    pub entries_processed: u64,
    /// Total anomalies injected, across all categories
    pub anomalies_injected: u64,
    /// Fraud patterns by type (number of injections per pattern)
    pub fraud_patterns: HashMap<FraudPatternType, u32>,
    /// GAAP violations by type (number of injections per violation)
    pub gaap_violations: HashMap<GaapViolationType, u32>,
    /// Timing anomalies
    pub timing_anomalies: u32,
    /// Amount anomalies
    pub amount_anomalies: u32,
}
307
/// Result of anomaly injection.
///
/// Always hands back the entry and its lines, whether or not they were
/// changed.
#[derive(Debug, Clone)]
pub struct InjectionResult {
    /// Modified entry (or original if not modified)
    pub entry: JournalEntry,
    /// Modified debit lines (or the original ones if not modified)
    pub debit_lines: Vec<JournalLineItem>,
    /// Modified credit lines (or the original ones if not modified)
    pub credit_lines: Vec<JournalLineItem>,
    /// Whether an anomaly was injected
    pub anomaly_injected: bool,
    /// Label for the anomaly (if labeling is enabled); `None` when labeling
    /// is disabled or nothing was injected
    pub anomaly_label: Option<AnomalyLabel>,
}
322
/// Label describing an injected anomaly.
#[derive(Debug, Clone)]
pub enum AnomalyLabel {
    /// Fraud pattern, tagged with the pattern that was injected
    FraudPattern(FraudPatternType),
    /// GAAP violation, tagged with the violation that was injected
    GaapViolation(GaapViolationType),
    /// Timing anomaly; the string names the kind (the injector in this file
    /// emits `"after_hours"`)
    TimingAnomaly(String),
    /// Amount anomaly; the string names the kind (the injector in this file
    /// emits `"outlier"`)
    AmountAnomaly(String),
}
335
336impl AnomalyInjector {
337    /// Create a new anomaly injector.
338    pub fn new(config: AnomalyInjectionConfig, seed: Option<u64>) -> Self {
339        let seed = seed.unwrap_or_else(|| rand::thread_rng().gen());
340        Self {
341            config,
342            rng: StdRng::seed_from_u64(seed),
343            account_types: HashMap::new(),
344            stats: InjectionStats::default(),
345            pending_circular_flows: Vec::new(),
346            dormant_accounts: Vec::new(),
347        }
348    }
349
350    /// Register an account's type information.
351    pub fn register_account(&mut self, index: u16, info: AccountTypeInfo) {
352        self.account_types.insert(index, info);
353    }
354
355    /// Mark an account as dormant.
356    pub fn mark_dormant(&mut self, index: u16) {
357        if !self.dormant_accounts.contains(&index) {
358            self.dormant_accounts.push(index);
359        }
360    }
361
362    /// Process an entry and potentially inject an anomaly.
363    pub fn process(
364        &mut self,
365        entry: JournalEntry,
366        debit_lines: Vec<JournalLineItem>,
367        credit_lines: Vec<JournalLineItem>,
368    ) -> InjectionResult {
369        self.stats.entries_processed += 1;
370
371        // Check if we should inject an anomaly
372        if self.config.injection_rate <= 0.0 || self.rng.gen::<f64>() > self.config.injection_rate {
373            return InjectionResult {
374                entry,
375                debit_lines,
376                credit_lines,
377                anomaly_injected: false,
378                anomaly_label: None,
379            };
380        }
381
382        // Decide what type of anomaly to inject
383        let anomaly_type: f64 = self.rng.gen();
384
385        if anomaly_type < 0.5 {
386            // Fraud pattern (50% of anomalies)
387            self.inject_fraud_pattern(entry, debit_lines, credit_lines)
388        } else if anomaly_type < 0.8 {
389            // GAAP violation (30% of anomalies)
390            self.inject_gaap_violation(entry, debit_lines, credit_lines)
391        } else if anomaly_type < 0.9 {
392            // Timing anomaly (10% of anomalies)
393            self.inject_timing_anomaly(entry, debit_lines, credit_lines)
394        } else {
395            // Amount anomaly (10% of anomalies)
396            self.inject_amount_anomaly(entry, debit_lines, credit_lines)
397        }
398    }
399
400    /// Inject a fraud pattern.
401    fn inject_fraud_pattern(
402        &mut self,
403        mut entry: JournalEntry,
404        mut debit_lines: Vec<JournalLineItem>,
405        mut credit_lines: Vec<JournalLineItem>,
406    ) -> InjectionResult {
407        // Select fraud pattern type
408        let pattern_type = self.select_fraud_pattern();
409
410        let label = match pattern_type {
411            FraudPatternType::ThresholdClustering => {
412                // Modify amount to be just below threshold
413                let threshold = 10000.0;
414                let new_amount = Decimal128::from_f64(threshold - self.rng.gen_range(1.0..999.0));
415
416                for line in &mut debit_lines {
417                    line.amount = new_amount;
418                }
419                for line in &mut credit_lines {
420                    line.amount = new_amount;
421                }
422                entry.total_debits = new_amount;
423                entry.total_credits = new_amount;
424
425                Some(AnomalyLabel::FraudPattern(
426                    FraudPatternType::ThresholdClustering,
427                ))
428            }
429
430            FraudPatternType::RoundAmounts => {
431                // Make amount suspiciously round
432                let round_amounts = [1000.0, 5000.0, 10000.0, 25000.0, 50000.0, 100000.0];
433                let new_amount =
434                    Decimal128::from_f64(round_amounts[self.rng.gen_range(0..round_amounts.len())]);
435
436                for line in &mut debit_lines {
437                    line.amount = new_amount;
438                }
439                for line in &mut credit_lines {
440                    line.amount = new_amount;
441                }
442                entry.total_debits = new_amount;
443                entry.total_credits = new_amount;
444
445                Some(AnomalyLabel::FraudPattern(FraudPatternType::RoundAmounts))
446            }
447
448            FraudPatternType::UnusualPairing => {
449                // Create an implausible account pairing
450                // Find a revenue account and expense account
451                if let (Some(revenue_idx), Some(expense_idx)) = self.find_unusual_pair() {
452                    if !debit_lines.is_empty() {
453                        debit_lines[0].account_index = revenue_idx; // Revenue as debit is unusual
454                    }
455                    if !credit_lines.is_empty() {
456                        credit_lines[0].account_index = expense_idx; // Expense as credit is unusual
457                    }
458                    Some(AnomalyLabel::FraudPattern(FraudPatternType::UnusualPairing))
459                } else {
460                    None
461                }
462            }
463
464            _ => {
465                // Other patterns require multi-entry injection (simplified here)
466                Some(AnomalyLabel::FraudPattern(pattern_type))
467            }
468        };
469
470        if label.is_some() {
471            self.stats.anomalies_injected += 1;
472            *self.stats.fraud_patterns.entry(pattern_type).or_insert(0) += 1;
473        }
474
475        InjectionResult {
476            entry,
477            debit_lines,
478            credit_lines,
479            anomaly_injected: label.is_some(),
480            anomaly_label: if self.config.label_anomalies {
481                label
482            } else {
483                None
484            },
485        }
486    }
487
488    /// Inject a GAAP violation.
489    fn inject_gaap_violation(
490        &mut self,
491        mut entry: JournalEntry,
492        mut debit_lines: Vec<JournalLineItem>,
493        mut credit_lines: Vec<JournalLineItem>,
494    ) -> InjectionResult {
495        let violation_type = self.select_gaap_violation();
496
497        let label = match violation_type {
498            GaapViolationType::UnbalancedEntry => {
499                // Make entry unbalanced
500                if !credit_lines.is_empty() {
501                    let adjustment = Decimal128::from_f64(self.rng.gen_range(100.0..1000.0));
502                    credit_lines[0].amount = credit_lines[0].amount + adjustment;
503                    entry.total_credits = entry.total_credits + adjustment;
504                    entry.flags.0 &= !crate::models::JournalEntryFlags::IS_BALANCED;
505                }
506                Some(AnomalyLabel::GaapViolation(
507                    GaapViolationType::UnbalancedEntry,
508                ))
509            }
510
511            GaapViolationType::RevenueToCashDirect => {
512                // Find revenue and cash accounts
513                if let (Some(revenue_idx), Some(cash_idx)) = self.find_revenue_cash_pair() {
514                    if !debit_lines.is_empty() {
515                        debit_lines[0].account_index = cash_idx;
516                    }
517                    if !credit_lines.is_empty() {
518                        credit_lines[0].account_index = revenue_idx;
519                    }
520                    Some(AnomalyLabel::GaapViolation(
521                        GaapViolationType::RevenueToCashDirect,
522                    ))
523                } else {
524                    None
525                }
526            }
527
528            _ => {
529                // Other violations need specific account pairs
530                Some(AnomalyLabel::GaapViolation(violation_type))
531            }
532        };
533
534        if label.is_some() {
535            self.stats.anomalies_injected += 1;
536            *self
537                .stats
538                .gaap_violations
539                .entry(violation_type)
540                .or_insert(0) += 1;
541        }
542
543        InjectionResult {
544            entry,
545            debit_lines,
546            credit_lines,
547            anomaly_injected: label.is_some(),
548            anomaly_label: if self.config.label_anomalies {
549                label
550            } else {
551                None
552            },
553        }
554    }
555
556    /// Inject a timing anomaly.
557    fn inject_timing_anomaly(
558        &mut self,
559        mut entry: JournalEntry,
560        debit_lines: Vec<JournalLineItem>,
561        credit_lines: Vec<JournalLineItem>,
562    ) -> InjectionResult {
563        // Modify timestamp to be after hours
564        // Set hour to 23 (11 PM)
565        let ms_per_day = 86_400_000u64;
566        let ms_per_hour = 3_600_000u64;
567        let day_start = (entry.posting_date.physical / ms_per_day) * ms_per_day;
568        entry.posting_date.physical =
569            day_start + 23 * ms_per_hour + self.rng.gen_range(0..ms_per_hour);
570
571        self.stats.anomalies_injected += 1;
572        self.stats.timing_anomalies += 1;
573
574        InjectionResult {
575            entry,
576            debit_lines,
577            credit_lines,
578            anomaly_injected: true,
579            anomaly_label: if self.config.label_anomalies {
580                Some(AnomalyLabel::TimingAnomaly("after_hours".to_string()))
581            } else {
582                None
583            },
584        }
585    }
586
587    /// Inject an amount anomaly.
588    fn inject_amount_anomaly(
589        &mut self,
590        mut entry: JournalEntry,
591        mut debit_lines: Vec<JournalLineItem>,
592        mut credit_lines: Vec<JournalLineItem>,
593    ) -> InjectionResult {
594        // Create an outlier amount
595        let multiplier = self.config.amount_anomalies.outlier_multiplier;
596        let current = entry.total_debits.to_f64();
597        let new_amount = Decimal128::from_f64(current * multiplier);
598
599        for line in &mut debit_lines {
600            line.amount = Decimal128::from_f64(line.amount.to_f64() * multiplier);
601        }
602        for line in &mut credit_lines {
603            line.amount = Decimal128::from_f64(line.amount.to_f64() * multiplier);
604        }
605        entry.total_debits = new_amount;
606        entry.total_credits = new_amount;
607
608        self.stats.anomalies_injected += 1;
609        self.stats.amount_anomalies += 1;
610
611        InjectionResult {
612            entry,
613            debit_lines,
614            credit_lines,
615            anomaly_injected: true,
616            anomaly_label: if self.config.label_anomalies {
617                Some(AnomalyLabel::AmountAnomaly("outlier".to_string()))
618            } else {
619                None
620            },
621        }
622    }
623
624    /// Select a fraud pattern based on configured probabilities.
625    fn select_fraud_pattern(&mut self) -> FraudPatternType {
626        let r: f64 = self.rng.gen();
627        let mut cumulative = 0.0;
628
629        for config in &self.config.fraud_patterns {
630            cumulative += config.probability;
631            if r < cumulative {
632                return config.pattern_type;
633            }
634        }
635
636        FraudPatternType::RoundAmounts // Default
637    }
638
639    /// Select a GAAP violation based on configured probabilities.
640    fn select_gaap_violation(&mut self) -> GaapViolationType {
641        let r: f64 = self.rng.gen();
642        let mut cumulative = 0.0;
643
644        for config in &self.config.gaap_violations {
645            cumulative += config.probability;
646            if r < cumulative {
647                return config.violation_type;
648            }
649        }
650
651        GaapViolationType::UnbalancedEntry // Default
652    }
653
654    /// Find an unusual account pairing (revenue-expense).
655    fn find_unusual_pair(&self) -> (Option<u16>, Option<u16>) {
656        let revenue = self
657            .account_types
658            .iter()
659            .find(|(_, info)| info.is_revenue)
660            .map(|(&idx, _)| idx);
661        let expense = self
662            .account_types
663            .iter()
664            .find(|(_, info)| info.is_expense)
665            .map(|(&idx, _)| idx);
666        (revenue, expense)
667    }
668
669    /// Find revenue and cash accounts.
670    fn find_revenue_cash_pair(&self) -> (Option<u16>, Option<u16>) {
671        let revenue = self
672            .account_types
673            .iter()
674            .find(|(_, info)| info.is_revenue)
675            .map(|(&idx, _)| idx);
676        let cash = self
677            .account_types
678            .iter()
679            .find(|(_, info)| info.is_cash)
680            .map(|(&idx, _)| idx);
681        (revenue, cash)
682    }
683
684    /// Get injection statistics.
685    pub fn stats(&self) -> &InjectionStats {
686        &self.stats
687    }
688
689    /// Reset statistics.
690    pub fn reset_stats(&mut self) {
691        self.stats = InjectionStats::default();
692    }
693}
694
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::HybridTimestamp;
    use uuid::Uuid;

    /// Seed shared by every test that builds an injector.
    const SEED: u64 = 42;

    /// Builds an entry header from two fresh UUIDs and `HybridTimestamp::now()`.
    fn fresh_entry() -> JournalEntry {
        JournalEntry::new(Uuid::new_v4(), Uuid::new_v4(), HybridTimestamp::now())
    }

    /// The default configuration validates and has injection enabled.
    #[test]
    fn test_config_default() {
        let defaults = AnomalyInjectionConfig::default();
        assert!(defaults.validate().is_ok());
        assert!(defaults.injection_rate > 0.0);
    }

    /// A new injector starts with zeroed statistics.
    #[test]
    fn test_injector_creation() {
        let injector = AnomalyInjector::new(AnomalyInjectionConfig::default(), Some(SEED));
        assert_eq!(injector.stats().entries_processed, 0);
    }

    /// With injection disabled, an entry passes through without an anomaly.
    #[test]
    fn test_disabled_injection() {
        let mut injector = AnomalyInjector::new(AnomalyInjectionConfig::disabled(), Some(SEED));

        let outcome = injector.process(fresh_entry(), Vec::new(), Vec::new());
        assert!(!outcome.anomaly_injected);
    }

    /// With a rate of 1.0, processing a batch of entries injects anomalies.
    #[test]
    fn test_fraud_pattern_selection() {
        let always_inject = AnomalyInjectionConfig {
            injection_rate: 1.0, // Always inject
            ..Default::default()
        };
        let mut injector = AnomalyInjector::new(always_inject, Some(SEED));

        // Process multiple entries and verify injections happen
        (0..100).for_each(|_| {
            let entry = fresh_entry();
            let debit = JournalLineItem::debit(0, Decimal128::from_f64(1000.0), 1);
            let credit = JournalLineItem::credit(1, Decimal128::from_f64(1000.0), 2);

            injector.process(entry, vec![debit], vec![credit]);
        });

        assert!(injector.stats().anomalies_injected > 0);
    }
}