Skip to main content

datasynth_generators/anomaly/
patterns.rs

1//! Anomaly patterns for realistic distribution.
2//!
3//! Patterns control how anomalies are distributed across time and entities,
4//! including clustering behavior and temporal patterns.
5
6use chrono::{Datelike, NaiveDate, Weekday};
7use datasynth_core::utils::weighted_select;
8use rand::Rng;
9use std::collections::HashMap;
10
/// Describes how the likelihood of injecting an anomaly varies with the calendar.
#[derive(Debug, Clone)]
pub enum TemporalPattern {
    /// Every date is treated alike.
    Uniform,
    /// Likelihood rises towards the close of a month, quarter, or year.
    PeriodEndSpike {
        /// Factor applied at month-end.
        month_end_multiplier: f64,
        /// Factor applied at quarter-end.
        quarter_end_multiplier: f64,
        /// Factor applied at year-end.
        year_end_multiplier: f64,
    },
    /// Likelihood depends on when the activity happens (after hours, weekends).
    TimeBased {
        /// Factor for after-hours activity.
        after_hours_multiplier: f64,
        /// Factor for weekend activity.
        weekend_multiplier: f64,
    },
    /// Likelihood follows a month-by-month seasonal profile.
    Seasonal {
        /// One factor per calendar month; index 0 is January, index 11 is December.
        month_multipliers: [f64; 12],
    },
    /// A named, externally defined pattern.
    Custom {
        /// Identifier of the custom pattern.
        name: String,
    },
}

impl Default for TemporalPattern {
    /// The default is a period-end spike: 2x at month-end, 3x at quarter-end
    /// and 5x at year-end.
    fn default() -> Self {
        Self::PeriodEndSpike {
            year_end_multiplier: 5.0,
            quarter_end_multiplier: 3.0,
            month_end_multiplier: 2.0,
        }
    }
}
53
54impl TemporalPattern {
55    /// Calculates the probability multiplier for a given date.
56    pub fn probability_multiplier(&self, date: NaiveDate) -> f64 {
57        match self {
58            TemporalPattern::Uniform => 1.0,
59            TemporalPattern::PeriodEndSpike {
60                month_end_multiplier,
61                quarter_end_multiplier,
62                year_end_multiplier,
63            } => {
64                let day = date.day();
65                let month = date.month();
66
67                // Year end (December 28-31)
68                if month == 12 && day >= 28 {
69                    return *year_end_multiplier;
70                }
71
72                // Quarter end (Mar, Jun, Sep, Dec last 3 days)
73                if matches!(month, 3 | 6 | 9 | 12) && day >= 28 {
74                    return *quarter_end_multiplier;
75                }
76
77                // Month end (last 3 days)
78                if day >= 28 {
79                    return *month_end_multiplier;
80                }
81
82                1.0
83            }
84            TemporalPattern::TimeBased {
85                after_hours_multiplier: _,
86                weekend_multiplier,
87            } => {
88                let weekday = date.weekday();
89                if weekday == Weekday::Sat || weekday == Weekday::Sun {
90                    return *weekend_multiplier;
91                }
92                // Assume all entries have potential for after-hours
93                // In practice, this would check timestamp
94                1.0
95            }
96            TemporalPattern::Seasonal { month_multipliers } => {
97                let month_idx = (date.month() - 1) as usize;
98                month_multipliers[month_idx]
99            }
100            TemporalPattern::Custom { .. } => 1.0,
101        }
102    }
103
104    /// Creates a standard audit season pattern (higher in Q1).
105    pub fn audit_season() -> Self {
106        TemporalPattern::Seasonal {
107            month_multipliers: [
108                2.0, 2.0, 1.5, // Q1 - audit busy season
109                1.0, 1.0, 1.2, // Q2 - quarter end
110                1.0, 1.0, 1.2, // Q3 - quarter end
111                1.0, 1.0, 3.0, // Q4 - year end
112            ],
113        }
114    }
115}
116
/// Fraud category used to pick the time window of an anomaly cluster.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum FraudCategory {
    /// Accounts Receivable fraud (invoice aging: 30-45 days)
    AccountsReceivable,
    /// Accounts Payable fraud (payment cycles: 14-30 days)
    AccountsPayable,
    /// Payroll fraud (monthly: 28-35 days)
    Payroll,
    /// Expense fraud (submission cycles: 7-14 days)
    Expense,
    /// Revenue manipulation (quarterly: 85-95 days)
    Revenue,
    /// Asset fraud (periodic: 30-60 days)
    Asset,
    /// General fraud (default short window: 5-10 days)
    General,
}

impl FraudCategory {
    /// Get the time window range `(min, max)` in days for this fraud category.
    pub fn time_window_days(&self) -> (i64, i64) {
        match self {
            FraudCategory::AccountsReceivable => (30, 45), // Invoice aging cycles
            FraudCategory::AccountsPayable => (14, 30),    // Payment terms
            FraudCategory::Payroll => (28, 35),            // Monthly pay cycles
            FraudCategory::Expense => (7, 14),             // Expense report cycles
            FraudCategory::Revenue => (85, 95),            // Quarterly close periods
            FraudCategory::Asset => (30, 60),              // Asset reconciliation
            FraudCategory::General => (5, 10),             // Default short window
        }
    }

    /// Infer the fraud category from an anomaly type string.
    ///
    /// Keywords are matched case-insensitively as substrings. The two-letter
    /// abbreviations `AR` and `AP` are only recognised as whole words (see
    /// [`Self::has_abbreviation`]), so that names such as `"salary"` or
    /// `"AssetMisappropriation"` are not misread as receivable/payable fraud.
    /// Categories are tested in a fixed order (receivable, payable, payroll,
    /// expense, revenue, asset); the first hit wins and anything else is
    /// [`FraudCategory::General`].
    pub fn from_anomaly_type(anomaly_type: &str) -> Self {
        let lower = anomaly_type.to_lowercase();
        let mentions = |keywords: &[&str]| keywords.iter().any(|k| lower.contains(k));

        if mentions(&["receivable", "invoice", "customer"])
            || Self::has_abbreviation(anomaly_type, "ar")
        {
            FraudCategory::AccountsReceivable
        } else if mentions(&["payable", "vendor", "payment"])
            || Self::has_abbreviation(anomaly_type, "ap")
        {
            FraudCategory::AccountsPayable
        } else if mentions(&["payroll", "ghost", "employee", "salary"]) {
            FraudCategory::Payroll
        } else if mentions(&["expense", "reimbursement"]) {
            FraudCategory::Expense
        } else if mentions(&["revenue", "sales", "channel", "premature"]) {
            FraudCategory::Revenue
        } else if mentions(&["asset", "inventory", "fixed", "depreciation"]) {
            FraudCategory::Asset
        } else {
            FraudCategory::General
        }
    }

    /// Returns `true` when `abbreviation` occurs in `text` as a whole word.
    ///
    /// Words are delimited by any non-alphanumeric character and by camelCase
    /// boundaries: a lowercase letter followed by an uppercase one
    /// (`fooBar`), or the end of an uppercase run that precedes a capitalised
    /// word (`ARFraud` splits into `AR` and `Fraud`). The comparison ignores
    /// ASCII case.
    fn has_abbreviation(text: &str, abbreviation: &str) -> bool {
        let chars: Vec<char> = text.chars().collect();
        let mut word = String::new();

        for (i, &c) in chars.iter().enumerate() {
            if !c.is_alphanumeric() {
                // Separator: the word collected so far is complete.
                if word.eq_ignore_ascii_case(abbreviation) {
                    return true;
                }
                word.clear();
                continue;
            }

            if c.is_uppercase() && !word.is_empty() {
                // `word` is non-empty, so `i >= 1` and `chars[i - 1]` is its last char.
                let prev = chars[i - 1];
                let next_is_lower = chars.get(i + 1).is_some_and(|n| n.is_lowercase());
                let camel_boundary =
                    prev.is_lowercase() || (prev.is_uppercase() && next_is_lower);
                if camel_boundary {
                    if word.eq_ignore_ascii_case(abbreviation) {
                        return true;
                    }
                    word.clear();
                }
            }

            word.push(c);
        }

        word.eq_ignore_ascii_case(abbreviation)
    }
}
190
/// Settings that govern how anomalies are grouped into clusters.
#[derive(Debug, Clone)]
pub struct ClusteringConfig {
    /// Master switch for clustering.
    pub enabled: bool,
    /// Chance that an anomaly opens a new cluster.
    pub cluster_start_probability: f64,
    /// Chance that the next anomaly joins the cluster currently in progress.
    pub cluster_continuation_probability: f64,
    /// Smallest size a cluster is expected to reach.
    pub min_cluster_size: usize,
    /// Largest size a cluster may grow to.
    pub max_cluster_size: usize,
    /// Cluster time window in days (the default for the General category).
    pub cluster_time_window_days: i64,
    /// Whether time windows are chosen per fraud type.
    pub use_fraud_specific_windows: bool,
    /// Whether account relationships are preserved inside a cluster.
    pub preserve_account_relationships: bool,
}

impl Default for ClusteringConfig {
    /// Clustering enabled, 30% start / 70% continuation probability, cluster
    /// sizes between 2 and 10, a 7-day window, with fraud-specific windows and
    /// relationship preservation switched on.
    fn default() -> Self {
        Self {
            // Feature switches
            enabled: true,
            use_fraud_specific_windows: true,
            preserve_account_relationships: true,
            // Probabilities
            cluster_start_probability: 0.3,
            cluster_continuation_probability: 0.7,
            // Size and time bounds
            min_cluster_size: 2,
            max_cluster_size: 10,
            cluster_time_window_days: 7,
        }
    }
}
226
/// A directed relationship between two entities that take part in a fraud cluster.
#[derive(Debug, Clone)]
pub struct CausalLink {
    /// Identifier of the originating entity (e.g., a payment document ID).
    pub source_entity: String,
    /// Kind of the originating entity.
    pub source_type: String,
    /// Identifier of the entity pointed at (e.g., a vendor ID).
    pub target_entity: String,
    /// Kind of the entity pointed at.
    pub target_type: String,
    /// Name of the relationship connecting source and target.
    pub relationship: String,
}

impl CausalLink {
    /// Builds a link from anything convertible into `String`.
    pub fn new(
        source_entity: impl Into<String>,
        source_type: impl Into<String>,
        target_entity: impl Into<String>,
        target_type: impl Into<String>,
        relationship: impl Into<String>,
    ) -> Self {
        let source_entity: String = source_entity.into();
        let source_type: String = source_type.into();
        let target_entity: String = target_entity.into();
        let target_type: String = target_type.into();
        let relationship: String = relationship.into();

        Self {
            source_entity,
            source_type,
            target_entity,
            target_type,
            relationship,
        }
    }
}
260
261/// Manages anomaly clustering.
262pub struct ClusterManager {
263    config: ClusteringConfig,
264    /// Current active clusters by fraud category.
265    active_clusters: HashMap<FraudCategory, ActiveCluster>,
266    /// Next cluster ID to assign.
267    next_cluster_id: u64,
268    /// Cluster statistics.
269    cluster_stats: HashMap<String, ClusterStats>,
270}
271
272/// Active cluster state.
273#[derive(Debug, Clone)]
274struct ActiveCluster {
275    /// Cluster ID.
276    cluster_id: String,
277    /// Number of anomalies in cluster.
278    size: usize,
279    /// Start date.
280    start_date: NaiveDate,
281    /// Fraud category (clusters are keyed by category in the HashMap; retained
282    /// here for debug/display and future per-cluster analytics).
283    #[allow(dead_code)]
284    category: FraudCategory,
285    /// Time window for this cluster.
286    time_window_days: i64,
287    /// Accounts involved (for relationship preservation).
288    accounts: Vec<String>,
289    /// Entities involved (vendors, customers, employees).
290    entities: Vec<String>,
291}
292
293/// Statistics for a cluster.
294#[derive(Debug, Clone, Default)]
295pub struct ClusterStats {
296    /// Number of anomalies in cluster.
297    pub size: usize,
298    /// Start date.
299    pub start_date: Option<NaiveDate>,
300    /// End date.
301    pub end_date: Option<NaiveDate>,
302    /// Anomaly types in cluster.
303    pub anomaly_types: Vec<String>,
304    /// Fraud category of this cluster.
305    pub fraud_category: Option<FraudCategory>,
306    /// Time window used (days).
307    pub time_window_days: i64,
308    /// Accounts involved in this cluster.
309    pub accounts: Vec<String>,
310    /// Entities involved in this cluster.
311    pub entities: Vec<String>,
312    /// Causal links within this cluster.
313    pub causal_links: Vec<CausalLink>,
314}
315
316impl ClusterManager {
317    /// Creates a new cluster manager.
318    pub fn new(config: ClusteringConfig) -> Self {
319        Self {
320            config,
321            active_clusters: HashMap::new(),
322            next_cluster_id: 1,
323            cluster_stats: HashMap::new(),
324        }
325    }
326
327    /// Determines the cluster ID for a new anomaly.
328    pub fn assign_cluster<R: Rng>(
329        &mut self,
330        date: NaiveDate,
331        anomaly_type: &str,
332        rng: &mut R,
333    ) -> Option<String> {
334        self.assign_cluster_with_context(date, anomaly_type, None, None, rng)
335    }
336
337    /// Determines the cluster ID with additional context for relationship preservation.
338    pub fn assign_cluster_with_context<R: Rng>(
339        &mut self,
340        date: NaiveDate,
341        anomaly_type: &str,
342        account: Option<&str>,
343        entity: Option<&str>,
344        rng: &mut R,
345    ) -> Option<String> {
346        if !self.config.enabled {
347            return None;
348        }
349
350        // Determine fraud category from anomaly type
351        let category = FraudCategory::from_anomaly_type(anomaly_type);
352
353        // Get time window for this category
354        let time_window = if self.config.use_fraud_specific_windows {
355            let (min, max) = category.time_window_days();
356            rng.gen_range(min..=max)
357        } else {
358            self.config.cluster_time_window_days
359        };
360
361        // Check if we should continue an existing cluster for this category
362        if let Some(active) = self.active_clusters.get(&category).cloned() {
363            let days_elapsed = (date - active.start_date).num_days();
364
365            // Check if within time window and not at max size
366            if days_elapsed <= active.time_window_days
367                && active.size < self.config.max_cluster_size
368                && rng.gen::<f64>() < self.config.cluster_continuation_probability
369            {
370                // If preserving relationships, prefer matching accounts/entities
371                let relationship_match = if self.config.preserve_account_relationships {
372                    let account_match =
373                        account.is_none_or(|a| active.accounts.contains(&a.to_string()));
374                    let entity_match =
375                        entity.is_none_or(|e| active.entities.contains(&e.to_string()));
376                    account_match || entity_match
377                } else {
378                    true
379                };
380
381                if relationship_match {
382                    // Continue the cluster
383                    let cluster_id = active.cluster_id.clone();
384
385                    // Update active cluster
386                    if let Some(active_mut) = self.active_clusters.get_mut(&category) {
387                        active_mut.size += 1;
388                        if let Some(acct) = account {
389                            if !active_mut.accounts.contains(&acct.to_string()) {
390                                active_mut.accounts.push(acct.to_string());
391                            }
392                        }
393                        if let Some(ent) = entity {
394                            if !active_mut.entities.contains(&ent.to_string()) {
395                                active_mut.entities.push(ent.to_string());
396                            }
397                        }
398                    }
399
400                    // Update cluster stats
401                    if let Some(stats) = self.cluster_stats.get_mut(&cluster_id) {
402                        stats.size += 1;
403                        stats.end_date = Some(date);
404                        stats.anomaly_types.push(anomaly_type.to_string());
405                        if let Some(acct) = account {
406                            if !stats.accounts.contains(&acct.to_string()) {
407                                stats.accounts.push(acct.to_string());
408                            }
409                        }
410                        if let Some(ent) = entity {
411                            if !stats.entities.contains(&ent.to_string()) {
412                                stats.entities.push(ent.to_string());
413                            }
414                        }
415                    }
416
417                    return Some(cluster_id);
418                }
419            }
420
421            // End current cluster if at min size
422            if active.size >= self.config.min_cluster_size {
423                self.active_clusters.remove(&category);
424            }
425        }
426
427        // Decide whether to start a new cluster
428        if rng.gen::<f64>() < self.config.cluster_start_probability {
429            let cluster_id = format!("CLU{:06}", self.next_cluster_id);
430            self.next_cluster_id += 1;
431
432            let mut accounts = Vec::new();
433            let mut entities = Vec::new();
434            if let Some(acct) = account {
435                accounts.push(acct.to_string());
436            }
437            if let Some(ent) = entity {
438                entities.push(ent.to_string());
439            }
440
441            // Create new active cluster
442            self.active_clusters.insert(
443                category,
444                ActiveCluster {
445                    cluster_id: cluster_id.clone(),
446                    size: 1,
447                    start_date: date,
448                    category,
449                    time_window_days: time_window,
450                    accounts: accounts.clone(),
451                    entities: entities.clone(),
452                },
453            );
454
455            // Initialize cluster stats
456            self.cluster_stats.insert(
457                cluster_id.clone(),
458                ClusterStats {
459                    size: 1,
460                    start_date: Some(date),
461                    end_date: Some(date),
462                    anomaly_types: vec![anomaly_type.to_string()],
463                    fraud_category: Some(category),
464                    time_window_days: time_window,
465                    accounts,
466                    entities,
467                    causal_links: Vec::new(),
468                },
469            );
470
471            return Some(cluster_id);
472        }
473
474        None
475    }
476
477    /// Add a causal link to a cluster.
478    pub fn add_causal_link(&mut self, cluster_id: &str, link: CausalLink) {
479        if let Some(stats) = self.cluster_stats.get_mut(cluster_id) {
480            stats.causal_links.push(link);
481        }
482    }
483
484    /// Get suggested account for relationship preservation within a cluster.
485    pub fn get_related_account(&self, cluster_id: &str) -> Option<&str> {
486        self.cluster_stats
487            .get(cluster_id)
488            .and_then(|s| s.accounts.first().map(|a| a.as_str()))
489    }
490
491    /// Get suggested entity for relationship preservation within a cluster.
492    pub fn get_related_entity(&self, cluster_id: &str) -> Option<&str> {
493        self.cluster_stats
494            .get(cluster_id)
495            .and_then(|s| s.entities.first().map(|e| e.as_str()))
496    }
497
498    /// Gets cluster statistics.
499    pub fn get_cluster_stats(&self, cluster_id: &str) -> Option<&ClusterStats> {
500        self.cluster_stats.get(cluster_id)
501    }
502
503    /// Gets all cluster statistics.
504    pub fn all_cluster_stats(&self) -> &HashMap<String, ClusterStats> {
505        &self.cluster_stats
506    }
507
508    /// Returns the number of clusters created.
509    pub fn cluster_count(&self) -> usize {
510        self.cluster_stats.len()
511    }
512
513    /// Get cluster statistics by fraud category.
514    pub fn clusters_by_category(&self) -> HashMap<FraudCategory, Vec<&ClusterStats>> {
515        let mut by_category: HashMap<FraudCategory, Vec<&ClusterStats>> = HashMap::new();
516        for stats in self.cluster_stats.values() {
517            if let Some(cat) = stats.fraud_category {
518                by_category.entry(cat).or_default().push(stats);
519            }
520        }
521        by_category
522    }
523}
524
/// Strategy used to choose which entity an anomaly is attached to.
#[derive(Debug, Clone, Default)]
pub enum EntityTargetingPattern {
    /// Pick any candidate at random (the default).
    #[default]
    Random,
    /// Pick in proportion to transaction volume.
    VolumeWeighted,
    /// Concentrate on particular entity types.
    TypeFocused {
        /// Weight assigned to each targeted entity type.
        type_weights: HashMap<String, f64>,
    },
    /// Keep returning to entities that were targeted before.
    RepeatOffender {
        /// Chance of re-targeting a previously chosen entity.
        repeat_probability: f64,
    },
}
544
545/// Manages entity targeting for anomalies.
546pub struct EntityTargetingManager {
547    pattern: EntityTargetingPattern,
548    /// Recently targeted entities.
549    recent_targets: Vec<String>,
550    /// Maximum recent targets to track.
551    max_recent: usize,
552    /// Entity hit counts.
553    hit_counts: HashMap<String, usize>,
554}
555
556impl EntityTargetingManager {
557    /// Creates a new entity targeting manager.
558    pub fn new(pattern: EntityTargetingPattern) -> Self {
559        Self {
560            pattern,
561            recent_targets: Vec::new(),
562            max_recent: 20,
563            hit_counts: HashMap::new(),
564        }
565    }
566
567    /// Selects an entity to target.
568    pub fn select_entity<R: Rng>(&mut self, candidates: &[String], rng: &mut R) -> Option<String> {
569        if candidates.is_empty() {
570            return None;
571        }
572
573        let selected = match &self.pattern {
574            EntityTargetingPattern::Random => {
575                candidates[rng.gen_range(0..candidates.len())].clone()
576            }
577            EntityTargetingPattern::VolumeWeighted => {
578                // In practice, would weight by actual volume
579                // For now, use random
580                candidates[rng.gen_range(0..candidates.len())].clone()
581            }
582            EntityTargetingPattern::TypeFocused { type_weights } => {
583                // Filter by type weights
584                let weighted: Vec<_> = candidates
585                    .iter()
586                    .filter_map(|c| type_weights.get(c).map(|&w| (c.clone(), w)))
587                    .collect();
588
589                if weighted.is_empty() {
590                    candidates[rng.gen_range(0..candidates.len())].clone()
591                } else {
592                    weighted_select(rng, &weighted).clone()
593                }
594            }
595            EntityTargetingPattern::RepeatOffender { repeat_probability } => {
596                // Check if we should repeat a recent target
597                if !self.recent_targets.is_empty() && rng.gen::<f64>() < *repeat_probability {
598                    let idx = rng.gen_range(0..self.recent_targets.len());
599                    self.recent_targets[idx].clone()
600                } else {
601                    candidates[rng.gen_range(0..candidates.len())].clone()
602                }
603            }
604        };
605
606        // Track the selection
607        self.recent_targets.push(selected.clone());
608        if self.recent_targets.len() > self.max_recent {
609            self.recent_targets.remove(0);
610        }
611
612        *self.hit_counts.entry(selected.clone()).or_insert(0) += 1;
613
614        Some(selected)
615    }
616
617    /// Gets hit count for an entity.
618    pub fn hit_count(&self, entity: &str) -> usize {
619        *self.hit_counts.get(entity).unwrap_or(&0)
620    }
621}
622
623/// Combined pattern configuration.
624#[derive(Debug, Clone)]
625pub struct AnomalyPatternConfig {
626    /// Temporal pattern.
627    pub temporal_pattern: TemporalPattern,
628    /// Clustering configuration.
629    pub clustering: ClusteringConfig,
630    /// Entity targeting pattern.
631    pub entity_targeting: EntityTargetingPattern,
632    /// Whether to inject anomalies in batches.
633    pub batch_injection: bool,
634    /// Batch size range.
635    pub batch_size_range: (usize, usize),
636}
637
638impl Default for AnomalyPatternConfig {
639    fn default() -> Self {
640        Self {
641            temporal_pattern: TemporalPattern::default(),
642            clustering: ClusteringConfig::default(),
643            entity_targeting: EntityTargetingPattern::default(),
644            batch_injection: false,
645            batch_size_range: (2, 5),
646        }
647    }
648}
649
650/// Determines if an anomaly should be injected at this point.
651pub fn should_inject_anomaly<R: Rng>(
652    base_rate: f64,
653    date: NaiveDate,
654    pattern: &TemporalPattern,
655    rng: &mut R,
656) -> bool {
657    let multiplier = pattern.probability_multiplier(date);
658    let adjusted_rate = (base_rate * multiplier).min(1.0);
659    rng.gen::<f64>() < adjusted_rate
660}
661
662// ============================================================================
663// Fraud Actor System - User-Based Fraud Targeting
664// ============================================================================
665
/// How the size of an actor's frauds develops from one incident to the next.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EscalationPattern {
    /// Amounts stay roughly the same.
    Stable,
    /// Amounts creep up slowly (typical embezzlement).
    Gradual,
    /// Amounts climb quickly as the actor gets bolder.
    Aggressive,
    /// Amounts vary but trend upward.
    Erratic,
    /// A few small probing frauds followed by a single large one.
    TestThenStrike,
}

impl EscalationPattern {
    /// Returns the amount multiplier for the next fraud, given how many
    /// frauds came before it.
    ///
    /// * `Stable`: always `1.0`.
    /// * `Gradual`: `1.0` plus 10% per prior fraud, capped at `3.0`.
    /// * `Aggressive`: `1.0` plus 25% per prior fraud, capped at `5.0`.
    /// * `Erratic`: `1.0` plus 15% per prior fraud, capped at `4.0`.
    /// * `TestThenStrike`: `0.3` for the first three frauds, `5.0` for the
    ///   fourth, and `0.0` afterwards.
    pub fn escalation_multiplier(&self, prior_fraud_count: usize) -> f64 {
        let prior = prior_fraud_count as f64;
        // Linear growth from 1.0 in steps of `step`, never exceeding `cap`.
        let ramp = |step: f64, cap: f64| (1.0 + step * prior).min(cap);

        match self {
            Self::Stable => 1.0,
            Self::Gradual => ramp(0.1, 3.0),
            Self::Aggressive => ramp(0.25, 5.0),
            Self::Erratic => ramp(0.15, 4.0),
            Self::TestThenStrike => match prior_fraud_count {
                0..=2 => 0.3, // probing with small amounts
                3 => 5.0,     // the big strike
                _ => 0.0,     // nothing after the strike
            },
        }
    }
}
712
713/// A fraud actor represents a user who commits fraud over time.
714#[derive(Debug, Clone)]
715pub struct FraudActor {
716    /// User ID of the fraudster.
717    pub user_id: String,
718    /// User's name for display purposes.
719    pub user_name: String,
720    /// Fraud history (document IDs and dates).
721    pub fraud_history: Vec<FraudIncident>,
722    /// Escalation pattern for this actor.
723    pub escalation_pattern: EscalationPattern,
724    /// Preferred GL accounts for fraud.
725    pub preferred_accounts: Vec<String>,
726    /// Preferred vendors (for AP fraud).
727    pub preferred_vendors: Vec<String>,
728    /// Total amount of fraud committed.
729    pub total_amount: rust_decimal::Decimal,
730    /// Start date of fraud activity.
731    pub start_date: Option<NaiveDate>,
732    /// Detection likelihood (0.0-1.0) - increases with activity.
733    pub detection_risk: f64,
734    /// Is this actor currently active?
735    pub is_active: bool,
736}
737
738/// A single fraud incident committed by an actor.
739#[derive(Debug, Clone)]
740pub struct FraudIncident {
741    /// Document ID of the fraudulent entry.
742    pub document_id: String,
743    /// Date of the fraud.
744    pub date: NaiveDate,
745    /// Amount of the fraud.
746    pub amount: rust_decimal::Decimal,
747    /// Fraud type.
748    pub fraud_type: String,
749    /// Account used.
750    pub account: Option<String>,
751    /// Related entity (vendor, customer, etc.).
752    pub entity: Option<String>,
753}
754
755impl FraudActor {
756    /// Create a new fraud actor.
757    pub fn new(
758        user_id: impl Into<String>,
759        user_name: impl Into<String>,
760        escalation_pattern: EscalationPattern,
761    ) -> Self {
762        Self {
763            user_id: user_id.into(),
764            user_name: user_name.into(),
765            fraud_history: Vec::new(),
766            escalation_pattern,
767            preferred_accounts: Vec::new(),
768            preferred_vendors: Vec::new(),
769            total_amount: rust_decimal::Decimal::ZERO,
770            start_date: None,
771            detection_risk: 0.0,
772            is_active: true,
773        }
774    }
775
776    /// Add a preferred account for fraud.
777    pub fn with_account(mut self, account: impl Into<String>) -> Self {
778        self.preferred_accounts.push(account.into());
779        self
780    }
781
782    /// Add a preferred vendor for fraud.
783    pub fn with_vendor(mut self, vendor: impl Into<String>) -> Self {
784        self.preferred_vendors.push(vendor.into());
785        self
786    }
787
788    /// Record a fraud incident.
789    pub fn record_fraud(
790        &mut self,
791        document_id: impl Into<String>,
792        date: NaiveDate,
793        amount: rust_decimal::Decimal,
794        fraud_type: impl Into<String>,
795        account: Option<String>,
796        entity: Option<String>,
797    ) {
798        let incident = FraudIncident {
799            document_id: document_id.into(),
800            date,
801            amount,
802            fraud_type: fraud_type.into(),
803            account: account.clone(),
804            entity: entity.clone(),
805        };
806
807        self.fraud_history.push(incident);
808        self.total_amount += amount;
809
810        if self.start_date.is_none() {
811            self.start_date = Some(date);
812        }
813
814        // Update detection risk based on activity
815        self.update_detection_risk();
816
817        // Add account/entity to preferences if not already present
818        if let Some(acct) = account {
819            if !self.preferred_accounts.contains(&acct) {
820                self.preferred_accounts.push(acct);
821            }
822        }
823        if let Some(ent) = entity {
824            if !self.preferred_vendors.contains(&ent) {
825                self.preferred_vendors.push(ent);
826            }
827        }
828    }
829
830    /// Update detection risk based on fraud activity.
831    fn update_detection_risk(&mut self) {
832        // Detection risk increases with:
833        // 1. Number of frauds committed
834        // 2. Total amount
835        // 3. How bold the escalation pattern is
836        let count_factor = (self.fraud_history.len() as f64 * 0.05).min(0.3);
837        let amount_factor = if self.total_amount > rust_decimal::Decimal::from(100_000) {
838            0.3
839        } else if self.total_amount > rust_decimal::Decimal::from(10_000) {
840            0.2
841        } else {
842            0.1
843        };
844        let pattern_factor = match self.escalation_pattern {
845            EscalationPattern::Stable => 0.1,
846            EscalationPattern::Gradual => 0.15,
847            EscalationPattern::Erratic => 0.2,
848            EscalationPattern::Aggressive => 0.25,
849            EscalationPattern::TestThenStrike => 0.3,
850        };
851
852        self.detection_risk = (count_factor + amount_factor + pattern_factor).min(0.95);
853    }
854
855    /// Get the escalation multiplier for the next fraud.
856    pub fn next_escalation_multiplier(&self) -> f64 {
857        self.escalation_pattern
858            .escalation_multiplier(self.fraud_history.len())
859    }
860
861    /// Get a preferred account, or None if no preferences.
862    pub fn get_preferred_account<R: Rng>(&self, rng: &mut R) -> Option<&str> {
863        if self.preferred_accounts.is_empty() {
864            None
865        } else {
866            Some(&self.preferred_accounts[rng.gen_range(0..self.preferred_accounts.len())])
867        }
868    }
869
870    /// Get a preferred vendor, or None if no preferences.
871    pub fn get_preferred_vendor<R: Rng>(&self, rng: &mut R) -> Option<&str> {
872        if self.preferred_vendors.is_empty() {
873            None
874        } else {
875            Some(&self.preferred_vendors[rng.gen_range(0..self.preferred_vendors.len())])
876        }
877    }
878}
879
880/// Manages fraud actors for user-based fraud targeting.
881pub struct FraudActorManager {
882    /// All fraud actors.
883    actors: Vec<FraudActor>,
884    /// Map from user_id to actor index.
885    user_index: HashMap<String, usize>,
886    /// Probability of using an existing actor vs creating new one.
887    repeat_actor_probability: f64,
888    /// Maximum active actors at any time.
889    max_active_actors: usize,
890}
891
892impl FraudActorManager {
893    /// Create a new fraud actor manager.
894    pub fn new(repeat_actor_probability: f64, max_active_actors: usize) -> Self {
895        Self {
896            actors: Vec::new(),
897            user_index: HashMap::new(),
898            repeat_actor_probability,
899            max_active_actors,
900        }
901    }
902
903    /// Add a fraud actor.
904    pub fn add_actor(&mut self, actor: FraudActor) {
905        let idx = self.actors.len();
906        self.user_index.insert(actor.user_id.clone(), idx);
907        self.actors.push(actor);
908    }
909
910    /// Get or create a fraud actor for the next fraud.
911    pub fn get_or_create_actor<R: Rng>(
912        &mut self,
913        available_users: &[String],
914        rng: &mut R,
915    ) -> Option<&mut FraudActor> {
916        if available_users.is_empty() {
917            return None;
918        }
919
920        // Check if we should use an existing active actor
921        let active_actors: Vec<usize> = self
922            .actors
923            .iter()
924            .enumerate()
925            .filter(|(_, a)| a.is_active)
926            .map(|(i, _)| i)
927            .collect();
928
929        if !active_actors.is_empty() && rng.gen::<f64>() < self.repeat_actor_probability {
930            // Use existing actor
931            let idx = active_actors[rng.gen_range(0..active_actors.len())];
932            return Some(&mut self.actors[idx]);
933        }
934
935        // Create new actor if under max
936        if self.actors.len() < self.max_active_actors {
937            // Pick a random user
938            let user_id = &available_users[rng.gen_range(0..available_users.len())];
939
940            // Check if user already has an actor
941            if let Some(&idx) = self.user_index.get(user_id) {
942                return Some(&mut self.actors[idx]);
943            }
944
945            // Create new actor with random escalation pattern
946            let pattern = match rng.gen_range(0..5) {
947                0 => EscalationPattern::Stable,
948                1 => EscalationPattern::Gradual,
949                2 => EscalationPattern::Aggressive,
950                3 => EscalationPattern::Erratic,
951                _ => EscalationPattern::TestThenStrike,
952            };
953
954            let actor = FraudActor::new(user_id.clone(), format!("Fraudster {}", user_id), pattern);
955            let idx = self.actors.len();
956            self.user_index.insert(user_id.clone(), idx);
957            self.actors.push(actor);
958            return Some(&mut self.actors[idx]);
959        }
960
961        // Use random existing actor
962        if !self.actors.is_empty() {
963            let idx = rng.gen_range(0..self.actors.len());
964            return Some(&mut self.actors[idx]);
965        }
966
967        None
968    }
969
970    /// Get an actor by user ID.
971    pub fn get_actor(&self, user_id: &str) -> Option<&FraudActor> {
972        self.user_index.get(user_id).map(|&i| &self.actors[i])
973    }
974
975    /// Get a mutable actor by user ID.
976    pub fn get_actor_mut(&mut self, user_id: &str) -> Option<&mut FraudActor> {
977        if let Some(&idx) = self.user_index.get(user_id) {
978            Some(&mut self.actors[idx])
979        } else {
980            None
981        }
982    }
983
984    /// Deactivate actors who have high detection risk.
985    pub fn apply_detection<R: Rng>(&mut self, rng: &mut R) {
986        for actor in &mut self.actors {
987            if actor.is_active && rng.gen::<f64>() < actor.detection_risk {
988                actor.is_active = false;
989            }
990        }
991    }
992
993    /// Get all actors.
994    pub fn all_actors(&self) -> &[FraudActor] {
995        &self.actors
996    }
997
998    /// Get summary statistics.
999    pub fn get_statistics(&self) -> FraudActorStatistics {
1000        let total_actors = self.actors.len();
1001        let active_actors = self.actors.iter().filter(|a| a.is_active).count();
1002        let total_incidents: usize = self.actors.iter().map(|a| a.fraud_history.len()).sum();
1003        let total_amount: rust_decimal::Decimal = self.actors.iter().map(|a| a.total_amount).sum();
1004
1005        FraudActorStatistics {
1006            total_actors,
1007            active_actors,
1008            total_incidents,
1009            total_amount,
1010        }
1011    }
1012}
1013
1014/// Statistics about fraud actors.
1015#[derive(Debug, Clone)]
1016pub struct FraudActorStatistics {
1017    /// Total number of fraud actors.
1018    pub total_actors: usize,
1019    /// Number of currently active actors.
1020    pub active_actors: usize,
1021    /// Total fraud incidents across all actors.
1022    pub total_incidents: usize,
1023    /// Total fraud amount across all actors.
1024    pub total_amount: rust_decimal::Decimal,
1025}
1026
// Unit tests for temporal patterns, clustering, and fraud actors.
// All randomness comes from a seeded `ChaCha8Rng`, so runs are reproducible.
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use rand::SeedableRng;
    use rand_chacha::ChaCha8Rng;

    /// The default pattern leaves ordinary days alone and boosts period ends.
    #[test]
    fn test_temporal_pattern_multiplier() {
        let pattern = TemporalPattern::default();

        // Regular day
        let regular = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap();
        assert_eq!(pattern.probability_multiplier(regular), 1.0);

        // Month end
        let month_end = NaiveDate::from_ymd_opt(2024, 6, 30).unwrap();
        assert!(pattern.probability_multiplier(month_end) > 1.0);

        // Year end
        let year_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();
        assert!(
            pattern.probability_multiplier(year_end) > pattern.probability_multiplier(month_end)
        );
    }

    /// With the default config, at least some anomalies end up in a cluster.
    #[test]
    fn test_cluster_manager() {
        let mut manager = ClusterManager::new(ClusteringConfig::default());
        let mut rng = ChaCha8Rng::seed_from_u64(42);
        let date = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap();

        // Generate several anomalies and check clustering
        let mut clustered = 0;
        for i in 0..20 {
            let d = date + chrono::Duration::days(i % 7); // Within time window
            if manager.assign_cluster(d, "TestType", &mut rng).is_some() {
                clustered += 1;
            }
        }

        // Some should be clustered
        assert!(clustered > 0);
        assert!(manager.cluster_count() > 0);
    }

    /// AR fraud uses a longer time window than the general category.
    #[test]
    fn test_fraud_category_time_windows() {
        let ar = FraudCategory::AccountsReceivable;
        let general = FraudCategory::General;

        let (ar_min, ar_max) = ar.time_window_days();
        let (gen_min, gen_max) = general.time_window_days();

        assert!(ar_min > gen_min);
        assert!(ar_max > gen_max);
    }

    /// Anomaly type names map to the expected fraud categories.
    #[test]
    fn test_fraud_category_inference() {
        assert_eq!(
            FraudCategory::from_anomaly_type("AccountsReceivable"),
            FraudCategory::AccountsReceivable
        );
        assert_eq!(
            FraudCategory::from_anomaly_type("VendorPayment"),
            FraudCategory::AccountsPayable
        );
        assert_eq!(
            FraudCategory::from_anomaly_type("GhostEmployee"),
            FraudCategory::Payroll
        );
        assert_eq!(
            FraudCategory::from_anomaly_type("RandomType"),
            FraudCategory::General
        );
    }

    /// Anomalies sharing an account join the same cluster and accumulate context.
    #[test]
    fn test_cluster_with_context() {
        let mut manager = ClusterManager::new(ClusteringConfig {
            cluster_start_probability: 1.0,        // Always start
            cluster_continuation_probability: 1.0, // Always continue
            ..Default::default()
        });
        let mut rng = ChaCha8Rng::seed_from_u64(42);
        let date = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap();

        // First anomaly starts a cluster
        let cluster1 = manager.assign_cluster_with_context(
            date,
            "VendorPayment",
            Some("200000"),
            Some("V001"),
            &mut rng,
        );
        assert!(cluster1.is_some());

        // Second anomaly with same account should join same cluster
        let cluster2 = manager.assign_cluster_with_context(
            date + chrono::Duration::days(5),
            "VendorPayment",
            Some("200000"),
            Some("V002"),
            &mut rng,
        );

        assert_eq!(cluster1, cluster2);

        // Check stats have both entities
        let stats = manager.get_cluster_stats(&cluster1.unwrap()).unwrap();
        assert_eq!(stats.accounts.len(), 1); // Same account
        assert_eq!(stats.entities.len(), 2); // Two vendors
    }

    /// Causal links added to a cluster show up in its statistics.
    #[test]
    fn test_causal_links() {
        let mut manager = ClusterManager::new(ClusteringConfig {
            cluster_start_probability: 1.0,
            ..Default::default()
        });
        let mut rng = ChaCha8Rng::seed_from_u64(42);
        let date = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap();

        let cluster_id = manager
            .assign_cluster(date, "VendorPayment", &mut rng)
            .unwrap();

        // Add causal links
        manager.add_causal_link(
            &cluster_id,
            CausalLink::new("PAY-001", "Payment", "V001", "Vendor", "references"),
        );
        manager.add_causal_link(
            &cluster_id,
            CausalLink::new("V001", "Vendor", "EMP-001", "Employee", "owned_by"),
        );

        let stats = manager.get_cluster_stats(&cluster_id).unwrap();
        assert_eq!(stats.causal_links.len(), 2);
    }

    /// Year-end dates receive more injections than ordinary dates.
    #[test]
    fn test_should_inject_anomaly() {
        let mut rng = ChaCha8Rng::seed_from_u64(42);
        let pattern = TemporalPattern::default();

        let regular_date = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap();
        let year_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();

        // Count injections over many trials
        let mut regular_count = 0;
        let mut year_end_count = 0;

        for _ in 0..1000 {
            if should_inject_anomaly(0.1, regular_date, &pattern, &mut rng) {
                regular_count += 1;
            }
            if should_inject_anomaly(0.1, year_end, &pattern, &mut rng) {
                year_end_count += 1;
            }
        }

        // Year end should have more injections due to multiplier
        assert!(year_end_count > regular_count);
    }

    /// Each escalation pattern produces its characteristic multiplier curve.
    #[test]
    fn test_escalation_patterns() {
        // Stable should always return 1.0
        assert_eq!(EscalationPattern::Stable.escalation_multiplier(0), 1.0);
        assert_eq!(EscalationPattern::Stable.escalation_multiplier(10), 1.0);

        // Gradual should increase over time
        let gradual = EscalationPattern::Gradual;
        assert!(gradual.escalation_multiplier(5) > gradual.escalation_multiplier(0));
        assert!(gradual.escalation_multiplier(5) <= 3.0); // Max is 3x

        // Aggressive should increase faster
        let aggressive = EscalationPattern::Aggressive;
        assert!(aggressive.escalation_multiplier(5) > gradual.escalation_multiplier(5));

        // TestThenStrike has specific pattern
        let tts = EscalationPattern::TestThenStrike;
        assert!(tts.escalation_multiplier(0) < 1.0); // Small test amounts
        assert!(tts.escalation_multiplier(3) > 1.0); // Big strike
        assert_eq!(tts.escalation_multiplier(4), 0.0); // Stop after strike
    }

    /// Recording a fraud updates history, totals, risk, and preferences.
    #[test]
    fn test_fraud_actor() {
        use rust_decimal_macros::dec;

        let mut actor = FraudActor::new("USER001", "John Fraudster", EscalationPattern::Gradual)
            .with_account("600000")
            .with_vendor("V001");

        assert_eq!(actor.preferred_accounts.len(), 1);
        assert_eq!(actor.preferred_vendors.len(), 1);
        assert!(actor.is_active);

        // Record some fraud
        let date = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap();
        actor.record_fraud(
            "JE-001",
            date,
            dec!(1000),
            "DuplicatePayment",
            Some("600000".to_string()),
            Some("V002".to_string()),
        );

        assert_eq!(actor.fraud_history.len(), 1);
        assert_eq!(actor.total_amount, dec!(1000));
        assert_eq!(actor.start_date, Some(date));
        assert!(actor.detection_risk > 0.0);

        // V002 should be added to preferences
        assert!(actor.preferred_vendors.contains(&"V002".to_string()));
    }

    /// A freshly created actor can be retrieved and is reflected in the stats.
    #[test]
    fn test_fraud_actor_manager() {
        let mut rng = ChaCha8Rng::seed_from_u64(42);
        let mut manager = FraudActorManager::new(0.7, 5);

        let users = vec![
            "USER001".to_string(),
            "USER002".to_string(),
            "USER003".to_string(),
        ];

        // Get or create actor
        let actor = manager.get_or_create_actor(&users, &mut rng);
        assert!(actor.is_some());

        // Record fraud
        let actor = actor.unwrap();
        let user_id = actor.user_id.clone();
        actor.record_fraud(
            "JE-001",
            NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
            rust_decimal::Decimal::from(1000),
            "FictitiousEntry",
            None,
            None,
        );

        // Should be able to retrieve actor
        let retrieved = manager.get_actor(&user_id);
        assert!(retrieved.is_some());
        assert_eq!(retrieved.unwrap().fraud_history.len(), 1);

        // Get statistics
        let stats = manager.get_statistics();
        assert_eq!(stats.total_actors, 1);
        assert_eq!(stats.active_actors, 1);
        assert_eq!(stats.total_incidents, 1);
    }

    /// A high-risk actor is deactivated by repeated detection passes.
    #[test]
    fn test_fraud_actor_detection() {
        use rust_decimal_macros::dec;

        let mut rng = ChaCha8Rng::seed_from_u64(42);
        let mut manager = FraudActorManager::new(1.0, 10);

        // Add actor with high activity
        let mut actor =
            FraudActor::new("USER001", "Heavy Fraudster", EscalationPattern::Aggressive);
        let date = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap();

        // Record many frauds to increase detection risk
        for i in 0..10 {
            actor.record_fraud(
                format!("JE-{:03}", i),
                date + chrono::Duration::days(i as i64),
                dec!(10000),
                "FictitiousEntry",
                None,
                None,
            );
        }

        manager.add_actor(actor);

        // Detection risk should be high
        let actor = manager.get_actor("USER001").unwrap();
        assert!(actor.is_active);
        assert!(actor.detection_risk > 0.5);

        // Each pass deactivates the actor with probability `detection_risk`
        // (> 0.5), so the chance of surviving all 20 passes is below 0.5^20.
        // The fixed seed makes the outcome reproducible.
        for _ in 0..20 {
            manager.apply_detection(&mut rng);
        }

        // The actor must have been caught; it stays registered but inactive.
        let stats = manager.get_statistics();
        assert_eq!(stats.total_actors, 1);
        assert_eq!(stats.active_actors, 0);
        assert!(!manager.get_actor("USER001").unwrap().is_active);
    }
}