datasynth_generators/anomaly/
patterns.rs

1//! Anomaly patterns for realistic distribution.
2//!
3//! Patterns control how anomalies are distributed across time and entities,
4//! including clustering behavior and temporal patterns.
5
6use chrono::{Datelike, NaiveDate, Weekday};
7use datasynth_core::utils::weighted_select;
8use rand::Rng;
9use std::collections::HashMap;
10
/// Describes how the likelihood of injecting an anomaly varies with the
/// calendar date.
///
/// The default pattern is [`TemporalPattern::PeriodEndSpike`] with
/// multipliers of 2x (month end), 3x (quarter end) and 5x (year end).
#[derive(Debug, Clone)]
pub enum TemporalPattern {
    /// Every date is equally likely.
    Uniform,
    /// Dates close to a month, quarter or year end are more likely.
    PeriodEndSpike {
        /// Multiplier applied near the end of an ordinary month.
        month_end_multiplier: f64,
        /// Multiplier applied near the end of a quarter.
        quarter_end_multiplier: f64,
        /// Multiplier applied near the end of the year.
        year_end_multiplier: f64,
    },
    /// Specific times (after hours, weekends) are more likely.
    TimeBased {
        /// Multiplier for after-hours activity.
        after_hours_multiplier: f64,
        /// Multiplier for Saturdays and Sundays.
        weekend_multiplier: f64,
    },
    /// Likelihood depends on the month of the year.
    Seasonal {
        /// One multiplier per calendar month; index 0 is January and
        /// index 11 is December.
        month_multipliers: [f64; 12],
    },
    /// Placeholder for a custom, named pattern.
    Custom {
        /// Name identifying the pattern.
        name: String,
    },
}

impl Default for TemporalPattern {
    /// Returns a period-end spike with 2x / 3x / 5x multipliers.
    fn default() -> Self {
        Self::PeriodEndSpike {
            year_end_multiplier: 5.0,
            quarter_end_multiplier: 3.0,
            month_end_multiplier: 2.0,
        }
    }
}
53
54impl TemporalPattern {
55    /// Calculates the probability multiplier for a given date.
56    pub fn probability_multiplier(&self, date: NaiveDate) -> f64 {
57        match self {
58            TemporalPattern::Uniform => 1.0,
59            TemporalPattern::PeriodEndSpike {
60                month_end_multiplier,
61                quarter_end_multiplier,
62                year_end_multiplier,
63            } => {
64                let day = date.day();
65                let month = date.month();
66
67                // Year end (December 28-31)
68                if month == 12 && day >= 28 {
69                    return *year_end_multiplier;
70                }
71
72                // Quarter end (Mar, Jun, Sep, Dec last 3 days)
73                if matches!(month, 3 | 6 | 9 | 12) && day >= 28 {
74                    return *quarter_end_multiplier;
75                }
76
77                // Month end (last 3 days)
78                if day >= 28 {
79                    return *month_end_multiplier;
80                }
81
82                1.0
83            }
84            TemporalPattern::TimeBased {
85                after_hours_multiplier: _,
86                weekend_multiplier,
87            } => {
88                let weekday = date.weekday();
89                if weekday == Weekday::Sat || weekday == Weekday::Sun {
90                    return *weekend_multiplier;
91                }
92                // Assume all entries have potential for after-hours
93                // In practice, this would check timestamp
94                1.0
95            }
96            TemporalPattern::Seasonal { month_multipliers } => {
97                let month_idx = (date.month() - 1) as usize;
98                month_multipliers[month_idx]
99            }
100            TemporalPattern::Custom { .. } => 1.0,
101        }
102    }
103
104    /// Creates a standard audit season pattern (higher in Q1).
105    pub fn audit_season() -> Self {
106        TemporalPattern::Seasonal {
107            month_multipliers: [
108                2.0, 2.0, 1.5, // Q1 - audit busy season
109                1.0, 1.0, 1.2, // Q2 - quarter end
110                1.0, 1.0, 1.2, // Q3 - quarter end
111                1.0, 1.0, 3.0, // Q4 - year end
112            ],
113        }
114    }
115}
116
/// Fraud category for cluster time window selection.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum FraudCategory {
    /// Accounts Receivable fraud (invoice aging: 30-45 days)
    AccountsReceivable,
    /// Accounts Payable fraud (payment cycles: 14-30 days)
    AccountsPayable,
    /// Payroll fraud (monthly: 28-35 days)
    Payroll,
    /// Expense fraud (submission cycles: 7-14 days)
    Expense,
    /// Revenue manipulation (quarterly: 85-95 days)
    Revenue,
    /// Asset fraud (periodic: 30-60 days)
    Asset,
    /// General fraud (default short window: 5-10 days)
    General,
}

impl FraudCategory {
    /// Get the time window range `(min, max)` in days for this fraud category.
    pub fn time_window_days(&self) -> (i64, i64) {
        match self {
            FraudCategory::AccountsReceivable => (30, 45), // Invoice aging cycles
            FraudCategory::AccountsPayable => (14, 30),    // Payment terms
            FraudCategory::Payroll => (28, 35),            // Monthly pay cycles
            FraudCategory::Expense => (7, 14),             // Expense report cycles
            FraudCategory::Revenue => (85, 95),            // Quarterly close periods
            FraudCategory::Asset => (30, 60),              // Asset reconciliation
            FraudCategory::General => (5, 10),             // Default short window
        }
    }

    /// Infer fraud category from anomaly type string.
    ///
    /// Matching is case-insensitive. Full keywords ("receivable", "vendor",
    /// "salary", ...) match anywhere in the string. The two-letter
    /// abbreviations "AR" and "AP" only match as standalone tokens (delimited
    /// by the string ends, non-alphanumeric characters or a CamelCase
    /// boundary), so words that merely contain those letters, such as
    /// "salary", "early" or "capitalization", are no longer misclassified.
    ///
    /// Categories are tested in this order and the first hit wins:
    /// receivable, payable, payroll, expense, revenue, asset. Anything else
    /// is [`FraudCategory::General`].
    pub fn from_anomaly_type(anomaly_type: &str) -> Self {
        let lower = anomaly_type.to_lowercase();
        let mentions = |keywords: &[&str]| keywords.iter().any(|kw| lower.contains(*kw));
        let abbreviated = |abbr: &str| Self::has_abbreviation(anomaly_type, abbr);

        if mentions(&["receivable", "invoice", "customer"]) || abbreviated("ar") {
            FraudCategory::AccountsReceivable
        } else if mentions(&["payable", "vendor", "payment"]) || abbreviated("ap") {
            FraudCategory::AccountsPayable
        } else if mentions(&["payroll", "ghost", "employee", "salary"]) {
            FraudCategory::Payroll
        } else if mentions(&["expense", "reimbursement"]) {
            FraudCategory::Expense
        } else if mentions(&["revenue", "sales", "channel", "premature"]) {
            FraudCategory::Revenue
        } else if mentions(&["asset", "inventory", "fixed", "depreciation"]) {
            FraudCategory::Asset
        } else {
            FraudCategory::General
        }
    }

    /// Returns `true` when `abbr` (ASCII, lowercase) occurs in `raw` as a
    /// standalone token.
    ///
    /// The original casing of `raw` is inspected so that CamelCase
    /// identifiers such as `ARFraud` or `FakeAp` are split at their humps.
    fn has_abbreviation(raw: &str, abbr: &str) -> bool {
        let chars: Vec<char> = raw.chars().collect();
        let abbr: Vec<char> = abbr.chars().collect();
        let n = abbr.len();
        if n == 0 || chars.len() < n {
            return false;
        }

        (0..=chars.len() - n).any(|start| {
            let end = start + n;
            let window = &chars[start..end];
            let same_letters = window
                .iter()
                .zip(&abbr)
                .all(|(c, a)| c.to_ascii_lowercase() == *a);
            if !same_letters {
                return false;
            }

            let first = window[0];
            let last = window[n - 1];

            // Left edge: string start, a separator, or a lower->Upper hump.
            let left_ok = start == 0 || {
                let prev = chars[start - 1];
                !prev.is_alphanumeric() || (prev.is_lowercase() && first.is_uppercase())
            };

            // Right edge: string end, a separator, a lower->Upper hump, or an
            // acronym directly followed by a capitalised word ("ARFraud").
            let right_ok = end == chars.len() || {
                let next = chars[end];
                let starts_new_word = chars.get(end + 1).is_some_and(|c| c.is_lowercase());
                !next.is_alphanumeric()
                    || (last.is_lowercase() && next.is_uppercase())
                    || (last.is_uppercase() && next.is_uppercase() && starts_new_word)
            };

            left_ok && right_ok
        })
    }
}
190
/// Settings that govern how anomalies are grouped into clusters.
#[derive(Debug, Clone)]
pub struct ClusteringConfig {
    /// Master switch; when `false` no anomaly is ever assigned to a cluster.
    pub enabled: bool,
    /// Chance that an unclustered anomaly opens a new cluster.
    pub cluster_start_probability: f64,
    /// Chance that the next anomaly joins the currently active cluster.
    pub cluster_continuation_probability: f64,
    /// Smallest size a cluster is expected to reach.
    pub min_cluster_size: usize,
    /// Largest size a cluster may reach.
    pub max_cluster_size: usize,
    /// Cluster time window in days, used when fraud-specific windows are
    /// turned off.
    pub cluster_time_window_days: i64,
    /// Whether the window is taken from the fraud category instead of
    /// `cluster_time_window_days`.
    pub use_fraud_specific_windows: bool,
    /// Whether anomalies in a cluster should share accounts or entities.
    pub preserve_account_relationships: bool,
}

impl Default for ClusteringConfig {
    /// Clustering enabled, 30% start / 70% continuation probability,
    /// clusters of 2-10 anomalies, 7-day fallback window, fraud-specific
    /// windows and relationship preservation both on.
    fn default() -> Self {
        ClusteringConfig {
            enabled: true,
            use_fraud_specific_windows: true,
            preserve_account_relationships: true,
            cluster_start_probability: 0.3,
            cluster_continuation_probability: 0.7,
            min_cluster_size: 2,
            max_cluster_size: 10,
            cluster_time_window_days: 7,
        }
    }
}
226
/// A directed relationship between two entities that take part in the same
/// fraud cluster.
#[derive(Debug, Clone)]
pub struct CausalLink {
    /// Identifier of the originating entity (e.g., a payment document ID).
    pub source_entity: String,
    /// Kind of the originating entity.
    pub source_type: String,
    /// Identifier of the receiving entity (e.g., a vendor ID).
    pub target_entity: String,
    /// Kind of the receiving entity.
    pub target_type: String,
    /// Label describing how source and target are related.
    pub relationship: String,
}

impl CausalLink {
    /// Builds a link from anything convertible into `String`.
    ///
    /// Arguments are, in order: source entity, source type, target entity,
    /// target type, relationship label.
    pub fn new(
        source_entity: impl Into<String>,
        source_type: impl Into<String>,
        target_entity: impl Into<String>,
        target_type: impl Into<String>,
        relationship: impl Into<String>,
    ) -> Self {
        let source_entity = source_entity.into();
        let source_type = source_type.into();
        let target_entity = target_entity.into();
        let target_type = target_type.into();
        let relationship = relationship.into();

        CausalLink {
            source_entity,
            source_type,
            target_entity,
            target_type,
            relationship,
        }
    }
}
260
261/// Manages anomaly clustering.
262pub struct ClusterManager {
263    config: ClusteringConfig,
264    /// Current active clusters by fraud category.
265    active_clusters: HashMap<FraudCategory, ActiveCluster>,
266    /// Next cluster ID to assign.
267    next_cluster_id: u64,
268    /// Cluster statistics.
269    cluster_stats: HashMap<String, ClusterStats>,
270}
271
272/// Active cluster state.
273#[derive(Debug, Clone)]
274struct ActiveCluster {
275    /// Cluster ID.
276    cluster_id: String,
277    /// Number of anomalies in cluster.
278    size: usize,
279    /// Start date.
280    start_date: NaiveDate,
281    /// Fraud category — clusters are keyed by category in the parent HashMap;
282    /// retained here so `Debug` output and future per-cluster analytics can
283    /// identify the category without a reverse-lookup.
284    #[allow(dead_code)]
285    category: FraudCategory,
286    /// Time window for this cluster.
287    time_window_days: i64,
288    /// Accounts involved (for relationship preservation).
289    accounts: Vec<String>,
290    /// Entities involved (vendors, customers, employees).
291    entities: Vec<String>,
292}
293
294/// Statistics for a cluster.
295#[derive(Debug, Clone, Default)]
296pub struct ClusterStats {
297    /// Number of anomalies in cluster.
298    pub size: usize,
299    /// Start date.
300    pub start_date: Option<NaiveDate>,
301    /// End date.
302    pub end_date: Option<NaiveDate>,
303    /// Anomaly types in cluster.
304    pub anomaly_types: Vec<String>,
305    /// Fraud category of this cluster.
306    pub fraud_category: Option<FraudCategory>,
307    /// Time window used (days).
308    pub time_window_days: i64,
309    /// Accounts involved in this cluster.
310    pub accounts: Vec<String>,
311    /// Entities involved in this cluster.
312    pub entities: Vec<String>,
313    /// Causal links within this cluster.
314    pub causal_links: Vec<CausalLink>,
315}
316
317impl ClusterManager {
318    /// Creates a new cluster manager.
319    pub fn new(config: ClusteringConfig) -> Self {
320        Self {
321            config,
322            active_clusters: HashMap::new(),
323            next_cluster_id: 1,
324            cluster_stats: HashMap::new(),
325        }
326    }
327
328    /// Determines the cluster ID for a new anomaly.
329    pub fn assign_cluster<R: Rng>(
330        &mut self,
331        date: NaiveDate,
332        anomaly_type: &str,
333        rng: &mut R,
334    ) -> Option<String> {
335        self.assign_cluster_with_context(date, anomaly_type, None, None, rng)
336    }
337
338    /// Determines the cluster ID with additional context for relationship preservation.
339    pub fn assign_cluster_with_context<R: Rng>(
340        &mut self,
341        date: NaiveDate,
342        anomaly_type: &str,
343        account: Option<&str>,
344        entity: Option<&str>,
345        rng: &mut R,
346    ) -> Option<String> {
347        if !self.config.enabled {
348            return None;
349        }
350
351        // Determine fraud category from anomaly type
352        let category = FraudCategory::from_anomaly_type(anomaly_type);
353
354        // Get time window for this category
355        let time_window = if self.config.use_fraud_specific_windows {
356            let (min, max) = category.time_window_days();
357            rng.random_range(min..=max)
358        } else {
359            self.config.cluster_time_window_days
360        };
361
362        // Check if we should continue an existing cluster for this category
363        if let Some(active) = self.active_clusters.get(&category).cloned() {
364            let days_elapsed = (date - active.start_date).num_days();
365
366            // Check if within time window and not at max size
367            if days_elapsed <= active.time_window_days
368                && active.size < self.config.max_cluster_size
369                && rng.random::<f64>() < self.config.cluster_continuation_probability
370            {
371                // If preserving relationships, prefer matching accounts/entities
372                let relationship_match = if self.config.preserve_account_relationships {
373                    let account_match =
374                        account.is_none_or(|a| active.accounts.contains(&a.to_string()));
375                    let entity_match =
376                        entity.is_none_or(|e| active.entities.contains(&e.to_string()));
377                    account_match || entity_match
378                } else {
379                    true
380                };
381
382                if relationship_match {
383                    // Continue the cluster
384                    let cluster_id = active.cluster_id.clone();
385
386                    // Update active cluster
387                    if let Some(active_mut) = self.active_clusters.get_mut(&category) {
388                        active_mut.size += 1;
389                        if let Some(acct) = account {
390                            if !active_mut.accounts.contains(&acct.to_string()) {
391                                active_mut.accounts.push(acct.to_string());
392                            }
393                        }
394                        if let Some(ent) = entity {
395                            if !active_mut.entities.contains(&ent.to_string()) {
396                                active_mut.entities.push(ent.to_string());
397                            }
398                        }
399                    }
400
401                    // Update cluster stats
402                    if let Some(stats) = self.cluster_stats.get_mut(&cluster_id) {
403                        stats.size += 1;
404                        stats.end_date = Some(date);
405                        stats.anomaly_types.push(anomaly_type.to_string());
406                        if let Some(acct) = account {
407                            if !stats.accounts.contains(&acct.to_string()) {
408                                stats.accounts.push(acct.to_string());
409                            }
410                        }
411                        if let Some(ent) = entity {
412                            if !stats.entities.contains(&ent.to_string()) {
413                                stats.entities.push(ent.to_string());
414                            }
415                        }
416                    }
417
418                    return Some(cluster_id);
419                }
420            }
421
422            // End current cluster if at min size
423            if active.size >= self.config.min_cluster_size {
424                self.active_clusters.remove(&category);
425            }
426        }
427
428        // Decide whether to start a new cluster
429        if rng.random::<f64>() < self.config.cluster_start_probability {
430            let cluster_id = format!("CLU{:06}", self.next_cluster_id);
431            self.next_cluster_id += 1;
432
433            let mut accounts = Vec::new();
434            let mut entities = Vec::new();
435            if let Some(acct) = account {
436                accounts.push(acct.to_string());
437            }
438            if let Some(ent) = entity {
439                entities.push(ent.to_string());
440            }
441
442            // Create new active cluster
443            self.active_clusters.insert(
444                category,
445                ActiveCluster {
446                    cluster_id: cluster_id.clone(),
447                    size: 1,
448                    start_date: date,
449                    category,
450                    time_window_days: time_window,
451                    accounts: accounts.clone(),
452                    entities: entities.clone(),
453                },
454            );
455
456            // Initialize cluster stats
457            self.cluster_stats.insert(
458                cluster_id.clone(),
459                ClusterStats {
460                    size: 1,
461                    start_date: Some(date),
462                    end_date: Some(date),
463                    anomaly_types: vec![anomaly_type.to_string()],
464                    fraud_category: Some(category),
465                    time_window_days: time_window,
466                    accounts,
467                    entities,
468                    causal_links: Vec::new(),
469                },
470            );
471
472            return Some(cluster_id);
473        }
474
475        None
476    }
477
478    /// Add a causal link to a cluster.
479    pub fn add_causal_link(&mut self, cluster_id: &str, link: CausalLink) {
480        if let Some(stats) = self.cluster_stats.get_mut(cluster_id) {
481            stats.causal_links.push(link);
482        }
483    }
484
485    /// Get suggested account for relationship preservation within a cluster.
486    pub fn get_related_account(&self, cluster_id: &str) -> Option<&str> {
487        self.cluster_stats
488            .get(cluster_id)
489            .and_then(|s| s.accounts.first().map(std::string::String::as_str))
490    }
491
492    /// Get suggested entity for relationship preservation within a cluster.
493    pub fn get_related_entity(&self, cluster_id: &str) -> Option<&str> {
494        self.cluster_stats
495            .get(cluster_id)
496            .and_then(|s| s.entities.first().map(std::string::String::as_str))
497    }
498
499    /// Gets cluster statistics.
500    pub fn get_cluster_stats(&self, cluster_id: &str) -> Option<&ClusterStats> {
501        self.cluster_stats.get(cluster_id)
502    }
503
504    /// Gets all cluster statistics.
505    pub fn all_cluster_stats(&self) -> &HashMap<String, ClusterStats> {
506        &self.cluster_stats
507    }
508
509    /// Returns the number of clusters created.
510    pub fn cluster_count(&self) -> usize {
511        self.cluster_stats.len()
512    }
513
514    /// Get cluster statistics by fraud category.
515    pub fn clusters_by_category(&self) -> HashMap<FraudCategory, Vec<&ClusterStats>> {
516        let mut by_category: HashMap<FraudCategory, Vec<&ClusterStats>> = HashMap::new();
517        for stats in self.cluster_stats.values() {
518            if let Some(cat) = stats.fraud_category {
519                by_category.entry(cat).or_default().push(stats);
520            }
521        }
522        by_category
523    }
524}
525
/// Strategy used to choose which entity an anomaly is attached to.
#[derive(Debug, Clone, Default)]
pub enum EntityTargetingPattern {
    /// Pick any candidate with equal probability (the default).
    #[default]
    Random,
    /// Favour entities with a higher transaction volume.
    VolumeWeighted,
    /// Favour particular entity types.
    TypeFocused {
        /// Weight per targeted key.
        type_weights: HashMap<String, f64>,
    },
    /// Keep returning to entities that were targeted before.
    RepeatOffender {
        /// Chance of re-targeting a previously selected entity.
        repeat_probability: f64,
    },
}
545
546/// Manages entity targeting for anomalies.
547pub struct EntityTargetingManager {
548    pattern: EntityTargetingPattern,
549    /// Recently targeted entities.
550    recent_targets: Vec<String>,
551    /// Maximum recent targets to track.
552    max_recent: usize,
553    /// Entity hit counts.
554    hit_counts: HashMap<String, usize>,
555}
556
557impl EntityTargetingManager {
558    /// Creates a new entity targeting manager.
559    pub fn new(pattern: EntityTargetingPattern) -> Self {
560        Self {
561            pattern,
562            recent_targets: Vec::new(),
563            max_recent: 20,
564            hit_counts: HashMap::new(),
565        }
566    }
567
568    /// Selects an entity to target.
569    pub fn select_entity<R: Rng>(&mut self, candidates: &[String], rng: &mut R) -> Option<String> {
570        if candidates.is_empty() {
571            return None;
572        }
573
574        let selected = match &self.pattern {
575            EntityTargetingPattern::Random => {
576                candidates[rng.random_range(0..candidates.len())].clone()
577            }
578            EntityTargetingPattern::VolumeWeighted => {
579                // In practice, would weight by actual volume
580                // For now, use random
581                candidates[rng.random_range(0..candidates.len())].clone()
582            }
583            EntityTargetingPattern::TypeFocused { type_weights } => {
584                // Filter by type weights
585                let weighted: Vec<_> = candidates
586                    .iter()
587                    .filter_map(|c| type_weights.get(c).map(|&w| (c.clone(), w)))
588                    .collect();
589
590                if weighted.is_empty() {
591                    candidates[rng.random_range(0..candidates.len())].clone()
592                } else {
593                    weighted_select(rng, &weighted).clone()
594                }
595            }
596            EntityTargetingPattern::RepeatOffender { repeat_probability } => {
597                // Check if we should repeat a recent target
598                if !self.recent_targets.is_empty() && rng.random::<f64>() < *repeat_probability {
599                    let idx = rng.random_range(0..self.recent_targets.len());
600                    self.recent_targets[idx].clone()
601                } else {
602                    candidates[rng.random_range(0..candidates.len())].clone()
603                }
604            }
605        };
606
607        // Track the selection
608        self.recent_targets.push(selected.clone());
609        if self.recent_targets.len() > self.max_recent {
610            self.recent_targets.remove(0);
611        }
612
613        *self.hit_counts.entry(selected.clone()).or_insert(0) += 1;
614
615        Some(selected)
616    }
617
618    /// Gets hit count for an entity.
619    pub fn hit_count(&self, entity: &str) -> usize {
620        *self.hit_counts.get(entity).unwrap_or(&0)
621    }
622}
623
624/// Combined pattern configuration.
625#[derive(Debug, Clone)]
626pub struct AnomalyPatternConfig {
627    /// Temporal pattern.
628    pub temporal_pattern: TemporalPattern,
629    /// Clustering configuration.
630    pub clustering: ClusteringConfig,
631    /// Entity targeting pattern.
632    pub entity_targeting: EntityTargetingPattern,
633    /// Whether to inject anomalies in batches.
634    pub batch_injection: bool,
635    /// Batch size range.
636    pub batch_size_range: (usize, usize),
637}
638
639impl Default for AnomalyPatternConfig {
640    fn default() -> Self {
641        Self {
642            temporal_pattern: TemporalPattern::default(),
643            clustering: ClusteringConfig::default(),
644            entity_targeting: EntityTargetingPattern::default(),
645            batch_injection: false,
646            batch_size_range: (2, 5),
647        }
648    }
649}
650
651/// Determines if an anomaly should be injected at this point.
652pub fn should_inject_anomaly<R: Rng>(
653    base_rate: f64,
654    date: NaiveDate,
655    pattern: &TemporalPattern,
656    rng: &mut R,
657) -> bool {
658    let multiplier = pattern.probability_multiplier(date);
659    let adjusted_rate = (base_rate * multiplier).min(1.0);
660    rng.random::<f64>() < adjusted_rate
661}
662
663// ============================================================================
664// Fraud Actor System - User-Based Fraud Targeting
665// ============================================================================
666
/// How the size of an actor's frauds develops as they commit more of them.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EscalationPattern {
    /// Amounts stay at the same level.
    Stable,
    /// Amounts creep up slowly (typical embezzlement).
    Gradual,
    /// Amounts climb quickly as the actor gets bolder.
    Aggressive,
    /// Amounts trend upward.
    Erratic,
    /// A few small test frauds followed by one large strike.
    TestThenStrike,
}

impl EscalationPattern {
    /// Returns the factor applied to the next fraud amount, given how many
    /// frauds the actor has already committed.
    ///
    /// * `Stable`: always `1.0`.
    /// * `Gradual`: `1.0 + 0.10` per prior fraud, capped at `3.0`.
    /// * `Aggressive`: `1.0 + 0.25` per prior fraud, capped at `5.0`.
    /// * `Erratic`: `1.0 + 0.15` per prior fraud, capped at `4.0`.
    /// * `TestThenStrike`: `0.3` for the first three frauds, `5.0` for the
    ///   fourth, and `0.0` afterwards.
    pub fn escalation_multiplier(&self, prior_fraud_count: usize) -> f64 {
        let priors = prior_fraud_count as f64;
        // Linear ramp starting at 1.0, limited to `cap`.
        let ramp = |step: f64, cap: f64| (1.0 + step * priors).min(cap);

        match (self, prior_fraud_count) {
            (Self::Stable, _) => 1.0,
            (Self::Gradual, _) => ramp(0.1, 3.0),
            (Self::Aggressive, _) => ramp(0.25, 5.0),
            (Self::Erratic, _) => ramp(0.15, 4.0),
            // Small test amounts first...
            (Self::TestThenStrike, 0..=2) => 0.3,
            // ...then the big strike...
            (Self::TestThenStrike, 3) => 5.0,
            // ...and nothing after it.
            (Self::TestThenStrike, _) => 0.0,
        }
    }
}
713
714/// A fraud actor represents a user who commits fraud over time.
715#[derive(Debug, Clone)]
716pub struct FraudActor {
717    /// User ID of the fraudster.
718    pub user_id: String,
719    /// User's name for display purposes.
720    pub user_name: String,
721    /// Fraud history (document IDs and dates).
722    pub fraud_history: Vec<FraudIncident>,
723    /// Escalation pattern for this actor.
724    pub escalation_pattern: EscalationPattern,
725    /// Preferred GL accounts for fraud.
726    pub preferred_accounts: Vec<String>,
727    /// Preferred vendors (for AP fraud).
728    pub preferred_vendors: Vec<String>,
729    /// Total amount of fraud committed.
730    pub total_amount: rust_decimal::Decimal,
731    /// Start date of fraud activity.
732    pub start_date: Option<NaiveDate>,
733    /// Detection likelihood (0.0-1.0) - increases with activity.
734    pub detection_risk: f64,
735    /// Is this actor currently active?
736    pub is_active: bool,
737}
738
739/// A single fraud incident committed by an actor.
740#[derive(Debug, Clone)]
741pub struct FraudIncident {
742    /// Document ID of the fraudulent entry.
743    pub document_id: String,
744    /// Date of the fraud.
745    pub date: NaiveDate,
746    /// Amount of the fraud.
747    pub amount: rust_decimal::Decimal,
748    /// Fraud type.
749    pub fraud_type: String,
750    /// Account used.
751    pub account: Option<String>,
752    /// Related entity (vendor, customer, etc.).
753    pub entity: Option<String>,
754}
755
756impl FraudActor {
757    /// Create a new fraud actor.
758    pub fn new(
759        user_id: impl Into<String>,
760        user_name: impl Into<String>,
761        escalation_pattern: EscalationPattern,
762    ) -> Self {
763        Self {
764            user_id: user_id.into(),
765            user_name: user_name.into(),
766            fraud_history: Vec::new(),
767            escalation_pattern,
768            preferred_accounts: Vec::new(),
769            preferred_vendors: Vec::new(),
770            total_amount: rust_decimal::Decimal::ZERO,
771            start_date: None,
772            detection_risk: 0.0,
773            is_active: true,
774        }
775    }
776
777    /// Add a preferred account for fraud.
778    pub fn with_account(mut self, account: impl Into<String>) -> Self {
779        self.preferred_accounts.push(account.into());
780        self
781    }
782
783    /// Add a preferred vendor for fraud.
784    pub fn with_vendor(mut self, vendor: impl Into<String>) -> Self {
785        self.preferred_vendors.push(vendor.into());
786        self
787    }
788
789    /// Record a fraud incident.
790    pub fn record_fraud(
791        &mut self,
792        document_id: impl Into<String>,
793        date: NaiveDate,
794        amount: rust_decimal::Decimal,
795        fraud_type: impl Into<String>,
796        account: Option<String>,
797        entity: Option<String>,
798    ) {
799        let incident = FraudIncident {
800            document_id: document_id.into(),
801            date,
802            amount,
803            fraud_type: fraud_type.into(),
804            account: account.clone(),
805            entity: entity.clone(),
806        };
807
808        self.fraud_history.push(incident);
809        self.total_amount += amount;
810
811        if self.start_date.is_none() {
812            self.start_date = Some(date);
813        }
814
815        // Update detection risk based on activity
816        self.update_detection_risk();
817
818        // Add account/entity to preferences if not already present
819        if let Some(acct) = account {
820            if !self.preferred_accounts.contains(&acct) {
821                self.preferred_accounts.push(acct);
822            }
823        }
824        if let Some(ent) = entity {
825            if !self.preferred_vendors.contains(&ent) {
826                self.preferred_vendors.push(ent);
827            }
828        }
829    }
830
831    /// Update detection risk based on fraud activity.
832    fn update_detection_risk(&mut self) {
833        // Detection risk increases with:
834        // 1. Number of frauds committed
835        // 2. Total amount
836        // 3. How bold the escalation pattern is
837        let count_factor = (self.fraud_history.len() as f64 * 0.05).min(0.3);
838        let amount_factor = if self.total_amount > rust_decimal::Decimal::from(100_000) {
839            0.3
840        } else if self.total_amount > rust_decimal::Decimal::from(10_000) {
841            0.2
842        } else {
843            0.1
844        };
845        let pattern_factor = match self.escalation_pattern {
846            EscalationPattern::Stable => 0.1,
847            EscalationPattern::Gradual => 0.15,
848            EscalationPattern::Erratic => 0.2,
849            EscalationPattern::Aggressive => 0.25,
850            EscalationPattern::TestThenStrike => 0.3,
851        };
852
853        self.detection_risk = (count_factor + amount_factor + pattern_factor).min(0.95);
854    }
855
856    /// Get the escalation multiplier for the next fraud.
857    pub fn next_escalation_multiplier(&self) -> f64 {
858        self.escalation_pattern
859            .escalation_multiplier(self.fraud_history.len())
860    }
861
862    /// Get a preferred account, or None if no preferences.
863    pub fn get_preferred_account<R: Rng>(&self, rng: &mut R) -> Option<&str> {
864        if self.preferred_accounts.is_empty() {
865            None
866        } else {
867            Some(&self.preferred_accounts[rng.random_range(0..self.preferred_accounts.len())])
868        }
869    }
870
871    /// Get a preferred vendor, or None if no preferences.
872    pub fn get_preferred_vendor<R: Rng>(&self, rng: &mut R) -> Option<&str> {
873        if self.preferred_vendors.is_empty() {
874            None
875        } else {
876            Some(&self.preferred_vendors[rng.random_range(0..self.preferred_vendors.len())])
877        }
878    }
879}
880
881/// Manages fraud actors for user-based fraud targeting.
882pub struct FraudActorManager {
883    /// All fraud actors.
884    actors: Vec<FraudActor>,
885    /// Map from user_id to actor index.
886    user_index: HashMap<String, usize>,
887    /// Probability of using an existing actor vs creating new one.
888    repeat_actor_probability: f64,
889    /// Maximum active actors at any time.
890    max_active_actors: usize,
891}
892
893impl FraudActorManager {
894    /// Create a new fraud actor manager.
895    pub fn new(repeat_actor_probability: f64, max_active_actors: usize) -> Self {
896        Self {
897            actors: Vec::new(),
898            user_index: HashMap::new(),
899            repeat_actor_probability,
900            max_active_actors,
901        }
902    }
903
904    /// Add a fraud actor.
905    pub fn add_actor(&mut self, actor: FraudActor) {
906        let idx = self.actors.len();
907        self.user_index.insert(actor.user_id.clone(), idx);
908        self.actors.push(actor);
909    }
910
911    /// Get or create a fraud actor for the next fraud.
912    pub fn get_or_create_actor<R: Rng>(
913        &mut self,
914        available_users: &[String],
915        rng: &mut R,
916    ) -> Option<&mut FraudActor> {
917        if available_users.is_empty() {
918            return None;
919        }
920
921        // Check if we should use an existing active actor
922        let active_actors: Vec<usize> = self
923            .actors
924            .iter()
925            .enumerate()
926            .filter(|(_, a)| a.is_active)
927            .map(|(i, _)| i)
928            .collect();
929
930        if !active_actors.is_empty() && rng.random::<f64>() < self.repeat_actor_probability {
931            // Use existing actor
932            let idx = active_actors[rng.random_range(0..active_actors.len())];
933            return Some(&mut self.actors[idx]);
934        }
935
936        // Create new actor if under max
937        if self.actors.len() < self.max_active_actors {
938            // Pick a random user
939            let user_id = &available_users[rng.random_range(0..available_users.len())];
940
941            // Check if user already has an actor
942            if let Some(&idx) = self.user_index.get(user_id) {
943                return Some(&mut self.actors[idx]);
944            }
945
946            // Create new actor with random escalation pattern
947            let pattern = match rng.random_range(0..5) {
948                0 => EscalationPattern::Stable,
949                1 => EscalationPattern::Gradual,
950                2 => EscalationPattern::Aggressive,
951                3 => EscalationPattern::Erratic,
952                _ => EscalationPattern::TestThenStrike,
953            };
954
955            let actor = FraudActor::new(user_id.clone(), format!("Fraudster {user_id}"), pattern);
956            let idx = self.actors.len();
957            self.user_index.insert(user_id.clone(), idx);
958            self.actors.push(actor);
959            return Some(&mut self.actors[idx]);
960        }
961
962        // Use random existing actor
963        if !self.actors.is_empty() {
964            let idx = rng.random_range(0..self.actors.len());
965            return Some(&mut self.actors[idx]);
966        }
967
968        None
969    }
970
971    /// Get an actor by user ID.
972    pub fn get_actor(&self, user_id: &str) -> Option<&FraudActor> {
973        self.user_index.get(user_id).map(|&i| &self.actors[i])
974    }
975
976    /// Get a mutable actor by user ID.
977    pub fn get_actor_mut(&mut self, user_id: &str) -> Option<&mut FraudActor> {
978        if let Some(&idx) = self.user_index.get(user_id) {
979            Some(&mut self.actors[idx])
980        } else {
981            None
982        }
983    }
984
985    /// Deactivate actors who have high detection risk.
986    pub fn apply_detection<R: Rng>(&mut self, rng: &mut R) {
987        for actor in &mut self.actors {
988            if actor.is_active && rng.random::<f64>() < actor.detection_risk {
989                actor.is_active = false;
990            }
991        }
992    }
993
994    /// Get all actors.
995    pub fn all_actors(&self) -> &[FraudActor] {
996        &self.actors
997    }
998
999    /// Get summary statistics.
1000    pub fn get_statistics(&self) -> FraudActorStatistics {
1001        let total_actors = self.actors.len();
1002        let active_actors = self.actors.iter().filter(|a| a.is_active).count();
1003        let total_incidents: usize = self.actors.iter().map(|a| a.fraud_history.len()).sum();
1004        let total_amount: rust_decimal::Decimal = self.actors.iter().map(|a| a.total_amount).sum();
1005
1006        FraudActorStatistics {
1007            total_actors,
1008            active_actors,
1009            total_incidents,
1010            total_amount,
1011        }
1012    }
1013}
1014
1015/// Statistics about fraud actors.
1016#[derive(Debug, Clone)]
1017pub struct FraudActorStatistics {
1018    /// Total number of fraud actors.
1019    pub total_actors: usize,
1020    /// Number of currently active actors.
1021    pub active_actors: usize,
1022    /// Total fraud incidents across all actors.
1023    pub total_incidents: usize,
1024    /// Total fraud amount across all actors.
1025    pub total_amount: rust_decimal::Decimal,
1026}
1027
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use rand::SeedableRng;
    use rand_chacha::ChaCha8Rng;

    /// The default period-end pattern leaves ordinary days untouched and
    /// boosts month end, with year end boosted more strongly still.
    #[test]
    fn test_temporal_pattern_multiplier() {
        let pattern = TemporalPattern::default();

        // Regular day
        let regular = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap();
        assert_eq!(pattern.probability_multiplier(regular), 1.0);

        // Month end
        let month_end = NaiveDate::from_ymd_opt(2024, 6, 30).unwrap();
        assert!(pattern.probability_multiplier(month_end) > 1.0);

        // Year end
        let year_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();
        assert!(
            pattern.probability_multiplier(year_end) > pattern.probability_multiplier(month_end)
        );
    }

    /// With the default clustering config, a batch of nearby anomalies yields
    /// at least one cluster assignment.
    #[test]
    fn test_cluster_manager() {
        let mut manager = ClusterManager::new(ClusteringConfig::default());
        let mut rng = ChaCha8Rng::seed_from_u64(42);
        let date = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap();

        // Generate several anomalies and check clustering
        let mut clustered = 0;
        for i in 0..20 {
            let d = date + chrono::Duration::days(i % 7); // Within time window
            if manager.assign_cluster(d, "TestType", &mut rng).is_some() {
                clustered += 1;
            }
        }

        // Some should be clustered
        assert!(clustered > 0);
        assert!(manager.cluster_count() > 0);
    }

    /// Accounts-receivable fraud uses a longer time window than the general
    /// category, at both ends of the range.
    #[test]
    fn test_fraud_category_time_windows() {
        let ar = FraudCategory::AccountsReceivable;
        let general = FraudCategory::General;

        let (ar_min, ar_max) = ar.time_window_days();
        let (gen_min, gen_max) = general.time_window_days();

        assert!(ar_min > gen_min);
        assert!(ar_max > gen_max);
    }

    /// Anomaly type names map to the expected fraud category, with unknown
    /// names falling back to `General`.
    #[test]
    fn test_fraud_category_inference() {
        assert_eq!(
            FraudCategory::from_anomaly_type("AccountsReceivable"),
            FraudCategory::AccountsReceivable
        );
        assert_eq!(
            FraudCategory::from_anomaly_type("VendorPayment"),
            FraudCategory::AccountsPayable
        );
        assert_eq!(
            FraudCategory::from_anomaly_type("GhostEmployee"),
            FraudCategory::Payroll
        );
        assert_eq!(
            FraudCategory::from_anomaly_type("RandomType"),
            FraudCategory::General
        );
    }

    /// Two anomalies sharing an account join the same cluster, and the cluster
    /// statistics track the distinct accounts and entities involved.
    #[test]
    fn test_cluster_with_context() {
        let mut manager = ClusterManager::new(ClusteringConfig {
            cluster_start_probability: 1.0,        // Always start
            cluster_continuation_probability: 1.0, // Always continue
            ..Default::default()
        });
        let mut rng = ChaCha8Rng::seed_from_u64(42);
        let date = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap();

        // First anomaly starts a cluster
        let cluster1 = manager.assign_cluster_with_context(
            date,
            "VendorPayment",
            Some("200000"),
            Some("V001"),
            &mut rng,
        );
        assert!(cluster1.is_some());

        // Second anomaly with same account should join same cluster
        let cluster2 = manager.assign_cluster_with_context(
            date + chrono::Duration::days(5),
            "VendorPayment",
            Some("200000"),
            Some("V002"),
            &mut rng,
        );

        assert_eq!(cluster1, cluster2);

        // Check stats have both entities
        let stats = manager.get_cluster_stats(&cluster1.unwrap()).unwrap();
        assert_eq!(stats.accounts.len(), 1); // Same account
        assert_eq!(stats.entities.len(), 2); // Two vendors
    }

    /// Causal links added to a cluster show up in its statistics.
    #[test]
    fn test_causal_links() {
        let mut manager = ClusterManager::new(ClusteringConfig {
            cluster_start_probability: 1.0,
            ..Default::default()
        });
        let mut rng = ChaCha8Rng::seed_from_u64(42);
        let date = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap();

        let cluster_id = manager
            .assign_cluster(date, "VendorPayment", &mut rng)
            .unwrap();

        // Add causal link
        manager.add_causal_link(
            &cluster_id,
            CausalLink::new("PAY-001", "Payment", "V001", "Vendor", "references"),
        );
        manager.add_causal_link(
            &cluster_id,
            CausalLink::new("V001", "Vendor", "EMP-001", "Employee", "owned_by"),
        );

        let stats = manager.get_cluster_stats(&cluster_id).unwrap();
        assert_eq!(stats.causal_links.len(), 2);
    }

    /// Over many seeded trials, year end receives more injections than an
    /// ordinary day because of the temporal multiplier.
    #[test]
    fn test_should_inject_anomaly() {
        let mut rng = ChaCha8Rng::seed_from_u64(42);
        let pattern = TemporalPattern::default();

        let regular_date = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap();
        let year_end = NaiveDate::from_ymd_opt(2024, 12, 31).unwrap();

        // Count injections over many trials
        let mut regular_count = 0;
        let mut year_end_count = 0;

        for _ in 0..1000 {
            if should_inject_anomaly(0.1, regular_date, &pattern, &mut rng) {
                regular_count += 1;
            }
            if should_inject_anomaly(0.1, year_end, &pattern, &mut rng) {
                year_end_count += 1;
            }
        }

        // Year end should have more injections due to multiplier
        assert!(year_end_count > regular_count);
    }

    /// Each escalation pattern produces its characteristic multiplier curve.
    #[test]
    fn test_escalation_patterns() {
        // Stable should always return 1.0
        assert_eq!(EscalationPattern::Stable.escalation_multiplier(0), 1.0);
        assert_eq!(EscalationPattern::Stable.escalation_multiplier(10), 1.0);

        // Gradual should increase over time
        let gradual = EscalationPattern::Gradual;
        assert!(gradual.escalation_multiplier(5) > gradual.escalation_multiplier(0));
        assert!(gradual.escalation_multiplier(5) <= 3.0); // Max is 3x

        // Aggressive should increase faster
        let aggressive = EscalationPattern::Aggressive;
        assert!(aggressive.escalation_multiplier(5) > gradual.escalation_multiplier(5));

        // TestThenStrike has specific pattern
        let tts = EscalationPattern::TestThenStrike;
        assert!(tts.escalation_multiplier(0) < 1.0); // Small test amounts
        assert!(tts.escalation_multiplier(3) > 1.0); // Big strike
        assert_eq!(tts.escalation_multiplier(4), 0.0); // Stop after strike
    }

    /// Recording a fraud updates history, totals, start date, detection risk
    /// and the actor's preferences.
    #[test]
    fn test_fraud_actor() {
        use rust_decimal_macros::dec;

        let mut actor = FraudActor::new("USER001", "John Fraudster", EscalationPattern::Gradual)
            .with_account("600000")
            .with_vendor("V001");

        assert_eq!(actor.preferred_accounts.len(), 1);
        assert_eq!(actor.preferred_vendors.len(), 1);
        assert!(actor.is_active);

        // Record some fraud
        let date = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap();
        actor.record_fraud(
            "JE-001",
            date,
            dec!(1000),
            "DuplicatePayment",
            Some("600000".to_string()),
            Some("V002".to_string()),
        );

        assert_eq!(actor.fraud_history.len(), 1);
        assert_eq!(actor.total_amount, dec!(1000));
        assert_eq!(actor.start_date, Some(date));
        assert!(actor.detection_risk > 0.0);

        // V002 should be added to preferences
        assert!(actor.preferred_vendors.contains(&"V002".to_string()));
    }

    /// The manager creates an actor on demand, and fraud recorded on it is
    /// visible through lookups and statistics.
    #[test]
    fn test_fraud_actor_manager() {
        let mut rng = ChaCha8Rng::seed_from_u64(42);
        let mut manager = FraudActorManager::new(0.7, 5);

        let users = vec![
            "USER001".to_string(),
            "USER002".to_string(),
            "USER003".to_string(),
        ];

        // Get or create actor
        let actor = manager.get_or_create_actor(&users, &mut rng);
        assert!(actor.is_some());

        // Record fraud
        let actor = actor.unwrap();
        let user_id = actor.user_id.clone();
        actor.record_fraud(
            "JE-001",
            NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
            rust_decimal::Decimal::from(1000),
            "FictitiousEntry",
            None,
            None,
        );

        // Should be able to retrieve actor
        let retrieved = manager.get_actor(&user_id);
        assert!(retrieved.is_some());
        assert_eq!(retrieved.unwrap().fraud_history.len(), 1);

        // Get statistics
        let stats = manager.get_statistics();
        assert_eq!(stats.total_actors, 1);
        assert_eq!(stats.active_actors, 1);
        assert_eq!(stats.total_incidents, 1);
    }

    /// A heavily active actor accumulates high detection risk and is
    /// deactivated by repeated detection rounds.
    #[test]
    fn test_fraud_actor_detection() {
        use rust_decimal_macros::dec;

        let mut rng = ChaCha8Rng::seed_from_u64(42);
        let mut manager = FraudActorManager::new(1.0, 10);

        // Add actor with high activity
        let mut actor =
            FraudActor::new("USER001", "Heavy Fraudster", EscalationPattern::Aggressive);
        let date = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap();

        // Record many frauds to increase detection risk
        for i in 0..10_i64 {
            actor.record_fraud(
                format!("JE-{i:03}"),
                date + chrono::Duration::days(i),
                dec!(10000),
                "FictitiousEntry",
                None,
                None,
            );
        }

        manager.add_actor(actor);

        // Detection risk should be high
        let actor = manager.get_actor("USER001").unwrap();
        assert!(actor.detection_risk > 0.5);

        // Apply detection repeatedly
        for _ in 0..20 {
            manager.apply_detection(&mut rng);
        }

        // The RNG is seeded, so the outcome is deterministic. With a per-round
        // detection risk above 0.5, surviving all 20 rounds has probability
        // below 0.5^20, so the actor must have been deactivated.
        let stats = manager.get_statistics();
        assert_eq!(stats.total_actors, 1);
        assert_eq!(stats.active_actors, 0);
        assert!(!manager.get_actor("USER001").unwrap().is_active);
    }
}
datasynth_generators/anomaly/patterns.rs

datasynth_generators/anomaly/
patterns.rs