Skip to main content

datasynth_banking/
config.rs

1//! Configuration for banking data generation.
2
3use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5
/// Top-level configuration for banking data generation.
///
/// Every field has a serde default: `enabled` falls back to `true` and each
/// section falls back to the `Default` implementation of its own type, so any
/// part omitted from the input is filled in rather than rejected.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BankingConfig {
    /// Whether banking generation is enabled (`true` when omitted)
    #[serde(default = "default_true")]
    pub enabled: bool,
    /// Population configuration (customer counts, persona weights, simulation period)
    #[serde(default)]
    pub population: PopulationConfig,
    /// Product configuration (transaction mix and accounts per customer)
    #[serde(default)]
    pub products: ProductConfig,
    /// Compliance configuration (risk appetite, KYC and PEP settings)
    #[serde(default)]
    pub compliance: ComplianceConfig,
    /// AML typology configuration (per-typology rates)
    #[serde(default)]
    pub typologies: TypologyConfig,
    /// Spoofing (adversarial) configuration
    #[serde(default)]
    pub spoofing: SpoofingConfig,
    /// Output configuration (which artifacts are included)
    #[serde(default)]
    pub output: BankingOutputConfig,
    /// Temporal behavior configuration (lifecycle, drift, velocity, impossible travel)
    #[serde(default)]
    pub temporal: TemporalBehaviorConfig,
    /// Device fingerprint configuration
    #[serde(default)]
    pub device: DeviceFingerprintConfig,
}
37
/// Serde default helper for boolean flags that are on unless the input says otherwise.
fn default_true() -> bool {
    true
}
41
42impl Default for BankingConfig {
43    fn default() -> Self {
44        Self {
45            enabled: true,
46            population: PopulationConfig::default(),
47            products: ProductConfig::default(),
48            compliance: ComplianceConfig::default(),
49            typologies: TypologyConfig::default(),
50            spoofing: SpoofingConfig::default(),
51            output: BankingOutputConfig::default(),
52            temporal: TemporalBehaviorConfig::default(),
53            device: DeviceFingerprintConfig::default(),
54        }
55    }
56}
57
58impl BankingConfig {
59    /// Create a small configuration for testing.
60    pub fn small() -> Self {
61        Self {
62            population: PopulationConfig {
63                retail_customers: 100,
64                business_customers: 20,
65                trusts: 5,
66                ..Default::default()
67            },
68            ..Default::default()
69        }
70    }
71
72    /// Create a medium configuration.
73    pub fn medium() -> Self {
74        Self {
75            population: PopulationConfig {
76                retail_customers: 1_000,
77                business_customers: 200,
78                trusts: 50,
79                ..Default::default()
80            },
81            ..Default::default()
82        }
83    }
84
85    /// Create a large configuration.
86    pub fn large() -> Self {
87        Self {
88            population: PopulationConfig {
89                retail_customers: 10_000,
90                business_customers: 1_000,
91                trusts: 100,
92                ..Default::default()
93            },
94            ..Default::default()
95        }
96    }
97
98    /// Validate the configuration.
99    pub fn validate(&self) -> Result<(), Vec<String>> {
100        let mut errors = Vec::new();
101
102        // Validate population
103        if self.population.retail_customers == 0
104            && self.population.business_customers == 0
105            && self.population.trusts == 0
106        {
107            errors.push("At least one customer type must have non-zero count".to_string());
108        }
109
110        // Validate persona weights sum to 1.0
111        let retail_sum: f64 = self.population.retail_persona_weights.values().sum();
112        if (retail_sum - 1.0).abs() > 0.01 {
113            errors.push(format!(
114                "Retail persona weights must sum to 1.0, got {retail_sum}"
115            ));
116        }
117
118        // Validate typology rates
119        let total_suspicious = self.typologies.structuring_rate
120            + self.typologies.funnel_rate
121            + self.typologies.layering_rate
122            + self.typologies.mule_rate
123            + self.typologies.fraud_rate;
124        if total_suspicious > self.typologies.suspicious_rate + 0.001 {
125            errors.push(format!(
126                "Sum of typology rates ({}) exceeds suspicious_rate ({})",
127                total_suspicious, self.typologies.suspicious_rate
128            ));
129        }
130
131        // Validate spoofing intensity
132        if self.spoofing.intensity < 0.0 || self.spoofing.intensity > 1.0 {
133            errors.push("Spoofing intensity must be between 0.0 and 1.0".to_string());
134        }
135
136        if errors.is_empty() {
137            Ok(())
138        } else {
139            Err(errors)
140        }
141    }
142}
143
/// Population configuration.
///
/// None of the fields has a field-level serde default, so when this section
/// is present in the input every field must be supplied.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PopulationConfig {
    /// Number of retail customers
    pub retail_customers: u32,
    /// Retail persona weight distribution (persona name -> weight)
    pub retail_persona_weights: HashMap<String, f64>,
    /// Number of business customers
    pub business_customers: u32,
    /// Business persona weight distribution (persona name -> weight)
    pub business_persona_weights: HashMap<String, f64>,
    /// Number of trust customers
    pub trusts: u32,
    /// Household formation rate (proportion of retail in households)
    pub household_rate: f64,
    /// Average household size
    pub avg_household_size: f64,
    /// Simulation period in months
    pub period_months: u32,
    /// Simulation start date (YYYY-MM-DD), stored as a plain string
    pub start_date: String,
}
166
167impl Default for PopulationConfig {
168    fn default() -> Self {
169        let mut retail_weights = HashMap::new();
170        retail_weights.insert("student".to_string(), 0.15);
171        retail_weights.insert("early_career".to_string(), 0.25);
172        retail_weights.insert("mid_career".to_string(), 0.30);
173        retail_weights.insert("retiree".to_string(), 0.15);
174        retail_weights.insert("high_net_worth".to_string(), 0.05);
175        retail_weights.insert("gig_worker".to_string(), 0.10);
176
177        let mut business_weights = HashMap::new();
178        business_weights.insert("small_business".to_string(), 0.50);
179        business_weights.insert("mid_market".to_string(), 0.25);
180        business_weights.insert("enterprise".to_string(), 0.05);
181        business_weights.insert("cash_intensive".to_string(), 0.10);
182        business_weights.insert("import_export".to_string(), 0.05);
183        business_weights.insert("professional_services".to_string(), 0.05);
184
185        Self {
186            retail_customers: 10_000,
187            retail_persona_weights: retail_weights,
188            business_customers: 1_000,
189            business_persona_weights: business_weights,
190            trusts: 100,
191            household_rate: 0.4,
192            avg_household_size: 2.3,
193            period_months: 12,
194            start_date: "2024-01-01".to_string(),
195        }
196    }
197}
198
/// Product configuration.
///
/// None of the fields has a field-level serde default, so when this section
/// is present in the input every field must be supplied.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProductConfig {
    /// Cash transaction intensity (0.0-1.0)
    pub cash_intensity: f64,
    /// Cross-border transaction rate (0.0-1.0)
    pub cross_border_rate: f64,
    /// Card vs transfer ratio for payments
    pub card_vs_transfer: f64,
    /// Average accounts per retail customer
    pub avg_accounts_retail: f64,
    /// Average accounts per business customer
    pub avg_accounts_business: f64,
    /// Proportion of customers with debit cards
    pub debit_card_rate: f64,
    /// Proportion of customers with international capability
    pub international_rate: f64,
}
217
218impl Default for ProductConfig {
219    fn default() -> Self {
220        Self {
221            cash_intensity: 0.15,
222            cross_border_rate: 0.05,
223            card_vs_transfer: 0.6,
224            avg_accounts_retail: 1.5,
225            avg_accounts_business: 2.5,
226            debit_card_rate: 0.85,
227            international_rate: 0.10,
228        }
229    }
230}
231
/// Compliance configuration.
///
/// None of the fields has a field-level serde default, so when this section
/// is present in the input every field must be supplied.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComplianceConfig {
    /// Risk appetite (low, medium, high)
    pub risk_appetite: RiskAppetite,
    /// KYC completeness rate (0.0-1.0)
    pub kyc_completeness: f64,
    /// Proportion of high-risk customers accepted
    pub high_risk_tolerance: f64,
    /// PEP proportion in customer base
    pub pep_rate: f64,
    /// Enhanced due diligence trigger threshold
    /// (NOTE(review): unit/currency is not stated here — confirm against the consumer)
    pub edd_threshold: u64,
}
246
247impl Default for ComplianceConfig {
248    fn default() -> Self {
249        Self {
250            risk_appetite: RiskAppetite::Medium,
251            kyc_completeness: 0.95,
252            high_risk_tolerance: 0.05,
253            pep_rate: 0.01,
254            edd_threshold: 50_000,
255        }
256    }
257}
258
/// Risk appetite level.
///
/// Variants are (de)serialized in snake_case (`low`, `medium`, `high`);
/// `Medium` is the default.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum RiskAppetite {
    /// Low risk tolerance
    Low,
    /// Medium risk tolerance (the default variant)
    #[default]
    Medium,
    /// High risk tolerance
    High,
}
271
272impl RiskAppetite {
273    /// High-risk customer multiplier.
274    pub fn high_risk_multiplier(&self) -> f64 {
275        match self {
276            Self::Low => 0.5,
277            Self::Medium => 1.0,
278            Self::High => 2.0,
279        }
280    }
281}
282
/// AML typology configuration.
///
/// The fields from `synthetic_identity_rate` onward carry field-level serde
/// defaults and may be omitted from the input. The fields before them have
/// none, so they must be supplied whenever this section is present.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TypologyConfig {
    /// Overall suspicious activity rate (0.0-1.0)
    pub suspicious_rate: f64,
    /// Structuring typology rate
    pub structuring_rate: f64,
    /// Funnel account rate
    pub funnel_rate: f64,
    /// Layering chain rate
    pub layering_rate: f64,
    /// Money mule rate
    pub mule_rate: f64,
    /// Fraud rate (ATO, synthetic, etc.)
    pub fraud_rate: f64,
    /// Sophistication distribution
    pub sophistication: SophisticationDistribution,
    /// Base detectability (0.0-1.0)
    pub detectability: f64,
    /// Round-tripping rate
    pub round_tripping_rate: f64,
    /// Trade-based ML rate
    pub trade_based_rate: f64,
    /// Synthetic identity fraud rate (0.001 when omitted)
    #[serde(default = "default_synth_id_rate")]
    pub synthetic_identity_rate: f64,
    /// Cryptocurrency integration rate (0.001 when omitted)
    #[serde(default = "default_crypto_rate")]
    pub crypto_integration_rate: f64,
    /// Sanctions evasion rate (0.0005 when omitted)
    #[serde(default = "default_sanctions_rate")]
    pub sanctions_evasion_rate: f64,
    /// False positive rate (fraction of legitimate txns tagged as suspicious-looking);
    /// 0.05 when omitted
    #[serde(default = "default_false_positive_rate")]
    pub false_positive_rate: f64,
    /// Cross-typology co-occurrence rate (fraction of cases combining multiple typologies);
    /// 0.10 when omitted
    #[serde(default = "default_co_occurrence_rate")]
    pub co_occurrence_rate: f64,
    /// Multi-party network scenario rate (fraction using coordinated networks);
    /// 0.05 when omitted
    #[serde(default = "default_network_rate")]
    pub network_typology_rate: f64,
    /// Fraction of document-flow Payments to bridge to BankTransactions (0.0 disables);
    /// 0.75 when omitted
    #[serde(default = "default_payment_bridge_rate")]
    pub payment_bridge_rate: f64,
}
328
/// Serde default for `TypologyConfig::synthetic_identity_rate`.
fn default_synth_id_rate() -> f64 {
    0.001
}

/// Serde default for `TypologyConfig::crypto_integration_rate`.
fn default_crypto_rate() -> f64 {
    0.001
}

/// Serde default for `TypologyConfig::sanctions_evasion_rate`.
fn default_sanctions_rate() -> f64 {
    0.0005
}

/// Serde default for `TypologyConfig::false_positive_rate`.
fn default_false_positive_rate() -> f64 {
    0.05
}

/// Serde default for `TypologyConfig::co_occurrence_rate`.
fn default_co_occurrence_rate() -> f64 {
    0.1
}

/// Serde default for `TypologyConfig::network_typology_rate`.
fn default_network_rate() -> f64 {
    0.05
}

/// Serde default for `TypologyConfig::payment_bridge_rate`.
fn default_payment_bridge_rate() -> f64 {
    0.75
}
350
351impl Default for TypologyConfig {
352    fn default() -> Self {
353        Self {
354            suspicious_rate: 0.02,
355            structuring_rate: 0.004,
356            funnel_rate: 0.003,
357            layering_rate: 0.003,
358            mule_rate: 0.005,
359            fraud_rate: 0.005,
360            sophistication: SophisticationDistribution::default(),
361            detectability: 0.5,
362            round_tripping_rate: 0.001,
363            trade_based_rate: 0.001,
364            synthetic_identity_rate: 0.001,
365            crypto_integration_rate: 0.001,
366            sanctions_evasion_rate: 0.0005,
367            false_positive_rate: 0.05,
368            co_occurrence_rate: 0.10,
369            network_typology_rate: 0.05,
370            payment_bridge_rate: 0.75,
371        }
372    }
373}
374
/// Sophistication level distribution.
///
/// Holds one weight per sophistication tier. None of the fields has a
/// field-level serde default, so all four must be supplied when this
/// structure appears in the input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SophisticationDistribution {
    /// Basic sophistication weight
    pub basic: f64,
    /// Standard sophistication weight
    pub standard: f64,
    /// Professional sophistication weight
    pub professional: f64,
    /// Advanced sophistication weight
    pub advanced: f64,
}
387
388impl Default for SophisticationDistribution {
389    fn default() -> Self {
390        Self {
391            basic: 0.4,
392            standard: 0.35,
393            professional: 0.2,
394            advanced: 0.05,
395        }
396    }
397}
398
/// Spoofing (adversarial) configuration.
///
/// None of the fields has a field-level serde default, so when this section
/// is present in the input every field must be supplied.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SpoofingConfig {
    /// Enable spoofing mode
    pub enabled: bool,
    /// Spoofing intensity (0.0-1.0)
    pub intensity: f64,
    /// Spoof transaction timing
    pub spoof_timing: bool,
    /// Spoof transaction amounts
    pub spoof_amounts: bool,
    /// Spoof merchant selection
    pub spoof_merchants: bool,
    /// Spoof geographic patterns
    pub spoof_geography: bool,
    /// Add delays to reduce velocity detection
    pub add_delays: bool,
}
417
418impl Default for SpoofingConfig {
419    fn default() -> Self {
420        Self {
421            enabled: true,
422            intensity: 0.3,
423            spoof_timing: true,
424            spoof_amounts: true,
425            spoof_merchants: true,
426            spoof_geography: false,
427            add_delays: true,
428        }
429    }
430}
431
/// Banking output configuration.
///
/// One directory name plus a boolean switch per output artifact. None of the
/// fields has a field-level serde default, so when this section is present
/// in the input every field must be supplied.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BankingOutputConfig {
    /// Output directory (relative to main output)
    pub directory: String,
    /// Include customer master data
    pub include_customers: bool,
    /// Include account master data
    pub include_accounts: bool,
    /// Include transactions
    pub include_transactions: bool,
    /// Include counterparties
    pub include_counterparties: bool,
    /// Include beneficial ownership
    pub include_beneficial_ownership: bool,
    /// Include transaction labels
    pub include_transaction_labels: bool,
    /// Include entity labels
    pub include_entity_labels: bool,
    /// Include relationship labels
    pub include_relationship_labels: bool,
    /// Include case narratives
    pub include_case_narratives: bool,
    /// Export graph data
    pub include_graph: bool,
}
458
459impl Default for BankingOutputConfig {
460    fn default() -> Self {
461        Self {
462            directory: "banking".to_string(),
463            include_customers: true,
464            include_accounts: true,
465            include_transactions: true,
466            include_counterparties: true,
467            include_beneficial_ownership: true,
468            include_transaction_labels: true,
469            include_entity_labels: true,
470            include_relationship_labels: true,
471            include_case_narratives: true,
472            include_graph: true,
473        }
474    }
475}
476
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// The out-of-the-box configuration is enabled and passes validation.
    #[test]
    fn test_default_config() {
        let defaults = BankingConfig::default();
        assert!(defaults.enabled);
        assert!(defaults.validate().is_ok());
    }

    /// The small preset uses 100 retail customers and passes validation.
    #[test]
    fn test_small_config() {
        let preset = BankingConfig::small();
        assert_eq!(preset.population.retail_customers, 100);
        assert!(preset.validate().is_ok());
    }

    /// A population with no customers of any kind is rejected.
    #[test]
    fn test_validation_empty_population() {
        let empty_population = PopulationConfig {
            retail_customers: 0,
            business_customers: 0,
            trusts: 0,
            ..PopulationConfig::default()
        };
        let config = BankingConfig {
            population: empty_population,
            ..BankingConfig::default()
        };
        assert!(config.validate().is_err());
    }

    /// The default retail persona weights add up to 1.0 (within 0.01).
    #[test]
    fn test_persona_weights() {
        let total = BankingConfig::default()
            .population
            .retail_persona_weights
            .values()
            .sum::<f64>();
        assert!((total - 1.0).abs() < 0.01);
    }
}
517
/// Temporal behavior configuration.
///
/// Every field has a field-level serde default, so any of them may be
/// omitted from the input: the `enable_*` flags fall back to `true` and the
/// rates fall back to the values returned by their `default_*` helpers.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TemporalBehaviorConfig {
    /// Enable account lifecycle phases (New → RampUp → Steady → Decline → Dormant)
    #[serde(default = "default_true")]
    pub enable_lifecycle_phases: bool,
    /// Enable behavioral drift (gradual/sudden spending pattern shifts)
    #[serde(default = "default_true")]
    pub enable_behavioral_drift: bool,
    /// Enable velocity feature pre-computation on every transaction
    #[serde(default = "default_true")]
    pub enable_velocity_features: bool,
    /// Enable impossible travel injection (geolocation anomalies)
    #[serde(default = "default_true")]
    pub enable_impossible_travel: bool,
    /// Proportion of customers with behavioral drift (default 5%)
    #[serde(default = "default_drift_rate")]
    pub drift_rate: f64,
    /// Of drifting customers, proportion with sudden (suspicious) drift vs gradual (normal);
    /// default 30%
    #[serde(default = "default_sudden_ratio")]
    pub sudden_drift_ratio: f64,
    /// Impossible travel injection rate for suspicious customers (default 2%)
    #[serde(default = "default_impossible_travel_rate")]
    pub impossible_travel_rate: f64,
}
543
/// Serde default for `TemporalBehaviorConfig::drift_rate`.
fn default_drift_rate() -> f64 {
    0.05
}

/// Serde default for `TemporalBehaviorConfig::sudden_drift_ratio`.
fn default_sudden_ratio() -> f64 {
    0.3
}

/// Serde default for `TemporalBehaviorConfig::impossible_travel_rate`.
fn default_impossible_travel_rate() -> f64 {
    0.02
}
553
554impl Default for TemporalBehaviorConfig {
555    fn default() -> Self {
556        Self {
557            enable_lifecycle_phases: true,
558            enable_behavioral_drift: true,
559            enable_velocity_features: true,
560            enable_impossible_travel: true,
561            drift_rate: 0.05,
562            sudden_drift_ratio: 0.30,
563            impossible_travel_rate: 0.02,
564        }
565    }
566}
567
/// Device fingerprint generation configuration.
///
/// Every field has a field-level serde default, so any of them may be
/// omitted from the input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DeviceFingerprintConfig {
    /// Enable structured device fingerprints (replaces simple DEV-hash pattern);
    /// `true` when omitted
    #[serde(default = "default_true")]
    pub enabled: bool,
    /// Probability a customer reuses an existing device vs generating a new one
    /// (0.85 when omitted)
    #[serde(default = "default_device_reuse")]
    pub device_reuse_rate: f64,
    /// Proportion of customers with 2+ devices (0.30 when omitted)
    #[serde(default = "default_multi_device")]
    pub multi_device_rate: f64,
}
581
/// Serde default for `DeviceFingerprintConfig::device_reuse_rate`.
fn default_device_reuse() -> f64 {
    0.85
}

/// Serde default for `DeviceFingerprintConfig::multi_device_rate`.
fn default_multi_device() -> f64 {
    0.3
}
588
589impl Default for DeviceFingerprintConfig {
590    fn default() -> Self {
591        Self {
592            enabled: true,
593            device_reuse_rate: 0.85,
594            multi_device_rate: 0.30,
595        }
596    }
597}