datasynth-banking 2.3.1

KYC/AML banking transaction generator for synthetic data - compliance testing and fraud analytics
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
//! Configuration for banking data generation.

use serde::{Deserialize, Serialize};
use std::collections::HashMap;

/// Top-level configuration for banking data generation.
///
/// Every field carries a serde default, so any section (and the `enabled`
/// flag) may be left out of the serialized form: an omitted section takes the
/// value of its type's `Default` implementation and `enabled` takes `true`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BankingConfig {
    /// Whether banking generation is enabled (`true` when omitted)
    #[serde(default = "default_true")]
    pub enabled: bool,
    /// Population configuration (customer counts, persona weights, period)
    #[serde(default)]
    pub population: PopulationConfig,
    /// Product configuration (cash/card/cross-border mix, accounts per customer)
    #[serde(default)]
    pub products: ProductConfig,
    /// Compliance configuration (risk appetite, KYC completeness, PEP rate)
    #[serde(default)]
    pub compliance: ComplianceConfig,
    /// AML typology configuration (suspicious-activity rates per typology)
    #[serde(default)]
    pub typologies: TypologyConfig,
    /// Spoofing (adversarial) configuration
    #[serde(default)]
    pub spoofing: SpoofingConfig,
    /// Output configuration (target directory and which artifacts to include)
    #[serde(default)]
    pub output: BankingOutputConfig,
    /// Temporal behavior configuration (lifecycle phases, drift, velocity, travel)
    #[serde(default)]
    pub temporal: TemporalBehaviorConfig,
    /// Device fingerprint configuration
    #[serde(default)]
    pub device: DeviceFingerprintConfig,
}

/// Serde default helper for boolean flags that are switched on unless the
/// input explicitly disables them.
fn default_true() -> bool {
    const ON_UNLESS_DISABLED: bool = true;
    ON_UNLESS_DISABLED
}

impl Default for BankingConfig {
    fn default() -> Self {
        Self {
            enabled: true,
            population: PopulationConfig::default(),
            products: ProductConfig::default(),
            compliance: ComplianceConfig::default(),
            typologies: TypologyConfig::default(),
            spoofing: SpoofingConfig::default(),
            output: BankingOutputConfig::default(),
            temporal: TemporalBehaviorConfig::default(),
            device: DeviceFingerprintConfig::default(),
        }
    }
}

impl BankingConfig {
    /// Create a small configuration for testing
    /// (100 retail customers, 20 business customers, 5 trusts).
    pub fn small() -> Self {
        Self::with_population_counts(100, 20, 5)
    }

    /// Create a medium configuration
    /// (1,000 retail customers, 200 business customers, 50 trusts).
    pub fn medium() -> Self {
        Self::with_population_counts(1_000, 200, 50)
    }

    /// Create a large configuration
    /// (10,000 retail customers, 1,000 business customers, 100 trusts).
    pub fn large() -> Self {
        Self::with_population_counts(10_000, 1_000, 100)
    }

    /// Default configuration with only the three population counts overridden.
    fn with_population_counts(retail_customers: u32, business_customers: u32, trusts: u32) -> Self {
        Self {
            population: PopulationConfig {
                retail_customers,
                business_customers,
                trusts,
                ..Default::default()
            },
            ..Default::default()
        }
    }

    /// Validate the configuration.
    ///
    /// Checks, in order:
    /// 1. at least one of the retail / business / trust counts is non-zero;
    /// 2. the retail persona weights sum to 1.0 (tolerance 0.01);
    /// 3. structuring + funnel + layering + mule + fraud rates do not exceed
    ///    `suspicious_rate` (tolerance 0.001);
    /// 4. the spoofing intensity lies in `0.0..=1.0`.
    ///
    /// NaN never satisfies a check: a NaN weight, rate or intensity is reported
    /// as an error instead of slipping through the comparisons.
    ///
    /// # Errors
    ///
    /// Returns every violated rule as a human-readable message; the vector is
    /// never empty when `Err` is returned.
    pub fn validate(&self) -> Result<(), Vec<String>> {
        let mut errors = Vec::new();

        // Validate population
        let population = &self.population;
        if population.retail_customers == 0
            && population.business_customers == 0
            && population.trusts == 0
        {
            errors.push("At least one customer type must have non-zero count".to_string());
        }

        // Validate persona weights sum to 1.0.
        // `NaN > 0.01` is false, so NaN has to be rejected explicitly.
        let retail_sum: f64 = population.retail_persona_weights.values().sum();
        if retail_sum.is_nan() || (retail_sum - 1.0).abs() > 0.01 {
            errors.push(format!(
                "Retail persona weights must sum to 1.0, got {retail_sum}"
            ));
        }

        // Validate typology rates
        let typologies = &self.typologies;
        let total_suspicious = typologies.structuring_rate
            + typologies.funnel_rate
            + typologies.layering_rate
            + typologies.mule_rate
            + typologies.fraud_rate;
        if total_suspicious.is_nan() || typologies.suspicious_rate.is_nan() {
            // Any comparison against NaN is false, so it would otherwise pass.
            errors.push(format!(
                "Typology rates must be valid numbers, got sum {} and suspicious_rate {}",
                total_suspicious, typologies.suspicious_rate
            ));
        } else if total_suspicious > typologies.suspicious_rate + 0.001 {
            errors.push(format!(
                "Sum of typology rates ({}) exceeds suspicious_rate ({})",
                total_suspicious, typologies.suspicious_rate
            ));
        }

        // Validate spoofing intensity; `contains` is false for NaN.
        if !(0.0..=1.0).contains(&self.spoofing.intensity) {
            errors.push("Spoofing intensity must be between 0.0 and 1.0".to_string());
        }

        if errors.is_empty() {
            Ok(())
        } else {
            Err(errors)
        }
    }
}

/// Population configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PopulationConfig {
    /// Number of retail customers
    pub retail_customers: u32,
    /// Retail persona weight distribution
    pub retail_persona_weights: HashMap<String, f64>,
    /// Number of business customers
    pub business_customers: u32,
    /// Business persona weight distribution
    pub business_persona_weights: HashMap<String, f64>,
    /// Number of trust customers
    pub trusts: u32,
    /// Household formation rate (proportion of retail in households)
    pub household_rate: f64,
    /// Average household size
    pub avg_household_size: f64,
    /// Simulation period in months
    pub period_months: u32,
    /// Simulation start date (YYYY-MM-DD)
    pub start_date: String,
}

impl Default for PopulationConfig {
    /// 10,000 retail customers, 1,000 business customers and 100 trusts over a
    /// 12-month period starting 2024-01-01, with built-in persona weights.
    fn default() -> Self {
        /// Turns a `(persona, weight)` table into the owned map the config stores.
        fn weight_map(table: &[(&str, f64)]) -> HashMap<String, f64> {
            table
                .iter()
                .map(|&(persona, weight)| (persona.to_string(), weight))
                .collect()
        }

        // Each table's weights add up to 1.0.
        let retail_personas = [
            ("student", 0.15),
            ("early_career", 0.25),
            ("mid_career", 0.30),
            ("retiree", 0.15),
            ("high_net_worth", 0.05),
            ("gig_worker", 0.10),
        ];
        let business_personas = [
            ("small_business", 0.50),
            ("mid_market", 0.25),
            ("enterprise", 0.05),
            ("cash_intensive", 0.10),
            ("import_export", 0.05),
            ("professional_services", 0.05),
        ];

        Self {
            retail_customers: 10_000,
            retail_persona_weights: weight_map(&retail_personas),
            business_customers: 1_000,
            business_persona_weights: weight_map(&business_personas),
            trusts: 100,
            household_rate: 0.4,
            avg_household_size: 2.3,
            period_months: 12,
            start_date: String::from("2024-01-01"),
        }
    }
}

/// Product configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProductConfig {
    /// Cash transaction intensity (0.0-1.0)
    pub cash_intensity: f64,
    /// Cross-border transaction rate (0.0-1.0)
    pub cross_border_rate: f64,
    /// Card vs transfer ratio for payments
    pub card_vs_transfer: f64,
    /// Average accounts per retail customer
    pub avg_accounts_retail: f64,
    /// Average accounts per business customer
    pub avg_accounts_business: f64,
    /// Proportion of customers with debit cards
    pub debit_card_rate: f64,
    /// Proportion of customers with international capability
    pub international_rate: f64,
}

impl Default for ProductConfig {
    /// Built-in product mix used when no product section is supplied.
    fn default() -> Self {
        // Transaction-mix shares.
        let cash_intensity = 0.15;
        let cross_border_rate = 0.05;
        let card_vs_transfer = 0.6;

        // Accounts held per customer, by segment.
        let avg_accounts_retail = 1.5;
        let avg_accounts_business = 2.5;

        // Product penetration across the customer base.
        let debit_card_rate = 0.85;
        let international_rate = 0.10;

        Self {
            cash_intensity,
            cross_border_rate,
            card_vs_transfer,
            avg_accounts_retail,
            avg_accounts_business,
            debit_card_rate,
            international_rate,
        }
    }
}

/// Compliance configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComplianceConfig {
    /// Risk appetite (low, medium, high)
    pub risk_appetite: RiskAppetite,
    /// KYC completeness rate (0.0-1.0)
    pub kyc_completeness: f64,
    /// Proportion of high-risk customers accepted
    pub high_risk_tolerance: f64,
    /// PEP proportion in customer base
    pub pep_rate: f64,
    /// Enhanced due diligence trigger threshold
    pub edd_threshold: u64,
}

impl Default for ComplianceConfig {
    /// Medium risk appetite with the built-in KYC, high-risk, PEP and EDD settings.
    fn default() -> Self {
        let risk_appetite = RiskAppetite::Medium;
        let kyc_completeness = 0.95;
        let high_risk_tolerance = 0.05;
        let pep_rate = 0.01;
        let edd_threshold = 50_000;

        Self {
            risk_appetite,
            kyc_completeness,
            high_risk_tolerance,
            pep_rate,
            edd_threshold,
        }
    }
}

/// Risk appetite level.
///
/// Serialized in snake_case (`low`, `medium`, `high`); `Medium` is the
/// `Default` variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum RiskAppetite {
    /// Low risk tolerance
    Low,
    /// Medium risk tolerance (the default)
    #[default]
    Medium,
    /// High risk tolerance
    High,
}

impl RiskAppetite {
    /// High-risk customer multiplier.
    pub fn high_risk_multiplier(&self) -> f64 {
        match self {
            Self::Low => 0.5,
            Self::Medium => 1.0,
            Self::High => 2.0,
        }
    }
}

/// AML typology configuration.
///
/// Fields omitted from the serialized form fall back to the corresponding
/// value of [`TypologyConfig::default`]; fields with their own
/// `#[serde(default = "...")]` helper use that helper, which returns the same
/// value.
///
/// `BankingConfig::validate` requires `structuring_rate + funnel_rate +
/// layering_rate + mule_rate + fraud_rate` not to exceed `suspicious_rate`
/// (tolerance 0.001).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct TypologyConfig {
    /// Overall suspicious activity rate (0.0-1.0; default 0.02)
    pub suspicious_rate: f64,
    /// Structuring typology rate (default 0.004)
    pub structuring_rate: f64,
    /// Funnel account rate (default 0.003)
    pub funnel_rate: f64,
    /// Layering chain rate (default 0.003)
    pub layering_rate: f64,
    /// Money mule rate (default 0.005)
    pub mule_rate: f64,
    /// Fraud rate (ATO, synthetic, etc.; default 0.005)
    pub fraud_rate: f64,
    /// Sophistication distribution
    pub sophistication: SophisticationDistribution,
    /// Base detectability (0.0-1.0; default 0.5)
    pub detectability: f64,
    /// Round-tripping rate (default 0.001)
    pub round_tripping_rate: f64,
    /// Trade-based ML rate (default 0.001)
    pub trade_based_rate: f64,
    /// Synthetic identity fraud rate (default 0.001)
    #[serde(default = "default_synth_id_rate")]
    pub synthetic_identity_rate: f64,
    /// Cryptocurrency integration rate (default 0.001)
    #[serde(default = "default_crypto_rate")]
    pub crypto_integration_rate: f64,
    /// Sanctions evasion rate (default 0.0005)
    #[serde(default = "default_sanctions_rate")]
    pub sanctions_evasion_rate: f64,
    /// False positive rate (fraction of legitimate txns tagged as suspicious-looking; default 0.05)
    #[serde(default = "default_false_positive_rate")]
    pub false_positive_rate: f64,
    /// Cross-typology co-occurrence rate (fraction of cases combining multiple typologies; default 0.10)
    #[serde(default = "default_co_occurrence_rate")]
    pub co_occurrence_rate: f64,
    /// Multi-party network scenario rate (fraction using coordinated networks; default 0.05)
    #[serde(default = "default_network_rate")]
    pub network_typology_rate: f64,
    /// Fraction of document-flow Payments to bridge to BankTransactions (0.0 disables; default 0.75)
    #[serde(default = "default_payment_bridge_rate")]
    pub payment_bridge_rate: f64,
}

/// Serde default for `TypologyConfig::synthetic_identity_rate`.
fn default_synth_id_rate() -> f64 {
    const SYNTHETIC_IDENTITY_RATE: f64 = 0.001;
    SYNTHETIC_IDENTITY_RATE
}
/// Serde default for `TypologyConfig::crypto_integration_rate`.
fn default_crypto_rate() -> f64 {
    const CRYPTO_INTEGRATION_RATE: f64 = 0.001;
    CRYPTO_INTEGRATION_RATE
}
/// Serde default for `TypologyConfig::sanctions_evasion_rate`.
fn default_sanctions_rate() -> f64 {
    const SANCTIONS_EVASION_RATE: f64 = 0.0005;
    SANCTIONS_EVASION_RATE
}
/// Serde default for `TypologyConfig::false_positive_rate`.
fn default_false_positive_rate() -> f64 {
    const FALSE_POSITIVE_RATE: f64 = 0.05;
    FALSE_POSITIVE_RATE
}
/// Serde default for `TypologyConfig::co_occurrence_rate`.
fn default_co_occurrence_rate() -> f64 {
    const CO_OCCURRENCE_RATE: f64 = 0.10;
    CO_OCCURRENCE_RATE
}
/// Serde default for `TypologyConfig::network_typology_rate`.
fn default_network_rate() -> f64 {
    const NETWORK_TYPOLOGY_RATE: f64 = 0.05;
    NETWORK_TYPOLOGY_RATE
}
/// Serde default for `TypologyConfig::payment_bridge_rate`.
fn default_payment_bridge_rate() -> f64 {
    const PAYMENT_BRIDGE_RATE: f64 = 0.75;
    PAYMENT_BRIDGE_RATE
}

impl Default for TypologyConfig {
    fn default() -> Self {
        Self {
            suspicious_rate: 0.02,
            structuring_rate: 0.004,
            funnel_rate: 0.003,
            layering_rate: 0.003,
            mule_rate: 0.005,
            fraud_rate: 0.005,
            sophistication: SophisticationDistribution::default(),
            detectability: 0.5,
            round_tripping_rate: 0.001,
            trade_based_rate: 0.001,
            synthetic_identity_rate: 0.001,
            crypto_integration_rate: 0.001,
            sanctions_evasion_rate: 0.0005,
            false_positive_rate: 0.05,
            co_occurrence_rate: 0.10,
            network_typology_rate: 0.05,
            payment_bridge_rate: 0.75,
        }
    }
}

/// Sophistication level distribution.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SophisticationDistribution {
    /// Basic sophistication weight
    pub basic: f64,
    /// Standard sophistication weight
    pub standard: f64,
    /// Professional sophistication weight
    pub professional: f64,
    /// Advanced sophistication weight
    pub advanced: f64,
}

impl Default for SophisticationDistribution {
    /// Weights 0.4 / 0.35 / 0.2 / 0.05 for basic / standard / professional /
    /// advanced.
    fn default() -> Self {
        let (basic, standard, professional, advanced) = (0.4, 0.35, 0.2, 0.05);
        Self {
            basic,
            standard,
            professional,
            advanced,
        }
    }
}

/// Spoofing (adversarial) configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SpoofingConfig {
    /// Enable spoofing mode
    pub enabled: bool,
    /// Spoofing intensity (0.0-1.0)
    pub intensity: f64,
    /// Spoof transaction timing
    pub spoof_timing: bool,
    /// Spoof transaction amounts
    pub spoof_amounts: bool,
    /// Spoof merchant selection
    pub spoof_merchants: bool,
    /// Spoof geographic patterns
    pub spoof_geography: bool,
    /// Add delays to reduce velocity detection
    pub add_delays: bool,
}

impl Default for SpoofingConfig {
    /// Spoofing enabled at intensity 0.3 with every technique on except
    /// geographic spoofing.
    fn default() -> Self {
        let intensity = 0.3;
        // The only technique that is off out of the box.
        let spoof_geography = false;

        Self {
            enabled: true,
            intensity,
            spoof_timing: true,
            spoof_amounts: true,
            spoof_merchants: true,
            spoof_geography,
            add_delays: true,
        }
    }
}

/// Banking output configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BankingOutputConfig {
    /// Output directory (relative to main output)
    pub directory: String,
    /// Include customer master data
    pub include_customers: bool,
    /// Include account master data
    pub include_accounts: bool,
    /// Include transactions
    pub include_transactions: bool,
    /// Include counterparties
    pub include_counterparties: bool,
    /// Include beneficial ownership
    pub include_beneficial_ownership: bool,
    /// Include transaction labels
    pub include_transaction_labels: bool,
    /// Include entity labels
    pub include_entity_labels: bool,
    /// Include relationship labels
    pub include_relationship_labels: bool,
    /// Include case narratives
    pub include_case_narratives: bool,
    /// Export graph data
    pub include_graph: bool,
}

impl Default for BankingOutputConfig {
    /// Writes to the `"banking"` directory and includes every artifact.
    fn default() -> Self {
        // Every `include_*` switch starts out on.
        let included = true;

        Self {
            directory: String::from("banking"),
            include_customers: included,
            include_accounts: included,
            include_transactions: included,
            include_counterparties: included,
            include_beneficial_ownership: included,
            include_transaction_labels: included,
            include_entity_labels: included,
            include_relationship_labels: included,
            include_case_narratives: included,
            include_graph: included,
        }
    }
}

#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// The stock configuration is enabled and passes validation.
    #[test]
    fn test_default_config() {
        let cfg = BankingConfig::default();
        assert!(cfg.enabled);
        assert_eq!(cfg.validate(), Ok(()));
    }

    /// The small preset has 100 retail customers and passes validation.
    #[test]
    fn test_small_config() {
        let cfg = BankingConfig::small();
        assert_eq!(cfg.population.retail_customers, 100);
        assert_eq!(cfg.validate(), Ok(()));
    }

    /// A population with no customers of any type must be rejected.
    #[test]
    fn test_validation_empty_population() {
        let mut cfg = BankingConfig::default();
        cfg.population.retail_customers = 0;
        cfg.population.business_customers = 0;
        cfg.population.trusts = 0;
        assert!(cfg.validate().is_err());
    }

    /// The built-in retail persona weights sum to 1.0 within 0.01.
    #[test]
    fn test_persona_weights() {
        let cfg = BankingConfig::default();
        let total = cfg
            .population
            .retail_persona_weights
            .values()
            .copied()
            .sum::<f64>();
        assert!((total - 1.0).abs() < 0.01);
    }
}

/// Temporal behavior configuration.
///
/// Every field has a serde default, so any of them may be omitted from the
/// serialized form: the four feature switches default to `true` and the rates
/// to the values noted on each field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TemporalBehaviorConfig {
    /// Enable account lifecycle phases (New → RampUp → Steady → Decline → Dormant)
    #[serde(default = "default_true")]
    pub enable_lifecycle_phases: bool,
    /// Enable behavioral drift (gradual/sudden spending pattern shifts)
    #[serde(default = "default_true")]
    pub enable_behavioral_drift: bool,
    /// Enable velocity feature pre-computation on every transaction
    #[serde(default = "default_true")]
    pub enable_velocity_features: bool,
    /// Enable impossible travel injection (geolocation anomalies)
    #[serde(default = "default_true")]
    pub enable_impossible_travel: bool,
    /// Proportion of customers with behavioral drift (default 5%)
    #[serde(default = "default_drift_rate")]
    pub drift_rate: f64,
    /// Of drifting customers, proportion with sudden (suspicious) drift vs gradual (normal)
    /// (default 30%)
    #[serde(default = "default_sudden_ratio")]
    pub sudden_drift_ratio: f64,
    /// Impossible travel injection rate for suspicious customers (default 2%)
    #[serde(default = "default_impossible_travel_rate")]
    pub impossible_travel_rate: f64,
}

/// Serde default for `TemporalBehaviorConfig::drift_rate`.
fn default_drift_rate() -> f64 {
    const DRIFT_RATE: f64 = 0.05;
    DRIFT_RATE
}
/// Serde default for `TemporalBehaviorConfig::sudden_drift_ratio`.
fn default_sudden_ratio() -> f64 {
    const SUDDEN_DRIFT_RATIO: f64 = 0.30;
    SUDDEN_DRIFT_RATIO
}
/// Serde default for `TemporalBehaviorConfig::impossible_travel_rate`.
fn default_impossible_travel_rate() -> f64 {
    const IMPOSSIBLE_TRAVEL_RATE: f64 = 0.02;
    IMPOSSIBLE_TRAVEL_RATE
}

impl Default for TemporalBehaviorConfig {
    fn default() -> Self {
        Self {
            enable_lifecycle_phases: true,
            enable_behavioral_drift: true,
            enable_velocity_features: true,
            enable_impossible_travel: true,
            drift_rate: 0.05,
            sudden_drift_ratio: 0.30,
            impossible_travel_rate: 0.02,
        }
    }
}

/// Device fingerprint generation configuration.
///
/// Every field has a serde default, so any of them may be omitted from the
/// serialized form.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DeviceFingerprintConfig {
    /// Enable structured device fingerprints (replaces simple DEV-hash pattern);
    /// `true` when omitted
    #[serde(default = "default_true")]
    pub enabled: bool,
    /// Probability a customer reuses an existing device vs generating a new one
    /// (default 0.85)
    #[serde(default = "default_device_reuse")]
    pub device_reuse_rate: f64,
    /// Proportion of customers with 2+ devices (default 0.30)
    #[serde(default = "default_multi_device")]
    pub multi_device_rate: f64,
}

/// Serde default for `DeviceFingerprintConfig::device_reuse_rate`.
fn default_device_reuse() -> f64 {
    const DEVICE_REUSE_RATE: f64 = 0.85;
    DEVICE_REUSE_RATE
}
/// Serde default for `DeviceFingerprintConfig::multi_device_rate`.
fn default_multi_device() -> f64 {
    const MULTI_DEVICE_RATE: f64 = 0.30;
    MULTI_DEVICE_RATE
}

impl Default for DeviceFingerprintConfig {
    fn default() -> Self {
        Self {
            enabled: true,
            device_reuse_rate: 0.85,
            multi_device_rate: 0.30,
        }
    }
}