Skip to main content

datasynth_banking/generators/
customer_generator.rs

1//! Customer generator for banking data.
2
3use chrono::{Datelike, NaiveDate};
4use datasynth_core::models::banking::{
5    BankingCustomerType, BusinessPersona, RetailPersona, RiskTier, TrustPersona,
6};
7use datasynth_core::DeterministicUuidFactory;
8use rand::prelude::*;
9use rand_chacha::ChaCha8Rng;
10
11use crate::config::BankingConfig;
12use crate::models::{BankingCustomer, KycProfile, PepCategory, PersonaVariant};
13
14/// Generator for banking customers.
15pub struct CustomerGenerator {
16    config: BankingConfig,
17    rng: ChaCha8Rng,
18    uuid_factory: DeterministicUuidFactory,
19    start_date: NaiveDate,
20    end_date: NaiveDate,
21}
22
23impl CustomerGenerator {
24    /// Create a new customer generator.
25    pub fn new(config: BankingConfig, seed: u64) -> Self {
26        let start_date = NaiveDate::parse_from_str(&config.population.start_date, "%Y-%m-%d")
27            .unwrap_or_else(|_| NaiveDate::from_ymd_opt(2024, 1, 1).unwrap());
28        let end_date = start_date + chrono::Months::new(config.population.period_months);
29
30        Self {
31            config,
32            rng: ChaCha8Rng::seed_from_u64(seed),
33            uuid_factory: DeterministicUuidFactory::new(
34                seed,
35                datasynth_core::GeneratorType::Customer,
36            ),
37            start_date,
38            end_date,
39        }
40    }
41
42    /// Generate all customers.
43    pub fn generate_all(&mut self) -> Vec<BankingCustomer> {
44        let mut customers = Vec::new();
45
46        // Generate retail customers
47        for _ in 0..self.config.population.retail_customers {
48            customers.push(self.generate_retail_customer());
49        }
50
51        // Generate business customers
52        for _ in 0..self.config.population.business_customers {
53            customers.push(self.generate_business_customer());
54        }
55
56        // Generate trusts
57        for _ in 0..self.config.population.trusts {
58            customers.push(self.generate_trust_customer());
59        }
60
61        // Form households
62        self.form_households(&mut customers);
63
64        customers
65    }
66
67    /// Generate a single retail customer.
68    pub fn generate_retail_customer(&mut self) -> BankingCustomer {
69        let customer_id = self.uuid_factory.next();
70        let persona = self.select_retail_persona();
71        let (first_name, last_name) = self.generate_person_name();
72        let country = self.select_country();
73        let onboarding_date = self.random_onboarding_date();
74
75        let mut customer = BankingCustomer::new_retail(
76            customer_id,
77            &first_name,
78            &last_name,
79            &country,
80            onboarding_date,
81        )
82        .with_persona(PersonaVariant::Retail(persona));
83
84        // Set risk tier based on persona and configuration
85        let risk_tier = self.calculate_retail_risk_tier(persona, &country);
86        customer.risk_tier = risk_tier;
87
88        // Generate KYC profile
89        customer.kyc_profile = self.generate_retail_kyc_profile(persona);
90
91        // Possibly mark as PEP
92        if self.rng.gen::<f64>() < self.config.compliance.pep_rate {
93            customer.is_pep = true;
94            customer.pep_category = Some(self.select_pep_category());
95            customer.risk_tier = RiskTier::High;
96        }
97
98        // Generate contact info
99        customer.email = Some(self.generate_email(&first_name, &last_name));
100        customer.phone = Some(self.generate_phone(&country));
101        customer.date_of_birth = Some(self.generate_birth_date(persona));
102
103        customer
104    }
105
106    /// Generate a single business customer.
107    pub fn generate_business_customer(&mut self) -> BankingCustomer {
108        let customer_id = self.uuid_factory.next();
109        let persona = self.select_business_persona();
110        let name = self.generate_business_name(persona);
111        let country = self.select_country();
112        let onboarding_date = self.random_onboarding_date();
113
114        let mut customer =
115            BankingCustomer::new_business(customer_id, &name, &country, onboarding_date)
116                .with_persona(PersonaVariant::Business(persona));
117
118        // Set risk tier
119        let risk_tier = self.calculate_business_risk_tier(persona, &country);
120        customer.risk_tier = risk_tier;
121
122        // Generate KYC profile
123        customer.kyc_profile = self.generate_business_kyc_profile(persona);
124
125        // Generate contact info
126        customer.email = Some(format!("info@{}.com", name.to_lowercase().replace(' ', "")));
127        customer.phone = Some(self.generate_phone(&country));
128
129        // Set industry
130        customer.industry_description = Some(self.get_industry_description(persona));
131
132        customer
133    }
134
135    /// Generate a trust customer.
136    pub fn generate_trust_customer(&mut self) -> BankingCustomer {
137        let customer_id = self.uuid_factory.next();
138        let persona = self.select_trust_persona();
139        let name = self.generate_trust_name(persona);
140        let country = self.select_country();
141        let onboarding_date = self.random_onboarding_date();
142
143        let mut customer =
144            BankingCustomer::new_business(customer_id, &name, &country, onboarding_date)
145                .with_persona(PersonaVariant::Trust(persona));
146
147        customer.customer_type = BankingCustomerType::Trust;
148
149        // Trusts are typically higher risk
150        customer.risk_tier = RiskTier::High;
151
152        // Generate KYC profile
153        customer.kyc_profile = KycProfile::high_net_worth()
154            .with_turnover(datasynth_core::models::banking::TurnoverBand::VeryHigh);
155
156        customer
157    }
158
159    /// Select a retail persona based on configured weights.
160    fn select_retail_persona(&mut self) -> RetailPersona {
161        let weights = &self.config.population.retail_persona_weights;
162        let roll: f64 = self.rng.gen();
163        let mut cumulative = 0.0;
164
165        for (name, weight) in weights {
166            cumulative += weight;
167            if roll < cumulative {
168                return match name.as_str() {
169                    "student" => RetailPersona::Student,
170                    "early_career" => RetailPersona::EarlyCareer,
171                    "mid_career" => RetailPersona::MidCareer,
172                    "retiree" => RetailPersona::Retiree,
173                    "high_net_worth" => RetailPersona::HighNetWorth,
174                    "gig_worker" => RetailPersona::GigWorker,
175                    "seasonal_worker" => RetailPersona::SeasonalWorker,
176                    "low_activity" => RetailPersona::LowActivity,
177                    _ => RetailPersona::MidCareer,
178                };
179            }
180        }
181        RetailPersona::MidCareer
182    }
183
184    /// Select a business persona based on configured weights.
185    fn select_business_persona(&mut self) -> BusinessPersona {
186        let weights = &self.config.population.business_persona_weights;
187        let roll: f64 = self.rng.gen();
188        let mut cumulative = 0.0;
189
190        for (name, weight) in weights {
191            cumulative += weight;
192            if roll < cumulative {
193                return match name.as_str() {
194                    "small_business" => BusinessPersona::SmallBusiness,
195                    "mid_market" => BusinessPersona::MidMarket,
196                    "enterprise" => BusinessPersona::Enterprise,
197                    "startup" => BusinessPersona::Startup,
198                    "cash_intensive" => BusinessPersona::CashIntensive,
199                    "import_export" => BusinessPersona::ImportExport,
200                    "money_services" => BusinessPersona::MoneyServices,
201                    "professional_services" => BusinessPersona::ProfessionalServices,
202                    _ => BusinessPersona::SmallBusiness,
203                };
204            }
205        }
206        BusinessPersona::SmallBusiness
207    }
208
209    /// Select a trust persona.
210    fn select_trust_persona(&mut self) -> TrustPersona {
211        let options = [
212            TrustPersona::FamilyTrust,
213            TrustPersona::PrivateFoundation,
214            TrustPersona::CharitableTrust,
215            TrustPersona::InvestmentHolding,
216            TrustPersona::SpecialPurposeVehicle,
217        ];
218        *options.choose(&mut self.rng).unwrap()
219    }
220
221    /// Generate a person name.
222    fn generate_person_name(&mut self) -> (String, String) {
223        let first_names = [
224            "James",
225            "Mary",
226            "John",
227            "Patricia",
228            "Robert",
229            "Jennifer",
230            "Michael",
231            "Linda",
232            "William",
233            "Barbara",
234            "David",
235            "Elizabeth",
236            "Richard",
237            "Susan",
238            "Joseph",
239            "Jessica",
240            "Thomas",
241            "Sarah",
242            "Charles",
243            "Karen",
244            "Christopher",
245            "Nancy",
246            "Daniel",
247            "Lisa",
248            "Matthew",
249            "Betty",
250            "Anthony",
251            "Margaret",
252            "Mark",
253            "Sandra",
254        ];
255        let last_names = [
256            "Smith",
257            "Johnson",
258            "Williams",
259            "Brown",
260            "Jones",
261            "Garcia",
262            "Miller",
263            "Davis",
264            "Rodriguez",
265            "Martinez",
266            "Hernandez",
267            "Lopez",
268            "Gonzalez",
269            "Wilson",
270            "Anderson",
271            "Thomas",
272            "Taylor",
273            "Moore",
274            "Jackson",
275            "Martin",
276            "Lee",
277            "Perez",
278            "Thompson",
279            "White",
280            "Harris",
281            "Sanchez",
282            "Clark",
283            "Ramirez",
284            "Lewis",
285            "Robinson",
286        ];
287
288        let first = first_names.choose(&mut self.rng).unwrap();
289        let last = last_names.choose(&mut self.rng).unwrap();
290        (first.to_string(), last.to_string())
291    }
292
293    /// Generate a business name.
294    fn generate_business_name(&mut self, persona: BusinessPersona) -> String {
295        let prefixes = [
296            "Acme", "Global", "Premier", "Advanced", "Pacific", "Summit", "Atlas", "Apex",
297        ];
298        let industries = match persona {
299            BusinessPersona::SmallBusiness => ["Services", "Solutions", "Group", "LLC"],
300            BusinessPersona::MidMarket => ["Industries", "Corporation", "Enterprises", "Holdings"],
301            BusinessPersona::Enterprise => ["International", "Global Corp", "Worldwide", "Inc"],
302            BusinessPersona::CashIntensive => ["Retail", "Restaurant", "Store", "Shop"],
303            BusinessPersona::ImportExport => {
304                ["Trading", "Import Export", "Commerce", "International"]
305            }
306            BusinessPersona::ProfessionalServices => {
307                ["Consulting", "Advisors", "Partners", "Associates"]
308            }
309            _ => ["Company", "Business", "Firm", "LLC"],
310        };
311
312        let prefix = prefixes.choose(&mut self.rng).unwrap();
313        let suffix = industries.choose(&mut self.rng).unwrap();
314        format!("{} {}", prefix, suffix)
315    }
316
317    /// Generate a trust name.
318    fn generate_trust_name(&mut self, persona: TrustPersona) -> String {
319        let (first_name, last_name) = self.generate_person_name();
320        match persona {
321            TrustPersona::FamilyTrust => format!("{} Family Trust", last_name),
322            TrustPersona::PrivateFoundation => format!("{} {} Foundation", first_name, last_name),
323            TrustPersona::CharitableTrust => format!("{} Charitable Trust", last_name),
324            TrustPersona::InvestmentHolding => format!("{} Holdings Ltd", last_name),
325            TrustPersona::SpecialPurposeVehicle => format!("{} SPV LLC", last_name),
326        }
327    }
328
329    /// Select a country (weighted towards US).
330    fn select_country(&mut self) -> String {
331        let roll: f64 = self.rng.gen();
332        if roll < 0.8 {
333            "US".to_string()
334        } else if roll < 0.85 {
335            "GB".to_string()
336        } else if roll < 0.90 {
337            "CA".to_string()
338        } else if roll < 0.93 {
339            "DE".to_string()
340        } else if roll < 0.96 {
341            "FR".to_string()
342        } else {
343            let countries = ["JP", "AU", "SG", "CH", "NL"];
344            countries.choose(&mut self.rng).unwrap().to_string()
345        }
346    }
347
348    /// Generate a random onboarding date within the simulation period.
349    fn random_onboarding_date(&mut self) -> NaiveDate {
350        // 70% onboarded before simulation, 30% during
351        if self.rng.gen::<f64>() < 0.7 {
352            // Onboarded 1-5 years before simulation start
353            let years_before: i64 = self.rng.gen_range(1..=5);
354            let days_offset: i64 = self.rng.gen_range(0..365);
355            self.start_date - chrono::Duration::days(years_before * 365 + days_offset)
356        } else {
357            // Onboarded during simulation
358            let sim_days = (self.end_date - self.start_date).num_days();
359            let offset = self.rng.gen_range(0..sim_days);
360            self.start_date + chrono::Duration::days(offset)
361        }
362    }
363
364    /// Calculate risk tier for retail customer.
365    fn calculate_retail_risk_tier(&mut self, persona: RetailPersona, country: &str) -> RiskTier {
366        let base_score = persona.base_risk_score();
367        let mut score = base_score as f64 * 10.0;
368
369        // Country risk
370        if !["US", "GB", "CA", "DE", "FR", "JP", "AU"].contains(&country) {
371            score += 20.0;
372        }
373
374        // Risk appetite adjustment
375        score *= self.config.compliance.risk_appetite.high_risk_multiplier();
376
377        // Random variation
378        score += self.rng.gen_range(-10.0..10.0);
379
380        RiskTier::from_score(score.clamp(0.0, 100.0) as u8)
381    }
382
383    /// Calculate risk tier for business customer.
384    fn calculate_business_risk_tier(
385        &mut self,
386        persona: BusinessPersona,
387        country: &str,
388    ) -> RiskTier {
389        let base_score = persona.base_risk_score();
390        let mut score = base_score as f64 * 10.0;
391
392        // Enhanced DD requirement
393        if persona.requires_enhanced_dd() {
394            score += 15.0;
395        }
396
397        // Country risk
398        if !["US", "GB", "CA", "DE", "FR", "JP", "AU"].contains(&country) {
399            score += 25.0;
400        }
401
402        // Risk appetite adjustment
403        score *= self.config.compliance.risk_appetite.high_risk_multiplier();
404
405        // Random variation
406        score += self.rng.gen_range(-10.0..10.0);
407
408        RiskTier::from_score(score.clamp(0.0, 100.0) as u8)
409    }
410
411    /// Generate KYC profile for retail customer.
412    fn generate_retail_kyc_profile(&mut self, persona: RetailPersona) -> KycProfile {
413        use datasynth_core::models::banking::{
414            CashIntensity, FrequencyBand, SourceOfFunds, TurnoverBand,
415        };
416
417        let (income_min, income_max) = persona.income_range();
418        let (freq_min, freq_max) = persona.transaction_frequency_range();
419        let avg_income = (income_min + income_max) / 2;
420        let avg_freq = (freq_min + freq_max) / 2;
421
422        let turnover = match avg_income {
423            0..=2000 => TurnoverBand::VeryLow,
424            2001..=5000 => TurnoverBand::Low,
425            5001..=25000 => TurnoverBand::Medium,
426            25001..=100000 => TurnoverBand::High,
427            _ => TurnoverBand::VeryHigh,
428        };
429
430        let frequency = match avg_freq {
431            0..=10 => FrequencyBand::VeryLow,
432            11..=30 => FrequencyBand::Low,
433            31..=100 => FrequencyBand::Medium,
434            101..=300 => FrequencyBand::High,
435            _ => FrequencyBand::VeryHigh,
436        };
437
438        let source = match persona {
439            RetailPersona::Student => SourceOfFunds::Other,
440            RetailPersona::Retiree => SourceOfFunds::Pension,
441            RetailPersona::HighNetWorth => SourceOfFunds::Investments,
442            RetailPersona::GigWorker | RetailPersona::SeasonalWorker => {
443                SourceOfFunds::SelfEmployment
444            }
445            _ => SourceOfFunds::Employment,
446        };
447
448        let cash_intensity_level = if persona.cash_intensity() < 0.1 {
449            CashIntensity::VeryLow
450        } else if persona.cash_intensity() < 0.2 {
451            CashIntensity::Low
452        } else if persona.cash_intensity() < 0.35 {
453            CashIntensity::Moderate
454        } else {
455            CashIntensity::High
456        };
457
458        KycProfile::new("Personal banking", source)
459            .with_turnover(turnover)
460            .with_frequency(frequency)
461            .with_cash_intensity(cash_intensity_level)
462    }
463
464    /// Generate KYC profile for business customer.
465    fn generate_business_kyc_profile(&mut self, persona: BusinessPersona) -> KycProfile {
466        use datasynth_core::models::banking::{
467            CashIntensity, FrequencyBand, SourceOfFunds, TurnoverBand,
468        };
469
470        let (turnover_min, turnover_max) = persona.turnover_range();
471        let avg_turnover = (turnover_min + turnover_max) / 2;
472
473        let turnover = match avg_turnover {
474            0..=10_000 => TurnoverBand::VeryLow,
475            10_001..=100_000 => TurnoverBand::Medium,
476            100_001..=500_000 => TurnoverBand::High,
477            500_001..=5_000_000 => TurnoverBand::VeryHigh,
478            _ => TurnoverBand::UltraHigh,
479        };
480
481        let (_cash_min, cash_max) = persona.cash_deposit_frequency();
482        let cash_intensity = if cash_max > 50 {
483            CashIntensity::VeryHigh
484        } else if cash_max > 20 {
485            CashIntensity::High
486        } else if cash_max > 5 {
487            CashIntensity::Moderate
488        } else {
489            CashIntensity::Low
490        };
491
492        KycProfile::new("Business operations", SourceOfFunds::SelfEmployment)
493            .with_turnover(turnover)
494            .with_frequency(FrequencyBand::High)
495            .with_cash_intensity(cash_intensity)
496    }
497
498    /// Select a PEP category.
499    fn select_pep_category(&mut self) -> PepCategory {
500        let categories = [
501            PepCategory::SeniorGovernment,
502            PepCategory::SeniorPolitical,
503            PepCategory::FamilyMember,
504            PepCategory::CloseAssociate,
505            PepCategory::StateEnterprise,
506        ];
507        *categories.choose(&mut self.rng).unwrap()
508    }
509
510    /// Generate email address.
511    fn generate_email(&mut self, first: &str, last: &str) -> String {
512        let domains = [
513            "gmail.com",
514            "yahoo.com",
515            "outlook.com",
516            "hotmail.com",
517            "icloud.com",
518        ];
519        let domain = domains.choose(&mut self.rng).unwrap();
520        let num: u32 = self.rng.gen_range(1..999);
521        format!(
522            "{}.{}{}@{}",
523            first.to_lowercase(),
524            last.to_lowercase(),
525            num,
526            domain
527        )
528    }
529
530    /// Generate phone number.
531    fn generate_phone(&self, country: &str) -> String {
532        match country {
533            "US" | "CA" => format!(
534                "+1-555-{:03}-{:04}",
535                rand::random::<u16>() % 1000,
536                rand::random::<u16>() % 10000
537            ),
538            "GB" => format!(
539                "+44-7{:03}-{:06}",
540                rand::random::<u16>() % 1000,
541                rand::random::<u32>() % 1000000
542            ),
543            _ => format!(
544                "+{}-{:010}",
545                rand::random::<u8>() % 90 + 10,
546                rand::random::<u64>() % 10000000000
547            ),
548        }
549    }
550
551    /// Generate birth date based on persona.
552    fn generate_birth_date(&mut self, persona: RetailPersona) -> NaiveDate {
553        let base_year = self.start_date.year();
554        let age_range = match persona {
555            RetailPersona::Student => (18, 25),
556            RetailPersona::EarlyCareer => (25, 35),
557            RetailPersona::MidCareer => (35, 55),
558            RetailPersona::Retiree => (55, 80),
559            RetailPersona::HighNetWorth => (40, 70),
560            RetailPersona::GigWorker => (20, 40),
561            RetailPersona::SeasonalWorker => (18, 50),
562            RetailPersona::LowActivity => (20, 70),
563        };
564
565        let age: i32 = self.rng.gen_range(age_range.0..=age_range.1);
566        let month: u32 = self.rng.gen_range(1..=12);
567        let day: u32 = self.rng.gen_range(1..=28);
568
569        NaiveDate::from_ymd_opt(base_year - age, month, day)
570            .unwrap_or_else(|| NaiveDate::from_ymd_opt(base_year - age, 1, 1).unwrap())
571    }
572
573    /// Get industry description for business persona.
574    fn get_industry_description(&self, persona: BusinessPersona) -> String {
575        match persona {
576            BusinessPersona::SmallBusiness => "Small Business Services",
577            BusinessPersona::MidMarket => "Mid-Market Corporation",
578            BusinessPersona::Enterprise => "Large Enterprise",
579            BusinessPersona::Startup => "Technology Startup",
580            BusinessPersona::CashIntensive => "Retail / Restaurant",
581            BusinessPersona::ImportExport => "International Trade",
582            BusinessPersona::MoneyServices => "Money Services Business",
583            BusinessPersona::ProfessionalServices => "Professional Services",
584        }
585        .to_string()
586    }
587
588    /// Form households from retail customers.
589    fn form_households(&mut self, customers: &mut [BankingCustomer]) {
590        use uuid::Uuid;
591
592        let retail_indices: Vec<usize> = customers
593            .iter()
594            .enumerate()
595            .filter(|(_, c)| c.customer_type == BankingCustomerType::Retail)
596            .map(|(i, _)| i)
597            .collect();
598
599        let household_count = (retail_indices.len() as f64 * self.config.population.household_rate
600            / self.config.population.avg_household_size) as usize;
601
602        for _ in 0..household_count {
603            let household_id = Uuid::new_v4();
604            let size = self.rng.gen_range(2..=4).min(retail_indices.len());
605
606            // Select random customers for household
607            let selected: Vec<usize> = retail_indices
608                .choose_multiple(&mut self.rng, size)
609                .copied()
610                .collect();
611
612            for idx in selected {
613                customers[idx].household_id = Some(household_id);
614            }
615        }
616    }
617}
618
#[cfg(test)]
mod tests {
    use super::*;

    /// The small configuration must yield at least one retail and one
    /// business customer.
    #[test]
    fn test_customer_generation() {
        let mut generator = CustomerGenerator::new(BankingConfig::small(), 12345);
        let customers = generator.generate_all();

        assert!(!customers.is_empty());

        let count_of = |kind: BankingCustomerType| {
            customers
                .iter()
                .filter(|c| c.customer_type == kind)
                .count()
        };

        assert!(count_of(BankingCustomerType::Retail) > 0);
        assert!(count_of(BankingCustomerType::Business) > 0);
    }

    /// Sampling 1000 retail personas must produce more than three distinct
    /// personas.
    #[test]
    fn test_persona_distribution() {
        let mut generator = CustomerGenerator::new(BankingConfig::small(), 12345);

        let mut distinct = std::collections::HashSet::new();
        for _ in 0..1000 {
            let persona = generator.select_retail_persona();
            distinct.insert(format!("{:?}", persona));
        }

        assert!(distinct.len() > 3);
    }
}
659}