use std::path::Path;
use crate::config::BankingConfig;
use crate::generators::{
AccountGenerator, CounterpartyGenerator, CustomerGenerator, KycGenerator, TransactionGenerator,
};
use crate::labels::{
AccountLabel, CustomerLabel, EntityLabelExtractor, ExportedNarrative, NarrativeGenerator,
RelationshipLabel, RelationshipLabelExtractor, TransactionLabel, TransactionLabelExtractor,
};
use crate::models::{AmlScenario, BankAccount, BankTransaction, BankingCustomer, CounterpartyPool};
use crate::typologies::TypologyInjector;
/// Drives the banking data generation pipeline and writes its results to disk.
pub struct BankingOrchestrator {
/// Generation settings that `generate` hands to the generators, by clone or by reference.
config: BankingConfig,
/// Seed passed unchanged to every generator that `generate` constructs.
seed: u64,
/// Optional country pack; when set, a clone of it is given to the customer generator.
country_pack: Option<datasynth_core::CountryPack>,
}
/// Everything produced by one `BankingOrchestrator::generate` run.
#[derive(Debug)]
pub struct BankingData {
/// Generated customers, after KYC profile assignment and typology injection.
pub customers: Vec<BankingCustomer>,
/// Accounts created for the customers, after transaction generation and typology injection.
pub accounts: Vec<BankAccount>,
/// Generated transactions, after typology injection.
pub transactions: Vec<BankTransaction>,
/// Counterparty pool built by the counterparty generator.
pub counterparties: CounterpartyPool,
/// Scenarios reported by the typology injector once injection has run.
pub scenarios: Vec<AmlScenario>,
/// Labels extracted (with features) from `transactions`.
pub transaction_labels: Vec<TransactionLabel>,
/// Labels extracted from `customers`.
pub customer_labels: Vec<CustomerLabel>,
/// Labels extracted from `accounts`.
pub account_labels: Vec<AccountLabel>,
/// Relationship labels extracted from `customers`.
pub relationship_labels: Vec<RelationshipLabel>,
/// One exported narrative per entry in `scenarios`, in the same order.
pub narratives: Vec<ExportedNarrative>,
/// Counts, rates and timing for the run.
pub stats: GenerationStats,
}
/// Summary figures for a generation run; the derived `Default` zeroes every field.
#[derive(Debug, Clone, Default)]
pub struct GenerationStats {
/// Number of generated customers.
pub customer_count: usize,
/// Number of generated accounts.
pub account_count: usize,
/// Number of generated transactions.
pub transaction_count: usize,
/// Number of transactions whose `is_suspicious` flag is set.
pub suspicious_count: usize,
/// `suspicious_count` divided by the transaction count (denominator clamped to at least 1).
pub suspicious_rate: f64,
/// Number of transactions whose `is_spoofed` flag is set.
pub spoofed_count: usize,
/// `spoofed_count` divided by the transaction count (denominator clamped to at least 1).
pub spoofed_rate: f64,
/// Number of scenarios reported by the typology injector.
pub scenario_count: usize,
/// Time spent inside `generate`, in milliseconds.
pub duration_ms: u64,
}
impl BankingOrchestrator {
    /// Creates an orchestrator for `config` whose generators are all seeded with `seed`.
    ///
    /// No country pack is attached; call [`Self::set_country_pack`] to add one.
    pub fn new(config: BankingConfig, seed: u64) -> Self {
        Self {
            config,
            seed,
            country_pack: None,
        }
    }

    /// Attaches a country pack, replacing any pack set earlier.
    ///
    /// A clone of the pack is handed to the customer generator on each
    /// [`Self::generate`] call.
    pub fn set_country_pack(&mut self, pack: datasynth_core::CountryPack) {
        self.country_pack = Some(pack);
    }

    /// Runs the full generation pipeline and returns everything it produced.
    ///
    /// Stages, in execution order:
    /// 1. counterparty pool,
    /// 2. customers (using the country pack when one is set),
    /// 3. a KYC profile for every customer,
    /// 4. accounts for the customers,
    /// 5. transactions,
    /// 6. typology injection over customers, accounts and transactions,
    /// 7. one narrative per injected scenario,
    /// 8. transaction, customer, account and relationship labels,
    /// 9. summary statistics.
    ///
    /// Every generator is constructed with the same `seed`.
    /// NOTE(review): whether two runs with equal seed and config produce identical
    /// output depends on the generators themselves — confirm before relying on it.
    pub fn generate(&self) -> BankingData {
        let start = std::time::Instant::now();

        let mut counterparty_gen = CounterpartyGenerator::new(self.seed);
        let counterparties = counterparty_gen.generate_pool(&self.config);

        let mut customer_gen = CustomerGenerator::new(self.config.clone(), self.seed);
        if let Some(pack) = &self.country_pack {
            customer_gen.set_country_pack(pack.clone());
        }
        let mut customers = customer_gen.generate_all();

        let mut kyc_gen = KycGenerator::new(self.seed);
        for customer in &mut customers {
            // Build the profile first, then store it, so the customer is only
            // borrowed by the generator while the profile is being computed.
            let profile = kyc_gen.generate_profile(customer, &self.config);
            customer.kyc_profile = profile;
        }

        let mut account_gen = AccountGenerator::new(self.config.clone(), self.seed);
        let mut accounts = account_gen.generate_for_customers(&mut customers);

        let mut txn_gen = TransactionGenerator::new(self.config.clone(), self.seed);
        let mut transactions = txn_gen.generate_all(&customers, &mut accounts);

        let mut typology_injector = TypologyInjector::new(self.config.clone(), self.seed);
        typology_injector.inject(&mut customers, &mut accounts, &mut transactions);
        let scenarios: Vec<AmlScenario> = typology_injector.get_scenarios().to_vec();

        let mut narrative_gen = NarrativeGenerator::new(self.seed);
        let narratives: Vec<ExportedNarrative> = scenarios
            .iter()
            .map(|scenario| {
                let narrative = narrative_gen.generate(scenario);
                ExportedNarrative::from_scenario(scenario, &narrative)
            })
            .collect();

        // Labels are extracted after injection so they see the injected state.
        let transaction_labels = TransactionLabelExtractor::extract_with_features(&transactions);
        let customer_labels = EntityLabelExtractor::extract_customers(&customers);
        let account_labels = EntityLabelExtractor::extract_accounts(&accounts);
        let relationship_labels = RelationshipLabelExtractor::extract_from_customers(&customers);

        let suspicious_count = transactions.iter().filter(|t| t.is_suspicious).count();
        let spoofed_count = transactions.iter().filter(|t| t.is_spoofed).count();
        // Clamp the denominator to 1 so an empty transaction list gives a rate of
        // 0.0 instead of NaN.
        let rate_denominator = transactions.len().max(1) as f64;

        let stats = GenerationStats {
            customer_count: customers.len(),
            account_count: accounts.len(),
            transaction_count: transactions.len(),
            suspicious_count,
            suspicious_rate: suspicious_count as f64 / rate_denominator,
            spoofed_count,
            spoofed_rate: spoofed_count as f64 / rate_denominator,
            scenario_count: scenarios.len(),
            // u128 -> u64 narrowing: a run would have to last hundreds of millions
            // of years before the millisecond count stopped fitting.
            duration_ms: start.elapsed().as_millis() as u64,
        };

        BankingData {
            customers,
            accounts,
            transactions,
            counterparties,
            scenarios,
            transaction_labels,
            customer_labels,
            account_labels,
            relationship_labels,
            narratives,
            stats,
        }
    }

    /// Writes `data` into `output_dir`, creating the directory (and parents) if needed.
    ///
    /// Files written, in this order: `banking_customers.csv`, `banking_accounts.csv`,
    /// `banking_transactions.csv`, `transaction_labels.csv`, `customer_labels.csv`,
    /// `account_labels.csv`, `relationship_labels.csv`, `case_narratives.json`,
    /// `merchants.csv`, `employers.csv`.
    ///
    /// `data.scenarios` and `data.stats` are not written by this method.
    ///
    /// # Errors
    ///
    /// Returns the first I/O, CSV or JSON error encountered. Writing stops at that
    /// point; files written before the failure are left in place.
    pub fn write_output(&self, data: &BankingData, output_dir: &Path) -> std::io::Result<()> {
        std::fs::create_dir_all(output_dir)?;

        self.write_csv(&data.customers, &output_dir.join("banking_customers.csv"))?;
        self.write_csv(&data.accounts, &output_dir.join("banking_accounts.csv"))?;
        self.write_csv(
            &data.transactions,
            &output_dir.join("banking_transactions.csv"),
        )?;

        self.write_csv(
            &data.transaction_labels,
            &output_dir.join("transaction_labels.csv"),
        )?;
        self.write_csv(
            &data.customer_labels,
            &output_dir.join("customer_labels.csv"),
        )?;
        self.write_csv(&data.account_labels, &output_dir.join("account_labels.csv"))?;
        self.write_csv(
            &data.relationship_labels,
            &output_dir.join("relationship_labels.csv"),
        )?;

        self.write_json(&data.narratives, &output_dir.join("case_narratives.json"))?;

        self.write_csv(
            &data.counterparties.merchants,
            &output_dir.join("merchants.csv"),
        )?;
        self.write_csv(
            &data.counterparties.employers,
            &output_dir.join("employers.csv"),
        )?;

        Ok(())
    }

    /// Serializes every element of `data` as one CSV record in the file at `path`.
    ///
    /// The file is created or truncated by `csv::Writer::from_path`. `self` is not
    /// read; the method form only mirrors the other writers.
    ///
    /// NOTE(review): with the default `csv` writer settings the header row is
    /// derived from the first serialized record, so an empty slice presumably
    /// yields an empty file without headers — confirm if consumers need headers.
    ///
    /// # Errors
    ///
    /// Returns an `std::io::Error` converted from any `csv` error, or from the
    /// final flush.
    fn write_csv<T: serde::Serialize>(&self, data: &[T], path: &Path) -> std::io::Result<()> {
        let mut writer = csv::Writer::from_path(path)?;
        for item in data {
            writer.serialize(item)?;
        }
        // Flush explicitly: a flush that happens on drop cannot report its error.
        writer.flush()?;
        Ok(())
    }

    /// Writes `data` as pretty-printed JSON to the file at `path`.
    ///
    /// The file is created or truncated. Output goes through a `BufWriter` because
    /// the serializer emits many small writes, and each of those would otherwise
    /// hit the file directly. `self` is not read.
    ///
    /// # Errors
    ///
    /// Returns an `std::io::Error` if the file cannot be created, serialization
    /// fails, or the buffered output cannot be flushed.
    fn write_json<T: serde::Serialize>(&self, data: &T, path: &Path) -> std::io::Result<()> {
        let file = std::fs::File::create(path)?;
        let mut writer = std::io::BufWriter::new(file);
        serde_json::to_writer_pretty(&mut writer, data)?;
        // Flush explicitly so a failed final write is returned instead of being
        // silently dropped when the buffer goes out of scope.
        std::io::Write::flush(&mut writer)?;
        Ok(())
    }
}
/// Step-by-step builder for `BankingOrchestrator`.
pub struct BankingOrchestratorBuilder {
/// Configuration to use; when left unset, `build` falls back to `BankingConfig::default()`.
config: Option<BankingConfig>,
/// Seed handed to the orchestrator (the `Default` impl sets it to 42).
seed: u64,
/// Optional country pack that `build` installs on the orchestrator.
country_pack: Option<datasynth_core::CountryPack>,
}
impl Default for BankingOrchestratorBuilder {
fn default() -> Self {
Self {
config: None,
seed: 42,
country_pack: None,
}
}
}
impl BankingOrchestratorBuilder {
    /// Starts a builder with the default settings (see the `Default` impl).
    pub fn new() -> Self {
        Default::default()
    }

    /// Sets the configuration the orchestrator will be built with.
    pub fn config(self, config: BankingConfig) -> Self {
        Self {
            config: Some(config),
            ..self
        }
    }

    /// Sets the seed the orchestrator will be built with.
    pub fn seed(self, seed: u64) -> Self {
        Self { seed, ..self }
    }

    /// Sets the country pack to install on the built orchestrator.
    pub fn country_pack(self, pack: datasynth_core::CountryPack) -> Self {
        Self {
            country_pack: Some(pack),
            ..self
        }
    }

    /// Consumes the builder and produces the orchestrator.
    ///
    /// A missing configuration is replaced by `BankingConfig::default()`; the
    /// country pack is installed only when one was supplied.
    pub fn build(self) -> BankingOrchestrator {
        let Self {
            config,
            seed,
            country_pack,
        } = self;

        let mut orchestrator = BankingOrchestrator::new(config.unwrap_or_default(), seed);
        if let Some(pack) = country_pack {
            orchestrator.set_country_pack(pack);
        }
        orchestrator
    }
}
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// A small configuration must yield non-empty core outputs and non-zero counts in the stats.
    #[test]
    fn test_orchestrator_generation() {
        let generated = BankingOrchestrator::new(BankingConfig::small(), 12345).generate();

        assert!(!generated.customers.is_empty());
        assert!(!generated.accounts.is_empty());
        assert!(!generated.transactions.is_empty());

        assert!(!generated.transaction_labels.is_empty());
        assert!(!generated.customer_labels.is_empty());

        let stats = &generated.stats;
        assert!(stats.customer_count > 0);
        assert!(stats.transaction_count > 0);
    }

    /// An orchestrator assembled through the builder must also generate customers.
    #[test]
    fn test_builder() {
        let builder = BankingOrchestratorBuilder::new()
            .config(BankingConfig::small())
            .seed(12345);

        let generated = builder.build().generate();

        assert!(!generated.customers.is_empty());
    }
}