Skip to main content

datasynth_banking/
orchestrator.rs

1//! Banking data generation orchestrator.
2
3use std::path::Path;
4
5use crate::config::BankingConfig;
6use crate::generators::{
7    AccountGenerator, CounterpartyGenerator, CustomerGenerator, KycGenerator, TransactionGenerator,
8};
9use crate::labels::{
10    AccountLabel, CustomerLabel, EntityLabelExtractor, ExportedNarrative, NarrativeGenerator,
11    RelationshipLabel, RelationshipLabelExtractor, TransactionLabel, TransactionLabelExtractor,
12};
13use crate::models::{AmlScenario, BankAccount, BankTransaction, BankingCustomer, CounterpartyPool};
14use crate::typologies::TypologyInjector;
15
/// Banking data generation orchestrator.
///
/// Coordinates the generation of:
/// - Customers with KYC profiles
/// - Accounts for customers
/// - Transactions based on personas
/// - AML typology injection
/// - Ground truth labels
pub struct BankingOrchestrator {
    /// Generation configuration; borrowed by or cloned into the generators
    /// and the typology injector.
    config: BankingConfig,
    /// Seed handed unchanged to every generator, the typology injector and
    /// the narrative generator.
    seed: u64,
}
28
/// Generated banking data result.
///
/// Returned by `BankingOrchestrator::generate`; bundles the generated
/// entities together with the labels, narratives and statistics derived
/// from them.
#[derive(Debug)]
pub struct BankingData {
    /// Generated customers (after KYC profile assignment and typology injection)
    pub customers: Vec<BankingCustomer>,
    /// Generated accounts (after typology injection)
    pub accounts: Vec<BankAccount>,
    /// Generated transactions (after typology injection)
    pub transactions: Vec<BankTransaction>,
    /// Counterparty pool
    pub counterparties: CounterpartyPool,
    /// AML scenarios reported by the typology injector
    pub scenarios: Vec<AmlScenario>,
    /// Transaction labels extracted from `transactions`
    pub transaction_labels: Vec<TransactionLabel>,
    /// Customer labels extracted from `customers`
    pub customer_labels: Vec<CustomerLabel>,
    /// Account labels extracted from `accounts`
    pub account_labels: Vec<AccountLabel>,
    /// Relationship labels extracted from `customers`
    pub relationship_labels: Vec<RelationshipLabel>,
    /// Case narratives, one per entry of `scenarios` and in the same order
    pub narratives: Vec<ExportedNarrative>,
    /// Generation statistics
    pub stats: GenerationStats,
}
55
/// Generation statistics.
///
/// Plain summary counters for one generation run. `PartialEq` is derived so
/// that two runs (or an expected value in a test) can be compared directly;
/// `Eq` is not available because the rate fields are floating point.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct GenerationStats {
    /// Total customers generated
    pub customer_count: usize,
    /// Total accounts generated
    pub account_count: usize,
    /// Total transactions generated
    pub transaction_count: usize,
    /// Number of transactions flagged as suspicious
    pub suspicious_count: usize,
    /// Fraction of transactions flagged as suspicious
    /// (`suspicious_count / transaction_count`; `0.0` when there are no transactions)
    pub suspicious_rate: f64,
    /// Number of transactions flagged as spoofed
    pub spoofed_count: usize,
    /// Fraction of transactions flagged as spoofed
    /// (`spoofed_count / transaction_count`; `0.0` when there are no transactions)
    pub spoofed_rate: f64,
    /// AML scenario count
    pub scenario_count: usize,
    /// Generation duration in milliseconds
    pub duration_ms: u64,
}
78
79impl BankingOrchestrator {
80    /// Create a new banking orchestrator.
81    pub fn new(config: BankingConfig, seed: u64) -> Self {
82        Self { config, seed }
83    }
84
85    /// Generate all banking data.
86    pub fn generate(&self) -> BankingData {
87        let start = std::time::Instant::now();
88
89        // Phase 1: Generate counterparty pool
90        let mut counterparty_gen = CounterpartyGenerator::new(self.seed);
91        let counterparties = counterparty_gen.generate_pool(&self.config);
92
93        // Phase 2: Generate customers with KYC profiles
94        let mut customer_gen = CustomerGenerator::new(self.config.clone(), self.seed);
95        let mut customers = customer_gen.generate_all();
96
97        // Phase 3: Generate KYC profiles
98        let mut kyc_gen = KycGenerator::new(self.seed);
99        for customer in &mut customers {
100            let profile = kyc_gen.generate_profile(customer, &self.config);
101            customer.kyc_profile = profile;
102        }
103
104        // Phase 4: Generate accounts for customers
105        let mut account_gen = AccountGenerator::new(self.config.clone(), self.seed);
106        let mut accounts = account_gen.generate_for_customers(&mut customers);
107
108        // Phase 5: Generate transactions
109        let mut txn_gen = TransactionGenerator::new(self.config.clone(), self.seed);
110        let mut transactions = txn_gen.generate_all(&customers, &mut accounts);
111
112        // Phase 6: Inject AML typologies
113        let mut typology_injector = TypologyInjector::new(self.config.clone(), self.seed);
114        typology_injector.inject(&mut customers, &mut accounts, &mut transactions);
115        let scenarios: Vec<AmlScenario> = typology_injector.get_scenarios().to_vec();
116
117        // Phase 7: Generate narratives
118        let mut narrative_gen = NarrativeGenerator::new(self.seed);
119        let narratives: Vec<ExportedNarrative> = scenarios
120            .iter()
121            .map(|s| {
122                let narrative = narrative_gen.generate(s);
123                ExportedNarrative::from_scenario(s, &narrative)
124            })
125            .collect();
126
127        // Phase 8: Extract labels
128        let transaction_labels = TransactionLabelExtractor::extract_with_features(&transactions);
129        let customer_labels = EntityLabelExtractor::extract_customers(&customers);
130        let account_labels = EntityLabelExtractor::extract_accounts(&accounts);
131        let relationship_labels = RelationshipLabelExtractor::extract_from_customers(&customers);
132
133        // Compute statistics
134        let suspicious_count = transactions.iter().filter(|t| t.is_suspicious).count();
135        let spoofed_count = transactions.iter().filter(|t| t.is_spoofed).count();
136
137        let stats = GenerationStats {
138            customer_count: customers.len(),
139            account_count: accounts.len(),
140            transaction_count: transactions.len(),
141            suspicious_count,
142            suspicious_rate: suspicious_count as f64 / transactions.len().max(1) as f64,
143            spoofed_count,
144            spoofed_rate: spoofed_count as f64 / transactions.len().max(1) as f64,
145            scenario_count: scenarios.len(),
146            duration_ms: start.elapsed().as_millis() as u64,
147        };
148
149        BankingData {
150            customers,
151            accounts,
152            transactions,
153            counterparties,
154            scenarios,
155            transaction_labels,
156            customer_labels,
157            account_labels,
158            relationship_labels,
159            narratives,
160            stats,
161        }
162    }
163
164    /// Write generated data to output directory.
165    pub fn write_output(&self, data: &BankingData, output_dir: &Path) -> std::io::Result<()> {
166        std::fs::create_dir_all(output_dir)?;
167
168        // Write customers
169        self.write_csv(&data.customers, &output_dir.join("banking_customers.csv"))?;
170
171        // Write accounts
172        self.write_csv(&data.accounts, &output_dir.join("banking_accounts.csv"))?;
173
174        // Write transactions
175        self.write_csv(
176            &data.transactions,
177            &output_dir.join("banking_transactions.csv"),
178        )?;
179
180        // Write labels
181        self.write_csv(
182            &data.transaction_labels,
183            &output_dir.join("transaction_labels.csv"),
184        )?;
185        self.write_csv(
186            &data.customer_labels,
187            &output_dir.join("customer_labels.csv"),
188        )?;
189        self.write_csv(&data.account_labels, &output_dir.join("account_labels.csv"))?;
190        self.write_csv(
191            &data.relationship_labels,
192            &output_dir.join("relationship_labels.csv"),
193        )?;
194
195        // Write narratives as JSON
196        self.write_json(&data.narratives, &output_dir.join("case_narratives.json"))?;
197
198        // Write counterparties
199        self.write_csv(
200            &data.counterparties.merchants,
201            &output_dir.join("merchants.csv"),
202        )?;
203        self.write_csv(
204            &data.counterparties.employers,
205            &output_dir.join("employers.csv"),
206        )?;
207
208        Ok(())
209    }
210
211    /// Write data to CSV file.
212    fn write_csv<T: serde::Serialize>(&self, data: &[T], path: &Path) -> std::io::Result<()> {
213        let mut writer = csv::Writer::from_path(path)?;
214        for item in data {
215            writer.serialize(item)?;
216        }
217        writer.flush()?;
218        Ok(())
219    }
220
221    /// Write data to JSON file.
222    fn write_json<T: serde::Serialize>(&self, data: &T, path: &Path) -> std::io::Result<()> {
223        let file = std::fs::File::create(path)?;
224        serde_json::to_writer_pretty(file, data)?;
225        Ok(())
226    }
227}
228
/// Builder for BankingOrchestrator.
pub struct BankingOrchestratorBuilder {
    /// Configuration to use; when `None`, `build` falls back to the
    /// configuration's default value.
    config: Option<BankingConfig>,
    /// Random seed passed to the orchestrator (42 unless overridden).
    seed: u64,
}
234
235impl Default for BankingOrchestratorBuilder {
236    fn default() -> Self {
237        Self {
238            config: None,
239            seed: 42,
240        }
241    }
242}
243
244impl BankingOrchestratorBuilder {
245    /// Create a new builder.
246    pub fn new() -> Self {
247        Self::default()
248    }
249
250    /// Set the configuration.
251    pub fn config(mut self, config: BankingConfig) -> Self {
252        self.config = Some(config);
253        self
254    }
255
256    /// Set the random seed.
257    pub fn seed(mut self, seed: u64) -> Self {
258        self.seed = seed;
259        self
260    }
261
262    /// Build the orchestrator.
263    pub fn build(self) -> BankingOrchestrator {
264        BankingOrchestrator::new(self.config.unwrap_or_default(), self.seed)
265    }
266}
267
#[cfg(test)]
mod tests {
    use super::*;

    /// Seed shared by the tests in this module.
    const TEST_SEED: u64 = 12345;

    /// A small configuration must yield non-empty entities, labels and stats.
    #[test]
    fn test_orchestrator_generation() {
        let data = BankingOrchestrator::new(BankingConfig::small(), TEST_SEED).generate();

        // Entities and labels are present.
        assert!(!data.customers.is_empty());
        assert!(!data.accounts.is_empty());
        assert!(!data.transactions.is_empty());
        assert!(!data.transaction_labels.is_empty());
        assert!(!data.customer_labels.is_empty());

        // Stats should be populated
        let stats = &data.stats;
        assert!(stats.customer_count > 0);
        assert!(stats.transaction_count > 0);
    }

    /// The builder path produces a working orchestrator.
    #[test]
    fn test_builder() {
        let builder = BankingOrchestratorBuilder::new()
            .config(BankingConfig::small())
            .seed(TEST_SEED);

        let data = builder.build().generate();
        assert!(!data.customers.is_empty());
    }
}