Skip to main content

datasynth_banking/labels/
entity_labels.rs

1//! Entity-level label generation.
2
3use datasynth_core::models::banking::{RiskTier, SourceOfFunds, TurnoverBand};
4use serde::{Deserialize, Serialize};
5use uuid::Uuid;
6
7use crate::models::{BankAccount, BankingCustomer};
8
9/// Customer-level labels for ML training.
10#[derive(Debug, Clone, Serialize, Deserialize)]
11pub struct CustomerLabel {
12    /// Customer ID
13    pub customer_id: Uuid,
14    /// Risk tier
15    pub risk_tier: RiskTier,
16    /// Expected monthly turnover band
17    pub expected_turnover: TurnoverBand,
18    /// Beneficial owner complexity score (1-10)
19    pub bo_complexity: u8,
20    /// Is known money mule?
21    pub is_mule: bool,
22    /// True vs declared source of funds match?
23    pub sof_truthful: bool,
24    /// True source of funds (if different from declared)
25    pub true_sof: Option<SourceOfFunds>,
26    /// Declared source of funds
27    pub declared_sof: SourceOfFunds,
28    /// KYC completeness score (0.0-1.0)
29    pub kyc_completeness: f64,
30    /// Customer type risk weight
31    pub type_risk_weight: f64,
32    /// Associated case IDs
33    pub case_ids: Vec<String>,
34    /// Confidence score for the label
35    pub confidence: f64,
36}
37
38impl CustomerLabel {
39    /// Create a new customer label from a customer.
40    pub fn from_customer(customer: &BankingCustomer) -> Self {
41        Self {
42            customer_id: customer.customer_id,
43            risk_tier: customer.risk_tier,
44            expected_turnover: customer.kyc_profile.expected_monthly_turnover,
45            bo_complexity: customer.kyc_profile.beneficial_owner_complexity,
46            is_mule: customer.is_mule,
47            sof_truthful: customer.kyc_truthful,
48            true_sof: customer.kyc_profile.true_source_of_funds,
49            declared_sof: customer.kyc_profile.source_of_funds,
50            kyc_completeness: customer.kyc_profile.completeness_score,
51            type_risk_weight: Self::customer_type_risk_weight(&customer.customer_type),
52            case_ids: Vec::new(),
53            confidence: 1.0,
54        }
55    }
56
57    /// Get risk weight for customer type.
58    fn customer_type_risk_weight(
59        customer_type: &datasynth_core::models::banking::BankingCustomerType,
60    ) -> f64 {
61        use datasynth_core::models::banking::BankingCustomerType;
62        match customer_type {
63            BankingCustomerType::Retail => 1.0,
64            BankingCustomerType::Business => 1.2,
65            BankingCustomerType::Trust => 1.5,
66            BankingCustomerType::FinancialInstitution => 1.8,
67            BankingCustomerType::Government => 0.8,
68            BankingCustomerType::NonProfit => 1.0,
69        }
70    }
71
72    /// Add case ID to customer label.
73    pub fn with_case(mut self, case_id: &str) -> Self {
74        self.case_ids.push(case_id.to_string());
75        self
76    }
77}
78
79/// Account-level labels for ML training.
80#[derive(Debug, Clone, Serialize, Deserialize)]
81pub struct AccountLabel {
82    /// Account ID
83    pub account_id: Uuid,
84    /// Owner customer ID
85    pub customer_id: Uuid,
86    /// Is funnel account?
87    pub is_funnel: bool,
88    /// Is mule account?
89    pub is_mule_account: bool,
90    /// Account risk weight
91    pub risk_weight: f64,
92    /// Expected transaction count per month
93    pub expected_tx_count: u32,
94    /// Expected average transaction amount
95    pub expected_avg_amount: f64,
96    /// Associated case ID
97    pub case_id: Option<String>,
98    /// Account age in days
99    pub account_age_days: u32,
100    /// Is dormant (no activity in 90+ days)?
101    pub is_dormant: bool,
102    /// Confidence score
103    pub confidence: f64,
104}
105
106impl AccountLabel {
107    /// Create a new account label from an account.
108    pub fn from_account(account: &BankAccount) -> Self {
109        let today = chrono::Utc::now().date_naive();
110        let age_days = (today - account.opening_date).num_days().max(0) as u32;
111
112        Self {
113            account_id: account.account_id,
114            customer_id: account.primary_owner_id,
115            is_funnel: account.is_funnel_account,
116            is_mule_account: account.is_mule_account,
117            risk_weight: account.account_type.risk_weight(),
118            expected_tx_count: Self::estimate_tx_count(&account.account_type),
119            expected_avg_amount: Self::estimate_avg_amount(&account.account_type),
120            case_id: account.case_id.clone(),
121            account_age_days: age_days,
122            is_dormant: account.days_dormant > 90,
123            confidence: 1.0,
124        }
125    }
126
127    /// Estimate expected transaction count.
128    fn estimate_tx_count(account_type: &datasynth_core::models::banking::BankAccountType) -> u32 {
129        use datasynth_core::models::banking::BankAccountType;
130
131        match account_type {
132            BankAccountType::Checking => 30,
133            BankAccountType::Savings => 5,
134            BankAccountType::MoneyMarket => 3,
135            BankAccountType::CertificateOfDeposit => 1,
136            BankAccountType::BusinessOperating => 100,
137            BankAccountType::BusinessSavings => 10,
138            BankAccountType::Payroll => 50,
139            BankAccountType::TrustAccount => 5,
140            BankAccountType::Escrow => 3,
141            BankAccountType::Investment => 10,
142            BankAccountType::ForeignCurrency => 20,
143        }
144    }
145
146    /// Estimate expected average amount.
147    fn estimate_avg_amount(account_type: &datasynth_core::models::banking::BankAccountType) -> f64 {
148        use datasynth_core::models::banking::BankAccountType;
149
150        match account_type {
151            BankAccountType::Checking => 250.0,
152            BankAccountType::Savings => 1000.0,
153            BankAccountType::MoneyMarket => 5000.0,
154            BankAccountType::CertificateOfDeposit => 10000.0,
155            BankAccountType::BusinessOperating => 2500.0,
156            BankAccountType::BusinessSavings => 10000.0,
157            BankAccountType::Payroll => 3500.0,
158            BankAccountType::TrustAccount => 50000.0,
159            BankAccountType::Escrow => 25000.0,
160            BankAccountType::Investment => 15000.0,
161            BankAccountType::ForeignCurrency => 5000.0,
162        }
163    }
164}
165
166/// Entity label extractor.
167pub struct EntityLabelExtractor;
168
169impl EntityLabelExtractor {
170    /// Extract customer labels.
171    pub fn extract_customers(customers: &[BankingCustomer]) -> Vec<CustomerLabel> {
172        customers.iter().map(CustomerLabel::from_customer).collect()
173    }
174
175    /// Extract account labels.
176    pub fn extract_accounts(accounts: &[BankAccount]) -> Vec<AccountLabel> {
177        accounts.iter().map(AccountLabel::from_account).collect()
178    }
179
180    /// Get customer label summary.
181    pub fn summarize_customers(labels: &[CustomerLabel]) -> CustomerLabelSummary {
182        let total = labels.len();
183        let mules = labels.iter().filter(|l| l.is_mule).count();
184        let deceptive = labels.iter().filter(|l| !l.sof_truthful).count();
185
186        let mut by_risk_tier = std::collections::HashMap::new();
187        for label in labels {
188            *by_risk_tier.entry(label.risk_tier).or_insert(0) += 1;
189        }
190
191        CustomerLabelSummary {
192            total_customers: total,
193            mule_count: mules,
194            mule_rate: mules as f64 / total as f64,
195            deceptive_count: deceptive,
196            deceptive_rate: deceptive as f64 / total as f64,
197            by_risk_tier,
198        }
199    }
200
201    /// Get account label summary.
202    pub fn summarize_accounts(labels: &[AccountLabel]) -> AccountLabelSummary {
203        let total = labels.len();
204        let funnel = labels.iter().filter(|l| l.is_funnel).count();
205        let mule = labels.iter().filter(|l| l.is_mule_account).count();
206        let dormant = labels.iter().filter(|l| l.is_dormant).count();
207
208        AccountLabelSummary {
209            total_accounts: total,
210            funnel_count: funnel,
211            funnel_rate: funnel as f64 / total as f64,
212            mule_count: mule,
213            mule_rate: mule as f64 / total as f64,
214            dormant_count: dormant,
215            dormant_rate: dormant as f64 / total as f64,
216        }
217    }
218}
219
220/// Customer label summary.
221#[derive(Debug, Clone)]
222pub struct CustomerLabelSummary {
223    /// Total customers
224    pub total_customers: usize,
225    /// Number of mules
226    pub mule_count: usize,
227    /// Mule rate
228    pub mule_rate: f64,
229    /// Number with deceptive KYC
230    pub deceptive_count: usize,
231    /// Deceptive rate
232    pub deceptive_rate: f64,
233    /// Counts by risk tier
234    pub by_risk_tier: std::collections::HashMap<RiskTier, usize>,
235}
236
/// Aggregate statistics over a set of account labels.
#[derive(Clone, Debug)]
pub struct AccountLabelSummary {
    /// Number of account labels summarized.
    pub total_accounts: usize,
    /// Number of labels flagged as funnel accounts.
    pub funnel_count: usize,
    /// Fraction of accounts that are funnel accounts (`funnel_count / total_accounts`).
    pub funnel_rate: f64,
    /// Number of labels flagged as mule accounts.
    pub mule_count: usize,
    /// Fraction of accounts that are mule accounts (`mule_count / total_accounts`).
    pub mule_rate: f64,
    /// Number of labels flagged as dormant.
    pub dormant_count: usize,
    /// Fraction of accounts that are dormant (`dormant_count / total_accounts`).
    pub dormant_rate: f64,
}
255
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::NaiveDate;

    /// Fixed calendar date shared by the fixtures below.
    fn fixture_date() -> NaiveDate {
        NaiveDate::from_ymd_opt(2024, 1, 1).expect("2024-01-01 is a valid calendar date")
    }

    /// A freshly created retail customer yields a label with the same ID that is
    /// neither a mule nor deceptive about its source of funds.
    #[test]
    fn test_customer_label() {
        let retail =
            BankingCustomer::new_retail(Uuid::new_v4(), "Test", "User", "US", fixture_date());

        let derived = CustomerLabel::from_customer(&retail);

        assert_eq!(derived.customer_id, retail.customer_id);
        assert!(!derived.is_mule);
        assert!(derived.sof_truthful);
    }

    /// A freshly created checking account yields a label with the same ID that is
    /// neither a funnel nor a mule account.
    #[test]
    fn test_account_label() {
        use datasynth_core::models::banking::BankAccountType;

        let checking = BankAccount::new(
            Uuid::new_v4(),
            String::from("****1234"),
            BankAccountType::Checking,
            Uuid::new_v4(),
            "USD",
            fixture_date(),
        );

        let derived = AccountLabel::from_account(&checking);

        assert_eq!(derived.account_id, checking.account_id);
        assert!(!derived.is_funnel);
        assert!(!derived.is_mule_account);
    }
}
295}