Skip to main content

datasynth_banking/labels/
transaction_labels.rs

1//! Transaction-level label generation.
2
3use chrono::{Datelike, Timelike};
4use datasynth_core::models::banking::{AmlTypology, LaunderingStage};
5use serde::{Deserialize, Serialize};
6use uuid::Uuid;
7
8use crate::models::BankTransaction;
9
/// Transaction-level labels for ML training.
///
/// One label corresponds to one `BankTransaction`. `TransactionLabel::from_transaction`
/// copies the ground-truth fields from the transaction and derives the feature flags
/// stored in `features`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TransactionLabel {
    /// Transaction ID
    pub transaction_id: Uuid,
    /// Binary suspicious flag
    pub is_suspicious: bool,
    /// Specific suspicion reason (`None` when the transaction carries no typology)
    pub suspicion_reason: Option<AmlTypology>,
    /// Money laundering stage (`None` when the transaction carries no stage)
    pub laundering_stage: Option<LaunderingStage>,
    /// Case ID for linking related transactions
    pub case_id: Option<String>,
    /// Whether transaction has been spoofed
    pub is_spoofed: bool,
    /// Spoofing intensity (0.0-1.0)
    pub spoofing_intensity: Option<f64>,
    /// Sequence within scenario
    pub scenario_sequence: Option<u32>,
    /// Confidence score for the label (for soft labels).
    /// `from_transaction` always sets this to `1.0`.
    pub confidence: f64,
    /// Additional feature flags
    pub features: TransactionLabelFeatures,
}
34
/// Additional transaction label features.
///
/// All flags default to `false`. `TransactionLabel::from_transaction` fills in the
/// flags that can be derived from a single transaction and leaves
/// `is_rapid_succession` and `new_counterparty` as `false`; those two are set by
/// `TransactionLabelExtractor::extract_with_features`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct TransactionLabelFeatures {
    /// Is the suspicion reason `AmlTypology::Structuring`?
    pub is_structuring: bool,
    /// Is the amount just below the reporting threshold?
    /// `from_transaction` sets this when the amount is strictly between 8,000 and 10,000.
    pub below_threshold: bool,
    /// Is this a cash transaction?
    pub is_cash: bool,
    /// Is this an international transaction?
    pub is_international: bool,
    /// Rapid succession with other transactions?
    pub is_rapid_succession: bool,
    /// Is counterparty new/unknown?
    pub new_counterparty: bool,
    /// Round number amount?
    pub round_amount: bool,
    /// Transaction initiated on a weekend or outside hours 6 through 22?
    /// (Holidays are not checked by `from_transaction`.)
    pub unusual_timing: bool,
}
55
56impl TransactionLabel {
57    /// Create a new transaction label from a transaction.
58    pub fn from_transaction(txn: &BankTransaction) -> Self {
59        let amount_f64: f64 = txn.amount.try_into().unwrap_or(0.0);
60
61        Self {
62            transaction_id: txn.transaction_id,
63            is_suspicious: txn.is_suspicious,
64            suspicion_reason: txn.suspicion_reason,
65            laundering_stage: txn.laundering_stage,
66            case_id: txn.case_id.clone(),
67            is_spoofed: txn.is_spoofed,
68            spoofing_intensity: txn.spoofing_intensity,
69            scenario_sequence: txn.scenario_sequence,
70            confidence: 1.0, // Ground truth has full confidence
71            features: TransactionLabelFeatures {
72                is_structuring: txn.suspicion_reason == Some(AmlTypology::Structuring),
73                below_threshold: amount_f64 < 10_000.0 && amount_f64 > 8_000.0,
74                is_cash: matches!(
75                    txn.channel,
76                    datasynth_core::models::banking::TransactionChannel::Cash
77                ),
78                is_international: matches!(
79                    txn.category,
80                    datasynth_core::models::banking::TransactionCategory::InternationalTransfer
81                ),
82                is_rapid_succession: false, // Computed separately
83                new_counterparty: false,    // Computed separately
84                round_amount: Self::is_round_amount(amount_f64),
85                unusual_timing: Self::is_unusual_timing(txn),
86            },
87        }
88    }
89
90    /// Check if amount is a round number.
91    fn is_round_amount(amount: f64) -> bool {
92        let cents = (amount * 100.0) % 100.0;
93        cents.abs() < 0.01 && amount >= 100.0 && (amount % 100.0).abs() < 0.01
94    }
95
96    /// Check if timing is unusual (weekend/off-hours).
97    fn is_unusual_timing(txn: &BankTransaction) -> bool {
98        let weekday = txn.timestamp_initiated.weekday();
99        let hour = txn.timestamp_initiated.hour();
100
101        // Weekend or outside business hours
102        matches!(weekday, chrono::Weekday::Sat | chrono::Weekday::Sun) || !(6..=22).contains(&hour)
103    }
104}
105
/// Transaction label extractor.
///
/// A stateless unit struct: its associated functions turn slices of
/// `BankTransaction` into `TransactionLabel`s and summarize them.
pub struct TransactionLabelExtractor;
108
109impl TransactionLabelExtractor {
110    /// Extract labels from all transactions.
111    pub fn extract(transactions: &[BankTransaction]) -> Vec<TransactionLabel> {
112        transactions
113            .iter()
114            .map(TransactionLabel::from_transaction)
115            .collect()
116    }
117
118    /// Extract labels with computed features.
119    pub fn extract_with_features(transactions: &[BankTransaction]) -> Vec<TransactionLabel> {
120        let mut labels: Vec<_> = transactions
121            .iter()
122            .map(TransactionLabel::from_transaction)
123            .collect();
124
125        // Compute rapid succession features
126        Self::compute_rapid_succession(&mut labels, transactions);
127
128        // Compute new counterparty features
129        Self::compute_new_counterparty(&mut labels, transactions);
130
131        labels
132    }
133
134    /// Compute rapid succession feature.
135    fn compute_rapid_succession(labels: &mut [TransactionLabel], transactions: &[BankTransaction]) {
136        // Group transactions by account
137        use std::collections::HashMap;
138        let mut by_account: HashMap<Uuid, Vec<usize>> = HashMap::new();
139
140        for (i, txn) in transactions.iter().enumerate() {
141            by_account.entry(txn.account_id).or_default().push(i);
142        }
143
144        // Check for rapid succession within each account
145        for indices in by_account.values() {
146            for window in indices.windows(2) {
147                let t1 = &transactions[window[0]];
148                let t2 = &transactions[window[1]];
149
150                let duration = (t2.timestamp_initiated - t1.timestamp_initiated)
151                    .num_minutes()
152                    .abs();
153                if duration < 30 {
154                    labels[window[0]].features.is_rapid_succession = true;
155                    labels[window[1]].features.is_rapid_succession = true;
156                }
157            }
158        }
159    }
160
161    /// Compute new counterparty feature.
162    fn compute_new_counterparty(labels: &mut [TransactionLabel], transactions: &[BankTransaction]) {
163        use std::collections::{HashMap, HashSet};
164
165        // Track seen counterparties per account
166        let mut seen: HashMap<Uuid, HashSet<String>> = HashMap::new();
167
168        // Sort by timestamp
169        let mut sorted_indices: Vec<usize> = (0..transactions.len()).collect();
170        sorted_indices.sort_by_key(|&i| transactions[i].timestamp_initiated);
171
172        for idx in sorted_indices {
173            let txn = &transactions[idx];
174            let counterparty_key = txn.counterparty.name.clone();
175
176            let account_seen = seen.entry(txn.account_id).or_default();
177
178            if !account_seen.contains(&counterparty_key) {
179                labels[idx].features.new_counterparty = true;
180                account_seen.insert(counterparty_key);
181            }
182        }
183    }
184
185    /// Get summary statistics for labels.
186    pub fn summarize(labels: &[TransactionLabel]) -> LabelSummary {
187        let total = labels.len();
188        let suspicious = labels.iter().filter(|l| l.is_suspicious).count();
189        let spoofed = labels.iter().filter(|l| l.is_spoofed).count();
190
191        let mut by_typology = std::collections::HashMap::new();
192        let mut by_stage = std::collections::HashMap::new();
193
194        for label in labels {
195            if let Some(reason) = &label.suspicion_reason {
196                *by_typology.entry(*reason).or_insert(0) += 1;
197            }
198            if let Some(stage) = &label.laundering_stage {
199                *by_stage.entry(*stage).or_insert(0) += 1;
200            }
201        }
202
203        LabelSummary {
204            total_transactions: total,
205            suspicious_count: suspicious,
206            suspicious_rate: suspicious as f64 / total as f64,
207            spoofed_count: spoofed,
208            spoofed_rate: spoofed as f64 / total as f64,
209            by_typology,
210            by_stage,
211        }
212    }
213}
214
/// Summary statistics for transaction labels.
///
/// Produced by `TransactionLabelExtractor::summarize`.
#[derive(Debug, Clone)]
pub struct LabelSummary {
    /// Total number of transactions
    pub total_transactions: usize,
    /// Number of suspicious transactions
    pub suspicious_count: usize,
    /// Rate of suspicious transactions (`suspicious_count` divided by `total_transactions`)
    pub suspicious_rate: f64,
    /// Number of spoofed transactions
    pub spoofed_count: usize,
    /// Rate of spoofed transactions (`spoofed_count` divided by `total_transactions`)
    pub spoofed_rate: f64,
    /// Counts by typology (only labels that carry a suspicion reason are counted)
    pub by_typology: std::collections::HashMap<AmlTypology, usize>,
    /// Counts by laundering stage (only labels that carry a stage are counted)
    pub by_stage: std::collections::HashMap<LaunderingStage, usize>,
}
233
#[cfg(test)]
mod tests {
    use super::*;

    /// A suspicious 9,500 cash deposit should carry its typology and stage through
    /// to the label and be flagged as below-threshold cash.
    #[test]
    fn test_label_extraction() {
        use datasynth_core::models::banking::{Direction, TransactionCategory, TransactionChannel};

        let account = Uuid::new_v4();
        let transaction_id = Uuid::new_v4();

        let deposit = BankTransaction::new(
            transaction_id,
            account,
            rust_decimal::Decimal::from(9500),
            "USD",
            Direction::Inbound,
            TransactionChannel::Cash,
            TransactionCategory::CashDeposit,
            crate::models::CounterpartyRef::atm("ATM"),
            "Test deposit",
            chrono::Utc::now(),
        )
        .mark_suspicious(AmlTypology::Structuring, "TEST-001")
        .with_laundering_stage(LaunderingStage::Placement);

        let extracted = TransactionLabel::from_transaction(&deposit);

        // Ground-truth fields are copied from the transaction.
        assert!(extracted.is_suspicious);
        assert_eq!(extracted.suspicion_reason, Some(AmlTypology::Structuring));
        assert_eq!(extracted.laundering_stage, Some(LaunderingStage::Placement));

        // Derived feature flags.
        assert!(extracted.features.below_threshold);
        assert!(extracted.features.is_cash);
    }
}