datasynth_banking/labels/
transaction_labels.rs1use chrono::{Datelike, Timelike};
4use datasynth_core::models::banking::{AmlTypology, LaunderingStage};
5use serde::{Deserialize, Serialize};
6use uuid::Uuid;
7
8use crate::models::BankTransaction;
9
10#[derive(Debug, Clone, Serialize, Deserialize)]
12pub struct TransactionLabel {
13 pub transaction_id: Uuid,
15 pub is_suspicious: bool,
17 pub suspicion_reason: Option<AmlTypology>,
19 pub laundering_stage: Option<LaunderingStage>,
21 pub case_id: Option<String>,
23 pub is_spoofed: bool,
25 pub spoofing_intensity: Option<f64>,
27 pub scenario_sequence: Option<u32>,
29 pub confidence: f64,
31 pub features: TransactionLabelFeatures,
33}
34
35#[derive(Debug, Clone, Default, Serialize, Deserialize)]
37pub struct TransactionLabelFeatures {
38 pub is_structuring: bool,
40 pub below_threshold: bool,
42 pub is_cash: bool,
44 pub is_international: bool,
46 pub is_rapid_succession: bool,
48 pub new_counterparty: bool,
50 pub round_amount: bool,
52 pub unusual_timing: bool,
54}
55
56impl TransactionLabel {
57 pub fn from_transaction(txn: &BankTransaction) -> Self {
59 let amount_f64: f64 = txn.amount.try_into().unwrap_or(0.0);
60
61 Self {
62 transaction_id: txn.transaction_id,
63 is_suspicious: txn.is_suspicious,
64 suspicion_reason: txn.suspicion_reason,
65 laundering_stage: txn.laundering_stage,
66 case_id: txn.case_id.clone(),
67 is_spoofed: txn.is_spoofed,
68 spoofing_intensity: txn.spoofing_intensity,
69 scenario_sequence: txn.scenario_sequence,
70 confidence: 1.0, features: TransactionLabelFeatures {
72 is_structuring: txn.suspicion_reason == Some(AmlTypology::Structuring),
73 below_threshold: amount_f64 < 10_000.0 && amount_f64 > 8_000.0,
74 is_cash: matches!(
75 txn.channel,
76 datasynth_core::models::banking::TransactionChannel::Cash
77 ),
78 is_international: matches!(
79 txn.category,
80 datasynth_core::models::banking::TransactionCategory::InternationalTransfer
81 ),
82 is_rapid_succession: false, new_counterparty: false, round_amount: Self::is_round_amount(amount_f64),
85 unusual_timing: Self::is_unusual_timing(txn),
86 },
87 }
88 }
89
90 fn is_round_amount(amount: f64) -> bool {
92 let cents = (amount * 100.0) % 100.0;
93 cents.abs() < 0.01 && amount >= 100.0 && (amount % 100.0).abs() < 0.01
94 }
95
96 fn is_unusual_timing(txn: &BankTransaction) -> bool {
98 let weekday = txn.timestamp_initiated.weekday();
99 let hour = txn.timestamp_initiated.hour();
100
101 matches!(weekday, chrono::Weekday::Sat | chrono::Weekday::Sun) || !(6..=22).contains(&hour)
103 }
104}
105
106pub struct TransactionLabelExtractor;
108
109impl TransactionLabelExtractor {
110 pub fn extract(transactions: &[BankTransaction]) -> Vec<TransactionLabel> {
112 transactions
113 .iter()
114 .map(TransactionLabel::from_transaction)
115 .collect()
116 }
117
118 pub fn extract_with_features(transactions: &[BankTransaction]) -> Vec<TransactionLabel> {
120 let mut labels: Vec<_> = transactions
121 .iter()
122 .map(TransactionLabel::from_transaction)
123 .collect();
124
125 Self::compute_rapid_succession(&mut labels, transactions);
127
128 Self::compute_new_counterparty(&mut labels, transactions);
130
131 labels
132 }
133
134 fn compute_rapid_succession(labels: &mut [TransactionLabel], transactions: &[BankTransaction]) {
136 use std::collections::HashMap;
138 let mut by_account: HashMap<Uuid, Vec<usize>> = HashMap::new();
139
140 for (i, txn) in transactions.iter().enumerate() {
141 by_account.entry(txn.account_id).or_default().push(i);
142 }
143
144 for indices in by_account.values() {
146 for window in indices.windows(2) {
147 let t1 = &transactions[window[0]];
148 let t2 = &transactions[window[1]];
149
150 let duration = (t2.timestamp_initiated - t1.timestamp_initiated)
151 .num_minutes()
152 .abs();
153 if duration < 30 {
154 labels[window[0]].features.is_rapid_succession = true;
155 labels[window[1]].features.is_rapid_succession = true;
156 }
157 }
158 }
159 }
160
161 fn compute_new_counterparty(labels: &mut [TransactionLabel], transactions: &[BankTransaction]) {
163 use std::collections::{HashMap, HashSet};
164
165 let mut seen: HashMap<Uuid, HashSet<String>> = HashMap::new();
167
168 let mut sorted_indices: Vec<usize> = (0..transactions.len()).collect();
170 sorted_indices.sort_by_key(|&i| transactions[i].timestamp_initiated);
171
172 for idx in sorted_indices {
173 let txn = &transactions[idx];
174 let counterparty_key = txn.counterparty.name.clone();
175
176 let account_seen = seen.entry(txn.account_id).or_default();
177
178 if !account_seen.contains(&counterparty_key) {
179 labels[idx].features.new_counterparty = true;
180 account_seen.insert(counterparty_key);
181 }
182 }
183 }
184
185 pub fn summarize(labels: &[TransactionLabel]) -> LabelSummary {
187 let total = labels.len();
188 let suspicious = labels.iter().filter(|l| l.is_suspicious).count();
189 let spoofed = labels.iter().filter(|l| l.is_spoofed).count();
190
191 let mut by_typology = std::collections::HashMap::new();
192 let mut by_stage = std::collections::HashMap::new();
193
194 for label in labels {
195 if let Some(reason) = &label.suspicion_reason {
196 *by_typology.entry(*reason).or_insert(0) += 1;
197 }
198 if let Some(stage) = &label.laundering_stage {
199 *by_stage.entry(*stage).or_insert(0) += 1;
200 }
201 }
202
203 LabelSummary {
204 total_transactions: total,
205 suspicious_count: suspicious,
206 suspicious_rate: suspicious as f64 / total as f64,
207 spoofed_count: spoofed,
208 spoofed_rate: spoofed as f64 / total as f64,
209 by_typology,
210 by_stage,
211 }
212 }
213}
214
215#[derive(Debug, Clone)]
217pub struct LabelSummary {
218 pub total_transactions: usize,
220 pub suspicious_count: usize,
222 pub suspicious_rate: f64,
224 pub spoofed_count: usize,
226 pub spoofed_rate: f64,
228 pub by_typology: std::collections::HashMap<AmlTypology, usize>,
230 pub by_stage: std::collections::HashMap<LaunderingStage, usize>,
232}
233
#[cfg(test)]
mod tests {
    use super::*;
    use datasynth_core::models::banking::{Direction, TransactionCategory, TransactionChannel};

    /// A suspicious 9,500 USD cash deposit must carry its typology and stage
    /// over to the label and set the `below_threshold` and `is_cash` features.
    #[test]
    fn test_label_extraction() {
        let account_id = Uuid::new_v4();
        let amount = rust_decimal::Decimal::from(9500);
        let counterparty = crate::models::CounterpartyRef::atm("ATM");

        let base = BankTransaction::new(
            Uuid::new_v4(),
            account_id,
            amount,
            "USD",
            Direction::Inbound,
            TransactionChannel::Cash,
            TransactionCategory::CashDeposit,
            counterparty,
            "Test deposit",
            chrono::Utc::now(),
        );
        let txn = base
            .mark_suspicious(AmlTypology::Structuring, "TEST-001")
            .with_laundering_stage(LaunderingStage::Placement);

        let label = TransactionLabel::from_transaction(&txn);

        // Labelling fields are copied from the transaction.
        assert!(label.is_suspicious);
        assert_eq!(label.suspicion_reason, Some(AmlTypology::Structuring));
        assert_eq!(label.laundering_stage, Some(LaunderingStage::Placement));

        // Derived features.
        assert!(label.features.below_threshold);
        assert!(label.features.is_cash);
    }
}