Skip to main content

datasynth_eval/banking/
aml_detectability.rs

1//! AML typology detectability evaluator.
2//!
3//! Validates that AML typologies (structuring, layering, mule networks, etc.)
4//! produce statistically detectable patterns and maintain coherence.
5
6use crate::error::EvalResult;
7use serde::{Deserialize, Serialize};
8use std::collections::HashMap;
9
/// A single transaction attributed to an AML typology instance.
///
/// Transactions are grouped by [`typology`](Self::typology) during analysis,
/// and the distinct [`case_id`](Self::case_id) values within each group are
/// counted as that typology's cases.
#[derive(Debug, Clone)]
pub struct AmlTransactionData {
    /// Identifier of this transaction.
    pub transaction_id: String,
    /// Name of the typology this transaction belongs to
    /// (for example `"structuring"`, `"layering"`, or `"mule_network"`).
    pub typology: String,
    /// Identifier of the case; related transactions carry the same value.
    pub case_id: String,
    /// Amount of the transaction.
    pub amount: f64,
    /// `true` when the transaction is marked as flagged/suspicious.
    pub is_flagged: bool,
}
24
/// Summary record for one generated typology, used for coverage and
/// coherence validation.
#[derive(Debug, Clone)]
pub struct TypologyData {
    /// Name of the typology.
    pub name: String,
    /// How many scenarios were generated for this typology.
    pub scenario_count: usize,
    /// `true` when every transaction within a scenario shares one `case_id`.
    pub case_ids_consistent: bool,
}
35
36/// Thresholds for AML detectability.
37#[derive(Debug, Clone, Serialize, Deserialize)]
38pub struct AmlDetectabilityThresholds {
39    /// Minimum typology coverage (fraction of expected typologies present).
40    pub min_typology_coverage: f64,
41    /// Minimum scenario coherence rate.
42    pub min_scenario_coherence: f64,
43    /// Structuring threshold (transactions should cluster below this).
44    pub structuring_threshold: f64,
45}
46
47impl Default for AmlDetectabilityThresholds {
48    fn default() -> Self {
49        Self {
50            min_typology_coverage: 0.80,
51            min_scenario_coherence: 0.90,
52            structuring_threshold: 10_000.0,
53        }
54    }
55}
56
57/// Per-typology detectability result.
58#[derive(Debug, Clone, Serialize, Deserialize)]
59pub struct TypologyDetectability {
60    /// Typology name.
61    pub name: String,
62    /// Number of transactions.
63    pub transaction_count: usize,
64    /// Number of unique cases.
65    pub case_count: usize,
66    /// Flag rate.
67    pub flag_rate: f64,
68    /// Whether the typology shows expected patterns.
69    pub pattern_detected: bool,
70}
71
72/// Results of AML detectability analysis.
73#[derive(Debug, Clone, Serialize, Deserialize)]
74pub struct AmlDetectabilityAnalysis {
75    /// Typology coverage: fraction of expected typologies present.
76    pub typology_coverage: f64,
77    /// Scenario coherence: fraction of scenarios with consistent case_ids.
78    pub scenario_coherence: f64,
79    /// Per-typology detectability.
80    pub per_typology: Vec<TypologyDetectability>,
81    /// Total transactions analyzed.
82    pub total_transactions: usize,
83    /// Overall pass/fail.
84    pub passes: bool,
85    /// Issues found.
86    pub issues: Vec<String>,
87}
88
/// Typology names that a complete data set is expected to contain.
///
/// Typology coverage is measured against this list.
const EXPECTED_TYPOLOGIES: &[&str] = &[
    "structuring",
    "layering",
    "mule_network",
    "round_tripping",
    "fraud",
    "spoofing",
];
98
99/// Analyzer for AML detectability.
100pub struct AmlDetectabilityAnalyzer {
101    thresholds: AmlDetectabilityThresholds,
102}
103
104impl AmlDetectabilityAnalyzer {
105    /// Create a new analyzer with default thresholds.
106    pub fn new() -> Self {
107        Self {
108            thresholds: AmlDetectabilityThresholds::default(),
109        }
110    }
111
112    /// Create with custom thresholds.
113    pub fn with_thresholds(thresholds: AmlDetectabilityThresholds) -> Self {
114        Self { thresholds }
115    }
116
117    /// Analyze AML transactions and typology data.
118    pub fn analyze(
119        &self,
120        transactions: &[AmlTransactionData],
121        typologies: &[TypologyData],
122    ) -> EvalResult<AmlDetectabilityAnalysis> {
123        let mut issues = Vec::new();
124
125        // 1. Typology coverage
126        let present_typologies: std::collections::HashSet<&str> =
127            typologies.iter().map(|t| t.name.as_str()).collect();
128        let covered = EXPECTED_TYPOLOGIES
129            .iter()
130            .filter(|&&t| present_typologies.contains(t))
131            .count();
132        let typology_coverage = covered as f64 / EXPECTED_TYPOLOGIES.len() as f64;
133
134        // 2. Scenario coherence
135        let coherent = typologies.iter().filter(|t| t.case_ids_consistent).count();
136        let scenario_coherence = if typologies.is_empty() {
137            1.0
138        } else {
139            coherent as f64 / typologies.len() as f64
140        };
141
142        // 3. Per-typology analysis
143        let mut by_typology: HashMap<String, Vec<&AmlTransactionData>> = HashMap::new();
144        for txn in transactions {
145            by_typology
146                .entry(txn.typology.clone())
147                .or_default()
148                .push(txn);
149        }
150
151        let mut per_typology = Vec::new();
152        for (name, txns) in &by_typology {
153            let case_ids: std::collections::HashSet<&str> =
154                txns.iter().map(|t| t.case_id.as_str()).collect();
155            let flagged = txns.iter().filter(|t| t.is_flagged).count();
156            let flag_rate = if txns.is_empty() {
157                0.0
158            } else {
159                flagged as f64 / txns.len() as f64
160            };
161
162            // Check typology-specific patterns
163            let pattern_detected = match name.as_str() {
164                "structuring" => {
165                    // Most amounts should be below threshold
166                    let below = txns
167                        .iter()
168                        .filter(|t| t.amount < self.thresholds.structuring_threshold)
169                        .count();
170                    below as f64 / txns.len().max(1) as f64 > 0.5
171                }
172                "layering" => {
173                    // Should have multiple cases with >2 transactions each
174                    !case_ids.is_empty() && txns.len() > case_ids.len()
175                }
176                _ => {
177                    // Generic: require a meaningful flag rate indicating
178                    // the typology produces detectable suspicious patterns.
179                    // A flag rate of 0 means no suspicious indicators at all.
180                    let suspicious_count = txns.iter().filter(|t| t.is_flagged).count();
181                    let suspicious_ratio = suspicious_count as f64 / txns.len().max(1) as f64;
182                    !txns.is_empty() && suspicious_ratio > 0.0
183                }
184            };
185
186            per_typology.push(TypologyDetectability {
187                name: name.clone(),
188                transaction_count: txns.len(),
189                case_count: case_ids.len(),
190                flag_rate,
191                pattern_detected,
192            });
193        }
194
195        // Check thresholds
196        if typology_coverage < self.thresholds.min_typology_coverage {
197            issues.push(format!(
198                "Typology coverage {:.3} < {:.3}",
199                typology_coverage, self.thresholds.min_typology_coverage
200            ));
201        }
202        if scenario_coherence < self.thresholds.min_scenario_coherence {
203            issues.push(format!(
204                "Scenario coherence {:.3} < {:.3}",
205                scenario_coherence, self.thresholds.min_scenario_coherence
206            ));
207        }
208
209        let passes = issues.is_empty();
210
211        Ok(AmlDetectabilityAnalysis {
212            typology_coverage,
213            scenario_coherence,
214            per_typology,
215            total_transactions: transactions.len(),
216            passes,
217            issues,
218        })
219    }
220}
221
222impl Default for AmlDetectabilityAnalyzer {
223    fn default() -> Self {
224        Self::new()
225    }
226}
227
#[cfg(test)]
// Tests unwrap freely: a failed analysis should fail the test immediately.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Builds one `TypologyData` per expected typology with the given
    /// consistency flag.
    fn expected_typologies(case_ids_consistent: bool) -> Vec<TypologyData> {
        EXPECTED_TYPOLOGIES
            .iter()
            .map(|name| TypologyData {
                name: (*name).to_string(),
                scenario_count: 5,
                case_ids_consistent,
            })
            .collect()
    }

    /// Shorthand constructor for a transaction.
    fn txn(
        id: &str,
        typology: &str,
        case_id: &str,
        amount: f64,
        is_flagged: bool,
    ) -> AmlTransactionData {
        AmlTransactionData {
            transaction_id: id.to_string(),
            typology: typology.to_string(),
            case_id: case_id.to_string(),
            amount,
            is_flagged,
        }
    }

    /// Looks up a per-typology entry by name, independent of list order.
    fn detectability<'a>(
        analysis: &'a AmlDetectabilityAnalysis,
        name: &str,
    ) -> &'a TypologyDetectability {
        analysis
            .per_typology
            .iter()
            .find(|t| t.name == name)
            .unwrap()
    }

    #[test]
    fn test_good_aml_data() {
        let analyzer = AmlDetectabilityAnalyzer::new();
        let typologies = expected_typologies(true);
        let transactions = vec![
            txn("T001", "structuring", "C001", 9_500.0, true),
            txn("T002", "structuring", "C001", 9_800.0, true),
        ];

        let result = analyzer.analyze(&transactions, &typologies).unwrap();
        assert!(result.passes);
        assert!(result.issues.is_empty());
        assert_eq!(result.typology_coverage, 1.0);
        assert_eq!(result.scenario_coherence, 1.0);
        assert_eq!(result.total_transactions, 2);

        assert_eq!(result.per_typology.len(), 1);
        let structuring = detectability(&result, "structuring");
        assert_eq!(structuring.transaction_count, 2);
        assert_eq!(structuring.case_count, 1);
        assert_eq!(structuring.flag_rate, 1.0);
        assert!(structuring.pattern_detected);
    }

    #[test]
    fn test_missing_typologies() {
        let analyzer = AmlDetectabilityAnalyzer::new();
        let typologies = vec![TypologyData {
            name: "structuring".to_string(),
            scenario_count: 5,
            case_ids_consistent: true,
        }];

        let result = analyzer.analyze(&[], &typologies).unwrap();
        assert!(!result.passes); // Coverage too low
        assert_eq!(result.issues.len(), 1);
        assert!(result.issues[0].starts_with("Typology coverage"));
    }

    #[test]
    fn test_empty() {
        let analyzer = AmlDetectabilityAnalyzer::new();
        let result = analyzer.analyze(&[], &[]).unwrap();
        assert!(!result.passes); // Zero coverage
        assert_eq!(result.typology_coverage, 0.0);
        // With no scenarios, coherence is reported as fully coherent.
        assert_eq!(result.scenario_coherence, 1.0);
        assert!(result.per_typology.is_empty());
        assert_eq!(result.total_transactions, 0);
    }

    #[test]
    fn test_incoherent_scenarios() {
        let analyzer = AmlDetectabilityAnalyzer::new();
        let typologies = expected_typologies(false);

        let result = analyzer.analyze(&[], &typologies).unwrap();
        assert!(!result.passes); // Coherence too low
        assert_eq!(result.typology_coverage, 1.0);
        assert_eq!(result.scenario_coherence, 0.0);
        assert_eq!(result.issues.len(), 1);
        assert!(result.issues[0].starts_with("Scenario coherence"));
    }

    #[test]
    fn test_structuring_above_threshold_not_detected() {
        let analyzer = AmlDetectabilityAnalyzer::new();
        let transactions = vec![
            txn("T001", "structuring", "C001", 15_000.0, true),
            txn("T002", "structuring", "C001", 20_000.0, true),
        ];

        let result = analyzer
            .analyze(&transactions, &expected_typologies(true))
            .unwrap();
        assert!(!detectability(&result, "structuring").pattern_detected);
    }

    #[test]
    fn test_layering_pattern() {
        let analyzer = AmlDetectabilityAnalyzer::new();
        let typologies = expected_typologies(true);

        // Two transactions in the same case: more transactions than cases.
        let shared_case = vec![
            txn("L001", "layering", "C001", 1_000.0, false),
            txn("L002", "layering", "C001", 2_000.0, false),
        ];
        let result = analyzer.analyze(&shared_case, &typologies).unwrap();
        let layering = detectability(&result, "layering");
        assert_eq!(layering.case_count, 1);
        assert!(layering.pattern_detected);

        // One transaction per case: no case has related transactions.
        let separate_cases = vec![
            txn("L001", "layering", "C001", 1_000.0, false),
            txn("L002", "layering", "C002", 2_000.0, false),
        ];
        let result = analyzer.analyze(&separate_cases, &typologies).unwrap();
        let layering = detectability(&result, "layering");
        assert_eq!(layering.case_count, 2);
        assert!(!layering.pattern_detected);
    }

    #[test]
    fn test_generic_typology_requires_flagged_transaction() {
        let analyzer = AmlDetectabilityAnalyzer::new();
        let typologies = expected_typologies(true);

        let unflagged = vec![
            txn("M001", "mule_network", "C001", 500.0, false),
            txn("M002", "mule_network", "C001", 700.0, false),
        ];
        let result = analyzer.analyze(&unflagged, &typologies).unwrap();
        let mule = detectability(&result, "mule_network");
        assert_eq!(mule.flag_rate, 0.0);
        assert!(!mule.pattern_detected);

        let partly_flagged = vec![
            txn("M001", "mule_network", "C001", 500.0, true),
            txn("M002", "mule_network", "C001", 700.0, false),
        ];
        let result = analyzer.analyze(&partly_flagged, &typologies).unwrap();
        let mule = detectability(&result, "mule_network");
        assert_eq!(mule.flag_rate, 0.5);
        assert!(mule.pattern_detected);
    }
}
285}