Skip to main content

datasynth_eval/banking/
aml_detectability.rs

1//! AML typology detectability evaluator.
2//!
3//! Validates that AML typologies (structuring, layering, mule networks, etc.)
4//! produce statistically detectable patterns and maintain coherence.
5
6use crate::error::EvalResult;
7use serde::{Deserialize, Serialize};
8use std::collections::HashMap;
9
/// One transaction attributed to an AML typology instance.
///
/// `typology` should hold the canonical lowercase name produced by
/// `AmlTypology::canonical_name()` (the allowed values are listed on
/// [`EXPECTED_TYPOLOGIES`]). The analyzer groups transactions by this
/// string and matches it exactly, so a PascalCase spelling (e.g. the
/// `Debug` output of the enum) will not hit the typology-specific checks.
#[derive(Debug, Clone)]
pub struct AmlTransactionData {
    /// Unique identifier of the transaction.
    pub transaction_id: String,
    /// Canonical typology name, e.g. "structuring", "mule", "fraud".
    pub typology: String,
    /// Case identifier; related transactions carry the same value.
    pub case_id: String,
    /// Amount of the transaction.
    pub amount: f64,
    /// `true` when the transaction is flagged as suspicious.
    pub is_flagged: bool,
}
29
/// Summary of one generated typology, used for coverage and coherence checks.
#[derive(Debug, Clone)]
pub struct TypologyData {
    /// Name of the typology.
    pub name: String,
    /// How many scenarios were generated for this typology.
    pub scenario_count: usize,
    /// `true` when every transaction within a scenario shares one case_id.
    pub case_ids_consistent: bool,
}
40
41/// Thresholds for AML detectability.
42#[derive(Debug, Clone, Serialize, Deserialize)]
43pub struct AmlDetectabilityThresholds {
44    /// Minimum typology coverage (fraction of expected typologies present).
45    pub min_typology_coverage: f64,
46    /// Minimum scenario coherence rate.
47    pub min_scenario_coherence: f64,
48    /// Structuring threshold (transactions should cluster below this).
49    pub structuring_threshold: f64,
50}
51
52impl Default for AmlDetectabilityThresholds {
53    fn default() -> Self {
54        Self {
55            min_typology_coverage: 0.80,
56            min_scenario_coherence: 0.90,
57            structuring_threshold: 10_000.0,
58        }
59    }
60}
61
62/// Per-typology detectability result.
63#[derive(Debug, Clone, Serialize, Deserialize)]
64pub struct TypologyDetectability {
65    /// Typology name.
66    pub name: String,
67    /// Number of transactions.
68    pub transaction_count: usize,
69    /// Number of unique cases.
70    pub case_count: usize,
71    /// Flag rate.
72    pub flag_rate: f64,
73    /// Whether the typology shows expected patterns.
74    pub pattern_detected: bool,
75}
76
77/// Results of AML detectability analysis.
78#[derive(Debug, Clone, Serialize, Deserialize)]
79pub struct AmlDetectabilityAnalysis {
80    /// Typology coverage: fraction of expected typologies present.
81    pub typology_coverage: f64,
82    /// Scenario coherence: fraction of scenarios with consistent case_ids.
83    pub scenario_coherence: f64,
84    /// Per-typology detectability.
85    pub per_typology: Vec<TypologyDetectability>,
86    /// Total transactions analyzed.
87    pub total_transactions: usize,
88    /// Overall pass/fail.
89    pub passes: bool,
90    /// Issues found.
91    pub issues: Vec<String>,
92}
93
/// Typology names that count towards the coverage calculation.
///
/// Mirrors the banking module catalog in CLAUDE.md:
///   structuring, funnel, layering, mule, round_tripping, fraud, spoofing
///
/// The evaluator compares names by exact string equality, so the value
/// stored in `TypologyData.name` MUST be the canonical form returned by
/// `AmlTypology::canonical_name()`.
///
/// History: an earlier list contained `mule_network`, which no variant maps
/// to, and callers that used `format!("{:?}", typology)` (PascalCase Debug
/// output) never matched any entry. As a result `typology_coverage` was 0.0
/// in v3.1 no matter how many typologies fired.
const EXPECTED_TYPOLOGIES: &[&str] = &[
    "structuring",
    "funnel",
    "layering",
    "mule",
    "round_tripping",
    "fraud",
    "spoofing",
];
114
115/// Analyzer for AML detectability.
116pub struct AmlDetectabilityAnalyzer {
117    thresholds: AmlDetectabilityThresholds,
118}
119
120impl AmlDetectabilityAnalyzer {
121    /// Create a new analyzer with default thresholds.
122    pub fn new() -> Self {
123        Self {
124            thresholds: AmlDetectabilityThresholds::default(),
125        }
126    }
127
128    /// Create with custom thresholds.
129    pub fn with_thresholds(thresholds: AmlDetectabilityThresholds) -> Self {
130        Self { thresholds }
131    }
132
133    /// Analyze AML transactions and typology data.
134    pub fn analyze(
135        &self,
136        transactions: &[AmlTransactionData],
137        typologies: &[TypologyData],
138    ) -> EvalResult<AmlDetectabilityAnalysis> {
139        let mut issues = Vec::new();
140
141        // 1. Typology coverage
142        let present_typologies: std::collections::HashSet<&str> =
143            typologies.iter().map(|t| t.name.as_str()).collect();
144        let covered = EXPECTED_TYPOLOGIES
145            .iter()
146            .filter(|&&t| present_typologies.contains(t))
147            .count();
148        let typology_coverage = covered as f64 / EXPECTED_TYPOLOGIES.len() as f64;
149
150        // 2. Scenario coherence
151        let coherent = typologies.iter().filter(|t| t.case_ids_consistent).count();
152        let scenario_coherence = if typologies.is_empty() {
153            1.0
154        } else {
155            coherent as f64 / typologies.len() as f64
156        };
157
158        // 3. Per-typology analysis
159        let mut by_typology: HashMap<String, Vec<&AmlTransactionData>> = HashMap::new();
160        for txn in transactions {
161            by_typology
162                .entry(txn.typology.clone())
163                .or_default()
164                .push(txn);
165        }
166
167        let mut per_typology = Vec::new();
168        for (name, txns) in &by_typology {
169            let case_ids: std::collections::HashSet<&str> =
170                txns.iter().map(|t| t.case_id.as_str()).collect();
171            let flagged = txns.iter().filter(|t| t.is_flagged).count();
172            let flag_rate = if txns.is_empty() {
173                0.0
174            } else {
175                flagged as f64 / txns.len() as f64
176            };
177
178            // Check typology-specific patterns
179            let pattern_detected = match name.as_str() {
180                "structuring" => {
181                    // Most amounts should be below threshold
182                    let below = txns
183                        .iter()
184                        .filter(|t| t.amount < self.thresholds.structuring_threshold)
185                        .count();
186                    below as f64 / txns.len().max(1) as f64 > 0.5
187                }
188                "layering" => {
189                    // Should have multiple cases with >2 transactions each
190                    !case_ids.is_empty() && txns.len() > case_ids.len()
191                }
192                _ => {
193                    // Generic: require a meaningful flag rate indicating
194                    // the typology produces detectable suspicious patterns.
195                    // A flag rate of 0 means no suspicious indicators at all.
196                    let suspicious_count = txns.iter().filter(|t| t.is_flagged).count();
197                    let suspicious_ratio = suspicious_count as f64 / txns.len().max(1) as f64;
198                    !txns.is_empty() && suspicious_ratio > 0.0
199                }
200            };
201
202            per_typology.push(TypologyDetectability {
203                name: name.clone(),
204                transaction_count: txns.len(),
205                case_count: case_ids.len(),
206                flag_rate,
207                pattern_detected,
208            });
209        }
210
211        // Check thresholds
212        if typology_coverage < self.thresholds.min_typology_coverage {
213            issues.push(format!(
214                "Typology coverage {:.3} < {:.3}",
215                typology_coverage, self.thresholds.min_typology_coverage
216            ));
217        }
218        if scenario_coherence < self.thresholds.min_scenario_coherence {
219            issues.push(format!(
220                "Scenario coherence {:.3} < {:.3}",
221                scenario_coherence, self.thresholds.min_scenario_coherence
222            ));
223        }
224
225        let passes = issues.is_empty();
226
227        Ok(AmlDetectabilityAnalysis {
228            typology_coverage,
229            scenario_coherence,
230            per_typology,
231            total_transactions: transactions.len(),
232            passes,
233            issues,
234        })
235    }
236}
237
238impl Default for AmlDetectabilityAnalyzer {
239    fn default() -> Self {
240        Self::new()
241    }
242}
243
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Builds a typology entry with five scenarios and consistent case ids.
    fn coherent_typology(name: &str) -> TypologyData {
        TypologyData {
            name: name.to_string(),
            scenario_count: 5,
            case_ids_consistent: true,
        }
    }

    /// Builds a flagged structuring transaction belonging to case "C001".
    fn flagged_structuring_txn(id: &str, amount: f64) -> AmlTransactionData {
        AmlTransactionData {
            transaction_id: id.to_string(),
            typology: "structuring".to_string(),
            case_id: "C001".to_string(),
            amount,
            is_flagged: true,
        }
    }

    /// Every expected typology present and coherent: the analysis passes.
    #[test]
    fn test_good_aml_data() {
        let typologies: Vec<TypologyData> = EXPECTED_TYPOLOGIES
            .iter()
            .copied()
            .map(coherent_typology)
            .collect();
        let transactions = vec![
            flagged_structuring_txn("T001", 9_500.0),
            flagged_structuring_txn("T002", 9_800.0),
        ];

        let result = AmlDetectabilityAnalyzer::new()
            .analyze(&transactions, &typologies)
            .unwrap();

        assert!(result.passes);
        assert_eq!(result.typology_coverage, 1.0);
    }

    /// Only one of the expected typologies present: coverage is too low.
    #[test]
    fn test_missing_typologies() {
        let typologies = vec![coherent_typology("structuring")];

        let result = AmlDetectabilityAnalyzer::new()
            .analyze(&[], &typologies)
            .unwrap();

        assert!(!result.passes); // Coverage too low
    }

    /// No input at all: coverage is zero, so the analysis fails.
    #[test]
    fn test_empty() {
        let result = AmlDetectabilityAnalyzer::new().analyze(&[], &[]).unwrap();

        assert!(!result.passes); // Zero coverage
    }
}