datasynth_eval/banking/
aml_detectability.rs

1//! AML typology detectability evaluator.
2//!
3//! Validates that AML typologies (structuring, layering, mule networks, etc.)
4//! produce statistically detectable patterns and maintain coherence.
5
6use crate::error::EvalResult;
7use serde::{Deserialize, Serialize};
8use std::collections::HashMap;
9
/// One transaction attributed to an AML typology instance.
///
/// `typology` is expected to hold the canonical lowercase name produced
/// by `AmlTypology::canonical_name()`; the allowed values are listed in
/// [`EXPECTED_TYPOLOGIES`]. Names are compared as exact, case-sensitive
/// strings, so a PascalCase spelling (e.g. the Debug format of the enum)
/// will not be recognised.
#[derive(Debug, Clone)]
pub struct AmlTransactionData {
    /// Identifier of this transaction.
    pub transaction_id: String,
    /// Canonical typology name, e.g. "structuring", "mule", "fraud".
    pub typology: String,
    /// Case identifier; related transactions carry the same value.
    pub case_id: String,
    /// Amount of the transaction.
    pub amount: f64,
    /// `true` when the transaction is flagged as suspicious.
    pub is_flagged: bool,
}
29
/// Summary record for one generated typology, used for coverage and
/// coherence validation.
#[derive(Debug, Clone)]
pub struct TypologyData {
    /// Name of the typology as emitted by the generator.
    pub name: String,
    /// How many scenarios were generated for this typology.
    pub scenario_count: usize,
    /// `true` when every transaction within a scenario shares one case_id.
    pub case_ids_consistent: bool,
}
40
41/// Thresholds for AML detectability.
42#[derive(Debug, Clone, Serialize, Deserialize)]
43pub struct AmlDetectabilityThresholds {
44    /// Minimum typology coverage (fraction of expected typologies present).
45    pub min_typology_coverage: f64,
46    /// Minimum scenario coherence rate.
47    pub min_scenario_coherence: f64,
48    /// Structuring threshold (transactions should cluster below this).
49    pub structuring_threshold: f64,
50    /// Minimum transaction count below which the typology-coverage
51    /// metric is reported as advisory only (not a fail signal).
52    ///
53    /// v5.0.1 (Gap 2): with seven typology categories at heterogeneous
54    /// per-category prevalence, a 1 k-row sample can miss a single
55    /// low-rate category just by chance — that's a 14.3 pp drop in
56    /// reported coverage even though the generator is firing all
57    /// seven. Below this floor, we still compute coverage but skip
58    /// the threshold-failure issue and emit an advisory note instead.
59    pub min_sample_for_coverage: usize,
60}
61
62impl Default for AmlDetectabilityThresholds {
63    fn default() -> Self {
64        Self {
65            min_typology_coverage: 0.80,
66            min_scenario_coherence: 0.90,
67            structuring_threshold: 10_000.0,
68            min_sample_for_coverage: 5_000,
69        }
70    }
71}
72
73/// Per-typology detectability result.
74#[derive(Debug, Clone, Serialize, Deserialize)]
75pub struct TypologyDetectability {
76    /// Typology name.
77    pub name: String,
78    /// Number of transactions.
79    pub transaction_count: usize,
80    /// Number of unique cases.
81    pub case_count: usize,
82    /// Flag rate.
83    pub flag_rate: f64,
84    /// Whether the typology shows expected patterns.
85    pub pattern_detected: bool,
86}
87
88/// Results of AML detectability analysis.
89#[derive(Debug, Clone, Serialize, Deserialize)]
90pub struct AmlDetectabilityAnalysis {
91    /// Typology coverage: fraction of expected typologies present.
92    pub typology_coverage: f64,
93    /// Scenario coherence: fraction of scenarios with consistent case_ids.
94    pub scenario_coherence: f64,
95    /// Per-typology detectability.
96    pub per_typology: Vec<TypologyDetectability>,
97    /// Total transactions analyzed.
98    pub total_transactions: usize,
99    /// Overall pass/fail.
100    pub passes: bool,
101    /// Issues found.
102    pub issues: Vec<String>,
103}
104
/// Typology categories that the coverage metric expects to see.
///
/// Mirrors the banking module catalog in CLAUDE.md:
///   structuring, funnel, layering, mule, round_tripping, fraud, spoofing
///
/// Every entry has the shape `(canonical, aliases)`, and the alias list
/// begins with the canonical name itself. A category counts as
/// "covered" as soon as ANY of its names shows up in the observed
/// typology set.
///
/// v4.4.2: the aliases are the names the typology injectors emit into
/// `TypologyData.name` and `suspicion_reason`. Earlier versions matched
/// only the short canonical strings, so "money_mule" /
/// "funnel_account" / "first_party_fraud" / "authorized_push_payment"
/// were missed although the underlying typologies were firing — the SDK
/// team saw coverage 0.71 / 5-of-7 where the real coverage was
/// 1.0 / 7-of-7.
const EXPECTED_TYPOLOGIES: &[(&str, &[&str])] = &[
    // Structuring family.
    (
        "structuring",
        &["structuring", "smurfing", "cuckoo_smurfing"],
    ),
    // Funnel / concentration accounts.
    (
        "funnel",
        &[
            "funnel",
            "funnel_account",
            "concentration_account",
            "pouch_activity",
        ],
    ),
    // Layering family.
    ("layering", &["layering", "rapid_movement", "shell_company"]),
    // Mule networks.
    (
        "mule",
        &[
            "mule",
            "money_mule",
            "authorized_push_payment",
            "synthetic_identity",
        ],
    ),
    // Round-tripping and integration through trade / real estate.
    (
        "round_tripping",
        &[
            "round_tripping",
            "trade_based_ml",
            "real_estate_integration",
        ],
    ),
    // Fraud family.
    (
        "fraud",
        &[
            "fraud",
            "first_party_fraud",
            "account_takeover",
            "romance_scam",
            "sanctions_evasion",
        ],
    ),
    // Spoofing family.
    (
        "spoofing",
        &["spoofing", "casino_integration", "crypto_integration"],
    ),
];
167
168/// Analyzer for AML detectability.
169pub struct AmlDetectabilityAnalyzer {
170    thresholds: AmlDetectabilityThresholds,
171}
172
173impl AmlDetectabilityAnalyzer {
174    /// Create a new analyzer with default thresholds.
175    pub fn new() -> Self {
176        Self {
177            thresholds: AmlDetectabilityThresholds::default(),
178        }
179    }
180
181    /// Create with custom thresholds.
182    pub fn with_thresholds(thresholds: AmlDetectabilityThresholds) -> Self {
183        Self { thresholds }
184    }
185
186    /// Analyze AML transactions and typology data.
187    pub fn analyze(
188        &self,
189        transactions: &[AmlTransactionData],
190        typologies: &[TypologyData],
191    ) -> EvalResult<AmlDetectabilityAnalysis> {
192        let mut issues = Vec::new();
193
194        // 1. Typology coverage — a category counts as covered when ANY
195        // of its canonical / alias names appears in the observed
196        // typology set. v4.4.2+ matching against the alias table lets
197        // injector-emitted names like "money_mule" map to the "mule"
198        // category without forcing a rename in every injector.
199        let present_typologies: std::collections::HashSet<&str> =
200            typologies.iter().map(|t| t.name.as_str()).collect();
201        let covered = EXPECTED_TYPOLOGIES
202            .iter()
203            .filter(|(_, aliases)| aliases.iter().any(|a| present_typologies.contains(a)))
204            .count();
205        let typology_coverage = covered as f64 / EXPECTED_TYPOLOGIES.len() as f64;
206
207        // 2. Scenario coherence
208        let coherent = typologies.iter().filter(|t| t.case_ids_consistent).count();
209        let scenario_coherence = if typologies.is_empty() {
210            1.0
211        } else {
212            coherent as f64 / typologies.len() as f64
213        };
214
215        // 3. Per-typology analysis
216        let mut by_typology: HashMap<String, Vec<&AmlTransactionData>> = HashMap::new();
217        for txn in transactions {
218            by_typology
219                .entry(txn.typology.clone())
220                .or_default()
221                .push(txn);
222        }
223
224        let mut per_typology = Vec::new();
225        for (name, txns) in &by_typology {
226            let case_ids: std::collections::HashSet<&str> =
227                txns.iter().map(|t| t.case_id.as_str()).collect();
228            let flagged = txns.iter().filter(|t| t.is_flagged).count();
229            let flag_rate = if txns.is_empty() {
230                0.0
231            } else {
232                flagged as f64 / txns.len() as f64
233            };
234
235            // Check typology-specific patterns
236            let pattern_detected = match name.as_str() {
237                "structuring" => {
238                    // Most amounts should be below threshold
239                    let below = txns
240                        .iter()
241                        .filter(|t| t.amount < self.thresholds.structuring_threshold)
242                        .count();
243                    below as f64 / txns.len().max(1) as f64 > 0.5
244                }
245                "layering" => {
246                    // Should have multiple cases with >2 transactions each
247                    !case_ids.is_empty() && txns.len() > case_ids.len()
248                }
249                _ => {
250                    // Generic: require a meaningful flag rate indicating
251                    // the typology produces detectable suspicious patterns.
252                    // A flag rate of 0 means no suspicious indicators at all.
253                    let suspicious_count = txns.iter().filter(|t| t.is_flagged).count();
254                    let suspicious_ratio = suspicious_count as f64 / txns.len().max(1) as f64;
255                    !txns.is_empty() && suspicious_ratio > 0.0
256                }
257            };
258
259            per_typology.push(TypologyDetectability {
260                name: name.clone(),
261                transaction_count: txns.len(),
262                case_count: case_ids.len(),
263                flag_rate,
264                pattern_detected,
265            });
266        }
267
268        // Check thresholds. v5.0.1 (Gap 2): on samples below the
269        // coverage floor, emit an advisory but don't fail — the
270        // metric is statistically unstable at small N because a
271        // single low-prevalence category missing on chance produces
272        // a 14.3 pp wobble (1 / 7 categories). We track failures
273        // separately from advisories so the advisory text remains
274        // visible in `issues` without flipping `passes` to false.
275        let mut failed = false;
276        if transactions.len() < self.thresholds.min_sample_for_coverage {
277            issues.push(format!(
278                "Advisory: typology coverage {:.3} computed on {} txns \
279                 (< {} sample floor) — metric is statistically unstable; \
280                 increase sample size for a reliable reading.",
281                typology_coverage,
282                transactions.len(),
283                self.thresholds.min_sample_for_coverage
284            ));
285        } else if typology_coverage < self.thresholds.min_typology_coverage {
286            issues.push(format!(
287                "Typology coverage {:.3} < {:.3}",
288                typology_coverage, self.thresholds.min_typology_coverage
289            ));
290            failed = true;
291        }
292        if scenario_coherence < self.thresholds.min_scenario_coherence {
293            issues.push(format!(
294                "Scenario coherence {:.3} < {:.3}",
295                scenario_coherence, self.thresholds.min_scenario_coherence
296            ));
297            failed = true;
298        }
299
300        let passes = !failed;
301
302        Ok(AmlDetectabilityAnalysis {
303            typology_coverage,
304            scenario_coherence,
305            per_typology,
306            total_transactions: transactions.len(),
307            passes,
308            issues,
309        })
310    }
311}
312
313impl Default for AmlDetectabilityAnalyzer {
314    fn default() -> Self {
315        Self::new()
316    }
317}
318
#[cfg(test)]
mod tests {
    use super::*;

    /// A coherent typology record with five scenarios.
    fn typology(name: &str) -> TypologyData {
        TypologyData {
            name: name.to_string(),
            scenario_count: 5,
            case_ids_consistent: true,
        }
    }

    /// A flagged structuring transaction belonging to case "C001".
    fn structuring_txn(id: &str, amount: f64) -> AmlTransactionData {
        AmlTransactionData {
            transaction_id: id.to_string(),
            typology: "structuring".to_string(),
            case_id: "C001".to_string(),
            amount,
            is_flagged: true,
        }
    }

    /// Analyzer whose sample-size floor is lowered to zero so the
    /// strict coverage-failure path engages on tiny synthetic inputs.
    /// v5.0.1 (Gap 2): with the default 5_000-row floor, coverage
    /// failures become advisories below that size.
    fn strict_analyzer() -> AmlDetectabilityAnalyzer {
        AmlDetectabilityAnalyzer::with_thresholds(AmlDetectabilityThresholds {
            min_sample_for_coverage: 0,
            ..AmlDetectabilityThresholds::default()
        })
    }

    #[test]
    fn test_good_aml_data() {
        // Feed the canonical name (first tuple element) of every
        // category so all of them count as covered.
        let typologies: Vec<TypologyData> = EXPECTED_TYPOLOGIES
            .iter()
            .map(|(canonical, _aliases)| typology(canonical))
            .collect();
        let transactions = vec![
            structuring_txn("T001", 9_500.0),
            structuring_txn("T002", 9_800.0),
        ];

        let result = AmlDetectabilityAnalyzer::new()
            .analyze(&transactions, &typologies)
            .unwrap();
        assert!(result.passes);
        assert_eq!(result.typology_coverage, 1.0);
    }

    #[test]
    fn test_missing_typologies() {
        let typologies = vec![typology("structuring")];

        let result = strict_analyzer().analyze(&[], &typologies).unwrap();
        assert!(!result.passes); // Coverage too low
    }

    #[test]
    fn test_empty() {
        let result = strict_analyzer().analyze(&[], &[]).unwrap();
        assert!(!result.passes); // Zero coverage
    }

    #[test]
    fn test_small_sample_advisory_does_not_fail() {
        // v5.0.1 (Gap 2): under the 5_000-row floor a missing typology
        // only yields an advisory (kept in `issues` for visibility) and
        // leaves `passes` true, shielding users from the 14.3 pp
        // coverage wobble inherent to small samples.
        let typologies = vec![typology("structuring")];
        let transactions = vec![structuring_txn("T001", 9_500.0)];

        let result = AmlDetectabilityAnalyzer::new()
            .analyze(&transactions, &typologies)
            .unwrap();
        assert!(result.passes, "small sample should not fail on coverage");
        assert!(
            result.issues.iter().any(|i| i.starts_with("Advisory:")),
            "small sample should surface an advisory issue, got: {:?}",
            result.issues
        );
    }
}
datasynth_eval/banking/aml_detectability.rs

datasynth_eval/banking/
aml_detectability.rs