datasynth_eval/banking/
sanctions_screening.rs

1//! Sanctions screening evaluator.
2//!
3//! Validates that screening outcomes correlate appropriately with risk factors:
4//! - Low-risk customers mostly Clear
5//! - High-risk / sanctioned-country customers have elevated hit rates
6//! - PEPs have name variations populated
7
8use serde::{Deserialize, Serialize};
9
10use crate::error::EvalResult;
11
/// One customer's screening outcome together with the risk attributes the
/// analyzer correlates it against.
///
/// The categorical fields are plain strings and are compared exactly
/// (case-sensitively) by [`SanctionsScreeningAnalyzer::analyze`].
#[derive(Debug, Clone)]
pub struct ScreeningObservation {
    /// Risk tier label: `"low"`, `"medium"`, `"high"`, `"very_high"` or
    /// `"prohibited"`. The analyzer buckets `"low"` as low-risk and
    /// `"high"` / `"very_high"` / `"prohibited"` as high-risk; any other
    /// value (including `"medium"`) falls into neither bucket.
    pub risk_tier: String,
    /// Whether the customer is a politically exposed person.
    pub is_pep: bool,
    /// Whether the customer is associated with a high-risk country.
    pub is_high_risk_country: bool,
    /// Screening outcome: `"clear"`, `"potential_match"` or
    /// `"confirmed_match"`. The analyzer treats every value other than an
    /// exact `"clear"` as a match when computing rates.
    pub screening_result: String,
    /// Whether name variations were populated for this customer.
    pub has_name_variations: bool,
}
20
21#[derive(Debug, Clone, Serialize, Deserialize)]
22pub struct SanctionsScreeningThresholds {
23    /// Low-risk customers should be Clear >95% of the time
24    pub min_low_risk_clear_rate: f64,
25    /// High-risk customers should have non-Clear >5% of the time
26    pub min_high_risk_match_rate: f64,
27    /// PEPs should have name_variations populated >90%
28    pub min_pep_variations_rate: f64,
29    /// High-risk country customers should have elevated match rate
30    pub min_high_risk_country_match_rate: f64,
31}
32
33impl Default for SanctionsScreeningThresholds {
34    fn default() -> Self {
35        Self {
36            min_low_risk_clear_rate: 0.95,
37            min_high_risk_match_rate: 0.05,
38            min_pep_variations_rate: 0.90,
39            min_high_risk_country_match_rate: 0.05,
40        }
41    }
42}
43
44#[derive(Debug, Clone, Serialize, Deserialize)]
45pub struct SanctionsScreeningAnalysis {
46    pub total_customers: usize,
47    pub low_risk_clear_rate: f64,
48    pub high_risk_match_rate: f64,
49    pub pep_variations_rate: f64,
50    pub high_risk_country_match_rate: f64,
51    pub confirmed_match_count: usize,
52    pub potential_match_count: usize,
53    pub passes: bool,
54    pub issues: Vec<String>,
55}
56
57pub struct SanctionsScreeningAnalyzer {
58    pub thresholds: SanctionsScreeningThresholds,
59}
60
61impl SanctionsScreeningAnalyzer {
62    pub fn new() -> Self {
63        Self {
64            thresholds: SanctionsScreeningThresholds::default(),
65        }
66    }
67
68    pub fn analyze(
69        &self,
70        observations: &[ScreeningObservation],
71    ) -> EvalResult<SanctionsScreeningAnalysis> {
72        let total = observations.len();
73        let mut low_risk_total = 0usize;
74        let mut low_risk_clear = 0usize;
75        let mut high_risk_total = 0usize;
76        let mut high_risk_match = 0usize;
77        let mut pep_total = 0usize;
78        let mut pep_with_variations = 0usize;
79        let mut hrc_total = 0usize;
80        let mut hrc_match = 0usize;
81        let mut confirmed = 0usize;
82        let mut potential = 0usize;
83
84        for obs in observations {
85            let is_match = obs.screening_result != "clear";
86            if obs.screening_result == "confirmed_match" {
87                confirmed += 1;
88            } else if obs.screening_result == "potential_match" {
89                potential += 1;
90            }
91
92            if obs.risk_tier == "low" {
93                low_risk_total += 1;
94                if !is_match {
95                    low_risk_clear += 1;
96                }
97            }
98            if matches!(obs.risk_tier.as_str(), "high" | "very_high" | "prohibited") {
99                high_risk_total += 1;
100                if is_match {
101                    high_risk_match += 1;
102                }
103            }
104            if obs.is_pep {
105                pep_total += 1;
106                if obs.has_name_variations {
107                    pep_with_variations += 1;
108                }
109            }
110            if obs.is_high_risk_country {
111                hrc_total += 1;
112                if is_match {
113                    hrc_match += 1;
114                }
115            }
116        }
117
118        let low_rate = if low_risk_total > 0 {
119            low_risk_clear as f64 / low_risk_total as f64
120        } else {
121            1.0
122        };
123        let high_rate = if high_risk_total > 0 {
124            high_risk_match as f64 / high_risk_total as f64
125        } else {
126            1.0
127        };
128        let pep_rate = if pep_total > 0 {
129            pep_with_variations as f64 / pep_total as f64
130        } else {
131            1.0
132        };
133        let hrc_rate = if hrc_total > 0 {
134            hrc_match as f64 / hrc_total as f64
135        } else {
136            1.0
137        };
138
139        let mut issues = Vec::new();
140        if low_risk_total > 10 && low_rate < self.thresholds.min_low_risk_clear_rate {
141            issues.push(format!(
142                "Low-risk clear rate {:.1}% below minimum {:.1}% — too many false matches",
143                low_rate * 100.0,
144                self.thresholds.min_low_risk_clear_rate * 100.0,
145            ));
146        }
147        if high_risk_total > 10 && high_rate < self.thresholds.min_high_risk_match_rate {
148            issues.push(format!(
149                "High-risk match rate {:.1}% below minimum {:.1}% — screening not detecting risky customers",
150                high_rate * 100.0,
151                self.thresholds.min_high_risk_match_rate * 100.0,
152            ));
153        }
154        if pep_total > 0 && pep_rate < self.thresholds.min_pep_variations_rate {
155            issues.push(format!(
156                "PEP name-variation rate {:.1}% below minimum {:.1}%",
157                pep_rate * 100.0,
158                self.thresholds.min_pep_variations_rate * 100.0,
159            ));
160        }
161        if hrc_total > 10 && hrc_rate < self.thresholds.min_high_risk_country_match_rate {
162            issues.push(format!(
163                "High-risk-country match rate {:.1}% below minimum {:.1}%",
164                hrc_rate * 100.0,
165                self.thresholds.min_high_risk_country_match_rate * 100.0,
166            ));
167        }
168
169        Ok(SanctionsScreeningAnalysis {
170            total_customers: total,
171            low_risk_clear_rate: low_rate,
172            high_risk_match_rate: high_rate,
173            pep_variations_rate: pep_rate,
174            high_risk_country_match_rate: hrc_rate,
175            confirmed_match_count: confirmed,
176            potential_match_count: potential,
177            passes: issues.is_empty(),
178            issues,
179        })
180    }
181}
182
183impl Default for SanctionsScreeningAnalyzer {
184    fn default() -> Self {
185        Self::new()
186    }
187}
188
#[cfg(test)]
// Tests unwrap the analyzer result directly: an `Err` there is a test failure.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Shorthand constructor for a single observation.
    fn mk_obs(tier: &str, pep: bool, hrc: bool, res: &str, vars: bool) -> ScreeningObservation {
        ScreeningObservation {
            risk_tier: tier.into(),
            is_pep: pep,
            is_high_risk_country: hrc,
            screening_result: res.into(),
            has_name_variations: vars,
        }
    }

    #[test]
    fn test_realistic_distribution_passes() {
        let mut obs = Vec::new();
        // 200 low-risk, 98% clear
        for i in 0..200 {
            let res = if i < 196 { "clear" } else { "potential_match" };
            obs.push(mk_obs("low", false, false, res, false));
        }
        // 30 high-risk, 20% match
        for i in 0..30 {
            let res = if i < 24 { "clear" } else { "potential_match" };
            obs.push(mk_obs("high", false, false, res, false));
        }
        // 5 PEPs all with variations
        for _ in 0..5 {
            obs.push(mk_obs("medium", true, false, "clear", true));
        }
        let a = SanctionsScreeningAnalyzer::new();
        let r = a.analyze(&obs).unwrap();
        assert!(r.passes, "Issues: {:?}", r.issues);
        assert_eq!(r.total_customers, 235);
        // 4 low-risk + 6 high-risk potential matches, no confirmed ones.
        assert_eq!(r.potential_match_count, 10);
        assert_eq!(r.confirmed_match_count, 0);
    }

    #[test]
    fn test_high_risk_with_zero_matches_flagged() {
        let obs: Vec<_> = (0..50)
            .map(|_| mk_obs("very_high", false, false, "clear", false))
            .collect();
        let a = SanctionsScreeningAnalyzer::new();
        let r = a.analyze(&obs).unwrap();
        assert!(!r.passes);
        // Match the full prefix: a bare "High-risk" would also match the
        // separate "High-risk-country match rate" message.
        assert!(r
            .issues
            .iter()
            .any(|i| i.starts_with("High-risk match rate")));
    }

    #[test]
    fn test_empty_input_passes_vacuously() {
        let a = SanctionsScreeningAnalyzer::new();
        let r = a.analyze(&[]).unwrap();
        assert!(r.passes, "Issues: {:?}", r.issues);
        assert_eq!(r.total_customers, 0);
        // Empty buckets report a rate of 1.0.
        assert!((r.low_risk_clear_rate - 1.0).abs() < f64::EPSILON);
        assert!((r.high_risk_match_rate - 1.0).abs() < f64::EPSILON);
        assert!((r.pep_variations_rate - 1.0).abs() < f64::EPSILON);
        assert!((r.high_risk_country_match_rate - 1.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_pep_without_variations_flagged() {
        let obs: Vec<_> = (0..10)
            .map(|_| mk_obs("medium", true, false, "clear", false))
            .collect();
        let a = SanctionsScreeningAnalyzer::new();
        let r = a.analyze(&obs).unwrap();
        assert!(!r.passes);
        assert!(r.issues.iter().any(|i| i.contains("PEP name-variation")));
    }

    #[test]
    fn test_small_high_risk_sample_not_flagged() {
        // Exactly 10 observations: the check requires more than 10.
        let obs: Vec<_> = (0..10)
            .map(|_| mk_obs("very_high", false, false, "clear", false))
            .collect();
        let a = SanctionsScreeningAnalyzer::new();
        let r = a.analyze(&obs).unwrap();
        assert!(r.passes, "Issues: {:?}", r.issues);
        assert!(r.high_risk_match_rate.abs() < f64::EPSILON);
    }
}
datasynth_eval/banking/sanctions_screening.rs

datasynth_eval/banking/
sanctions_screening.rs