datasynth_eval/banking/
sanctions_screening.rs1use serde::{Deserialize, Serialize};
9
10use crate::error::EvalResult;
11
/// One customer's sanctions-screening outcome, as consumed by
/// `SanctionsScreeningAnalyzer::analyze`.
#[derive(Clone, Debug)]
pub struct ScreeningObservation {
    /// Risk tier label. The analyzer treats `"low"` as low risk and
    /// `"high"`, `"very_high"` and `"prohibited"` as high risk; any other
    /// label is counted only towards the overall total.
    pub risk_tier: String,
    /// Whether the customer is flagged as a PEP.
    pub is_pep: bool,
    /// Whether the customer is associated with a high-risk country.
    pub is_high_risk_country: bool,
    /// Screening outcome label. `"confirmed_match"` and `"potential_match"`
    /// are tallied individually; every value other than `"clear"` is treated
    /// as a match when rates are computed.
    pub screening_result: String,
    /// Whether name variations are recorded for this customer.
    pub has_name_variations: bool,
}
20
21#[derive(Debug, Clone, Serialize, Deserialize)]
22pub struct SanctionsScreeningThresholds {
23 pub min_low_risk_clear_rate: f64,
25 pub min_high_risk_match_rate: f64,
27 pub min_pep_variations_rate: f64,
29 pub min_high_risk_country_match_rate: f64,
31}
32
33impl Default for SanctionsScreeningThresholds {
34 fn default() -> Self {
35 Self {
36 min_low_risk_clear_rate: 0.95,
37 min_high_risk_match_rate: 0.05,
38 min_pep_variations_rate: 0.90,
39 min_high_risk_country_match_rate: 0.05,
40 }
41 }
42}
43
44#[derive(Debug, Clone, Serialize, Deserialize)]
45pub struct SanctionsScreeningAnalysis {
46 pub total_customers: usize,
47 pub low_risk_clear_rate: f64,
48 pub high_risk_match_rate: f64,
49 pub pep_variations_rate: f64,
50 pub high_risk_country_match_rate: f64,
51 pub confirmed_match_count: usize,
52 pub potential_match_count: usize,
53 pub passes: bool,
54 pub issues: Vec<String>,
55}
56
57pub struct SanctionsScreeningAnalyzer {
58 pub thresholds: SanctionsScreeningThresholds,
59}
60
61impl SanctionsScreeningAnalyzer {
62 pub fn new() -> Self {
63 Self {
64 thresholds: SanctionsScreeningThresholds::default(),
65 }
66 }
67
68 pub fn analyze(
69 &self,
70 observations: &[ScreeningObservation],
71 ) -> EvalResult<SanctionsScreeningAnalysis> {
72 let total = observations.len();
73 let mut low_risk_total = 0usize;
74 let mut low_risk_clear = 0usize;
75 let mut high_risk_total = 0usize;
76 let mut high_risk_match = 0usize;
77 let mut pep_total = 0usize;
78 let mut pep_with_variations = 0usize;
79 let mut hrc_total = 0usize;
80 let mut hrc_match = 0usize;
81 let mut confirmed = 0usize;
82 let mut potential = 0usize;
83
84 for obs in observations {
85 let is_match = obs.screening_result != "clear";
86 if obs.screening_result == "confirmed_match" {
87 confirmed += 1;
88 } else if obs.screening_result == "potential_match" {
89 potential += 1;
90 }
91
92 if obs.risk_tier == "low" {
93 low_risk_total += 1;
94 if !is_match {
95 low_risk_clear += 1;
96 }
97 }
98 if matches!(obs.risk_tier.as_str(), "high" | "very_high" | "prohibited") {
99 high_risk_total += 1;
100 if is_match {
101 high_risk_match += 1;
102 }
103 }
104 if obs.is_pep {
105 pep_total += 1;
106 if obs.has_name_variations {
107 pep_with_variations += 1;
108 }
109 }
110 if obs.is_high_risk_country {
111 hrc_total += 1;
112 if is_match {
113 hrc_match += 1;
114 }
115 }
116 }
117
118 let low_rate = if low_risk_total > 0 {
119 low_risk_clear as f64 / low_risk_total as f64
120 } else {
121 1.0
122 };
123 let high_rate = if high_risk_total > 0 {
124 high_risk_match as f64 / high_risk_total as f64
125 } else {
126 1.0
127 };
128 let pep_rate = if pep_total > 0 {
129 pep_with_variations as f64 / pep_total as f64
130 } else {
131 1.0
132 };
133 let hrc_rate = if hrc_total > 0 {
134 hrc_match as f64 / hrc_total as f64
135 } else {
136 1.0
137 };
138
139 let mut issues = Vec::new();
140 if low_risk_total > 10 && low_rate < self.thresholds.min_low_risk_clear_rate {
141 issues.push(format!(
142 "Low-risk clear rate {:.1}% below minimum {:.1}% — too many false matches",
143 low_rate * 100.0,
144 self.thresholds.min_low_risk_clear_rate * 100.0,
145 ));
146 }
147 if high_risk_total > 10 && high_rate < self.thresholds.min_high_risk_match_rate {
148 issues.push(format!(
149 "High-risk match rate {:.1}% below minimum {:.1}% — screening not detecting risky customers",
150 high_rate * 100.0,
151 self.thresholds.min_high_risk_match_rate * 100.0,
152 ));
153 }
154 if pep_total > 0 && pep_rate < self.thresholds.min_pep_variations_rate {
155 issues.push(format!(
156 "PEP name-variation rate {:.1}% below minimum {:.1}%",
157 pep_rate * 100.0,
158 self.thresholds.min_pep_variations_rate * 100.0,
159 ));
160 }
161 if hrc_total > 10 && hrc_rate < self.thresholds.min_high_risk_country_match_rate {
162 issues.push(format!(
163 "High-risk-country match rate {:.1}% below minimum {:.1}%",
164 hrc_rate * 100.0,
165 self.thresholds.min_high_risk_country_match_rate * 100.0,
166 ));
167 }
168
169 Ok(SanctionsScreeningAnalysis {
170 total_customers: total,
171 low_risk_clear_rate: low_rate,
172 high_risk_match_rate: high_rate,
173 pep_variations_rate: pep_rate,
174 high_risk_country_match_rate: hrc_rate,
175 confirmed_match_count: confirmed,
176 potential_match_count: potential,
177 passes: issues.is_empty(),
178 issues,
179 })
180 }
181}
182
183impl Default for SanctionsScreeningAnalyzer {
184 fn default() -> Self {
185 Self::new()
186 }
187}
188
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Shorthand constructor for a [`ScreeningObservation`].
    fn mk_obs(tier: &str, pep: bool, hrc: bool, res: &str, vars: bool) -> ScreeningObservation {
        ScreeningObservation {
            risk_tier: tier.to_string(),
            is_pep: pep,
            is_high_risk_country: hrc,
            screening_result: res.to_string(),
            has_name_variations: vars,
        }
    }

    #[test]
    fn test_realistic_distribution_passes() {
        // 200 low-risk customers: 196 clear, 4 potential matches (98% clear).
        let low_risk = (0..200).map(|idx| {
            let outcome = if idx < 196 { "clear" } else { "potential_match" };
            mk_obs("low", false, false, outcome, false)
        });
        // 30 high-risk customers: 24 clear, 6 potential matches (20% matched).
        let high_risk = (0..30).map(|idx| {
            let outcome = if idx < 24 { "clear" } else { "potential_match" };
            mk_obs("high", false, false, outcome, false)
        });
        // 5 medium-tier PEPs, all with name variations.
        let peps = (0..5).map(|_| mk_obs("medium", true, false, "clear", true));

        let obs: Vec<ScreeningObservation> = low_risk.chain(high_risk).chain(peps).collect();

        let analyzer = SanctionsScreeningAnalyzer::new();
        let report = analyzer.analyze(&obs).unwrap();
        assert!(report.passes, "Issues: {:?}", report.issues);
    }

    #[test]
    fn test_high_risk_with_zero_matches_flagged() {
        // 50 very-high-risk customers, none of which produced a match.
        let obs = vec![mk_obs("very_high", false, false, "clear", false); 50];

        let analyzer = SanctionsScreeningAnalyzer::new();
        let report = analyzer.analyze(&obs).unwrap();
        assert!(!report.passes);
        assert!(report.issues.iter().any(|msg| msg.contains("High-risk")));
    }
}