// datasynth_eval/coherence/fraud_packs.rs
use serde::{Deserialize, Serialize};
use std::collections::HashMap;

4#[derive(Debug, Clone, Serialize, Deserialize)]
6pub struct FraudPackData {
7 pub configured_fraud_rate: f64,
8 pub actual_fraud_count: usize,
9 pub total_records: usize,
10 pub configured_scheme_types: Vec<String>,
11 pub actual_scheme_types: Vec<String>,
12 pub scheme_type_counts: HashMap<String, usize>,
13}
14
15#[derive(Debug, Clone, Serialize, Deserialize)]
17pub struct FraudPackThresholds {
18 pub min_rate_accuracy: f64,
21 pub min_scheme_coverage: f64,
24 pub min_distribution_entropy: f64,
27}
28
29impl Default for FraudPackThresholds {
30 fn default() -> Self {
31 Self {
32 min_rate_accuracy: 0.70,
33 min_scheme_coverage: 0.80,
34 min_distribution_entropy: 0.5,
35 }
36 }
37}
38
39#[derive(Debug, Clone, Serialize, Deserialize)]
41pub struct FraudPackAnalysis {
42 pub configured_rate: f64,
43 pub actual_rate: f64,
44 pub rate_accuracy: f64,
46 pub scheme_coverage: f64,
48 pub scheme_distribution_entropy: f64,
50 pub passes: bool,
51 pub issues: Vec<String>,
52}
53
54pub struct FraudPackAnalyzer {
55 thresholds: FraudPackThresholds,
56}
57
58impl FraudPackAnalyzer {
59 pub fn new(thresholds: FraudPackThresholds) -> Self {
60 Self { thresholds }
61 }
62
63 pub fn with_defaults() -> Self {
64 Self::new(FraudPackThresholds::default())
65 }
66
67 pub fn analyze(&self, data: &FraudPackData) -> FraudPackAnalysis {
68 let mut issues = Vec::new();
69
70 let actual_rate = if data.total_records > 0 {
72 data.actual_fraud_count as f64 / data.total_records as f64
73 } else {
74 0.0
75 };
76
77 let rate_accuracy = if data.configured_fraud_rate > 0.0 {
79 1.0 - ((data.configured_fraud_rate - actual_rate).abs() / data.configured_fraud_rate)
80 } else if actual_rate == 0.0 {
81 1.0 } else {
83 0.0 };
85
86 let scheme_coverage = if data.configured_scheme_types.is_empty() {
88 1.0
89 } else {
90 let covered = data
91 .configured_scheme_types
92 .iter()
93 .filter(|t| data.actual_scheme_types.contains(t))
94 .count();
95 covered as f64 / data.configured_scheme_types.len() as f64
96 };
97
98 let scheme_distribution_entropy = {
100 let total: usize = data.scheme_type_counts.values().sum();
101 if total == 0 || data.scheme_type_counts.len() <= 1 {
102 0.0
103 } else {
104 let mut entropy = 0.0f64;
105 for &count in data.scheme_type_counts.values() {
106 if count > 0 {
107 let p = count as f64 / total as f64;
108 entropy -= p * p.log2();
109 }
110 }
111 let max_entropy = (data.scheme_type_counts.len() as f64).log2();
113 if max_entropy > 0.0 {
114 entropy / max_entropy
115 } else {
116 0.0
117 }
118 }
119 };
120
121 if rate_accuracy < self.thresholds.min_rate_accuracy {
123 issues.push(format!(
124 "Rate accuracy {:.3} < threshold {:.3} (configured={:.4}, actual={:.4})",
125 rate_accuracy,
126 self.thresholds.min_rate_accuracy,
127 data.configured_fraud_rate,
128 actual_rate
129 ));
130 }
131 if scheme_coverage < self.thresholds.min_scheme_coverage {
132 issues.push(format!(
133 "Scheme coverage {:.2} < threshold {:.2}",
134 scheme_coverage, self.thresholds.min_scheme_coverage
135 ));
136 }
137 if scheme_distribution_entropy < self.thresholds.min_distribution_entropy {
138 issues.push(format!(
139 "Distribution entropy {:.3} < threshold {:.3}",
140 scheme_distribution_entropy, self.thresholds.min_distribution_entropy
141 ));
142 }
143
144 let passes = rate_accuracy >= self.thresholds.min_rate_accuracy
145 && scheme_coverage >= self.thresholds.min_scheme_coverage
146 && scheme_distribution_entropy >= self.thresholds.min_distribution_entropy;
147
148 FraudPackAnalysis {
149 configured_rate: data.configured_fraud_rate,
150 actual_rate,
151 rate_accuracy,
152 scheme_coverage,
153 scheme_distribution_entropy,
154 passes,
155 issues,
156 }
157 }
158}
159
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute tolerance used when comparing computed metrics with expected
    /// values, so the tests do not depend on exact float representation.
    const EPS: f64 = 1e-12;

    /// Asserts that `actual` is within [`EPS`] of `expected`.
    fn assert_close(actual: f64, expected: f64) {
        assert!(
            (actual - expected).abs() < EPS,
            "expected {}, got {}",
            expected,
            actual
        );
    }

    /// Returns `true` when some reported issue starts with `prefix`.
    fn has_issue(result: &FraudPackAnalysis, prefix: &str) -> bool {
        result.issues.iter().any(|issue| issue.starts_with(prefix))
    }

    #[test]
    fn test_perfect_fraud_pack() {
        let analyzer = FraudPackAnalyzer::with_defaults();
        let data = FraudPackData {
            configured_fraud_rate: 0.05,
            actual_fraud_count: 50,
            total_records: 1000,
            configured_scheme_types: vec!["DuplicatePayment".into(), "SplitTransaction".into()],
            actual_scheme_types: vec!["DuplicatePayment".into(), "SplitTransaction".into()],
            scheme_type_counts: HashMap::from([
                ("DuplicatePayment".into(), 25),
                ("SplitTransaction".into(), 25),
            ]),
        };
        let result = analyzer.analyze(&data);
        assert!(result.passes, "issues: {:?}", result.issues);
        assert!(result.issues.is_empty());
        assert_close(result.rate_accuracy, 1.0);
        assert_close(result.scheme_coverage, 1.0);
        // Two equally frequent schemes give the maximum normalized entropy.
        assert!(result.scheme_distribution_entropy > 0.9);
    }

    #[test]
    fn test_rate_deviation_detected() {
        let analyzer = FraudPackAnalyzer::with_defaults();
        let data = FraudPackData {
            configured_fraud_rate: 0.10,
            actual_fraud_count: 20,
            total_records: 1000,
            configured_scheme_types: vec!["DuplicatePayment".into()],
            actual_scheme_types: vec!["DuplicatePayment".into()],
            scheme_type_counts: HashMap::from([("DuplicatePayment".into(), 20)]),
        };
        let result = analyzer.analyze(&data);
        assert!(!result.passes);
        // Actual rate 0.02 against configured 0.10 is an 80% relative deviation.
        assert!(result.rate_accuracy < 0.7);
        assert!(has_issue(&result, "Rate accuracy"));
    }

    #[test]
    fn test_missing_scheme_types() {
        let analyzer = FraudPackAnalyzer::with_defaults();
        let data = FraudPackData {
            configured_fraud_rate: 0.05,
            actual_fraud_count: 50,
            total_records: 1000,
            configured_scheme_types: vec![
                "DuplicatePayment".into(),
                "SplitTransaction".into(),
                "GhostEmployee".into(),
                "RoundTripping".into(),
                "FictitiousTransaction".into(),
            ],
            actual_scheme_types: vec!["DuplicatePayment".into()],
            scheme_type_counts: HashMap::from([("DuplicatePayment".into(), 50)]),
        };
        let result = analyzer.analyze(&data);
        assert!(!result.passes);
        // One of the five configured scheme types was observed.
        assert_close(result.scheme_coverage, 0.2);
        assert!(has_issue(&result, "Scheme coverage"));
    }

    #[test]
    fn test_zero_records_handles_gracefully() {
        let analyzer = FraudPackAnalyzer::with_defaults();
        let data = FraudPackData {
            configured_fraud_rate: 0.05,
            actual_fraud_count: 0,
            total_records: 0,
            configured_scheme_types: vec!["DuplicatePayment".into()],
            actual_scheme_types: vec![],
            scheme_type_counts: HashMap::new(),
        };
        let result = analyzer.analyze(&data);
        // No division by zero: every metric falls back to a finite value.
        assert_close(result.actual_rate, 0.0);
        assert_close(result.rate_accuracy, 0.0);
        assert_close(result.scheme_coverage, 0.0);
        assert_close(result.scheme_distribution_entropy, 0.0);
        assert!(!result.passes);
        assert!(!result.issues.is_empty());
    }

    #[test]
    fn test_uniform_distribution_high_entropy() {
        let analyzer = FraudPackAnalyzer::with_defaults();
        let data = FraudPackData {
            configured_fraud_rate: 0.05,
            actual_fraud_count: 100,
            total_records: 2000,
            configured_scheme_types: vec!["A".into(), "B".into(), "C".into(), "D".into()],
            actual_scheme_types: vec!["A".into(), "B".into(), "C".into(), "D".into()],
            scheme_type_counts: HashMap::from([
                ("A".into(), 25),
                ("B".into(), 25),
                ("C".into(), 25),
                ("D".into(), 25),
            ]),
        };
        let result = analyzer.analyze(&data);
        assert!(result.scheme_distribution_entropy > 0.99);
        assert!(result.passes, "issues: {:?}", result.issues);
    }

    #[test]
    fn test_skewed_distribution_low_entropy() {
        let analyzer = FraudPackAnalyzer::with_defaults();
        let data = FraudPackData {
            configured_fraud_rate: 0.05,
            actual_fraud_count: 100,
            total_records: 2000,
            configured_scheme_types: vec!["A".into(), "B".into(), "C".into()],
            actual_scheme_types: vec!["A".into(), "B".into(), "C".into()],
            scheme_type_counts: HashMap::from([("A".into(), 98), ("B".into(), 1), ("C".into(), 1)]),
        };
        let result = analyzer.analyze(&data);
        assert!(result.scheme_distribution_entropy < 0.5);
        assert!(!result.passes);
        assert!(has_issue(&result, "Distribution entropy"));
    }
}