datasynth_eval/banking/
device_fingerprint.rs1use std::collections::HashMap;
9
10use serde::{Deserialize, Serialize};
11
12use crate::error::EvalResult;
13
/// One sighting of a device in use by a customer.
///
/// The analyzer groups these records by `customer_id` and counts *distinct*
/// `device_id` values, so repeating the same customer/device pair does not
/// inflate a customer's device count (it does still contribute to the trust
/// and known-device averages, which are taken per observation).
#[derive(Clone, Debug)]
pub struct DeviceObservation {
    /// Identifier of the customer the device was observed for.
    pub customer_id: String,
    /// Identifier of the observed device.
    pub device_id: String,
    /// Trust score attached to this particular observation.
    pub trust_score: f64,
    /// Whether the device was flagged as known at observation time.
    pub is_known: bool,
}
22
23#[derive(Debug, Clone, Serialize, Deserialize)]
24pub struct DeviceFingerprintThresholds {
25 pub min_coverage: f64,
27 pub max_heavy_tail_rate: f64,
29 pub min_single_device_rate: f64,
31 pub min_mean_trust: f64,
33}
34
35impl Default for DeviceFingerprintThresholds {
36 fn default() -> Self {
37 Self {
38 min_coverage: 0.90,
39 max_heavy_tail_rate: 0.10,
40 min_single_device_rate: 0.40,
41 min_mean_trust: 0.3,
42 }
43 }
44}
45
46#[derive(Debug, Clone, Serialize, Deserialize)]
47pub struct DeviceFingerprintAnalysis {
48 pub total_customers: usize,
49 pub customers_with_devices: usize,
50 pub single_device_customers: usize,
51 pub heavy_tail_customers: usize,
52 pub mean_devices_per_customer: f64,
53 pub mean_trust_score: f64,
54 pub known_device_rate: f64,
55 pub passes: bool,
56 pub issues: Vec<String>,
57}
58
59pub struct DeviceFingerprintAnalyzer {
60 pub thresholds: DeviceFingerprintThresholds,
61}
62
63impl DeviceFingerprintAnalyzer {
64 pub fn new() -> Self {
65 Self {
66 thresholds: DeviceFingerprintThresholds::default(),
67 }
68 }
69
70 pub fn analyze(
71 &self,
72 observations: &[DeviceObservation],
73 total_customers: usize,
74 ) -> EvalResult<DeviceFingerprintAnalysis> {
75 let mut devices_per_customer: HashMap<String, std::collections::HashSet<String>> =
77 HashMap::new();
78 let mut trust_scores: Vec<f64> = Vec::new();
79 let mut known_count = 0usize;
80
81 for obs in observations {
82 devices_per_customer
83 .entry(obs.customer_id.clone())
84 .or_default()
85 .insert(obs.device_id.clone());
86 trust_scores.push(obs.trust_score);
87 if obs.is_known {
88 known_count += 1;
89 }
90 }
91
92 let customers_with_devices = devices_per_customer.len();
93 let single_device = devices_per_customer
94 .values()
95 .filter(|s| s.len() == 1)
96 .count();
97 let heavy_tail = devices_per_customer
98 .values()
99 .filter(|s| s.len() > 5)
100 .count();
101 let mean_devices = if customers_with_devices > 0 {
102 devices_per_customer
103 .values()
104 .map(|s| s.len())
105 .sum::<usize>() as f64
106 / customers_with_devices as f64
107 } else {
108 0.0
109 };
110 let mean_trust = if !trust_scores.is_empty() {
111 trust_scores.iter().sum::<f64>() / trust_scores.len() as f64
112 } else {
113 0.0
114 };
115 let known_rate = if !observations.is_empty() {
116 known_count as f64 / observations.len() as f64
117 } else {
118 0.0
119 };
120
121 let coverage = if total_customers > 0 {
122 customers_with_devices as f64 / total_customers as f64
123 } else {
124 0.0
125 };
126 let heavy_tail_rate = if customers_with_devices > 0 {
127 heavy_tail as f64 / customers_with_devices as f64
128 } else {
129 0.0
130 };
131 let single_rate = if customers_with_devices > 0 {
132 single_device as f64 / customers_with_devices as f64
133 } else {
134 0.0
135 };
136
137 let mut issues = Vec::new();
138 if total_customers > 0 && coverage < self.thresholds.min_coverage {
139 issues.push(format!(
140 "Device coverage {:.1}% below minimum {:.1}%",
141 coverage * 100.0,
142 self.thresholds.min_coverage * 100.0,
143 ));
144 }
145 if heavy_tail_rate > self.thresholds.max_heavy_tail_rate {
146 issues.push(format!(
147 "Heavy-tail rate {:.1}% above maximum {:.1}% — too many multi-device customers",
148 heavy_tail_rate * 100.0,
149 self.thresholds.max_heavy_tail_rate * 100.0,
150 ));
151 }
152 if customers_with_devices > 0 && single_rate < self.thresholds.min_single_device_rate {
153 issues.push(format!(
154 "Single-device rate {:.1}% below minimum {:.1}% — distribution not power-law",
155 single_rate * 100.0,
156 self.thresholds.min_single_device_rate * 100.0,
157 ));
158 }
159 if !trust_scores.is_empty() && mean_trust < self.thresholds.min_mean_trust {
160 issues.push(format!(
161 "Mean trust {:.3} below minimum {:.3} — trust score not evolving",
162 mean_trust, self.thresholds.min_mean_trust,
163 ));
164 }
165
166 Ok(DeviceFingerprintAnalysis {
167 total_customers,
168 customers_with_devices,
169 single_device_customers: single_device,
170 heavy_tail_customers: heavy_tail,
171 mean_devices_per_customer: mean_devices,
172 mean_trust_score: mean_trust,
173 known_device_rate: known_rate,
174 passes: issues.is_empty(),
175 issues,
176 })
177 }
178}
179
180impl Default for DeviceFingerprintAnalyzer {
181 fn default() -> Self {
182 Self::new()
183 }
184}
185
#[cfg(test)]
// Tests unwrap deliberately: a failed analysis should abort the test.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Builds one known-device observation for customer `C{customer}`.
    fn observation(customer: usize, device_id: String, trust_score: f64) -> DeviceObservation {
        DeviceObservation {
            customer_id: format!("C{customer}"),
            device_id,
            trust_score,
            is_known: true,
        }
    }

    /// Yields `devices` distinct devices (`D{c}-{d}`) for every customer in
    /// `customers`, customer by customer.
    fn multi_device(
        customers: std::ops::Range<usize>,
        devices: usize,
        trust_score: f64,
    ) -> impl Iterator<Item = DeviceObservation> {
        customers.flat_map(move |c| {
            (0..devices).map(move |d| observation(c, format!("D{c}-{d}"), trust_score))
        })
    }

    /// 70 single-device, 25 two-device and 5 three-device customers satisfy
    /// every default threshold.
    #[test]
    fn test_power_law_distribution_passes() {
        let obs: Vec<DeviceObservation> = (0..70)
            .map(|c| observation(c, format!("D{c}"), 0.9))
            .chain(multi_device(70..95, 2, 0.8))
            .chain(multi_device(95..100, 3, 0.7))
            .collect();

        let result = DeviceFingerprintAnalyzer::new()
            .analyze(&obs, 100)
            .unwrap();

        assert!(result.passes, "Issues: {:?}", result.issues);
        assert_eq!(result.single_device_customers, 70);
    }

    /// When every customer owns ten devices the heavy-tail check must fire.
    #[test]
    fn test_too_many_heavy_tail_detected() {
        let obs: Vec<DeviceObservation> = multi_device(0..50, 10, 0.5).collect();

        let result = DeviceFingerprintAnalyzer::new()
            .analyze(&obs, 50)
            .unwrap();

        assert!(!result.passes);
        assert!(result
            .issues
            .iter()
            .any(|issue| issue.contains("Heavy-tail")));
    }
}