Skip to main content

datasynth_eval/banking/
device_fingerprint.rs

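//! Device fingerprint quality evaluator.
//!
//! Validates that device usage patterns are realistic:
//! - Power-law distribution of devices per customer (most have 1): checked via
//!   the share of single-device customers and the share of customers with
//!   more than 5 distinct devices
//! - Trust scores calibrated (primary > secondary): currently approximated by a
//!   minimum on the mean trust score across all observations
//! - Reuse rate aligned with observed patterns: the known-device rate is
//!   reported in the analysis but is not compared against a threshold
//! - Coverage: fraction of the customer base with at least one observed device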
7
8use std::collections::HashMap;
9
10use serde::{Deserialize, Serialize};
11
12use crate::error::EvalResult;
13
/// A single sighting of a device being used by a customer.
///
/// The analyzer groups observations by `customer_id` and counts distinct
/// `device_id` values per customer, so the same (customer, device) pair may
/// appear any number of times without inflating device counts.
#[derive(Clone, Debug)]
pub struct DeviceObservation {
    /// Identifier of the customer who used the device.
    pub customer_id: String,
    /// Identifier of the device; distinct values are counted per customer.
    pub device_id: String,
    /// Trust score attached to this sighting; averaged over all observations.
    // NOTE(review): presumably in the range 0.0..=1.0 — confirm with the generator.
    pub trust_score: f64,
    /// Whether the device was flagged as known for this sighting; feeds the
    /// known-device rate.
    pub is_known: bool,
}
22
23#[derive(Debug, Clone, Serialize, Deserialize)]
24pub struct DeviceFingerprintThresholds {
25    /// Minimum fraction of customers with ≥1 device observed
26    pub min_coverage: f64,
27    /// Maximum fraction of customers with >5 devices (realistic heavy tail cap)
28    pub max_heavy_tail_rate: f64,
29    /// Minimum fraction of single-device customers (should dominate)
30    pub min_single_device_rate: f64,
31    /// Minimum mean trust score (should be reasonable)
32    pub min_mean_trust: f64,
33}
34
35impl Default for DeviceFingerprintThresholds {
36    fn default() -> Self {
37        Self {
38            min_coverage: 0.90,
39            max_heavy_tail_rate: 0.10,
40            min_single_device_rate: 0.40,
41            min_mean_trust: 0.3,
42        }
43    }
44}
45
46#[derive(Debug, Clone, Serialize, Deserialize)]
47pub struct DeviceFingerprintAnalysis {
48    pub total_customers: usize,
49    pub customers_with_devices: usize,
50    pub single_device_customers: usize,
51    pub heavy_tail_customers: usize,
52    pub mean_devices_per_customer: f64,
53    pub mean_trust_score: f64,
54    pub known_device_rate: f64,
55    pub passes: bool,
56    pub issues: Vec<String>,
57}
58
59pub struct DeviceFingerprintAnalyzer {
60    pub thresholds: DeviceFingerprintThresholds,
61}
62
63impl DeviceFingerprintAnalyzer {
64    pub fn new() -> Self {
65        Self {
66            thresholds: DeviceFingerprintThresholds::default(),
67        }
68    }
69
70    pub fn analyze(
71        &self,
72        observations: &[DeviceObservation],
73        total_customers: usize,
74    ) -> EvalResult<DeviceFingerprintAnalysis> {
75        // Per-customer unique device set
76        let mut devices_per_customer: HashMap<String, std::collections::HashSet<String>> =
77            HashMap::new();
78        let mut trust_scores: Vec<f64> = Vec::new();
79        let mut known_count = 0usize;
80
81        for obs in observations {
82            devices_per_customer
83                .entry(obs.customer_id.clone())
84                .or_default()
85                .insert(obs.device_id.clone());
86            trust_scores.push(obs.trust_score);
87            if obs.is_known {
88                known_count += 1;
89            }
90        }
91
92        let customers_with_devices = devices_per_customer.len();
93        let single_device = devices_per_customer
94            .values()
95            .filter(|s| s.len() == 1)
96            .count();
97        let heavy_tail = devices_per_customer
98            .values()
99            .filter(|s| s.len() > 5)
100            .count();
101        let mean_devices = if customers_with_devices > 0 {
102            devices_per_customer
103                .values()
104                .map(|s| s.len())
105                .sum::<usize>() as f64
106                / customers_with_devices as f64
107        } else {
108            0.0
109        };
110        let mean_trust = if !trust_scores.is_empty() {
111            trust_scores.iter().sum::<f64>() / trust_scores.len() as f64
112        } else {
113            0.0
114        };
115        let known_rate = if !observations.is_empty() {
116            known_count as f64 / observations.len() as f64
117        } else {
118            0.0
119        };
120
121        let coverage = if total_customers > 0 {
122            customers_with_devices as f64 / total_customers as f64
123        } else {
124            0.0
125        };
126        let heavy_tail_rate = if customers_with_devices > 0 {
127            heavy_tail as f64 / customers_with_devices as f64
128        } else {
129            0.0
130        };
131        let single_rate = if customers_with_devices > 0 {
132            single_device as f64 / customers_with_devices as f64
133        } else {
134            0.0
135        };
136
137        let mut issues = Vec::new();
138        if total_customers > 0 && coverage < self.thresholds.min_coverage {
139            issues.push(format!(
140                "Device coverage {:.1}% below minimum {:.1}%",
141                coverage * 100.0,
142                self.thresholds.min_coverage * 100.0,
143            ));
144        }
145        if heavy_tail_rate > self.thresholds.max_heavy_tail_rate {
146            issues.push(format!(
147                "Heavy-tail rate {:.1}% above maximum {:.1}% — too many multi-device customers",
148                heavy_tail_rate * 100.0,
149                self.thresholds.max_heavy_tail_rate * 100.0,
150            ));
151        }
152        if customers_with_devices > 0 && single_rate < self.thresholds.min_single_device_rate {
153            issues.push(format!(
154                "Single-device rate {:.1}% below minimum {:.1}% — distribution not power-law",
155                single_rate * 100.0,
156                self.thresholds.min_single_device_rate * 100.0,
157            ));
158        }
159        if !trust_scores.is_empty() && mean_trust < self.thresholds.min_mean_trust {
160            issues.push(format!(
161                "Mean trust {:.3} below minimum {:.3} — trust score not evolving",
162                mean_trust, self.thresholds.min_mean_trust,
163            ));
164        }
165
166        Ok(DeviceFingerprintAnalysis {
167            total_customers,
168            customers_with_devices,
169            single_device_customers: single_device,
170            heavy_tail_customers: heavy_tail,
171            mean_devices_per_customer: mean_devices,
172            mean_trust_score: mean_trust,
173            known_device_rate: known_rate,
174            passes: issues.is_empty(),
175            issues,
176        })
177    }
178}
179
180impl Default for DeviceFingerprintAnalyzer {
181    fn default() -> Self {
182        Self::new()
183    }
184}
185
#[cfg(test)]
#[allow(clippy::unwrap_used)] // a failed unwrap should simply fail the test
mod tests {
    use super::*;

    /// Yields `devices_each` known-device observations for every customer index
    /// in `customers`, using ids of the form `C{c}` / `D{c}-{d}`.
    fn multi_device_observations(
        customers: std::ops::Range<u32>,
        devices_each: u32,
        trust_score: f64,
    ) -> impl Iterator<Item = DeviceObservation> {
        customers.flat_map(move |c| {
            (0..devices_each).map(move |d| DeviceObservation {
                customer_id: format!("C{c}"),
                device_id: format!("D{c}-{d}"),
                trust_score,
                is_known: true,
            })
        })
    }

    /// A 70/25/5 split across 1, 2 and 3 devices satisfies every default
    /// threshold.
    #[test]
    fn test_power_law_distribution_passes() {
        // 70 customers with a single device each.
        let single_device = (0..70).map(|c| DeviceObservation {
            customer_id: format!("C{c}"),
            device_id: format!("D{c}"),
            trust_score: 0.9,
            is_known: true,
        });
        // Followed by 25 customers with 2 devices and 5 customers with 3.
        let obs: Vec<DeviceObservation> = single_device
            .chain(multi_device_observations(70..95, 2, 0.8))
            .chain(multi_device_observations(95..100, 3, 0.7))
            .collect();

        let result = DeviceFingerprintAnalyzer::new()
            .analyze(&obs, 100)
            .unwrap();

        assert!(result.passes, "Issues: {:?}", result.issues);
        assert_eq!(result.single_device_customers, 70);
    }

    /// Every customer owning 10 devices must trip the heavy-tail check.
    #[test]
    fn test_too_many_heavy_tail_detected() {
        // 50 customers with 10 devices each — far beyond the heavy-tail cap.
        let obs: Vec<DeviceObservation> = multi_device_observations(0..50, 10, 0.5).collect();

        let result = DeviceFingerprintAnalyzer::new()
            .analyze(&obs, 50)
            .unwrap();

        assert!(!result.passes);
        assert!(result
            .issues
            .iter()
            .any(|issue| issue.contains("Heavy-tail")));
    }
}
250}