Skip to main content

datasynth_eval/banking/
velocity_quality.rs

1//! Velocity feature quality evaluator.
2//!
3//! Validates that pre-computed velocity features on bank transactions are
4//! internally consistent (windows nest correctly) and statistically calibrated
5//! (z-scores are centered, amounts sum coherently).
6
7use serde::{Deserialize, Serialize};
8
9use crate::error::EvalResult;
10
/// Pre-computed velocity features attached to a single bank transaction.
///
/// Counts and sums are grouped by look-back window, named by the field suffix
/// (`1h`, `24h`, `7d`, `30d`). The analyzer in this module expects the windows
/// to nest: a shorter window's value should not exceed a longer window's.
///
/// NOTE(review): presumably these are trailing windows ending at the
/// transaction itself — the window anchoring is not visible from this file.
///
/// `Default` yields all-zero features. `PartialEq` is derived so feature sets
/// can be compared directly (for example with `assert_eq!`).
#[derive(Debug, Clone, Default, PartialEq)]
pub struct VelocityFeaturesData {
    /// Transaction count for the 1-hour window.
    pub txn_count_1h: u32,
    /// Transaction count for the 24-hour window.
    pub txn_count_24h: u32,
    /// Transaction count for the 7-day window.
    pub txn_count_7d: u32,
    /// Transaction count for the 30-day window.
    pub txn_count_30d: u32,
    /// Sum of transaction amounts for the 24-hour window.
    pub amount_sum_24h: f64,
    /// Sum of transaction amounts for the 7-day window.
    pub amount_sum_7d: f64,
    /// Sum of transaction amounts for the 30-day window.
    pub amount_sum_30d: f64,
    /// Z-score of the transaction amount. Non-finite values are left out of
    /// the analyzer's z-score statistics.
    pub amount_zscore: f64,
}
23
/// Thresholds for velocity quality evaluation.
///
/// Every rate is a fraction in the same scale the analyzer computes (for
/// example `0.95` means 95%), not a percentage.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VelocityQualityThresholds {
    /// Minimum fraction of transactions that must have velocity features populated.
    /// Coverage below this value is reported as an issue.
    pub min_coverage: f64,
    /// Maximum fraction of velocity-bearing transactions allowed to have
    /// window-ordering (transaction count) violations.
    pub max_ordering_violation_rate: f64,
    /// Maximum fraction of velocity-bearing transactions allowed to have
    /// amount-sum-ordering violations.
    pub max_amount_violation_rate: f64,
    /// Maximum allowed absolute value of the mean z-score. The mean is
    /// expected to be near 0; a larger magnitude is reported as an issue.
    pub zscore_mean_tolerance: f64,
}
36
37impl Default for VelocityQualityThresholds {
38    fn default() -> Self {
39        Self {
40            min_coverage: 0.95,
41            max_ordering_violation_rate: 0.01,
42            max_amount_violation_rate: 0.01,
43            zscore_mean_tolerance: 0.5,
44        }
45    }
46}
47
/// Velocity feature quality analysis result.
///
/// Produced by `VelocityQualityAnalyzer::analyze`; carries the raw counts,
/// the derived statistics and the pass/fail verdict with its reasons.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VelocityQualityAnalysis {
    /// Total number of transactions, exactly as supplied by the caller.
    pub total_transactions: usize,
    /// Number of records that actually carried velocity features.
    pub with_velocity: usize,
    /// `with_velocity / total_transactions`, or `0.0` when the total is zero.
    pub coverage_rate: f64,
    /// Records whose transaction counts do not satisfy 1h ≤ 24h ≤ 7d ≤ 30d.
    pub window_ordering_violations: usize,
    /// Records whose amount sums do not satisfy 24h ≤ 7d ≤ 30d
    /// (a slack of `1e-6` is allowed on each comparison).
    pub amount_ordering_violations: usize,
    /// Mean of the finite z-scores; `0.0` when there are none.
    pub zscore_mean: f64,
    /// Sample standard deviation (n − 1 denominator) of the finite z-scores;
    /// `0.0` when fewer than two are available.
    pub zscore_std: f64,
    /// `true` exactly when `issues` is empty.
    pub passes: bool,
    /// Human-readable description of each threshold check that failed.
    pub issues: Vec<String>,
}
61
62/// Velocity feature quality analyzer.
63pub struct VelocityQualityAnalyzer {
64    pub thresholds: VelocityQualityThresholds,
65}
66
67impl VelocityQualityAnalyzer {
68    pub fn new() -> Self {
69        Self {
70            thresholds: VelocityQualityThresholds::default(),
71        }
72    }
73
74    pub fn with_thresholds(thresholds: VelocityQualityThresholds) -> Self {
75        Self { thresholds }
76    }
77
78    /// Analyze velocity features.
79    ///
80    /// `features`: iterator of Option<VelocityFeaturesData> — None means no velocity computed.
81    /// `total_transactions`: total number of transactions (including those without velocity).
82    pub fn analyze(
83        &self,
84        features: impl IntoIterator<Item = Option<VelocityFeaturesData>>,
85        total_transactions: usize,
86    ) -> EvalResult<VelocityQualityAnalysis> {
87        let mut with_velocity = 0usize;
88        let mut window_violations = 0usize;
89        let mut amount_violations = 0usize;
90        let mut zscores: Vec<f64> = Vec::new();
91
92        for opt_f in features {
93            let Some(f) = opt_f else { continue };
94            with_velocity += 1;
95
96            // Window ordering: 1h ≤ 24h ≤ 7d ≤ 30d
97            if !(f.txn_count_1h <= f.txn_count_24h
98                && f.txn_count_24h <= f.txn_count_7d
99                && f.txn_count_7d <= f.txn_count_30d)
100            {
101                window_violations += 1;
102            }
103
104            // Amount sum ordering: 24h ≤ 7d ≤ 30d
105            if !(f.amount_sum_24h <= f.amount_sum_7d + 1e-6
106                && f.amount_sum_7d <= f.amount_sum_30d + 1e-6)
107            {
108                amount_violations += 1;
109            }
110
111            if f.amount_zscore.is_finite() {
112                zscores.push(f.amount_zscore);
113            }
114        }
115
116        let coverage_rate = if total_transactions > 0 {
117            with_velocity as f64 / total_transactions as f64
118        } else {
119            0.0
120        };
121
122        let zscore_mean = if !zscores.is_empty() {
123            zscores.iter().sum::<f64>() / zscores.len() as f64
124        } else {
125            0.0
126        };
127        let zscore_std = if zscores.len() >= 2 {
128            let var = zscores
129                .iter()
130                .map(|z| (z - zscore_mean).powi(2))
131                .sum::<f64>()
132                / (zscores.len() as f64 - 1.0);
133            var.sqrt()
134        } else {
135            0.0
136        };
137
138        let window_rate = if with_velocity > 0 {
139            window_violations as f64 / with_velocity as f64
140        } else {
141            0.0
142        };
143        let amount_rate = if with_velocity > 0 {
144            amount_violations as f64 / with_velocity as f64
145        } else {
146            0.0
147        };
148
149        let mut issues = Vec::new();
150        if coverage_rate < self.thresholds.min_coverage {
151            issues.push(format!(
152                "Velocity coverage {:.1}% below minimum {:.1}%",
153                coverage_rate * 100.0,
154                self.thresholds.min_coverage * 100.0,
155            ));
156        }
157        if window_rate > self.thresholds.max_ordering_violation_rate {
158            issues.push(format!(
159                "{} transactions have window ordering violations ({:.2}%)",
160                window_violations,
161                window_rate * 100.0,
162            ));
163        }
164        if amount_rate > self.thresholds.max_amount_violation_rate {
165            issues.push(format!(
166                "{} transactions have amount ordering violations ({:.2}%)",
167                amount_violations,
168                amount_rate * 100.0,
169            ));
170        }
171        if zscore_mean.abs() > self.thresholds.zscore_mean_tolerance {
172            issues.push(format!(
173                "Z-score mean {:.3} deviates from expected ≈0",
174                zscore_mean,
175            ));
176        }
177
178        Ok(VelocityQualityAnalysis {
179            total_transactions,
180            with_velocity,
181            coverage_rate,
182            window_ordering_violations: window_violations,
183            amount_ordering_violations: amount_violations,
184            zscore_mean,
185            zscore_std,
186            passes: issues.is_empty(),
187            issues,
188        })
189    }
190}
191
192impl Default for VelocityQualityAnalyzer {
193    fn default() -> Self {
194        Self::new()
195    }
196}
197
/// Unit tests for the velocity quality analyzer: one passing baseline plus one
/// test per threshold check (coverage, window ordering, amount ordering,
/// z-score mean).
#[cfg(test)]
// Tests unwrap the analyzer result directly; a panic is the desired failure mode.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Nested windows, full coverage and a near-zero z-score mean pass cleanly.
    #[test]
    fn test_well_ordered_velocity_passes() {
        let data = vec![
            Some(VelocityFeaturesData {
                txn_count_1h: 1,
                txn_count_24h: 5,
                txn_count_7d: 20,
                txn_count_30d: 80,
                amount_sum_24h: 500.0,
                amount_sum_7d: 2000.0,
                amount_sum_30d: 8000.0,
                amount_zscore: 0.2,
            }),
            Some(VelocityFeaturesData {
                txn_count_1h: 0,
                txn_count_24h: 3,
                txn_count_7d: 15,
                txn_count_30d: 60,
                amount_sum_24h: 200.0,
                amount_sum_7d: 1500.0,
                amount_sum_30d: 6000.0,
                amount_zscore: -0.1,
            }),
        ];
        let analyzer = VelocityQualityAnalyzer::new();
        let result = analyzer.analyze(data, 2).unwrap();
        assert!(result.passes, "Issues: {:?}", result.issues);
        assert_eq!(result.with_velocity, 2);
        assert_eq!(result.window_ordering_violations, 0);
    }

    /// A 24h count larger than the 7d count is reported as a window violation.
    #[test]
    fn test_window_ordering_violation_detected() {
        let data = vec![Some(VelocityFeaturesData {
            // 24h > 7d — violation!
            txn_count_1h: 1,
            txn_count_24h: 50,
            txn_count_7d: 20,
            txn_count_30d: 80,
            amount_sum_24h: 100.0,
            amount_sum_7d: 200.0,
            amount_sum_30d: 300.0,
            amount_zscore: 0.0,
        })];
        let analyzer = VelocityQualityAnalyzer::new();
        let result = analyzer.analyze(data, 1).unwrap();
        assert!(!result.passes);
        assert_eq!(result.window_ordering_violations, 1);
    }

    /// A 24h amount sum larger than the 7d sum is reported as an amount
    /// violation, independently of the (valid) count ordering.
    #[test]
    fn test_amount_ordering_violation_detected() {
        let data = vec![Some(VelocityFeaturesData {
            txn_count_1h: 1,
            txn_count_24h: 5,
            txn_count_7d: 20,
            txn_count_30d: 80,
            // 24h sum > 7d sum — violation!
            amount_sum_24h: 500.0,
            amount_sum_7d: 200.0,
            amount_sum_30d: 300.0,
            amount_zscore: 0.0,
        })];
        let analyzer = VelocityQualityAnalyzer::new();
        let result = analyzer.analyze(data, 1).unwrap();
        assert!(!result.passes);
        assert_eq!(result.amount_ordering_violations, 1);
        assert_eq!(result.window_ordering_violations, 0);
        assert!(result.issues.iter().any(|i| i.contains("amount ordering")));
    }

    /// A z-score mean outside the default ±0.5 tolerance is flagged.
    #[test]
    fn test_zscore_mean_drift_flagged() {
        let data = vec![
            Some(VelocityFeaturesData {
                amount_zscore: 2.0,
                ..Default::default()
            }),
            Some(VelocityFeaturesData {
                amount_zscore: 3.0,
                ..Default::default()
            }),
        ];
        let analyzer = VelocityQualityAnalyzer::new();
        let result = analyzer.analyze(data, 2).unwrap();
        assert!(!result.passes);
        assert!((result.zscore_mean - 2.5).abs() < 1e-12);
        assert!(result.issues.iter().any(|i| i.contains("Z-score mean")));
    }

    /// Coverage below the default 95% minimum is flagged.
    #[test]
    fn test_low_coverage_flagged() {
        // 1 with velocity out of 10 total = 10% coverage, fails min_coverage=95%
        let data: Vec<Option<VelocityFeaturesData>> =
            std::iter::once(Some(VelocityFeaturesData::default()))
                .chain(std::iter::repeat_n(None, 9))
                .collect();
        let analyzer = VelocityQualityAnalyzer::new();
        let result = analyzer.analyze(data, 10).unwrap();
        assert!(!result.passes);
        assert!(result.issues.iter().any(|i| i.contains("coverage")));
    }
}
265}