Skip to main content

nd_300/speedtest/
statistics.rs

1//! Statistical utilities for accurate speed test measurement.
2//!
3//! Port of the QubeTX web speed test's `statistics.ts` module.
4//! Implements Ookla-style trimean, IQR outlier filtering, slow-start discard,
5//! RFC 3550 jitter, inverse-variance weighting, and bootstrap confidence intervals.
6
7use serde::Serialize;
8
9// ── Percentile ──────────────────────────────────────────────────────────
10
11/// Linear-interpolation percentile on a pre-sorted slice. `p` is in [0.0, 1.0].
12pub fn percentile(sorted: &[f64], p: f64) -> f64 {
13    if sorted.is_empty() {
14        return 0.0;
15    }
16    if sorted.len() == 1 {
17        return sorted[0];
18    }
19    let idx = p * (sorted.len() - 1) as f64;
20    let lo = idx.floor() as usize;
21    let hi = idx.ceil() as usize;
22    if lo == hi {
23        return sorted[lo];
24    }
25    sorted[lo] + (sorted[hi] - sorted[lo]) * (idx - lo as f64)
26}
27
28// ── Central tendency ────────────────────────────────────────────────────
29
30pub fn mean(values: &[f64]) -> f64 {
31    if values.is_empty() {
32        return 0.0;
33    }
34    values.iter().sum::<f64>() / values.len() as f64
35}
36
37pub fn median(values: &[f64]) -> f64 {
38    if values.is_empty() {
39        return 0.0;
40    }
41    let mut sorted = values.to_vec();
42    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
43    percentile(&sorted, 0.5)
44}
45
46/// Sample standard deviation (Bessel's correction).
47pub fn stddev(values: &[f64]) -> f64 {
48    if values.len() < 2 {
49        return 0.0;
50    }
51    variance(values).sqrt()
52}
53
54/// Sample variance (Bessel's correction).
55pub fn variance(values: &[f64]) -> f64 {
56    if values.len() < 2 {
57        return 0.0;
58    }
59    let m = mean(values);
60    values.iter().map(|v| (v - m).powi(2)).sum::<f64>() / (values.len() - 1) as f64
61}
62
63// ── Trimean ─────────────────────────────────────────────────────────────
64
65/// Classic Tukey trimean: `(Q1 + 2*median + Q3) / 4`.
66pub fn trimean(values: &[f64]) -> f64 {
67    if values.is_empty() {
68        return 0.0;
69    }
70    let mut sorted = values.to_vec();
71    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
72    let q1 = percentile(&sorted, 0.25);
73    let q2 = percentile(&sorted, 0.50);
74    let q3 = percentile(&sorted, 0.75);
75    (q1 + 2.0 * q2 + q3) / 4.0
76}
77
78/// Ookla-style modified trimean: `(P10 + 8*P50 + P90) / 10`.
79pub fn modified_trimean(values: &[f64]) -> f64 {
80    if values.is_empty() {
81        return 0.0;
82    }
83    let mut sorted = values.to_vec();
84    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
85    let p10 = percentile(&sorted, 0.10);
86    let p50 = percentile(&sorted, 0.50);
87    let p90 = percentile(&sorted, 0.90);
88    (p10 + 8.0 * p50 + p90) / 10.0
89}
90
91// ── Outlier filtering ───────────────────────────────────────────────────
92
93/// Remove values outside `[Q1 - k*IQR, Q3 + k*IQR]`. Default `k = 1.5`.
94pub fn filter_outliers_iqr(values: &[f64], k: f64) -> Vec<f64> {
95    if values.len() < 4 {
96        return values.to_vec();
97    }
98    let mut sorted = values.to_vec();
99    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
100    let q1 = percentile(&sorted, 0.25);
101    let q3 = percentile(&sorted, 0.75);
102    let iqr = q3 - q1;
103    let lo = q1 - k * iqr;
104    let hi = q3 + k * iqr;
105    values
106        .iter()
107        .copied()
108        .filter(|v| *v >= lo && *v <= hi)
109        .collect()
110}
111
112// ── Slow-start discard ──────────────────────────────────────────────────
113
114/// Discard the first `fraction` of samples to eliminate TCP slow-start
115/// ramp-up contamination. Default: discard first 30%.
116pub fn discard_slow_start(values: &[f64], fraction: f64) -> Vec<f64> {
117    if values.len() < 4 {
118        return values.to_vec();
119    }
120    let cut = (values.len() as f64 * fraction).ceil() as usize;
121    values[cut..].to_vec()
122}
123
124// ── Winsorization ───────────────────────────────────────────────────────
125
126/// Cap extreme values at the given percentiles instead of removing them.
127pub fn winsorize(values: &[f64], lower: f64, upper: f64) -> Vec<f64> {
128    if values.len() < 4 {
129        return values.to_vec();
130    }
131    let mut sorted = values.to_vec();
132    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
133    let lo = percentile(&sorted, lower);
134    let hi = percentile(&sorted, upper);
135    values.iter().map(|v| v.max(lo).min(hi)).collect()
136}
137
138// ── Bandwidth pipelines ─────────────────────────────────────────────────
139
140/// Full accuracy pipeline for download bandwidth samples:
141/// 1. Discard slow-start ramp-up (first 30%)
142/// 2. Remove IQR outliers
143/// 3. Compute modified trimean
144/// 4. Cross-validate with Winsorized trimean (average if >15% divergence)
145pub fn accurate_bandwidth(samples: &[f64]) -> f64 {
146    if samples.is_empty() {
147        return 0.0;
148    }
149    let after_slow_start = discard_slow_start(samples, 0.3);
150
151    // Primary: IQR-filtered trimean
152    let cleaned = filter_outliers_iqr(&after_slow_start, 1.5);
153    let iqr_result = if cleaned.is_empty() {
154        modified_trimean(&after_slow_start)
155    } else {
156        modified_trimean(&cleaned)
157    };
158
159    // Cross-check: Winsorized trimean
160    if after_slow_start.len() >= 4 {
161        let winsorized = winsorize(&after_slow_start, 0.05, 0.95);
162        let win_result = modified_trimean(&winsorized);
163
164        if iqr_result > 0.0 && win_result > 0.0 {
165            let divergence = (iqr_result - win_result).abs() / iqr_result.max(win_result);
166            if divergence > 0.15 {
167                return (iqr_result + win_result) / 2.0;
168            }
169        }
170    }
171
172    iqr_result
173}
174
175/// Upload-specific accuracy pipeline.
176/// Upload ramp-up is slower and more variable than download. Following
177/// Speedtest.net's methodology, we keep only the fastest 50% of post-warmup
178/// samples before computing the trimean.
179///
180/// Pipeline:
181/// 1. Discard slow-start ramp-up (first 30%)
182/// 2. Keep only the fastest 50% of remaining samples
183/// 3. Remove IQR outliers
184/// 4. Compute modified trimean + Winsorized cross-validation
185pub fn accurate_upload_bandwidth(samples: &[f64]) -> f64 {
186    if samples.is_empty() {
187        return 0.0;
188    }
189    let after_slow_start = discard_slow_start(samples, 0.3);
190    if after_slow_start.len() < 2 {
191        return accurate_bandwidth(samples);
192    }
193
194    // Keep fastest 50% (sort descending, take top half)
195    let mut sorted_desc = after_slow_start.clone();
196    sorted_desc.sort_by(|a, b| b.partial_cmp(a).unwrap_or(std::cmp::Ordering::Equal));
197    let top_half_count = (sorted_desc.len() as f64 / 2.0).ceil() as usize;
198    let top_half: Vec<f64> = sorted_desc[..top_half_count].to_vec();
199
200    // Primary: IQR filter → modified trimean
201    let cleaned = filter_outliers_iqr(&top_half, 1.5);
202    let iqr_result = if cleaned.is_empty() {
203        modified_trimean(&top_half)
204    } else {
205        modified_trimean(&cleaned)
206    };
207
208    // Cross-check: Winsorized trimean on same top-half set
209    if top_half.len() >= 4 {
210        let winsorized = winsorize(&top_half, 0.05, 0.95);
211        let win_result = modified_trimean(&winsorized);
212
213        if iqr_result > 0.0 && win_result > 0.0 {
214            let divergence = (iqr_result - win_result).abs() / iqr_result.max(win_result);
215            if divergence > 0.15 {
216                return (iqr_result + win_result) / 2.0;
217            }
218        }
219    }
220
221    iqr_result
222}
223
224// ── Jitter ──────────────────────────────────────────────────────────────
225
226/// RFC 3550 jitter: exponentially weighted moving average of inter-arrival
227/// variance. `J[i] = J[i-1] + (|D(i-1,i)| - J[i-1]) / 16`
228pub fn jitter_rfc3550(samples: &[f64]) -> f64 {
229    if samples.len() < 2 {
230        return 0.0;
231    }
232    let mut j = 0.0_f64;
233    for i in 1..samples.len() {
234        let d = (samples[i] - samples[i - 1]).abs();
235        j += (d - j) / 16.0;
236    }
237    j
238}
239
240/// Mean absolute deviation of consecutive samples (original method).
241pub fn jitter_mad(samples: &[f64]) -> f64 {
242    if samples.len() < 2 {
243        return 0.0;
244    }
245    let sum: f64 = samples.windows(2).map(|w| (w[1] - w[0]).abs()).sum();
246    sum / (samples.len() - 1) as f64
247}
248
249// ── Stability ───────────────────────────────────────────────────────────
250
251/// Coefficient of variation (stddev / mean). Lower = more stable.
252pub fn coefficient_of_variation(values: &[f64]) -> f64 {
253    let m = mean(values);
254    if m == 0.0 {
255        return 0.0;
256    }
257    stddev(values) / m
258}
259
260// ── Confidence-weighted merge ───────────────────────────────────────────
261
262/// Weighted average of two values. If one is zero/missing, return the other.
263/// `weight_a` is the weight for `a`; `b` gets `1 - weight_a`.
264pub fn weighted_merge(a: f64, b: f64, weight_a: f64) -> f64 {
265    let has_a = a > 0.0;
266    let has_b = b > 0.0;
267    if has_a && has_b {
268        a * weight_a + b * (1.0 - weight_a)
269    } else if has_a {
270        a
271    } else {
272        b
273    }
274}
275
276// ── Inverse-variance merge ─────────────────────────────────────────────
277
278#[derive(Debug, Clone, Serialize)]
279pub struct InverseVarianceResult {
280    pub value: f64,
281    pub weight_a: f64,
282    pub weight_b: f64,
283}
284
285/// Inverse-variance weighted merge of two estimates.
286/// Minimum-variance unbiased estimator for combining independent measurements.
287/// Weights clamped to [0.3, 0.7] to prevent one source from dominating.
288pub fn inverse_variance_merge(a: f64, var_a: f64, b: f64, var_b: f64) -> InverseVarianceResult {
289    if a <= 0.0 && b <= 0.0 {
290        return InverseVarianceResult {
291            value: 0.0,
292            weight_a: 0.5,
293            weight_b: 0.5,
294        };
295    }
296    if a <= 0.0 {
297        return InverseVarianceResult {
298            value: b,
299            weight_a: 0.0,
300            weight_b: 1.0,
301        };
302    }
303    if b <= 0.0 {
304        return InverseVarianceResult {
305            value: a,
306            weight_a: 1.0,
307            weight_b: 0.0,
308        };
309    }
310    if var_a <= 0.0 && var_b <= 0.0 {
311        return InverseVarianceResult {
312            value: (a + b) / 2.0,
313            weight_a: 0.5,
314            weight_b: 0.5,
315        };
316    }
317    if var_a <= 0.0 {
318        return InverseVarianceResult {
319            value: a,
320            weight_a: 1.0,
321            weight_b: 0.0,
322        };
323    }
324    if var_b <= 0.0 {
325        return InverseVarianceResult {
326            value: b,
327            weight_a: 0.0,
328            weight_b: 1.0,
329        };
330    }
331
332    let w_a = 1.0 / var_a;
333    let w_b = 1.0 / var_b;
334    let total = w_a + w_b;
335    let mut weight_a = w_a / total;
336    let mut weight_b = w_b / total;
337
338    // Clamp to [0.3, 0.7] to prevent degenerate weighting
339    if weight_a < 0.3 {
340        weight_a = 0.3;
341        weight_b = 0.7;
342    } else if weight_a > 0.7 {
343        weight_a = 0.7;
344        weight_b = 0.3;
345    }
346
347    InverseVarianceResult {
348        value: a * weight_a + b * weight_b,
349        weight_a,
350        weight_b,
351    }
352}
353
354// ── Bootstrap confidence interval ──────────────────────────────────────
355
356#[derive(Debug, Clone, Serialize)]
357pub struct BootstrapCI {
358    pub estimate: f64,
359    pub lower: f64,
360    pub upper: f64,
361    pub margin: f64,
362}
363
364/// Simple xorshift64 PRNG for bootstrap resampling. Deterministic given seed.
365struct Xorshift64(u64);
366
367impl Xorshift64 {
368    fn new(seed: u64) -> Self {
369        // Ensure non-zero seed
370        Self(if seed == 0 { 0x517cc1b727220a95 } else { seed })
371    }
372
373    fn next(&mut self) -> u64 {
374        let mut x = self.0;
375        x ^= x << 13;
376        x ^= x >> 7;
377        x ^= x << 17;
378        self.0 = x;
379        x
380    }
381
382    fn next_usize(&mut self, bound: usize) -> usize {
383        (self.next() % bound as u64) as usize
384    }
385}
386
387/// Bootstrap confidence interval via percentile method.
388/// Resamples the data `b` times, computes the statistic on each,
389/// then takes the alpha/2 and 1-alpha/2 percentiles as CI bounds.
390pub fn bootstrap_ci(
391    samples: &[f64],
392    stat_fn: fn(&[f64]) -> f64,
393    b: usize,
394    alpha: f64,
395) -> BootstrapCI {
396    if samples.len() < 4 {
397        let est = stat_fn(samples);
398        return BootstrapCI {
399            estimate: est,
400            lower: est,
401            upper: est,
402            margin: 0.0,
403        };
404    }
405
406    let estimate = stat_fn(samples);
407
408    // Seed from sample data for deterministic results
409    let seed = samples.iter().fold(0u64, |acc, v| {
410        acc.wrapping_add(v.to_bits())
411            .wrapping_mul(6364136223846793005)
412    });
413    let mut rng = Xorshift64::new(seed);
414
415    let n = samples.len();
416    let mut bootstrap_stats: Vec<f64> = Vec::with_capacity(b);
417    let mut resample = vec![0.0_f64; n];
418
419    for _ in 0..b {
420        for val in resample.iter_mut() {
421            *val = samples[rng.next_usize(n)];
422        }
423        bootstrap_stats.push(stat_fn(&resample));
424    }
425
426    bootstrap_stats.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
427
428    let lower = percentile(&bootstrap_stats, alpha / 2.0);
429    let upper = percentile(&bootstrap_stats, 1.0 - alpha / 2.0);
430
431    BootstrapCI {
432        estimate,
433        lower,
434        upper,
435        margin: (upper - lower) / 2.0,
436    }
437}