codec_eval/stats/
mod.rs

1//! Statistical analysis and Pareto front calculation.
2//!
3//! This module provides tools for analyzing codec comparison results,
4//! including Pareto front calculation for rate-distortion analysis.
5
6pub mod chart;
7mod pareto;
8
9pub use chart::{ChartConfig, ChartPoint, ChartSeries, generate_svg};
10pub use pareto::{ParetoFront, RDPoint};
11
12use serde::{Deserialize, Serialize};
13
14/// Descriptive statistics for a set of measurements.
15#[derive(Debug, Clone, Serialize, Deserialize)]
16pub struct Summary {
17    /// Number of values.
18    pub count: usize,
19    /// Mean value.
20    pub mean: f64,
21    /// Median value.
22    pub median: f64,
23    /// Standard deviation.
24    pub std_dev: f64,
25    /// Minimum value.
26    pub min: f64,
27    /// Maximum value.
28    pub max: f64,
29    /// 5th percentile.
30    pub p5: f64,
31    /// 25th percentile.
32    pub p25: f64,
33    /// 75th percentile.
34    pub p75: f64,
35    /// 95th percentile.
36    pub p95: f64,
37}
38
39impl Summary {
40    /// Compute summary statistics for a slice of values.
41    ///
42    /// Returns `None` if the slice is empty.
43    #[must_use]
44    pub fn compute(values: &[f64]) -> Option<Self> {
45        if values.is_empty() {
46            return None;
47        }
48
49        let mut sorted = values.to_vec();
50        sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
51
52        let count = sorted.len();
53        let sum: f64 = sorted.iter().sum();
54        let mean = sum / count as f64;
55
56        let variance: f64 = sorted.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / count as f64;
57        let std_dev = variance.sqrt();
58
59        let median = percentile(&sorted, 50.0);
60        let min = sorted[0];
61        let max = sorted[count - 1];
62
63        Some(Self {
64            count,
65            mean,
66            median,
67            std_dev,
68            min,
69            max,
70            p5: percentile(&sorted, 5.0),
71            p25: percentile(&sorted, 25.0),
72            p75: percentile(&sorted, 75.0),
73            p95: percentile(&sorted, 95.0),
74        })
75    }
76}
77
78/// Calculate a percentile from sorted values.
79fn percentile(sorted: &[f64], p: f64) -> f64 {
80    if sorted.is_empty() {
81        return 0.0;
82    }
83    if sorted.len() == 1 {
84        return sorted[0];
85    }
86
87    let p = p.clamp(0.0, 100.0) / 100.0;
88    let idx = p * (sorted.len() - 1) as f64;
89    let lower = idx.floor() as usize;
90    let upper = idx.ceil() as usize;
91    let frac = idx - lower as f64;
92
93    if lower == upper {
94        sorted[lower]
95    } else {
96        sorted[lower] * (1.0 - frac) + sorted[upper] * frac
97    }
98}
99
100/// Calculate BD-Rate (Bjontegaard Delta Rate).
101///
102/// BD-Rate measures the average bitrate difference between two rate-distortion
103/// curves at the same quality level. A negative value means the test curve
104/// is more efficient (lower bitrate at same quality).
105///
106/// # Arguments
107///
108/// * `reference` - Reference curve points (bitrate, quality).
109/// * `test` - Test curve points (bitrate, quality).
110///
111/// # Returns
112///
113/// BD-Rate as a percentage. Negative = test is better.
114#[must_use]
115pub fn bd_rate(reference: &[(f64, f64)], test: &[(f64, f64)]) -> Option<f64> {
116    if reference.len() < 4 || test.len() < 4 {
117        return None;
118    }
119
120    // Sort by quality
121    let mut ref_sorted: Vec<_> = reference.to_vec();
122    let mut test_sorted: Vec<_> = test.to_vec();
123    ref_sorted.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
124    test_sorted.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
125
126    // Find overlapping quality range
127    let min_quality = ref_sorted[0].1.max(test_sorted[0].1);
128    let max_quality = ref_sorted.last()?.1.min(test_sorted.last()?.1);
129
130    if min_quality >= max_quality {
131        return None;
132    }
133
134    // Use log-rate for integration
135    let ref_log: Vec<_> = ref_sorted.iter().map(|(r, q)| (r.ln(), *q)).collect();
136    let test_log: Vec<_> = test_sorted.iter().map(|(r, q)| (r.ln(), *q)).collect();
137
138    // Integrate using trapezoidal rule (simplified)
139    let ref_area = integrate_curve(&ref_log, min_quality, max_quality);
140    let test_area = integrate_curve(&test_log, min_quality, max_quality);
141
142    let avg_ref = ref_area / (max_quality - min_quality);
143    let avg_test = test_area / (max_quality - min_quality);
144
145    // BD-Rate = (10^(avg_test - avg_ref) - 1) * 100
146    let bd = (10_f64.powf(avg_test - avg_ref) - 1.0) * 100.0;
147
148    Some(bd)
149}
150
151/// Simple trapezoidal integration of a curve.
152fn integrate_curve(points: &[(f64, f64)], min_x: f64, max_x: f64) -> f64 {
153    let mut area = 0.0;
154
155    for window in points.windows(2) {
156        let (y0, x0) = window[0];
157        let (y1, x1) = window[1];
158
159        // Skip segments outside range
160        if x1 < min_x || x0 > max_x {
161            continue;
162        }
163
164        // Clip to range
165        let x0 = x0.max(min_x);
166        let x1 = x1.min(max_x);
167
168        // Trapezoidal area
169        area += (y0 + y1) / 2.0 * (x1 - x0);
170    }
171
172    area
173}
174
175#[cfg(test)]
176mod tests {
177    use super::*;
178
179    #[test]
180    fn test_summary_compute() {
181        let values = vec![1.0, 2.0, 3.0, 4.0, 5.0];
182        let summary = Summary::compute(&values).unwrap();
183
184        assert_eq!(summary.count, 5);
185        assert!((summary.mean - 3.0).abs() < 0.001);
186        assert!((summary.median - 3.0).abs() < 0.001);
187        assert!((summary.min - 1.0).abs() < 0.001);
188        assert!((summary.max - 5.0).abs() < 0.001);
189    }
190
191    #[test]
192    fn test_summary_empty() {
193        assert!(Summary::compute(&[]).is_none());
194    }
195
196    #[test]
197    fn test_percentile() {
198        let sorted = vec![1.0, 2.0, 3.0, 4.0, 5.0];
199        assert!((percentile(&sorted, 0.0) - 1.0).abs() < 0.001);
200        assert!((percentile(&sorted, 50.0) - 3.0).abs() < 0.001);
201        assert!((percentile(&sorted, 100.0) - 5.0).abs() < 0.001);
202    }
203
204    #[test]
205    fn test_bd_rate_same_curve() {
206        let curve = vec![
207            (1000.0, 30.0),
208            (2000.0, 35.0),
209            (4000.0, 40.0),
210            (8000.0, 45.0),
211        ];
212
213        let bd = bd_rate(&curve, &curve);
214        assert!(bd.is_some());
215        assert!(bd.unwrap().abs() < 0.1); // Should be ~0 for same curve
216    }
217}