1use crate::outliers::{OutlierAnalysis, OutlierMethod, detect_outliers};
8use crate::percentiles::compute_percentile;
9
10#[derive(Debug, Clone)]
12pub struct SummaryStatistics {
13 pub mean: f64,
15 pub median: f64,
17 pub std_dev: f64,
19
20 pub min: f64,
22 pub max: f64,
24
25 pub p50: f64,
27 pub p90: f64,
29 pub p95: f64,
31 pub p99: f64,
33 pub p999: f64,
35
36 pub skewness: f64,
38 pub kurtosis: f64,
40
41 pub sample_count: usize,
43 pub outlier_count: usize,
45 pub outlier_analysis: OutlierAnalysis,
47}
48
49#[derive(Debug, Clone, Default)]
51pub struct CyclesStatistics {
52 pub mean_cycles: f64,
54 pub median_cycles: f64,
56 pub std_dev_cycles: f64,
58 pub min_cycles: u64,
60 pub max_cycles: u64,
62 pub cycles_per_ns: f64,
64}
65
66pub fn compute_summary(samples: &[f64], outlier_method: OutlierMethod) -> SummaryStatistics {
82 if samples.is_empty() {
83 return SummaryStatistics {
84 mean: 0.0,
85 median: 0.0,
86 std_dev: 0.0,
87 min: 0.0,
88 max: 0.0,
89 p50: 0.0,
90 p90: 0.0,
91 p95: 0.0,
92 p99: 0.0,
93 p999: 0.0,
94 skewness: 0.0,
95 kurtosis: 0.0,
96 sample_count: 0,
97 outlier_count: 0,
98 outlier_analysis: detect_outliers(samples, outlier_method),
99 };
100 }
101
102 let analysis = detect_outliers(samples, outlier_method);
104 let cleaned = &analysis.cleaned_samples;
105 let all = &analysis.all_samples;
106
107 let mean = if cleaned.is_empty() {
109 0.0
110 } else {
111 cleaned.iter().sum::<f64>() / cleaned.len() as f64
112 };
113
114 let median = if cleaned.is_empty() {
115 0.0
116 } else {
117 compute_percentile(cleaned, 50.0)
118 };
119
120 let std_dev = if cleaned.len() < 2 {
121 0.0
122 } else {
123 let variance =
124 cleaned.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / (cleaned.len() - 1) as f64;
125 variance.sqrt()
126 };
127
128 let min = all
130 .iter()
131 .cloned()
132 .min_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
133 .unwrap_or(0.0);
134 let max = all
135 .iter()
136 .cloned()
137 .max_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
138 .unwrap_or(0.0);
139
140 let p50 = compute_percentile(all, 50.0);
142 let p90 = compute_percentile(all, 90.0);
143 let p95 = compute_percentile(all, 95.0);
144 let p99 = compute_percentile(all, 99.0);
145 let p999 = compute_percentile(all, 99.9);
146
147 let (skewness, kurtosis) = if cleaned.len() < 3 || std_dev < f64::EPSILON {
149 (0.0, 0.0)
150 } else {
151 let n = cleaned.len() as f64;
152 let mut m3 = 0.0;
153 let mut m4 = 0.0;
154 for &x in cleaned.iter() {
155 let d = (x - mean) / std_dev;
156 let d2 = d * d;
157 m3 += d2 * d;
158 m4 += d2 * d2;
159 }
160 (m3 / n, m4 / n - 3.0)
161 };
162
163 SummaryStatistics {
164 mean,
165 median,
166 std_dev,
167 min,
168 max,
169 p50,
170 p90,
171 p95,
172 p99,
173 p999,
174 skewness,
175 kurtosis,
176 sample_count: all.len(),
177 outlier_count: analysis.outlier_indices.len(),
178 outlier_analysis: analysis,
179 }
180}
181
182impl SummaryStatistics {
183 pub fn coefficient_of_variation(&self) -> f64 {
185 if self.mean == 0.0 {
186 0.0
187 } else {
188 (self.std_dev / self.mean) * 100.0
189 }
190 }
191
192 pub fn iqr(&self) -> f64 {
194 let q1 = compute_percentile(&self.outlier_analysis.all_samples, 25.0);
195 let q3 = compute_percentile(&self.outlier_analysis.all_samples, 75.0);
196 q3 - q1
197 }
198
199 pub fn is_stable(&self, cv_threshold: f64) -> bool {
201 self.coefficient_of_variation() < cv_threshold
202 }
203
204 pub fn distribution_shape(&self) -> String {
208 let skew_label = if self.skewness.abs() < 0.5 {
209 "symmetric"
210 } else if self.skewness > 0.0 {
211 "right-skewed"
212 } else {
213 "left-skewed"
214 };
215
216 let kurt_label = if self.kurtosis.abs() < 1.0 {
217 "normal-tailed"
218 } else if self.kurtosis > 0.0 {
219 "heavy-tailed"
220 } else {
221 "light-tailed"
222 };
223
224 format!("{}, {}", skew_label, kurt_label)
225 }
226}
227
228pub fn compute_cycles_stats(cycles: &[u64], nanos: &[f64]) -> CyclesStatistics {
232 if cycles.is_empty() {
233 return CyclesStatistics::default();
234 }
235
236 let cycles_f64: Vec<f64> = cycles.iter().map(|&c| c as f64).collect();
238
239 let mean_cycles = cycles_f64.iter().sum::<f64>() / cycles_f64.len() as f64;
241
242 let mut sorted = cycles_f64.clone();
244 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
245 let median_cycles = if sorted.len() % 2 == 0 {
246 let mid = sorted.len() / 2;
247 (sorted[mid - 1] + sorted[mid]) / 2.0
248 } else {
249 sorted[sorted.len() / 2]
250 };
251
252 let std_dev_cycles = if cycles.len() < 2 {
254 0.0
255 } else {
256 let variance = cycles_f64
257 .iter()
258 .map(|x| (x - mean_cycles).powi(2))
259 .sum::<f64>()
260 / (cycles_f64.len() - 1) as f64;
261 variance.sqrt()
262 };
263
264 let min_cycles = *cycles.iter().min().unwrap_or(&0);
266 let max_cycles = *cycles.iter().max().unwrap_or(&0);
267
268 let cycles_per_ns = if !nanos.is_empty() {
270 let total_nanos: f64 = nanos.iter().sum();
271 let total_cycles: f64 = cycles_f64.iter().sum();
272 if total_nanos > 0.0 {
273 total_cycles / total_nanos
274 } else {
275 0.0
276 }
277 } else {
278 0.0
279 };
280
281 CyclesStatistics {
282 mean_cycles,
283 median_cycles,
284 std_dev_cycles,
285 min_cycles,
286 max_cycles,
287 cycles_per_ns,
288 }
289}
290
291#[cfg(test)]
292mod tests {
293 use super::*;
294
295 #[test]
296 fn test_basic_summary() {
297 let samples = vec![1.0, 2.0, 3.0, 4.0, 5.0];
298 let summary = compute_summary(&samples, OutlierMethod::default());
299
300 assert!((summary.mean - 3.0).abs() < 0.01);
301 assert!((summary.median - 3.0).abs() < 0.01);
302 assert_eq!(summary.min, 1.0);
303 assert_eq!(summary.max, 5.0);
304 assert_eq!(summary.sample_count, 5);
305 }
306
307 #[test]
308 fn test_outlier_handling() {
309 let samples = vec![1.0, 2.0, 3.0, 4.0, 5.0, 100.0];
311 let summary = compute_summary(&samples, OutlierMethod::default());
312
313 assert!(summary.mean < 10.0);
315
316 assert_eq!(summary.max, 100.0);
318
319 assert!(summary.p99 > 50.0);
321
322 assert_eq!(summary.outlier_count, 1);
323 }
324
325 #[test]
326 fn test_coefficient_of_variation() {
327 let samples = vec![100.0, 100.0, 100.0, 100.0, 100.0];
328 let summary = compute_summary(&samples, OutlierMethod::None);
329
330 assert!((summary.coefficient_of_variation() - 0.0).abs() < f64::EPSILON);
332 }
333
334 #[test]
335 fn test_empty_samples() {
336 let samples: Vec<f64> = Vec::new();
337 let summary = compute_summary(&samples, OutlierMethod::default());
338
339 assert_eq!(summary.sample_count, 0);
340 assert!((summary.mean - 0.0).abs() < f64::EPSILON);
341 }
342
343 #[test]
344 fn test_skewness_symmetric() {
345 let samples: Vec<f64> = (1..=100).map(|x| x as f64).collect();
347 let summary = compute_summary(&samples, OutlierMethod::None);
348 assert!(
349 summary.skewness.abs() < 0.1,
350 "skewness={}",
351 summary.skewness
352 );
353 assert!(summary.kurtosis < 0.0, "kurtosis={}", summary.kurtosis);
355 assert!(summary.distribution_shape().contains("symmetric"));
356 }
357
358 #[test]
359 fn test_skewness_right_skewed() {
360 let samples: Vec<f64> = (0..200).map(|x| (x as f64 * 0.05).exp()).collect();
362 let summary = compute_summary(&samples, OutlierMethod::None);
363 assert!(summary.skewness > 0.5, "skewness={}", summary.skewness);
364 assert!(summary.distribution_shape().contains("right-skewed"));
365 }
366
367 #[test]
368 fn test_skewness_left_skewed() {
369 let samples: Vec<f64> = (0..200).map(|x| 200.0 - (x as f64 * 0.05).exp()).collect();
371 let summary = compute_summary(&samples, OutlierMethod::None);
372 assert!(summary.skewness < -0.5, "skewness={}", summary.skewness);
373 assert!(summary.distribution_shape().contains("left-skewed"));
374 }
375
376 #[test]
377 fn test_skewness_constant_values() {
378 let samples = vec![5.0, 5.0, 5.0, 5.0, 5.0];
379 let summary = compute_summary(&samples, OutlierMethod::None);
380 assert_eq!(summary.skewness, 0.0);
381 assert_eq!(summary.kurtosis, 0.0);
382 }
383
384 #[test]
385 fn test_skewness_two_samples() {
386 let samples = vec![1.0, 2.0];
387 let summary = compute_summary(&samples, OutlierMethod::None);
388 assert_eq!(summary.skewness, 0.0);
390 assert_eq!(summary.kurtosis, 0.0);
391 }
392
393 #[test]
394 fn test_kurtosis_normal() {
395 let samples: Vec<f64> = (0..1000).map(|x| x as f64).collect();
397 let summary = compute_summary(&samples, OutlierMethod::None);
398 assert!(summary.kurtosis < 0.0, "kurtosis={}", summary.kurtosis);
400 }
401
402 #[test]
403 fn test_cycles_stats() {
404 let cycles = vec![3000u64, 3100, 2900, 3050, 2950];
405 let nanos = vec![1000.0, 1033.0, 967.0, 1017.0, 983.0];
406 let stats = compute_cycles_stats(&cycles, &nanos);
407
408 assert!((stats.mean_cycles - 3000.0).abs() < 50.0);
410 assert_eq!(stats.min_cycles, 2900);
411 assert_eq!(stats.max_cycles, 3100);
412 assert!((stats.cycles_per_ns - 3.0).abs() < 0.5);
414 }
415
416 #[test]
417 fn test_cycles_stats_empty() {
418 let cycles: Vec<u64> = vec![];
419 let nanos: Vec<f64> = vec![];
420 let stats = compute_cycles_stats(&cycles, &nanos);
421
422 assert!((stats.mean_cycles - 0.0).abs() < f64::EPSILON);
423 assert_eq!(stats.min_cycles, 0);
424 assert_eq!(stats.max_cycles, 0);
425 }
426}