1use std::collections::HashMap;
7
/// Convenience alias for fallible operations in the comparison module.
pub type CompareResult<T> = Result<T, CompareError>;
10
/// Errors produced while building or comparing benchmark profiles.
#[derive(Debug, Clone, PartialEq)]
pub enum CompareError {
    /// Fewer samples were provided than the statistical test requires.
    InsufficientSamples { got: usize, need: usize },
    /// The named metric is absent from a profile.
    MetricNotFound { name: String },
    /// The named metric's samples have zero variance.
    ZeroVariance { metric: String },
    /// The supplied confidence level is out of the accepted range.
    InvalidConfidence { value: f64 },
    /// The two profiles share no metric names to compare.
    NoCommonMetrics,
}

impl std::fmt::Display for CompareError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::InsufficientSamples { got, need } => {
                write!(f, "Insufficient samples: got {got}, need {need}")
            }
            Self::MetricNotFound { name } => write!(f, "Metric not found: {name}"),
            Self::ZeroVariance { metric } => write!(f, "Zero variance in metric: {metric}"),
            Self::InvalidConfidence { value } => write!(f, "Invalid confidence level: {value}"),
            Self::NoCommonMetrics => f.write_str("Profiles have no common metrics"),
        }
    }
}

impl std::error::Error for CompareError {}
49
50#[derive(Debug, Clone)]
52pub struct BenchmarkProfile {
53 pub name: String,
55 pub description: Option<String>,
57 pub metrics: HashMap<String, MetricSamples>,
59 pub metadata: HashMap<String, String>,
61 pub timestamp_ns: u64,
63}
64
65impl BenchmarkProfile {
66 pub fn new(name: impl Into<String>) -> Self {
68 Self {
69 name: name.into(),
70 description: None,
71 metrics: HashMap::new(),
72 metadata: HashMap::new(),
73 timestamp_ns: std::time::SystemTime::now()
74 .duration_since(std::time::UNIX_EPOCH)
75 .map(|d| d.as_nanos() as u64)
76 .unwrap_or(0),
77 }
78 }
79
80 pub fn with_description(mut self, desc: impl Into<String>) -> Self {
82 self.description = Some(desc.into());
83 self
84 }
85
86 pub fn with_metadata(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
88 self.metadata.insert(key.into(), value.into());
89 self
90 }
91
92 pub fn add_metric(&mut self, name: impl Into<String>, samples: Vec<f64>) {
94 self.metrics
95 .insert(name.into(), MetricSamples::new(samples));
96 }
97
98 pub fn get_metric(&self, name: &str) -> Option<&MetricSamples> {
100 self.metrics.get(name)
101 }
102
103 pub fn metric_names(&self) -> impl Iterator<Item = &String> {
105 self.metrics.keys()
106 }
107
108 pub fn metric_count(&self) -> usize {
110 self.metrics.len()
111 }
112}
113
/// Raw samples for one metric together with cached summary statistics.
#[derive(Debug, Clone)]
pub struct MetricSamples {
    /// The raw observations.
    pub values: Vec<f64>,
    // Statistics are cached at construction so repeated queries are O(1).
    mean: f64,
    variance: f64,
    std_dev: f64,
}

impl MetricSamples {
    /// Builds a sample set and precomputes mean, variance, and
    /// standard deviation.
    ///
    /// Variance uses Bessel's correction (divides by n - 1); for a
    /// single sample the divisor is clamped to 1, yielding 0. An
    /// empty input yields all-zero statistics.
    pub fn new(values: Vec<f64>) -> Self {
        let n = values.len();
        if n == 0 {
            return Self {
                values,
                mean: 0.0,
                variance: 0.0,
                std_dev: 0.0,
            };
        }
        let mean = values.iter().sum::<f64>() / n as f64;
        let sq_dev_sum: f64 = values.iter().map(|x| (x - mean).powi(2)).sum();
        let variance = sq_dev_sum / (n - 1).max(1) as f64;
        Self {
            std_dev: variance.sqrt(),
            values,
            mean,
            variance,
        }
    }

    /// Number of recorded samples.
    pub fn count(&self) -> usize {
        self.values.len()
    }

    /// Arithmetic mean (0.0 when empty).
    pub fn mean(&self) -> f64 {
        self.mean
    }

    /// Sample variance (0.0 with fewer than two samples).
    pub fn variance(&self) -> f64 {
        self.variance
    }

    /// Sample standard deviation (square root of the variance).
    pub fn std_dev(&self) -> f64 {
        self.std_dev
    }

    /// Smallest sample; +infinity when empty.
    pub fn min(&self) -> f64 {
        let mut lo = f64::INFINITY;
        for &v in &self.values {
            lo = lo.min(v);
        }
        lo
    }

    /// Largest sample; -infinity when empty.
    pub fn max(&self) -> f64 {
        let mut hi = f64::NEG_INFINITY;
        for &v in &self.values {
            hi = hi.max(v);
        }
        hi
    }
}
181
/// Result of Welch's t-test between two sample sets.
#[derive(Debug, Clone)]
pub struct WelchTestResult {
    /// The t statistic.
    pub t_statistic: f64,
    /// Degrees of freedom used by the test.
    pub degrees_of_freedom: f64,
    /// p-value associated with the t statistic.
    pub p_value: f64,
    /// Whether the result was deemed significant at `confidence_level`.
    pub significant: bool,
    /// Confidence level the test was evaluated at.
    pub confidence_level: f64,
}
196
/// Qualitative bucket for a Cohen's d effect size, using the
/// conventional 0.2 / 0.5 / 0.8 cutoffs on |d|.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EffectMagnitude {
    /// |d| < 0.2
    Negligible,
    /// 0.2 <= |d| < 0.5
    Small,
    /// 0.5 <= |d| < 0.8
    Medium,
    /// |d| >= 0.8
    Large,
}

impl EffectMagnitude {
    /// Classifies a Cohen's d value by its absolute magnitude.
    pub fn from_cohens_d(d: f64) -> Self {
        match d.abs() {
            a if a < 0.2 => Self::Negligible,
            a if a < 0.5 => Self::Small,
            a if a < 0.8 => Self::Medium,
            // Covers |d| >= 0.8, and NaN (every guard compares false),
            // matching the original if-chain's fallthrough.
            _ => Self::Large,
        }
    }
}
225
/// Effect-size measures for one metric comparison.
#[derive(Debug, Clone)]
pub struct EffectSizeResult {
    /// Cohen's d standardized mean difference.
    pub cohens_d: f64,
    /// Qualitative bucket derived from `cohens_d`.
    pub magnitude: EffectMagnitude,
    /// Change expressed as a percentage; sign convention is set by
    /// the comparison routine (not visible here).
    pub percent_change: f64,
}
236
/// Direction a metric moved between the baseline and comparison runs.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ChangeDirection {
    /// The metric changed for the better.
    Improved,
    /// The metric changed for the worse.
    Regressed,
    /// No meaningful change was detected.
    NoChange,
}
247
/// Per-metric result of comparing a baseline profile against a
/// comparison profile.
#[derive(Debug, Clone)]
pub struct MetricComparison {
    /// Metric name.
    pub name: String,
    /// Mean of the baseline samples.
    pub baseline_mean: f64,
    /// Standard deviation of the baseline samples.
    pub baseline_std: f64,
    /// Mean of the comparison samples.
    pub comparison_mean: f64,
    /// Standard deviation of the comparison samples.
    pub comparison_std: f64,
    /// Welch's t-test outcome for this metric.
    pub t_test: WelchTestResult,
    /// Effect-size summary for this metric.
    pub effect_size: EffectSizeResult,
    /// Whether the metric improved, regressed, or stayed flat.
    pub direction: ChangeDirection,
    /// Regression flag; NOTE(review): presumably derived from
    /// `direction` plus significance — confirm at the computation site.
    pub is_regression: bool,
    /// Lower bound of the confidence interval computed by the
    /// comparison routine (interval semantics not visible here).
    pub ci_lower: f64,
    /// Upper bound of that confidence interval.
    pub ci_upper: f64,
}
274
/// Aggregate result of comparing two benchmark profiles.
#[derive(Debug, Clone)]
pub struct ProfileComparison {
    /// Name of the baseline profile.
    pub baseline_name: String,
    /// Name of the profile compared against the baseline.
    pub comparison_name: String,
    /// Per-metric comparison results.
    pub metrics: Vec<MetricComparison>,
    /// Names of metrics flagged as regressions.
    pub regressions: Vec<String>,
    /// Names of metrics flagged as improvements.
    pub improvements: Vec<String>,
    /// Overall judgement for the comparison.
    pub verdict: ComparisonVerdict,
    /// Significance threshold after multiple-comparison correction;
    /// the correction method is applied by the comparison routine.
    pub corrected_alpha: f64,
}
293
294impl ProfileComparison {
295 pub fn regression_count(&self) -> usize {
297 self.regressions.len()
298 }
299
300 pub fn improvement_count(&self) -> usize {
302 self.improvements.len()
303 }
304
305 pub fn unchanged_count(&self) -> usize {
307 self.metrics.len() - self.regression_count() - self.improvement_count()
308 }
309
310 pub fn has_regressions(&self) -> bool {
312 !self.regressions.is_empty()
313 }
314}
315
/// Overall judgement for a profile comparison; the criteria that
/// select each variant live in the comparison routine, not here.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ComparisonVerdict {
    /// The comparison raised no concerns.
    Pass,
    /// The comparison found something worth attention but not failure.
    Warning,
    /// The comparison failed.
    Fail,
}