// cbtop/profile_compare/types.rs
//! Data types for profile comparison (PMAT-045)
//!
//! Contains error types, benchmark profiles, metric samples,
//! and all result/verdict structures.

use std::collections::HashMap;
/// Result type for profile comparison operations.
pub type CompareResult<T> = Result<T, CompareError>;

/// Errors that can occur during profile comparison.
#[derive(Debug, Clone, PartialEq)]
pub enum CompareError {
    /// Not enough samples were provided to run the comparison.
    InsufficientSamples { got: usize, need: usize },
    /// The requested metric does not exist in the profile.
    MetricNotFound { name: String },
    /// The metric's samples show no variation at all.
    ZeroVariance { metric: String },
    /// The supplied confidence level is not usable.
    InvalidConfidence { value: f64 },
    /// The two profiles share no metric names.
    NoCommonMetrics,
}

impl std::fmt::Display for CompareError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::InsufficientSamples { got, need } => {
                write!(f, "Insufficient samples: got {got}, need {need}")
            }
            Self::MetricNotFound { name } => write!(f, "Metric not found: {name}"),
            Self::ZeroVariance { metric } => write!(f, "Zero variance in metric: {metric}"),
            Self::InvalidConfidence { value } => write!(f, "Invalid confidence level: {value}"),
            Self::NoCommonMetrics => f.write_str("Profiles have no common metrics"),
        }
    }
}

impl std::error::Error for CompareError {}

/// A benchmark profile containing multiple metrics.
///
/// A profile is a named collection of [`MetricSamples`] keyed by metric
/// name, plus free-form string metadata and a capture timestamp.
#[derive(Debug, Clone)]
pub struct BenchmarkProfile {
    /// Profile name/identifier.
    pub name: String,
    /// Optional human-readable description.
    pub description: Option<String>,
    /// Metrics with their sample values, keyed by metric name.
    pub metrics: HashMap<String, MetricSamples>,
    /// Free-form key/value metadata about the profile.
    pub metadata: HashMap<String, String>,
    /// Timestamp when the profile was captured, in nanoseconds since the
    /// Unix epoch (0 if the system clock read earlier than the epoch).
    pub timestamp_ns: u64,
}

65impl BenchmarkProfile {
66    /// Create a new benchmark profile
67    pub fn new(name: impl Into<String>) -> Self {
68        Self {
69            name: name.into(),
70            description: None,
71            metrics: HashMap::new(),
72            metadata: HashMap::new(),
73            timestamp_ns: std::time::SystemTime::now()
74                .duration_since(std::time::UNIX_EPOCH)
75                .map(|d| d.as_nanos() as u64)
76                .unwrap_or(0),
77        }
78    }
79
80    /// Set profile description
81    pub fn with_description(mut self, desc: impl Into<String>) -> Self {
82        self.description = Some(desc.into());
83        self
84    }
85
86    /// Add metadata
87    pub fn with_metadata(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
88        self.metadata.insert(key.into(), value.into());
89        self
90    }
91
92    /// Add a metric with samples
93    pub fn add_metric(&mut self, name: impl Into<String>, samples: Vec<f64>) {
94        self.metrics
95            .insert(name.into(), MetricSamples::new(samples));
96    }
97
98    /// Get metric by name
99    pub fn get_metric(&self, name: &str) -> Option<&MetricSamples> {
100        self.metrics.get(name)
101    }
102
103    /// Get all metric names
104    pub fn metric_names(&self) -> impl Iterator<Item = &String> {
105        self.metrics.keys()
106    }
107
108    /// Number of metrics
109    pub fn metric_count(&self) -> usize {
110        self.metrics.len()
111    }
112}
113
/// Samples for a single metric, with precomputed summary statistics.
#[derive(Debug, Clone)]
pub struct MetricSamples {
    /// Raw sample values, in insertion order.
    pub values: Vec<f64>,
    /// Arithmetic mean of `values` (0.0 when empty).
    mean: f64,
    /// Sample variance with Bessel's correction (0.0 for fewer than two samples).
    variance: f64,
    /// Standard deviation, the square root of `variance`.
    std_dev: f64,
}

impl MetricSamples {
    /// Build samples and precompute mean, variance, and standard deviation.
    ///
    /// Empty input yields all-zero statistics. Variance uses an `n - 1`
    /// denominator (Bessel's correction), clamped to 1 so a single sample
    /// yields 0 variance instead of dividing by zero.
    pub fn new(values: Vec<f64>) -> Self {
        let n = values.len();
        if n == 0 {
            return Self {
                values,
                mean: 0.0,
                variance: 0.0,
                std_dev: 0.0,
            };
        }
        let mean = values.iter().sum::<f64>() / n as f64;
        let denom = if n > 1 { (n - 1) as f64 } else { 1.0 };
        let variance = values
            .iter()
            .map(|v| (v - mean) * (v - mean))
            .sum::<f64>()
            / denom;
        Self {
            values,
            mean,
            variance,
            std_dev: variance.sqrt(),
        }
    }

    /// Number of samples.
    pub fn count(&self) -> usize {
        self.values.len()
    }

    /// Arithmetic mean (0.0 when there are no samples).
    pub fn mean(&self) -> f64 {
        self.mean
    }

    /// Sample variance (`n - 1` denominator).
    pub fn variance(&self) -> f64 {
        self.variance
    }

    /// Standard deviation.
    pub fn std_dev(&self) -> f64 {
        self.std_dev
    }

    /// Smallest sample; `f64::INFINITY` when there are no samples.
    pub fn min(&self) -> f64 {
        self.values.iter().copied().fold(f64::INFINITY, |a, b| a.min(b))
    }

    /// Largest sample; `f64::NEG_INFINITY` when there are no samples.
    pub fn max(&self) -> f64 {
        self.values
            .iter()
            .copied()
            .fold(f64::NEG_INFINITY, |a, b| a.max(b))
    }
}

/// Result of Welch's unequal-variances t-test on one metric.
#[derive(Debug, Clone)]
pub struct WelchTestResult {
    /// The t-statistic for the difference of means.
    pub t_statistic: f64,
    /// Degrees of freedom from the Welch–Satterthwaite approximation
    /// (generally non-integral).
    pub degrees_of_freedom: f64,
    /// Two-tailed p-value for the observed t-statistic.
    pub p_value: f64,
    /// Whether the result is statistically significant at the chosen
    /// confidence level.
    pub significant: bool,
    /// Confidence level the significance decision was made at
    /// (e.g. 0.95 — presumably a fraction in (0, 1); confirm with producer).
    pub confidence_level: f64,
}

/// Qualitative interpretation of a Cohen's d effect size.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EffectMagnitude {
    /// Negligible effect (|d| < 0.2).
    Negligible,
    /// Small effect (0.2 <= |d| < 0.5).
    Small,
    /// Medium effect (0.5 <= |d| < 0.8).
    Medium,
    /// Large effect (|d| >= 0.8).
    Large,
}

impl EffectMagnitude {
    /// Classify a Cohen's d value using the conventional 0.2 / 0.5 / 0.8
    /// thresholds; the sign of `d` is ignored.
    pub fn from_cohens_d(d: f64) -> Self {
        match d.abs() {
            m if m < 0.2 => Self::Negligible,
            m if m < 0.5 => Self::Small,
            m if m < 0.8 => Self::Medium,
            _ => Self::Large,
        }
    }
}

/// Effect size analysis for one metric comparison.
#[derive(Debug, Clone)]
pub struct EffectSizeResult {
    /// Cohen's d effect size (signed; negative means the comparison mean
    /// is below the baseline mean).
    pub cohens_d: f64,
    /// Qualitative interpretation of `cohens_d` (see [`EffectMagnitude`]).
    pub magnitude: EffectMagnitude,
    /// Percentage change, computed as (new - old) / old * 100.
    pub percent_change: f64,
}

/// Direction of a metric's change between baseline and comparison.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ChangeDirection {
    /// Performance improved (metric decreased for latency-style metrics,
    /// increased for throughput-style metrics).
    Improved,
    /// Performance regressed.
    Regressed,
    /// No statistically significant change detected.
    NoChange,
}

/// Result of comparing a single metric between two profiles.
#[derive(Debug, Clone)]
pub struct MetricComparison {
    /// Metric name.
    pub name: String,
    /// Mean of the baseline (A) samples.
    pub baseline_mean: f64,
    /// Standard deviation of the baseline (A) samples.
    pub baseline_std: f64,
    /// Mean of the comparison (B) samples.
    pub comparison_mean: f64,
    /// Standard deviation of the comparison (B) samples.
    pub comparison_std: f64,
    /// Welch's t-test result for the difference of means.
    pub t_test: WelchTestResult,
    /// Effect size analysis (Cohen's d and percent change).
    pub effect_size: EffectSizeResult,
    /// Direction of the change (improved / regressed / no change).
    pub direction: ChangeDirection,
    /// Whether this metric counts as a regression.
    pub is_regression: bool,
    /// Lower bound of the confidence interval for the mean difference.
    pub ci_lower: f64,
    /// Upper bound of the confidence interval for the mean difference.
    pub ci_upper: f64,
}

/// Complete A/B comparison result across all common metrics of two profiles.
#[derive(Debug, Clone)]
pub struct ProfileComparison {
    /// Name of the baseline (A) profile.
    pub baseline_name: String,
    /// Name of the comparison (B) profile.
    pub comparison_name: String,
    /// Per-metric comparison results.
    pub metrics: Vec<MetricComparison>,
    /// Names of metrics that regressed significantly.
    pub regressions: Vec<String>,
    /// Names of metrics that improved significantly.
    pub improvements: Vec<String>,
    /// Overall pass/warn/fail verdict.
    pub verdict: ComparisonVerdict,
    /// Bonferroni-corrected alpha used to control for multiple comparisons.
    pub corrected_alpha: f64,
}

294impl ProfileComparison {
295    /// Get regression count
296    pub fn regression_count(&self) -> usize {
297        self.regressions.len()
298    }
299
300    /// Get improvement count
301    pub fn improvement_count(&self) -> usize {
302        self.improvements.len()
303    }
304
305    /// Get metrics with no significant change
306    pub fn unchanged_count(&self) -> usize {
307        self.metrics.len() - self.regression_count() - self.improvement_count()
308    }
309
310    /// Check if comparison detected any regressions
311    pub fn has_regressions(&self) -> bool {
312        !self.regressions.is_empty()
313    }
314}
315
/// Overall verdict of an A/B profile comparison.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ComparisonVerdict {
    /// All metrics stable or improved.
    Pass,
    /// Minor regressions detected (< 5% — presumably percent change;
    /// confirm against the verdict-producing code).
    Warning,
    /// Significant regressions detected (>= 5%).
    Fail,
}
325}