Skip to main content

certeza/benchmark/
mod.rs

1//! Scientific benchmarking infrastructure for certeza
2//!
3//! This module provides data structures and utilities for reproducible
4//! performance measurement following the scientific reporting specification.
5//!
6//! # Examples
7//!
8//! See the `metadata` module for metadata collection functions.
9
10use serde::{Deserialize, Serialize};
11use std::collections::HashMap;
12
/// Complete benchmark report with metadata and results.
///
/// Top-level document of this module: it bundles the environment description,
/// every recorded benchmark result, and an aggregate summary. It is the type
/// read and written by the JSON helpers on `BenchmarkReport`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BenchmarkReport {
    /// Schema version for backward compatibility
    /// (`BenchmarkReport::new` sets this to `"1.0"`).
    pub schema_version: String,

    /// Complete environmental metadata
    pub metadata: BenchmarkMetadata,

    /// Individual benchmark results; `BenchmarkReport::add_benchmark`
    /// appends to the end of this list.
    pub benchmarks: Vec<BenchmarkResult>,

    /// Summary statistics across all benchmarks; refreshed by
    /// `BenchmarkReport::add_benchmark` after every insertion.
    pub summary: BenchmarkSummary,
}
28
/// Environmental metadata for reproducibility.
///
/// Describes where, when, and by whom a benchmark suite was run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BenchmarkMetadata {
    /// Benchmark suite name. When this is empty, `BenchmarkReport::new`
    /// fills it in from its `suite_name` argument (if that is non-empty).
    pub benchmark_suite: String,

    /// ISO 8601 timestamp (e.g. `2025-11-18T10:30:00Z`)
    pub timestamp: String,

    /// Git commit hash (short)
    pub git_commit: String,

    /// Git branch name
    pub git_branch: String,

    /// Operator (human or automated-ci)
    pub operator: String,

    /// Hardware specifications
    pub hardware: HardwareInfo,

    /// Software environment
    pub software: SoftwareInfo,

    /// Runtime configuration
    pub environment: EnvironmentConfig,
}
56
/// Hardware specifications of the machine the benchmarks ran on.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HardwareInfo {
    /// CPU information
    pub cpu: CpuInfo,

    /// Memory information
    pub memory: MemoryInfo,

    /// Storage information; optional, so it may be absent (`None`)
    pub storage: Option<StorageInfo>,
}
69
/// CPU details.
///
/// Cache sizes are free-form strings rather than numbers
/// (the unit test in this module uses values such as `"32KB"` and `"8MB"`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CpuInfo {
    /// CPU model name
    pub model: String,

    /// Number of physical cores
    pub cores: usize,

    /// Number of logical threads
    pub threads: usize,

    /// Base frequency in `MHz`
    pub frequency_mhz: u64,

    /// L1 cache size, as text (e.g. `"32KB"`)
    pub cache_l1: String,

    /// L2 cache size, as text (e.g. `"256KB"`)
    pub cache_l2: String,

    /// L3 cache size, as text (e.g. `"8MB"`)
    pub cache_l3: String,
}
94
/// Memory details.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryInfo {
    /// Total memory in GB
    pub total_gb: f64,

    /// Memory type (DDR4, DDR5, etc.).
    ///
    /// Serialized under the key `type`; the Rust field needs a different
    /// name because `type` is a reserved keyword.
    #[serde(rename = "type")]
    pub memory_type: String,

    /// Memory frequency in `MHz`; optional, so it may be absent (`None`)
    pub frequency_mhz: Option<u64>,
}
108
/// Storage details.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StorageInfo {
    /// Storage type (SSD, `NVMe`, HDD).
    ///
    /// Serialized under the key `type`; the Rust field needs a different
    /// name because `type` is a reserved keyword.
    #[serde(rename = "type")]
    pub storage_type: String,

    /// Device model
    pub model: String,

    /// Capacity in GB
    pub capacity_gb: u64,
}
122
/// Software environment: operating system and toolchain versions.
///
/// All fields are free-form strings; nothing in this module parses them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SoftwareInfo {
    /// Operating system (e.g. `"Linux"`)
    pub os: String,

    /// Kernel version (e.g. `"5.15.0"`)
    pub kernel: String,

    /// Rust compiler version (e.g. `"1.75.0"`)
    pub rustc: String,

    /// Cargo version (e.g. `"1.75.0"`)
    pub cargo: String,

    /// LLVM version (e.g. `"17.0"`)
    pub llvm: String,
}
141
/// Runtime configuration of the host while benchmarks were running.
///
/// Settings are stored as strings (the unit test in this module uses
/// `"performance"` and `"disabled"`), not as enums or booleans.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EnvironmentConfig {
    /// CPU frequency governor (e.g. `"performance"`)
    pub cpu_governor: String,

    /// Turbo boost enabled/disabled
    pub turbo_boost: String,

    /// Swap enabled/disabled
    pub swap: String,

    /// CPU isolation configuration; optional, so it may be absent (`None`)
    pub isolation: Option<String>,
}
157
/// Individual benchmark result.
///
/// One entry of `BenchmarkReport::benchmarks`: identifies what was run and
/// carries its measurements plus an optional baseline comparison.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BenchmarkResult {
    /// Benchmark name
    pub name: String,

    /// Category (cpu-bound, memory-intensive, io-bound)
    pub category: String,

    /// Scope (component, system)
    pub scope: String,

    /// Binary path
    pub binary: String,

    /// Optimization profile used
    pub optimization_profile: String,

    /// Measurement data
    pub measurements: Measurements,

    /// Comparison against baseline (if available). Results without a
    /// comparison contribute nothing to the improvement/regression counts
    /// in `BenchmarkSummary`.
    pub comparison: Option<Comparison>,
}
182
/// Measurement data and statistics for a single benchmark.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Measurements {
    /// Number of warmup runs
    pub warmup_runs: usize,

    /// Number of measured runs
    pub measured_runs: usize,

    /// Raw measurement values in milliseconds.
    // NOTE(review): this file does not show whether these values include or
    // exclude the entries listed in `outliers_removed` — confirm with the
    // code that populates this struct.
    pub raw_values_ms: Vec<f64>,

    /// Outliers removed (if any)
    pub outliers_removed: Vec<f64>,

    /// Statistical summary
    pub statistics: Statistics,

    /// Distribution analysis
    pub distribution: Distribution,
}
204
/// Statistical summary of one benchmark's measurements.
///
/// All time-valued fields are in milliseconds.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Statistics {
    /// Arithmetic mean (ms). This is the value `BenchmarkReport` sums
    /// (after conversion to seconds) into `total_runtime_seconds`.
    pub mean_ms: f64,

    /// Median (ms)
    pub median_ms: f64,

    /// Standard deviation (ms)
    pub std_dev_ms: f64,

    /// Minimum value (ms)
    pub min_ms: f64,

    /// Maximum value (ms)
    pub max_ms: f64,

    /// Coefficient of variation (`std_dev` / mean)
    pub coefficient_of_variation: f64,

    /// 95% confidence interval as a `(lower, upper)` pair
    pub confidence_interval_95: (f64, f64),
}
229
/// Distribution analysis: outcome of a normality test on the measurements.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Distribution {
    /// Normality test used (shapiro, ks)
    pub normality_test: String,

    /// P-value from normality test
    pub normality_p_value: f64,

    /// Whether distribution is normal (p > 0.05).
    /// Stored as given; nothing in this module recomputes it from
    /// `normality_p_value`.
    pub is_normal: bool,
}
242
/// Comparison of a benchmark against a baseline run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Comparison {
    /// Baseline commit hash
    pub baseline_commit: String,

    /// Baseline mean runtime (ms)
    pub baseline_mean_ms: f64,

    /// Speedup ratio (baseline / current).
    // NOTE(review): presumably a ratio of mean runtimes, in which case
    // values above 1.0 mean the current run is faster — confirm with the
    // code that computes it.
    pub speedup_ratio: f64,

    /// 95% confidence interval on speedup as a `(lower, upper)` pair
    pub speedup_ci_95: (f64, f64),

    /// T-test p-value
    pub t_test_p_value: f64,

    /// Effect size (Cohen's d)
    pub effect_size_cohens_d: f64,

    /// Whether improvement is statistically significant.
    /// Counted into `BenchmarkSummary::significant_improvements`.
    pub significant_improvement: bool,

    /// Whether regression is statistically significant.
    /// Counted into `BenchmarkSummary::significant_regressions` and, by the
    /// current summary logic, also into `BenchmarkSummary::failed`.
    pub significant_regression: bool,
}
270
/// Summary across all benchmarks in a report.
///
/// Every field except `metrics` is overwritten by `BenchmarkReport` each
/// time a result is added; the per-field notes below describe how.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BenchmarkSummary {
    /// Total number of benchmarks (length of the report's result list)
    pub total_benchmarks: usize,

    /// Successfully completed benchmarks: the total minus the number of
    /// statistically significant regressions.
    pub successful: usize,

    /// Failed benchmarks: equal to the number of statistically significant
    /// regressions (a benchmark is not marked failed for any other reason).
    pub failed: usize,

    /// Total runtime in seconds, computed as the sum of each benchmark's
    /// mean measurement (`mean_ms / 1000`), not as wall-clock time.
    pub total_runtime_seconds: f64,

    /// Number of significant improvements
    pub significant_improvements: usize,

    /// Number of significant regressions
    pub significant_regressions: usize,

    /// Additional metrics; starts empty and is never touched by the
    /// summary recomputation.
    pub metrics: HashMap<String, f64>,
}
295
296impl BenchmarkReport {
297    /// Create a new benchmark report with default values
298    #[must_use]
299    pub fn new(suite_name: &str, metadata: BenchmarkMetadata) -> Self {
300        // GH-18: Use suite_name to override metadata.benchmark_suite if it differs
301        let mut metadata = metadata;
302        if metadata.benchmark_suite.is_empty() && !suite_name.is_empty() {
303            metadata.benchmark_suite = suite_name.to_string();
304        }
305        Self {
306            schema_version: "1.0".to_string(),
307            metadata,
308            benchmarks: Vec::new(),
309            summary: BenchmarkSummary {
310                total_benchmarks: 0,
311                successful: 0,
312                failed: 0,
313                total_runtime_seconds: 0.0,
314                significant_improvements: 0,
315                significant_regressions: 0,
316                metrics: HashMap::new(),
317            },
318        }
319    }
320
321    /// Add a benchmark result
322    pub fn add_benchmark(&mut self, result: BenchmarkResult) {
323        self.benchmarks.push(result);
324        self.update_summary();
325    }
326
327    /// Update summary statistics
328    fn update_summary(&mut self) {
329        self.summary.total_benchmarks = self.benchmarks.len();
330
331        // Count significant changes and failed benchmarks
332        let mut improvements = 0;
333        let mut regressions = 0;
334
335        for bench in &self.benchmarks {
336            if let Some(comp) = &bench.comparison {
337                if comp.significant_improvement {
338                    improvements += 1;
339                }
340                if comp.significant_regression {
341                    regressions += 1;
342                }
343            }
344        }
345
346        // GH-18: Compute failed count (benchmarks with significant regressions)
347        self.summary.failed = regressions;
348        self.summary.successful = self.benchmarks.len().saturating_sub(regressions);
349
350        // GH-18: Compute total runtime from individual benchmark mean times
351        self.summary.total_runtime_seconds =
352            self.benchmarks.iter().map(|b| b.measurements.statistics.mean_ms / 1000.0).sum();
353
354        self.summary.significant_improvements = improvements;
355        self.summary.significant_regressions = regressions;
356    }
357
358    /// Serialize to JSON string
359    ///
360    /// # Errors
361    ///
362    /// Returns an error if serialization fails
363    pub fn to_json(&self) -> Result<String, serde_json::Error> {
364        serde_json::to_string_pretty(self)
365    }
366
367    /// Serialize to JSON file
368    ///
369    /// # Errors
370    ///
371    /// Returns an error if file creation or writing fails
372    pub fn to_json_file(&self, path: &std::path::Path) -> std::io::Result<()> {
373        let json = self.to_json().map_err(std::io::Error::other)?;
374        std::fs::write(path, json)
375    }
376
377    /// Deserialize from JSON string
378    ///
379    /// # Errors
380    ///
381    /// Returns an error if deserialization fails or JSON is invalid
382    pub fn from_json(json: &str) -> Result<Self, serde_json::Error> {
383        serde_json::from_str(json)
384    }
385
386    /// Deserialize from JSON file
387    ///
388    /// # Errors
389    ///
390    /// Returns an error if file reading fails or JSON is invalid
391    pub fn from_json_file(path: &std::path::Path) -> std::io::Result<Self> {
392        let json = std::fs::read_to_string(path)?;
393        Self::from_json(&json).map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
394    }
395}
396
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a fully populated metadata fixture for the "test-suite" suite.
    fn sample_metadata() -> BenchmarkMetadata {
        let cpu = CpuInfo {
            model: "Test CPU".into(),
            cores: 4,
            threads: 8,
            frequency_mhz: 3000,
            cache_l1: "32KB".into(),
            cache_l2: "256KB".into(),
            cache_l3: "8MB".into(),
        };
        let memory = MemoryInfo {
            total_gb: 16.0,
            memory_type: "DDR4".into(),
            frequency_mhz: Some(2400),
        };
        let software = SoftwareInfo {
            os: "Linux".into(),
            kernel: "5.15.0".into(),
            rustc: "1.75.0".into(),
            cargo: "1.75.0".into(),
            llvm: "17.0".into(),
        };
        let environment = EnvironmentConfig {
            cpu_governor: "performance".into(),
            turbo_boost: "disabled".into(),
            swap: "disabled".into(),
            isolation: None,
        };

        BenchmarkMetadata {
            benchmark_suite: "test-suite".into(),
            timestamp: "2025-11-18T10:30:00Z".into(),
            git_commit: "abc123".into(),
            git_branch: "main".into(),
            operator: "test".into(),
            hardware: HardwareInfo { cpu, memory, storage: None },
            software,
            environment,
        }
    }

    /// A freshly created report serializes to JSON and survives a round trip.
    #[test]
    fn test_benchmark_report_serialization() {
        let report = BenchmarkReport::new("test-suite", sample_metadata());

        // Serialization emits the schema version in pretty-printed form.
        let encoded = report.to_json().expect("Failed to serialize");
        assert!(encoded.contains("\"schema_version\": \"1.0\""));

        // Deserializing the output restores the key identifying fields.
        let decoded = BenchmarkReport::from_json(&encoded).expect("Failed to deserialize");
        assert_eq!(decoded.schema_version, "1.0");
        assert_eq!(decoded.metadata.benchmark_suite, "test-suite");
    }
}