#![allow(clippy::cast_precision_loss)]
use std::time::Duration;
use serde::{Deserialize, Serialize};
mod gpu_parity;
mod load_testing;
mod matrix;
mod runtime;
mod statistics;
pub use runtime::RuntimeType;
#[cfg(feature = "bench-http")]
pub use runtime::{
BackendInfo, BackendRegistry, InferenceRequest, InferenceResponse, LlamaCppBackend,
LlamaCppConfig, MockBackend, OllamaBackend, OllamaConfig, RuntimeBackend, VllmBackend,
VllmConfig,
};
pub use statistics::{
detect_outliers, welch_t_test, BenchmarkMetrics, LatencyStatistics, MeasurementProtocol,
Regression, RegressionDetector, RegressionReport, WelchTTestResult,
};
pub use load_testing::{LoadTestConfig, LoadTestResult, LoadTestRunner};
pub use matrix::{
BackendSummary, BenchmarkMatrix, ComputeBackendType, MatrixBenchmarkConfig,
MatrixBenchmarkEntry, MatrixSummary,
};
pub use gpu_parity::{
FalsifiableClaim, FlashAttentionConfig, FusedOpSpec, FusedOpType, GapAnalysis,
GemmPerformanceResult, GpuParityBenchmark, GpuParityResult, Imp900Result, MemoryPoolConfig,
OptimizedGemmBenchmark, OptimizedGemmConfig,
};
/// Adaptive stopping rule that decides when enough samples were collected.
///
/// Sampling is treated as converged once the coefficient of variation (CV)
/// of the most recent `cv_window` samples has been below `cv_threshold` for
/// `stability_count` consecutive calls to
/// [`DynamicSampler::should_continue`].
#[derive(Debug, Clone)]
pub struct DynamicSampler {
    /// Sample count below which sampling always continues.
    pub min_samples: usize,
    /// Sample count at which sampling stops (checked after `min_samples`).
    pub max_samples: usize,
    /// CV the trailing window must stay strictly below to count as stable.
    pub cv_threshold: f64,
    /// Number of trailing samples the CV is computed over.
    pub cv_window: usize,
    /// Consecutive stable checks required before sampling stops.
    pub stability_count: usize,
    /// Consecutive stable checks seen so far; cleared by an unstable check.
    stable_streak: usize,
}

impl Default for DynamicSampler {
    /// Uses 100 minimum samples, 10 000 maximum samples and a 5% CV threshold.
    fn default() -> Self {
        Self::new(100, 10_000, 0.05)
    }
}

impl DynamicSampler {
    /// Builds a sampler with the given bounds and CV threshold.
    ///
    /// The window size is fixed to 50 samples and the required number of
    /// consecutive stable checks to 3; both fields are public and can be
    /// adjusted afterwards.
    #[must_use]
    pub fn new(min_samples: usize, max_samples: usize, cv_threshold: f64) -> Self {
        Self {
            cv_window: 50,
            stability_count: 3,
            stable_streak: 0,
            min_samples,
            max_samples,
            cv_threshold,
        }
    }

    /// Reports whether another sample should be taken.
    ///
    /// Rules, applied in order:
    /// 1. fewer than `min_samples` samples: continue;
    /// 2. at least `max_samples` samples: stop;
    /// 3. otherwise update the stable streak from the CV of the trailing
    ///    window and stop only when the current check is stable and the
    ///    streak has reached `stability_count`.
    ///
    /// The streak is internal state, so the result depends on earlier calls.
    #[must_use]
    pub fn should_continue(&mut self, samples: &[f64]) -> bool {
        let count = samples.len();
        if count < self.min_samples {
            return true;
        }
        if count >= self.max_samples {
            return false;
        }

        let recent = &samples[count.saturating_sub(self.cv_window)..];
        let stable = compute_cv(recent) < self.cv_threshold;
        self.stable_streak = if stable { self.stable_streak + 1 } else { 0 };

        // An unstable check never stops sampling, whatever `stability_count` is.
        !(stable && self.stable_streak >= self.stability_count)
    }

    /// Returns the CV of the trailing `cv_window` samples.
    ///
    /// Yields `f64::INFINITY` when fewer than two samples are supplied.
    #[must_use]
    pub fn current_cv(&self, samples: &[f64]) -> f64 {
        match samples.len() {
            0 | 1 => f64::INFINITY,
            len => compute_cv(&samples[len.saturating_sub(self.cv_window)..]),
        }
    }

    /// Clears the stable streak so convergence detection starts over.
    pub fn reset(&mut self) {
        self.stable_streak = 0;
    }
}

/// Coefficient of variation: sample standard deviation (n - 1 denominator)
/// divided by the absolute mean.
///
/// Returns `f64::INFINITY` for fewer than two values or when the absolute
/// mean is below `1e-10`, where the ratio is not meaningful.
fn compute_cv(data: &[f64]) -> f64 {
    let len = data.len();
    if len < 2 {
        return f64::INFINITY;
    }

    let count = len as f64;
    let mean = data.iter().sum::<f64>() / count;
    let magnitude = mean.abs();
    if magnitude < 1e-10 {
        return f64::INFINITY;
    }

    let squared_deviations: f64 = data.iter().map(|x| (x - mean).powi(2)).sum();
    (squared_deviations / (count - 1.0)).sqrt() / magnitude
}
/// Temperature limits used to gate benchmark runs and judge their validity.
#[derive(Debug, Clone)]
pub struct ThermalGuard {
    /// Temperature (°C) above which a cooldown pause is requested.
    pub max_temp_c: f64,
    /// Cooldown target temperature (°C). Stored only; none of the methods
    /// defined here read it.
    pub cooldown_threshold_c: f64,
    /// Length of the cooldown pause in milliseconds.
    pub cooldown_sleep_ms: u64,
    /// Largest temperature standard deviation (°C) a run may show and still
    /// be considered valid.
    pub temp_variance_c: f64,
}

impl Default for ThermalGuard {
    /// 80 °C maximum, 70 °C cooldown threshold, 10 s pause, 2 °C spread limit.
    fn default() -> Self {
        Self::new(80.0, 70.0, 10_000, 2.0)
    }
}

/// Outcome of checking the temperatures recorded during a run.
#[derive(Debug, Clone, PartialEq)]
pub enum ThermalValidity {
    /// The temperature spread stayed within the configured limit.
    Valid,
    /// The spread exceeded the limit; the payload is a human-readable reason.
    Invalid(String),
}

impl ThermalGuard {
    /// Builds a guard from explicit limits.
    #[must_use]
    pub fn new(
        max_temp_c: f64,
        cooldown_threshold_c: f64,
        cooldown_sleep_ms: u64,
        temp_variance_c: f64,
    ) -> Self {
        Self {
            max_temp_c,
            cooldown_threshold_c,
            cooldown_sleep_ms,
            temp_variance_c,
        }
    }

    /// Returns `true` when `current_temp` is strictly above `max_temp_c`.
    #[must_use]
    pub fn needs_cooldown(&self, current_temp: f64) -> bool {
        current_temp > self.max_temp_c
    }

    /// Classifies a run by the spread of its temperature readings.
    ///
    /// An empty slice is always `Valid`. Otherwise the run is `Invalid` when
    /// the sample standard deviation of `temps` is strictly greater than
    /// `temp_variance_c`.
    #[must_use]
    pub fn validate_run(&self, temps: &[f64]) -> ThermalValidity {
        if temps.is_empty() {
            return ThermalValidity::Valid;
        }

        let std_dev = self.temp_variance(temps);
        if std_dev > self.temp_variance_c {
            return ThermalValidity::Invalid(format!(
                "Temperature variance {std_dev:.2}°C exceeds threshold {:.2}°C",
                self.temp_variance_c
            ));
        }
        ThermalValidity::Valid
    }

    /// Blocks the current thread for `cooldown_sleep_ms` milliseconds when
    /// `current_temp` calls for a cooldown; otherwise returns immediately.
    pub fn cooldown_if_needed(&self, current_temp: f64) {
        if self.needs_cooldown(current_temp) {
            std::thread::sleep(Duration::from_millis(self.cooldown_sleep_ms));
        }
    }

    /// Returns the highest reading in `temps`, or `0.0` for an empty slice.
    #[must_use]
    pub fn max_temp(&self, temps: &[f64]) -> f64 {
        if temps.is_empty() {
            0.0
        } else {
            temps
                .iter()
                .fold(f64::NEG_INFINITY, |hottest, &reading| hottest.max(reading))
        }
    }

    /// Returns the sample standard deviation of `temps` (despite the name,
    /// this is the square root of the variance).
    #[must_use]
    pub fn temp_variance(&self, temps: &[f64]) -> f64 {
        compute_variance(temps).sqrt()
    }
}

/// Sample variance (n - 1 denominator); `0.0` for fewer than two values.
fn compute_variance(data: &[f64]) -> f64 {
    match data.len() {
        0 | 1 => 0.0,
        len => {
            let count = len as f64;
            let mean = data.iter().sum::<f64>() / count;
            let squared_deviations: f64 = data.iter().map(|x| (x - mean).powi(2)).sum();
            squared_deviations / (count - 1.0)
        }
    }
}
/// Allocation and usage figures, in bytes, plus a derived fragmentation
/// percentage.
///
/// All fields are public, so `fragmentation_pct` is only guaranteed to match
/// the byte counts when the value was built with [`KvCacheMetrics::new`].
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct KvCacheMetrics {
/// Number of bytes allocated.
pub allocated_bytes: u64,
/// Number of allocated bytes that are in use.
pub used_bytes: u64,
/// Share of allocated bytes not in use, as a percentage (`new` computes
/// it as `(allocated - used) / allocated * 100`, or `0.0` when nothing is
/// allocated).
pub fragmentation_pct: f64,
}
impl KvCacheMetrics {
    /// Builds metrics from raw byte counts and derives the fragmentation
    /// percentage.
    ///
    /// Fragmentation is the unused share of the allocation times 100. It is
    /// `0.0` when nothing is allocated, and also when `used_bytes` exceeds
    /// `allocated_bytes` (the difference saturates at zero).
    #[must_use]
    pub fn new(allocated_bytes: u64, used_bytes: u64) -> Self {
        let fragmentation_pct = match allocated_bytes {
            0 => 0.0,
            total => {
                let unused = total.saturating_sub(used_bytes);
                (unused as f64 / total as f64) * 100.0
            }
        };

        Self {
            allocated_bytes,
            used_bytes,
            fragmentation_pct,
        }
    }

    /// Allocated size in mebibytes (bytes divided by 1024 * 1024).
    #[must_use]
    pub fn allocated_mb(&self) -> f64 {
        const BYTES_PER_MB: f64 = 1024.0 * 1024.0;
        self.allocated_bytes as f64 / BYTES_PER_MB
    }

    /// Used size in mebibytes (bytes divided by 1024 * 1024).
    #[must_use]
    pub fn used_mb(&self) -> f64 {
        const BYTES_PER_MB: f64 = 1024.0 * 1024.0;
        self.used_bytes as f64 / BYTES_PER_MB
    }

    /// Returns `true` when fragmentation is strictly below `threshold_pct`.
    #[must_use]
    pub fn is_acceptable(&self, threshold_pct: f64) -> bool {
        threshold_pct > self.fragmentation_pct
    }
}
/// Energy and power figures paired with a generated-token count.
///
/// Units follow the field names (joules, watts). NOTE(review): the code
/// visible here only stores `idle_watts` and `active_watts_avg`; how they
/// are measured is not shown — confirm against the producer.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EnergyMetrics {
/// Total energy, in joules; numerator of `joules_per_token`.
pub total_joules: f64,
/// Idle power, in watts (stored only).
pub idle_watts: f64,
/// Average active power, in watts (stored only).
pub active_watts_avg: f64,
/// Number of tokens generated; denominator of `joules_per_token`.
pub tokens_generated: u64,
}
impl EnergyMetrics {
    /// Builds metrics from raw measurements; `tokens` becomes
    /// `tokens_generated`.
    #[must_use]
    pub fn new(total_joules: f64, idle_watts: f64, active_watts_avg: f64, tokens: u64) -> Self {
        Self {
            tokens_generated: tokens,
            total_joules,
            idle_watts,
            active_watts_avg,
        }
    }

    /// Energy spent per generated token, or `0.0` when no tokens were
    /// generated.
    #[must_use]
    pub fn joules_per_token(&self) -> f64 {
        match self.tokens_generated {
            0 => 0.0,
            tokens => self.total_joules / tokens as f64,
        }
    }

    /// Tokens generated per joule, or `0.0` when `total_joules` is below
    /// `1e-10` (which also covers negative totals).
    #[must_use]
    pub fn tokens_per_joule(&self) -> f64 {
        if self.total_joules < 1e-10 {
            0.0
        } else {
            self.tokens_generated as f64 / self.total_joules
        }
    }
}
/// Summary statistics whose field names indicate millisecond units.
///
/// NOTE(review): "ITL" presumably stands for inter-token latency, and the
/// methods for this type presumably live in the included
/// `mod_measurements_itl_metrics.rs` — neither is visible here; confirm.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ItlMetrics {
/// Median, in milliseconds.
pub median_ms: f64,
/// Standard deviation, in milliseconds.
pub std_dev_ms: f64,
/// Presumably the 99th percentile, in milliseconds — TODO confirm.
pub p99_ms: f64,
/// Presumably the 99.9th percentile, in milliseconds — TODO confirm.
pub p999_ms: f64,
}
include!("mod_measurements_itl_metrics.rs");
include!("convoy_test_result.rs");
include!("mod_benchmark_full.rs");