impl ConvoyTestResult {
    /// Aggregates raw convoy-test samples and judges them against the limits in `config`.
    ///
    /// # Arguments
    /// * `config` - provides `long_requests` / `short_requests` (copied into the result) and the
    ///   three `max_*` thresholds.
    /// * `baseline_short_latencies` - samples whose `percentile(_, 99.0)` forms the baseline P99.
    /// * `convoy_short_latencies` - samples whose `percentile(_, 99.0)` is compared to the baseline.
    /// * `hol_blocking_times` - samples reduced to a maximum and an arithmetic mean.
    /// * `kv_fragmentation_pct` - stored unchanged and compared to its threshold.
    ///
    /// # Returns
    /// A result whose `failure_reasons` holds one message per threshold that was strictly
    /// exceeded (P99 increase, then max HOL blocking, then KV fragmentation) and whose `passed`
    /// flag is `true` exactly when that list is empty.
    #[must_use]
    pub fn new(
        config: &ConvoyTestConfig,
        baseline_short_latencies: &[f64],
        convoy_short_latencies: &[f64],
        hol_blocking_times: &[f64],
        kv_fragmentation_pct: f64,
    ) -> Self {
        let baseline_p99 = percentile(baseline_short_latencies, 99.0);
        let convoy_p99 = percentile(convoy_short_latencies, 99.0);

        // A baseline that is not strictly positive cannot be used as a denominator,
        // so the relative increase is reported as 0.0 in that case.
        let p99_increase_pct = match baseline_p99 {
            base if base > 0.0 => ((convoy_p99 - base) / base) * 100.0,
            _ => 0.0,
        };

        // Seeding the fold with 0.0 means an empty slice (or only negative samples) yields 0.0.
        let max_hol_blocking = hol_blocking_times
            .iter()
            .fold(0.0_f64, |worst, &sample| worst.max(sample));

        let mean_hol_blocking = match hol_blocking_times.len() {
            0 => 0.0,
            count => hol_blocking_times.iter().sum::<f64>() / count as f64,
        };

        // Each entry is `Some(message)` only when its threshold is strictly exceeded;
        // flattening keeps the messages in check order.
        let failure_reasons: Vec<String> = vec![
            (p99_increase_pct > config.max_p99_increase_pct).then(|| {
                format!(
                    "P99 increase {p99_increase_pct:.1}% exceeds threshold {:.1}%",
                    config.max_p99_increase_pct
                )
            }),
            (max_hol_blocking > config.max_hol_blocking_ms).then(|| {
                format!(
                    "Max HOL blocking {max_hol_blocking:.1}ms exceeds threshold {:.1}ms",
                    config.max_hol_blocking_ms
                )
            }),
            (kv_fragmentation_pct > config.max_kv_fragmentation_pct).then(|| {
                format!(
                    "KV fragmentation {kv_fragmentation_pct:.1}% exceeds threshold {:.1}%",
                    config.max_kv_fragmentation_pct
                )
            }),
        ]
        .into_iter()
        .flatten()
        .collect();
        let passed = failure_reasons.is_empty();

        Self {
            long_requests: config.long_requests,
            short_requests: config.short_requests,
            baseline_short_p99_ms: baseline_p99,
            convoy_short_p99_ms: convoy_p99,
            p99_increase_pct,
            max_hol_blocking_ms: max_hol_blocking,
            avg_hol_blocking_ms: mean_hol_blocking,
            kv_fragmentation_pct,
            passed,
            failure_reasons,
        }
    }
}
/// Load level and pass/fail limits for a saturation test.
///
/// `SaturationTestResult::new` compares its computed percentages against the two `max_*`
/// limits using strict `>`; the `Default` impl supplies `50`, `30.0` and `100.0`.
#[derive(Debug, Clone)]
pub struct SaturationTestConfig {
/// Copied unchanged into the result. Presumably the background CPU load in percent;
/// the `u8` type does not restrict it to 0-100 — confirm against callers.
pub cpu_load_pct: u8,
/// Throughput degradation (percent) above which the test fails.
pub max_throughput_degradation_pct: f64,
/// P99 latency increase (percent) above which the test fails.
pub max_p99_increase_pct: f64,
}
impl Default for SaturationTestConfig {
fn default() -> Self {
Self {
cpu_load_pct: 50,
max_throughput_degradation_pct: 30.0,
max_p99_increase_pct: 100.0,
}
}
}
/// Outcome of a saturation test: aggregated baseline vs. stressed measurements and a verdict.
///
/// Values are populated by `SaturationTestResult::new`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SaturationTestResult {
/// Copied from `SaturationTestConfig::cpu_load_pct`.
pub cpu_load_pct: u8,
/// Arithmetic mean of the baseline throughput samples; `0.0` when there are none.
pub baseline_throughput: f64,
/// Arithmetic mean of the stressed throughput samples; `0.0` when there are none.
pub stressed_throughput: f64,
/// `(baseline - stressed) / baseline * 100`; `0.0` when the baseline mean is not positive.
pub throughput_degradation_pct: f64,
/// `percentile(baseline_latencies, 99.0)`; milliseconds per the field name.
pub baseline_p99_ms: f64,
/// `percentile(stressed_latencies, 99.0)`; milliseconds per the field name.
pub stressed_p99_ms: f64,
/// `(stressed - baseline) / baseline * 100` over the P99 values; `0.0` when the baseline
/// P99 is not positive.
pub p99_increase_pct: f64,
/// `true` exactly when `failure_reasons` is empty.
pub passed: bool,
/// One human-readable message per threshold that was strictly exceeded.
pub failure_reasons: Vec<String>,
}
impl SaturationTestResult {
    /// Aggregates baseline and stressed samples and judges them against the limits in `config`.
    ///
    /// # Arguments
    /// * `config` - provides `cpu_load_pct` (copied into the result) and the two `max_*` limits.
    /// * `baseline_throughputs` / `stressed_throughputs` - samples reduced to arithmetic means
    ///   (`0.0` for an empty slice).
    /// * `baseline_latencies` / `stressed_latencies` - samples reduced via `percentile(_, 99.0)`.
    ///
    /// # Returns
    /// A result whose `failure_reasons` holds one message per limit that was strictly exceeded
    /// (throughput degradation first, then P99 increase) and whose `passed` flag is `true`
    /// exactly when that list is empty.
    #[must_use]
    pub fn new(
        config: &SaturationTestConfig,
        baseline_throughputs: &[f64],
        stressed_throughputs: &[f64],
        baseline_latencies: &[f64],
        stressed_latencies: &[f64],
    ) -> Self {
        // Arithmetic mean, with 0.0 standing in for "no samples".
        fn mean_or_zero(samples: &[f64]) -> f64 {
            if samples.is_empty() {
                return 0.0;
            }
            samples.iter().sum::<f64>() / samples.len() as f64
        }

        // `delta` expressed as a percentage of `base`; 0.0 when `base` is not strictly positive.
        fn relative_pct(delta: f64, base: f64) -> f64 {
            if base > 0.0 {
                (delta / base) * 100.0
            } else {
                0.0
            }
        }

        let baseline_throughput = mean_or_zero(baseline_throughputs);
        let stressed_throughput = mean_or_zero(stressed_throughputs);
        // Positive when the stressed run is slower than the baseline.
        let throughput_degradation_pct =
            relative_pct(baseline_throughput - stressed_throughput, baseline_throughput);

        let p99_before = percentile(baseline_latencies, 99.0);
        let p99_after = percentile(stressed_latencies, 99.0);
        // Positive when the stressed P99 is higher than the baseline P99.
        let p99_increase_pct = relative_pct(p99_after - p99_before, p99_before);

        // Each entry is `Some(message)` only when its limit is strictly exceeded.
        let failure_reasons: Vec<String> = vec![
            (throughput_degradation_pct > config.max_throughput_degradation_pct).then(|| {
                format!(
                    "Throughput degradation {throughput_degradation_pct:.1}% exceeds threshold {:.1}%",
                    config.max_throughput_degradation_pct
                )
            }),
            (p99_increase_pct > config.max_p99_increase_pct).then(|| {
                format!(
                    "P99 increase {p99_increase_pct:.1}% exceeds threshold {:.1}%",
                    config.max_p99_increase_pct
                )
            }),
        ]
        .into_iter()
        .flatten()
        .collect();
        let passed = failure_reasons.is_empty();

        Self {
            cpu_load_pct: config.cpu_load_pct,
            baseline_throughput,
            stressed_throughput,
            throughput_degradation_pct,
            baseline_p99_ms: p99_before,
            stressed_p99_ms: p99_after,
            p99_increase_pct,
            passed,
            failure_reasons,
        }
    }
}
/// Hardware description carried in `FullBenchmarkResult::hardware`.
///
/// The `Default` impl fills in `"Unknown"` / `None` / `0` placeholders.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HardwareSpec {
/// CPU identifier as free-form text; `"Unknown"` by default.
pub cpu: String,
/// GPU identifier as free-form text; `None` by default (presumably "no GPU or not recorded").
pub gpu: Option<String>,
/// Memory size, in gigabytes per the field name; `0` by default.
pub memory_gb: u64,
/// Storage description as free-form text; `"Unknown"` by default.
pub storage: String,
}
impl Default for HardwareSpec {
fn default() -> Self {
Self {
cpu: "Unknown".to_string(),
gpu: None,
memory_gb: 0,
storage: "Unknown".to_string(),
}
}
}
/// Sampling metadata carried in `FullBenchmarkResult::sampling`.
///
/// NOTE(review): "cv" presumably stands for coefficient of variation — confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SamplingConfig {
/// Name of the sampling method; `"dynamic_cv"` by default.
pub method: String,
/// Threshold associated with the method; `0.05` by default.
pub cv_threshold: f64,
/// Iteration count; `0` by default (presumably filled in after a run — confirm).
pub actual_iterations: usize,
/// CV value recorded at stop time per the field name; `0.0` by default.
pub cv_at_stop: f64,
/// Number of warmup iterations; `100` by default.
pub warmup_iterations: usize,
}
impl Default for SamplingConfig {
fn default() -> Self {
Self {
method: "dynamic_cv".to_string(),
cv_threshold: 0.05,
actual_iterations: 0,
cv_at_stop: 0.0,
warmup_iterations: 100,
}
}
}
/// Thermal information carried in `FullBenchmarkResult::thermal`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ThermalInfo {
/// Validity flag; `true` by default. The criteria for validity are not visible here.
pub valid: bool,
/// Temperature variance, presumably in degrees Celsius per the `_c` suffix; `0.0` by default.
pub temp_variance_c: f64,
/// Maximum temperature, presumably in degrees Celsius per the `_c` suffix; `0.0` by default.
pub max_temp_c: f64,
}
impl Default for ThermalInfo {
fn default() -> Self {
Self {
valid: true,
temp_variance_c: 0.0,
max_temp_c: 0.0,
}
}
}
/// Statistics stored in `BenchmarkResults::ttft_ms`.
///
/// NOTE(review): "TTFT" presumably means time-to-first-token, and the fields look like
/// percentiles in milliseconds — confirm against the code that fills this in.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TtftResults {
/// Presumably the 50th percentile.
pub p50: f64,
/// Presumably the 95th percentile.
pub p95: f64,
/// Presumably the 99th percentile.
pub p99: f64,
/// Presumably the 99.9th percentile.
pub p999: f64,
}
/// Statistics stored in `BenchmarkResults::itl_ms`.
///
/// NOTE(review): "ITL" presumably means inter-token latency, in milliseconds — confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ItlResults {
/// Median value.
pub median: f64,
/// Standard deviation.
pub std_dev: f64,
/// Presumably the 99th percentile.
pub p99: f64,
}
/// Statistics stored in `BenchmarkResults::throughput_tok_s` (tokens per second per that name).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ThroughputResults {
/// Median value.
pub median: f64,
/// Presumably a 95% confidence interval as `(lower, upper)` — confirm the element order.
pub ci_95: (f64, f64),
}
/// Memory figures stored in `BenchmarkResults::memory_mb`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MemoryResults {
/// Model size, in megabytes per the field name.
pub model_mb: u64,
/// Peak RSS, in megabytes per the field name (presumably resident set size).
pub peak_rss_mb: u64,
/// Percentage value; presumably the share of KV-cache memory that is wasted — confirm.
pub kv_waste_pct: f64,
}
/// Energy figures stored in `BenchmarkResults::energy`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EnergyResults {
/// Total energy, in joules per the field name.
pub total_joules: f64,
/// Presumably joules per token — confirm.
pub token_joules: f64,
/// Idle power draw, in watts per the field name.
pub idle_watts: f64,
}
/// Statistics stored in `BenchmarkResults::cold_start_ms` (milliseconds per that name).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ColdStartResults {
/// Median value.
pub median: f64,
/// Presumably the 99th percentile.
pub p99: f64,
}
/// Quality metrics carried in `FullBenchmarkResult::quality`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QualityValidation {
/// Presumably the KL divergence measured against an FP32 reference — confirm.
pub kl_divergence_vs_fp32: f64,
/// Optional perplexity figure; presumably measured on WikiText-2, `None` when not measured.
pub perplexity_wikitext2: Option<f64>,
}
/// Top-level serializable record bundling a benchmark's configuration, environment
/// metadata, measurements and quality metrics.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FullBenchmarkResult {
/// Version string; whether it is a schema or tool version is not visible here.
pub version: String,
/// Timestamp as text; the format is not established in this part of the file.
pub timestamp: String,
/// Benchmark configuration (`BenchmarkConfig` is declared elsewhere).
pub config: BenchmarkConfig,
/// Hardware description.
pub hardware: HardwareSpec,
/// Sampling metadata.
pub sampling: SamplingConfig,
/// Thermal information.
pub thermal: ThermalInfo,
/// Measured results.
pub results: BenchmarkResults,
/// Quality metrics.
pub quality: QualityValidation,
}
/// Measurement groups carried in `FullBenchmarkResult::results`; units follow the field-name
/// suffixes.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BenchmarkResults {
/// TTFT statistics, in milliseconds per the field name.
pub ttft_ms: TtftResults,
/// ITL statistics, in milliseconds per the field name.
pub itl_ms: ItlResults,
/// Throughput statistics, in tokens per second per the field name.
pub throughput_tok_s: ThroughputResults,
/// Memory figures, in megabytes per the field name.
pub memory_mb: MemoryResults,
/// Energy figures.
pub energy: EnergyResults,
/// Cold-start statistics, in milliseconds per the field name.
pub cold_start_ms: ColdStartResults,
}