#[derive(Debug, Clone)]
/// Summary of one benchmark run, as assembled by `benchmark_brick`.
///
/// The `_us` suffix suggests microseconds; `benchmark_brick` derives
/// `tokens_per_sec` as `1_000_000 / mean_us`, which is consistent with that.
pub struct BenchmarkReport {
/// Name reported by the benchmarked brick (`brick.name()`).
pub brick_name: String,
/// Arithmetic mean of the recorded samples.
pub mean_us: f64,
/// Standard deviation of the recorded samples.
pub std_us: f64,
/// Coefficient of variation (`std_us / mean_us`), stored as a fraction;
/// the `Display` impl multiplies it by 100 to print a percentage.
pub cv: f64,
/// Sample selected by `percentile(&samples, 0.50)`.
pub p50_us: f64,
/// Sample selected by `percentile(&samples, 0.99)`.
pub p99_us: f64,
/// Throughput derived from the mean sample value.
pub tokens_per_sec: f64,
/// The brick's budget (`budget.us_per_token`) the mean is compared against.
pub budget_us: f64,
/// Whether the mean sample value is within `budget_us`.
pub budget_met: bool,
/// Whether `cv` is within the configured `max_cv` threshold.
pub statistically_valid: bool,
}
impl fmt::Display for BenchmarkReport {
    /// Writes a one-line summary: brick name, mean ± standard deviation,
    /// coefficient of variation as a percentage, throughput, the budget
    /// value, and a `PASS`/`FAIL` verdict taken from `budget_met`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self {
            brick_name,
            mean_us,
            std_us,
            cv,
            tokens_per_sec,
            budget_us,
            budget_met,
            ..
        } = self;

        // `cv` is stored as a fraction; it is printed as a percentage.
        let cv_percent = *cv * 100.0;
        let verdict = if *budget_met { "PASS" } else { "FAIL" };

        write!(
            f,
            "{}: {:.1}µs ± {:.1}µs (CV={:.1}%) | {:.0} tok/s | budget: {} ({})",
            brick_name, mean_us, std_us, cv_percent, tokens_per_sec, budget_us, verdict
        )
    }
}
/// Picks the element of `samples` at rank `floor(len * p)`, falling back to
/// the last element when that rank is past the end.
///
/// The slice is indexed as given, so callers that want a true percentile
/// must pass it already sorted in ascending order. Returns `0.0` for an
/// empty slice.
fn percentile(samples: &[f64], p: f64) -> f64 {
    // The float-to-usize cast saturates, so an out-of-range `p` ends up
    // either at the first element or past the end (handled by the fallback).
    let rank = ((samples.len() as f64) * p).floor() as usize;
    samples
        .get(rank)
        .or_else(|| samples.last())
        .copied()
        .unwrap_or(0.0)
}
/// Benchmarks `run_fn` and summarises the samples against `brick`'s budget.
///
/// `run_fn` is called `config.warmup` times with its result discarded, then
/// `config.samples` more times with each returned value recorded as one
/// sample. The samples are expected to be per-token latencies in
/// microseconds: throughput is derived as `1_000_000 / mean`.
///
/// The report contains:
/// - the mean and the population standard deviation (divides by `n`, not
///   `n - 1`) of the samples;
/// - the coefficient of variation `std / mean`, which is `0.0` when the mean
///   is within `f64::EPSILON` of zero and NaN when the mean itself is NaN;
/// - the 50th and 99th percentile samples, as selected by `percentile`;
/// - `budget_met`, true when the mean does not exceed the brick's
///   `us_per_token` budget;
/// - `statistically_valid`, true when the coefficient of variation does not
///   exceed `config.max_cv`. A NaN mean therefore never counts as valid.
///
/// With zero samples the mean, deviation and percentiles are all `0.0`.
pub fn benchmark_brick<B: ComputeBrick>(
    brick: &B,
    run_fn: impl Fn() -> f64,
    config: &BenchmarkConfig,
) -> BenchmarkReport {
    for _ in 0..config.warmup {
        let _ = run_fn();
    }

    let mut samples: Vec<f64> = (0..config.samples).map(|_| run_fn()).collect();
    // `total_cmp` is a genuine total order, so NaN samples cannot make the
    // sort panic or leave the slice arbitrarily ordered; positive NaNs sort
    // after every number.
    samples.sort_unstable_by(f64::total_cmp);

    // `max(1)` keeps the divisions below defined when no samples were taken.
    let n = samples.len().max(1) as f64;
    let mean = samples.iter().sum::<f64>() / n;
    let std = (samples.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / n).sqrt();

    // A NaN mean fails the `> EPSILON` comparison; without the explicit
    // check it would be reported as a perfectly stable run with CV = 0.
    let cv = if mean.is_nan() {
        f64::NAN
    } else if mean.abs() > f64::EPSILON {
        std / mean
    } else {
        0.0
    };

    let budget = brick.budget();
    BenchmarkReport {
        brick_name: brick.name().to_string(),
        mean_us: mean,
        std_us: std,
        cv,
        p50_us: percentile(&samples, 0.50),
        p99_us: percentile(&samples, 0.99),
        tokens_per_sec: if mean > 0.0 { 1_000_000.0 / mean } else { 0.0 },
        budget_us: budget.us_per_token,
        budget_met: mean <= budget.us_per_token,
        // Any comparison with a NaN `cv` is false, so NaN runs are invalid.
        statistically_valid: cv <= config.max_cv,
    }
}
#[derive(Debug, Clone)]
/// Brick that reports itself as `"cuda_graph"` through `ComputeBrick::name`.
///
/// It tracks whether a graph has been captured; `replay` returns an error
/// until `captured` is set.
pub struct CudaGraphBrick {
/// Layer count; `can_run` requires this to be greater than zero.
pub num_layers: usize,
/// Hidden dimension; `can_run` requires this to be greater than zero.
pub hidden_dim: usize,
/// Capture flag, `false` after `new`; read by `can_replay` and `replay`.
pub captured: bool,
// Budget returned by `ComputeBrick::budget`; `new` initialises it with
// `TokenBudget::from_latency(20.0)` and `with_budget` replaces it.
budget: TokenBudget,
}
impl CudaGraphBrick {
    /// Creates a brick for the given dimensions, marked as not yet captured
    /// and carrying the default budget `TokenBudget::from_latency(20.0)`.
    #[must_use]
    pub fn new(num_layers: usize, hidden_dim: usize) -> Self {
        let default_budget = TokenBudget::from_latency(20.0);
        Self {
            num_layers,
            hidden_dim,
            captured: false,
            budget: default_budget,
        }
    }

    /// Returns the brick with its budget replaced by `budget`; every other
    /// field is carried over unchanged.
    #[must_use]
    pub fn with_budget(self, budget: TokenBudget) -> Self {
        Self { budget, ..self }
    }

    /// Sets the capture flag that `can_replay` and `replay` consult.
    pub fn set_captured(&mut self, captured: bool) {
        self.captured = captured;
    }

    /// Reports whether the capture flag is set.
    #[must_use]
    pub fn can_replay(&self) -> bool {
        self.captured
    }

    /// Checks that the brick is in a replayable state.
    ///
    /// # Errors
    ///
    /// Returns `BrickError::ComputeError` when the capture flag is not set.
    pub fn replay(&self) -> Result<(), BrickError> {
        if self.captured {
            Ok(())
        } else {
            Err(BrickError::ComputeError(
                "CUDA graph not captured yet".to_string(),
            ))
        }
    }
}
impl ComputeBrick for CudaGraphBrick {
    type Output = ();

    /// Fixed identifier for this brick.
    fn name(&self) -> &'static str {
        "cuda_graph"
    }

    /// Returns the budget stored on the brick.
    fn budget(&self) -> TokenBudget {
        self.budget
    }

    /// Lists the brick's assertions: the standard budget check followed by a
    /// custom `graph_speedup` check.
    fn assertions(&self) -> Vec<BrickAssertion> {
        let budget_check = BrickAssertion::budget_met();
        let speedup_check = BrickAssertion {
            name: String::from("graph_speedup"),
            description: String::from("Graph replay faster than eager execution"),
            kind: AssertionKind::Custom {
                check_name: String::from("graph_speedup"),
            },
        };
        vec![budget_check, speedup_check]
    }

    /// The brick is runnable only when both dimensions are non-zero.
    fn can_run(&self) -> bool {
        self.num_layers != 0 && self.hidden_dim != 0
    }
}
// Unit-test modules. Each is compiled only under `cfg(test)`, and its
// `#[path]` attribute names the file that holds the module's source.
#[cfg(test)]
#[path = "tests.rs"]
mod brick_tests;
#[cfg(test)]
#[path = "tests_token_budget.rs"]
mod brick_tests_part_02;
#[cfg(test)]
#[path = "tests_flash_attention.rs"]
mod brick_tests_part_03;
#[cfg(test)]
#[path = "profiler_tests.rs"]
mod profiler_tests;
// Compiled only for test builds with the `cuda` feature enabled. There is no
// `#[path]` override here, so the default module file lookup applies.
#[cfg(all(test, feature = "cuda"))]
mod fused_tests;