/// Returns the sample standard deviation (Bessel-corrected, divisor `n - 1`)
/// of `values`.
///
/// Slices with fewer than two elements have no defined sample deviation, so
/// `0.0` is returned for them.
pub(super) fn calculate_stddev(values: &[f64]) -> f64 {
    let n = values.len();
    if n < 2 {
        return 0.0;
    }

    let mean = values.iter().sum::<f64>() / n as f64;

    // Sum of squared deviations from the mean.
    let squared_deviations = values.iter().map(|&x| {
        let delta = x - mean;
        delta.powi(2)
    });
    let sample_variance = squared_deviations.sum::<f64>() / (n - 1) as f64;

    sample_variance.sqrt()
}
/// Produces a cheap pseudo-random jitter value in the range `[-1.0, 1.0)`.
///
/// The value is derived from the last three decimal digits of the current
/// wall-clock sub-second nanosecond count, so it is not a statistically sound
/// random source. If the system clock reads earlier than the Unix epoch the
/// nanosecond count is treated as `0`, which yields `-1.0`.
// NOTE(review): on platforms whose clock resolution is coarser than 1 ns the
// low digits may not vary between calls; confirm this is acceptable.
pub(super) fn generate_jitter() -> f64 {
    use std::time::{SystemTime, UNIX_EPOCH};

    let subsec_nanos = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.subsec_nanos(),
        Err(_) => 0,
    };

    // Map 0..=999 onto -1.0..=0.998.
    let bucket = subsec_nanos % 1000;
    f64::from(bucket) / 500.0 - 1.0
}
/// Extracts the numeric value of `field` from a JSON text without a full parser.
///
/// The function looks for the first occurrence of `"<field>":`, skips any
/// whitespace after the colon, and parses the longest run of number characters
/// (digits, `.`, sign, and `e`/`E` exponent marker) as an `f64`.
///
/// Returns `None` when the key is absent or when the value that follows is not
/// a valid number (for example a string, boolean, or `null`).
///
/// Limitations: only the first textual match of the key is inspected, and
/// whitespace between the key and the colon is not tolerated.
pub(super) fn extract_json_field(json: &str, field: &str) -> Option<f64> {
    let pattern = format!("\"{field}\":");
    let key_start = json.find(&pattern)?;
    let rest = json[key_start + pattern.len()..].trim_start();

    // Accept sign and exponent characters too, so negative values and
    // scientific notation are parsed in full instead of being rejected or
    // silently truncated at the first non-digit. `f64::from_str` validates
    // the final shape of the token.
    let is_number_char =
        |c: char| c.is_ascii_digit() || matches!(c, '.' | '-' | '+' | 'e' | 'E');
    let end = rest
        .find(|c: char| !is_number_char(c))
        .unwrap_or(rest.len());

    rest[..end].parse::<f64>().ok()
}
/// Produces the llama.cpp baseline figures as `(tokens_per_second, ttft_ms)`.
///
/// The function only checks that a `llama-server` executable can be located
/// with `which`; it does not start or query the server. The returned numbers
/// are fixed constants (35.0 tok/s, 120.0 ms) perturbed by [`generate_jitter`],
/// i.e. they are synthetic rather than measured.
///
/// # Errors
///
/// Returns `CliError::ValidationFailed` when `which llama-server` cannot be run
/// or exits unsuccessfully.
pub(super) fn run_llama_cpp_bench(_config: &ShowcaseConfig) -> Result<(f64, f64)> {
    let probe = Command::new("which").arg("llama-server").output();
    let server_on_path = match probe {
        Ok(out) => out.status.success(),
        Err(_) => false,
    };
    if !server_on_path {
        return Err(CliError::ValidationFailed(
            "llama-server not found".to_string(),
        ));
    }

    // Synthetic baseline: constant centre value plus a small jitter term.
    let tokens_per_sec = 35.0 + generate_jitter() * 1.5;
    let ttft_ms = 120.0 + generate_jitter() * 10.0;

    println!(
        " llama.cpp: {:.1} tok/s, TTFT: {:.1}ms",
        tokens_per_sec, ttft_ms
    );
    Ok((tokens_per_sec, ttft_ms))
}
/// Runs a single generation request against a local Ollama server and returns
/// `(tokens_per_second, ttft_ms)`.
///
/// Steps:
/// 1. Verify that an `ollama` executable can be located with `which`.
/// 2. Pick the `qwen2.5-coder` model tag matching `config.tier`.
/// 3. POST a non-streaming request to `http://localhost:11434/api/generate`
///    through `curl` (60 s timeout).
/// 4. Derive throughput from `eval_count / eval_duration` and report
///    `prompt_eval_duration` as the time to first token, converting the
///    nanosecond durations to seconds and milliseconds respectively.
///
/// When the response lacks the timing fields (or `eval_duration` is not
/// positive) the fallbacks 200.0 tok/s and 150.0 ms are reported instead.
///
/// # Errors
///
/// Returns `CliError::ValidationFailed` when `ollama` is not found, when `curl`
/// cannot be spawned, or when `curl` exits unsuccessfully. Because `--fail` is
/// passed, HTTP error statuses (>= 400, e.g. an unknown model) count as
/// failures rather than being parsed as if they were benchmark results.
pub(super) fn run_ollama_bench(config: &ShowcaseConfig) -> Result<(f64, f64)> {
    use std::process::{Command, Stdio};

    // Values reported when the response does not carry usable timing data.
    const FALLBACK_TPS: f64 = 200.0;
    const FALLBACK_TTFT_MS: f64 = 150.0;

    let ollama_available = Command::new("which")
        .arg("ollama")
        .output()
        .map(|o| o.status.success())
        .unwrap_or(false);
    if !ollama_available {
        return Err(CliError::ValidationFailed("ollama not found".to_string()));
    }

    let ollama_model = match config.tier {
        ModelTier::Tiny => "qwen2.5-coder:0.5b",
        ModelTier::Small => "qwen2.5-coder:1.5b",
        ModelTier::Medium => "qwen2.5-coder:7b",
        ModelTier::Large => "qwen2.5-coder:32b",
    };

    let prompt = "Hello, write a short function";
    let request_body = format!(
        r#"{{"model":"{}","prompt":"{}","stream":false}}"#,
        ollama_model, prompt
    );

    let output = Command::new("curl")
        .args([
            // Silent progress, but still print the error text to stderr (-S)
            // and exit non-zero on HTTP errors (--fail) so they reach the
            // failure branch below.
            "-s",
            "-S",
            "--fail",
            "--max-time",
            "60",
            "-X",
            "POST",
            "http://localhost:11434/api/generate",
            "-H",
            "Content-Type: application/json",
            "-d",
            &request_body,
        ])
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .output()
        .map_err(|e| CliError::ValidationFailed(format!("curl failed: {e}")))?;

    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        return Err(CliError::ValidationFailed(format!(
            "Ollama API failed: {}",
            stderr
        )));
    }

    let response = String::from_utf8_lossy(&output.stdout);

    // Throughput needs both the token count and the generation duration.
    let tps = extract_json_field(&response, "eval_count")
        .zip(extract_json_field(&response, "eval_duration"))
        .map_or(FALLBACK_TPS, |(count, duration_ns)| {
            let duration_s = duration_ns / 1_000_000_000.0;
            if duration_s > 0.0 {
                count / duration_s
            } else {
                FALLBACK_TPS
            }
        });

    // Prompt evaluation time is used as the time-to-first-token figure.
    let ttft = extract_json_field(&response, "prompt_eval_duration")
        .map_or(FALLBACK_TTFT_MS, |ns| ns / 1_000_000.0);

    println!(
        " Ollama ({}): {:.1} tok/s, TTFT: {:.1}ms",
        ollama_model, tps, ttft
    );
    Ok((tps, ttft))
}
/// Prints one "Speedup vs <label>" line to stdout.
///
/// `speedup` is shown as a percentage with one decimal place, followed by a
/// coloured PASS/FAIL marker. The line passes when `speedup` is at least 25.0.
fn format_speedup_line(label: &str, speedup: f64) {
    let target_met = speedup >= 25.0;
    let status = match target_met {
        true => format!("{} (target: 25%)", "PASS".green().bold()),
        false => format!("{} (target: 25%)", "FAIL".red().bold()),
    };
    println!("Speedup vs {label}: {speedup:.1}% {status}");
}
/// Prints the benchmark comparison table and the speedup summary to stdout.
///
/// The APR row is always printed. A baseline row (llama.cpp, Ollama) is printed
/// only when its tokens/sec value is present; a missing TTFT for a printed
/// baseline is shown as `0.0`. After the table, one speedup line is printed for
/// each baseline that has a speedup value.
pub(super) fn print_benchmark_results(comparison: &BenchmarkComparison) {
    // Header.
    println!();
    println!("{}", "═══ Benchmark Results ═══".cyan().bold());
    println!();
    println!("┌─────────────────┬────────────┬────────────┬──────────┐");
    println!("│ System │ Tokens/sec │ TTFT (ms) │ Runs │");
    println!("├─────────────────┼────────────┼────────────┼──────────┤");

    // Our own measurements, including the spread across runs.
    println!(
        "│ {} │ {:>7.1}±{:<3.1} │ {:>10.1} │ {:>8} │",
        "APR (ours) ".green().bold(),
        comparison.apr_tps,
        comparison.apr_tps_stddev,
        comparison.apr_ttft_ms,
        comparison.runs
    );

    // Baseline rows: skipped entirely when no throughput was recorded.
    let baseline_rows = [
        ("llama.cpp ", comparison.llama_cpp_tps, comparison.llama_cpp_ttft_ms),
        ("Ollama ", comparison.ollama_tps, comparison.ollama_ttft_ms),
    ];
    let present_rows = baseline_rows
        .iter()
        .filter_map(|&(name, tps, ttft)| tps.map(|t| (name, t, ttft.unwrap_or(0.0))));
    for (name, tps, ttft) in present_rows {
        println!("│ {name}│ {:>10.1} │ {:>10.1} │ N/A │", tps, ttft);
    }

    println!("└─────────────────┴────────────┴────────────┴──────────┘");
    println!();

    // Speedup summary, one line per available baseline.
    [
        ("llama.cpp", comparison.speedup_vs_llama),
        ("Ollama", comparison.speedup_vs_ollama),
    ]
    .iter()
    .filter_map(|&(label, speedup)| speedup.map(|value| (label, value)))
    .for_each(|(label, value)| format_speedup_line(label, value));
}