use super::benchmark::{AggregateStats, Regression};
use super::loadtest::LoadTestResult;
use std::path::Path;
/// Serializes a load-test result as pretty-printed JSON.
///
/// Serialization failures are not surfaced to the caller: the function
/// falls back to an empty JSON object (`{}`) so that it always returns a
/// string.
pub fn to_json(result: &LoadTestResult) -> String {
    match serde_json::to_string_pretty(result) {
        Ok(json) => json,
        Err(_) => String::from("{}"),
    }
}
/// Renders one load-test result as a single Markdown table row.
///
/// The cells follow the column order of [`TABLE_HEADER`]. Only the part of
/// the timestamp before the first `T` is shown in the date column. The ITL,
/// decode, prefill and TPOT cells are shown as `-` unless their value is
/// greater than zero, and the error rate is rendered as a percentage
/// (`0%` when it is not greater than zero).
pub fn to_markdown_row(result: &LoadTestResult) -> String {
    // Metrics that were not measured are rendered as a dash instead of "0.0".
    let or_dash = |value: f64| -> String {
        if value > 0.0 {
            format!("{:.1}", value)
        } else {
            "-".to_string()
        }
    };

    // Keep only the text before the first 'T'; fall back to the whole
    // timestamp when there is no 'T' to split on.
    let date = result
        .timestamp
        .split_once('T')
        .map_or(result.timestamp.as_str(), |(day, _)| day);

    let itl = or_dash(result.itl_p50_ms);
    let decode = or_dash(result.decode_tok_per_sec);
    let prefill = or_dash(result.prefill_tok_per_sec);
    let tpot = or_dash(result.tpot_p50_ms);

    // `error_rate` is a fraction; scale it to a percentage for display.
    let err_rate = if result.error_rate > 0.0 {
        format!("{:.1}%", result.error_rate * 100.0)
    } else {
        "0%".to_string()
    };

    format!(
        "| {} | {} | {} | {:.1} | {:.1} | {:.1} | {:.1} | {:.1} | {:.1} | {:.1} | {} | {} | {} | {} | {} | {} |",
        date,
        result.runtime_name,
        result.concurrency,
        result.throughput_rps,
        result.latency_p50_ms,
        result.latency_p95_ms,
        result.latency_p99_ms,
        result.ttft_p50_ms,
        result.tokens_per_sec,
        result.avg_tok_per_req,
        itl,
        decode,
        prefill,
        tpot,
        err_rate,
        result.total_requests,
    )
}
/// Markdown table header used for every performance table in this module.
///
/// The literal holds two lines: the column-title row and the `|---|`
/// separator row. The column order matches the cells produced by
/// `to_markdown_row`. The trailing backslash after the opening quote is a
/// line continuation, so the string begins directly with `| Date |` rather
/// than with a newline.
const TABLE_HEADER: &str = "\
| Date | Runtime | Concurrency | RPS | P50 (ms) | P95 (ms) | P99 (ms) | TTFT P50 (ms) | Tok/s | Avg tok/req | ITL P50 (ms) | Decode tok/s | Prefill tok/s | TPOT P50 (ms) | Err% | Requests |
|------|---------|-------------|-----|----------|----------|----------|---------------|-------|-------------|--------------|--------------|---------------|---------------|------|----------|";
/// Renders a complete Markdown section for the given results.
///
/// The output is a `## Performance Results` heading, a blank line, the
/// shared table header, then one row per result. Every line, including the
/// last one, is terminated by a newline.
pub fn to_markdown_table(results: &[LoadTestResult]) -> String {
    let mut table = String::from("## Performance Results\n\n");
    table.push_str(TABLE_HEADER);
    table.push('\n');
    for result in results {
        table.push_str(&to_markdown_row(result));
        table.push('\n');
    }
    table
}
pub fn update_performance_md(
path: &Path,
results: &[LoadTestResult],
) -> Result<(), std::io::Error> {
let existing = if path.exists() {
std::fs::read_to_string(path)?
} else {
String::new()
};
let new_rows: Vec<String> = results.iter().map(|r| to_markdown_row(r)).collect();
let content = if existing.is_empty() {
let mut lines = vec![
"# LLM Inference Performance".to_string(),
String::new(),
TABLE_HEADER.to_string(),
];
lines.extend(new_rows);
lines.push(String::new());
lines.join("\n")
} else if existing.contains(TABLE_HEADER.lines().next().unwrap_or("")) {
let trimmed = existing.trim_end();
let mut out = trimmed.to_string();
for row in &new_rows {
out.push('\n');
out.push_str(row);
}
out.push('\n');
out
} else {
let mut out = existing;
if !out.ends_with('\n') {
out.push('\n');
}
out.push('\n');
out.push_str(&to_markdown_table(results));
out
};
std::fs::write(path, content)
}
/// Compares aggregated statistics of the current run against a baseline.
///
/// The mean of each current metric is compared with the corresponding
/// baseline value. Metrics are checked in a fixed order (`throughput_rps`,
/// `tokens_per_sec`, `latency_p50_ms`, `tpot_p50_ms`) and the returned
/// vector keeps that order. Only metrics that worsened by more than
/// `threshold_pct` percent are reported.
pub fn compare_to_baseline(
    current: &AggregateStats,
    baseline: &LoadTestResult,
    threshold_pct: f64,
) -> Vec<Regression> {
    let mut found = Vec::new();

    // Metrics where a drop relative to the baseline is a regression.
    let higher_is_better = [
        (
            "throughput_rps",
            baseline.throughput_rps,
            current.throughput_rps.mean,
        ),
        (
            "tokens_per_sec",
            baseline.tokens_per_sec,
            current.tokens_per_sec.mean,
        ),
    ];
    for (metric, base, now) in higher_is_better.iter().copied() {
        check_regression_higher_better(metric, base, now, threshold_pct, &mut found);
    }

    // Metrics where a rise relative to the baseline is a regression.
    check_regression_lower_better(
        "latency_p50_ms",
        baseline.latency_p50_ms,
        current.latency_p50.mean,
        threshold_pct,
        &mut found,
    );

    // TPOT is only compared when the baseline has a positive value for it.
    if baseline.tpot_p50_ms > 0.0 {
        check_regression_lower_better(
            "tpot_p50_ms",
            baseline.tpot_p50_ms,
            current.tpot_p50.mean,
            threshold_pct,
            &mut found,
        );
    }

    found
}
/// Records a regression for a metric where larger values are better.
///
/// The relative change of `current` versus `baseline` is computed in
/// percent. A regression is appended to `regressions` only when the metric
/// dropped by more than `threshold_pct` percent. Baselines that are zero or
/// negative are skipped entirely.
fn check_regression_higher_better(
    metric: &str,
    baseline: f64,
    current: f64,
    threshold_pct: f64,
    regressions: &mut Vec<Regression>,
) {
    // A non-positive baseline gives no meaningful relative change.
    if baseline <= 0.0 {
        return;
    }

    let change_pct = ((current - baseline) / baseline) * 100.0;

    // Negative change means the metric got worse; flag it only past the
    // allowed tolerance.
    let regression = (change_pct < -threshold_pct).then(|| Regression {
        metric: metric.to_string(),
        baseline_value: baseline,
        current_value: current,
        change_pct,
        exceeds_threshold: true,
    });
    regressions.extend(regression);
}
/// Records a regression for a metric where smaller values are better.
///
/// The relative change of `current` versus `baseline` is computed in
/// percent. A regression is appended to `regressions` only when the metric
/// rose by more than `threshold_pct` percent. Baselines that are zero or
/// negative are skipped entirely.
fn check_regression_lower_better(
    metric: &str,
    baseline: f64,
    current: f64,
    threshold_pct: f64,
    regressions: &mut Vec<Regression>,
) {
    // A non-positive baseline gives no meaningful relative change.
    if baseline <= 0.0 {
        return;
    }

    let change_pct = ((current - baseline) / baseline) * 100.0;

    // Positive change means the metric got worse; flag it only past the
    // allowed tolerance.
    let regression = (change_pct > threshold_pct).then(|| Regression {
        metric: metric.to_string(),
        baseline_value: baseline,
        current_value: current,
        change_pct,
        exceeds_threshold: true,
    });
    regressions.extend(regression);
}
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    //! Unit tests for JSON/Markdown rendering, the performance-log updater
    //! and the baseline regression comparison.

    use super::super::benchmark::StatSummary;
    use super::*;

    /// Builds a fully populated result for the given runtime name.
    ///
    /// `prefill_tok_per_sec` is left at zero so the "-" placeholder path is
    /// exercised; `error_rate` is 5%.
    fn sample_result(runtime: &str) -> LoadTestResult {
        LoadTestResult {
            total_requests: 100,
            successful: 95,
            failed: 5,
            throughput_rps: 10.5,
            latency_p50_ms: 150.3,
            latency_p95_ms: 300.7,
            latency_p99_ms: 500.2,
            ttft_p50_ms: 80.1,
            tokens_per_sec: 200.0,
            avg_tok_per_req: 15.0,
            itl_p50_ms: 5.0,
            decode_tok_per_sec: 200.0,
            prefill_tok_per_sec: 0.0,
            timestamp: "2026-03-01T04:00:00Z".to_string(),
            runtime_name: runtime.to_string(),
            elapsed_secs: 10.0,
            concurrency: 4,
            ttft_p90_ms: 90.0,
            ttft_p95_ms: 95.0,
            ttft_p99_ms: 99.0,
            tpot_p50_ms: 6.0,
            tpot_p90_ms: 8.0,
            tpot_p95_ms: 9.0,
            tpot_p99_ms: 12.0,
            latency_min_ms: 50.0,
            latency_max_ms: 800.0,
            latency_stddev_ms: 120.0,
            error_rate: 0.05,
            prompt_tokens_total: 950,
            completion_tokens_total: 1425,
            truncated_pct: 0.0,
            sse_batch_ratio: 0.0,
            goodput_pct: 0.0,
            decode_us_per_layer: None,
            num_layers: None,
            output_tokens_dist: None,
            brick_trace_summary: None,
            request_details: Vec::new(),
            quality: None,
            tail_analysis: None,
            gpu_telemetry: None,
            dataset_stats: None,
            cold_start_ms: None,
        }
    }

    /// Builds aggregate stats whose means are the given values; TTFT is
    /// fixed at 50 ms because no test compares it.
    fn sample_aggregate(throughput: f64, latency: f64, tps: f64, tpot: f64) -> AggregateStats {
        AggregateStats {
            throughput_rps: StatSummary {
                mean: throughput,
                stddev: 0.5,
                ci_95_lower: throughput - 1.0,
                ci_95_upper: throughput + 1.0,
                values: vec![throughput],
            },
            latency_p50: StatSummary {
                mean: latency,
                stddev: 5.0,
                ci_95_lower: latency - 10.0,
                ci_95_upper: latency + 10.0,
                values: vec![latency],
            },
            tokens_per_sec: StatSummary {
                mean: tps,
                stddev: 10.0,
                ci_95_lower: tps - 20.0,
                ci_95_upper: tps + 20.0,
                values: vec![tps],
            },
            ttft_p50: StatSummary {
                mean: 50.0,
                stddev: 2.0,
                ci_95_lower: 48.0,
                ci_95_upper: 52.0,
                values: vec![50.0],
            },
            tpot_p50: StatSummary {
                mean: tpot,
                stddev: 0.5,
                ci_95_lower: tpot - 1.0,
                ci_95_upper: tpot + 1.0,
                values: vec![tpot],
            },
        }
    }

    #[test]
    fn test_to_json() {
        let result = sample_result("realizar");
        let json = to_json(&result);
        assert!(json.contains("\"runtime_name\": \"realizar\""));
        assert!(json.contains("\"total_requests\": 100"));
    }

    #[test]
    fn test_to_markdown_row() {
        let result = sample_result("ollama");
        let row = to_markdown_row(&result);
        assert!(row.starts_with("| 2026-03-01"));
        assert!(row.contains("ollama"));
        assert!(row.contains("10.5"));
        assert!(row.contains("150.3"));
        // TPOT P50 from the sample (6.0 ms).
        assert!(row.contains("6.0"));
        // Error rate 0.05 rendered as a percentage.
        assert!(row.contains("5.0%"));
    }

    #[test]
    fn test_to_markdown_row_zero_error_rate() {
        let mut result = sample_result("test");
        result.error_rate = 0.0;
        let row = to_markdown_row(&result);
        assert!(row.contains("0%"));
    }

    #[test]
    fn test_to_markdown_table() {
        let results = vec![sample_result("realizar"), sample_result("ollama")];
        let table = to_markdown_table(&results);
        assert!(table.contains("## Performance Results"));
        assert!(table.contains("| Date |"));
        assert!(table.contains("TPOT P50"));
        assert!(table.contains("Err%"));
        assert!(table.contains("realizar"));
        assert!(table.contains("ollama"));
    }

    #[test]
    fn test_update_performance_md_new_file() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("performance.md");
        let results = vec![sample_result("realizar")];
        update_performance_md(&path, &results).unwrap();
        let content = std::fs::read_to_string(&path).unwrap();
        assert!(content.contains("# LLM Inference Performance"));
        assert!(content.contains("realizar"));
    }

    #[test]
    fn test_update_performance_md_append() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("performance.md");
        update_performance_md(&path, &[sample_result("realizar")]).unwrap();
        update_performance_md(&path, &[sample_result("ollama")]).unwrap();
        let content = std::fs::read_to_string(&path).unwrap();
        assert!(content.contains("realizar"));
        assert!(content.contains("ollama"));
        // The second update must extend the table, not start a new document.
        assert_eq!(content.matches("# LLM Inference Performance").count(), 1);
    }

    #[test]
    fn test_update_performance_md_no_table_existing() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("performance.md");
        std::fs::write(&path, "# Some other content\n\nHello world.\n").unwrap();
        update_performance_md(&path, &[sample_result("llamacpp")]).unwrap();
        let content = std::fs::read_to_string(&path).unwrap();
        assert!(content.contains("# Some other content"));
        assert!(content.contains("llamacpp"));
        assert!(content.contains("## Performance Results"));
    }

    #[test]
    fn test_to_json_roundtrip() {
        let result = sample_result("test");
        let json = to_json(&result);
        let back: LoadTestResult = serde_json::from_str(&json).unwrap();
        assert_eq!(back.runtime_name, "test");
        assert_eq!(back.total_requests, 100);
    }

    #[test]
    fn test_markdown_row_date_extraction() {
        let mut result = sample_result("x");
        result.timestamp = "2026-12-25T12:00:00Z".to_string();
        let row = to_markdown_row(&result);
        assert!(row.contains("2026-12-25"));
        assert!(!row.contains("T12:00:00Z"));
    }

    #[test]
    fn test_compare_to_baseline_no_regression() {
        let baseline = sample_result("baseline");
        // Same values as the baseline: nothing may be reported.
        let current = sample_aggregate(10.5, 150.3, 200.0, 6.0);
        let regressions = compare_to_baseline(&current, &baseline, 10.0);
        assert!(regressions.is_empty());
    }

    #[test]
    fn test_compare_to_baseline_throughput_regression() {
        let baseline = sample_result("baseline");
        // Throughput falls from 10.5 to 8.0 rps, well past the 10% tolerance.
        let current = sample_aggregate(8.0, 150.3, 200.0, 6.0);
        let regressions = compare_to_baseline(&current, &baseline, 10.0);
        assert_eq!(regressions.len(), 1);
        assert_eq!(regressions[0].metric, "throughput_rps");
        assert!(regressions[0].exceeds_threshold);
        assert!(regressions[0].change_pct < 0.0);
    }

    #[test]
    fn test_compare_to_baseline_latency_regression() {
        let baseline = sample_result("baseline");
        // P50 latency rises from 150.3 ms to 200 ms.
        let current = sample_aggregate(10.5, 200.0, 200.0, 6.0);
        let regressions = compare_to_baseline(&current, &baseline, 10.0);
        assert_eq!(regressions.len(), 1);
        assert_eq!(regressions[0].metric, "latency_p50_ms");
        assert!(regressions[0].change_pct > 0.0);
    }

    #[test]
    fn test_compare_to_baseline_multiple_regressions() {
        let baseline = sample_result("baseline");
        // Every compared metric is worse than the baseline.
        let current = sample_aggregate(5.0, 300.0, 100.0, 15.0);
        let regressions = compare_to_baseline(&current, &baseline, 10.0);
        assert!(regressions.len() >= 2);
    }
}