1use serde::{Deserialize, Serialize};
7use std::path::Path;
8
9#[derive(Debug, Clone, Serialize, Deserialize)]
11pub struct BenchmarkResult {
12 pub version: String,
13 pub timestamp: String,
14 pub model: String,
15 pub backend: String,
16 pub config: BenchmarkConfig,
17 pub results: BenchmarkMetrics,
18 #[serde(skip_serializing_if = "Option::is_none")]
19 pub memory: Option<MemoryMetrics>,
20}
21
22#[derive(Debug, Clone, Serialize, Deserialize)]
23pub struct BenchmarkConfig {
24 pub concurrency: usize,
25 pub max_tokens: usize,
26 pub rounds: usize,
27 pub prompt_tokens: usize,
28}
29
30#[derive(Debug, Clone, Serialize, Deserialize)]
31pub struct BenchmarkMetrics {
32 pub throughput_tps: StatSummary,
33 pub ttft_ms: PercentileSummary,
34 pub tpot_ms: PercentileSummary,
35 pub total_tokens: usize,
36 pub total_time_ms: f64,
37 pub requests_completed: usize,
38 pub requests_failed: usize,
39}
40
41#[derive(Debug, Clone, Serialize, Deserialize)]
42pub struct StatSummary {
43 pub mean: f64,
44 pub min: f64,
45 pub max: f64,
46}
47
48#[derive(Debug, Clone, Serialize, Deserialize)]
49pub struct PercentileSummary {
50 pub mean: f64,
51 pub p50: f64,
52 pub p90: f64,
53 pub p95: f64,
54 pub p99: f64,
55}
56
57#[derive(Debug, Clone, Serialize, Deserialize)]
58pub struct MemoryMetrics {
59 pub peak_kv_blocks_used: usize,
60 pub total_kv_blocks: usize,
61 pub peak_kv_utilization: f64,
62}
63
64impl BenchmarkResult {
65 pub fn write_json(&self, path: &Path) -> std::io::Result<()> {
67 let json = serde_json::to_string_pretty(self)
68 .map_err(|e| std::io::Error::other(format!("JSON serialize: {e}")))?;
69 std::fs::write(path, json)
70 }
71
72 pub fn to_json(&self) -> String {
74 serde_json::to_string_pretty(self).unwrap_or_default()
75 }
76}
77
/// Returns the `p`-th percentile of `data`, linearly interpolating between the
/// two nearest ranks.
///
/// `data` is sorted in place (ascending, using [`f64::total_cmp`]) as a side
/// effect, so the caller's slice is reordered after the call.
///
/// * An empty slice yields `0.0`.
/// * A single-element slice yields that element for any `p`.
/// * `p` is clamped to `[0, 100]`, so out-of-range requests return the
///   minimum or maximum instead of extrapolating past the data.
/// * A NaN `p` yields NaN when there is more than one sample.
/// * NaN samples no longer break the sort: `total_cmp` is a total order, so
///   they are placed at one end of the slice according to their sign bit.
pub fn percentile(data: &mut [f64], p: f64) -> f64 {
    // `checked_sub` doubles as the emptiness check and yields the last index.
    let Some(last_idx) = data.len().checked_sub(1) else {
        return 0.0;
    };

    // A total order is required: a partial-order comparator with NaN present
    // gives an unspecified ordering and may make the sort panic.
    data.sort_unstable_by(f64::total_cmp);

    // Fractional rank in [0, last_idx]; clamping keeps `frac` non-negative.
    let rank = (p.clamp(0.0, 100.0) / 100.0) * last_idx as f64;
    let lo = rank.floor() as usize;
    let frac = rank - lo as f64;

    match data.get(lo + 1) {
        // Interpolate only when the rank falls strictly between two samples
        // (`!=` rather than `>` so that a NaN `p` still propagates as NaN).
        Some(&upper) if frac != 0.0 => data[lo] * (1.0 - frac) + upper * frac,
        // Exact rank or last sample: return it directly, which also avoids
        // `inf * 0.0 == NaN` when the neighbouring sample is infinite.
        _ => data[lo.min(last_idx)],
    }
}
98
99pub fn percentile_summary(samples: &[f64]) -> PercentileSummary {
101 if samples.is_empty() {
102 return PercentileSummary {
103 mean: 0.0,
104 p50: 0.0,
105 p90: 0.0,
106 p95: 0.0,
107 p99: 0.0,
108 };
109 }
110 let mut data = samples.to_vec();
111 let mean = data.iter().sum::<f64>() / data.len() as f64;
112 PercentileSummary {
113 mean,
114 p50: percentile(&mut data, 50.0),
115 p90: percentile(&mut data, 90.0),
116 p95: percentile(&mut data, 95.0),
117 p99: percentile(&mut data, 99.0),
118 }
119}
120
#[cfg(test)]
mod tests {
    use super::*;

    /// True when `actual` lies strictly within `tolerance` of `expected`.
    fn near(actual: f64, expected: f64, tolerance: f64) -> bool {
        (actual - expected).abs() < tolerance
    }

    /// Median, minimum and maximum of the integers 1 through 10.
    #[test]
    fn test_percentile() {
        let mut data: Vec<f64> = (1..=10).map(f64::from).collect();
        for (p, expected) in [(50.0, 5.5), (0.0, 1.0), (100.0, 10.0)] {
            assert!(near(percentile(&mut data, p), expected, 0.01));
        }
    }

    /// Mean, p50 and p99 of the integers 1 through 100.
    #[test]
    fn test_percentile_summary() {
        let samples: Vec<f64> = (1..=100).map(f64::from).collect();
        let summary = percentile_summary(&samples);
        assert!(near(summary.mean, 50.5, 0.01));
        assert!(near(summary.p50, 50.5, 0.6));
        assert!(near(summary.p99, 99.0, 1.0));
    }
}