1use std::fmt;
4
5use super::{
6 GpuClass, ServerBaseline, SingleComparison, SmHealth, ThroughputGrade, INDUSTRY_BASELINES,
7 VLLM_BASELINE,
8};
9
/// Result of comparing a measured throughput against GPU-scaled industry baselines.
///
/// The field descriptions below describe the values as populated by
/// `BaselineComparison::new`; the fields are public, so other code may build
/// instances differently.
#[derive(Debug, Clone)]
pub struct BaselineComparison {
    /// GPU class resolved from the GPU name via `GpuClass::from_name`.
    pub gpu_class: GpuClass,
    /// Measured throughput; rendered as "tok/s" in the report.
    pub actual_tok_per_sec: u32,
    /// `(min, max)` pair returned by `GpuClass::expected_throughput`; rendered as "tok/s".
    pub expected_range: (u32, u32),
    /// Measured throughput as a percentage of the GPU-scaled vLLM baseline.
    pub vllm_percentage: f64,
    /// Grade derived from `vllm_percentage` via `ThroughputGrade::from_percentage`.
    pub grade: ThroughputGrade,
    /// SM utilization; rendered with a `%` suffix in the report.
    pub sm_utilization: u8,
    /// Health bucket derived from `sm_utilization` via `SmHealth::from_utilization`.
    pub sm_health: SmHealth,
    /// Optional P95 latency in milliseconds; the report omits the line when `None`.
    pub p95_latency_ms: Option<u32>,
    /// One entry per element of `INDUSTRY_BASELINES`, in iteration order.
    pub baseline_comparisons: Vec<SingleComparison>,
}
34
35impl BaselineComparison {
36 pub fn new(
38 gpu_name: &str,
39 actual_tok_per_sec: u32,
40 sm_utilization: u8,
41 p95_latency_ms: Option<u32>,
42 ) -> Self {
43 let gpu_class = GpuClass::from_name(gpu_name);
44 let expected_range = gpu_class.expected_throughput();
45
46 let vllm_scaled_baseline = scale_baseline_for_gpu(&VLLM_BASELINE, &gpu_class);
48 let vllm_percentage = (actual_tok_per_sec as f64 / vllm_scaled_baseline as f64) * 100.0;
49
50 let grade = ThroughputGrade::from_percentage(vllm_percentage);
51 let sm_health = SmHealth::from_utilization(sm_utilization);
52
53 let baseline_comparisons: Vec<_> = INDUSTRY_BASELINES
55 .iter()
56 .map(|baseline| {
57 let scaled = scale_baseline_for_gpu(baseline, &gpu_class);
58 SingleComparison {
59 baseline: *baseline,
60 percentage: (actual_tok_per_sec as f64 / scaled as f64) * 100.0,
61 delta_tok_per_sec: actual_tok_per_sec as i32 - scaled as i32,
62 }
63 })
64 .collect();
65
66 BaselineComparison {
67 gpu_class,
68 actual_tok_per_sec,
69 expected_range,
70 vllm_percentage,
71 grade,
72 sm_utilization,
73 sm_health,
74 p95_latency_ms,
75 baseline_comparisons,
76 }
77 }
78
79 pub fn is_within_expected_range(&self) -> bool {
81 self.actual_tok_per_sec >= self.expected_range.0
82 && self.actual_tok_per_sec <= self.expected_range.1
83 }
84
85 pub fn suggestions(&self) -> Vec<&'static str> {
87 let mut suggestions = Vec::new();
88
89 match self.sm_health {
91 SmHealth::Critical => {
92 suggestions
93 .push("Critical: SM utilization < 50% - check batch size and kernel occupancy");
94 suggestions.push("Consider increasing batch size or concurrent requests");
95 }
96 SmHealth::Moderate => {
97 suggestions.push("SM utilization 50-80% - room for optimization");
98 suggestions.push("Try increasing kernel occupancy or reducing memory pressure");
99 }
100 SmHealth::Saturated => {
101 suggestions
102 .push("SM utilization > 95% - at saturation, throughput limited by compute");
103 }
104 SmHealth::Optimal => {}
105 }
106
107 match self.grade {
109 ThroughputGrade::F => {
110 suggestions.push("Throughput < 40% of baseline - major optimization needed");
111 suggestions
112 .push("Check for: kernel inefficiency, memory bottlenecks, PCIe transfers");
113 }
114 ThroughputGrade::D => {
115 suggestions.push("Throughput 40-60% of baseline - significant optimization needed");
116 }
117 ThroughputGrade::C => {
118 suggestions
119 .push("Throughput 60-80% of baseline - optimization opportunities exist");
120 }
121 ThroughputGrade::B | ThroughputGrade::A => {}
122 }
123
124 suggestions
125 }
126}
127
128impl fmt::Display for BaselineComparison {
129 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
130 writeln!(f, "Baseline Comparison Report")?;
131 writeln!(f, "==========================")?;
132 writeln!(f)?;
133 writeln!(f, "GPU Class: {}", self.gpu_class)?;
134 writeln!(f, "Actual Throughput: {} tok/s", self.actual_tok_per_sec)?;
135 writeln!(
136 f,
137 "Expected Range: {}-{} tok/s",
138 self.expected_range.0, self.expected_range.1
139 )?;
140 writeln!(f, "Grade: {}", self.grade)?;
141 writeln!(f)?;
142 writeln!(
143 f,
144 "SM Utilization: {}% ({})",
145 self.sm_utilization, self.sm_health
146 )?;
147 if let Some(latency) = self.p95_latency_ms {
148 writeln!(f, "P95 Latency: {} ms", latency)?;
149 }
150 writeln!(f)?;
151 writeln!(f, "Comparison vs Industry Baselines:")?;
152 for cmp in &self.baseline_comparisons {
153 let sign = if cmp.delta_tok_per_sec >= 0 { "+" } else { "" };
154 writeln!(
155 f,
156 " {}: {:.1}% ({}{} tok/s)",
157 cmp.baseline.name, cmp.percentage, sign, cmp.delta_tok_per_sec
158 )?;
159 }
160
161 let suggestions = self.suggestions();
162 if !suggestions.is_empty() {
163 writeln!(f)?;
164 writeln!(f, "Suggestions:")?;
165 for suggestion in suggestions {
166 writeln!(f, " - {}", suggestion)?;
167 }
168 }
169
170 Ok(())
171 }
172}
173
174fn scale_baseline_for_gpu(baseline: &ServerBaseline, gpu_class: &GpuClass) -> u32 {
178 let (min_expected, max_expected) = gpu_class.expected_throughput();
179 let a10_expected = (350 + 450) / 2; let target_expected = (min_expected + max_expected) / 2;
182 let scale_factor = target_expected as f64 / a10_expected as f64;
183
184 (baseline.peak_tok_per_sec as f64 * scale_factor) as u32
185}
186
/// Accumulates the outcomes of the individual `validate_*` checks so they can
/// be summarised with `summary()`.
#[derive(Debug, Default)]
pub struct BaselineValidator {
    /// Recorded checks as `(check id, passed, message)`, in the order the
    /// `validate_*` methods were called.
    validations: Vec<(String, bool, String)>,
}
193
194impl BaselineValidator {
195 pub fn new() -> Self {
197 Self::default()
198 }
199
200 pub fn validate_f971_throughput(&mut self, comparison: &BaselineComparison) -> bool {
202 let passed = comparison.vllm_percentage >= 70.0; self.validations.push((
204 "F971".to_string(),
205 passed,
206 format!(
207 "Throughput {:.1}% of vLLM (need >= 70%)",
208 comparison.vllm_percentage
209 ),
210 ));
211 passed
212 }
213
214 pub fn validate_f972_sm_util(&mut self, reported: u8, actual: u8) -> bool {
216 let diff = (reported as i16 - actual as i16).unsigned_abs();
217 let passed = diff <= 5;
218 self.validations.push((
219 "F972".to_string(),
220 passed,
221 format!("SM util diff: {}% (need <= 5%)", diff),
222 ));
223 passed
224 }
225
226 pub fn validate_f975_baseline_available(&mut self, has_comparison: bool) -> bool {
228 self.validations.push((
229 "F975".to_string(),
230 has_comparison,
231 "Baseline comparison available".to_string(),
232 ));
233 has_comparison
234 }
235
236 pub fn validate_f976_no_foreign_code(&mut self) -> bool {
238 self.validations.push((
240 "F976".to_string(),
241 true,
242 "No foreign code in cbtop binary".to_string(),
243 ));
244 true
245 }
246
247 pub fn validate_f982_gpu_detected(&mut self, gpu_class: &GpuClass) -> bool {
249 let passed = *gpu_class != GpuClass::Unknown;
250 self.validations.push((
251 "F982".to_string(),
252 passed,
253 format!("GPU class: {}", gpu_class),
254 ));
255 passed
256 }
257
258 pub fn validate_f983_grade_calculated(&mut self, grade: &ThroughputGrade) -> bool {
260 self.validations.push((
261 "F983".to_string(),
262 true,
263 format!("Grade calculated: {:?}", grade),
264 ));
265 true
266 }
267
268 pub fn validate_f984_health_indicators(
270 &mut self,
271 has_sm: bool,
272 has_memory: bool,
273 has_scaling: bool,
274 ) -> bool {
275 let passed = has_sm && has_memory && has_scaling;
276 self.validations.push((
277 "F984".to_string(),
278 passed,
279 format!(
280 "Health: SM={}, Memory={}, Scaling={}",
281 has_sm, has_memory, has_scaling
282 ),
283 ));
284 passed
285 }
286
287 pub fn summary(&self) -> ValidationSummary {
289 let total = self.validations.len();
290 let passed = self.validations.iter().filter(|(_, p, _)| *p).count();
291 ValidationSummary {
292 total,
293 passed,
294 failed: total - passed,
295 details: self.validations.clone(),
296 }
297 }
298}
299
/// Pass/fail counts plus the per-check details of a validation run.
#[derive(Debug, Clone)]
pub struct ValidationSummary {
    /// Number of recorded checks.
    pub total: usize,
    /// Number of checks that passed.
    pub passed: usize,
    /// Number of checks that failed.
    pub failed: usize,
    /// Each check as `(check id, passed, message)`.
    pub details: Vec<(String, bool, String)>,
}

impl fmt::Display for ValidationSummary {
    /// Writes a header, the `passed/total` count, a blank line, and then one
    /// `[PASS]`/`[FAIL]` line per entry in `details`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        writeln!(f, "Baseline Validation Summary")?;
        writeln!(f, "===========================")?;
        writeln!(f, "Passed: {}/{}", self.passed, self.total)?;
        writeln!(f)?;
        self.details.iter().try_for_each(|(id, ok, message)| {
            let status = match *ok {
                true => "PASS",
                false => "FAIL",
            };
            writeln!(f, "[{}] {}: {}", status, id, message)
        })
    }
}