Skip to main content

cbtop/baseline/
comparison.rs

1//! Baseline comparison and validation logic.
2
3use std::fmt;
4
5use super::{
6    GpuClass, ServerBaseline, SingleComparison, SmHealth, ThroughputGrade, INDUSTRY_BASELINES,
7    VLLM_BASELINE,
8};
9
10/// Baseline comparison result.
11///
12/// Compares actual metrics against industry baselines.
13#[derive(Debug, Clone)]
14pub struct BaselineComparison {
15    /// Detected GPU class
16    pub gpu_class: GpuClass,
17    /// Actual throughput (tok/s)
18    pub actual_tok_per_sec: u32,
19    /// Expected throughput range
20    pub expected_range: (u32, u32),
21    /// Percentage of vLLM baseline achieved
22    pub vllm_percentage: f64,
23    /// Throughput grade
24    pub grade: ThroughputGrade,
25    /// SM utilization
26    pub sm_utilization: u8,
27    /// SM health indicator
28    pub sm_health: SmHealth,
29    /// P95 latency (ms)
30    pub p95_latency_ms: Option<u32>,
31    /// Comparison with each baseline
32    pub baseline_comparisons: Vec<SingleComparison>,
33}
34
35impl BaselineComparison {
36    /// Create a new baseline comparison.
37    pub fn new(
38        gpu_name: &str,
39        actual_tok_per_sec: u32,
40        sm_utilization: u8,
41        p95_latency_ms: Option<u32>,
42    ) -> Self {
43        let gpu_class = GpuClass::from_name(gpu_name);
44        let expected_range = gpu_class.expected_throughput();
45
46        // Calculate percentage of vLLM baseline (scaled by GPU class)
47        let vllm_scaled_baseline = scale_baseline_for_gpu(&VLLM_BASELINE, &gpu_class);
48        let vllm_percentage = (actual_tok_per_sec as f64 / vllm_scaled_baseline as f64) * 100.0;
49
50        let grade = ThroughputGrade::from_percentage(vllm_percentage);
51        let sm_health = SmHealth::from_utilization(sm_utilization);
52
53        // Compare against all baselines
54        let baseline_comparisons: Vec<_> = INDUSTRY_BASELINES
55            .iter()
56            .map(|baseline| {
57                let scaled = scale_baseline_for_gpu(baseline, &gpu_class);
58                SingleComparison {
59                    baseline: *baseline,
60                    percentage: (actual_tok_per_sec as f64 / scaled as f64) * 100.0,
61                    delta_tok_per_sec: actual_tok_per_sec as i32 - scaled as i32,
62                }
63            })
64            .collect();
65
66        BaselineComparison {
67            gpu_class,
68            actual_tok_per_sec,
69            expected_range,
70            vllm_percentage,
71            grade,
72            sm_utilization,
73            sm_health,
74            p95_latency_ms,
75            baseline_comparisons,
76        }
77    }
78
79    /// Check if throughput is within expected range for GPU class.
80    pub fn is_within_expected_range(&self) -> bool {
81        self.actual_tok_per_sec >= self.expected_range.0
82            && self.actual_tok_per_sec <= self.expected_range.1
83    }
84
85    /// Get improvement suggestions based on metrics.
86    pub fn suggestions(&self) -> Vec<&'static str> {
87        let mut suggestions = Vec::new();
88
89        // SM utilization suggestions
90        match self.sm_health {
91            SmHealth::Critical => {
92                suggestions
93                    .push("Critical: SM utilization < 50% - check batch size and kernel occupancy");
94                suggestions.push("Consider increasing batch size or concurrent requests");
95            }
96            SmHealth::Moderate => {
97                suggestions.push("SM utilization 50-80% - room for optimization");
98                suggestions.push("Try increasing kernel occupancy or reducing memory pressure");
99            }
100            SmHealth::Saturated => {
101                suggestions
102                    .push("SM utilization > 95% - at saturation, throughput limited by compute");
103            }
104            SmHealth::Optimal => {}
105        }
106
107        // Grade-based suggestions
108        match self.grade {
109            ThroughputGrade::F => {
110                suggestions.push("Throughput < 40% of baseline - major optimization needed");
111                suggestions
112                    .push("Check for: kernel inefficiency, memory bottlenecks, PCIe transfers");
113            }
114            ThroughputGrade::D => {
115                suggestions.push("Throughput 40-60% of baseline - significant optimization needed");
116            }
117            ThroughputGrade::C => {
118                suggestions
119                    .push("Throughput 60-80% of baseline - optimization opportunities exist");
120            }
121            ThroughputGrade::B | ThroughputGrade::A => {}
122        }
123
124        suggestions
125    }
126}
127
128impl fmt::Display for BaselineComparison {
129    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
130        writeln!(f, "Baseline Comparison Report")?;
131        writeln!(f, "==========================")?;
132        writeln!(f)?;
133        writeln!(f, "GPU Class: {}", self.gpu_class)?;
134        writeln!(f, "Actual Throughput: {} tok/s", self.actual_tok_per_sec)?;
135        writeln!(
136            f,
137            "Expected Range: {}-{} tok/s",
138            self.expected_range.0, self.expected_range.1
139        )?;
140        writeln!(f, "Grade: {}", self.grade)?;
141        writeln!(f)?;
142        writeln!(
143            f,
144            "SM Utilization: {}% ({})",
145            self.sm_utilization, self.sm_health
146        )?;
147        if let Some(latency) = self.p95_latency_ms {
148            writeln!(f, "P95 Latency: {} ms", latency)?;
149        }
150        writeln!(f)?;
151        writeln!(f, "Comparison vs Industry Baselines:")?;
152        for cmp in &self.baseline_comparisons {
153            let sign = if cmp.delta_tok_per_sec >= 0 { "+" } else { "" };
154            writeln!(
155                f,
156                "  {}: {:.1}% ({}{} tok/s)",
157                cmp.baseline.name, cmp.percentage, sign, cmp.delta_tok_per_sec
158            )?;
159        }
160
161        let suggestions = self.suggestions();
162        if !suggestions.is_empty() {
163            writeln!(f)?;
164            writeln!(f, "Suggestions:")?;
165            for suggestion in suggestions {
166                writeln!(f, "  - {}", suggestion)?;
167            }
168        }
169
170        Ok(())
171    }
172}
173
174/// Scale baseline for different GPU classes.
175///
176/// Baselines are measured on A10; scale for other GPUs based on expected performance.
177fn scale_baseline_for_gpu(baseline: &ServerBaseline, gpu_class: &GpuClass) -> u32 {
178    let (min_expected, max_expected) = gpu_class.expected_throughput();
179    let a10_expected = (350 + 450) / 2; // A10 midpoint
180
181    let target_expected = (min_expected + max_expected) / 2;
182    let scale_factor = target_expected as f64 / a10_expected as f64;
183
184    (baseline.peak_tok_per_sec as f64 * scale_factor) as u32
185}
186
187/// Baseline validator for F971-F985 falsification criteria.
188#[derive(Debug, Default)]
189pub struct BaselineValidator {
190    /// Validated criteria
191    validations: Vec<(String, bool, String)>,
192}
193
194impl BaselineValidator {
195    /// Create a new validator.
196    pub fn new() -> Self {
197        Self::default()
198    }
199
200    /// Validate F971: Realistic GPU throughput (within 30% of vLLM).
201    pub fn validate_f971_throughput(&mut self, comparison: &BaselineComparison) -> bool {
202        let passed = comparison.vllm_percentage >= 70.0; // Within 30% means at least 70%
203        self.validations.push((
204            "F971".to_string(),
205            passed,
206            format!(
207                "Throughput {:.1}% of vLLM (need >= 70%)",
208                comparison.vllm_percentage
209            ),
210        ));
211        passed
212    }
213
214    /// Validate F972: SM utilization correct (within 5% of nvidia-smi).
215    pub fn validate_f972_sm_util(&mut self, reported: u8, actual: u8) -> bool {
216        let diff = (reported as i16 - actual as i16).unsigned_abs();
217        let passed = diff <= 5;
218        self.validations.push((
219            "F972".to_string(),
220            passed,
221            format!("SM util diff: {}% (need <= 5%)", diff),
222        ));
223        passed
224    }
225
226    /// Validate F975: Baseline comparison available.
227    pub fn validate_f975_baseline_available(&mut self, has_comparison: bool) -> bool {
228        self.validations.push((
229            "F975".to_string(),
230            has_comparison,
231            "Baseline comparison available".to_string(),
232        ));
233        has_comparison
234    }
235
236    /// Validate F976: No foreign code dependency.
237    pub fn validate_f976_no_foreign_code(&mut self) -> bool {
238        // This is always true for cbtop - we don't depend on vLLM/llama.cpp
239        self.validations.push((
240            "F976".to_string(),
241            true,
242            "No foreign code in cbtop binary".to_string(),
243        ));
244        true
245    }
246
247    /// Validate F982: GPU class detected correctly.
248    pub fn validate_f982_gpu_detected(&mut self, gpu_class: &GpuClass) -> bool {
249        let passed = *gpu_class != GpuClass::Unknown;
250        self.validations.push((
251            "F982".to_string(),
252            passed,
253            format!("GPU class: {}", gpu_class),
254        ));
255        passed
256    }
257
258    /// Validate F983: Throughput grade calculated.
259    pub fn validate_f983_grade_calculated(&mut self, grade: &ThroughputGrade) -> bool {
260        self.validations.push((
261            "F983".to_string(),
262            true,
263            format!("Grade calculated: {:?}", grade),
264        ));
265        true
266    }
267
268    /// Validate F984: Health indicators displayed.
269    pub fn validate_f984_health_indicators(
270        &mut self,
271        has_sm: bool,
272        has_memory: bool,
273        has_scaling: bool,
274    ) -> bool {
275        let passed = has_sm && has_memory && has_scaling;
276        self.validations.push((
277            "F984".to_string(),
278            passed,
279            format!(
280                "Health: SM={}, Memory={}, Scaling={}",
281                has_sm, has_memory, has_scaling
282            ),
283        ));
284        passed
285    }
286
287    /// Get validation summary.
288    pub fn summary(&self) -> ValidationSummary {
289        let total = self.validations.len();
290        let passed = self.validations.iter().filter(|(_, p, _)| *p).count();
291        ValidationSummary {
292            total,
293            passed,
294            failed: total - passed,
295            details: self.validations.clone(),
296        }
297    }
298}
299
/// Aggregated result of a set of validation checks.
#[derive(Debug, Clone)]
pub struct ValidationSummary {
    /// Number of validations that were run.
    pub total: usize,
    /// Number of validations that passed.
    pub passed: usize,
    /// Number of validations that failed.
    pub failed: usize,
    /// Individual results as `(id, passed, message)`.
    pub details: Vec<(String, bool, String)>,
}

impl fmt::Display for ValidationSummary {
    /// Writes a header with the pass count, a blank line, then one
    /// `[PASS]`/`[FAIL]` line per entry in `details`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Header block.
        writeln!(f, "Baseline Validation Summary")?;
        writeln!(f, "===========================")?;
        writeln!(f, "Passed: {}/{}", self.passed, self.total)?;
        writeln!(f)?;

        // One line per recorded check, in stored order.
        self.details.iter().try_for_each(|(id, passed, msg)| {
            let status = match *passed {
                true => "PASS",
                false => "FAIL",
            };
            writeln!(f, "[{}] {}: {}", status, id, msg)
        })
    }
}