Skip to main content

cbtop/baseline/
types.rs

1//! Baseline types: server baselines, GPU classes, grades, and health indicators.
2
3use std::fmt;
4
/// Reference performance figures for a single inference server, taken from
/// the Satna (2026) benchmarks.
///
/// Citation: [21] Satna, R. (2026). "LLM Inference Benchmarking Framework."
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct ServerBaseline {
    /// Name of the server (vLLM, TGI, Triton).
    pub name: &'static str,
    /// Peak throughput, in tokens per second.
    pub peak_tok_per_sec: u32,
    /// 95th-percentile latency, in milliseconds.
    pub p95_latency_ms: u32,
    /// SM utilization, as a percentage.
    pub sm_utilization: u8,
    /// Memory overhead, as a percentage.
    pub memory_overhead: u8,
    /// Reference GPU for these figures.
    pub gpu: &'static str,
}
23
24/// Industry baselines from Satna (2026) on A10 GPU.
25pub const VLLM_BASELINE: ServerBaseline = ServerBaseline {
26    name: "vLLM",
27    peak_tok_per_sec: 412,
28    p95_latency_ms: 1715,
29    sm_utilization: 99,
30    memory_overhead: 42,
31    gpu: "A10",
32};
33
34pub const TGI_BASELINE: ServerBaseline = ServerBaseline {
35    name: "TGI",
36    peak_tok_per_sec: 408,
37    p95_latency_ms: 1704,
38    sm_utilization: 98,
39    memory_overhead: 44,
40    gpu: "A10",
41};
42
43pub const TRITON_BASELINE: ServerBaseline = ServerBaseline {
44    name: "Triton",
45    peak_tok_per_sec: 385,
46    p95_latency_ms: 2007,
47    sm_utilization: 97,
48    memory_overhead: 45,
49    gpu: "A10",
50};
51
52/// All industry baselines.
53pub const INDUSTRY_BASELINES: [ServerBaseline; 3] = [VLLM_BASELINE, TGI_BASELINE, TRITON_BASELINE];
54
/// Classes of GPU for which an expected throughput range is known.
///
/// Taken from cbtop spec §21.7 "Expected Throughput by GPU Class".
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum GpuClass {
    /// NVIDIA A10 (24GB): data center inference GPU.
    A10,
    /// NVIDIA A100 (40GB or 80GB): data center training/inference GPU.
    A100,
    /// NVIDIA H100 (80GB): Hopper architecture flagship.
    H100,
    /// NVIDIA RTX 4090 (24GB): consumer flagship.
    Rtx4090,
    /// NVIDIA RTX 3090 (24GB): previous-generation consumer flagship.
    Rtx3090,
    /// Any GPU that is not one of the classes above.
    Unknown,
}
73
/// The data recorded for each `GpuClass` variant.
struct GpuSpec {
    /// Text shown for the class (e.g. "A10 (24GB)").
    label: &'static str,
    /// Expected throughput as a `(min, max)` pair, in tok/s.
    throughput: (u32, u32),
    /// Amount of VRAM, in GB.
    vram_gb: u32,
}
83
84/// Single source of truth for all GPU class specifications.
85///
86/// Adding a new GPU class only requires adding one entry here (plus the enum variant
87/// and a `from_name` match arm), eliminating the previous 4 separate match blocks.
88const fn gpu_spec(class: &GpuClass) -> GpuSpec {
89    match class {
90        GpuClass::A10 => GpuSpec {
91            label: "A10 (24GB)",
92            throughput: (350, 450),
93            vram_gb: 24,
94        },
95        GpuClass::A100 => GpuSpec {
96            label: "A100 (40/80GB)",
97            throughput: (800, 1200),
98            vram_gb: 80, // Using 80GB variant
99        },
100        GpuClass::H100 => GpuSpec {
101            label: "H100 (80GB)",
102            throughput: (1800, 2400),
103            vram_gb: 80,
104        },
105        GpuClass::Rtx4090 => GpuSpec {
106            label: "RTX 4090 (24GB)",
107            throughput: (300, 400),
108            vram_gb: 24,
109        },
110        GpuClass::Rtx3090 => GpuSpec {
111            label: "RTX 3090 (24GB)",
112            throughput: (200, 300),
113            vram_gb: 24,
114        },
115        GpuClass::Unknown => GpuSpec {
116            label: "Unknown GPU",
117            throughput: (100, 500), // Conservative estimate
118            vram_gb: 8,
119        },
120    }
121}
122
123impl GpuClass {
124    /// Expected throughput range (min, max) in tok/s.
125    ///
126    /// From cbtop spec §21.7.
127    pub fn expected_throughput(&self) -> (u32, u32) {
128        gpu_spec(self).throughput
129    }
130
131    /// VRAM size in GB.
132    pub fn vram_gb(&self) -> u32 {
133        gpu_spec(self).vram_gb
134    }
135
136    /// Detect GPU class from GPU name string.
137    ///
138    /// Parses common GPU name formats from nvidia-smi, NVML, etc.
139    pub fn from_name(name: &str) -> Self {
140        let name_lower = name.to_lowercase();
141
142        if name_lower.contains("h100") {
143            GpuClass::H100
144        } else if name_lower.contains("a100") {
145            GpuClass::A100
146        } else if name_lower.contains("a10") && !name_lower.contains("a100") {
147            GpuClass::A10
148        } else if name_lower.contains("4090") {
149            GpuClass::Rtx4090
150        } else if name_lower.contains("3090") {
151            GpuClass::Rtx3090
152        } else {
153            GpuClass::Unknown
154        }
155    }
156}
157
158impl fmt::Display for GpuClass {
159    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
160        write!(f, "{}", gpu_spec(self).label)
161    }
162}
163
/// Letter grade (A/B/C/D/F) for throughput measured against a baseline.
///
/// Defined by cbtop spec F983: "Throughput grade calculated".
/// Variants are declared worst-first, so the derived ordering is
/// F < D < C < B < A (A is best).
#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)]
pub enum ThroughputGrade {
    /// Worst grade: below 40% of the vLLM baseline.
    F,
    /// At least 40% of the vLLM baseline.
    D,
    /// At least 60% of the vLLM baseline.
    C,
    /// At least 80% of the vLLM baseline.
    B,
    /// Best grade: at least 100% of the vLLM baseline.
    A,
}
181
/// The data recorded for each `ThroughputGrade` variant.
struct GradeSpec {
    /// Lowest percentage that still earns the grade.
    threshold: f64,
    /// One-letter label for the grade.
    label: &'static str,
    /// Description of the grade in plain words.
    description: &'static str,
}
191
192/// Ordered from highest to lowest grade for `from_percentage` lookup.
193const GRADE_SPECS: [(ThroughputGrade, GradeSpec); 5] = [
194    (
195        ThroughputGrade::A,
196        GradeSpec {
197            threshold: 100.0,
198            label: "A",
199            description: "Excellent - meets or exceeds baseline",
200        },
201    ),
202    (
203        ThroughputGrade::B,
204        GradeSpec {
205            threshold: 80.0,
206            label: "B",
207            description: "Good - 80%+ of baseline",
208        },
209    ),
210    (
211        ThroughputGrade::C,
212        GradeSpec {
213            threshold: 60.0,
214            label: "C",
215            description: "Fair - 60%+ of baseline",
216        },
217    ),
218    (
219        ThroughputGrade::D,
220        GradeSpec {
221            threshold: 40.0,
222            label: "D",
223            description: "Poor - 40%+ of baseline",
224        },
225    ),
226    (
227        ThroughputGrade::F,
228        GradeSpec {
229            threshold: 0.0,
230            label: "F",
231            description: "Failing - below 40% of baseline",
232        },
233    ),
234];
235
236/// Look up the spec for a given grade variant.
237fn grade_spec(grade: &ThroughputGrade) -> &'static GradeSpec {
238    &GRADE_SPECS
239        .iter()
240        .find(|(g, _)| g == grade)
241        .expect("all variants present in GRADE_SPECS")
242        .1
243}
244
245impl ThroughputGrade {
246    /// Calculate grade from actual throughput vs baseline.
247    pub fn from_percentage(percentage: f64) -> Self {
248        GRADE_SPECS
249            .iter()
250            .find(|(_, spec)| percentage >= spec.threshold)
251            .map(|(grade, _)| *grade)
252            .unwrap_or(ThroughputGrade::F)
253    }
254
255    /// Get threshold percentage for this grade.
256    pub fn threshold(&self) -> f64 {
257        grade_spec(self).threshold
258    }
259}
260
261impl fmt::Display for ThroughputGrade {
262    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
263        let spec = grade_spec(self);
264        write!(f, "{} ({})", spec.label, spec.description)
265    }
266}
267
/// Health classification derived from SM utilization.
///
/// Thresholds come from cbtop spec §21.7 (SM utilization thresholds).
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SmHealth {
    /// Above 95% SM utilization: at risk of saturation.
    Saturated,
    /// 80% through 95% SM utilization: the optimal range.
    Optimal,
    /// 50% up to (but not including) 80% SM utilization: room for improvement.
    Moderate,
    /// Below 50% SM utilization: critical underutilization.
    Critical,
}
282
/// The data recorded for each `SmHealth` variant.
struct SmHealthSpec {
    /// Lower bound on SM utilization for this status (strict for Saturated,
    /// inclusive for the others).
    min_util: u8,
    /// `true` when the bound is a strict greater-than comparison.
    exclusive: bool,
    /// Text shown for the status (e.g. "OPTIMAL (80-95%)").
    label: &'static str,
}
292
293/// Ordered from highest to lowest threshold for `from_utilization` lookup.
294const SM_HEALTH_SPECS: [(SmHealth, SmHealthSpec); 4] = [
295    (
296        SmHealth::Saturated,
297        SmHealthSpec {
298            min_util: 95,
299            exclusive: true,
300            label: "SATURATED (>95%)",
301        },
302    ),
303    (
304        SmHealth::Optimal,
305        SmHealthSpec {
306            min_util: 80,
307            exclusive: false,
308            label: "OPTIMAL (80-95%)",
309        },
310    ),
311    (
312        SmHealth::Moderate,
313        SmHealthSpec {
314            min_util: 50,
315            exclusive: false,
316            label: "MODERATE (50-80%)",
317        },
318    ),
319    (
320        SmHealth::Critical,
321        SmHealthSpec {
322            min_util: 0,
323            exclusive: false,
324            label: "CRITICAL (<50%)",
325        },
326    ),
327];
328
329/// Look up the spec for a given SM health variant.
330fn sm_health_spec(health: &SmHealth) -> &'static SmHealthSpec {
331    &SM_HEALTH_SPECS
332        .iter()
333        .find(|(h, _)| h == health)
334        .expect("all variants present in SM_HEALTH_SPECS")
335        .1
336}
337
338impl SmHealth {
339    /// Calculate SM health from utilization percentage.
340    pub fn from_utilization(sm_util: u8) -> Self {
341        SM_HEALTH_SPECS
342            .iter()
343            .find(|(_, spec)| {
344                if spec.exclusive {
345                    sm_util > spec.min_util
346                } else {
347                    sm_util >= spec.min_util
348                }
349            })
350            .map(|(health, _)| *health)
351            .unwrap_or(SmHealth::Critical)
352    }
353
354    /// Is this health status acceptable for production?
355    pub fn is_acceptable(&self) -> bool {
356        matches!(self, SmHealth::Optimal | SmHealth::Saturated)
357    }
358}
359
360impl fmt::Display for SmHealth {
361    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
362        write!(f, "{}", sm_health_spec(self).label)
363    }
364}
365
366/// Comparison with a single baseline server.
367#[derive(Debug, Clone)]
368pub struct SingleComparison {
369    /// Baseline server
370    pub baseline: ServerBaseline,
371    /// Percentage achieved (actual / baseline * 100)
372    pub percentage: f64,
373    /// Delta in tok/s (actual - baseline)
374    pub delta_tok_per_sec: i32,
375}