1use std::fmt;
4
/// Reference performance figures for one inference server on one GPU.
///
/// Every field is an integer or a `&'static str`, so the type derives `Eq`
/// alongside `PartialEq`; values can therefore be used wherever a total
/// equality bound is required.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ServerBaseline {
    /// Display name of the server (e.g. `"vLLM"`).
    pub name: &'static str,
    /// Peak throughput; tokens per second, going by the field name.
    pub peak_tok_per_sec: u32,
    /// 95th-percentile latency; milliseconds, going by the field name.
    pub p95_latency_ms: u32,
    /// SM utilization; presumably a percentage in `0..=100` — TODO confirm.
    pub sm_utilization: u8,
    /// Memory overhead; the unit is not stated here (presumably percent) — TODO confirm.
    pub memory_overhead: u8,
    /// Name of the GPU these figures apply to (e.g. `"A10"`).
    pub gpu: &'static str,
}
23
24pub const VLLM_BASELINE: ServerBaseline = ServerBaseline {
26 name: "vLLM",
27 peak_tok_per_sec: 412,
28 p95_latency_ms: 1715,
29 sm_utilization: 99,
30 memory_overhead: 42,
31 gpu: "A10",
32};
33
34pub const TGI_BASELINE: ServerBaseline = ServerBaseline {
35 name: "TGI",
36 peak_tok_per_sec: 408,
37 p95_latency_ms: 1704,
38 sm_utilization: 98,
39 memory_overhead: 44,
40 gpu: "A10",
41};
42
43pub const TRITON_BASELINE: ServerBaseline = ServerBaseline {
44 name: "Triton",
45 peak_tok_per_sec: 385,
46 p95_latency_ms: 2007,
47 sm_utilization: 97,
48 memory_overhead: 45,
49 gpu: "A10",
50};
51
52pub const INDUSTRY_BASELINES: [ServerBaseline; 3] = [VLLM_BASELINE, TGI_BASELINE, TRITON_BASELINE];
54
/// GPU models this module distinguishes.
///
/// `Unknown` is the value `GpuClass::from_name` falls back to when a name
/// matches none of the other variants.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum GpuClass {
    /// A10-class card.
    A10,
    /// A100-class card.
    A100,
    /// H100-class card.
    H100,
    /// RTX 4090.
    Rtx4090,
    /// RTX 3090.
    Rtx3090,
    /// Anything that was not recognised.
    Unknown,
}
73
/// Static description of one `GpuClass`, as produced by `gpu_spec`.
struct GpuSpec {
    /// Human-readable name; this is what `Display for GpuClass` prints.
    label: &'static str,
    /// Pair returned by `GpuClass::expected_throughput`; presumably
    /// `(low, high)` in tokens per second — TODO confirm.
    throughput: (u32, u32),
    /// Video memory in gigabytes, going by the field name.
    vram_gb: u32,
}
83
84const fn gpu_spec(class: &GpuClass) -> GpuSpec {
89 match class {
90 GpuClass::A10 => GpuSpec {
91 label: "A10 (24GB)",
92 throughput: (350, 450),
93 vram_gb: 24,
94 },
95 GpuClass::A100 => GpuSpec {
96 label: "A100 (40/80GB)",
97 throughput: (800, 1200),
98 vram_gb: 80, },
100 GpuClass::H100 => GpuSpec {
101 label: "H100 (80GB)",
102 throughput: (1800, 2400),
103 vram_gb: 80,
104 },
105 GpuClass::Rtx4090 => GpuSpec {
106 label: "RTX 4090 (24GB)",
107 throughput: (300, 400),
108 vram_gb: 24,
109 },
110 GpuClass::Rtx3090 => GpuSpec {
111 label: "RTX 3090 (24GB)",
112 throughput: (200, 300),
113 vram_gb: 24,
114 },
115 GpuClass::Unknown => GpuSpec {
116 label: "Unknown GPU",
117 throughput: (100, 500), vram_gb: 8,
119 },
120 }
121}
122
123impl GpuClass {
124 pub fn expected_throughput(&self) -> (u32, u32) {
128 gpu_spec(self).throughput
129 }
130
131 pub fn vram_gb(&self) -> u32 {
133 gpu_spec(self).vram_gb
134 }
135
136 pub fn from_name(name: &str) -> Self {
140 let name_lower = name.to_lowercase();
141
142 if name_lower.contains("h100") {
143 GpuClass::H100
144 } else if name_lower.contains("a100") {
145 GpuClass::A100
146 } else if name_lower.contains("a10") && !name_lower.contains("a100") {
147 GpuClass::A10
148 } else if name_lower.contains("4090") {
149 GpuClass::Rtx4090
150 } else if name_lower.contains("3090") {
151 GpuClass::Rtx3090
152 } else {
153 GpuClass::Unknown
154 }
155 }
156}
157
158impl fmt::Display for GpuClass {
159 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
160 write!(f, "{}", gpu_spec(self).label)
161 }
162}
163
/// Letter grade assigned to a throughput percentage
/// (see `ThroughputGrade::from_percentage`).
///
/// Variants are declared worst-first so that the derived ordering gives
/// `F < D < C < B < A`; do not reorder them.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum ThroughputGrade {
    /// Lowest grade.
    F,
    /// One step above `F`.
    D,
    /// Middle grade.
    C,
    /// One step below `A`.
    B,
    /// Highest grade.
    A,
}
181
/// Static description of one `ThroughputGrade` row in `GRADE_SPECS`.
struct GradeSpec {
    /// Smallest percentage that still earns the grade; the comparison in
    /// `ThroughputGrade::from_percentage` is `>=`.
    threshold: f64,
    /// Short letter form printed first by `Display`.
    label: &'static str,
    /// Explanatory text printed in parentheses after the label.
    description: &'static str,
}
191
192const GRADE_SPECS: [(ThroughputGrade, GradeSpec); 5] = [
194 (
195 ThroughputGrade::A,
196 GradeSpec {
197 threshold: 100.0,
198 label: "A",
199 description: "Excellent - meets or exceeds baseline",
200 },
201 ),
202 (
203 ThroughputGrade::B,
204 GradeSpec {
205 threshold: 80.0,
206 label: "B",
207 description: "Good - 80%+ of baseline",
208 },
209 ),
210 (
211 ThroughputGrade::C,
212 GradeSpec {
213 threshold: 60.0,
214 label: "C",
215 description: "Fair - 60%+ of baseline",
216 },
217 ),
218 (
219 ThroughputGrade::D,
220 GradeSpec {
221 threshold: 40.0,
222 label: "D",
223 description: "Poor - 40%+ of baseline",
224 },
225 ),
226 (
227 ThroughputGrade::F,
228 GradeSpec {
229 threshold: 0.0,
230 label: "F",
231 description: "Failing - below 40% of baseline",
232 },
233 ),
234];
235
236fn grade_spec(grade: &ThroughputGrade) -> &'static GradeSpec {
238 &GRADE_SPECS
239 .iter()
240 .find(|(g, _)| g == grade)
241 .expect("all variants present in GRADE_SPECS")
242 .1
243}
244
245impl ThroughputGrade {
246 pub fn from_percentage(percentage: f64) -> Self {
248 GRADE_SPECS
249 .iter()
250 .find(|(_, spec)| percentage >= spec.threshold)
251 .map(|(grade, _)| *grade)
252 .unwrap_or(ThroughputGrade::F)
253 }
254
255 pub fn threshold(&self) -> f64 {
257 grade_spec(self).threshold
258 }
259}
260
261impl fmt::Display for ThroughputGrade {
262 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
263 let spec = grade_spec(self);
264 write!(f, "{} ({})", spec.label, spec.description)
265 }
266}
267
/// Classification of an SM utilization value
/// (see `SmHealth::from_utilization`).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum SmHealth {
    /// Utilization strictly above 95.
    Saturated,
    /// Utilization from 80 up to and including 95.
    Optimal,
    /// Utilization from 50 up to, but not including, 80.
    Moderate,
    /// Utilization below 50.
    Critical,
}
282
/// Static description of one `SmHealth` row in `SM_HEALTH_SPECS`.
struct SmHealthSpec {
    /// Lower bound of the band the row covers.
    min_util: u8,
    /// `true` when the bound is strict (`>`), `false` when it is inclusive
    /// (`>=`); this is how `SmHealth::from_utilization` applies it.
    exclusive: bool,
    /// Text printed by `Display for SmHealth`.
    label: &'static str,
}
292
293const SM_HEALTH_SPECS: [(SmHealth, SmHealthSpec); 4] = [
295 (
296 SmHealth::Saturated,
297 SmHealthSpec {
298 min_util: 95,
299 exclusive: true,
300 label: "SATURATED (>95%)",
301 },
302 ),
303 (
304 SmHealth::Optimal,
305 SmHealthSpec {
306 min_util: 80,
307 exclusive: false,
308 label: "OPTIMAL (80-95%)",
309 },
310 ),
311 (
312 SmHealth::Moderate,
313 SmHealthSpec {
314 min_util: 50,
315 exclusive: false,
316 label: "MODERATE (50-80%)",
317 },
318 ),
319 (
320 SmHealth::Critical,
321 SmHealthSpec {
322 min_util: 0,
323 exclusive: false,
324 label: "CRITICAL (<50%)",
325 },
326 ),
327];
328
329fn sm_health_spec(health: &SmHealth) -> &'static SmHealthSpec {
331 &SM_HEALTH_SPECS
332 .iter()
333 .find(|(h, _)| h == health)
334 .expect("all variants present in SM_HEALTH_SPECS")
335 .1
336}
337
338impl SmHealth {
339 pub fn from_utilization(sm_util: u8) -> Self {
341 SM_HEALTH_SPECS
342 .iter()
343 .find(|(_, spec)| {
344 if spec.exclusive {
345 sm_util > spec.min_util
346 } else {
347 sm_util >= spec.min_util
348 }
349 })
350 .map(|(health, _)| *health)
351 .unwrap_or(SmHealth::Critical)
352 }
353
354 pub fn is_acceptable(&self) -> bool {
356 matches!(self, SmHealth::Optimal | SmHealth::Saturated)
357 }
358}
359
360impl fmt::Display for SmHealth {
361 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
362 write!(f, "{}", sm_health_spec(self).label)
363 }
364}
365
366#[derive(Debug, Clone)]
368pub struct SingleComparison {
369 pub baseline: ServerBaseline,
371 pub percentage: f64,
373 pub delta_tok_per_sec: i32,
375}