1use crate::error::{CliError, Result};
63use crate::output;
64use colored::Colorize;
65use serde::{Deserialize, Serialize};
66use std::path::Path;
67use std::time::{Duration, Instant};
68
/// Configuration for one QA run: pass/fail thresholds, per-gate skip flags,
/// benchmark sizing, and reporting options.
///
/// Fallback values for every field are provided by the `Default` impl.
///
/// NOTE(review): the `skip_*` flags are presumably consumed by `run_qa` to
/// bypass the gate they are named after — the wiring is not visible in this
/// part of the file, so confirm there.
#[derive(Debug, Clone)]
pub struct QaConfig {
    /// Minimum throughput threshold (default `100.0`; presumably tokens per
    /// second — confirm against the throughput gate).
    pub min_tps: f64,
    /// Minimum speedup threshold (default `0.2`; presumably relative to the
    /// Ollama baseline — confirm).
    pub min_speedup: f64,
    /// Minimum GPU speedup threshold (default `2.0`; presumably GPU vs CPU —
    /// confirm).
    pub min_gpu_speedup: f64,
    /// Skip flag for the golden-output gate.
    pub skip_golden: bool,
    /// Skip flag for the throughput gate.
    pub skip_throughput: bool,
    /// Skip flag for the Ollama parity gate.
    pub skip_ollama: bool,
    /// Skip flag for the GPU speedup gate.
    pub skip_gpu_speedup: bool,
    /// Skip flag for the tensor contract gate.
    pub skip_contract: bool,
    /// Skip flag for the format parity gate.
    pub skip_format_parity: bool,
    /// Skip flag for the PTX parity gate.
    pub skip_ptx_parity: bool,
    /// Optional path to a SafeTensors file (default `None`; presumably the
    /// comparison input for format parity — confirm).
    pub safetensors_path: Option<std::path::PathBuf>,
    /// Number of measured iterations (default `10`).
    pub iterations: usize,
    /// Number of warmup iterations (default `3`).
    pub warmup: usize,
    /// Maximum number of tokens to generate (default `32`).
    pub max_tokens: usize,
    /// Machine-readable mode: `run` prints the final report as pretty-printed
    /// JSON when this is set (default `false`).
    pub json: bool,
    /// Verbose output flag (default `false`).
    pub verbose: bool,
    /// Optional minimum number of executed gates (default `None`; presumably
    /// the run fails when fewer gates execute — confirm in `run_qa`).
    pub min_executed: Option<usize>,
    /// Optional path to a previous report (default `None`; presumably the
    /// baseline for the performance regression gate — confirm).
    pub previous_report: Option<std::path::PathBuf>,
    /// Regression threshold (default `0.10`; presumably a fraction, i.e. 10% —
    /// confirm).
    pub regression_threshold: f64,
    /// Skip flag for the GPU state isolation gate.
    pub skip_gpu_state: bool,
    /// Skip flag for the metadata plausibility gate.
    pub skip_metadata: bool,
    /// Skip flag for the capability match gate.
    pub skip_capability: bool,
}
117
118impl Default for QaConfig {
119 fn default() -> Self {
120 Self {
121 min_tps: 100.0, min_speedup: 0.2, min_gpu_speedup: 2.0, skip_golden: false,
125 skip_throughput: false,
126 skip_ollama: false,
127 skip_gpu_speedup: false,
128 skip_contract: false,
129 skip_format_parity: false,
130 skip_ptx_parity: false,
131 safetensors_path: None,
132 iterations: 10,
133 warmup: 3,
134 max_tokens: 32,
135 json: false,
136 verbose: false,
137 min_executed: None,
138 previous_report: None,
139 regression_threshold: 0.10,
140 skip_gpu_state: false,
141 skip_metadata: false,
142 skip_capability: false,
143 }
144 }
145}
146
/// Outcome of a single QA gate, serializable as part of a QA report.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GateResult {
    /// Gate identifier; known identifiers are mapped to display labels by
    /// `gate_display_name`.
    pub name: String,
    /// Whether the gate passed. Skipped gates are also recorded with
    /// `passed == true`; check `skipped` to tell them apart.
    pub passed: bool,
    /// Human-readable detail; for skipped gates this is `"Skipped: <reason>"`.
    pub message: String,
    /// Measured value, if any; omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub value: Option<f64>,
    /// Threshold the measured value was compared against, if any; omitted
    /// from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub threshold: Option<f64>,
    /// Gate duration in milliseconds; `0` for skipped gates.
    pub duration_ms: u64,
    /// `true` when the gate did not run.
    pub skipped: bool,
}
167
168impl GateResult {
169 pub(crate) fn passed(
170 name: &str,
171 message: &str,
172 value: Option<f64>,
173 threshold: Option<f64>,
174 duration: Duration,
175 ) -> Self {
176 Self {
177 name: name.to_string(),
178 passed: true,
179 message: message.to_string(),
180 value,
181 threshold,
182 duration_ms: duration.as_millis() as u64,
183 skipped: false,
184 }
185 }
186
187 pub(crate) fn failed(
188 name: &str,
189 message: &str,
190 value: Option<f64>,
191 threshold: Option<f64>,
192 duration: Duration,
193 ) -> Self {
194 Self {
195 name: name.to_string(),
196 passed: false,
197 message: message.to_string(),
198 value,
199 threshold,
200 duration_ms: duration.as_millis() as u64,
201 skipped: false,
202 }
203 }
204
205 fn skipped(name: &str, reason: &str) -> Self {
206 Self {
207 name: name.to_string(),
208 passed: true, message: format!("Skipped: {reason}"),
210 value: None,
211 threshold: None,
212 duration_ms: 0,
213 skipped: true,
214 }
215 }
216}
217
/// Snapshot of the host hardware, stored as the optional `system_info` field
/// of a QA report.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SystemInfo {
    /// CPU model string. `SystemInfo::capture` takes it from the `model name`
    /// entry of `/proc/cpuinfo` and falls back to `"unknown"`.
    pub cpu_model: String,
    /// GPU name as reported by `nvidia-smi`, when the query succeeds; omitted
    /// from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub gpu_model: Option<String>,
    /// GPU driver version as reported by `nvidia-smi`, when the query
    /// succeeds; omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub gpu_driver: Option<String>,
}
230
231impl SystemInfo {
232 fn capture() -> Self {
233 let cpu_model = std::fs::read_to_string("/proc/cpuinfo")
234 .ok()
235 .and_then(|s| {
236 s.lines()
237 .find(|l| l.starts_with("model name"))
238 .and_then(|l| l.split(':').nth(1))
239 .map(|s| s.trim().to_string())
240 })
241 .unwrap_or_else(|| "unknown".to_string());
242
243 let (gpu_model, gpu_driver) = Self::detect_gpu();
244
245 Self {
246 cpu_model,
247 gpu_model,
248 gpu_driver,
249 }
250 }
251
252 fn detect_gpu() -> (Option<String>, Option<String>) {
253 let output = std::process::Command::new("nvidia-smi")
254 .args(["--query-gpu=name,driver_version", "--format=csv,noheader"])
255 .output()
256 .ok();
257 if let Some(out) = output {
258 if out.status.success() {
259 let text = String::from_utf8_lossy(&out.stdout);
260 let parts: Vec<&str> = text.trim().splitn(2, ',').collect();
261 return (
262 parts.first().map(|s| s.trim().to_string()),
263 parts.get(1).map(|s| s.trim().to_string()),
264 );
265 }
266 }
267 (None, None)
268 }
269}
270
/// Aggregate result of a QA run; `run` prints it as pretty-printed JSON in
/// JSON mode.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QaReport {
    /// Model identifier (presumably the model path or name — confirm in
    /// `run_qa`).
    pub model: String,
    /// Overall verdict; when `false`, `run` returns a validation error
    /// carrying `summary`.
    pub passed: bool,
    /// Individual gate outcomes.
    pub gates: Vec<GateResult>,
    /// Number of gates that executed; defaults to `0` when absent from
    /// deserialized input.
    #[serde(default)]
    pub gates_executed: usize,
    /// Number of gates that were skipped; defaults to `0` when absent from
    /// deserialized input.
    #[serde(default)]
    pub gates_skipped: usize,
    /// Total run duration in milliseconds.
    pub total_duration_ms: u64,
    /// Time the report was produced (format not visible here — confirm in
    /// `run_qa`).
    pub timestamp: String,
    /// One-line summary; used as the error message when the run fails.
    pub summary: String,
    /// Host hardware snapshot; omitted from serialized output when `None` and
    /// defaulted to `None` when absent from deserialized input.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub system_info: Option<SystemInfo>,
}
296
297#[allow(clippy::too_many_arguments)]
299pub fn run(
300 path: &Path,
301 min_tps: Option<f64>,
302 min_speedup: Option<f64>,
303 min_gpu_speedup: Option<f64>,
304 skip_golden: bool,
305 skip_throughput: bool,
306 skip_ollama: bool,
307 skip_gpu_speedup: bool,
308 skip_contract: bool,
309 skip_format_parity: bool,
310 skip_ptx_parity: bool,
311 safetensors_path: Option<std::path::PathBuf>,
312 iterations: usize,
313 warmup: usize,
314 max_tokens: usize,
315 json: bool,
316 verbose: bool,
317 min_executed: Option<usize>,
318 previous_report: Option<std::path::PathBuf>,
319 regression_threshold: Option<f64>,
320 skip_gpu_state: bool,
321 skip_metadata: bool,
322 skip_capability: bool,
323) -> Result<()> {
324 let config = QaConfig {
325 min_tps: min_tps.unwrap_or(100.0),
326 min_speedup: min_speedup.unwrap_or(0.2), min_gpu_speedup: min_gpu_speedup.unwrap_or(2.0), skip_golden,
329 skip_throughput,
330 skip_ollama,
331 skip_gpu_speedup,
332 skip_contract,
333 skip_format_parity,
334 skip_ptx_parity,
335 safetensors_path,
336 iterations,
337 warmup,
338 max_tokens,
339 json,
340 verbose,
341 min_executed,
342 previous_report,
343 regression_threshold: regression_threshold.unwrap_or(0.10),
344 skip_gpu_state,
345 skip_metadata,
346 skip_capability,
347 };
348
349 let report = run_qa(path, &config)?;
350
351 if json {
352 println!(
353 "{}",
354 serde_json::to_string_pretty(&report).unwrap_or_default()
355 );
356 }
357
358 if !report.passed {
359 return Err(CliError::ValidationFailed(report.summary));
360 }
361
362 Ok(())
363}
364
365fn dispatch_gate(
367 gates: &mut Vec<GateResult>,
368 json: bool,
369 skip: bool,
370 name: &str,
371 skip_reason: &str,
372 runner: impl FnOnce() -> Result<GateResult>,
373) -> Result<()> {
374 let result = if skip {
375 GateResult::skipped(name, skip_reason)
376 } else {
377 runner()?
378 };
379 if !json {
380 print_gate_result(&result);
381 }
382 gates.push(result);
383 Ok(())
384}
385
/// Maps a gate identifier to its human-readable label.
///
/// Identifiers without an entry in the table are returned unchanged.
fn gate_display_name(name: &str) -> &str {
    const LABELS: [(&str, &str); 11] = [
        ("capability_match", "Capability Match"),
        ("tensor_contract", "Tensor Contract"),
        ("golden_output", "Golden Output"),
        ("throughput", "Throughput"),
        ("ollama_parity", "Ollama Parity"),
        ("gpu_speedup", "GPU Speedup"),
        ("format_parity", "Format Parity"),
        ("ptx_parity", "PTX Parity"),
        ("gpu_state_isolation", "GPU State Isolation"),
        ("performance_regression", "Perf Regression"),
        ("metadata_plausibility", "Metadata Plausibility"),
    ];

    LABELS
        .iter()
        .find(|(key, _)| *key == name)
        .map_or(name, |(_, label)| *label)
}
404
405fn print_qa_summary(gates: &[GateResult], passed: bool, total_duration: Duration) {
407 output::header("QA Summary");
408
409 let gate_rows: Vec<Vec<String>> = gates
410 .iter()
411 .map(|g| {
412 let badge = if g.skipped {
413 output::badge_skip("SKIP")
414 } else if g.passed {
415 output::badge_pass("PASS")
416 } else {
417 output::badge_fail("FAIL")
418 };
419 let measured = g.value.map_or("—".to_string(), |v| format!("{v:.2}"));
420 let threshold = g.threshold.map_or("—".to_string(), |v| format!("{v:.2}"));
421 vec![
422 gate_display_name(&g.name).to_string(),
423 badge,
424 measured,
425 threshold,
426 output::duration_fmt(g.duration_ms),
427 ]
428 })
429 .collect();
430 println!(
431 "{}",
432 output::table(
433 &["Gate", "Status", "Measured", "Threshold", "Duration"],
434 &gate_rows,
435 )
436 );
437
438 println!();
439 if passed {
440 println!(" {}", output::badge_pass("ALL GATES PASSED"));
441 } else {
442 println!(" {}", output::badge_fail("GATES FAILED"));
443 for gate in gates.iter().filter(|g| !g.passed && !g.skipped) {
444 println!(" {} {}", "✗".red(), gate.name);
445 }
446 }
447 output::metric(
448 "Total Duration",
449 output::duration_fmt(total_duration.as_millis() as u64),
450 "",
451 );
452}
453
// The remainder of this module is spliced in textually from sibling files.
// NOTE(review): `run_qa` and `print_gate_result` are called in this file but
// not defined in it; presumably they live in one of these includes — confirm.
include!("qa_gguf.rs");
include!("output_verification.rs");
include!("golden_output.rs");
include!("speedup.rs");
include!("forward_error.rs");
include!("gpu_isolation_result.rs");
include!("qa_08.rs");