1use crate::error::{CliError, Result};
63use crate::output;
64use colored::Colorize;
65use serde::{Deserialize, Serialize};
66use std::path::Path;
67use std::time::{Duration, Instant};
68
/// Configuration for one QA run: gate thresholds, per-gate skip switches,
/// benchmark sizing, and reporting options.
///
/// Default values quoted below are the ones set by the `Default` impl.
/// NOTE(review): the gate runners that consume these fields are not visible
/// in this part of the file; the gate associated with each `skip_*` flag is
/// inferred from the flag name and should be confirmed against the runners.
#[derive(Debug, Clone)]
pub struct QaConfig {
    /// Minimum throughput threshold (default 100.0; presumably tokens per second — confirm).
    pub min_tps: f64,
    /// Minimum speedup threshold (default 0.2); the baseline it is compared against is defined elsewhere.
    pub min_speedup: f64,
    /// Minimum GPU speedup threshold (default 2.0).
    pub min_gpu_speedup: f64,
    /// Skip switch, presumably for the golden-output gate.
    pub skip_golden: bool,
    /// Skip switch, presumably for the throughput gate.
    pub skip_throughput: bool,
    /// Skip switch, presumably for the Ollama parity gate.
    pub skip_ollama: bool,
    /// Skip switch, presumably for the GPU speedup gate.
    pub skip_gpu_speedup: bool,
    /// Skip switch, presumably for the tensor-contract gate.
    pub skip_contract: bool,
    /// Skip switch, presumably for the format-parity gate.
    pub skip_format_parity: bool,
    /// Skip switch, presumably for the PTX parity gate.
    pub skip_ptx_parity: bool,
    /// Optional path to a SafeTensors file (default `None`); its use is defined outside this chunk.
    pub safetensors_path: Option<std::path::PathBuf>,
    /// Iteration count (default 10; presumably measured benchmark iterations — confirm).
    pub iterations: usize,
    /// Warmup count (default 3; presumably unmeasured iterations run first — confirm).
    pub warmup: usize,
    /// Token limit (default 32; presumably per generation — confirm).
    pub max_tokens: usize,
    /// JSON output mode (default `false`).
    pub json: bool,
    /// Verbose output flag (default `false`).
    pub verbose: bool,
    /// Optional minimum number of executed gates (default `None`); enforcement is defined elsewhere.
    pub min_executed: Option<usize>,
    /// Optional path to a previous report (default `None`; presumably the regression baseline — confirm).
    pub previous_report: Option<std::path::PathBuf>,
    /// Regression threshold (default 0.10; presumably a fractional tolerance — confirm).
    pub regression_threshold: f64,
    /// Skip switch, presumably for the GPU state isolation gate.
    pub skip_gpu_state: bool,
    /// Skip switch, presumably for the metadata plausibility gate.
    pub skip_metadata: bool,
    /// Skip switch, presumably for the capability-match gate.
    pub skip_capability: bool,
    /// Opt-in flag (default `false`), presumably enabling the classifier-head check.
    pub assert_classifier_head: bool,
}
119
120impl Default for QaConfig {
121 fn default() -> Self {
122 Self {
123 min_tps: 100.0, min_speedup: 0.2, min_gpu_speedup: 2.0, skip_golden: false,
127 skip_throughput: false,
128 skip_ollama: false,
129 skip_gpu_speedup: false,
130 skip_contract: false,
131 skip_format_parity: false,
132 skip_ptx_parity: false,
133 safetensors_path: None,
134 iterations: 10,
135 warmup: 3,
136 max_tokens: 32,
137 json: false,
138 verbose: false,
139 min_executed: None,
140 previous_report: None,
141 regression_threshold: 0.10,
142 skip_gpu_state: false,
143 skip_metadata: false,
144 skip_capability: false,
145 assert_classifier_head: false,
146 }
147 }
148}
149
/// Outcome of a single QA gate, serializable as part of a `QaReport`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GateResult {
    /// Machine-readable gate identifier (e.g. `"throughput"`).
    pub name: String,
    /// Whether the gate passed. Skipped gates are also recorded with `true`.
    pub passed: bool,
    /// Human-readable detail; skipped gates carry `"Skipped: <reason>"`.
    pub message: String,
    /// Measured value, if the gate produced one; omitted from the serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub value: Option<f64>,
    /// Threshold the value was compared against, if any; omitted from the serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub threshold: Option<f64>,
    /// Time spent running the gate, in milliseconds (0 for skipped gates).
    pub duration_ms: u64,
    /// `true` when the gate was not executed.
    pub skipped: bool,
}
170
171impl GateResult {
172 pub(crate) fn passed(
173 name: &str,
174 message: &str,
175 value: Option<f64>,
176 threshold: Option<f64>,
177 duration: Duration,
178 ) -> Self {
179 Self {
180 name: name.to_string(),
181 passed: true,
182 message: message.to_string(),
183 value,
184 threshold,
185 duration_ms: duration.as_millis() as u64,
186 skipped: false,
187 }
188 }
189
190 pub(crate) fn failed(
191 name: &str,
192 message: &str,
193 value: Option<f64>,
194 threshold: Option<f64>,
195 duration: Duration,
196 ) -> Self {
197 Self {
198 name: name.to_string(),
199 passed: false,
200 message: message.to_string(),
201 value,
202 threshold,
203 duration_ms: duration.as_millis() as u64,
204 skipped: false,
205 }
206 }
207
208 fn skipped(name: &str, reason: &str) -> Self {
209 Self {
210 name: name.to_string(),
211 passed: true, message: format!("Skipped: {reason}"),
213 value: None,
214 threshold: None,
215 duration_ms: 0,
216 skipped: true,
217 }
218 }
219}
220
/// Snapshot of the host hardware, attached to a `QaReport`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SystemInfo {
    /// CPU model string read from `/proc/cpuinfo`, or `"unknown"` when unavailable.
    pub cpu_model: String,
    /// GPU name reported by `nvidia-smi`, if it could be queried; omitted from the serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub gpu_model: Option<String>,
    /// GPU driver version reported by `nvidia-smi`, if available; omitted from the serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub gpu_driver: Option<String>,
}
233
234impl SystemInfo {
235 fn capture() -> Self {
236 let cpu_model = std::fs::read_to_string("/proc/cpuinfo")
237 .ok()
238 .and_then(|s| {
239 s.lines()
240 .find(|l| l.starts_with("model name"))
241 .and_then(|l| l.split(':').nth(1))
242 .map(|s| s.trim().to_string())
243 })
244 .unwrap_or_else(|| "unknown".to_string());
245
246 let (gpu_model, gpu_driver) = Self::detect_gpu();
247
248 Self {
249 cpu_model,
250 gpu_model,
251 gpu_driver,
252 }
253 }
254
255 fn detect_gpu() -> (Option<String>, Option<String>) {
256 let output = std::process::Command::new("nvidia-smi")
257 .args(["--query-gpu=name,driver_version", "--format=csv,noheader"])
258 .output()
259 .ok();
260 if let Some(out) = output {
261 if out.status.success() {
262 let text = String::from_utf8_lossy(&out.stdout);
263 let parts: Vec<&str> = text.trim().splitn(2, ',').collect();
264 return (
265 parts.first().map(|s| s.trim().to_string()),
266 parts.get(1).map(|s| s.trim().to_string()),
267 );
268 }
269 }
270 (None, None)
271 }
272}
273
/// Full result of a QA run; this is what `run` prints as JSON when requested.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QaReport {
    /// Identifier of the model that was checked (presumably its path or name — confirm).
    pub model: String,
    /// Overall verdict; `run` returns an error when this is `false`.
    pub passed: bool,
    /// One entry per gate.
    pub gates: Vec<GateResult>,
    /// Number of gates that were executed; defaults to 0 when absent from deserialized input.
    #[serde(default)]
    pub gates_executed: usize,
    /// Number of gates that were skipped; defaults to 0 when absent from deserialized input.
    #[serde(default)]
    pub gates_skipped: usize,
    /// Duration of the whole run in milliseconds.
    pub total_duration_ms: u64,
    /// Time the report was produced (string format is defined where the report is built).
    pub timestamp: String,
    /// Summary text; used as the error message by `run` when the report did not pass.
    pub summary: String,
    /// Host hardware details; optional on input and omitted from the serialized form when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub system_info: Option<SystemInfo>,
}
299
300#[allow(clippy::too_many_arguments)]
302pub fn run(
303 path: &Path,
304 min_tps: Option<f64>,
305 min_speedup: Option<f64>,
306 min_gpu_speedup: Option<f64>,
307 skip_golden: bool,
308 skip_throughput: bool,
309 skip_ollama: bool,
310 skip_gpu_speedup: bool,
311 skip_contract: bool,
312 skip_format_parity: bool,
313 skip_ptx_parity: bool,
314 safetensors_path: Option<std::path::PathBuf>,
315 iterations: usize,
316 warmup: usize,
317 max_tokens: usize,
318 json: bool,
319 verbose: bool,
320 min_executed: Option<usize>,
321 previous_report: Option<std::path::PathBuf>,
322 regression_threshold: Option<f64>,
323 skip_gpu_state: bool,
324 skip_metadata: bool,
325 skip_capability: bool,
326 assert_classifier_head: bool,
327) -> Result<()> {
328 let config = QaConfig {
329 min_tps: min_tps.unwrap_or(100.0),
330 min_speedup: min_speedup.unwrap_or(0.2), min_gpu_speedup: min_gpu_speedup.unwrap_or(2.0), skip_golden,
333 skip_throughput,
334 skip_ollama,
335 skip_gpu_speedup,
336 skip_contract,
337 skip_format_parity,
338 skip_ptx_parity,
339 safetensors_path,
340 iterations,
341 warmup,
342 max_tokens,
343 json,
344 verbose,
345 min_executed,
346 previous_report,
347 regression_threshold: regression_threshold.unwrap_or(0.10),
348 skip_gpu_state,
349 skip_metadata,
350 skip_capability,
351 assert_classifier_head,
352 };
353
354 let report = run_qa(path, &config)?;
355
356 if json {
357 println!(
358 "{}",
359 serde_json::to_string_pretty(&report).unwrap_or_default()
360 );
361 }
362
363 if !report.passed {
364 return Err(CliError::ValidationFailed(report.summary));
365 }
366
367 Ok(())
368}
369
370fn dispatch_gate(
372 gates: &mut Vec<GateResult>,
373 json: bool,
374 skip: bool,
375 name: &str,
376 skip_reason: &str,
377 runner: impl FnOnce() -> Result<GateResult>,
378) -> Result<()> {
379 let result = if skip {
380 GateResult::skipped(name, skip_reason)
381 } else {
382 runner()?
383 };
384 if !json {
385 print_gate_result(&result);
386 }
387 gates.push(result);
388 Ok(())
389}
390
/// Maps a gate identifier to its human-readable label.
///
/// Identifiers without a known label are returned unchanged.
fn gate_display_name(name: &str) -> &str {
    /// Known gate identifiers paired with their display labels.
    const LABELS: &[(&str, &str)] = &[
        ("capability_match", "Capability Match"),
        ("tensor_contract", "Tensor Contract"),
        ("golden_output", "Golden Output"),
        ("throughput", "Throughput"),
        ("ollama_parity", "Ollama Parity"),
        ("gpu_speedup", "GPU Speedup"),
        ("format_parity", "Format Parity"),
        ("ptx_parity", "PTX Parity"),
        ("gpu_state_isolation", "GPU State Isolation"),
        ("performance_regression", "Perf Regression"),
        ("metadata_plausibility", "Metadata Plausibility"),
        ("classifier_head", "Classifier Head"),
    ];

    match LABELS.iter().find(|entry| entry.0 == name) {
        Some(entry) => entry.1,
        None => name,
    }
}
410
411fn print_qa_summary(gates: &[GateResult], passed: bool, total_duration: Duration) {
413 output::header("QA Summary");
414
415 let gate_rows: Vec<Vec<String>> = gates
416 .iter()
417 .map(|g| {
418 let badge = if g.skipped {
419 output::badge_skip("SKIP")
420 } else if g.passed {
421 output::badge_pass("PASS")
422 } else {
423 output::badge_fail("FAIL")
424 };
425 let measured = g.value.map_or("—".to_string(), |v| format!("{v:.2}"));
426 let threshold = g.threshold.map_or("—".to_string(), |v| format!("{v:.2}"));
427 vec![
428 gate_display_name(&g.name).to_string(),
429 badge,
430 measured,
431 threshold,
432 output::duration_fmt(g.duration_ms),
433 ]
434 })
435 .collect();
436 println!(
437 "{}",
438 output::table(
439 &["Gate", "Status", "Measured", "Threshold", "Duration"],
440 &gate_rows,
441 )
442 );
443
444 println!();
445 if passed {
446 println!(" {}", output::badge_pass("ALL GATES PASSED"));
447 } else {
448 println!(" {}", output::badge_fail("GATES FAILED"));
449 for gate in gates.iter().filter(|g| !g.passed && !g.skipped) {
450 println!(" {} {}", "✗".red(), gate.name);
451 }
452 }
453 output::metric(
454 "Total Duration",
455 output::duration_fmt(total_duration.as_millis() as u64),
456 "",
457 );
458}
459
460include!("qa_gguf.rs");
461include!("output_verification.rs");
462include!("golden_output.rs");
463include!("speedup.rs");
464include!("forward_error.rs");
465include!("gpu_isolation_result.rs");
466include!("qa_08.rs");