1use crate::error::{CliError, Result};
63use crate::output;
64use colored::Colorize;
65use serde::{Deserialize, Serialize};
66use std::path::Path;
67use std::time::{Duration, Instant};
68
69#[cfg(not(feature = "visualization"))]
70use brick_tracer_shim::BrickTracer as TracerImpl;
71#[cfg(feature = "visualization")]
72use renacer::brick_tracer::BrickTracer as TracerImpl;
73
/// Stand-in tracer compiled only when the `visualization` feature is disabled.
///
/// In that configuration this module's `BrickTracer` is what the file aliases
/// as `TracerImpl`. The shim only measures wall-clock time around the traced
/// closure; it records no syscall data.
#[cfg(not(feature = "visualization"))]
mod brick_tracer_shim {
    use std::time::Instant;

    /// Time split of a traced call, in microseconds.
    pub struct SyscallBreakdown {
        /// The shim's `trace` stores the whole measured duration here.
        pub compute_us: u64,
        /// Set to `0` by the shim's `trace`.
        pub mmap_us: u64,
        /// Set to `0` by the shim's `trace`.
        pub futex_us: u64,
        /// Set to `0` by the shim's `trace`.
        pub ioctl_us: u64,
    }

    impl SyscallBreakdown {
        /// Always returns `0.0` in the shim.
        pub fn syscall_overhead_percent(&self) -> f64 {
            0.0
        }

        /// Always returns `"none"` in the shim.
        pub fn dominant_syscall(&self) -> &'static str {
            "none"
        }
    }

    /// Budget bookkeeping for a traced call; the shim never produces one.
    pub struct TraceMetadata {
        pub budget_us: u64,
        pub actual_us: u64,
        pub efficiency: f64,
    }

    /// Value returned by a traced closure together with its timing data.
    pub struct TracedResult<T> {
        /// Whatever the traced closure returned.
        pub result: T,
        /// Wall-clock duration of the closure, in microseconds.
        pub duration_us: u64,
        /// Time split; see [`SyscallBreakdown`].
        pub syscall_breakdown: SyscallBreakdown,
        /// Always `None` when produced by the shim.
        pub metadata: Option<TraceMetadata>,
    }

    /// Zero-sized tracer that only times the closure it is given.
    pub struct BrickTracer;

    impl BrickTracer {
        /// Creates the tracer; there is no state to initialise.
        pub fn new_local() -> Self {
            BrickTracer
        }

        /// Runs `f` once and reports how long it took.
        ///
        /// `_name` and `_budget_us` are accepted for signature compatibility
        /// and ignored. The full duration is attributed to `compute_us`.
        pub fn trace<T>(
            &self,
            _name: &str,
            _budget_us: u64,
            f: impl FnOnce() -> T,
        ) -> TracedResult<T> {
            let started = Instant::now();
            let result = f();
            let duration_us = started.elapsed().as_micros() as u64;

            let syscall_breakdown = SyscallBreakdown {
                compute_us: duration_us,
                mmap_us: 0,
                futex_us: 0,
                ioctl_us: 0,
            };

            TracedResult {
                result,
                duration_us,
                syscall_breakdown,
                metadata: None,
            }
        }
    }
}
138
/// Settings for one QA run.
///
/// NOTE(review): the per-field notes are inferred from the field names and
/// the gate identifiers visible in this file; the gates themselves are
/// implemented in included files, so confirm details there.
#[derive(Clone, Debug)]
pub struct QaConfig {
    /// Lower bound for the throughput gate (presumably tokens per second).
    pub min_tps: f64,
    /// Lower bound for a speedup ratio (baseline not visible here — confirm).
    pub min_speedup: f64,
    /// Lower bound for the GPU speedup ratio.
    pub min_gpu_speedup: f64,
    /// Skip the golden-output gate.
    pub skip_golden: bool,
    /// Skip the throughput gate.
    pub skip_throughput: bool,
    /// Skip the Ollama parity gate.
    pub skip_ollama: bool,
    /// Skip the GPU speedup gate.
    pub skip_gpu_speedup: bool,
    /// Skip the tensor-contract gate.
    pub skip_contract: bool,
    /// Skip the format-parity gate.
    pub skip_format_parity: bool,
    /// Skip the PTX-parity gate.
    pub skip_ptx_parity: bool,
    /// Optional path to a SafeTensors file (consumer not visible here).
    pub safetensors_path: Option<std::path::PathBuf>,
    /// Number of measured iterations.
    pub iterations: usize,
    /// Number of warm-up iterations.
    pub warmup: usize,
    /// Token limit per generation.
    pub max_tokens: usize,
    /// Emit machine-readable JSON; per-gate console output is suppressed.
    pub json: bool,
    /// Verbose output.
    pub verbose: bool,
    /// Optional minimum number of gates that must actually execute.
    pub min_executed: Option<usize>,
    /// Optional path to an earlier report used for regression comparison.
    pub previous_report: Option<std::path::PathBuf>,
    /// Allowed regression as a fraction (`0.10` by default).
    pub regression_threshold: f64,
    /// Skip the GPU state isolation gate.
    pub skip_gpu_state: bool,
    /// Skip the metadata plausibility gate.
    pub skip_metadata: bool,
    /// Skip the capability-match gate.
    pub skip_capability: bool,
    /// Presumably enables the classifier-head gate — confirm.
    pub assert_classifier_head: bool,
}

impl Default for QaConfig {
    /// Baseline configuration: every gate enabled, no optional inputs, and
    /// the stock thresholds and run sizes listed below.
    fn default() -> Self {
        // Thresholds and run sizes are named up front so they read as a table.
        let (min_tps, min_speedup, min_gpu_speedup) = (100.0, 0.2, 2.0);
        let (iterations, warmup, max_tokens) = (10, 3, 32);
        let regression_threshold = 0.10;

        Self {
            min_tps,
            min_speedup,
            min_gpu_speedup,
            regression_threshold,
            iterations,
            warmup,
            max_tokens,

            // Nothing is skipped unless the caller asks for it.
            skip_golden: false,
            skip_throughput: false,
            skip_ollama: false,
            skip_gpu_speedup: false,
            skip_contract: false,
            skip_format_parity: false,
            skip_ptx_parity: false,
            skip_gpu_state: false,
            skip_metadata: false,
            skip_capability: false,

            // Optional inputs and output switches start out unset.
            safetensors_path: None,
            previous_report: None,
            min_executed: None,
            json: false,
            verbose: false,
            assert_classifier_head: false,
        }
    }
}
219
220#[derive(Debug, Clone, Serialize, Deserialize)]
222pub struct GateResult {
223 pub name: String,
225 pub passed: bool,
227 pub message: String,
229 #[serde(skip_serializing_if = "Option::is_none")]
231 pub value: Option<f64>,
232 #[serde(skip_serializing_if = "Option::is_none")]
234 pub threshold: Option<f64>,
235 pub duration_ms: u64,
237 pub skipped: bool,
239}
240
241impl GateResult {
242 pub(crate) fn passed(
243 name: &str,
244 message: &str,
245 value: Option<f64>,
246 threshold: Option<f64>,
247 duration: Duration,
248 ) -> Self {
249 Self {
250 name: name.to_string(),
251 passed: true,
252 message: message.to_string(),
253 value,
254 threshold,
255 duration_ms: duration.as_millis() as u64,
256 skipped: false,
257 }
258 }
259
260 pub(crate) fn failed(
261 name: &str,
262 message: &str,
263 value: Option<f64>,
264 threshold: Option<f64>,
265 duration: Duration,
266 ) -> Self {
267 Self {
268 name: name.to_string(),
269 passed: false,
270 message: message.to_string(),
271 value,
272 threshold,
273 duration_ms: duration.as_millis() as u64,
274 skipped: false,
275 }
276 }
277
278 fn skipped(name: &str, reason: &str) -> Self {
279 Self {
280 name: name.to_string(),
281 passed: true, message: format!("Skipped: {reason}"),
283 value: None,
284 threshold: None,
285 duration_ms: 0,
286 skipped: true,
287 }
288 }
289}
290
291#[derive(Debug, Clone, Serialize, Deserialize)]
293pub struct SystemInfo {
294 pub cpu_model: String,
296 #[serde(skip_serializing_if = "Option::is_none")]
298 pub gpu_model: Option<String>,
299 #[serde(skip_serializing_if = "Option::is_none")]
301 pub gpu_driver: Option<String>,
302}
303
304impl SystemInfo {
305 fn capture() -> Self {
306 let cpu_model = std::fs::read_to_string("/proc/cpuinfo")
307 .ok()
308 .and_then(|s| {
309 s.lines()
310 .find(|l| l.starts_with("model name"))
311 .and_then(|l| l.split(':').nth(1))
312 .map(|s| s.trim().to_string())
313 })
314 .unwrap_or_else(|| "unknown".to_string());
315
316 let (gpu_model, gpu_driver) = Self::detect_gpu();
317
318 Self {
319 cpu_model,
320 gpu_model,
321 gpu_driver,
322 }
323 }
324
325 fn detect_gpu() -> (Option<String>, Option<String>) {
326 let output = std::process::Command::new("nvidia-smi")
327 .args(["--query-gpu=name,driver_version", "--format=csv,noheader"])
328 .output()
329 .ok();
330 if let Some(out) = output {
331 if out.status.success() {
332 let text = String::from_utf8_lossy(&out.stdout);
333 let parts: Vec<&str> = text.trim().splitn(2, ',').collect();
334 return (
335 parts.first().map(|s| s.trim().to_string()),
336 parts.get(1).map(|s| s.trim().to_string()),
337 );
338 }
339 }
340 (None, None)
341 }
342}
343
344#[derive(Debug, Clone, Serialize, Deserialize)]
346pub struct QaReport {
347 pub model: String,
349 pub passed: bool,
351 pub gates: Vec<GateResult>,
353 #[serde(default)]
355 pub gates_executed: usize,
356 #[serde(default)]
358 pub gates_skipped: usize,
359 pub total_duration_ms: u64,
361 pub timestamp: String,
363 pub summary: String,
365 #[serde(default, skip_serializing_if = "Option::is_none")]
367 pub system_info: Option<SystemInfo>,
368}
369
370#[allow(clippy::too_many_arguments)]
372pub fn run(
373 path: &Path,
374 min_tps: Option<f64>,
375 min_speedup: Option<f64>,
376 min_gpu_speedup: Option<f64>,
377 skip_golden: bool,
378 skip_throughput: bool,
379 skip_ollama: bool,
380 skip_gpu_speedup: bool,
381 skip_contract: bool,
382 skip_format_parity: bool,
383 skip_ptx_parity: bool,
384 safetensors_path: Option<std::path::PathBuf>,
385 iterations: usize,
386 warmup: usize,
387 max_tokens: usize,
388 json: bool,
389 verbose: bool,
390 min_executed: Option<usize>,
391 previous_report: Option<std::path::PathBuf>,
392 regression_threshold: Option<f64>,
393 skip_gpu_state: bool,
394 skip_metadata: bool,
395 skip_capability: bool,
396 assert_classifier_head: bool,
397) -> Result<()> {
398 let config = QaConfig {
399 min_tps: min_tps.unwrap_or(100.0),
400 min_speedup: min_speedup.unwrap_or(0.2), min_gpu_speedup: min_gpu_speedup.unwrap_or(2.0), skip_golden,
403 skip_throughput,
404 skip_ollama,
405 skip_gpu_speedup,
406 skip_contract,
407 skip_format_parity,
408 skip_ptx_parity,
409 safetensors_path,
410 iterations,
411 warmup,
412 max_tokens,
413 json,
414 verbose,
415 min_executed,
416 previous_report,
417 regression_threshold: regression_threshold.unwrap_or(0.10),
418 skip_gpu_state,
419 skip_metadata,
420 skip_capability,
421 assert_classifier_head,
422 };
423
424 let report = run_qa(path, &config)?;
425
426 if json {
427 println!(
428 "{}",
429 serde_json::to_string_pretty(&report).unwrap_or_default()
430 );
431 }
432
433 if !report.passed {
434 return Err(CliError::ValidationFailed(report.summary));
435 }
436
437 Ok(())
438}
439
440fn dispatch_gate(
442 gates: &mut Vec<GateResult>,
443 json: bool,
444 skip: bool,
445 name: &str,
446 skip_reason: &str,
447 runner: impl FnOnce() -> Result<GateResult>,
448) -> Result<()> {
449 let result = if skip {
450 GateResult::skipped(name, skip_reason)
451 } else {
452 runner()?
453 };
454 if !json {
455 print_gate_result(&result);
456 }
457 gates.push(result);
458 Ok(())
459}
460
/// Maps a gate identifier to its human-readable label.
///
/// Identifiers without an entry in the table are returned unchanged.
fn gate_display_name(name: &str) -> &str {
    /// Known gate identifiers paired with their display labels.
    const LABELS: [(&str, &str); 12] = [
        ("capability_match", "Capability Match"),
        ("tensor_contract", "Tensor Contract"),
        ("golden_output", "Golden Output"),
        ("throughput", "Throughput"),
        ("ollama_parity", "Ollama Parity"),
        ("gpu_speedup", "GPU Speedup"),
        ("format_parity", "Format Parity"),
        ("ptx_parity", "PTX Parity"),
        ("gpu_state_isolation", "GPU State Isolation"),
        ("performance_regression", "Perf Regression"),
        ("metadata_plausibility", "Metadata Plausibility"),
        ("classifier_head", "Classifier Head"),
    ];

    LABELS
        .iter()
        .find(|(id, _)| *id == name)
        .map_or(name, |&(_, label)| label)
}
480
481fn print_qa_summary(gates: &[GateResult], passed: bool, total_duration: Duration) {
483 output::header("QA Summary");
484
485 let gate_rows: Vec<Vec<String>> = gates
486 .iter()
487 .map(|g| {
488 let badge = if g.skipped {
489 output::badge_skip("SKIP")
490 } else if g.passed {
491 output::badge_pass("PASS")
492 } else {
493 output::badge_fail("FAIL")
494 };
495 let measured = g.value.map_or("—".to_string(), |v| format!("{v:.2}"));
496 let threshold = g.threshold.map_or("—".to_string(), |v| format!("{v:.2}"));
497 vec![
498 gate_display_name(&g.name).to_string(),
499 badge,
500 measured,
501 threshold,
502 output::duration_fmt(g.duration_ms),
503 ]
504 })
505 .collect();
506 println!(
507 "{}",
508 output::table(
509 &["Gate", "Status", "Measured", "Threshold", "Duration"],
510 &gate_rows,
511 )
512 );
513
514 println!();
515 if passed {
516 println!(" {}", output::badge_pass("ALL GATES PASSED"));
517 } else {
518 println!(" {}", output::badge_fail("GATES FAILED"));
519 for gate in gates.iter().filter(|g| !g.passed && !g.skipped) {
520 println!(" {} {}", "✗".red(), gate.name);
521 }
522 }
523 output::metric(
524 "Total Duration",
525 output::duration_fmt(total_duration.as_millis() as u64),
526 "",
527 );
528}
529
530include!("qa_gguf.rs");
531include!("output_verification.rs");
532include!("golden_output.rs");
533include!("speedup.rs");
534include!("forward_error.rs");
535include!("gpu_isolation_result.rs");
536include!("qa_08.rs");