1use crate::error::{CliError, Result};
63use crate::output;
64use colored::Colorize;
65use serde::{Deserialize, Serialize};
66use std::path::Path;
67use std::time::{Duration, Instant};
68
/// Tunable configuration for the QA gate pipeline: pass/fail thresholds,
/// per-gate skip switches, generation/benchmark parameters, and output
/// options. Built from CLI flag values in [`run`].
#[derive(Debug, Clone)]
pub struct QaConfig {
    /// Minimum generation throughput (tokens/sec) for the throughput gate.
    pub min_tps: f64,
    /// Minimum speedup ratio vs Ollama for the parity gate.
    pub min_speedup: f64,
    /// Minimum GPU-over-CPU speedup for the GPU speedup gate.
    pub min_gpu_speedup: f64,
    /// Skip the golden-output gate.
    pub skip_golden: bool,
    /// Skip the throughput gate.
    pub skip_throughput: bool,
    /// Skip the Ollama parity gate.
    pub skip_ollama: bool,
    /// Skip the GPU speedup gate.
    pub skip_gpu_speedup: bool,
    /// Skip the tensor-contract gate.
    pub skip_contract: bool,
    /// Skip the format-parity gate.
    pub skip_format_parity: bool,
    /// Skip the PTX parity gate.
    pub skip_ptx_parity: bool,
    /// SafeTensors counterpart used by the format-parity gate; that gate
    /// is skipped when this is `None`.
    pub safetensors_path: Option<std::path::PathBuf>,
    /// Measured benchmark iterations.
    pub iterations: usize,
    /// Unmeasured warmup iterations before timing starts.
    pub warmup: usize,
    /// Maximum tokens to generate per test prompt.
    pub max_tokens: usize,
    /// Emit the report as JSON instead of human-readable output.
    pub json: bool,
    /// Verbose progress logging (human-readable mode only).
    pub verbose: bool,
    /// Minimum number of gates that must actually execute (not skip)
    /// for the overall run to count as passing.
    pub min_executed: Option<usize>,
    /// Baseline report for the performance-regression gate; that gate is
    /// skipped when this is `None`.
    pub previous_report: Option<std::path::PathBuf>,
    /// Allowed fractional performance regression vs the baseline
    /// (e.g. 0.10 = 10%). NOTE(review): consumed by
    /// `run_performance_regression_gate`, defined outside this chunk.
    pub regression_threshold: f64,
    /// Skip the GPU state-isolation gate.
    pub skip_gpu_state: bool,
    /// Skip the metadata-plausibility gate.
    pub skip_metadata: bool,
}
115
116impl Default for QaConfig {
117 fn default() -> Self {
118 Self {
119 min_tps: 100.0, min_speedup: 0.2, min_gpu_speedup: 2.0, skip_golden: false,
123 skip_throughput: false,
124 skip_ollama: false,
125 skip_gpu_speedup: false,
126 skip_contract: false,
127 skip_format_parity: false,
128 skip_ptx_parity: false,
129 safetensors_path: None,
130 iterations: 10,
131 warmup: 3,
132 max_tokens: 32,
133 json: false,
134 verbose: false,
135 min_executed: None,
136 previous_report: None,
137 regression_threshold: 0.10,
138 skip_gpu_state: false,
139 skip_metadata: false,
140 }
141 }
142}
143
/// Outcome of a single QA gate, serialized into [`QaReport`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GateResult {
    /// Machine-readable gate id (see `gate_display_name` for labels).
    pub name: String,
    /// Whether the gate passed. Skipped gates are recorded as passing.
    pub passed: bool,
    /// Human-readable explanation of the outcome.
    pub message: String,
    /// Measured value, when the gate produces one.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub value: Option<f64>,
    /// Threshold the measured value was compared against, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub threshold: Option<f64>,
    /// Wall-clock time the gate took, in milliseconds (0 when skipped).
    pub duration_ms: u64,
    /// True when the gate did not actually run.
    pub skipped: bool,
}
164
165impl GateResult {
166 fn passed(
167 name: &str,
168 message: &str,
169 value: Option<f64>,
170 threshold: Option<f64>,
171 duration: Duration,
172 ) -> Self {
173 Self {
174 name: name.to_string(),
175 passed: true,
176 message: message.to_string(),
177 value,
178 threshold,
179 duration_ms: duration.as_millis() as u64,
180 skipped: false,
181 }
182 }
183
184 fn failed(
185 name: &str,
186 message: &str,
187 value: Option<f64>,
188 threshold: Option<f64>,
189 duration: Duration,
190 ) -> Self {
191 Self {
192 name: name.to_string(),
193 passed: false,
194 message: message.to_string(),
195 value,
196 threshold,
197 duration_ms: duration.as_millis() as u64,
198 skipped: false,
199 }
200 }
201
202 fn skipped(name: &str, reason: &str) -> Self {
203 Self {
204 name: name.to_string(),
205 passed: true, message: format!("Skipped: {reason}"),
207 value: None,
208 threshold: None,
209 duration_ms: 0,
210 skipped: true,
211 }
212 }
213}
214
/// Host hardware snapshot embedded in the QA report for reproducibility.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SystemInfo {
    /// CPU model string from /proc/cpuinfo ("unknown" when unreadable).
    pub cpu_model: String,
    /// GPU name reported by `nvidia-smi`, when available.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub gpu_model: Option<String>,
    /// NVIDIA driver version reported by `nvidia-smi`, when available.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub gpu_driver: Option<String>,
}
227
228impl SystemInfo {
229 fn capture() -> Self {
230 let cpu_model = std::fs::read_to_string("/proc/cpuinfo")
231 .ok()
232 .and_then(|s| {
233 s.lines()
234 .find(|l| l.starts_with("model name"))
235 .and_then(|l| l.split(':').nth(1))
236 .map(|s| s.trim().to_string())
237 })
238 .unwrap_or_else(|| "unknown".to_string());
239
240 let (gpu_model, gpu_driver) = Self::detect_gpu();
241
242 Self {
243 cpu_model,
244 gpu_model,
245 gpu_driver,
246 }
247 }
248
249 fn detect_gpu() -> (Option<String>, Option<String>) {
250 let output = std::process::Command::new("nvidia-smi")
251 .args(["--query-gpu=name,driver_version", "--format=csv,noheader"])
252 .output()
253 .ok();
254 if let Some(out) = output {
255 if out.status.success() {
256 let text = String::from_utf8_lossy(&out.stdout);
257 let parts: Vec<&str> = text.trim().splitn(2, ',').collect();
258 return (
259 parts.first().map(|s| s.trim().to_string()),
260 parts.get(1).map(|s| s.trim().to_string()),
261 );
262 }
263 }
264 (None, None)
265 }
266}
267
/// Full QA run report: per-gate outcomes plus the aggregate verdict.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QaReport {
    /// Path of the model under test.
    pub model: String,
    /// Overall verdict: all gates passed and `min_executed` satisfied.
    pub passed: bool,
    /// Individual gate results, in execution order.
    pub gates: Vec<GateResult>,
    /// Number of gates that actually ran (`default` keeps older
    /// serialized reports deserializable).
    #[serde(default)]
    pub gates_executed: usize,
    /// Number of gates that were skipped.
    #[serde(default)]
    pub gates_skipped: usize,
    /// Total pipeline wall-clock time in milliseconds.
    pub total_duration_ms: u64,
    /// RFC 3339 UTC timestamp of report creation.
    pub timestamp: String,
    /// One-line human-readable verdict.
    pub summary: String,
    /// Host hardware snapshot, when captured.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub system_info: Option<SystemInfo>,
}
293
294#[allow(clippy::too_many_arguments)]
296pub fn run(
297 path: &Path,
298 min_tps: Option<f64>,
299 min_speedup: Option<f64>,
300 min_gpu_speedup: Option<f64>,
301 skip_golden: bool,
302 skip_throughput: bool,
303 skip_ollama: bool,
304 skip_gpu_speedup: bool,
305 skip_contract: bool,
306 skip_format_parity: bool,
307 skip_ptx_parity: bool,
308 safetensors_path: Option<std::path::PathBuf>,
309 iterations: usize,
310 warmup: usize,
311 max_tokens: usize,
312 json: bool,
313 verbose: bool,
314 min_executed: Option<usize>,
315 previous_report: Option<std::path::PathBuf>,
316 regression_threshold: Option<f64>,
317 skip_gpu_state: bool,
318 skip_metadata: bool,
319) -> Result<()> {
320 let config = QaConfig {
321 min_tps: min_tps.unwrap_or(100.0),
322 min_speedup: min_speedup.unwrap_or(0.2), min_gpu_speedup: min_gpu_speedup.unwrap_or(2.0), skip_golden,
325 skip_throughput,
326 skip_ollama,
327 skip_gpu_speedup,
328 skip_contract,
329 skip_format_parity,
330 skip_ptx_parity,
331 safetensors_path,
332 iterations,
333 warmup,
334 max_tokens,
335 json,
336 verbose,
337 min_executed,
338 previous_report,
339 regression_threshold: regression_threshold.unwrap_or(0.10),
340 skip_gpu_state,
341 skip_metadata,
342 };
343
344 let report = run_qa(path, &config)?;
345
346 if json {
347 println!(
348 "{}",
349 serde_json::to_string_pretty(&report).unwrap_or_default()
350 );
351 }
352
353 if !report.passed {
354 return Err(CliError::ValidationFailed(report.summary));
355 }
356
357 Ok(())
358}
359
360fn dispatch_gate(
362 gates: &mut Vec<GateResult>,
363 json: bool,
364 skip: bool,
365 name: &str,
366 skip_reason: &str,
367 runner: impl FnOnce() -> Result<GateResult>,
368) -> Result<()> {
369 let result = if skip {
370 GateResult::skipped(name, skip_reason)
371 } else {
372 runner()?
373 };
374 if !json {
375 print_gate_result(&result);
376 }
377 gates.push(result);
378 Ok(())
379}
380
/// Map a machine-readable gate id to its human-readable table label;
/// unknown ids are passed through unchanged.
fn gate_display_name(name: &str) -> &str {
    const LABELS: [(&str, &str); 10] = [
        ("tensor_contract", "Tensor Contract"),
        ("golden_output", "Golden Output"),
        ("throughput", "Throughput"),
        ("ollama_parity", "Ollama Parity"),
        ("gpu_speedup", "GPU Speedup"),
        ("format_parity", "Format Parity"),
        ("ptx_parity", "PTX Parity"),
        ("gpu_state_isolation", "GPU State Isolation"),
        ("performance_regression", "Perf Regression"),
        ("metadata_plausibility", "Metadata Plausibility"),
    ];
    LABELS
        .iter()
        .find(|(id, _)| *id == name)
        .map_or(name, |(_, label)| *label)
}
398
399fn print_qa_summary(gates: &[GateResult], passed: bool, total_duration: Duration) {
401 output::header("QA Summary");
402
403 let gate_rows: Vec<Vec<String>> = gates
404 .iter()
405 .map(|g| {
406 let badge = if g.skipped {
407 output::badge_skip("SKIP")
408 } else if g.passed {
409 output::badge_pass("PASS")
410 } else {
411 output::badge_fail("FAIL")
412 };
413 let measured = g.value.map_or("—".to_string(), |v| format!("{v:.2}"));
414 let threshold = g.threshold.map_or("—".to_string(), |v| format!("{v:.2}"));
415 vec![
416 gate_display_name(&g.name).to_string(),
417 badge,
418 measured,
419 threshold,
420 output::duration_fmt(g.duration_ms),
421 ]
422 })
423 .collect();
424 println!(
425 "{}",
426 output::table(
427 &["Gate", "Status", "Measured", "Threshold", "Duration"],
428 &gate_rows,
429 )
430 );
431
432 println!();
433 if passed {
434 println!(" {}", output::badge_pass("ALL GATES PASSED"));
435 } else {
436 println!(" {}", output::badge_fail("GATES FAILED"));
437 for gate in gates.iter().filter(|g| !g.passed && !g.skipped) {
438 println!(" {} {}", "✗".red(), gate.name);
439 }
440 }
441 output::metric(
442 "Total Duration",
443 output::duration_fmt(total_duration.as_millis() as u64),
444 "",
445 );
446}
447
/// Best-effort check that `path` is a GGUF file: read it and run format
/// detection on the first 8 bytes. Returns `false` on any read or
/// detection error, and always `false` without the `inference` feature.
fn is_gguf_format(path: &Path) -> bool {
    #[cfg(feature = "inference")]
    {
        use realizar::format::{detect_format, ModelFormat};
        // Detection needs at least 8 magic bytes; shorter files map to None.
        // NOTE(review): this reads the whole file just to get 8 bytes.
        let magic = std::fs::read(path).ok().and_then(|b| {
            if b.len() >= 8 {
                Some(b[..8].to_vec())
            } else {
                None
            }
        });
        magic.and_then(|m| detect_format(&m).ok()) == Some(ModelFormat::Gguf)
    }
    #[cfg(not(feature = "inference"))]
    {
        let _ = path;
        false
    }
}
468
469fn run_qa(path: &Path, config: &QaConfig) -> Result<QaReport> {
470 let start = Instant::now();
471 let mut gates = Vec::new();
472
473 if !config.json {
474 output::header("APR Quality Assurance");
475 let config_pairs = vec![
476 ("Model", path.display().to_string()),
477 ("Min TPS", format!("{:.0} tok/s", config.min_tps)),
478 ("Min Speedup", format!("{:.1}x Ollama", config.min_speedup)),
479 ];
480 println!("{}", output::kv_table(&config_pairs));
481 }
482
483 dispatch_gate(
484 &mut gates,
485 config.json,
486 config.skip_contract,
487 "tensor_contract",
488 "Skipped by --skip-contract",
489 || run_tensor_contract_gate(path, config),
490 )?;
491 dispatch_gate(
492 &mut gates,
493 config.json,
494 config.skip_metadata,
495 "metadata_plausibility",
496 "Skipped by --skip-metadata",
497 || run_metadata_plausibility_gate(path, config),
498 )?;
499 dispatch_gate(
500 &mut gates,
501 config.json,
502 config.skip_golden,
503 "golden_output",
504 "Skipped by --skip-golden",
505 || run_golden_output_gate(path, config),
506 )?;
507 dispatch_gate(
508 &mut gates,
509 config.json,
510 config.skip_throughput,
511 "throughput",
512 "Skipped by --skip-throughput",
513 || run_throughput_gate(path, config),
514 )?;
515 dispatch_gate(
516 &mut gates,
517 config.json,
518 config.skip_ollama,
519 "ollama_parity",
520 "Skipped by --skip-ollama",
521 || {
522 if is_gguf_format(path) {
523 run_ollama_parity_gate(path, config)
524 } else {
525 Ok(GateResult::skipped(
526 "ollama_parity",
527 "Non-GGUF format (F32/F16 lacks fused kernels for Ollama parity)",
528 ))
529 }
530 },
531 )?;
532 dispatch_gate(
533 &mut gates,
534 config.json,
535 config.skip_gpu_speedup,
536 "gpu_speedup",
537 "Skipped by --skip-gpu-speedup",
538 || run_gpu_speedup_gate(path, config),
539 )?;
540
541 let skip_format = config.skip_format_parity || config.safetensors_path.is_none();
542 let format_skip_reason = if config.skip_format_parity {
543 "Skipped by --skip-format-parity"
544 } else {
545 "No --safetensors-path provided"
546 };
547 dispatch_gate(
548 &mut gates,
549 config.json,
550 skip_format,
551 "format_parity",
552 format_skip_reason,
553 || run_format_parity_gate(path, config),
554 )?;
555 dispatch_gate(
556 &mut gates,
557 config.json,
558 config.skip_ptx_parity,
559 "ptx_parity",
560 "Skipped by --skip-ptx-parity",
561 || run_ptx_parity_gate(path, config),
562 )?;
563 dispatch_gate(
564 &mut gates,
565 config.json,
566 config.skip_gpu_state,
567 "gpu_state_isolation",
568 "Skipped by --skip-gpu-state",
569 || run_gpu_state_isolation_gate(path, config),
570 )?;
571
572 let regression_result = if config.previous_report.is_none() {
576 GateResult::skipped("performance_regression", "No --previous-report provided")
577 } else {
578 run_performance_regression_gate(&gates, config)?
579 };
580 if !config.json {
581 print_gate_result(®ression_result);
582 }
583 gates.push(regression_result);
584
585 let total_duration = start.elapsed();
586 let gates_executed = gates.iter().filter(|g| !g.skipped).count();
587 let gates_skipped = gates.iter().filter(|g| g.skipped).count();
588
589 if !config.json && gates_skipped > gates_executed {
591 println!(
592 " {} {} of {} gates SKIPPED — QA not rigorous",
593 "WARN".yellow().bold(),
594 gates_skipped,
595 gates_skipped + gates_executed
596 );
597 }
598
599 let mut passed = gates.iter().all(|g| g.passed);
600
601 if let Some(min) = config.min_executed {
603 if gates_executed < min {
604 if !config.json {
605 println!(
606 " {} Only {} gates executed, minimum required: {}",
607 "FAIL".red().bold(),
608 gates_executed,
609 min,
610 );
611 }
612 passed = false;
613 }
614 }
615
616 let summary = if passed {
617 format!(
618 "All QA gates passed ({} executed, {} skipped)",
619 gates_executed, gates_skipped
620 )
621 } else {
622 let names: Vec<_> = gates
623 .iter()
624 .filter(|g| !g.passed && !g.skipped)
625 .map(|g| g.name.as_str())
626 .collect();
627 if names.is_empty() && !passed {
628 format!(
629 "Insufficient gate execution: {} < {} minimum",
630 gates_executed,
631 config.min_executed.unwrap_or(0)
632 )
633 } else {
634 format!("Failed gates: {}", names.join(", "))
635 }
636 };
637
638 if !config.json {
639 print_qa_summary(&gates, passed, total_duration);
640 }
641
642 Ok(QaReport {
643 model: path.display().to_string(),
644 passed,
645 gates,
646 gates_executed,
647 gates_skipped,
648 total_duration_ms: total_duration.as_millis() as u64,
649 timestamp: chrono::Utc::now().to_rfc3339(),
650 summary,
651 system_info: Some(SystemInfo::capture()),
652 })
653}
654
/// Gate: validate every tensor in the model against the PMAT-235
/// contract using aprender's RosettaStone validator. Passes only when
/// no tensor reports a contract failure.
fn run_tensor_contract_gate(path: &Path, config: &QaConfig) -> Result<GateResult> {
    let start = Instant::now();

    if !config.json && config.verbose {
        println!(
            "{}",
            "Running tensor contract validation (PMAT-235)...".yellow()
        );
    }

    let rosetta = aprender::format::rosetta::RosettaStone::new();
    let report = match rosetta.validate(path) {
        Ok(r) => r,
        Err(e) => {
            // A validator error (unreadable/unparseable model) fails this
            // gate rather than aborting the whole pipeline.
            let duration = start.elapsed();
            return Ok(GateResult::failed(
                "tensor_contract",
                &format!("Failed to validate: {e}"),
                None,
                None,
                duration,
            ));
        }
    };

    let duration = start.elapsed();

    // Flatten per-tensor failures into "tensor_name: failure" strings.
    let contract_failures: Vec<String> = report
        .tensors
        .iter()
        .flat_map(|t| t.failures.iter().map(|f| format!("{}: {}", t.name, f)))
        .collect();

    if contract_failures.is_empty() {
        Ok(GateResult::passed(
            "tensor_contract",
            &format!(
                "{} tensors passed all PMAT-235 contract gates",
                report.tensor_count
            ),
            Some(report.tensor_count as f64),
            Some(0.0),
            duration,
        ))
    } else {
        // Cap the message at three failures to keep reports readable.
        let summary = if contract_failures.len() <= 3 {
            contract_failures.join("; ")
        } else {
            format!(
                "{}; ... and {} more",
                contract_failures[..3].join("; "),
                contract_failures.len() - 3
            )
        };
        Ok(GateResult::failed(
            "tensor_contract",
            &format!(
                "{} contract violations in {} tensors: {}",
                contract_failures.len(),
                report.failed_tensor_count,
                summary
            ),
            Some(contract_failures.len() as f64),
            Some(0.0),
            duration,
        ))
    }
}
732
/// Gate: sanity-check model hyperparameter metadata (Bug 210 family) —
/// rope_theta, max_position_embeddings, rms_norm_eps, and a cross-check
/// for qwen2 models carrying the llama-default theta. Fails on any
/// violation; the message aggregates all of them.
fn run_metadata_plausibility_gate(path: &Path, config: &QaConfig) -> Result<GateResult> {
    let start = Instant::now();

    if !config.json && config.verbose {
        println!(
            "{}",
            "Running metadata plausibility validation (Bug 210)...".yellow()
        );
    }

    let data = std::fs::read(path)
        .map_err(|e| CliError::ValidationFailed(format!("Failed to read model: {e}")))?;

    // Guard: metadata extraction reads the 4-byte magic unconditionally.
    if data.len() < 4 {
        let duration = start.elapsed();
        return Ok(GateResult::failed(
            "metadata_plausibility",
            "File too small for metadata extraction",
            None,
            None,
            duration,
        ));
    }

    let (architecture, rope_theta, max_pos, rms_norm_eps) = extract_model_metadata(&data, path)?;

    // Each check either bumps checks_passed or appends a violation.
    let mut violations: Vec<String> = Vec::new();
    let mut checks_passed = 0usize;

    check_rope_theta(architecture.as_deref(), rope_theta, &data, &mut violations, &mut checks_passed);
    check_max_position_embeddings(max_pos, &mut violations, &mut checks_passed);
    check_rms_norm_eps(rms_norm_eps, &mut violations, &mut checks_passed);
    check_arch_theta_cross_validation(architecture.as_ref(), rope_theta, &mut violations, &mut checks_passed);

    let duration = start.elapsed();

    if violations.is_empty() {
        Ok(GateResult::passed(
            "metadata_plausibility",
            &format!(
                "{checks_passed} metadata checks passed (arch={}, rope_theta={}, max_pos={})",
                architecture.as_deref().unwrap_or("unknown"),
                rope_theta.map_or("none".to_string(), |t| format!("{t}")),
                max_pos.map_or("none".to_string(), |p| format!("{p}")),
            ),
            Some(checks_passed as f64),
            Some(0.0),
            duration,
        ))
    } else {
        Ok(GateResult::failed(
            "metadata_plausibility",
            &format!(
                "{} metadata violation(s): {}",
                violations.len(),
                violations.join("; ")
            ),
            Some(violations.len() as f64),
            Some(0.0),
            duration,
        ))
    }
}
804
/// Validate `rope_theta` plausibility per architecture (Bug 210 class:
/// a qwen2 model exported with the llama default of 10000 produces
/// garbage output).
///
/// On success increments `checks_passed`; otherwise pushes a
/// human-readable violation. A missing `rope_theta` is acceptable only
/// for GGUF inputs (identified by magic in `data`); for other formats
/// the absence itself is a violation.
fn check_rope_theta(
    arch: Option<&str>,
    rope_theta: Option<f32>,
    data: &[u8],
    violations: &mut Vec<String>,
    checks_passed: &mut usize,
) {
    if let Some(theta) = rope_theta {
        let theta_f64 = f64::from(theta);
        match arch {
            Some("qwen2" | "qwen2.5" | "qwen") => {
                if theta_f64 < 100_000.0 {
                    violations.push(format!(
                        "rope_theta={theta} for qwen2 — expected ~1000000.0 \
                         (100x too low, will produce garbage)"
                    ));
                } else {
                    *checks_passed += 1;
                }
            }
            Some("llama" | "llama2" | "llama3") => {
                if (1000.0..=10_000_000.0).contains(&theta_f64) {
                    *checks_passed += 1;
                } else {
                    // BUGFIX: the message previously claimed
                    // "expected 10000-500000", which contradicted the
                    // enforced range 1000..=10000000 above.
                    violations.push(format!(
                        "rope_theta={theta} for llama — expected 1000-10000000"
                    ));
                }
            }
            _ => {
                if (100.0..=100_000_000.0).contains(&theta_f64) {
                    *checks_passed += 1;
                } else {
                    violations.push(format!(
                        "rope_theta={theta} outside plausible range [100, 100M]"
                    ));
                }
            }
        }
    } else {
        // BUGFIX(robustness): checked `get` instead of `data[0..4]` so a
        // sub-4-byte slice cannot panic here (callers currently
        // pre-check the length, but this helper should not rely on it).
        if data.get(0..4).map_or(false, |magic| magic == b"GGUF") {
            *checks_passed += 1;
        } else {
            violations.push("rope_theta missing from APR metadata".to_string());
        }
    }
}
854
/// Plausibility check: `max_position_embeddings` must lie in
/// [128, 1M] when present. Absence is treated as passing.
fn check_max_position_embeddings(
    max_pos: Option<usize>,
    violations: &mut Vec<String>,
    checks_passed: &mut usize,
) {
    let Some(val) = max_pos else {
        // Field absent — nothing to validate.
        *checks_passed += 1;
        return;
    };
    match val {
        128..=1_048_576 => *checks_passed += 1,
        _ => violations.push(format!(
            "max_position_embeddings={val} outside plausible range [128, 1M]"
        )),
    }
}
873
/// Plausibility check: `rms_norm_eps` must lie in (0, 0.01] when
/// present. Absence is treated as passing.
fn check_rms_norm_eps(
    rms_norm_eps: Option<f32>,
    violations: &mut Vec<String>,
    checks_passed: &mut usize,
) {
    let Some(eps) = rms_norm_eps else {
        // Field absent — nothing to validate.
        *checks_passed += 1;
        return;
    };
    let as_f64 = f64::from(eps);
    // Condition kept in this exact form to preserve NaN handling.
    if as_f64 <= 0.0 || as_f64 > 0.01 {
        violations.push(format!(
            "rms_norm_eps={eps} outside plausible range (0, 0.01]"
        ));
    } else {
        *checks_passed += 1;
    }
}
893
/// Cross-check architecture against rope_theta: a qwen-family model
/// with theta ≈ 10000 almost certainly lost its config.json during
/// conversion (Bug 210). Missing either field counts as passing.
fn check_arch_theta_cross_validation(
    architecture: Option<&String>,
    rope_theta: Option<f32>,
    violations: &mut Vec<String>,
    checks_passed: &mut usize,
) {
    let (Some(arch), Some(theta)) = (architecture, rope_theta) else {
        // Cannot cross-validate without both fields present.
        *checks_passed += 1;
        return;
    };
    let is_qwen = matches!(arch.as_str(), "qwen2" | "qwen2.5" | "qwen");
    let near_llama_default = (f64::from(theta) - 10000.0).abs() < 1.0;
    if is_qwen && near_llama_default {
        violations.push(format!(
            "CRITICAL: {arch} with rope_theta=10000.0 — \
             likely missing config.json (Bug 210)"
        ));
    } else {
        *checks_passed += 1;
    }
}
917
/// Metadata tuple: (architecture, rope_theta, max_position_embeddings,
/// rms_norm_eps); any field may be absent.
type ModelMetadata = (Option<String>, Option<f32>, Option<usize>, Option<f32>);

/// Extract plausibility-relevant metadata from the model bytes.
///
/// Dispatches on the 4-byte magic: GGUF uses the GGUF reader, APR the
/// APR v2 reader; anything else falls back to a naive string scan of a
/// sibling `config.json` (SafeTensors layout), or all-`None` when no
/// config exists. Caller must guarantee `data.len() >= 4` (checked in
/// `run_metadata_plausibility_gate`).
fn extract_model_metadata(
    data: &[u8],
    path: &Path,
) -> Result<ModelMetadata> {
    let magic = &data[0..4];

    if magic == b"GGUF" {
        let reader = aprender::format::gguf::reader::GgufReader::from_bytes(data.to_vec())
            .map_err(|e| CliError::ValidationFailed(format!("GGUF parse failed: {e}")))?;
        let arch = reader.architecture();
        let rope_theta = reader.rope_theta();
        let max_pos = reader.context_length();
        let rms_norm_eps = reader.rms_norm_eps();
        Ok((arch, rope_theta, max_pos, rms_norm_eps))
    } else if &magic[0..3] == b"APR" || magic == b"APRN" {
        use aprender::format::v2::AprV2Reader;
        let reader = AprV2Reader::from_bytes(data)
            .map_err(|e| CliError::ValidationFailed(format!("APR parse failed: {e}")))?;
        let meta = reader.metadata();
        // `path` is unused on this branch; silence the unused warning.
        let _ = path;
        Ok((
            meta.architecture.clone(),
            meta.rope_theta,
            meta.max_position_embeddings,
            meta.rms_norm_eps,
        ))
    } else {
        // SafeTensors (or unknown): metadata lives in a sibling config.json.
        let config_path = path.with_file_name("config.json");
        if config_path.exists() {
            let config_str = std::fs::read_to_string(&config_path)
                .map_err(|e| CliError::ValidationFailed(format!("config.json read failed: {e}")))?;
            let arch = extract_json_string(&config_str, "model_type");
            let rope_theta = extract_json_f32(&config_str, "rope_theta");
            let max_pos = extract_json_usize(&config_str, "max_position_embeddings");
            let rms_norm_eps = extract_json_f32(&config_str, "rms_norm_eps");
            Ok((arch, rope_theta, max_pos, rms_norm_eps))
        } else {
            // No metadata source available — nothing to validate.
            Ok((None, None, None, None))
        }
    }
}
968
/// Naive JSON scan: find `"key"`, skip past the colon, and return the
/// following double-quoted string value. Returns `None` for missing
/// keys or non-string values. (Not a full parser: no escape handling.)
fn extract_json_string(json: &str, key: &str) -> Option<String> {
    let needle = format!("\"{key}\"");
    let key_pos = json.find(&needle)?;
    let rest = &json[key_pos + needle.len()..];
    let after_colon = &rest[rest.find(':')? + 1..];
    let inner = after_colon.trim_start().strip_prefix('"')?;
    let close = inner.find('"')?;
    Some(inner[..close].to_string())
}
985
/// Naive JSON scan: find `"key"` and parse the numeric token after the
/// colon as `f32`. The value must be terminated by `,`, `}` or a
/// newline; otherwise (or on parse failure) returns `None`.
fn extract_json_f32(json: &str, key: &str) -> Option<f32> {
    let needle = format!("\"{key}\"");
    let start = json.find(&needle)? + needle.len();
    let value = json[start..].split_once(':')?.1.trim_start();
    let stop = value.find([',', '}', '\n'])?;
    value[..stop].trim().parse::<f32>().ok()
}
997
/// Naive JSON scan: find `"key"` and parse the numeric token after the
/// colon as `usize`. The value must be terminated by `,`, `}` or a
/// newline; otherwise (or on parse failure) returns `None`.
fn extract_json_usize(json: &str, key: &str) -> Option<usize> {
    let needle = format!("\"{key}\"");
    let start = json.find(&needle)? + needle.len();
    let value = json[start..].split_once(':')?.1.trim_start();
    let stop = value.find([',', '}', '\n'])?;
    value[..stop].trim().parse::<usize>().ok()
}
1008
/// Result of output sanity verification.
#[derive(Debug, Clone)]
pub enum OutputVerification {
    /// Output passed all heuristics.
    Pass,
    /// Output failed; `reason` names the failing heuristic.
    Fail {
        reason: String,
    },
}

/// Verify generated text: non-empty, free of known garbage artifacts and
/// NUL bytes, and (case-insensitively) containing at least one of
/// `expected_patterns` — an empty pattern slice disables that last check.
pub fn verify_output(
    output: &str,
    test_id: &str,
    expected_patterns: &[&str],
) -> OutputVerification {
    if output.trim().is_empty() {
        return OutputVerification::Fail {
            reason: format!("{test_id}: Empty output"),
        };
    }

    // Known tokenizer-failure artifacts: Unicode replacement char,
    // unknown-token marker, and observed garbage fragments.
    let garbage_patterns = ["\u{FFFD}", "[UNK]", "akunji", "olumbia"];
    for pattern in &garbage_patterns {
        if output.contains(pattern) {
            return OutputVerification::Fail {
                reason: format!("{test_id}: Garbage detected: '{pattern}'"),
            };
        }
    }

    // NUL bytes indicate a broken BPE decode path.
    let null_count = output.bytes().filter(|&b| b == 0).count();
    if null_count > 0 {
        return OutputVerification::Fail {
            reason: format!("{test_id}: {null_count} null bytes detected (BPE artifact)"),
        };
    }

    if !expected_patterns.is_empty() {
        // PERF: lowercase the output once, not once per pattern as before.
        let lowered = output.to_lowercase();
        let found = expected_patterns
            .iter()
            .any(|p| lowered.contains(&p.to_lowercase()));
        if !found {
            return OutputVerification::Fail {
                reason: format!(
                    "{test_id}: Expected one of {:?}, got: '{}'",
                    expected_patterns,
                    output.chars().take(100).collect::<String>()
                ),
            };
        }
    }

    OutputVerification::Pass
}
1077
/// Re-run a golden prompt on the GPU-resident CUDA path and verify its
/// output. Returns `Ok(Some(reason))` only when the GPU output fails
/// verification (CPU having already passed); CUDA init or generation
/// errors are logged in verbose mode and treated as non-fatal
/// (`Ok(None)`), since GPU absence should not fail the gate.
#[cfg(all(feature = "inference", feature = "cuda"))]
fn validate_gpu_golden_output(
    mapped: &realizar::gguf::MappedGGUFModel,
    prompt_tokens: &[u32],
    gen_config: &realizar::gguf::QuantizedGenerateConfig,
    gguf: &realizar::gguf::GGUFModel,
    expected_patterns: &[&str],
    config: &QaConfig,
) -> Result<Option<String>> {
    use realizar::gguf::{OwnedQuantizedModel, OwnedQuantizedModelCuda};
    let model = OwnedQuantizedModel::from_mapped(mapped)
        .map_err(|e| CliError::ValidationFailed(format!("Model failed: {e}")))?;
    // Device 0 only; multi-GPU selection is not attempted here.
    match OwnedQuantizedModelCuda::new(model, 0) {
        Ok(mut cuda_model) => match cuda_model.generate_gpu_resident(prompt_tokens, gen_config) {
            Ok(gpu_tokens) => {
                let gpu_text = gguf.decode(&gpu_tokens);
                if let OutputVerification::Fail { reason } =
                    verify_output(&gpu_text, "golden_output_gpu", expected_patterns)
                {
                    return Ok(Some(format!("GPU output failed (CPU passed): {reason}")));
                }
            }
            Err(e) => {
                // Generation error on GPU: warn (verbose) but do not fail.
                if !config.json && config.verbose {
                    println!("{}", format!("GPU golden output skipped: {e}").yellow());
                }
            }
        },
        Err(e) => {
            // CUDA init error: warn (verbose) but do not fail.
            if !config.json && config.verbose {
                println!("{}", format!("CUDA init skipped: {e}").yellow());
            }
        }
    }
    Ok(None)
}
1118
/// Generate golden output for an APR model: load the embedded BPE
/// tokenizer and transformer, then run deterministic greedy decoding
/// (temperature 0, top_k 1). Returns the raw tokens and decoded text.
#[cfg(feature = "inference")]
fn golden_output_apr(path: &Path, prompt: &str, max_tokens: usize) -> Result<(Vec<u32>, String)> {
    use realizar::apr::AprV2Model;
    use realizar::apr_transformer::{AprTransformer, GenerateConfig};

    let apr_model = AprV2Model::load(path)
        .map_err(|e| CliError::ValidationFailed(format!("Failed to load APR: {e}")))?;
    let tokenizer = apr_model
        .load_embedded_bpe_tokenizer()
        .ok_or_else(|| CliError::ValidationFailed("APR missing embedded tokenizer".to_string()))?;
    let transformer = AprTransformer::from_apr_file(path)
        .map_err(|e| CliError::ValidationFailed(format!("Failed to load APR transformer: {e}")))?;

    let prompt_tokens = tokenizer.encode(prompt);
    // Greedy decoding so golden outputs are reproducible.
    let gen_config = GenerateConfig {
        max_tokens,
        temperature: 0.0,
        top_k: 1,
        ..Default::default()
    };

    let tokens = transformer
        .generate_with_cache(&prompt_tokens, &gen_config)
        .map_err(|e| CliError::ValidationFailed(format!("Generation failed: {e}")))?;
    let text = tokenizer.decode(&tokens);
    Ok((tokens, text))
}
1147
/// Generate golden output for a SafeTensors model via the APR converter.
/// Requires a sibling `tokenizer.json`; returns `Ok(None)` when the
/// tokenizer is missing or unloadable (caller skips the gate).
#[cfg(feature = "inference")]
fn golden_output_safetensors(
    path: &Path,
    prompt: &str,
    max_tokens: usize,
) -> Result<Option<(Vec<u32>, String)>> {
    use aprender::text::bpe::{load_from_json, BpeTokenizer};
    use realizar::safetensors_infer::SafetensorsToAprConverter;

    let tokenizer_path = realizar::safetensors::find_sibling_file(path, "tokenizer.json");
    let tokenizer: Option<BpeTokenizer> = tokenizer_path
        .as_ref()
        .and_then(|p| std::fs::read_to_string(p).ok())
        .and_then(|json| load_from_json(&json).ok());

    let Some(tokenizer) = tokenizer else {
        // No tokenizer — signal the caller to skip rather than fail.
        return Ok(None);
    };

    let transformer = SafetensorsToAprConverter::convert(path)
        .map_err(|e| CliError::ValidationFailed(format!("SafeTensors convert failed: {e}")))?;

    let prompt_tokens = tokenizer.encode(prompt);
    // Greedy decoding so golden outputs are reproducible.
    let gen_config = realizar::apr_transformer::GenerateConfig {
        max_tokens,
        temperature: 0.0,
        top_k: 1,
        ..Default::default()
    };

    let tokens = transformer
        .generate_with_cache(&prompt_tokens, &gen_config)
        .map_err(|e| CliError::ValidationFailed(format!("Generation failed: {e}")))?;
    let text = tokenizer.decode(&tokens);
    Ok(Some((tokens, text)))
}
1185
/// Generate golden output for a GGUF model on the CPU quantized path
/// with deterministic greedy decoding.
/// NOTE(review): the encode-failure fallback tokens [151643, 9707] look
/// Qwen-specific — confirm they are sensible for other vocabularies.
#[cfg(feature = "inference")]
fn golden_output_gguf_cpu(
    mapped: &realizar::gguf::MappedGGUFModel,
    gguf: &realizar::gguf::GGUFModel,
    prompt: &str,
    max_tokens: usize,
) -> Result<(Vec<u32>, String)> {
    use realizar::gguf::{OwnedQuantizedModel, QuantizedGenerateConfig};

    let prompt_tokens = gguf.encode(prompt).unwrap_or_else(|| vec![151643, 9707]);
    // Greedy decoding so golden outputs are reproducible.
    let gen_config = QuantizedGenerateConfig {
        max_tokens,
        temperature: 0.0,
        top_k: 1,
        ..Default::default()
    };
    let model = OwnedQuantizedModel::from_mapped(mapped)
        .map_err(|e| CliError::ValidationFailed(format!("Model failed: {e}")))?;
    let tokens = model
        .generate_with_cache(&prompt_tokens, &gen_config)
        .map_err(|e| CliError::ValidationFailed(format!("CPU generation failed: {e}")))?;
    let text = gguf.decode(&tokens);
    Ok((tokens, text))
}
1211
/// Golden prompts (ChatML format) paired with acceptable answer
/// substrings — the output must contain any one of them.
fn golden_test_cases() -> Vec<(&'static str, Vec<&'static str>)> {
    let arithmetic = (
        "<|im_start|>user\nWhat is 2+2?<|im_end|>\n<|im_start|>assistant\n",
        vec!["4"],
    );
    let greeting = (
        "<|im_start|>user\nHello<|im_end|>\n<|im_start|>assistant\n",
        vec!["Hello", "Hi", "hey", "hello", "!"],
    );
    vec![arithmetic, greeting]
}
1229
/// Dispatch golden-output generation to the format-specific path.
/// Returns `Ok(None)` only for SafeTensors models lacking a tokenizer
/// (the gate is then skipped). `mapped`/`gguf_model` are used by the
/// GGUF path only.
#[cfg(feature = "inference")]
fn generate_golden_for_format(
    path: &Path,
    prompt: &str,
    max_tokens: usize,
    format: realizar::format::ModelFormat,
    mapped: &realizar::gguf::MappedGGUFModel,
    gguf_model: &realizar::gguf::GGUFModel,
) -> Result<Option<(Vec<u32>, String)>> {
    use realizar::format::ModelFormat;

    match format {
        ModelFormat::Gguf => Ok(Some(golden_output_gguf_cpu(
            mapped, gguf_model, prompt, max_tokens,
        )?)),
        ModelFormat::Apr => Ok(Some(golden_output_apr(path, prompt, max_tokens)?)),
        ModelFormat::SafeTensors => golden_output_safetensors(path, prompt, max_tokens),
    }
}
1250
/// Validate one golden prompt: generate on the format's CPU path,
/// optionally re-check on the GPU path (GGUF + CUDA only), and verify
/// the text. Returns `Ok(None)` when the case passes, or
/// `Ok(Some(GateResult))` carrying the skip/failure to report.
#[cfg(feature = "inference")]
fn validate_golden_test_case(
    path: &Path,
    prompt: &str,
    expected_patterns: &[&str],
    config: &QaConfig,
    format: realizar::format::ModelFormat,
    mapped: &realizar::gguf::MappedGGUFModel,
    gguf_model: &realizar::gguf::GGUFModel,
    cuda_available: bool,
    start: Instant,
) -> Result<Option<GateResult>> {
    use realizar::format::ModelFormat;

    let Some((_, output_text)) =
        generate_golden_for_format(path, prompt, config.max_tokens, format, mapped, gguf_model)?
    else {
        // SafeTensors without a tokenizer: the whole gate is skipped.
        return Ok(Some(GateResult::skipped(
            "golden_output",
            "SafeTensors: tokenizer.json not found",
        )));
    };

    // GPU parity re-check applies only to GGUF models with CUDA present.
    #[cfg(feature = "cuda")]
    if cuda_available && format == ModelFormat::Gguf {
        use realizar::gguf::QuantizedGenerateConfig;
        let prompt_tokens = gguf_model
            .encode(prompt)
            .unwrap_or_else(|| vec![151643, 9707]);
        let gen_config = QuantizedGenerateConfig {
            max_tokens: config.max_tokens,
            temperature: 0.0,
            top_k: 1,
            ..Default::default()
        };
        if let Some(failure) = validate_gpu_golden_output(
            mapped,
            &prompt_tokens,
            &gen_config,
            gguf_model,
            expected_patterns,
            config,
        )? {
            return Ok(Some(GateResult::failed(
                "golden_output",
                &failure,
                None,
                None,
                start.elapsed(),
            )));
        }
    }
    // Without the cuda feature the flag is intentionally unused.
    #[cfg(not(feature = "cuda"))]
    let _ = cuda_available;

    if let OutputVerification::Fail { reason } =
        verify_output(&output_text, "golden_output", expected_patterns)
    {
        return Ok(Some(GateResult::failed(
            "golden_output",
            &reason,
            None,
            None,
            start.elapsed(),
        )));
    }

    Ok(None)
}
1323
/// Gate: run every golden prompt through the model and verify outputs.
/// Passes when all cases pass; the first failing/skipping case decides
/// the result. Skipped entirely without the `inference` feature.
fn run_golden_output_gate(path: &Path, config: &QaConfig) -> Result<GateResult> {
    let start = Instant::now();

    if !config.json && config.verbose {
        println!("{}", "Running golden output test...".yellow());
    }

    let test_cases = golden_test_cases();

    #[cfg(feature = "inference")]
    {
        use realizar::cuda::CudaExecutor;
        use realizar::format::detect_format;
        use realizar::gguf::{GGUFModel, MappedGGUFModel};

        let cuda_available = CudaExecutor::is_available() && CudaExecutor::num_devices() > 0;
        let model_bytes = std::fs::read(path)
            .map_err(|e| CliError::ValidationFailed(format!("Failed to read model: {e}")))?;
        let format = detect_format(&model_bytes[..8.min(model_bytes.len())])
            .map_err(|e| CliError::ValidationFailed(format!("Failed to detect format: {e}")))?;
        // NOTE(review): the GGUF map/parse below runs for ALL detected
        // formats — presumably tolerant of APR/SafeTensors input since
        // the per-format dispatch happens later; confirm.
        let mapped = MappedGGUFModel::from_path(path)
            .map_err(|e| CliError::ValidationFailed(format!("Map failed: {e}")))?;
        let gguf_model = GGUFModel::from_bytes(&model_bytes)
            .map_err(|e| CliError::ValidationFailed(format!("Failed to parse GGUF: {e}")))?;

        // First failing (or skipping) case short-circuits the gate.
        for (prompt, expected_patterns) in &test_cases {
            if let Some(result) = validate_golden_test_case(
                path,
                prompt,
                expected_patterns,
                config,
                format,
                &mapped,
                &gguf_model,
                cuda_available,
                start,
            )? {
                return Ok(result);
            }
        }

        Ok(GateResult::passed(
            "golden_output",
            &format!("{} golden test cases passed", test_cases.len()),
            Some(test_cases.len() as f64),
            Some(test_cases.len() as f64),
            start.elapsed(),
        ))
    }

    #[cfg(not(feature = "inference"))]
    {
        let _ = (path, config, test_cases);
        Ok(GateResult::skipped(
            "golden_output",
            "Requires 'inference' feature",
        ))
    }
}
1383
1384#[cfg(feature = "inference")]
fn measure_generate_throughput(
    warmup: usize,
    iterations: usize,
    prompt_len: usize,
    overall_start: Instant,
    mut generate_fn: impl FnMut() -> Vec<u32>,
) -> (f64, Duration) {
    // Untimed warmup passes so caches/JIT state are primed before measuring.
    (0..warmup).for_each(|_| {
        generate_fn();
    });

    // Timed passes: count only newly generated tokens, i.e. output length
    // minus the prompt length (saturating so short outputs count as zero).
    let timed = Instant::now();
    let generated: usize = (0..iterations)
        .map(|_| generate_fn().len().saturating_sub(prompt_len))
        .sum();
    let secs = timed.elapsed().as_secs_f64();

    // Returns (tokens-per-second, total elapsed since the caller's timer).
    (generated as f64 / secs, overall_start.elapsed())
}
1412
#[cfg(feature = "inference")]
/// Measures GGUF decode throughput, preferring the CUDA GPU-resident path
/// when a device is available and falling back to the CPU KV-cache path.
///
/// Returns `(tokens_per_second, elapsed_since_start)` where `start` is the
/// caller's overall gate timer.
fn throughput_gguf(
    path: &Path,
    model_bytes: &[u8],
    config: &QaConfig,
    cuda_available: bool,
    start: Instant,
    prompt: &str,
) -> Result<(f64, Duration)> {
    use realizar::gguf::{
        GGUFModel, MappedGGUFModel, OwnedQuantizedModel, OwnedQuantizedModelCuda,
        QuantizedGenerateConfig,
    };

    let gguf = GGUFModel::from_bytes(model_bytes)
        .map_err(|e| CliError::ValidationFailed(format!("Failed to parse GGUF: {e}")))?;
    // Fallback token ids used when the model embeds no tokenizer —
    // presumably Qwen-family ids; TODO confirm against tokenizer tables.
    let prompt_tokens = gguf.encode(prompt).unwrap_or_else(|| vec![151643, 9707]);
    // temperature 0 + top_k 1 => greedy decoding, keeping runs deterministic.
    let gen_config = QuantizedGenerateConfig {
        max_tokens: config.max_tokens,
        temperature: 0.0,
        top_k: 1,
        ..Default::default()
    };

    let mapped = MappedGGUFModel::from_path(path)
        .map_err(|e| CliError::ValidationFailed(format!("Map failed: {e}")))?;
    let model = OwnedQuantizedModel::from_mapped(&mapped)
        .map_err(|e| CliError::ValidationFailed(format!("Model failed: {e}")))?;

    if cuda_available {
        // The CUDA wrapper consumes the CPU model (device ordinal 0).
        let mut cuda_model = OwnedQuantizedModelCuda::new(model, 0)
            .map_err(|e| CliError::ValidationFailed(format!("CUDA init failed: {e}")))?;
        Ok(measure_generate_throughput(
            config.warmup,
            config.iterations,
            prompt_tokens.len(),
            start,
            || {
                cuda_model
                    .generate_gpu_resident(&prompt_tokens, &gen_config)
                    .unwrap_or_default()
            },
        ))
    } else {
        Ok(measure_generate_throughput(
            config.warmup,
            config.iterations,
            prompt_tokens.len(),
            start,
            || {
                model
                    .generate_with_cache(&prompt_tokens, &gen_config)
                    .unwrap_or_default()
            },
        ))
    }
}
1471
#[cfg(feature = "inference")]
/// Measures CPU decode throughput for an APR model using its embedded
/// BPE tokenizer. Returns `(tokens_per_second, elapsed_since_start)`.
fn throughput_apr(
    path: &Path,
    config: &QaConfig,
    start: Instant,
    prompt: &str,
) -> Result<(f64, Duration)> {
    use realizar::apr::AprV2Model;
    use realizar::apr_transformer::{AprTransformer, GenerateConfig};

    // The container is opened twice: once for the embedded tokenizer and
    // once to build the transformer itself.
    let container = AprV2Model::load(path)
        .map_err(|e| CliError::ValidationFailed(format!("Failed to load APR: {e}")))?;
    let tokenizer = container
        .load_embedded_bpe_tokenizer()
        .ok_or_else(|| CliError::ValidationFailed("APR missing embedded tokenizer".to_string()))?;
    let model = AprTransformer::from_apr_file(path)
        .map_err(|e| CliError::ValidationFailed(format!("Failed to load APR transformer: {e}")))?;

    let tokens = tokenizer.encode(prompt);
    // temperature 0 + top_k 1 keeps the benchmark deterministic.
    let gen_config = GenerateConfig {
        max_tokens: config.max_tokens,
        temperature: 0.0,
        top_k: 1,
        ..Default::default()
    };

    Ok(measure_generate_throughput(
        config.warmup,
        config.iterations,
        tokens.len(),
        start,
        || model.generate_with_cache(&tokens, &gen_config).unwrap_or_default(),
    ))
}
1511
#[cfg(feature = "inference")]
/// Measures CPU decode throughput for a SafeTensors model. Returns
/// `Ok(None)` when no sibling tokenizer.json can be found and parsed —
/// the caller reports that as a skip, not a failure.
fn throughput_safetensors(
    path: &Path,
    config: &QaConfig,
    start: Instant,
    prompt: &str,
) -> Result<Option<(f64, Duration)>> {
    use aprender::text::bpe::{load_from_json, BpeTokenizer};
    use realizar::safetensors_infer::SafetensorsToAprConverter;

    // The tokenizer lives next to the weights; any failure along this chain
    // (missing file, unreadable, bad JSON) means "not benchmarkable".
    let loaded: Option<BpeTokenizer> =
        realizar::safetensors::find_sibling_file(path, "tokenizer.json")
            .as_ref()
            .and_then(|p| std::fs::read_to_string(p).ok())
            .and_then(|json| load_from_json(&json).ok());
    let Some(tokenizer) = loaded else {
        return Ok(None);
    };

    let model = SafetensorsToAprConverter::convert(path)
        .map_err(|e| CliError::ValidationFailed(format!("SafeTensors convert failed: {e}")))?;

    let tokens = tokenizer.encode(prompt);
    // temperature 0 + top_k 1 keeps the benchmark deterministic.
    let gen_config = realizar::apr_transformer::GenerateConfig {
        max_tokens: config.max_tokens,
        temperature: 0.0,
        top_k: 1,
        ..Default::default()
    };

    Ok(Some(measure_generate_throughput(
        config.warmup,
        config.iterations,
        tokens.len(),
        start,
        || model.generate_with_cache(&tokens, &gen_config).unwrap_or_default(),
    )))
}
1556
#[cfg(feature = "inference")]
/// Dispatches throughput measurement to the loader for the detected model
/// format. `Ok(None)` means the format was recognized but cannot be
/// benchmarked (SafeTensors without a sibling tokenizer).
fn throughput_for_format(
    path: &Path,
    model_bytes: &[u8],
    format: realizar::format::ModelFormat,
    prompt: &str,
    config: &QaConfig,
    cuda_available: bool,
    start: Instant,
) -> Result<Option<(f64, Duration)>> {
    use realizar::format::ModelFormat;

    match format {
        ModelFormat::SafeTensors => throughput_safetensors(path, config, start, prompt),
        ModelFormat::Apr => throughput_apr(path, config, start, prompt).map(Some),
        ModelFormat::Gguf => {
            throughput_gguf(path, model_bytes, config, cuda_available, start, prompt).map(Some)
        }
    }
}
1578
/// Gate: end-to-end decode throughput must clear a format-specific floor.
///
/// GGUF runs the optimized (possibly CUDA) path and must reach
/// `max(10, min_tps / 10)` tok/s; APR and SafeTensors paths only need
/// 1 tok/s (a smoke-level floor for the slower CPU converters).
fn run_throughput_gate(path: &Path, config: &QaConfig) -> Result<GateResult> {
    let start = Instant::now();

    if !config.json && config.verbose {
        println!("{}", "Running throughput benchmark...".yellow());
    }

    #[cfg(feature = "inference")]
    {
        use realizar::cuda::CudaExecutor;
        use realizar::format::{detect_format, ModelFormat};

        let cuda_available = CudaExecutor::is_available() && CudaExecutor::num_devices() > 0;

        let model_bytes = std::fs::read(path)
            .map_err(|e| CliError::ValidationFailed(format!("Failed to read model: {e}")))?;

        // Only the first 8 bytes (magic) are needed for format detection.
        let format = detect_format(&model_bytes[..8.min(model_bytes.len())])
            .map_err(|e| CliError::ValidationFailed(format!("Failed to detect format: {e}")))?;

        let prompt = "Write a hello world program in Python:";
        // `None` means "recognized but not benchmarkable" — reported as a
        // skip rather than a failure.
        let Some((tps, duration)) = throughput_for_format(
            path,
            &model_bytes,
            format,
            prompt,
            config,
            cuda_available,
            start,
        )?
        else {
            return Ok(GateResult::skipped(
                "throughput",
                "SafeTensors: tokenizer.json not found in model directory",
            ));
        };

        // Per-format thresholds (see doc comment above).
        let threshold = match format {
            ModelFormat::Gguf => 10.0_f64.max(config.min_tps / 10.0),
            ModelFormat::Apr | ModelFormat::SafeTensors => 1.0, };

        if tps >= threshold {
            Ok(GateResult::passed(
                "throughput",
                &format!("{:.1} tok/s >= {:.0} tok/s threshold", tps, threshold),
                Some(tps),
                Some(threshold),
                duration,
            ))
        } else {
            Ok(GateResult::failed(
                "throughput",
                &format!("{:.1} tok/s < {:.0} tok/s threshold", tps, threshold),
                Some(tps),
                Some(threshold),
                duration,
            ))
        }
    }

    #[cfg(not(feature = "inference"))]
    {
        let _ = (path, config);
        Ok(GateResult::skipped(
            "throughput",
            "Requires 'inference' feature",
        ))
    }
}
1655
1656#[cfg(feature = "inference")]
/// Maps our-throughput-over-Ollama ratio to a letter grade. Anything below
/// 0.5x — including a NaN ratio, which fails every guard — is an "F".
fn ollama_parity_grade(ratio: f64) -> &'static str {
    match ratio {
        r if r >= 2.0 => "A+",
        r if r >= 1.5 => "A",
        r if r >= 1.0 => "B",
        r if r >= 0.75 => "C",
        r if r >= 0.5 => "D",
        _ => "F",
    }
}
1676
#[cfg(feature = "inference")]
/// Measures our own GGUF decode throughput for the Ollama parity gate,
/// preferring the CUDA GPU-resident path when a device is present.
///
/// Generates at least 128 tokens per run so the measurement window matches
/// what `measure_ollama_throughput` requests from Ollama.
fn measure_our_gguf_tps(path: &Path, config: &QaConfig) -> Result<f64> {
    use realizar::gguf::{
        GGUFModel, MappedGGUFModel, OwnedQuantizedModel, OwnedQuantizedModelCuda,
        QuantizedGenerateConfig,
    };

    let model_bytes = std::fs::read(path)
        .map_err(|e| CliError::ValidationFailed(format!("Failed to read model: {e}")))?;
    let gguf = GGUFModel::from_bytes(&model_bytes)
        .map_err(|e| CliError::ValidationFailed(format!("Failed to parse GGUF: {e}")))?;

    let prompt = "Write a function to check if a number is prime:";
    // Fallback token id when the model embeds no tokenizer — presumably a
    // Qwen-family id; TODO confirm.
    let prompt_tokens = gguf.encode(prompt).unwrap_or_else(|| vec![151643]);
    let parity_max_tokens = config.max_tokens.max(128);
    // Greedy decoding mirrors the temperature-0 request sent to Ollama.
    let gen_config = QuantizedGenerateConfig {
        max_tokens: parity_max_tokens,
        temperature: 0.0,
        top_k: 1,
        ..Default::default()
    };

    let cuda_available = realizar::cuda::CudaExecutor::is_available()
        && realizar::cuda::CudaExecutor::num_devices() > 0;

    let mapped = MappedGGUFModel::from_path(path)
        .map_err(|e| CliError::ValidationFailed(format!("Map failed: {e}")))?;
    let model = OwnedQuantizedModel::from_mapped(&mapped)
        .map_err(|e| CliError::ValidationFailed(format!("Model failed: {e}")))?;

    if cuda_available {
        // The CUDA wrapper consumes the CPU model (device ordinal 0).
        let mut cuda_model = OwnedQuantizedModelCuda::new(model, 0)
            .map_err(|e| CliError::ValidationFailed(format!("CUDA init failed: {e}")))?;
        let (tps, _) = measure_generate_throughput(
            config.warmup,
            config.iterations,
            prompt_tokens.len(),
            Instant::now(),
            || {
                cuda_model
                    .generate_gpu_resident(&prompt_tokens, &gen_config)
                    .unwrap_or_default()
            },
        );
        Ok(tps)
    } else {
        let (tps, _) = measure_generate_throughput(
            config.warmup,
            config.iterations,
            prompt_tokens.len(),
            Instant::now(),
            || {
                model
                    .generate_with_cache(&prompt_tokens, &gen_config)
                    .unwrap_or_default()
            },
        );
        Ok(tps)
    }
}
1746
/// Gate: our GGUF throughput vs a local Ollama server running the matching
/// qwen2.5-coder model. Skips (never fails) when Ollama is unreachable or
/// its throughput cannot be measured.
fn run_ollama_parity_gate(path: &Path, config: &QaConfig) -> Result<GateResult> {
    let start = Instant::now();

    if !config.json && config.verbose {
        println!("{}", "Running Ollama parity test...".yellow());
    }

    // Probe before any model work — cheap, and also runs in non-inference builds.
    if !check_ollama_available() {
        return Ok(GateResult::skipped(
            "ollama_parity",
            "Ollama not available (start with: ollama serve)",
        ));
    }

    #[cfg(feature = "inference")]
    {
        let ollama_tps = measure_ollama_throughput(path, config)?;

        // 0.0 is the sentinel for "no usable measurement" — skip, don't fail.
        if ollama_tps <= 0.0 {
            return Ok(GateResult::skipped(
                "ollama_parity",
                "Could not measure Ollama throughput",
            ));
        }

        let our_tps = measure_our_gguf_tps(path, config)?;
        let speedup = our_tps / ollama_tps;
        let grade = ollama_parity_grade(speedup);
        let duration = start.elapsed();

        if speedup >= config.min_speedup {
            Ok(GateResult::passed(
                "ollama_parity",
                &format!(
                    "{:.1}x Ollama ({:.0} vs {:.0} tok/s) Grade {grade} >= {:.1}x threshold",
                    speedup, our_tps, ollama_tps, config.min_speedup
                ),
                Some(speedup),
                Some(config.min_speedup),
                duration,
            ))
        } else {
            // NOTE(review): failure message uses {:.2}x vs the pass message's
            // {:.1}x — presumably intentional extra precision; confirm.
            Ok(GateResult::failed(
                "ollama_parity",
                &format!(
                    "{:.2}x Ollama ({:.0} vs {:.0} tok/s) Grade {grade} < {:.1}x threshold",
                    speedup, our_tps, ollama_tps, config.min_speedup
                ),
                Some(speedup),
                Some(config.min_speedup),
                duration,
            ))
        }
    }

    #[cfg(not(feature = "inference"))]
    {
        let _ = (path, config);
        Ok(GateResult::skipped(
            "ollama_parity",
            "Requires 'inference' feature",
        ))
    }
}
1811
#[cfg(feature = "inference")]
/// Benchmarks the same GGUF model on CPU and GPU and returns
/// `(cpu_tokens_per_sec, gpu_tokens_per_sec)`.
///
/// The caller is responsible for checking CUDA availability first; CUDA
/// init errors surface as `ValidationFailed`.
fn measure_gpu_cpu_tps(path: &Path, config: &QaConfig) -> Result<(f64, f64)> {
    use realizar::gguf::{
        GGUFModel, MappedGGUFModel, OwnedQuantizedModel, OwnedQuantizedModelCuda,
        QuantizedGenerateConfig,
    };

    let model_bytes = std::fs::read(path)
        .map_err(|e| CliError::ValidationFailed(format!("Failed to read model: {e}")))?;
    let gguf = GGUFModel::from_bytes(&model_bytes)
        .map_err(|e| CliError::ValidationFailed(format!("Failed to parse GGUF: {e}")))?;

    let prompt = "Write a function to calculate factorial:";
    let prompt_tokens = gguf.encode(prompt).unwrap_or_else(|| vec![151643]);
    // Greedy decoding so both passes generate comparable token streams.
    let gen_config = QuantizedGenerateConfig {
        max_tokens: config.max_tokens,
        temperature: 0.0,
        top_k: 1,
        ..Default::default()
    };

    // CPU pass.
    let mapped = MappedGGUFModel::from_path(path)
        .map_err(|e| CliError::ValidationFailed(format!("Map failed: {e}")))?;
    let model = OwnedQuantizedModel::from_mapped(&mapped)
        .map_err(|e| CliError::ValidationFailed(format!("Model failed: {e}")))?;
    let (cpu_tps, _) = measure_generate_throughput(
        config.warmup,
        config.iterations,
        prompt_tokens.len(),
        Instant::now(),
        || {
            model
                .generate_with_cache(&prompt_tokens, &gen_config)
                .unwrap_or_default()
        },
    );

    // GPU pass. The model is mapped and built a second time because
    // `OwnedQuantizedModelCuda::new` takes its CPU model by value, and the
    // first instance was used for the CPU measurement above.
    let mapped2 = MappedGGUFModel::from_path(path)
        .map_err(|e| CliError::ValidationFailed(format!("Map failed: {e}")))?;
    let model2 = OwnedQuantizedModel::from_mapped(&mapped2)
        .map_err(|e| CliError::ValidationFailed(format!("Model failed: {e}")))?;
    let mut cuda_model = OwnedQuantizedModelCuda::new(model2, 0)
        .map_err(|e| CliError::ValidationFailed(format!("CUDA init failed: {e}")))?;
    let (gpu_tps, _) = measure_generate_throughput(
        config.warmup,
        config.iterations,
        prompt_tokens.len(),
        Instant::now(),
        || {
            cuda_model
                .generate_gpu_resident(&prompt_tokens, &gen_config)
                .unwrap_or_default()
        },
    );

    Ok((cpu_tps, gpu_tps))
}
1878
/// Gate: GPU decode must be at least `min_gpu_speedup`x faster than CPU for
/// the same GGUF model. Skips when CUDA is unavailable or the model is not
/// GGUF; fails when CPU throughput is zero (speedup undefined).
fn run_gpu_speedup_gate(path: &Path, config: &QaConfig) -> Result<GateResult> {
    let start = Instant::now();

    if !config.json && config.verbose {
        println!("{}", "Running GPU vs CPU speedup test...".yellow());
    }

    #[cfg(feature = "inference")]
    {
        use realizar::cuda::CudaExecutor;
        use realizar::format::{detect_format, ModelFormat};

        let cuda_available = CudaExecutor::is_available() && CudaExecutor::num_devices() > 0;
        if !cuda_available {
            return Ok(GateResult::skipped(
                "gpu_speedup",
                "CUDA not available - cannot compare GPU vs CPU",
            ));
        }

        let model_bytes = std::fs::read(path)
            .map_err(|e| CliError::ValidationFailed(format!("Failed to read model: {e}")))?;
        // Magic-byte sniff; 8 bytes is enough for format detection.
        let format = detect_format(&model_bytes[..8.min(model_bytes.len())])
            .map_err(|e| CliError::ValidationFailed(format!("Failed to detect format: {e}")))?;
        if format != ModelFormat::Gguf {
            return Ok(GateResult::skipped(
                "gpu_speedup",
                "Only GGUF format supported",
            ));
        }

        let (cpu_tps, gpu_tps) = measure_gpu_cpu_tps(path, config)?;
        let duration = start.elapsed();

        // Guard the division below — zero CPU throughput means the benchmark
        // itself produced nothing.
        if cpu_tps <= 0.0 {
            return Ok(GateResult::failed(
                "gpu_speedup",
                "CPU throughput was zero - cannot calculate speedup",
                None,
                None,
                duration,
            ));
        }

        let speedup = gpu_tps / cpu_tps;

        if speedup >= config.min_gpu_speedup {
            Ok(GateResult::passed(
                "gpu_speedup",
                &format!(
                    "GPU {:.1}x faster than CPU ({:.0} vs {:.0} tok/s) >= {:.1}x threshold",
                    speedup, gpu_tps, cpu_tps, config.min_gpu_speedup
                ),
                Some(speedup),
                Some(config.min_gpu_speedup),
                duration,
            ))
        } else {
            Ok(GateResult::failed(
                "gpu_speedup",
                &format!(
                    "GPU {:.2}x faster than CPU ({:.0} vs {:.0} tok/s) < {:.1}x threshold",
                    speedup, gpu_tps, cpu_tps, config.min_gpu_speedup
                ),
                Some(speedup),
                Some(config.min_gpu_speedup),
                duration,
            ))
        }
    }

    #[cfg(not(feature = "inference"))]
    {
        let _ = (path, config);
        Ok(GateResult::skipped(
            "gpu_speedup",
            "Requires 'inference' feature",
        ))
    }
}
1959
/// Gate: cross-format parity — a GGUF model and a SafeTensors build of
/// (presumably) the same weights must pick the same argmax token for one
/// forward pass over an identical token sequence.
///
/// Skips when no SafeTensors path is configured, the path does not exist,
/// or the primary model is not GGUF.
fn run_format_parity_gate(path: &Path, config: &QaConfig) -> Result<GateResult> {
    let start = Instant::now();

    if !config.json && config.verbose {
        println!("{}", "Running cross-format parity test...".yellow());
    }

    #[cfg(feature = "inference")]
    {
        use realizar::format::{detect_format, ModelFormat};
        use realizar::gguf::{GGUFModel, MappedGGUFModel, OwnedQuantizedModel};
        use realizar::safetensors_infer::SafetensorsToAprConverter;

        let Some(safetensors_path) = &config.safetensors_path else {
            return Ok(GateResult::skipped(
                "format_parity",
                "No SafeTensors path provided (use --safetensors-path)",
            ));
        };

        let gguf_bytes = std::fs::read(path)
            .map_err(|e| CliError::ValidationFailed(format!("Failed to read GGUF: {e}")))?;

        let gguf_format = detect_format(&gguf_bytes[..8.min(gguf_bytes.len())]).map_err(|e| {
            CliError::ValidationFailed(format!("Failed to detect GGUF format: {e}"))
        })?;

        if gguf_format != ModelFormat::Gguf {
            return Ok(GateResult::skipped(
                "format_parity",
                "Primary model must be GGUF format",
            ));
        }

        if !safetensors_path.exists() {
            return Ok(GateResult::skipped(
                "format_parity",
                &format!("SafeTensors file not found: {}", safetensors_path.display()),
            ));
        }

        let gguf = GGUFModel::from_bytes(&gguf_bytes)
            .map_err(|e| CliError::ValidationFailed(format!("Failed to parse GGUF: {e}")))?;

        // Tokenize once with the GGUF tokenizer and feed the SAME ids to both
        // models so the comparison sees identical input.
        let prompt = "<|im_start|>user\nWhat is 2+2?<|im_end|>\n<|im_start|>assistant\n";
        let prompt_tokens: Vec<u32> = gguf.encode(prompt).unwrap_or_else(|| vec![151643, 9707]);

        let gguf_logits = {
            let mapped = MappedGGUFModel::from_path(path)
                .map_err(|e| CliError::ValidationFailed(format!("GGUF map failed: {e}")))?;
            let model = OwnedQuantizedModel::from_mapped(&mapped)
                .map_err(|e| CliError::ValidationFailed(format!("GGUF model failed: {e}")))?;
            model
                .forward(&prompt_tokens)
                .map_err(|e| CliError::ValidationFailed(format!("GGUF forward failed: {e}")))?
        };

        let st_logits = {
            let transformer =
                SafetensorsToAprConverter::convert(safetensors_path).map_err(|e| {
                    CliError::ValidationFailed(format!("SafeTensors convert failed: {e}"))
                })?;
            transformer.forward(&prompt_tokens).map_err(|e| {
                CliError::ValidationFailed(format!("SafeTensors forward failed: {e}"))
            })?
        };

        let duration = start.elapsed();

        // Greedy next-token pick: index of the maximal logit. `partial_cmp`
        // falls back to `Equal` for NaN, and `max_by` keeps the last maximal
        // element on ties.
        let gguf_argmax = gguf_logits
            .iter()
            .enumerate()
            .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
            .map(|(idx, _)| idx as u32);

        let st_argmax = st_logits
            .iter()
            .enumerate()
            .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
            .map(|(idx, _)| idx as u32);

        match (gguf_argmax, st_argmax) {
            (Some(gguf_token), Some(st_token)) => {
                if gguf_token == st_token {
                    Ok(GateResult::passed(
                        "format_parity",
                        &format!(
                            "GGUF argmax={} == SafeTensors argmax={} (Cross-format parity VERIFIED)",
                            gguf_token, st_token
                        ),
                        Some(gguf_token as f64),
                        Some(st_token as f64),
                        duration,
                    ))
                } else {
                    Ok(GateResult::failed(
                        "format_parity",
                        &format!(
                            "GGUF argmax={} != SafeTensors argmax={} (Cross-format parity BROKEN)",
                            gguf_token, st_token
                        ),
                        Some(gguf_token as f64),
                        Some(st_token as f64),
                        duration,
                    ))
                }
            }
            // `None` argmax means a model returned an empty logits vector.
            _ => Ok(GateResult::failed(
                "format_parity",
                "Failed to get argmax from one or both formats",
                None,
                None,
                duration,
            )),
        }
    }

    #[cfg(not(feature = "inference"))]
    {
        let _ = (path, config);
        Ok(GateResult::skipped(
            "format_parity",
            "Requires 'inference' feature",
        ))
    }
}
2100
/// Probes the local Ollama HTTP API via `curl`. Any failure — curl missing,
/// server down, non-200 status — counts as "not available".
fn check_ollama_available() -> bool {
    let probe = std::process::Command::new("curl")
        .args([
            "-s",
            "-o",
            "/dev/null",
            "-w",
            "%{http_code}",
            "http://localhost:11434/api/tags",
        ])
        .output();
    match probe {
        Ok(out) => String::from_utf8_lossy(&out.stdout).trim() == "200",
        Err(_) => false,
    }
}
2117
/// Infers the closest `qwen2.5-coder` Ollama model tag from a model file
/// path: first by size markers in the filename, then by on-disk size.
///
/// NOTE(review): substring matching means an unusual name containing "13b"
/// would hit the "3b" branch — fine for the qwen2.5-coder family
/// (0.5b/1.5b/3b/7b/14b/32b), but confirm if other families are ever passed.
fn detect_ollama_model_from_path(path: &Path) -> String {
    let filename = path.file_name().and_then(|s| s.to_str()).unwrap_or("");
    let filename_lower = filename.to_lowercase();

    // The "-0_5b"/"-1_5b" variants cover underscore-style quantized names.
    // The previous `contains("3b") || contains("-3b")` style duplicates were
    // removed: a plain "Nb" marker already subsumes its "-Nb" form.
    let size = if filename_lower.contains("0.5b") || filename_lower.contains("-0_5b") {
        "0.5b"
    } else if filename_lower.contains("1.5b") || filename_lower.contains("-1_5b") {
        "1.5b"
    } else if filename_lower.contains("3b") {
        "3b"
    } else if filename_lower.contains("7b") {
        "7b"
    } else if filename_lower.contains("14b") {
        "14b"
    } else if filename_lower.contains("32b") {
        "32b"
    } else {
        // No marker in the name: bucket by file size in bytes. A missing or
        // unreadable file reads as 0 and maps to the smallest model.
        match std::fs::metadata(path).map(|m| m.len()).unwrap_or(0) {
            0..=800_000_000 => "0.5b",
            800_000_001..=2_000_000_000 => "1.5b",
            2_000_000_001..=4_000_000_000 => "3b",
            _ => "7b",
        }
    };

    format!("qwen2.5-coder:{size}")
}
2159
#[cfg(feature = "inference")]
/// Measures Ollama's decode throughput by POSTing to its local generate API
/// via `curl` and summing `eval_count` / `eval_duration` (nanoseconds) from
/// each response.
///
/// Returns 0.0 — which the caller treats as "skip" — when no iteration
/// yields usable numbers. Capped at 3 requests because each one generates up
/// to `max(max_tokens, 128)` tokens.
#[allow(clippy::disallowed_methods)]
fn measure_ollama_throughput(path: &Path, config: &QaConfig) -> Result<f64> {
    let prompt = "Write a hello world program in Python:";
    // Pick the Ollama model tag that best matches this model file.
    let model = detect_ollama_model_from_path(path);

    // Floor of 128 generated tokens so the measurement window matches
    // `measure_our_gguf_tps`.
    let parity_max_tokens = config.max_tokens.max(128);
    let request_body = serde_json::json!({
        "model": model,
        "prompt": prompt,
        "stream": false,
        "options": {
            "num_predict": parity_max_tokens,
            "temperature": 0.0
        }
    });

    let mut total_tokens = 0usize;
    let mut total_duration_ns = 0u64;

    for _ in 0..config.iterations.min(3) {
        let output = std::process::Command::new("curl")
            .args([
                "-s",
                "-X",
                "POST",
                "http://localhost:11434/api/generate",
                "-H",
                "Content-Type: application/json",
                "-d",
                &request_body.to_string(),
            ])
            .output();

        // Best-effort: a failed request or unparsable response simply
        // contributes nothing to the totals.
        if let Ok(output) = output {
            if let Ok(response) = serde_json::from_slice::<serde_json::Value>(&output.stdout) {
                if let (Some(eval_count), Some(eval_duration)) = (
                    response
                        .get("eval_count")
                        .and_then(serde_json::Value::as_u64),
                    response
                        .get("eval_duration")
                        .and_then(serde_json::Value::as_u64),
                ) {
                    total_tokens += eval_count as usize;
                    total_duration_ns += eval_duration;
                }
            }
        }
    }

    if total_tokens == 0 || total_duration_ns == 0 {
        return Ok(0.0);
    }

    // eval_duration is reported in nanoseconds; convert to tokens/second.
    let duration_s = total_duration_ns as f64 / 1_000_000_000.0;
    Ok(total_tokens as f64 / duration_s)
}
2227
2228fn print_gate_result(result: &GateResult) {
2230 let badge = if result.skipped {
2231 output::badge_skip("SKIP")
2232 } else if result.passed {
2233 output::badge_pass("PASS")
2234 } else {
2235 output::badge_fail("FAIL")
2236 };
2237
2238 let name = gate_display_name(&result.name);
2239
2240 println!(
2241 " {} {} {}",
2242 badge,
2243 name.white().bold(),
2244 result.message.dimmed()
2245 );
2246
2247 if !result.skipped {
2248 println!(
2249 " {}",
2250 output::duration_fmt(result.duration_ms).dimmed()
2251 );
2252 }
2253 println!();
2254}
2255
/// Gate: validates that every registered CPU/PTX kernel pair agrees, using
/// this model's own dimensions (hidden size, heads, RoPE theta, epsilon).
///
/// Skips for non-GGUF models — PTX kernels only serve quantized inference —
/// and in non-inference builds.
fn run_ptx_parity_gate(path: &Path, config: &QaConfig) -> Result<GateResult> {
    let start = Instant::now();

    if !config.json && config.verbose {
        println!("{}", "Running PTX parity validation...".yellow());
    }

    #[cfg(feature = "inference")]
    {
        use realizar::format::{detect_format, ModelFormat};
        use realizar::ptx_parity::{validate_all_kernel_pairs, KernelDimensions};

        // Sniff only the 8 magic bytes; any read error falls through to None
        // and is treated as "not GGUF".
        let magic = std::fs::File::open(path).ok().and_then(|mut f| {
            use std::io::Read;
            let mut buf = [0u8; 8];
            f.read_exact(&mut buf).ok()?;
            Some(buf.to_vec())
        });
        let fmt = magic.and_then(|m| detect_format(&m).ok());
        if fmt != Some(ModelFormat::Gguf) {
            return Ok(GateResult::skipped(
                "ptx_parity",
                "Non-GGUF format (PTX kernels only apply to quantized inference)",
            ));
        }

        // NOTE(review): a non-UTF-8 path degrades to "" here and would fail
        // below with a generic load error — confirm that is acceptable.
        let mapped = realizar::gguf::MappedGGUFModel::from_path(path.to_str().unwrap_or_default())
            .map_err(|e| CliError::ValidationFailed(format!("Failed to load GGUF: {e}")))?;

        let model_config = realizar::gguf::GGUFConfig::from_gguf(&mapped.model)
            .map_err(|e| CliError::ValidationFailed(format!("Failed to read config: {e}")))?;

        // Kernel dimensions derived from the model's hyperparameters.
        let dims = KernelDimensions {
            hidden_dim: model_config.hidden_dim as u32,
            intermediate_dim: model_config.intermediate_dim as u32,
            num_heads: model_config.num_heads as u32,
            head_dim: (model_config.hidden_dim / model_config.num_heads) as u32,
            rope_theta: model_config.rope_theta,
            epsilon: model_config.eps,
        };

        let report = validate_all_kernel_pairs(&dims);
        let duration = start.elapsed();

        if report.all_passed() {
            Ok(GateResult::passed(
                "ptx_parity",
                &report.summary(),
                Some(report.passed as f64),
                Some(report.total as f64),
                duration,
            ))
        } else {
            // Verbose mode lists each failing kernel with its violations.
            if !config.json && config.verbose {
                for result in &report.results {
                    if !result.passed {
                        println!(
                            " {} {} ({}): {}",
                            "FAIL".red(),
                            result.name,
                            result.dispatch_strategy,
                            result.violations.join("; ")
                        );
                    }
                }
            }
            Ok(GateResult::failed(
                "ptx_parity",
                &report.summary(),
                Some(report.passed as f64),
                Some(report.total as f64),
                duration,
            ))
        }
    }

    #[cfg(not(feature = "inference"))]
    {
        let _ = (path, config, start);
        Ok(GateResult::skipped(
            "ptx_parity",
            "Requires inference feature",
        ))
    }
}
2355
2356fn run_gpu_state_isolation_gate(path: &Path, _config: &QaConfig) -> Result<GateResult> {
2371 let start = Instant::now();
2372
2373 #[cfg(all(feature = "inference", feature = "cuda"))]
2374 {
2375 use realizar::cuda::CudaExecutor;
2376 use realizar::format::{detect_format, ModelFormat};
2377 use realizar::gguf::{
2378 GGUFModel, MappedGGUFModel, OwnedQuantizedModel, OwnedQuantizedModelCuda,
2379 QuantizedGenerateConfig,
2380 };
2381
2382 let cuda_available = CudaExecutor::is_available() && CudaExecutor::num_devices() > 0;
2383 if !cuda_available {
2384 return Ok(GateResult::skipped(
2385 "gpu_state_isolation",
2386 "CUDA not available",
2387 ));
2388 }
2389
2390 let magic = std::fs::File::open(path).ok().and_then(|mut f| {
2391 use std::io::Read;
2392 let mut buf = [0u8; 8];
2393 f.read_exact(&mut buf).ok()?;
2394 Some(buf.to_vec())
2395 });
2396 let fmt = magic.and_then(|m| detect_format(&m).ok());
2397 if fmt != Some(ModelFormat::Gguf) {
2398 return Ok(GateResult::skipped(
2399 "gpu_state_isolation",
2400 "Only GGUF format supported for GPU state isolation",
2401 ));
2402 }
2403
2404 let model_bytes = std::fs::read(path)
2405 .map_err(|e| CliError::ValidationFailed(format!("Failed to read model: {e}")))?;
2406 let gguf = GGUFModel::from_bytes(&model_bytes)
2407 .map_err(|e| CliError::ValidationFailed(format!("Failed to parse GGUF: {e}")))?;
2408
2409 let prompt_a = "<|im_start|>user\nWhat is 2+2?<|im_end|>\n<|im_start|>assistant\n";
2410 let prompt_b = "<|im_start|>user\nWrite hello world in Python<|im_end|>\n<|im_start|>assistant\n";
2411
2412 let tokens_a = gguf.encode(prompt_a).unwrap_or_else(|| vec![151643, 9707]);
2413 let tokens_b = gguf.encode(prompt_b).unwrap_or_else(|| vec![151643, 1234]);
2414
2415 let gen_config = QuantizedGenerateConfig {
2416 max_tokens: 16,
2417 temperature: 0.0,
2418 top_k: 1,
2419 ..Default::default()
2420 };
2421
2422 let mapped = MappedGGUFModel::from_path(path)
2423 .map_err(|e| CliError::ValidationFailed(format!("Map failed: {e}")))?;
2424 let model = OwnedQuantizedModel::from_mapped(&mapped)
2425 .map_err(|e| CliError::ValidationFailed(format!("Model failed: {e}")))?;
2426 let mut cuda_model = OwnedQuantizedModelCuda::new(model, 0)
2427 .map_err(|e| CliError::ValidationFailed(format!("CUDA init failed: {e}")))?;
2428
2429 let output_a = cuda_model
2431 .generate_gpu_resident(&tokens_a, &gen_config)
2432 .map_err(|e| CliError::ValidationFailed(format!("Gen 1 failed: {e}")))?;
2433
2434 let output_b = cuda_model
2436 .generate_gpu_resident(&tokens_b, &gen_config)
2437 .map_err(|e| CliError::ValidationFailed(format!("Gen 2 failed: {e}")))?;
2438
2439 let output_a2 = cuda_model
2441 .generate_gpu_resident(&tokens_a, &gen_config)
2442 .map_err(|e| CliError::ValidationFailed(format!("Gen 3 failed: {e}")))?;
2443
2444 let duration = start.elapsed();
2445
2446 if output_a != output_a2 {
2448 let text_a = gguf.decode(&output_a);
2449 let text_a2 = gguf.decode(&output_a2);
2450 return Ok(GateResult::failed(
2451 "gpu_state_isolation",
2452 &format!(
2453 "State leak: prompt A produced different output on retry. \
2454 First: '{}', Retry: '{}'",
2455 text_a.chars().take(50).collect::<String>(),
2456 text_a2.chars().take(50).collect::<String>()
2457 ),
2458 None,
2459 None,
2460 duration,
2461 ));
2462 }
2463
2464 if output_a == output_b {
2466 return Ok(GateResult::failed(
2467 "gpu_state_isolation",
2468 "Model stuck: same output for different prompts (GPU state not functional)",
2469 None,
2470 None,
2471 duration,
2472 ));
2473 }
2474
2475 Ok(GateResult::passed(
2476 "gpu_state_isolation",
2477 "GPU state properly isolated: 3 generations, deterministic replay confirmed",
2478 Some(3.0),
2479 Some(3.0),
2480 duration,
2481 ))
2482 }
2483
2484 #[cfg(not(all(feature = "inference", feature = "cuda")))]
2485 {
2486 let _ = (path, config);
2487 Ok(GateResult::skipped(
2488 "gpu_state_isolation",
2489 "Requires inference+cuda features",
2490 ))
2491 }
2492}
2493
2494fn run_performance_regression_gate(
2500 current_gates: &[GateResult],
2501 config: &QaConfig,
2502) -> Result<GateResult> {
2503 let start = Instant::now();
2504
2505 let Some(prev_path) = &config.previous_report else {
2506 return Ok(GateResult::skipped(
2507 "performance_regression",
2508 "No previous report provided",
2509 ));
2510 };
2511
2512 let prev_json = std::fs::read_to_string(prev_path).map_err(|e| {
2513 CliError::ValidationFailed(format!("Failed to read previous report: {e}"))
2514 })?;
2515
2516 let prev_report: QaReport = serde_json::from_str(&prev_json).map_err(|e| {
2517 CliError::ValidationFailed(format!("Failed to parse previous report: {e}"))
2518 })?;
2519
2520 let threshold = config.regression_threshold;
2521 let mut regressions = Vec::new();
2522
2523 let comparable_gates = ["throughput", "ollama_parity", "gpu_speedup"];
2525 for gate_name in &comparable_gates {
2526 let prev_gate = prev_report.gates.iter().find(|g| g.name == *gate_name);
2527 let curr_gate = current_gates.iter().find(|g| g.name == *gate_name);
2528
2529 if let (Some(prev), Some(curr)) = (prev_gate, curr_gate) {
2530 if let (Some(prev_val), Some(curr_val)) = (prev.value, curr.value) {
2531 if prev_val > 0.0 && !prev.skipped && !curr.skipped {
2532 let regression = (prev_val - curr_val) / prev_val;
2533 if regression > threshold {
2534 regressions.push(format!(
2535 "{}: {:.1} -> {:.1} ({:.0}% regression)",
2536 gate_name,
2537 prev_val,
2538 curr_val,
2539 regression * 100.0
2540 ));
2541 }
2542 }
2543 }
2544 }
2545 }
2546
2547 let duration = start.elapsed();
2548
2549 if regressions.is_empty() {
2550 Ok(GateResult::passed(
2551 "performance_regression",
2552 &format!(
2553 "No regressions >{:.0}% vs {}",
2554 threshold * 100.0,
2555 prev_path.display()
2556 ),
2557 Some(0.0),
2558 Some(threshold),
2559 duration,
2560 ))
2561 } else {
2562 Ok(GateResult::failed(
2563 "performance_regression",
2564 &format!("Regressions detected: {}", regressions.join("; ")),
2565 Some(regressions.len() as f64),
2566 Some(0.0),
2567 duration,
2568 ))
2569 }
2570}
2571
2572#[cfg(test)]
2573mod tests {
2574 use super::*;
2575 use std::io::Write;
2576 use tempfile::NamedTempFile;
2577
2578 #[test]
2583 fn test_qa_config_default() {
2584 let config = QaConfig::default();
2585 assert!((config.min_tps - 100.0).abs() < f64::EPSILON);
2586 assert!((config.min_speedup - 0.2).abs() < f64::EPSILON);
2587 assert!((config.min_gpu_speedup - 2.0).abs() < f64::EPSILON);
2588 assert!(!config.skip_golden);
2589 assert!(!config.skip_throughput);
2590 assert!(!config.skip_ollama);
2591 assert!(!config.skip_gpu_speedup);
2592 assert!(!config.skip_format_parity);
2593 assert!(config.safetensors_path.is_none());
2594 }
2595
2596 #[test]
2597 fn test_qa_config_default_iterations() {
2598 let config = QaConfig::default();
2599 assert_eq!(config.iterations, 10);
2600 assert_eq!(config.warmup, 3);
2601 assert_eq!(config.max_tokens, 32);
2602 }
2603
2604 #[test]
2605 fn test_qa_config_default_output_flags() {
2606 let config = QaConfig::default();
2607 assert!(!config.json);
2608 assert!(!config.verbose);
2609 }
2610
2611 #[test]
2612 fn test_qa_config_clone() {
2613 let config = QaConfig {
2614 min_tps: 50.0,
2615 skip_golden: true,
2616 ..Default::default()
2617 };
2618 let cloned = config.clone();
2619 assert!((cloned.min_tps - 50.0).abs() < f64::EPSILON);
2620 assert!(cloned.skip_golden);
2621 }
2622
2623 #[test]
2624 fn test_qa_config_debug() {
2625 let config = QaConfig::default();
2626 let debug = format!("{config:?}");
2627 assert!(debug.contains("QaConfig"));
2628 assert!(debug.contains("min_tps"));
2629 }
2630
2631 #[test]
2636 fn test_gate_result_passed() {
2637 let result = GateResult::passed(
2638 "test_gate",
2639 "Test passed",
2640 Some(150.0),
2641 Some(100.0),
2642 Duration::from_secs(1),
2643 );
2644 assert!(result.passed);
2645 assert!(!result.skipped);
2646 assert_eq!(result.name, "test_gate");
2647 }
2648
2649 #[test]
2650 fn test_gate_result_passed_duration() {
2651 let result = GateResult::passed(
2652 "test_gate",
2653 "Test passed",
2654 Some(150.0),
2655 Some(100.0),
2656 Duration::from_millis(1500),
2657 );
2658 assert_eq!(result.duration_ms, 1500);
2659 }
2660
2661 #[test]
2662 fn test_gate_result_passed_no_value() {
2663 let result = GateResult::passed(
2664 "test_gate",
2665 "Test passed",
2666 None,
2667 None,
2668 Duration::from_secs(1),
2669 );
2670 assert!(result.value.is_none());
2671 assert!(result.threshold.is_none());
2672 }
2673
2674 #[test]
2675 fn test_gate_result_failed() {
2676 let result = GateResult::failed(
2677 "test_gate",
2678 "Test failed",
2679 Some(50.0),
2680 Some(100.0),
2681 Duration::from_secs(1),
2682 );
2683 assert!(!result.passed);
2684 assert!(!result.skipped);
2685 }
2686
2687 #[test]
2688 fn test_gate_result_failed_message() {
2689 let result = GateResult::failed(
2690 "throughput",
2691 "50 tok/s < 100 tok/s",
2692 Some(50.0),
2693 Some(100.0),
2694 Duration::from_secs(1),
2695 );
2696 assert!(result.message.contains("50"));
2697 assert!(result.message.contains("100"));
2698 }
2699
2700 #[test]
2701 fn test_gate_result_skipped() {
2702 let result = GateResult::skipped("test_gate", "No GPU available");
2703 assert!(result.passed); assert!(result.skipped);
2705 }
2706
2707 #[test]
2708 fn test_gate_result_skipped_message() {
2709 let result = GateResult::skipped("gpu_speedup", "GPU not available");
2710 assert!(result.message.contains("Skipped"));
2711 assert!(result.message.contains("GPU not available"));
2712 }
2713
2714 #[test]
2715 fn test_gate_result_skipped_no_duration() {
2716 let result = GateResult::skipped("test", "reason");
2717 assert_eq!(result.duration_ms, 0);
2718 }
2719
2720 #[test]
2721 fn test_gate_result_clone() {
2722 let result = GateResult::passed("test", "ok", Some(100.0), None, Duration::from_secs(1));
2723 let cloned = result.clone();
2724 assert_eq!(cloned.name, result.name);
2725 assert_eq!(cloned.passed, result.passed);
2726 }
2727
2728 #[test]
2729 fn test_gate_result_debug() {
2730 let result = GateResult::passed("test", "ok", None, None, Duration::from_secs(0));
2731 let debug = format!("{result:?}");
2732 assert!(debug.contains("GateResult"));
2733 }
2734
2735 #[test]
2736 fn test_gate_result_serialize() {
2737 let result = GateResult::passed(
2738 "throughput",
2739 "100 tok/s",
2740 Some(100.0),
2741 Some(60.0),
2742 Duration::from_secs(1),
2743 );
2744 let json = serde_json::to_string(&result).expect("serialize");
2745 assert!(json.contains("throughput"));
2746 assert!(json.contains("100"));
2747 }
2748
2749 #[test]
2750 fn test_gate_result_deserialize() {
2751 let json =
2752 r#"{"name":"test","passed":true,"message":"ok","duration_ms":1000,"skipped":false}"#;
2753 let result: GateResult = serde_json::from_str(json).expect("deserialize");
2754 assert_eq!(result.name, "test");
2755 assert!(result.passed);
2756 }
2757
2758 #[test]
2763 fn test_qa_report_serialization() {
2764 let report = QaReport {
2765 model: "test.gguf".to_string(),
2766 passed: true,
2767 gates: vec![GateResult::passed(
2768 "throughput",
2769 "100 tok/s",
2770 Some(100.0),
2771 Some(60.0),
2772 Duration::from_secs(5),
2773 )],
2774 total_duration_ms: 5000,
2775 timestamp: "2026-01-15T00:00:00Z".to_string(),
2776 summary: "All gates passed".to_string(),
2777 gates_executed: 0,
2778 gates_skipped: 0,
2779 system_info: None,
2780 };
2781
2782 let json = serde_json::to_string(&report).expect("serialization failed");
2783 assert!(json.contains("throughput"));
2784 assert!(json.contains("passed"));
2785 }
2786
2787 #[test]
2788 fn test_qa_report_deserialization() {
2789 let json = r#"{
2790 "model": "test.gguf",
2791 "passed": true,
2792 "gates": [],
2793 "total_duration_ms": 1000,
2794 "timestamp": "2026-01-01T00:00:00Z",
2795 "summary": "All passed"
2796 }"#;
2797 let report: QaReport = serde_json::from_str(json).expect("deserialize");
2798 assert_eq!(report.model, "test.gguf");
2799 assert!(report.passed);
2800 }
2801
2802 #[test]
2803 fn test_qa_report_failed() {
2804 let report = QaReport {
2805 model: "test.gguf".to_string(),
2806 passed: false,
2807 gates: vec![GateResult::failed(
2808 "throughput",
2809 "50 tok/s < 100 tok/s",
2810 Some(50.0),
2811 Some(100.0),
2812 Duration::from_secs(5),
2813 )],
2814 total_duration_ms: 5000,
2815 timestamp: "2026-01-15T00:00:00Z".to_string(),
2816 summary: "1 gate failed".to_string(),
2817 gates_executed: 0,
2818 gates_skipped: 0,
2819 system_info: None,
2820 };
2821 assert!(!report.passed);
2822 assert_eq!(report.gates.len(), 1);
2823 }
2824
2825 #[test]
2826 fn test_qa_report_multiple_gates() {
2827 let report = QaReport {
2828 model: "test.gguf".to_string(),
2829 passed: true,
2830 gates: vec![
2831 GateResult::passed("golden", "ok", None, None, Duration::from_secs(1)),
2832 GateResult::passed(
2833 "throughput",
2834 "ok",
2835 Some(100.0),
2836 Some(60.0),
2837 Duration::from_secs(2),
2838 ),
2839 GateResult::skipped("ollama", "skipped"),
2840 ],
2841 total_duration_ms: 3000,
2842 timestamp: "2026-01-15T00:00:00Z".to_string(),
2843 summary: "All passed".to_string(),
2844 gates_executed: 0,
2845 gates_skipped: 0,
2846 system_info: None,
2847 };
2848 assert_eq!(report.gates.len(), 3);
2849 }
2850
2851 #[test]
2852 fn test_qa_report_clone() {
2853 let report = QaReport {
2854 model: "test.gguf".to_string(),
2855 passed: true,
2856 gates: vec![],
2857 total_duration_ms: 1000,
2858 timestamp: "2026-01-15T00:00:00Z".to_string(),
2859 summary: "ok".to_string(),
2860 gates_executed: 0,
2861 gates_skipped: 0,
2862 system_info: None,
2863 };
2864 let cloned = report.clone();
2865 assert_eq!(cloned.model, report.model);
2866 }
2867
2868 #[test]
2869 fn test_qa_report_debug() {
2870 let report = QaReport {
2871 model: "test.gguf".to_string(),
2872 passed: true,
2873 gates: vec![],
2874 total_duration_ms: 1000,
2875 timestamp: "now".to_string(),
2876 summary: "ok".to_string(),
2877 gates_executed: 0,
2878 gates_skipped: 0,
2879 system_info: None,
2880 };
2881 let debug = format!("{report:?}");
2882 assert!(debug.contains("QaReport"));
2883 }
2884
    /// `run` must return an error (not panic) when the model path does not exist.
    #[test]
    fn test_run_file_not_found() {
        // NOTE(review): `run` takes a long positional argument list; the labels
        // below are inferred from the `QaConfig` field order and the sibling
        // `test_run_with_custom_thresholds` test — TODO confirm against the
        // actual `run` signature.
        let result = run(
            Path::new("/nonexistent/model.gguf"),
            None, // presumably min_tps override
            None, // presumably min_speedup override
            None, // presumably min_gpu_speedup override
            false,
            false,
            false,
            false,
            false,
            false,
            false, // the seven skip_* gate flags, all disabled
            None, // presumably safetensors_path
            10,   // presumably iterations
            3,    // presumably warmup
            32,   // presumably max_tokens
            false, // presumably json output
            false, // presumably verbose
            None,

            None,

            None,

            false,

            false,
        );
        assert!(result.is_err());
    }
2921
2922 #[test]
2923 fn test_run_invalid_model() {
2924 let mut file = NamedTempFile::with_suffix(".gguf").expect("create temp file");
2925 file.write_all(b"not a valid gguf file").expect("write");
2926
2927 let result = run(
2928 file.path(),
2929 None,
2930 None,
2931 None,
2932 false,
2933 false,
2934 false,
2935 false,
2936 false,
2937 false,
2938 false,
2939 None,
2940 10,
2941 3,
2942 32,
2943 false,
2944 false,
2945 None,
2946
2947 None,
2948
2949 None,
2950
2951 false,
2952
2953 false,
2954 );
2955 assert!(result.is_err());
2957 }
2958
2959 #[test]
2960 fn test_run_with_custom_thresholds() {
2961 let mut file = NamedTempFile::with_suffix(".gguf").expect("create temp file");
2962 file.write_all(b"not valid").expect("write");
2963
2964 let result = run(
2965 file.path(),
2966 Some(50.0), Some(1.5), Some(3.0), false,
2970 false,
2971 false,
2972 false,
2973 false,
2974 false,
2975 false,
2976 None,
2977 5,
2978 2,
2979 16,
2980 false,
2981 false,
2982 None,
2983
2984 None,
2985
2986 None,
2987
2988 false,
2989
2990 false,
2991 );
2992 assert!(result.is_err());
2994 }
2995
2996 #[test]
2997 fn test_run_with_all_skips() {
2998 let mut file = NamedTempFile::with_suffix(".gguf").expect("create temp file");
2999 file.write_all(b"not valid").expect("write");
3000
3001 let result = run(
3002 file.path(),
3003 None,
3004 None,
3005 None,
3006 true, true, true, true, true, true, true, None,
3014 10,
3015 3,
3016 32,
3017 false,
3018 false,
3019 None,
3020
3021 None,
3022
3023 None,
3024
3025 true, true, );
3029 assert!(result.is_ok());
3031 }
3032
3033 #[test]
3034 fn test_run_with_json_output() {
3035 let mut file = NamedTempFile::with_suffix(".gguf").expect("create temp file");
3036 file.write_all(b"not valid").expect("write");
3037
3038 let result = run(
3039 file.path(),
3040 None,
3041 None,
3042 None,
3043 false,
3044 false,
3045 false,
3046 false,
3047 false,
3048 false,
3049 false,
3050 None,
3051 10,
3052 3,
3053 32,
3054 true, false,
3056 None,
3057
3058 None,
3059
3060 None,
3061
3062 false,
3063
3064 false,
3065 );
3066 assert!(result.is_err());
3068 }
3069
3070 #[test]
3071 fn test_run_with_verbose() {
3072 let mut file = NamedTempFile::with_suffix(".gguf").expect("create temp file");
3073 file.write_all(b"not valid").expect("write");
3074
3075 let result = run(
3076 file.path(),
3077 None,
3078 None,
3079 None,
3080 false,
3081 false,
3082 false,
3083 false,
3084 false,
3085 false,
3086 false,
3087 None,
3088 10,
3089 3,
3090 32,
3091 false,
3092 true, None,
3094
3095 None,
3096
3097 None,
3098
3099 false,
3100
3101 false,
3102 );
3103 assert!(result.is_err());
3105 }
3106
3107 #[test]
3108 fn test_run_with_safetensors_path() {
3109 let mut file = NamedTempFile::with_suffix(".gguf").expect("create temp file");
3110 file.write_all(b"not valid").expect("write");
3111 let st_file = NamedTempFile::with_suffix(".safetensors").expect("create st file");
3112
3113 let result = run(
3114 file.path(),
3115 None,
3116 None,
3117 None,
3118 false,
3119 false,
3120 false,
3121 false,
3122 false,
3123 false,
3124 false,
3125 Some(st_file.path().to_path_buf()), 10,
3127 3,
3128 32,
3129 false,
3130 false,
3131 None,
3132
3133 None,
3134
3135 None,
3136
3137 false,
3138
3139 false,
3140 );
3141 assert!(result.is_err());
3143 }
3144
3145 #[test]
3146 fn test_run_with_small_iterations() {
3147 let mut file = NamedTempFile::with_suffix(".gguf").expect("create temp file");
3148 file.write_all(b"not valid").expect("write");
3149
3150 let result = run(
3151 file.path(),
3152 None,
3153 None,
3154 None,
3155 false,
3156 false,
3157 false,
3158 false,
3159 false,
3160 false,
3161 false,
3162 None,
3163 1, 0, 8, false,
3167 false,
3168 None,
3169
3170 None,
3171
3172 None,
3173
3174 false,
3175
3176 false,
3177 );
3178 assert!(result.is_err());
3180 }
3181
3182 #[cfg(feature = "inference")]
3189 mod format_dispatch_tests {
3190 use realizar::format::{detect_format, ModelFormat};
3191
3192 #[test]
3194 fn test_gguf_format_detection() {
3195 let gguf_magic = b"GGUF\x03\x00\x00\x00"; let format = detect_format(gguf_magic).expect("detect GGUF");
3198 assert_eq!(format, ModelFormat::Gguf, "GGUF magic must detect as GGUF");
3199 }
3200
3201 #[test]
3203 fn test_apr_v2_format_detection() {
3204 let apr_magic = b"APR\x00\x02\x00\x00\x00"; let format = detect_format(apr_magic).expect("detect APR");
3207 assert_eq!(format, ModelFormat::Apr, "APR magic must detect as APR");
3208 }
3209
3210 #[test]
3212 fn test_safetensors_format_detection() {
3213 let mut st_magic = Vec::new();
3215 st_magic.extend_from_slice(&100u64.to_le_bytes()); st_magic.extend_from_slice(b"{\""); let format = detect_format(&st_magic).expect("detect SafeTensors");
3218 assert_eq!(
3219 format,
3220 ModelFormat::SafeTensors,
3221 "SafeTensors magic must detect as SafeTensors"
3222 );
3223 }
3224
3225 #[test]
3228 fn test_apr_format_does_not_skip_detection() {
3229 let apr_magic = b"APR\x00\x02\x00\x00\x00"; let format = detect_format(apr_magic).expect("detect APR");
3232
3233 assert_eq!(
3235 format,
3236 ModelFormat::Apr,
3237 "APR format MUST be detected - cannot skip with 'GGUF only' error"
3238 );
3239 }
3240
3241 #[test]
3243 fn test_model_format_enum_completeness() {
3244 let formats = [
3246 ModelFormat::Gguf,
3247 ModelFormat::Apr,
3248 ModelFormat::SafeTensors,
3249 ];
3250 assert_eq!(
3251 formats.len(),
3252 3,
3253 "Must support exactly 3 formats: GGUF, APR, SafeTensors"
3254 );
3255 }
3256 }
3257
3258 #[test]
3264 fn test_gate_result_skipped_flag_semantics() {
3265 let skipped = GateResult::skipped("test", "reason");
3267 assert!(skipped.skipped, "Skipped gate must have skipped=true");
3268 assert!(skipped.passed, "Skipped gates count as passed (don't fail)");
3269
3270 let passed = GateResult::passed("test", "ok", None, None, Duration::from_secs(1));
3272 assert!(!passed.skipped, "Passed gate must have skipped=false");
3273 assert!(passed.passed, "Passed gate must have passed=true");
3274
3275 let failed = GateResult::failed("test", "fail", None, None, Duration::from_secs(1));
3277 assert!(!failed.skipped, "Failed gate must have skipped=false");
3278 assert!(!failed.passed, "Failed gate must have passed=false");
3279 }
3280
3281 #[test]
3283 fn test_skipped_gate_must_have_reason() {
3284 let result = GateResult::skipped("test_gate", "Explicit reason required");
3285 assert!(
3286 result.message.contains("Skipped"),
3287 "Skip message must contain 'Skipped'"
3288 );
3289 assert!(result.message.len() > 10, "Skip reason must be descriptive");
3290 }
3291
3292 #[test]
3300 fn gate_result_value_equals_threshold_is_pass() {
3301 let result = GateResult::passed(
3304 "throughput",
3305 "100.0 tok/s >= 100.0 tok/s threshold",
3306 Some(100.0),
3307 Some(100.0),
3308 Duration::from_secs(1),
3309 );
3310 assert!(result.passed);
3311 assert_eq!(result.value, Some(100.0));
3312 assert_eq!(result.threshold, Some(100.0));
3313 }
3314
3315 #[test]
3318 fn gate_result_value_just_below_threshold_is_fail() {
3319 let result = GateResult::failed(
3320 "throughput",
3321 "99.9 tok/s < 100.0 tok/s",
3322 Some(99.9),
3323 Some(100.0),
3324 Duration::from_secs(1),
3325 );
3326 assert!(!result.passed);
3327 assert!(!result.skipped);
3328 }
3329
3330 #[test]
3333 fn gate_result_zero_duration() {
3334 let result = GateResult::passed(
3335 "fast_gate",
3336 "Sub-millisecond completion",
3337 None,
3338 None,
3339 Duration::from_nanos(0),
3340 );
3341 assert_eq!(result.duration_ms, 0);
3342 assert!(result.passed);
3343 }
3344
3345 #[test]
3348 fn gate_result_large_duration_no_overflow() {
3349 let result = GateResult::passed(
3351 "slow_gate",
3352 "Long-running test",
3353 None,
3354 None,
3355 Duration::from_secs(1_000_000),
3356 );
3357 assert_eq!(result.duration_ms, 1_000_000_000);
3358 }
3359
3360 #[test]
3364 fn gate_result_skipped_has_no_metrics() {
3365 let result = GateResult::skipped("contract", "Model not found");
3366 assert!(result.value.is_none(), "Skipped gate must have no value");
3367 assert!(
3368 result.threshold.is_none(),
3369 "Skipped gate must have no threshold"
3370 );
3371 }
3372
3373 #[test]
3376 fn gate_result_failed_without_value() {
3377 let result = GateResult::failed(
3378 "golden_output",
3379 "Inference engine crashed",
3380 None,
3381 None,
3382 Duration::from_millis(50),
3383 );
3384 assert!(!result.passed);
3385 assert!(result.value.is_none());
3386 }
3387
3388 #[test]
3395 fn gate_result_json_roundtrip_with_values() {
3396 let original = GateResult::passed(
3397 "throughput",
3398 "150.0 tok/s >= 100.0 tok/s",
3399 Some(150.0),
3400 Some(100.0),
3401 Duration::from_millis(2500),
3402 );
3403 let json = serde_json::to_string(&original).expect("serialize");
3404 let restored: GateResult = serde_json::from_str(&json).expect("deserialize");
3405 assert_eq!(restored.name, "throughput");
3406 assert!(restored.passed);
3407 assert!(!restored.skipped);
3408 assert_eq!(restored.value, Some(150.0));
3409 assert_eq!(restored.threshold, Some(100.0));
3410 assert_eq!(restored.duration_ms, 2500);
3411 }
3412
3413 #[test]
3416 fn gate_result_json_roundtrip_skipped() {
3417 let original = GateResult::skipped("gpu_speedup", "No GPU");
3418 let json = serde_json::to_string(&original).expect("serialize");
3419 let restored: GateResult = serde_json::from_str(&json).expect("deserialize");
3420 assert!(restored.skipped, "skipped flag must survive round-trip");
3421 assert!(restored.passed, "skipped gates must still show passed=true");
3422 assert!(
3423 restored.value.is_none(),
3424 "value should be None after round-trip"
3425 );
3426 }
3427
3428 #[test]
3431 fn gate_result_json_omits_none_fields() {
3432 let result = GateResult::passed("test", "ok", None, None, Duration::from_secs(1));
3433 let json = serde_json::to_string(&result).expect("serialize");
3434 assert!(
3435 !json.contains("value"),
3436 "None value should be omitted from JSON, got: {json}"
3437 );
3438 assert!(
3439 !json.contains("threshold"),
3440 "None threshold should be omitted from JSON, got: {json}"
3441 );
3442 }
3443
3444 #[test]
3451 fn qa_report_all_skipped_gates_passes() {
3452 let report = QaReport {
3453 model: "test.gguf".to_string(),
3454 passed: true,
3455 gates: vec![
3456 GateResult::skipped("golden", "no model"),
3457 GateResult::skipped("throughput", "no engine"),
3458 GateResult::skipped("ollama", "not available"),
3459 ],
3460 total_duration_ms: 10,
3461 timestamp: "2026-02-06T00:00:00Z".to_string(),
3462 summary: "All skipped".to_string(),
3463 gates_executed: 0,
3464 gates_skipped: 0,
3465 system_info: None,
3466 };
3467 assert!(report.passed);
3468 assert!(
3469 report.gates.iter().all(|g| g.skipped),
3470 "All gates should be skipped"
3471 );
3472 assert!(
3473 report.gates.iter().all(|g| g.passed),
3474 "All skipped gates should count as passed"
3475 );
3476 }
3477
3478 #[test]
3481 fn qa_report_single_failure_taints_report() {
3482 let gates = [
3483 GateResult::passed("golden", "ok", None, None, Duration::from_secs(1)),
3484 GateResult::failed(
3485 "throughput",
3486 "too slow",
3487 Some(5.0),
3488 Some(100.0),
3489 Duration::from_secs(2),
3490 ),
3491 GateResult::passed(
3492 "contract",
3493 "ok",
3494 Some(100.0),
3495 Some(0.0),
3496 Duration::from_secs(1),
3497 ),
3498 ];
3499 let passed = gates.iter().all(|g| g.passed);
3500 assert!(!passed, "Single failure must taint the entire report");
3501 }
3502
3503 #[test]
3507 fn qa_report_mixed_pass_and_skip_passes() {
3508 let gates = [
3509 GateResult::passed("golden", "ok", None, None, Duration::from_secs(1)),
3510 GateResult::skipped("ollama", "not available"),
3511 GateResult::passed(
3512 "contract",
3513 "ok",
3514 Some(50.0),
3515 Some(0.0),
3516 Duration::from_secs(1),
3517 ),
3518 GateResult::skipped("gpu_speedup", "no GPU"),
3519 ];
3520 let passed = gates.iter().all(|g| g.passed);
3521 assert!(passed, "Mix of passed + skipped should be overall pass");
3522 }
3523
3524 #[test]
3527 fn qa_report_failed_gate_filter_excludes_skipped() {
3528 let gates = [
3529 GateResult::failed(
3530 "throughput",
3531 "too slow",
3532 Some(1.0),
3533 Some(100.0),
3534 Duration::from_secs(1),
3535 ),
3536 GateResult::skipped("ollama", "not running"),
3537 GateResult::passed("contract", "ok", None, None, Duration::from_secs(1)),
3538 ];
3539 let failed_gates: Vec<_> = gates.iter().filter(|g| !g.passed && !g.skipped).collect();
3540 assert_eq!(
3541 failed_gates.len(),
3542 1,
3543 "Only non-skipped failures should appear"
3544 );
3545 assert_eq!(failed_gates[0].name, "throughput");
3546 }
3547
3548 #[test]
3555 fn qa_report_json_roundtrip_complete() {
3556 let original = QaReport {
3557 model: "/path/to/model.gguf".to_string(),
3558 passed: false,
3559 gates: vec![
3560 GateResult::passed(
3561 "contract",
3562 "50 tensors ok",
3563 Some(50.0),
3564 Some(0.0),
3565 Duration::from_millis(100),
3566 ),
3567 GateResult::failed(
3568 "throughput",
3569 "5 < 100",
3570 Some(5.0),
3571 Some(100.0),
3572 Duration::from_millis(5000),
3573 ),
3574 GateResult::skipped("ollama", "not installed"),
3575 ],
3576 total_duration_ms: 5100,
3577 timestamp: "2026-02-06T12:00:00Z".to_string(),
3578 summary: "Failed gates: throughput".to_string(),
3579 gates_executed: 0,
3580 gates_skipped: 0,
3581 system_info: None,
3582 };
3583
3584 let json = serde_json::to_string_pretty(&original).expect("serialize");
3585 let restored: QaReport = serde_json::from_str(&json).expect("deserialize");
3586
3587 assert_eq!(restored.model, original.model);
3588 assert_eq!(restored.passed, original.passed);
3589 assert_eq!(restored.gates.len(), 3);
3590 assert_eq!(restored.total_duration_ms, original.total_duration_ms);
3591 assert_eq!(restored.summary, original.summary);
3592 assert!(restored.gates[0].passed);
3594 assert!(!restored.gates[1].passed);
3595 assert!(restored.gates[2].skipped);
3596 }
3597
3598 #[test]
3605 fn detect_ollama_model_standard_sizes() {
3606 let cases = vec![
3607 ("/tmp/qwen2-0.5b-instruct-q4_0.gguf", "0.5b"),
3608 ("/tmp/qwen2-1.5b-instruct-q4_0.gguf", "1.5b"),
3609 ("/tmp/qwen2-7b-instruct-q4_0.gguf", "7b"),
3610 ("/tmp/qwen2-14b-instruct-q4_0.gguf", "14b"),
3611 ("/tmp/qwen2-32b-instruct-q4_0.gguf", "32b"),
3612 ];
3613 for (path, expected_size) in cases {
3614 let model = detect_ollama_model_from_path(std::path::Path::new(path));
3615 let expected = format!("qwen2.5-coder:{expected_size}");
3616 assert_eq!(
3617 model, expected,
3618 "Path '{path}' should detect size '{expected_size}'"
3619 );
3620 }
3621 }
3622
3623 #[test]
3626 fn detect_ollama_model_underscore_size() {
3627 let model = detect_ollama_model_from_path(std::path::Path::new(
3628 "/cache/qwen2.5-coder-0_5b-instruct-q4_k_m.gguf",
3629 ));
3630 assert!(
3631 model.contains("0.5b"),
3632 "Underscore-separated size should be detected: {model}"
3633 );
3634 }
3635
3636 #[test]
3639 fn detect_ollama_model_3b_not_confused_with_32b() {
3640 let model_3b =
3641 detect_ollama_model_from_path(std::path::Path::new("/tmp/qwen2-3b-instruct.gguf"));
3642 assert!(
3643 model_3b.contains(":3b"),
3644 "Should detect 3b, got: {model_3b}"
3645 );
3646
3647 let model_32b =
3648 detect_ollama_model_from_path(std::path::Path::new("/tmp/qwen2-32b-instruct.gguf"));
3649 assert!(
3650 model_32b.contains(":32b"),
3651 "Should detect 32b, got: {model_32b}"
3652 );
3653 }
3654
3655 #[test]
3658 fn detect_ollama_model_hash_named_file() {
3659 let model = detect_ollama_model_from_path(std::path::Path::new(
3661 "/tmp/e910cab26ae116eb.converted.gguf",
3662 ));
3663 assert!(
3664 model.contains("qwen2.5-coder:"),
3665 "Should produce valid model tag: {model}"
3666 );
3667 }
3668
3669 #[test]
3676 fn qa_config_partial_override_preserves_defaults() {
3677 let config = QaConfig {
3678 min_tps: 500.0,
3679 skip_golden: true,
3680 iterations: 5,
3681 ..Default::default()
3682 };
3683 assert!((config.min_tps - 500.0).abs() < f64::EPSILON);
3685 assert!(config.skip_golden);
3686 assert_eq!(config.iterations, 5);
3687 assert!((config.min_speedup - 0.2).abs() < f64::EPSILON);
3689 assert!((config.min_gpu_speedup - 2.0).abs() < f64::EPSILON);
3690 assert!(!config.skip_throughput);
3691 assert!(!config.skip_ollama);
3692 assert_eq!(config.warmup, 3);
3693 assert_eq!(config.max_tokens, 32);
3694 assert!(!config.json);
3695 }
3696
3697 #[test]
3700 fn qa_config_skip_flags_are_independent() {
3701 let config = QaConfig {
3702 skip_golden: true,
3703 skip_contract: true,
3704 ..Default::default()
3705 };
3706 assert!(config.skip_golden);
3707 assert!(config.skip_contract);
3708 assert!(!config.skip_throughput);
3709 assert!(!config.skip_ollama);
3710 assert!(!config.skip_gpu_speedup);
3711 assert!(!config.skip_format_parity);
3712 }
3713
3714 #[test]
3722 fn all_gate_names_have_display_mapping() {
3723 let gate_names = [
3725 "tensor_contract",
3726 "golden_output",
3727 "throughput",
3728 "ollama_parity",
3729 "gpu_speedup",
3730 "format_parity",
3731 ];
3732 for name in &gate_names {
3733 let display = match *name {
3736 "tensor_contract" => "Tensor Contract",
3737 "golden_output" => "Golden Output",
3738 "throughput" => "Throughput",
3739 "ollama_parity" => "Ollama Parity",
3740 "gpu_speedup" => "GPU Speedup",
3741 "format_parity" => "Format Parity",
3742 _ => panic!("Unknown gate name without display mapping: {name}"),
3743 };
3744 assert!(
3745 !display.is_empty(),
3746 "Display name for '{name}' must not be empty"
3747 );
3748 }
3749 }
3750
3751 #[test]
3758 fn print_gate_result_unknown_name_uses_raw_name() {
3759 let result = GateResult::passed(
3762 "custom_user_gate",
3763 "User-defined gate passed",
3764 None,
3765 None,
3766 Duration::from_millis(42),
3767 );
3768 print_gate_result(&result);
3770 }
3771
3772 #[test]
3774 fn print_gate_result_skip_branch() {
3775 let result = GateResult::skipped("ollama_parity", "Ollama not available");
3776 print_gate_result(&result);
3778 }
3779
3780 #[test]
3782 fn print_gate_result_fail_branch() {
3783 let result = GateResult::failed(
3784 "throughput",
3785 "5.0 tok/s < 100.0 tok/s threshold",
3786 Some(5.0),
3787 Some(100.0),
3788 Duration::from_millis(3500),
3789 );
3790 print_gate_result(&result);
3792 }
3793
3794 #[test]
3796 fn print_gate_result_pass_branch() {
3797 let result = GateResult::passed(
3798 "tensor_contract",
3799 "50 tensors passed all PMAT-235 contract gates",
3800 Some(50.0),
3801 Some(0.0),
3802 Duration::from_millis(120),
3803 );
3804 print_gate_result(&result);
3805 }
3806
3807 #[test]
3810 fn print_gate_result_all_known_gate_names() {
3811 let known_names = [
3812 "tensor_contract",
3813 "golden_output",
3814 "throughput",
3815 "ollama_parity",
3816 "gpu_speedup",
3817 "format_parity",
3818 ];
3819 for name in &known_names {
3820 let result = GateResult::passed(name, "ok", None, None, Duration::from_millis(1));
3821 print_gate_result(&result);
3823 }
3824 }
3825
3826 #[test]
3833 fn detect_ollama_model_case_insensitive() {
3834 let model = detect_ollama_model_from_path(Path::new("/tmp/Qwen2-0.5B-Instruct.gguf"));
3835 assert_eq!(
3836 model, "qwen2.5-coder:0.5b",
3837 "Uppercase '0.5B' should match via to_lowercase"
3838 );
3839 }
3840
3841 #[test]
3843 fn detect_ollama_model_1_5b_underscore() {
3844 let model =
3845 detect_ollama_model_from_path(Path::new("/cache/model-1_5b-instruct-q4_k.gguf"));
3846 assert_eq!(model, "qwen2.5-coder:1.5b");
3847 }
3848
3849 #[test]
3852 fn detect_ollama_model_root_path_no_panic() {
3853 let model = detect_ollama_model_from_path(Path::new("/"));
3854 assert!(
3856 model.starts_with("qwen2.5-coder:"),
3857 "Root path should produce valid model tag: {model}"
3858 );
3859 }
3860
3861 #[test]
3863 fn detect_ollama_model_no_extension() {
3864 let model = detect_ollama_model_from_path(Path::new("/tmp/qwen2-7b-instruct"));
3865 assert_eq!(model, "qwen2.5-coder:7b");
3866 }
3867
3868 #[test]
3871 fn detect_ollama_model_priority_order() {
3872 let model = detect_ollama_model_from_path(Path::new("/tmp/model-0.5b-vs-7b.gguf"));
3874 assert_eq!(
3875 model, "qwen2.5-coder:0.5b",
3876 "0.5b branch should match before 7b"
3877 );
3878 }
3879
3880 #[test]
3882 fn detect_ollama_model_14b_specificity() {
3883 let model = detect_ollama_model_from_path(Path::new("/tmp/llama-14b-chat.gguf"));
3884 assert_eq!(model, "qwen2.5-coder:14b");
3885 }
3886
3887 #[test]
3889 fn detect_ollama_model_file_size_heuristic_tiny() {
3890 let file = NamedTempFile::with_suffix(".gguf").expect("create temp file");
3892 let model = detect_ollama_model_from_path(file.path());
3894 assert_eq!(
3895 model, "qwen2.5-coder:0.5b",
3896 "Empty temp file should map to 0.5b via file size heuristic"
3897 );
3898 }
3899
3900 #[test]
3906 fn qa_report_summary_all_passed_message() {
3907 let gates = vec![
3908 GateResult::passed("golden_output", "ok", None, None, Duration::from_secs(1)),
3909 GateResult::passed(
3910 "throughput",
3911 "150 tok/s",
3912 Some(150.0),
3913 Some(100.0),
3914 Duration::from_secs(2),
3915 ),
3916 ];
3917 let passed = gates.iter().all(|g| g.passed);
3918 let summary = if passed {
3919 "All QA gates passed".to_string()
3920 } else {
3921 let failed: Vec<_> = gates
3922 .iter()
3923 .filter(|g| !g.passed && !g.skipped)
3924 .map(|g| g.name.as_str())
3925 .collect();
3926 format!("Failed gates: {}", failed.join(", "))
3927 };
3928 assert_eq!(summary, "All QA gates passed");
3929 }
3930
3931 #[test]
3933 fn qa_report_summary_lists_failed_gate_names() {
3934 let gates = vec![
3935 GateResult::passed("golden_output", "ok", None, None, Duration::from_secs(1)),
3936 GateResult::failed(
3937 "throughput",
3938 "too slow",
3939 Some(5.0),
3940 Some(100.0),
3941 Duration::from_secs(2),
3942 ),
3943 GateResult::failed(
3944 "ollama_parity",
3945 "too slow vs ollama",
3946 Some(0.1),
3947 Some(0.2),
3948 Duration::from_secs(3),
3949 ),
3950 GateResult::skipped("gpu_speedup", "no GPU"),
3951 ];
3952 let passed = gates.iter().all(|g| g.passed);
3953 assert!(!passed);
3954 let failed_names: Vec<_> = gates
3955 .iter()
3956 .filter(|g| !g.passed && !g.skipped)
3957 .map(|g| g.name.as_str())
3958 .collect();
3959 let summary = format!("Failed gates: {}", failed_names.join(", "));
3960 assert_eq!(summary, "Failed gates: throughput, ollama_parity");
3961 }
3962
3963 #[test]
3965 fn qa_report_summary_skipped_only_is_passed() {
3966 let gates = vec![
3967 GateResult::skipped("golden_output", "no model"),
3968 GateResult::skipped("throughput", "no engine"),
3969 ];
3970 let passed = gates.iter().all(|g| g.passed);
3971 assert!(passed, "All-skipped should be passed");
3972 }
3973
3974 #[test]
3980 fn qa_config_with_safetensors_path() {
3981 let config = QaConfig {
3982 safetensors_path: Some(std::path::PathBuf::from("/models/qwen.safetensors")),
3983 ..Default::default()
3984 };
3985 assert_eq!(
3986 config.safetensors_path.as_deref(),
3987 Some(std::path::Path::new("/models/qwen.safetensors"))
3988 );
3989 }
3990
3991 #[test]
3994 fn qa_config_default_skip_contract_is_false() {
3995 let config = QaConfig::default();
3996 assert!(
3997 !config.skip_contract,
3998 "skip_contract must default to false to ensure tensor validation runs"
3999 );
4000 }
4001
4002 #[test]
4005 fn qa_config_all_skips_enabled() {
4006 let config = QaConfig {
4007 skip_golden: true,
4008 skip_throughput: true,
4009 skip_ollama: true,
4010 skip_gpu_speedup: true,
4011 skip_contract: true,
4012 skip_format_parity: true,
4013 ..Default::default()
4014 };
4015 assert!(config.skip_golden);
4016 assert!(config.skip_throughput);
4017 assert!(config.skip_ollama);
4018 assert!(config.skip_gpu_speedup);
4019 assert!(config.skip_contract);
4020 assert!(config.skip_format_parity);
4021 assert_eq!(config.iterations, 10);
4023 assert!((config.min_tps - 100.0).abs() < f64::EPSILON);
4024 }
4025
4026 #[test]
4029 fn qa_config_json_and_verbose_independent() {
4030 let config = QaConfig {
4031 json: true,
4032 verbose: true,
4033 ..Default::default()
4034 };
4035 assert!(config.json);
4036 assert!(config.verbose);
4037 }
4038
4039 #[test]
4041 fn qa_config_extreme_thresholds() {
4042 let config = QaConfig {
4043 min_tps: f64::MAX,
4044 min_speedup: 0.0,
4045 min_gpu_speedup: f64::MIN_POSITIVE,
4046 iterations: usize::MAX,
4047 warmup: 0,
4048 max_tokens: 1,
4049 ..Default::default()
4050 };
4051 assert_eq!(config.min_tps, f64::MAX);
4052 assert!((config.min_speedup).abs() < f64::EPSILON);
4053 assert_eq!(config.iterations, usize::MAX);
4054 assert_eq!(config.warmup, 0);
4055 assert_eq!(config.max_tokens, 1);
4056 }
4057
4058 #[test]
4065 fn gate_result_submillisecond_duration_truncates_to_zero() {
4066 let result = GateResult::passed(
4067 "fast",
4068 "blazing fast",
4069 None,
4070 None,
4071 Duration::from_micros(999),
4072 );
4073 assert_eq!(
4074 result.duration_ms, 0,
4075 "999 microseconds should truncate to 0ms"
4076 );
4077 }
4078
4079 #[test]
4081 fn gate_result_exact_one_millisecond() {
4082 let result = GateResult::passed("gate", "msg", None, None, Duration::from_millis(1));
4083 assert_eq!(result.duration_ms, 1);
4084 }
4085
4086 #[test]
4088 fn gate_result_nanos_to_millis_truncation() {
4089 let result = GateResult::failed("gate", "msg", None, None, Duration::from_nanos(1_500_000));
4090 assert_eq!(
4091 result.duration_ms, 1,
4092 "1.5ms in nanos should truncate to 1ms"
4093 );
4094 }
4095
4096 #[test]
4103 fn gate_result_skipped_message_format_contract() {
4104 let reasons = [
4105 "No GPU available",
4106 "Ollama not available (start with: ollama serve)",
4107 "Requires 'inference' feature",
4108 "Non-GGUF format (F32/F16 lacks fused kernels for Ollama parity)",
4109 "No --safetensors-path provided",
4110 "Skipped by --skip-golden",
4111 ];
4112 for reason in &reasons {
4113 let result = GateResult::skipped("test", reason);
4114 assert!(
4115 result.message.starts_with("Skipped: "),
4116 "Skipped message must start with 'Skipped: ', got: '{}'",
4117 result.message
4118 );
4119 assert!(
4120 result.message.ends_with(reason),
4121 "Skipped message must end with reason"
4122 );
4123 }
4124 }
4125
4126 #[test]
4128 fn gate_result_passed_preserves_value_and_threshold() {
4129 let result = GateResult::passed(
4130 "throughput",
4131 "150.0 tok/s >= 100.0 tok/s",
4132 Some(150.5),
4133 Some(100.0),
4134 Duration::from_secs(1),
4135 );
4136 assert_eq!(result.value, Some(150.5));
4137 assert_eq!(result.threshold, Some(100.0));
4138 }
4139
4140 #[test]
4142 fn gate_result_failed_preserves_value_and_threshold() {
4143 let result = GateResult::failed(
4144 "ollama_parity",
4145 "0.15x < 0.2x",
4146 Some(0.15),
4147 Some(0.2),
4148 Duration::from_secs(5),
4149 );
4150 assert_eq!(result.value, Some(0.15));
4151 assert_eq!(result.threshold, Some(0.2));
4152 assert!(!result.passed);
4153 }
4154
4155 #[test]
4162 fn gate_result_deserialize_explicit_null_values() {
4163 let json = r#"{
4164 "name": "throughput",
4165 "passed": true,
4166 "message": "ok",
4167 "value": null,
4168 "threshold": null,
4169 "duration_ms": 100,
4170 "skipped": false
4171 }"#;
4172 let result: GateResult = serde_json::from_str(json).expect("deserialize with nulls");
4173 assert!(result.value.is_none());
4174 assert!(result.threshold.is_none());
4175 }
4176
4177 #[test]
4179 fn gate_result_deserialize_missing_optional_fields() {
4180 let json = r#"{
4181 "name": "contract",
4182 "passed": false,
4183 "message": "validation error",
4184 "duration_ms": 50,
4185 "skipped": false
4186 }"#;
4187 let result: GateResult = serde_json::from_str(json).expect("deserialize missing optionals");
4188 assert_eq!(result.name, "contract");
4189 assert!(!result.passed);
4190 assert!(result.value.is_none());
4191 assert!(result.threshold.is_none());
4192 }
4193
4194 #[test]
4201 fn qa_report_empty_gates_is_valid() {
4202 let report = QaReport {
4203 model: "empty.gguf".to_string(),
4204 passed: true,
4205 gates: vec![],
4206 total_duration_ms: 0,
4207 timestamp: "2026-02-06T00:00:00Z".to_string(),
4208 summary: "No gates run".to_string(),
4209 gates_executed: 0,
4210 gates_skipped: 0,
4211 system_info: None,
4212 };
4213 assert!(report.passed);
4214 assert!(report.gates.is_empty());
4215 let json = serde_json::to_string(&report).expect("serialize empty report");
4216 let restored: QaReport = serde_json::from_str(&json).expect("deserialize empty report");
4217 assert!(restored.gates.is_empty());
4218 }
4219
4220 #[test]
4222 fn qa_report_many_gates_serialization() {
4223 let gates: Vec<GateResult> = (0..100)
4224 .map(|i| {
4225 GateResult::passed(
4226 &format!("gate_{i}"),
4227 &format!("Gate {i} passed"),
4228 Some(i as f64),
4229 Some(0.0),
4230 Duration::from_millis(i as u64),
4231 )
4232 })
4233 .collect();
4234 let report = QaReport {
4235 model: "stress.gguf".to_string(),
4236 passed: true,
4237 gates,
4238 total_duration_ms: 4950,
4239 timestamp: "2026-02-06T00:00:00Z".to_string(),
4240 summary: "All passed".to_string(),
4241 gates_executed: 0,
4242 gates_skipped: 0,
4243 system_info: None,
4244 };
4245 let json = serde_json::to_string(&report).expect("serialize many gates");
4246 let restored: QaReport = serde_json::from_str(&json).expect("deserialize many gates");
4247 assert_eq!(restored.gates.len(), 100);
4248 }
4249
4250 #[test]
4257 fn detect_ollama_model_output_format_contract() {
4258 let test_paths = [
4259 "/tmp/model-0.5b.gguf",
4260 "/tmp/model-1.5b.gguf",
4261 "/tmp/model-3b.gguf",
4262 "/tmp/model-7b.gguf",
4263 "/tmp/model-14b.gguf",
4264 "/tmp/model-32b.gguf",
4265 ];
4266 for path in &test_paths {
4267 let model = detect_ollama_model_from_path(Path::new(path));
4268 assert!(
4269 model.starts_with("qwen2.5-coder:"),
4270 "Model tag must start with 'qwen2.5-coder:', got: {model}"
4271 );
4272 let size = model.strip_prefix("qwen2.5-coder:").expect("strip prefix");
4273 assert!(
4274 ["0.5b", "1.5b", "3b", "7b", "14b", "32b"].contains(&size),
4275 "Size must be one of the known sizes, got: {size}"
4276 );
4277 }
4278 }
4279
4280 #[test]
4282 fn detect_ollama_model_directory_path() {
4283 let model = detect_ollama_model_from_path(Path::new("/tmp/models/"));
4284 assert!(
4286 model.starts_with("qwen2.5-coder:"),
4287 "Directory path should produce valid tag: {model}"
4288 );
4289 }
4290
4291 #[test]
4297 fn failed_gates_summary_multiple_failures() {
4298 let gates = vec![
4299 GateResult::failed("golden_output", "wrong", None, None, Duration::from_secs(1)),
4300 GateResult::failed(
4301 "throughput",
4302 "slow",
4303 Some(1.0),
4304 Some(100.0),
4305 Duration::from_secs(2),
4306 ),
4307 GateResult::failed(
4308 "tensor_contract",
4309 "violations",
4310 Some(5.0),
4311 Some(0.0),
4312 Duration::from_secs(1),
4313 ),
4314 GateResult::skipped("ollama_parity", "not available"),
4315 GateResult::passed(
4316 "gpu_speedup",
4317 "ok",
4318 Some(3.0),
4319 Some(2.0),
4320 Duration::from_secs(4),
4321 ),
4322 ];
4323 let failed_names: Vec<&str> = gates
4324 .iter()
4325 .filter(|g| !g.passed && !g.skipped)
4326 .map(|g| g.name.as_str())
4327 .collect();
4328 assert_eq!(failed_names.len(), 3);
4329 let summary = format!("Failed gates: {}", failed_names.join(", "));
4330 assert!(summary.contains("golden_output"));
4331 assert!(summary.contains("throughput"));
4332 assert!(summary.contains("tensor_contract"));
4333 assert!(
4334 !summary.contains("ollama_parity"),
4335 "Skipped gate should not appear in failures"
4336 );
4337 assert!(
4338 !summary.contains("gpu_speedup"),
4339 "Passed gate should not appear in failures"
4340 );
4341 }
4342
4343 #[test]
4345 fn failed_gates_summary_no_failures() {
4346 let gates = vec![
4347 GateResult::passed("golden_output", "ok", None, None, Duration::from_secs(1)),
4348 GateResult::skipped("ollama_parity", "not available"),
4349 ];
4350 let passed = gates.iter().all(|g| g.passed);
4351 assert!(passed);
4352 let summary = if passed {
4353 "All QA gates passed".to_string()
4354 } else {
4355 unreachable!()
4356 };
4357 assert_eq!(summary, "All QA gates passed");
4358 }
4359
4360 #[test]
4368 fn gate_result_nan_value_is_nan() {
4369 let result = GateResult::passed(
4370 "test",
4371 "NaN test",
4372 Some(f64::NAN),
4373 Some(100.0),
4374 Duration::from_secs(1),
4375 );
4376 assert!(
4377 result.value.expect("should have value").is_nan(),
4378 "NaN value must be preserved in GateResult"
4379 );
4380 assert!(
4381 !result.value.expect("should have value").is_finite(),
4382 "NaN is not finite"
4383 );
4384 }
4385
4386 #[test]
4389 fn gate_result_infinity_value_is_infinite() {
4390 let result = GateResult::failed(
4391 "test",
4392 "Inf test",
4393 Some(f64::INFINITY),
4394 Some(100.0),
4395 Duration::from_secs(1),
4396 );
4397 assert!(
4398 result.value.expect("should have value").is_infinite(),
4399 "Infinity must be preserved in GateResult"
4400 );
4401 }
4402
4403 #[test]
4405 fn gate_result_neg_infinity_threshold() {
4406 let result = GateResult::passed(
4407 "test",
4408 "neg inf threshold",
4409 Some(0.0),
4410 Some(f64::NEG_INFINITY),
4411 Duration::from_secs(1),
4412 );
4413 assert!(result
4414 .threshold
4415 .expect("should have threshold")
4416 .is_infinite());
4417 }
4418
4419 #[test]
4425 fn qa_config_clone_with_safetensors_path() {
4426 let config = QaConfig {
4427 safetensors_path: Some(std::path::PathBuf::from("/deep/clone/test.safetensors")),
4428 min_tps: 42.0,
4429 json: true,
4430 verbose: true,
4431 ..Default::default()
4432 };
4433 let cloned = config.clone();
4434 assert_eq!(cloned.safetensors_path, config.safetensors_path);
4435 assert!((cloned.min_tps - 42.0).abs() < f64::EPSILON);
4436 assert!(cloned.json);
4437 assert!(cloned.verbose);
4438 }
4439
4440 #[test]
4449 fn contract_failure_summary_single_failure() {
4450 let failures = vec!["embed_tokens.weight: density below threshold".to_string()];
4451 let summary = if failures.len() <= 3 {
4452 failures.join("; ")
4453 } else {
4454 format!(
4455 "{}; ... and {} more",
4456 failures[..3].join("; "),
4457 failures.len() - 3
4458 )
4459 };
4460 assert_eq!(summary, "embed_tokens.weight: density below threshold");
4461 assert!(!summary.contains("more"));
4462 }
4463
4464 #[test]
4466 fn contract_failure_summary_three_failures_no_truncation() {
4467 let failures = vec![
4468 "layer.0: NaN detected".to_string(),
4469 "layer.1: Inf detected".to_string(),
4470 "layer.2: zero density".to_string(),
4471 ];
4472 let summary = if failures.len() <= 3 {
4473 failures.join("; ")
4474 } else {
4475 format!(
4476 "{}; ... and {} more",
4477 failures[..3].join("; "),
4478 failures.len() - 3
4479 )
4480 };
4481 assert_eq!(
4482 summary,
4483 "layer.0: NaN detected; layer.1: Inf detected; layer.2: zero density"
4484 );
4485 assert!(!summary.contains("more"));
4486 }
4487
4488 #[test]
4490 fn contract_failure_summary_four_failures_truncates() {
4491 let failures = vec![
4492 "a: fail".to_string(),
4493 "b: fail".to_string(),
4494 "c: fail".to_string(),
4495 "d: fail".to_string(),
4496 ];
4497 let summary = if failures.len() <= 3 {
4498 failures.join("; ")
4499 } else {
4500 format!(
4501 "{}; ... and {} more",
4502 failures[..3].join("; "),
4503 failures.len() - 3
4504 )
4505 };
4506 assert!(summary.contains("a: fail; b: fail; c: fail"));
4507 assert!(summary.ends_with("; ... and 1 more"));
4508 }
4509
4510 #[test]
4512 fn contract_failure_summary_ten_failures_truncates() {
4513 let failures: Vec<String> = (0..10).map(|i| format!("tensor_{i}: violation")).collect();
4514 let summary = if failures.len() <= 3 {
4515 failures.join("; ")
4516 } else {
4517 format!(
4518 "{}; ... and {} more",
4519 failures[..3].join("; "),
4520 failures.len() - 3
4521 )
4522 };
4523 assert!(summary.contains("tensor_0: violation"));
4524 assert!(summary.contains("tensor_1: violation"));
4525 assert!(summary.contains("tensor_2: violation"));
4526 assert!(summary.ends_with("; ... and 7 more"));
4527 assert!(!summary.contains("tensor_3"));
4528 }
4529
4530 #[test]
4532 fn contract_failure_summary_zero_failures() {
4533 let failures: Vec<String> = vec![];
4534 let summary = if failures.len() <= 3 {
4535 failures.join("; ")
4536 } else {
4537 format!(
4538 "{}; ... and {} more",
4539 failures[..3].join("; "),
4540 failures.len() - 3
4541 )
4542 };
4543 assert!(summary.is_empty());
4544 }
4545
4546 #[test]
4552 fn detect_ollama_model_3b_standalone() {
4553 let model = detect_ollama_model_from_path(Path::new("/tmp/model3b.gguf"));
4554 assert_eq!(model, "qwen2.5-coder:3b");
4555 }
4556
4557 #[test]
4559 fn detect_ollama_model_dash_3b() {
4560 let model = detect_ollama_model_from_path(Path::new("/tmp/model-3b-chat.gguf"));
4561 assert_eq!(model, "qwen2.5-coder:3b");
4562 }
4563
4564 #[test]
4566 fn detect_ollama_model_dash_7b() {
4567 let model = detect_ollama_model_from_path(Path::new("/tmp/llama-7b-q4_k_m.gguf"));
4568 assert_eq!(model, "qwen2.5-coder:7b");
4569 }
4570
4571 #[test]
4573 fn detect_ollama_model_mixed_case_0_5b() {
4574 let model = detect_ollama_model_from_path(Path::new("/tmp/Qwen2.5-Coder-0.5B-Q4.gguf"));
4575 assert_eq!(model, "qwen2.5-coder:0.5b");
4576 }
4577
4578 #[test]
4580 fn detect_ollama_model_dash_32b() {
4581 let model = detect_ollama_model_from_path(Path::new("/tmp/qwen-32b-instruct.gguf"));
4582 assert_eq!(model, "qwen2.5-coder:32b");
4583 }
4584
4585 #[test]
4587 fn detect_ollama_model_dash_14b() {
4588 let model = detect_ollama_model_from_path(Path::new("/tmp/model-14b.gguf"));
4589 assert_eq!(model, "qwen2.5-coder:14b");
4590 }
4591
4592 #[test]
4594 fn detect_ollama_model_empty_string_path() {
4595 let model = detect_ollama_model_from_path(Path::new(""));
4596 assert!(
4598 model.starts_with("qwen2.5-coder:"),
4599 "Empty path should produce valid tag: {model}"
4600 );
4601 }
4602
4603 #[test]
4605 fn detect_ollama_model_1_5b_before_3b() {
4606 let model = detect_ollama_model_from_path(Path::new("/tmp/model-1.5b-3b.gguf"));
4607 assert_eq!(
4608 model, "qwen2.5-coder:1.5b",
4609 "1.5b should be matched before 3b in priority order"
4610 );
4611 }
4612
4613 #[test]
4615 fn detect_ollama_model_underscore_1_5b_variant() {
4616 let model = detect_ollama_model_from_path(Path::new("/cache/qwen2-1_5b-q4_k.gguf"));
4617 assert_eq!(model, "qwen2.5-coder:1.5b");
4618 }
4619
4620 #[test]
4622 fn detect_ollama_model_underscore_0_5b() {
4623 let model = detect_ollama_model_from_path(Path::new("/tmp/model-0_5b-instruct.gguf"));
4624 assert_eq!(model, "qwen2.5-coder:0.5b");
4625 }
4626
4627 #[test]
4633 fn gate_result_json_value_present_threshold_missing() {
4634 let json = r#"{
4635 "name": "contract",
4636 "passed": true,
4637 "message": "50 tensors ok",
4638 "value": 50.0,
4639 "duration_ms": 100,
4640 "skipped": false
4641 }"#;
4642 let result: GateResult = serde_json::from_str(json).expect("deserialize");
4643 assert_eq!(result.value, Some(50.0));
4644 assert!(result.threshold.is_none());
4645 }
4646
4647 #[test]
4649 fn gate_result_json_threshold_present_value_missing() {
4650 let json = r#"{
4651 "name": "throughput",
4652 "passed": false,
4653 "message": "too slow",
4654 "threshold": 100.0,
4655 "duration_ms": 5000,
4656 "skipped": false
4657 }"#;
4658 let result: GateResult = serde_json::from_str(json).expect("deserialize");
4659 assert!(result.value.is_none());
4660 assert_eq!(result.threshold, Some(100.0));
4661 }
4662
4663 #[test]
4665 fn gate_result_json_includes_value_when_some() {
4666 let result = GateResult::passed(
4667 "throughput",
4668 "150 tok/s",
4669 Some(150.0),
4670 None,
4671 Duration::from_secs(1),
4672 );
4673 let json = serde_json::to_string(&result).expect("serialize");
4674 assert!(
4675 json.contains("\"value\""),
4676 "value should be present: {json}"
4677 );
4678 assert!(
4679 !json.contains("\"threshold\""),
4680 "threshold should be omitted when None: {json}"
4681 );
4682 }
4683
4684 #[test]
4686 fn gate_result_json_includes_both_value_and_threshold() {
4687 let result = GateResult::failed(
4688 "ollama_parity",
4689 "0.1x < 0.2x",
4690 Some(0.1),
4691 Some(0.2),
4692 Duration::from_secs(10),
4693 );
4694 let json = serde_json::to_string(&result).expect("serialize");
4695 assert!(json.contains("\"value\""));
4696 assert!(json.contains("\"threshold\""));
4697 assert!(json.contains("0.1"));
4698 assert!(json.contains("0.2"));
4699 }
4700
4701 #[test]
4707 fn qa_report_json_pretty_print_format() {
4708 let report = QaReport {
4709 model: "test.gguf".to_string(),
4710 passed: true,
4711 gates: vec![GateResult::passed(
4712 "contract",
4713 "ok",
4714 Some(10.0),
4715 Some(0.0),
4716 Duration::from_millis(50),
4717 )],
4718 total_duration_ms: 50,
4719 timestamp: "2026-02-07T00:00:00Z".to_string(),
4720 summary: "All passed".to_string(),
4721 gates_executed: 0,
4722 gates_skipped: 0,
4723 system_info: None,
4724 };
4725 let json = serde_json::to_string_pretty(&report).expect("pretty serialize");
4726 assert!(json.contains('\n'), "Pretty JSON should contain newlines");
4727 assert!(
4728 json.contains(" "),
4729 "Pretty JSON should contain indentation"
4730 );
4731 assert!(json.contains("\"model\""));
4732 assert!(json.contains("\"gates\""));
4733 assert!(json.contains("\"summary\""));
4734 }
4735
4736 #[test]
4738 fn qa_report_json_to_string_pretty_never_panics() {
4739 let report = QaReport {
4740 model: String::new(),
4741 passed: false,
4742 gates: vec![
4743 GateResult::skipped("a", "skip"),
4744 GateResult::failed("b", "fail", Some(f64::NAN), None, Duration::from_secs(0)),
4745 ],
4746 total_duration_ms: 0,
4747 timestamp: String::new(),
4748 summary: String::new(),
4749 gates_executed: 0,
4750 gates_skipped: 0,
4751 system_info: None,
4752 };
4753 let json = serde_json::to_string_pretty(&report).unwrap_or_default();
4755 assert!(!json.is_empty());
4757 }
4758
4759 #[test]
4766 fn run_config_building_none_uses_defaults() {
4767 let min_tps: Option<f64> = None;
4768 let min_speedup: Option<f64> = None;
4769 let min_gpu_speedup: Option<f64> = None;
4770 let config = QaConfig {
4771 min_tps: min_tps.unwrap_or(100.0),
4772 min_speedup: min_speedup.unwrap_or(0.2),
4773 min_gpu_speedup: min_gpu_speedup.unwrap_or(2.0),
4774 ..Default::default()
4775 };
4776 assert!((config.min_tps - 100.0).abs() < f64::EPSILON);
4777 assert!((config.min_speedup - 0.2).abs() < f64::EPSILON);
4778 assert!((config.min_gpu_speedup - 2.0).abs() < f64::EPSILON);
4779 }
4780
4781 #[test]
4783 fn run_config_building_some_overrides_defaults() {
4784 let min_tps: Option<f64> = Some(50.0);
4785 let min_speedup: Option<f64> = Some(1.5);
4786 let min_gpu_speedup: Option<f64> = Some(3.0);
4787 let config = QaConfig {
4788 min_tps: min_tps.unwrap_or(100.0),
4789 min_speedup: min_speedup.unwrap_or(0.2),
4790 min_gpu_speedup: min_gpu_speedup.unwrap_or(2.0),
4791 ..Default::default()
4792 };
4793 assert!((config.min_tps - 50.0).abs() < f64::EPSILON);
4794 assert!((config.min_speedup - 1.5).abs() < f64::EPSILON);
4795 assert!((config.min_gpu_speedup - 3.0).abs() < f64::EPSILON);
4796 }
4797
4798 #[test]
4804 fn print_gate_result_zero_duration_formatting() {
4805 let result = GateResult::passed(
4806 "tensor_contract",
4807 "0 tensors",
4808 Some(0.0),
4809 Some(0.0),
4810 Duration::from_millis(0),
4811 );
4812 print_gate_result(&result);
4814 }
4815
4816 #[test]
4818 fn print_gate_result_large_duration_formatting() {
4819 let result = GateResult::passed(
4820 "throughput",
4821 "ok",
4822 Some(100.0),
4823 Some(50.0),
4824 Duration::from_secs(3600),
4825 );
4826 assert_eq!(result.duration_ms, 3_600_000);
4828 print_gate_result(&result);
4829 }
4830
4831 #[test]
4833 fn print_gate_result_subsecond_duration_formatting() {
4834 let result = GateResult::passed(
4835 "golden_output",
4836 "2 cases passed",
4837 Some(2.0),
4838 Some(2.0),
4839 Duration::from_millis(250),
4840 );
4841 assert_eq!(result.duration_ms, 250);
4842 print_gate_result(&result);
4844 }
4845
4846 #[test]
4852 fn qa_report_all_six_canonical_gates_roundtrip() {
4853 let report = QaReport {
4854 model: "/models/qwen2-0.5b-q4_k.gguf".to_string(),
4855 passed: false,
4856 gates: vec![
4857 GateResult::passed(
4858 "tensor_contract",
4859 "50 tensors ok",
4860 Some(50.0),
4861 Some(0.0),
4862 Duration::from_millis(100),
4863 ),
4864 GateResult::passed(
4865 "golden_output",
4866 "2 test cases passed",
4867 Some(2.0),
4868 Some(2.0),
4869 Duration::from_millis(5000),
4870 ),
4871 GateResult::failed(
4872 "throughput",
4873 "5 tok/s < 100 tok/s",
4874 Some(5.0),
4875 Some(100.0),
4876 Duration::from_millis(10000),
4877 ),
4878 GateResult::skipped("ollama_parity", "Ollama not available"),
4879 GateResult::skipped("gpu_speedup", "CUDA not available"),
4880 GateResult::skipped("format_parity", "No --safetensors-path provided"),
4881 ],
4882 total_duration_ms: 15100,
4883 timestamp: "2026-02-07T12:00:00Z".to_string(),
4884 summary: "Failed gates: throughput".to_string(),
4885 gates_executed: 0,
4886 gates_skipped: 0,
4887 system_info: None,
4888 };
4889 let json = serde_json::to_string_pretty(&report).expect("serialize");
4890 let restored: QaReport = serde_json::from_str(&json).expect("deserialize");
4891 assert_eq!(restored.gates.len(), 6);
4892 assert!(!restored.passed);
4893 assert!(restored.gates[0].passed && !restored.gates[0].skipped);
4895 assert!(restored.gates[1].passed && !restored.gates[1].skipped);
4896 assert!(!restored.gates[2].passed && !restored.gates[2].skipped);
4897 assert!(restored.gates[3].skipped);
4898 assert!(restored.gates[4].skipped);
4899 assert!(restored.gates[5].skipped);
4900 }
4901
4902 #[test]
4908 fn gate_result_passed_message_stored_verbatim() {
4909 let msg = "150.0 tok/s >= 100.0 tok/s threshold";
4910 let result = GateResult::passed(
4911 "throughput",
4912 msg,
4913 Some(150.0),
4914 Some(100.0),
4915 Duration::from_secs(1),
4916 );
4917 assert_eq!(result.message, msg);
4918 }
4919
4920 #[test]
4922 fn gate_result_failed_message_stored_verbatim() {
4923 let msg = "5.0 tok/s < 100.0 tok/s threshold";
4924 let result = GateResult::failed(
4925 "throughput",
4926 msg,
4927 Some(5.0),
4928 Some(100.0),
4929 Duration::from_secs(1),
4930 );
4931 assert_eq!(result.message, msg);
4932 }
4933
4934 #[test]
4936 fn gate_result_skipped_message_exact_format() {
4937 let result = GateResult::skipped("gpu_speedup", "CUDA not available");
4938 assert_eq!(result.message, "Skipped: CUDA not available");
4939 }
4940
4941 #[test]
4943 fn gate_result_skipped_empty_reason() {
4944 let result = GateResult::skipped("test", "");
4945 assert_eq!(result.message, "Skipped: ");
4946 assert!(result.skipped);
4947 }
4948
4949 #[test]
4951 fn gate_result_empty_name() {
4952 let result = GateResult::passed("", "ok", None, None, Duration::from_secs(0));
4953 assert_eq!(result.name, "");
4954 assert!(result.passed);
4955 }
4956
4957 #[test]
4963 fn gate_result_negative_value() {
4964 let result = GateResult::failed(
4965 "gpu_speedup",
4966 "-0.5x slower",
4967 Some(-0.5),
4968 Some(2.0),
4969 Duration::from_secs(1),
4970 );
4971 assert_eq!(result.value, Some(-0.5));
4972 assert!(!result.passed);
4973 }
4974
4975 #[test]
4977 fn gate_result_zero_value() {
4978 let result = GateResult::failed(
4979 "throughput",
4980 "0 tok/s",
4981 Some(0.0),
4982 Some(100.0),
4983 Duration::from_secs(1),
4984 );
4985 assert_eq!(result.value, Some(0.0));
4986 assert_eq!(result.threshold, Some(100.0));
4987 }
4988
4989 #[test]
4991 fn gate_result_epsilon_value() {
4992 let result = GateResult::passed(
4993 "throughput",
4994 "barely passing",
4995 Some(f64::MIN_POSITIVE),
4996 Some(0.0),
4997 Duration::from_secs(1),
4998 );
4999 assert_eq!(result.value, Some(f64::MIN_POSITIVE));
5000 assert!(result.passed);
5001 }
5002
5003 #[test]
5009 fn qa_report_unicode_model_path() {
5010 let report = QaReport {
5011 model: "/modelos/modelo_espa\u{00f1}ol.gguf".to_string(),
5012 passed: true,
5013 gates: vec![],
5014 total_duration_ms: 0,
5015 timestamp: "2026-02-07T00:00:00Z".to_string(),
5016 summary: "ok".to_string(),
5017 gates_executed: 0,
5018 gates_skipped: 0,
5019 system_info: None,
5020 };
5021 let json = serde_json::to_string(&report).expect("serialize unicode path");
5022 let restored: QaReport = serde_json::from_str(&json).expect("deserialize unicode path");
5023 assert!(restored.model.contains("espa\u{00f1}ol"));
5024 }
5025
5026 #[test]
5028 fn qa_report_long_model_path() {
5029 let long_path = format!("/very/{}/model.gguf", "deep/".repeat(100));
5030 let report = QaReport {
5031 model: long_path.clone(),
5032 passed: true,
5033 gates: vec![],
5034 total_duration_ms: 0,
5035 timestamp: "2026-02-07T00:00:00Z".to_string(),
5036 summary: "ok".to_string(),
5037 gates_executed: 0,
5038 gates_skipped: 0,
5039 system_info: None,
5040 };
5041 let json = serde_json::to_string(&report).expect("serialize long path");
5042 let restored: QaReport = serde_json::from_str(&json).expect("deserialize long path");
5043 assert_eq!(restored.model, long_path);
5044 }
5045
5046 #[test]
5048 fn qa_report_empty_model_path() {
5049 let report = QaReport {
5050 model: String::new(),
5051 passed: true,
5052 gates: vec![],
5053 total_duration_ms: 0,
5054 timestamp: "2026-02-07T00:00:00Z".to_string(),
5055 summary: "ok".to_string(),
5056 gates_executed: 0,
5057 gates_skipped: 0,
5058 system_info: None,
5059 };
5060 let json = serde_json::to_string(&report).expect("serialize empty model");
5061 let restored: QaReport = serde_json::from_str(&json).expect("deserialize empty model");
5062 assert!(restored.model.is_empty());
5063 }
5064
5065 #[test]
5071 fn qa_report_all_gates_failed() {
5072 let gates = vec![
5073 GateResult::failed(
5074 "tensor_contract",
5075 "violations",
5076 Some(5.0),
5077 Some(0.0),
5078 Duration::from_secs(1),
5079 ),
5080 GateResult::failed(
5081 "golden_output",
5082 "wrong output",
5083 None,
5084 None,
5085 Duration::from_secs(2),
5086 ),
5087 GateResult::failed(
5088 "throughput",
5089 "too slow",
5090 Some(1.0),
5091 Some(100.0),
5092 Duration::from_secs(3),
5093 ),
5094 ];
5095 let passed = gates.iter().all(|g| g.passed);
5096 assert!(!passed);
5097 let failed_names: Vec<&str> = gates
5098 .iter()
5099 .filter(|g| !g.passed && !g.skipped)
5100 .map(|g| g.name.as_str())
5101 .collect();
5102 assert_eq!(failed_names.len(), 3);
5103 let summary = format!("Failed gates: {}", failed_names.join(", "));
5104 assert_eq!(
5105 summary,
5106 "Failed gates: tensor_contract, golden_output, throughput"
5107 );
5108 }
5109
5110 #[test]
5112 fn qa_report_single_pass_rest_skipped() {
5113 let gates = vec![
5114 GateResult::passed(
5115 "tensor_contract",
5116 "ok",
5117 Some(10.0),
5118 Some(0.0),
5119 Duration::from_secs(1),
5120 ),
5121 GateResult::skipped("golden_output", "no engine"),
5122 GateResult::skipped("throughput", "no engine"),
5123 GateResult::skipped("ollama_parity", "not available"),
5124 GateResult::skipped("gpu_speedup", "no GPU"),
5125 GateResult::skipped("format_parity", "no path"),
5126 ];
5127 let passed = gates.iter().all(|g| g.passed);
5128 assert!(passed);
5129 }
5130
5131 #[test]
5137 fn print_gate_result_format_parity_display_name() {
5138 let result = GateResult::passed(
5139 "format_parity",
5140 "GGUF argmax=42 == SafeTensors argmax=42",
5141 Some(42.0),
5142 Some(42.0),
5143 Duration::from_millis(8000),
5144 );
5145 print_gate_result(&result);
5146 }
5147
5148 #[test]
5150 fn print_gate_result_gpu_speedup_failed() {
5151 let result = GateResult::failed(
5152 "gpu_speedup",
5153 "GPU 1.2x faster than CPU < 2.0x threshold",
5154 Some(1.2),
5155 Some(2.0),
5156 Duration::from_millis(15000),
5157 );
5158 print_gate_result(&result);
5159 }
5160
5161 #[test]
5167 fn qa_config_zero_iterations_and_warmup() {
5168 let config = QaConfig {
5169 iterations: 0,
5170 warmup: 0,
5171 max_tokens: 0,
5172 ..Default::default()
5173 };
5174 assert_eq!(config.iterations, 0);
5175 assert_eq!(config.warmup, 0);
5176 assert_eq!(config.max_tokens, 0);
5177 }
5178
5179 #[test]
5181 fn qa_config_large_max_tokens() {
5182 let config = QaConfig {
5183 max_tokens: 1_000_000,
5184 ..Default::default()
5185 };
5186 assert_eq!(config.max_tokens, 1_000_000);
5187 }
5188
5189 #[test]
5195 fn gate_result_serialize_large_value() {
5196 let result = GateResult::passed(
5197 "throughput",
5198 "very fast",
5199 Some(999_999.99),
5200 Some(100.0),
5201 Duration::from_secs(1),
5202 );
5203 let json = serde_json::to_string(&result).expect("serialize large value");
5204 assert!(json.contains("999999.99"));
5205 }
5206
5207 #[test]
5209 fn gate_result_serialize_tiny_value() {
5210 let result = GateResult::failed(
5211 "throughput",
5212 "basically zero",
5213 Some(0.000_001),
5214 Some(100.0),
5215 Duration::from_secs(1),
5216 );
5217 let json = serde_json::to_string(&result).expect("serialize tiny value");
5218 let restored: GateResult = serde_json::from_str(&json).expect("deserialize tiny value");
5220 assert!((restored.value.expect("has value") - 0.000_001).abs() < 1e-10);
5221 }
5222
5223 #[test]
5229 fn qa_report_deserialize_ignores_unknown_fields() {
5230 let json = r#"{
5231 "model": "test.gguf",
5232 "passed": true,
5233 "gates": [],
5234 "total_duration_ms": 100,
5235 "timestamp": "2026-02-07T00:00:00Z",
5236 "summary": "ok",
5237 "extra_field": "should be ignored",
5238 "another_extra": 42
5239 }"#;
5240 let report: QaReport = serde_json::from_str(json).expect("deserialize with extras");
5241 assert_eq!(report.model, "test.gguf");
5242 assert!(report.passed);
5243 }
5244
5245 #[test]
5247 fn gate_result_deserialize_ignores_unknown_fields() {
5248 let json = r#"{
5249 "name": "test",
5250 "passed": true,
5251 "message": "ok",
5252 "duration_ms": 100,
5253 "skipped": false,
5254 "future_field": "v2"
5255 }"#;
5256 let result: GateResult = serde_json::from_str(json).expect("deserialize with extras");
5257 assert_eq!(result.name, "test");
5258 assert!(result.passed);
5259 }
5260
5261 #[test]
5266 fn verify_output_rejects_empty() {
5267 let result = verify_output("", "test-001", &["4"]);
5268 assert!(matches!(result, OutputVerification::Fail { .. }));
5269 if let OutputVerification::Fail { reason } = result {
5270 assert!(reason.contains("Empty"), "Expected 'Empty', got: {reason}");
5271 }
5272 }
5273
5274 #[test]
5275 fn verify_output_rejects_whitespace_only() {
5276 let result = verify_output(" \n\t ", "test-002", &["4"]);
5277 assert!(matches!(result, OutputVerification::Fail { .. }));
5278 }
5279
5280 #[test]
5281 fn verify_output_rejects_garbage_fffd() {
5282 let result = verify_output("The answer is \u{FFFD}\u{FFFD}", "test-003", &["4"]);
5283 assert!(matches!(result, OutputVerification::Fail { .. }));
5284 if let OutputVerification::Fail { reason } = result {
5285 assert!(
5286 reason.contains("Garbage"),
5287 "Expected 'Garbage', got: {reason}"
5288 );
5289 }
5290 }
5291
5292 #[test]
5293 fn verify_output_rejects_garbage_unk() {
5294 let result = verify_output("Hello [UNK] world", "test-004", &["Hello"]);
5295 assert!(matches!(result, OutputVerification::Fail { .. }));
5296 if let OutputVerification::Fail { reason } = result {
5297 assert!(
5298 reason.contains("Garbage"),
5299 "Expected 'Garbage', got: {reason}"
5300 );
5301 }
5302 }
5303
5304 #[test]
5305 fn verify_output_rejects_null_bytes() {
5306 let result = verify_output("Hello\0World", "test-005", &["Hello"]);
5307 assert!(matches!(result, OutputVerification::Fail { .. }));
5308 if let OutputVerification::Fail { reason } = result {
5309 assert!(
5310 reason.contains("null"),
5311 "Expected 'null bytes', got: {reason}"
5312 );
5313 }
5314 }
5315
5316 #[test]
5317 fn verify_output_rejects_missing_expected() {
5318 let result = verify_output("The answer is five", "test-006", &["4"]);
5319 assert!(matches!(result, OutputVerification::Fail { .. }));
5320 if let OutputVerification::Fail { reason } = result {
5321 assert!(
5322 reason.contains("Expected"),
5323 "Expected mention of pattern, got: {reason}"
5324 );
5325 }
5326 }
5327
5328 #[test]
5329 fn verify_output_accepts_correct() {
5330 let result = verify_output("The answer is 4.", "test-007", &["4"]);
5331 assert!(matches!(result, OutputVerification::Pass));
5332 }
5333
5334 #[test]
5335 fn verify_output_accepts_any_expected_pattern() {
5336 let result = verify_output("Hi there!", "test-008", &["Hello", "Hi", "Hey"]);
5337 assert!(matches!(result, OutputVerification::Pass));
5338 }
5339
5340 #[test]
5341 fn verify_output_case_insensitive() {
5342 let result = verify_output("HELLO WORLD", "test-009", &["hello"]);
5343 assert!(matches!(result, OutputVerification::Pass));
5344 }
5345
5346 #[test]
5347 fn verify_output_garbage_check_before_answer_check() {
5348 let result = verify_output("4 [UNK] answer", "test-010", &["4"]);
5350 assert!(matches!(result, OutputVerification::Fail { .. }));
5351 if let OutputVerification::Fail { reason } = result {
5352 assert!(
5353 reason.contains("Garbage"),
5354 "Garbage check must happen BEFORE answer check, got: {reason}"
5355 );
5356 }
5357 }
5358
5359 #[test]
5360 fn verify_output_no_expected_patterns_passes() {
5361 let result = verify_output("Some valid output", "test-011", &[]);
5363 assert!(matches!(result, OutputVerification::Pass));
5364 }
5365
5366 #[cfg(feature = "inference")]
5371 #[test]
5372 fn ollama_parity_grade_boundaries() {
5373 assert_eq!(ollama_parity_grade(0.0), "F");
5375 assert_eq!(ollama_parity_grade(0.3), "F");
5376 assert_eq!(ollama_parity_grade(0.49), "F");
5377 assert_eq!(ollama_parity_grade(0.5), "D");
5379 assert_eq!(ollama_parity_grade(0.64), "D");
5380 assert_eq!(ollama_parity_grade(0.74), "D");
5381 assert_eq!(ollama_parity_grade(0.75), "C");
5383 assert_eq!(ollama_parity_grade(0.99), "C");
5384 assert_eq!(ollama_parity_grade(1.0), "B");
5386 assert_eq!(ollama_parity_grade(1.49), "B");
5387 assert_eq!(ollama_parity_grade(1.5), "A");
5389 assert_eq!(ollama_parity_grade(1.99), "A");
5390 assert_eq!(ollama_parity_grade(2.0), "A+");
5392 assert_eq!(ollama_parity_grade(3.5), "A+");
5393 }
5394}