/// Returns `true` when the file at `path` carries a GGUF magic prefix.
///
/// Only the first 8 bytes are read, so the check is cheap even for
/// multi-gigabyte model files (the previous implementation slurped the
/// whole file into memory just to look at the magic). Any I/O error,
/// short file, or unrecognized magic yields `false`.
///
/// Without the `inference` feature, format detection is unavailable and
/// every path is treated as non-GGUF.
fn is_gguf_format(path: &Path) -> bool {
    #[cfg(feature = "inference")]
    {
        use realizar::format::{detect_format, ModelFormat};
        use std::io::Read;
        // Read only the magic prefix; `fs::read` would load the entire
        // (potentially huge) model file for an 8-byte check.
        let mut magic = [0u8; 8];
        let read_ok = std::fs::File::open(path)
            .and_then(|mut f| f.read_exact(&mut magic))
            .is_ok();
        read_ok && detect_format(&magic).ok() == Some(ModelFormat::Gguf)
    }
    #[cfg(not(feature = "inference"))]
    {
        let _ = path;
        false
    }
}
/// Orchestrates the full APR QA pipeline for the model at `path`.
///
/// Gates run in a fixed order via `dispatch_gate`, each guarded by its
/// per-gate skip flag from `config` (the skip reason string is what the
/// user sees when a gate is skipped). After all dispatch gates, the
/// performance-regression gate runs, the report is finalized, and a copy
/// is saved to the report cache for future regression comparisons.
///
/// # Errors
/// Propagates any error returned by a gate dispatcher or by report
/// finalization.
fn run_qa(path: &Path, config: &QaConfig) -> Result<QaReport> {
let start = Instant::now();
let mut gates = Vec::new();
// Human-readable banner with the effective thresholds (suppressed in JSON mode).
if !config.json {
output::header("APR Quality Assurance");
let config_pairs = vec![
("Model", path.display().to_string()),
("Min TPS", format!("{:.0} tok/s", config.min_tps)),
("Min Speedup", format!("{:.1}x Ollama", config.min_speedup)),
];
println!("{}", output::kv_table(&config_pairs));
}
dispatch_gate(
&mut gates,
config.json,
config.skip_capability,
"capability_match",
"Skipped by --skip-capability",
|| super::qa_capability::run_capability_gate(path, config),
)?;
dispatch_gate(
&mut gates,
config.json,
config.skip_contract,
"tensor_contract",
"Skipped by --skip-contract",
|| run_tensor_contract_gate(path, config),
)?;
dispatch_gate(
&mut gates,
config.json,
config.skip_metadata,
"metadata_plausibility",
"Skipped by --skip-metadata",
|| run_metadata_plausibility_gate(path, config),
)?;
// NOTE: inverted flag — this gate is opt-in: it runs only when
// --assert-classifier-head was passed, unlike the skip-flag gates above.
dispatch_gate(
&mut gates,
config.json,
!config.assert_classifier_head,
"classifier_head",
"Not requested (use --assert-classifier-head)",
|| run_classifier_head_gate(path, config),
)?;
dispatch_gate(
&mut gates,
config.json,
config.skip_golden,
"golden_output",
"Skipped by --skip-golden",
|| run_golden_output_gate(path, config),
)?;
dispatch_gate(
&mut gates,
config.json,
config.skip_throughput,
"throughput",
"Skipped by --skip-throughput",
|| run_throughput_gate(path, config),
)?;
// Ollama parity only makes sense for GGUF models; non-GGUF formats are
// skipped inside the closure (a dynamic skip, not a config flag).
dispatch_gate(
&mut gates,
config.json,
config.skip_ollama,
"ollama_parity",
"Skipped by --skip-ollama",
|| {
if is_gguf_format(path) {
run_ollama_parity_gate(path, config)
} else {
Ok(GateResult::skipped(
"ollama_parity",
"Non-GGUF format (F32/F16 lacks fused kernels for Ollama parity)",
))
}
},
)?;
dispatch_gate(
&mut gates,
config.json,
config.skip_gpu_speedup,
"gpu_speedup",
"Skipped by --skip-gpu-speedup",
|| run_gpu_speedup_gate(path, config),
)?;
// Skip decision for format parity is computed up front so the reason
// string can be shared with dispatch_gate.
let (skip_format, format_skip_reason) = format_parity_skip_status(config);
dispatch_gate(
&mut gates,
config.json,
skip_format,
"format_parity",
format_skip_reason,
|| run_format_parity_gate(path, config),
)?;
dispatch_gate(
&mut gates,
config.json,
config.skip_ptx_parity,
"ptx_parity",
"Skipped by --skip-ptx-parity",
|| run_ptx_parity_gate(path, config),
)?;
dispatch_gate(
&mut gates,
config.json,
config.skip_gpu_state,
"gpu_state_isolation",
"Skipped by --skip-gpu-state",
|| run_gpu_state_isolation_gate(path, config),
)?;
// Regression gate has its own baseline-discovery logic, so it does not
// go through dispatch_gate.
dispatch_regression_gate(path, &mut gates, config)?;
let report = finalize_qa_report(path, &start, gates, config)?;
// Best-effort cache write; failures are ignored inside the helper.
save_qa_report_to_cache(path, &report);
Ok(report)
}
/// Decides whether the format-parity gate is skipped, returning the skip
/// flag together with the user-facing reason (empty when the gate runs).
fn format_parity_skip_status(config: &QaConfig) -> (bool, &str) {
    if !config.skip_format_parity {
        return (false, "");
    }
    (true, "Skipped by --skip-format-parity")
}
/// Looks for a cached QA report matching this model's file stem under
/// `~/.cache/apr/qa-reports`, returning its path only when the file
/// actually exists on disk.
fn auto_discover_previous_report(model_path: &Path) -> Option<std::path::PathBuf> {
    let stem = model_path.file_stem()?.to_str()?;
    let candidate = dirs::home_dir()?
        .join(".cache/apr/qa-reports")
        .join(format!("{stem}.json"));
    candidate.exists().then_some(candidate)
}
/// Best-effort cache write: serializes `report` to pretty JSON under
/// `~/.cache/apr/qa-reports/<stem>.json`. Every failure mode (no home
/// directory, directory creation, unusable file stem, serialization, or
/// the final write) is silently ignored — caching must never fail a run.
fn save_qa_report_to_cache(model_path: &Path, report: &QaReport) {
    let cache_dir = match dirs::home_dir() {
        Some(home) => home.join(".cache/apr/qa-reports"),
        None => return,
    };
    if std::fs::create_dir_all(&cache_dir).is_err() {
        return;
    }
    let stem = match model_path.file_stem().and_then(|s| s.to_str()) {
        Some(s) => s,
        None => return,
    };
    let Ok(json) = serde_json::to_string_pretty(report) else {
        return;
    };
    let _ = std::fs::write(cache_dir.join(format!("{stem}.json")), json);
}
/// Runs the performance-regression gate, resolving the baseline report in
/// priority order:
/// 1. an explicit `--previous-report` from `config`;
/// 2. an auto-discovered cached report for this model's file stem;
/// 3. no baseline — the gate passes trivially and this run becomes the
///    baseline (saved later by `save_qa_report_to_cache`).
///
/// # Errors
/// Propagates any error from `run_performance_regression_gate`.
fn dispatch_regression_gate(
    model_path: &Path,
    gates: &mut Vec<GateResult>,
    config: &QaConfig,
) -> Result<()> {
    let regression_result = if let Some(ref _prev) = config.previous_report {
        // Caller supplied a baseline explicitly; use config as-is.
        run_performance_regression_gate(gates, config)?
    } else {
        match auto_discover_previous_report(model_path) {
            Some(prev_path) => {
                if !config.json {
                    println!(
                        " {} Auto-discovered previous report: {}",
                        "INFO".cyan(),
                        prev_path.display()
                    );
                }
                // Inject the discovered baseline into a copy of the
                // config; everything else is inherited unchanged.
                let auto_config = QaConfig {
                    previous_report: Some(prev_path),
                    ..config.clone()
                };
                run_performance_regression_gate(gates, &auto_config)?
            }
            None => GateResult::passed(
                "performance_regression",
                "First run — baseline established (saved for future comparison)",
                Some(0.0),
                Some(config.regression_threshold),
                Duration::from_millis(0),
            ),
        }
    };
    if !config.json {
        // FIX: was `print_gate_result(®ression_result)` — the `&r` of
        // `&regression_result` had been mangled into `®` (U+00AE), which
        // does not compile.
        print_gate_result(&regression_result);
    }
    gates.push(regression_result);
    Ok(())
}
/// Assembles the final `QaReport`: counts executed vs skipped gates, warns
/// on excessive skips, applies the optional `--min-executed` floor (which
/// can flip the overall result to failed), prints the human-readable
/// summary unless JSON output is requested, and captures system info.
fn finalize_qa_report(
    path: &Path,
    start: &Instant,
    gates: Vec<GateResult>,
    config: &QaConfig,
) -> Result<QaReport> {
    let total_duration = start.elapsed();
    // Single pass over the gates to split the skipped/executed counts.
    let (gates_executed, gates_skipped) = gates.iter().fold((0, 0), |(run, skip), gate| {
        if gate.skipped {
            (run, skip + 1)
        } else {
            (run + 1, skip)
        }
    });
    warn_excessive_skips(config.json, gates_executed, gates_skipped);
    let mut passed = !gates.iter().any(|g| !g.passed);
    if !check_min_executed(config, gates_executed, &mut passed) && !config.json {
        println!(
            " {} Only {} gates executed, minimum required: {}",
            "FAIL".red().bold(),
            gates_executed,
            config.min_executed.unwrap_or(0),
        );
    }
    let summary = build_qa_summary(&gates, passed, gates_executed, gates_skipped, config);
    if !config.json {
        print_qa_summary(&gates, passed, total_duration);
    }
    Ok(QaReport {
        model: path.display().to_string(),
        passed,
        gates,
        gates_executed,
        gates_skipped,
        total_duration_ms: total_duration.as_millis() as u64,
        timestamp: chrono::Utc::now().to_rfc3339(),
        summary,
        system_info: Some(SystemInfo::capture()),
    })
}
/// Warns when more gates were skipped than executed — a mostly-skipped run
/// is not a rigorous QA pass. Silent in JSON mode.
fn warn_excessive_skips(json: bool, executed: usize, skipped: usize) {
    if json || skipped <= executed {
        return;
    }
    println!(
        " {} {} of {} gates SKIPPED — QA not rigorous",
        "WARN".yellow().bold(),
        skipped,
        executed + skipped
    );
}
/// Enforces the optional `--min-executed` floor. When fewer gates ran than
/// required, clears `*passed` and returns `false`; otherwise (no minimum
/// configured, or floor met) returns `true` and leaves `*passed` alone.
fn check_min_executed(config: &QaConfig, gates_executed: usize, passed: &mut bool) -> bool {
    match config.min_executed {
        Some(min) if gates_executed < min => {
            *passed = false;
            false
        }
        _ => true,
    }
}
/// Builds the one-line summary string stored in the report: a success
/// message with counts, the list of failed gate names, or — when nothing
/// failed yet the run is marked failed — the insufficient-execution
/// message implied by the `--min-executed` floor.
fn build_qa_summary(
    gates: &[GateResult],
    passed: bool,
    gates_executed: usize,
    gates_skipped: usize,
    config: &QaConfig,
) -> String {
    if passed {
        return format!(
            "All QA gates passed ({} executed, {} skipped)",
            gates_executed, gates_skipped
        );
    }
    // Collect the names of gates that actually ran and failed.
    let mut failed_names: Vec<&str> = Vec::new();
    for gate in gates {
        if !gate.passed && !gate.skipped {
            failed_names.push(gate.name.as_str());
        }
    }
    if failed_names.is_empty() {
        // No gate failed, so the overall failure must come from the
        // minimum-execution requirement.
        format!(
            "Insufficient gate execution: {} < {} minimum",
            gates_executed,
            config.min_executed.unwrap_or(0)
        )
    } else {
        format!("Failed gates: {}", failed_names.join(", "))
    }
}
/// PMAT-235 gate: validates every tensor in the model against the tensor
/// contract via RosettaStone. Passes when no tensor reports a failure;
/// otherwise fails with a violation summary capped at three entries.
fn run_tensor_contract_gate(path: &Path, config: &QaConfig) -> Result<GateResult> {
    let start = Instant::now();
    if !config.json && config.verbose {
        println!(
            "{}",
            "Running tensor contract validation (PMAT-235)...".yellow()
        );
    }
    let report = match aprender::format::rosetta::RosettaStone::new().validate(path) {
        Ok(report) => report,
        Err(e) => {
            // Validation itself failed (unreadable/unsupported model).
            return Ok(GateResult::failed(
                "tensor_contract",
                &format!("Failed to validate: {e}"),
                None,
                None,
                start.elapsed(),
            ));
        }
    };
    let duration = start.elapsed();
    // One "name: failure" entry per contract violation, across all tensors.
    let mut violations: Vec<String> = Vec::new();
    for tensor in &report.tensors {
        for failure in &tensor.failures {
            violations.push(format!("{}: {}", tensor.name, failure));
        }
    }
    if violations.is_empty() {
        return Ok(GateResult::passed(
            "tensor_contract",
            &format!(
                "{} tensors passed all PMAT-235 contract gates",
                report.tensor_count
            ),
            Some(report.tensor_count as f64),
            Some(0.0),
            duration,
        ));
    }
    // Keep the message readable: show at most three violations verbatim.
    let detail = match violations.len() {
        0..=3 => violations.join("; "),
        n => format!("{}; ... and {} more", violations[..3].join("; "), n - 3),
    };
    Ok(GateResult::failed(
        "tensor_contract",
        &format!(
            "{} contract violations in {} tensors: {}",
            violations.len(),
            report.failed_tensor_count,
            detail
        ),
        Some(violations.len() as f64),
        Some(0.0),
        duration,
    ))
}
/// F-CLASS-004 gate: verifies the model exposes a classifier head tensor
/// (a name containing score.weight / classifier.weight /
/// classification_head.weight, case-insensitive) that is 2-D with at
/// least two output classes.
fn run_classifier_head_gate(path: &Path, config: &QaConfig) -> Result<GateResult> {
    let start = Instant::now();
    if !config.json && config.verbose {
        println!(
            "{}",
            "Running classifier head validation (F-CLASS-004)...".yellow()
        );
    }
    let report = match aprender::format::rosetta::RosettaStone::new().inspect(path) {
        Ok(report) => report,
        Err(e) => {
            return Ok(GateResult::failed(
                "classifier_head",
                &format!("Failed to inspect model: {e}"),
                None,
                None,
                start.elapsed(),
            ));
        }
    };
    // Case-insensitive substring match against the known head tensor names.
    const CLASSIFIER_PATTERNS: [&str; 3] =
        ["score.weight", "classifier.weight", "classification_head.weight"];
    let head = report.tensors.iter().find(|t| {
        let lowered = t.name.to_lowercase();
        CLASSIFIER_PATTERNS.iter().any(|p| lowered.contains(p))
    });
    let duration = start.elapsed();
    let Some(tensor) = head else {
        return Ok(GateResult::failed(
            "classifier_head",
            "No classifier head tensor found (expected score.weight or classifier.weight)",
            None,
            None,
            duration,
        ));
    };
    if tensor.shape.len() != 2 {
        return Ok(GateResult::failed(
            "classifier_head",
            &format!(
                "Classifier head '{}' has {}D shape {:?}, expected 2D [num_classes, hidden_size]",
                tensor.name,
                tensor.shape.len(),
                tensor.shape
            ),
            None,
            None,
            duration,
        ));
    }
    // Shape convention assumed here: [num_classes, hidden_size].
    let num_classes = tensor.shape[0];
    if num_classes < 2 {
        return Ok(GateResult::failed(
            "classifier_head",
            &format!(
                "Classifier head '{}' has num_classes={}, minimum is 2",
                tensor.name, num_classes
            ),
            Some(num_classes as f64),
            Some(2.0),
            duration,
        ));
    }
    Ok(GateResult::passed(
        "classifier_head",
        &format!(
            "Classifier head '{}' shape {:?} (num_classes={}, hidden_size={})",
            tensor.name, tensor.shape, tensor.shape[0], tensor.shape[1]
        ),
        Some(num_classes as f64),
        Some(2.0),
        duration,
    ))
}