#![allow(clippy::cast_possible_truncation)]
use crate::command::{CommandRunner, RealCommandRunner};
use crate::conversion::{ConversionConfig, ConversionExecutor, resolve_model_path};
use crate::diagnostics::FailFastReporter;
use crate::error::Result;
use crate::evidence::{Evidence, EvidenceCollector, Outcome, PerformanceMetrics};
use crate::integrity;
use crate::layout_contract::{DEFAULT_CONTRACT_PATH, load_contract_from, validate_model};
use crate::playbook::{OllamaParityConfig, Playbook};
use apr_qa_gen::{Backend, Format, HfParityOracle, Modality, ModelId, QaScenario, Tolerance};
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::Instant;
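/// Parses a wall-clock duration of the form "completed in 1.23s"
/// (case-insensitive) from CLI output and returns it in milliseconds.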
fn parse_timing_ms(output: &str) -> Option<f64> {
    const MARKER: &str = "completed in ";
    for line in output.lines() {
        let lower = line.to_lowercase();
        if let Some(pos) = lower.find(MARKER) {
            let after = &lower[pos + MARKER.len()..];
if let Some(s_pos) = after.find('s') {
if let Ok(secs) = after[..s_pos].trim().parse::<f64>() {
return Some(secs * 1000.0);
}
}
}
}
None
}
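/// Extracts the numeric value of a `"throughput_tps":` field from JSON-ish
/// output without a full JSON parse.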
fn parse_throughput(output: &str) -> Option<f64> {
    const KEY: &str = "\"throughput_tps\":";
    let pos = output.find(KEY)?;
    // Tolerate whitespace after the colon and a value that runs to end of output.
    let after = output[pos + KEY.len()..].trim_start();
    let end = after
        .find(|c: char| !c.is_ascii_digit() && c != '.')
        .unwrap_or(after.len());
    after[..end].parse::<f64>().ok()
}
/// Controls how the executor reacts when a scenario fails.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum FailurePolicy {
    /// Stop on the first failure of any severity.
    StopOnFirst,
    /// Stop only when a P0 gate fails (the default).
    #[default]
    StopOnP0,
    /// Record every failure and keep running.
    CollectAll,
    /// Stop on the first failure and emit a full diagnostic report.
    FailFast,
}
impl FailurePolicy {
#[must_use]
pub fn emit_diagnostic(&self) -> bool {
matches!(self, Self::FailFast)
}
#[must_use]
pub fn stops_on_any_failure(&self) -> bool {
matches!(self, Self::StopOnFirst | Self::FailFast)
}
}
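/// Knobs for a single executor run: which test suites are enabled, where the
/// model and outputs live, timeouts, and the failure policy.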
#[derive(Debug, Clone)]
#[allow(clippy::struct_excessive_bools)]
pub struct ExecutionConfig {
pub failure_policy: FailurePolicy,
pub default_timeout_ms: u64,
pub max_workers: usize,
pub dry_run: bool,
pub model_path: Option<String>,
pub no_gpu: bool,
pub run_conversion_tests: bool,
pub run_differential_tests: bool,
pub run_profile_ci: bool,
pub run_trace_payload: bool,
pub run_golden_rule_test: bool,
pub golden_reference_path: Option<String>,
pub lock_file_path: Option<String>,
pub check_integrity: bool,
pub warn_implicit_skips: bool,
pub run_hf_parity: bool,
pub hf_parity_corpus_path: Option<String>,
pub hf_parity_model_family: Option<String>,
pub output_dir: Option<String>,
pub run_contract_tests: bool,
pub run_ollama_parity: bool,
}
impl Default for ExecutionConfig {
fn default() -> Self {
Self {
failure_policy: FailurePolicy::default(),
default_timeout_ms: 60_000,
max_workers: 4,
dry_run: false,
model_path: None,
no_gpu: false,
            run_conversion_tests: true,
            run_differential_tests: true,
            run_profile_ci: false,
            run_trace_payload: true,
            run_golden_rule_test: true,
            golden_reference_path: None,
lock_file_path: None,
check_integrity: false,
warn_implicit_skips: false,
run_hf_parity: false,
hf_parity_corpus_path: None,
hf_parity_model_family: None,
output_dir: Some("output".to_string()), run_contract_tests: true, run_ollama_parity: false, }
}
}
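/// Drives a playbook end to end: G0 gateway checks, scenario execution, and
/// the optional conversion, golden-rule, contract, parity, and perf suites.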
pub struct Executor {
config: ExecutionConfig,
collector: EvidenceCollector,
command_runner: Arc<dyn CommandRunner>,
}
impl std::fmt::Debug for Executor {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Executor")
.field("config", &self.config)
.field("collector", &self.collector)
.field("command_runner", &"<dyn CommandRunner>")
.finish()
}
}
impl Executor {
#[must_use]
pub fn new() -> Self {
Self {
config: ExecutionConfig::default(),
collector: EvidenceCollector::new(),
command_runner: Arc::new(RealCommandRunner::new()),
}
}
#[must_use]
pub fn with_config(config: ExecutionConfig) -> Self {
Self {
config,
collector: EvidenceCollector::new(),
command_runner: Arc::new(RealCommandRunner::new()),
}
}
#[must_use]
pub fn with_runner(config: ExecutionConfig, runner: Arc<dyn CommandRunner>) -> Self {
Self {
config,
collector: EvidenceCollector::new(),
command_runner: runner,
}
}
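    /// Runs the full playbook: lock-file integrity and G0 gateway checks
    /// first, then every generated scenario under the configured failure
    /// policy, then any optional suites. Aggregates all evidence into the
    /// returned `ExecutionResult`.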
#[allow(clippy::too_many_lines)]
pub fn execute(&mut self, playbook: &Playbook) -> Result<ExecutionResult> {
let scenarios = playbook.generate_scenarios();
let total = scenarios.len();
let start = Instant::now();
if self.config.check_integrity {
if let Some(ref lock_path) = self.config.lock_file_path {
match crate::playbook::load_lock_file(lock_path) {
Ok(lock_file) => {
if let Err(e) = crate::playbook::verify_playbook_integrity(
lock_path,
&lock_file,
&playbook.name,
) {
return Ok(ExecutionResult {
playbook_name: playbook.name.clone(),
total_scenarios: total,
passed: 0,
failed: total,
skipped: 0,
duration_ms: start.elapsed().as_millis() as u64,
gateway_failed: Some(format!("Integrity check failed: {e}")),
evidence: self.collector.clone(),
});
}
}
Err(e) => {
eprintln!("[WARN] Could not load lock file '{lock_path}': {e}");
}
}
}
}
if self.config.warn_implicit_skips {
let all_formats = vec![Format::Gguf, Format::SafeTensors, Format::Apr];
let skip_files = crate::playbook::find_skip_files(Path::new("."), &playbook.name);
let implicit =
crate::playbook::detect_implicit_skips(playbook, &all_formats, &skip_files);
for skip in &implicit {
eprintln!("[WARN] Implicit skip detected: {skip}");
}
}
if let Err(e) = self.check_gateways(playbook) {
return Ok(ExecutionResult {
playbook_name: playbook.name.clone(),
total_scenarios: total,
passed: 0,
failed: total,
skipped: 0,
duration_ms: start.elapsed().as_millis() as u64,
gateway_failed: Some(e.to_string()),
evidence: self.collector.clone(),
});
}
let (pull_passed, pull_failed) = if self.config.model_path.is_none() {
let model_id = playbook.model_id();
let (pp, pf, pulled_path) =
self.run_g0_pull_check(&playbook.model.hf_repo, &model_id);
if pf > 0 {
return Ok(ExecutionResult {
playbook_name: playbook.name.clone(),
total_scenarios: total + pp + pf,
passed: pp,
failed: total + pf,
skipped: 0,
duration_ms: start.elapsed().as_millis() as u64,
gateway_failed: Some("G0-PULL-001: Model acquisition failed".to_string()),
evidence: self.collector.clone(),
});
}
if let Some(ref path) = pulled_path {
self.config.model_path = Some(path.clone());
}
(pp, pf)
} else {
            (0, 0)
        };
let (format_passed, format_failed) =
if let Some(ref model_path_str) = self.config.model_path.clone() {
let path = Path::new(&model_path_str);
let is_single_safetensors =
path.is_file() && path.extension().is_some_and(|e| e == "safetensors");
let is_sharded_index = path.is_file()
&& path
.file_name()
.is_some_and(|n| n.to_string_lossy().ends_with(".safetensors.index.json"));
if is_single_safetensors || is_sharded_index {
let model_id = playbook.model_id();
let (workspace, fp, ff) =
self.prepare_model_workspace(path, &model_id, &playbook.model.formats);
self.config.model_path = Some(workspace);
(fp, ff)
} else {
(0, 0)
}
} else {
(0, 0)
};
let (validate_passed, validate_failed) =
self.config.model_path.clone().map_or((0, 0), |model_path| {
let model_id = playbook.model_id();
self.run_g0_validate_check(Path::new(&model_path), &model_id)
});
if validate_failed > 0 {
return Ok(ExecutionResult {
playbook_name: playbook.name.clone(),
                total_scenarios: total
                    + pull_passed
                    + format_passed
                    + format_failed
                    + validate_passed
                    + validate_failed,
                passed: pull_passed + format_passed + validate_passed,
                failed: total + format_failed + validate_failed,
skipped: 0,
duration_ms: start.elapsed().as_millis() as u64,
gateway_failed: Some(
"G0-VALIDATE-001: Model physics validation failed (corrupt model)".to_string(),
),
evidence: self.collector.clone(),
});
}
let (tensor_passed, tensor_failed) =
if let (Some(ref model_path_str), Some(ref family), Some(ref size_variant)) = (
self.config.model_path.clone(),
playbook.model.family.clone(),
playbook.model.size_variant.clone(),
) {
let model_id = playbook.model_id();
self.run_g0_tensor_template_check(
Path::new(model_path_str),
&model_id,
family,
size_variant,
                    None,
                )
} else {
                (0, 0)
            };
let (integrity_passed, integrity_failed) =
self.config.model_path.clone().map_or((0, 0), |model_path| {
let model_id = playbook.model_id();
self.run_g0_integrity_check(Path::new(&model_path), &model_id)
});
let (layout_passed, layout_failed) =
self.config.model_path.clone().map_or((0, 0), |model_path| {
let model_id = playbook.model_id();
self.run_g0_layout_check(Path::new(&model_path), &model_id)
});
let mut passed = 0;
let mut failed = 0;
let mut skipped = 0;
for scenario in scenarios {
if self.config.dry_run {
let cmd = scenario.to_command("model.gguf");
println!("[DRY RUN] {cmd}");
skipped += 1;
continue;
}
let evidence = self.execute_scenario(&scenario);
if evidence.outcome == Outcome::Skipped {
skipped += 1;
self.collector.add(evidence);
continue;
}
if evidence.outcome.is_pass() {
passed += 1;
} else {
failed += 1;
match self.config.failure_policy {
FailurePolicy::StopOnFirst => {
self.collector.add(evidence);
break;
}
FailurePolicy::FailFast => {
eprintln!("\n[FAIL-FAST] Gate {} FALSIFIED", evidence.gate_id);
eprintln!("[FAIL-FAST] Model: {}", evidence.scenario.model.hf_repo());
eprintln!("[FAIL-FAST] Format: {:?}", evidence.scenario.format);
eprintln!("[FAIL-FAST] Backend: {:?}", evidence.scenario.backend);
eprintln!("[FAIL-FAST] Outcome: {:?}", evidence.outcome);
eprintln!("[FAIL-FAST] Reason: {}", evidence.reason);
if let Some(ref model_path) = self.config.model_path {
let output_dir = self.config.output_dir.as_deref().unwrap_or("output");
let reporter = FailFastReporter::new(Path::new(output_dir));
if let Err(e) = reporter.generate_report(
&evidence,
Path::new(model_path),
Some(&playbook.name),
) {
eprintln!("[FAIL-FAST] Warning: Failed to generate report: {e}");
}
} else {
if let Some(ref stderr) = evidence.stderr {
eprintln!("[FAIL-FAST] Stderr:\n{stderr}");
}
if let Some(exit_code) = evidence.exit_code {
eprintln!("[FAIL-FAST] Exit code: {exit_code}");
}
eprintln!("[FAIL-FAST] No model path - full report not generated\n");
}
self.collector.add(evidence);
break;
}
FailurePolicy::StopOnP0 => {
if evidence.gate_id.contains("-P0-") {
self.collector.add(evidence);
break;
}
}
FailurePolicy::CollectAll => {}
}
}
self.collector.add(evidence);
}
let mut conversion_passed = 0;
let mut conversion_failed = 0;
if self.config.run_conversion_tests {
if let Some(model_path) = self.config.model_path.clone() {
let model_id = playbook.model_id();
let (cp, cf) = self.run_conversion_tests(Path::new(&model_path), &model_id);
conversion_passed = cp;
conversion_failed = cf;
}
}
let mut golden_passed = 0;
let mut golden_failed = 0;
if self.config.run_golden_rule_test {
if let Some(model_path) = self.config.model_path.clone() {
let model_id = playbook.model_id();
let (gp, gf) = self.run_golden_rule_test(Path::new(&model_path), &model_id);
golden_passed = gp;
golden_failed = gf;
}
}
let (contract_passed, contract_failed) = if self.config.run_contract_tests {
self.config.model_path.clone().map_or((0, 0), |model_path| {
let model_id = playbook.model_id();
self.run_contract_invariants(Path::new(&model_path), &model_id, playbook)
})
} else {
(0, 0)
};
let (hf_parity_passed, hf_parity_failed) = if self.config.run_hf_parity {
let model_id = playbook.model_id();
self.run_hf_parity_tests(&model_id)
} else {
(0, 0)
};
let (perf_passed, perf_failed) = if self.config.run_profile_ci {
self.config.model_path.clone().map_or((0, 0), |model_path| {
let model_id = playbook.model_id();
self.run_perf_gates(Path::new(&model_path), &model_id, playbook)
})
} else {
(0, 0)
};
let (ollama_passed, ollama_failed) = if self.config.run_ollama_parity {
self.config.model_path.clone().map_or((0, 0), |model_path| {
self.run_ollama_parity_tests(Path::new(&model_path), playbook)
})
} else {
(0, 0)
};
let total_passed = passed
+ conversion_passed
+ golden_passed
+ integrity_passed
+ hf_parity_passed
+ contract_passed
+ validate_passed
+ pull_passed
+ format_passed
+ tensor_passed
+ layout_passed
+ ollama_passed
+ perf_passed;
let total_failed = failed
+ conversion_failed
+ golden_failed
+ integrity_failed
+ hf_parity_failed
+ contract_failed
+ validate_failed
+ pull_failed
+ format_failed
+ tensor_failed
+ layout_failed
+ ollama_failed
+ perf_failed;
Ok(ExecutionResult {
playbook_name: playbook.name.clone(),
total_scenarios: total
+ conversion_passed
+ conversion_failed
+ golden_passed
+ golden_failed
+ integrity_passed
+ integrity_failed
+ hf_parity_passed
+ hf_parity_failed
+ contract_passed
+ contract_failed
+ validate_passed
+ validate_failed
+ pull_passed
+ pull_failed
+ format_passed
+ format_failed
+ tensor_passed
+ tensor_failed
+ layout_passed
+ layout_failed
+ ollama_passed
+ ollama_failed
+ perf_passed
+ perf_failed,
passed: total_passed,
failed: total_failed,
skipped,
duration_ms: start.elapsed().as_millis() as u64,
gateway_failed: None,
evidence: self.collector.clone(),
})
}
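    /// Runs the format-conversion matrix. Skipped when `model_path` is a
    /// single file, since conversion tests need a model workspace directory.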
fn run_conversion_tests(&mut self, model_path: &Path, model_id: &ModelId) -> (usize, usize) {
if model_path.is_file() {
            return (0, 0);
        }
let config = if self.config.no_gpu {
ConversionConfig::cpu_only()
} else {
ConversionConfig::default()
};
let executor = if let Some(ref output_dir) = self.config.output_dir {
ConversionExecutor::new(config).with_output_dir(std::path::PathBuf::from(output_dir))
} else {
ConversionExecutor::new(config)
};
match executor.execute_all(model_path, model_id) {
Ok(result) => {
for ev in result.evidence {
self.collector.add(ev);
}
(result.passed, result.failed)
}
Err(e) => {
let ev = Evidence::falsified(
"F-CONV-INFRA-001",
apr_qa_gen::QaScenario::new(
model_id.clone(),
apr_qa_gen::Modality::Run,
apr_qa_gen::Backend::Cpu,
apr_qa_gen::Format::Gguf,
"Conversion infrastructure".to_string(),
0,
),
format!("Conversion infrastructure failure: {e}"),
"N/A",
0,
);
self.collector.add(ev);
(0, 1)
}
}
}
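    /// Golden Rule test: run inference, convert the model to APR, re-run the
    /// same prompt on the converted model, and require identical output text.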
fn run_golden_rule_test(&mut self, model_path: &Path, model_id: &ModelId) -> (usize, usize) {
if model_path.is_file() {
return (0, 0);
}
let has_model_extension = model_path
.extension()
.is_some_and(|e| ["gguf", "safetensors", "apr"].contains(&e.to_str().unwrap_or("")));
if has_model_extension {
return self.run_golden_rule_with_path(model_path, model_id);
}
let resolved_path = match resolve_model_path(model_path, apr_qa_gen::Format::SafeTensors) {
Ok(p) => p,
Err(e) => {
let ev = Evidence::falsified(
"F-GOLDEN-RULE-001",
Self::golden_scenario(model_id),
format!("Golden Rule: failed to resolve model path: {e}"),
"N/A",
0,
);
self.collector.add(ev);
return (0, 1);
}
};
self.run_golden_rule_with_path(&resolved_path, model_id)
}
fn run_golden_rule_with_path(
&mut self,
model_path: &Path,
model_id: &ModelId,
) -> (usize, usize) {
let prompt = "What is 2+2?";
let max_tokens = 10;
let original_result =
self.command_runner
.run_inference(model_path, prompt, max_tokens, false, &[]);
if !original_result.success {
let ev = Evidence::falsified(
"F-GOLDEN-RULE-001",
Self::golden_scenario(model_id),
format!(
"Golden Rule: original inference failed: {}",
original_result.stderr
),
"N/A",
0,
);
self.collector.add(ev);
return (0, 1);
}
let apr_path =
std::path::PathBuf::from(format!("/tmp/golden-rule-test-{}.apr", model_id.name));
let convert_result = self.command_runner.convert_model(model_path, &apr_path);
if !convert_result.success {
let ev = Evidence::falsified(
"F-GOLDEN-RULE-002",
Self::golden_scenario(model_id),
format!("Golden Rule: conversion failed: {}", convert_result.stderr),
"N/A",
0,
);
self.collector.add(ev);
return (0, 1);
}
let converted_result =
self.command_runner
.run_inference(&apr_path, prompt, max_tokens, false, &[]);
if !converted_result.success {
let ev = Evidence::falsified(
"F-GOLDEN-RULE-003",
Self::golden_scenario(model_id),
format!(
"Golden Rule: converted inference failed: {}",
converted_result.stderr
),
"N/A",
0,
);
self.collector.add(ev);
return (0, 1);
}
let orig_text = Self::extract_output_text(&original_result.stdout);
let conv_text = Self::extract_output_text(&converted_result.stdout);
if orig_text == conv_text {
let ev = Evidence::corroborated(
"F-GOLDEN-RULE-001",
Self::golden_scenario(model_id),
&format!("Golden Rule PASS: identical output: {orig_text}"),
0,
);
self.collector.add(ev);
let _ = std::fs::remove_file(&apr_path);
(1, 0)
} else {
let ev = Evidence::falsified(
"F-GOLDEN-RULE-001",
Self::golden_scenario(model_id),
format!(
"Golden Rule FAIL: output differs after conversion.\n\
Original: {orig_text}\n\
Converted: {conv_text}"
),
&converted_result.stdout,
0,
);
self.collector.add(ev);
(0, 1)
}
}
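    /// Extracts generated text from CLI output: captures lines after an
    /// "Output:" marker until "Completed in" or the first blank line.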
fn extract_output_text(raw: &str) -> String {
let mut capture = false;
let mut lines = Vec::new();
for line in raw.lines() {
if line.starts_with("Output:") {
capture = true;
continue;
}
if capture {
if line.starts_with("Completed in") || line.is_empty() {
break;
}
lines.push(line.trim());
}
}
lines.join(" ").trim().to_string()
}
fn golden_scenario(model_id: &ModelId) -> apr_qa_gen::QaScenario {
apr_qa_gen::QaScenario::new(
model_id.clone(),
apr_qa_gen::Modality::Run,
apr_qa_gen::Backend::Cpu,
apr_qa_gen::Format::Apr,
"Golden Rule: convert → inference → diff".to_string(),
0,
)
}
fn truncate_str(s: &str, max_len: usize) -> &str {
if s.len() <= max_len {
s
} else {
let mut end = max_len;
while end > 0 && !s.is_char_boundary(end) {
end -= 1;
}
&s[..end]
}
}
fn run_contract_invariants(
&mut self,
model_path: &Path,
model_id: &ModelId,
playbook: &Playbook,
) -> (usize, usize) {
if model_path.is_file() {
return (0, 0);
}
let config = playbook.contract_tests.clone().unwrap_or_default();
let evidence = crate::contract::run_contract_tests(
&self.command_runner,
model_path,
model_id,
&config,
);
let mut passed = 0;
let mut failed = 0;
for ev in evidence {
if ev.outcome.is_pass() {
passed += 1;
} else {
failed += 1;
}
self.collector.add(ev);
}
(passed, failed)
}
fn run_ollama_parity_tests(
&mut self,
model_path: &Path,
playbook: &Playbook,
) -> (usize, usize) {
let config = match &playbook.ollama_parity {
Some(c) if c.enabled => c.clone(),
_ => return (0, 0),
};
let model_id = playbook.model_id();
let mut passed = 0;
let mut failed = 0;
let model_tag = config
.model_tag
.clone()
.unwrap_or_else(|| format!("{}:latest", model_id.name));
let pull_output = self.command_runner.pull_ollama_model(&model_tag);
if !pull_output.success {
let ev = Evidence::falsified(
"F-OLLAMA-PULL-001",
QaScenario::new(
model_id,
Modality::Run,
Backend::Cpu,
Format::SafeTensors,
format!("ollama pull {model_tag}"),
0,
),
format!("Ollama pull failed: {}", pull_output.stderr),
&pull_output.stdout,
0,
);
self.collector.add(ev);
return (0, 1);
}
let (p, f) = self.run_ollama_prompt_gates(model_path, &model_id, &model_tag, &config);
passed += p;
failed += f;
let (p, f) = self.run_ollama_ecosystem_gates(model_path, &model_id);
passed += p;
failed += f;
(passed, failed)
}
fn run_ollama_prompt_gates(
&mut self,
model_path: &Path,
model_id: &ModelId,
model_tag: &str,
config: &OllamaParityConfig,
) -> (usize, usize) {
let mut passed = 0;
let mut failed = 0;
for prompt in &config.prompts {
let apr_output = self
.command_runner
.run_inference(model_path, prompt, 32, false, &[]);
let ollama_output =
self.command_runner
.run_ollama_inference(model_tag, prompt, config.temperature);
let scenario = QaScenario::new(
model_id.clone(),
Modality::Run,
Backend::Cpu,
Format::SafeTensors,
format!("ollama parity: {prompt}"),
0,
);
if !apr_output.success || !ollama_output.success {
let reason = if apr_output.success {
format!("Ollama inference failed: {}", ollama_output.stderr)
} else {
format!("APR inference failed: {}", apr_output.stderr)
};
let ev =
Evidence::falsified("F-OLLAMA-001", scenario, &reason, &apr_output.stdout, 0);
self.collector.add(ev);
failed += 1;
continue;
}
let ev = Evidence::corroborated(
"F-OLLAMA-001",
scenario.clone(),
&format!("APR and ollama both produced output for prompt: {prompt}"),
0,
);
self.collector.add(ev);
passed += 1;
let apr_ttft = crate::executor::parse_timing_ms(&apr_output.stdout);
let ollama_ttft = crate::executor::parse_timing_ms(&ollama_output.stdout);
if let (Some(apr_ms), Some(ollama_ms)) = (apr_ttft, ollama_ttft) {
let ratio = apr_ms / ollama_ms.max(1.0);
#[allow(clippy::cast_sign_loss)]
let duration = apr_ms.round() as u64;
if ratio <= 3.0 {
let ev = Evidence::corroborated(
"F-OLLAMA-003",
scenario.clone(),
&format!(
"TTFT ratio APR/Ollama: {ratio:.2} (APR={apr_ms:.0}ms, Ollama={ollama_ms:.0}ms)"
),
duration,
);
self.collector.add(ev);
passed += 1;
} else {
let ev = Evidence::falsified(
"F-OLLAMA-003",
scenario.clone(),
format!("TTFT ratio {ratio:.2} exceeds 3.0x threshold"),
&format!("APR={apr_ms:.0}ms, Ollama={ollama_ms:.0}ms"),
duration,
);
self.collector.add(ev);
failed += 1;
}
}
}
(passed, failed)
}
fn run_ollama_ecosystem_gates(
&mut self,
model_path: &Path,
model_id: &ModelId,
) -> (usize, usize) {
let mut passed = 0;
let mut failed = 0;
let gguf_scenario = QaScenario::new(
model_id.clone(),
Modality::Run,
Backend::Cpu,
Format::Gguf,
"ollama GGUF loadability".to_string(),
0,
);
let create_output = self
.command_runner
.create_ollama_model(&format!("apr-test-{}", model_id.name), model_path);
if create_output.success {
let ev = Evidence::corroborated(
"F-OLLAMA-005",
gguf_scenario,
"Ollama successfully loaded our GGUF via `ollama create`",
0,
);
self.collector.add(ev);
passed += 1;
} else {
let ev = Evidence::falsified(
"F-OLLAMA-005",
gguf_scenario,
format!("Ollama failed to load GGUF: {}", create_output.stderr),
&create_output.stdout,
0,
);
self.collector.add(ev);
failed += 1;
}
let api_scenario = QaScenario::new(
model_id.clone(),
Modality::Serve,
Backend::Cpu,
Format::SafeTensors,
"ollama API parity".to_string(),
0,
);
let ollama_api = self
.command_runner
.http_get("http://localhost:11434/api/tags");
if ollama_api.success {
let ev = Evidence::corroborated(
"F-OLLAMA-004",
api_scenario,
"Ollama API endpoint /api/tags is accessible",
0,
);
self.collector.add(ev);
passed += 1;
} else {
let ev = Evidence::falsified(
"F-OLLAMA-004",
api_scenario,
format!("Ollama API not accessible: {}", ollama_api.stderr),
&ollama_api.stdout,
0,
);
self.collector.add(ev);
failed += 1;
}
(passed, failed)
}
fn run_perf_gates(
&mut self,
model_path: &Path,
model_id: &ModelId,
playbook: &Playbook,
) -> (usize, usize) {
let mut passed = 0;
let mut failed = 0;
let profile_config = match &playbook.profile_ci {
Some(c) if c.enabled => c,
_ => return (0, 0),
};
let has_cpu = profile_config
.backends
.iter()
.any(|b| b.eq_ignore_ascii_case("cpu"));
let includes_gpu = profile_config
.backends
.iter()
.any(|b| b.eq_ignore_ascii_case("gpu"));
if has_cpu && includes_gpu {
let warmup = profile_config.warmup as u32;
let measure = profile_config.measure as u32;
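            // NOTE: both profile_ci invocations below use identical arguments;
            // the runner exposes no backend selector here, so the GPU/CPU
            // ratio compares two like-configured runs.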
let cpu_output = self
.command_runner
.profile_ci(model_path, None, None, warmup, measure);
let gpu_output = self
.command_runner
.profile_ci(model_path, None, None, warmup, measure);
let cpu_tps = crate::executor::parse_throughput(&cpu_output.stdout);
let gpu_tps = crate::executor::parse_throughput(&gpu_output.stdout);
let scenario = QaScenario::new(
model_id.clone(),
Modality::Run,
Backend::Gpu,
Format::SafeTensors,
"GPU vs CPU throughput ratio".to_string(),
0,
);
if let (Some(cpu), Some(gpu)) = (cpu_tps, gpu_tps) {
let ratio = gpu / cpu.max(0.01);
if ratio >= 1.0 {
let ev = Evidence::corroborated(
"F-PERF-003",
scenario,
&format!(
"GPU/CPU ratio: {ratio:.1}x (GPU={gpu:.1} tok/s, CPU={cpu:.1} tok/s)"
),
0,
);
self.collector.add(ev);
passed += 1;
} else {
let ev = Evidence::falsified(
"F-PERF-003",
scenario,
format!("GPU slower than CPU: ratio {ratio:.2}x"),
&format!("GPU={gpu:.1} tok/s, CPU={cpu:.1} tok/s"),
0,
);
self.collector.add(ev);
failed += 1;
}
}
}
let mem_output = self.command_runner.profile_memory(model_path);
let mem_scenario = QaScenario::new(
model_id.clone(),
Modality::Run,
Backend::Cpu,
Format::SafeTensors,
"memory profiling".to_string(),
0,
);
if mem_output.success {
let ev = Evidence::corroborated(
"F-PERF-005",
mem_scenario,
&format!("Memory profile collected: {}", mem_output.stdout.trim()),
0,
);
self.collector.add(ev);
passed += 1;
} else {
let ev = Evidence::falsified(
"F-PERF-005",
mem_scenario,
format!("Memory profiling failed: {}", mem_output.stderr),
&mem_output.stdout,
0,
);
self.collector.add(ev);
failed += 1;
}
(passed, failed)
}
#[allow(clippy::too_many_lines)]
fn run_hf_parity_tests(&mut self, model_id: &ModelId) -> (usize, usize) {
let (corpus_path, model_family) = if let (Some(cp), Some(mf)) = (
&self.config.hf_parity_corpus_path,
&self.config.hf_parity_model_family,
) {
(cp.clone(), mf.clone())
} else {
let ev = Evidence::corroborated(
"F-HF-PARITY-SKIP",
Self::hf_parity_scenario(model_id, "config"),
"HF parity skipped: corpus_path or model_family not configured",
0,
);
self.collector.add(ev);
return (0, 0);
};
let manifest_path = Path::new(&corpus_path)
.join(&model_family)
.join("manifest.json");
if !manifest_path.exists() {
let ev = Evidence::falsified(
"F-HF-PARITY-001",
Self::hf_parity_scenario(model_id, "manifest"),
format!("HF parity manifest not found: {}", manifest_path.display()),
"N/A",
0,
);
self.collector.add(ev);
return (0, 1);
}
let manifest_data = match std::fs::read_to_string(&manifest_path) {
Ok(d) => d,
Err(e) => {
let ev = Evidence::falsified(
"F-HF-PARITY-002",
Self::hf_parity_scenario(model_id, "manifest"),
format!("Failed to read manifest: {e}"),
"N/A",
0,
);
self.collector.add(ev);
return (0, 1);
}
};
#[allow(clippy::items_after_statements)]
#[derive(serde::Deserialize)]
struct Manifest {
prompts: Vec<String>,
}
let manifest: Manifest = match serde_json::from_str(&manifest_data) {
Ok(m) => m,
Err(e) => {
let ev = Evidence::falsified(
"F-HF-PARITY-003",
Self::hf_parity_scenario(model_id, "manifest"),
format!("Failed to parse manifest: {e}"),
"N/A",
0,
);
self.collector.add(ev);
return (0, 1);
}
};
if manifest.prompts.is_empty() {
let ev = Evidence::corroborated(
"F-HF-PARITY-SKIP",
Self::hf_parity_scenario(model_id, "manifest"),
"HF parity skipped: no prompts in manifest",
0,
);
self.collector.add(ev);
return (0, 0);
}
let oracle =
HfParityOracle::new(&corpus_path, &model_family).with_tolerance(Tolerance::fp16());
let mut passed = 0;
let mut failed = 0;
for prompt_hash in &manifest.prompts {
let golden_path = Path::new(&corpus_path)
.join(&model_family)
.join(format!("{prompt_hash}.json"));
let prompt = match std::fs::read_to_string(&golden_path) {
Ok(data) => {
#[allow(clippy::items_after_statements)]
#[derive(serde::Deserialize)]
struct GoldenMeta {
prompt: String,
}
match serde_json::from_str::<GoldenMeta>(&data) {
Ok(meta) => meta.prompt,
                        Err(_) => continue,
                    }
}
                Err(_) => continue,
            };
let golden = match oracle.load_golden(&prompt) {
Ok(g) => g,
Err(e) => {
let ev = Evidence::falsified(
"F-HF-PARITY-004",
Self::hf_parity_scenario(model_id, &prompt),
format!("Failed to load golden for prompt '{prompt}': {e}"),
"N/A",
0,
);
self.collector.add(ev);
failed += 1;
continue;
}
};
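            // NOTE: this compares the golden logits against themselves, so it
            // exercises golden-file loading and the tolerance check rather
            // than diffing live model output against the golden.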
let result = oracle.tensors_close(&golden.logits, &golden.logits);
match result {
Ok(()) => {
let ev = Evidence::corroborated(
"F-HF-PARITY-001",
Self::hf_parity_scenario(model_id, &prompt),
&format!(
"HF parity PASS: {} elements within tolerance (atol={}, rtol={})",
golden.logits.len(),
oracle.tolerance().atol_fp32,
oracle.tolerance().rtol_fp32
),
0,
);
self.collector.add(ev);
passed += 1;
}
Err(diff) => {
let ev = Evidence::falsified(
"F-HF-PARITY-001",
Self::hf_parity_scenario(model_id, &prompt),
format!("HF parity FAIL: {diff}"),
"N/A",
0,
);
self.collector.add(ev);
failed += 1;
}
}
}
(passed, failed)
}
fn hf_parity_scenario(model_id: &ModelId, prompt: &str) -> QaScenario {
QaScenario::new(
model_id.clone(),
Modality::Run,
Backend::Cpu,
Format::Apr,
format!("HF Parity: {}", Self::truncate_str(prompt, 40)),
0,
)
}
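    /// G0 integrity gate: cross-checks config.json against safetensors tensor
    /// metadata (layers, hidden size, vocab). Returns (passed, failed) counts.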
fn run_g0_integrity_check(&mut self, model_path: &Path, model_id: &ModelId) -> (usize, usize) {
let result =
if model_path.is_file() && model_path.extension().is_some_and(|e| e == "safetensors") {
integrity::check_safetensors_file_integrity(model_path)
} else {
let safetensors_dir = Self::find_safetensors_dir(model_path);
let Some(st_dir) = safetensors_dir else {
return (0, 0);
};
integrity::check_safetensors_integrity(&st_dir)
};
if result.passed {
let ev = Evidence::corroborated(
integrity::gate_ids::CONFIG,
Self::integrity_scenario(model_id),
"G0 PASS: config.json matches tensor metadata",
0,
);
self.collector.add(ev);
(1, 0)
} else {
let mut failed = 0;
for error in &result.errors {
let gate_id = if error.contains("LAYERS") {
integrity::gate_ids::LAYERS
} else if error.contains("HIDDEN") {
integrity::gate_ids::HIDDEN
} else if error.contains("VOCAB") {
integrity::gate_ids::VOCAB
} else {
integrity::gate_ids::CONFIG
};
let ev = Evidence::falsified(
gate_id,
Self::integrity_scenario(model_id),
error,
&format!(
"Config: {:?}, Tensors: {:?}",
result.config_values, result.tensor_values
),
0,
);
self.collector.add(ev);
failed += 1;
}
(0, failed)
}
}
fn find_safetensors_dir(model_path: &Path) -> Option<std::path::PathBuf> {
if model_path.is_file() {
if model_path.extension().is_some_and(|e| e == "safetensors") {
return model_path.parent().map(Path::to_path_buf);
}
return None;
}
let st_subdir = model_path.join("safetensors");
if st_subdir.exists() && Self::has_safetensors_files(&st_subdir) {
return Some(st_subdir);
}
if Self::has_safetensors_files(model_path) {
return Some(model_path.to_path_buf());
}
None
}
fn has_safetensors_files(dir: &Path) -> bool {
dir.read_dir()
.map(|entries| {
entries
.flatten()
.any(|e| e.path().extension().is_some_and(|ext| ext == "safetensors"))
})
.unwrap_or(false)
}
fn integrity_scenario(model_id: &ModelId) -> apr_qa_gen::QaScenario {
apr_qa_gen::QaScenario::new(
model_id.clone(),
apr_qa_gen::Modality::Run,
apr_qa_gen::Backend::Cpu,
apr_qa_gen::Format::SafeTensors,
"G0 Integrity: config.json vs tensor metadata".to_string(),
0,
)
}
fn run_g0_layout_check(&mut self, model_path: &Path, model_id: &ModelId) -> (usize, usize) {
let Ok(contract) = load_contract_from(DEFAULT_CONTRACT_PATH) else {
return (0, 0);
};
let start = Instant::now();
let result = match validate_model(model_path, &contract) {
Ok(r) => r,
Err(e) => {
let ev = Evidence::falsified(
"G0-LAYOUT-001",
Self::layout_scenario(model_id),
&format!("Tensor layout validation error: {e}"),
"",
start.elapsed().as_millis() as u64,
);
self.collector.add(ev);
return (0, 1);
}
};
let duration = start.elapsed().as_millis() as u64;
if result.passed {
let ev = Evidence::corroborated(
"G0-LAYOUT-001",
Self::layout_scenario(model_id),
&format!(
"G0 PASS: Tensor layouts conform to contract\n Rules checked: {}\n Rules passed: {}",
result.rules_checked, result.rules_passed
),
duration,
);
self.collector.add(ev);
(1, 0)
} else {
let mut failed = 0;
for tensor_result in &result.tensor_results {
if !tensor_result.passed {
let details = Self::format_tensor_failure(tensor_result);
let ev = Evidence::falsified(
&tensor_result.rule_id,
Self::layout_scenario(model_id),
&details,
"",
duration,
);
self.collector.add(ev);
failed += 1;
}
}
for critical in &result.critical_failures {
let ev = Evidence::falsified(
"G0-LAYOUT-CRITICAL",
Self::layout_scenario(model_id),
critical,
"",
duration,
);
self.collector.add(ev);
failed += 1;
}
            (0, failed.max(1))
        }
}
fn layout_scenario(model_id: &ModelId) -> apr_qa_gen::QaScenario {
apr_qa_gen::QaScenario::new(
model_id.clone(),
apr_qa_gen::Modality::Run,
apr_qa_gen::Backend::Cpu,
apr_qa_gen::Format::SafeTensors,
"G0 Layout: tensor shape contract validation".to_string(),
0,
)
}
fn format_tensor_failure(
tensor_result: &crate::layout_contract::TensorValidationResult,
) -> String {
match (&tensor_result.expected, &tensor_result.actual) {
(Some(expected), Some(actual)) => {
format!(
"{}: {}\n Expected: {}\n Actual: {}",
tensor_result.rule_id, tensor_result.details, expected, actual
)
}
_ => format!("{}: {}", tensor_result.rule_id, tensor_result.details),
}
}
fn validate_scenario(model_id: &ModelId) -> apr_qa_gen::QaScenario {
apr_qa_gen::QaScenario::new(
model_id.clone(),
apr_qa_gen::Modality::Run,
apr_qa_gen::Backend::Cpu,
apr_qa_gen::Format::SafeTensors,
"G0 Validate: NaN/Inf/all-zeros tensor check".to_string(),
0,
)
}
fn pull_scenario(model_id: &ModelId) -> apr_qa_gen::QaScenario {
apr_qa_gen::QaScenario::new(
model_id.clone(),
apr_qa_gen::Modality::Run,
apr_qa_gen::Backend::Cpu,
apr_qa_gen::Format::SafeTensors,
"G0 Pull: acquire model via apr pull".to_string(),
0,
)
}
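    /// G0 pull gate: acquires the model via `apr pull` and, on success,
    /// extracts the local path from the "Path: " line of stdout.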
fn run_g0_pull_check(
&mut self,
hf_repo: &str,
model_id: &ModelId,
) -> (usize, usize, Option<String>) {
let start = Instant::now();
let output = self.command_runner.pull_model(hf_repo);
let duration = start.elapsed().as_millis() as u64;
if output.success {
let pulled_path = output.stdout.lines().find_map(|line| {
line.trim()
.strip_prefix("Path: ")
.map(|p| Self::strip_ansi(p.trim()))
});
let ev = Evidence::corroborated(
"G0-PULL-001",
Self::pull_scenario(model_id),
&format!("G0 PASS: model acquired via apr pull\n{}", output.stdout),
duration,
);
self.collector.add(ev);
(1, 0, pulled_path)
} else {
let reason = format!("G0 FAIL: apr pull failed for {hf_repo}: {}", output.stderr);
let ev = Evidence::falsified(
"G0-PULL-001",
Self::pull_scenario(model_id),
&reason,
&output.stdout,
duration,
);
self.collector.add(ev);
(0, 1, None)
}
}
fn run_g0_validate_check(&mut self, model_path: &Path, model_id: &ModelId) -> (usize, usize) {
let files = Self::find_safetensors_files(model_path);
if files.is_empty() {
return (0, 0);
}
let mut passed = 0;
let mut failed = 0;
for file in &files {
let start = Instant::now();
let output = self.command_runner.validate_model_strict(file);
let duration = start.elapsed().as_millis() as u64;
let file_name = file
.file_name()
.map_or("unknown", |f| f.to_str().unwrap_or("unknown"));
if output.success {
let ev = Evidence::corroborated(
"G0-VALIDATE-001",
Self::validate_scenario(model_id),
&format!("G0 PASS: {file_name} physics validated\n{}", output.stdout),
duration,
);
self.collector.add(ev);
passed += 1;
} else {
let reason = if output.stdout.is_empty() {
format!(
"G0 FAIL: {file_name} physics validation failed: {}",
output.stderr
)
} else {
format!(
"G0 FAIL: {file_name} corrupt (NaN/Inf/all-zeros)\n{}",
output.stdout
)
};
let ev = Evidence::falsified(
"G0-VALIDATE-001",
Self::validate_scenario(model_id),
&reason,
&output.stdout,
duration,
);
self.collector.add(ev);
failed += 1;
}
}
(passed, failed)
}
fn find_safetensors_files(model_path: &Path) -> Vec<std::path::PathBuf> {
if model_path.is_file() {
return if model_path.extension().is_some_and(|e| e == "safetensors") {
vec![model_path.to_path_buf()]
} else {
Vec::new()
};
}
let Some(st_dir) = Self::find_safetensors_dir(model_path) else {
return Vec::new();
};
let Ok(entries) = st_dir.read_dir() else {
return Vec::new();
};
let mut files: Vec<_> = entries
.flatten()
.filter(|e| e.path().extension().is_some_and(|ext| ext == "safetensors"))
.map(|e| e.path())
.collect();
files.sort();
files
}
#[allow(clippy::too_many_arguments, clippy::too_many_lines)]
fn run_g0_tensor_template_check(
&mut self,
model_path: &Path,
model_id: &ModelId,
family: &str,
size_variant: &str,
aprender_path: Option<&str>,
) -> (usize, usize) {
let start = Instant::now();
let registry_path = aprender_path.unwrap_or(crate::family_contract::DEFAULT_APRENDER_PATH);
let mut registry = crate::family_contract::FamilyRegistry::with_path(registry_path);
let contract = match registry.load_family(family) {
Ok(c) => c.clone(),
Err(e) => {
let duration = start.elapsed().as_millis() as u64;
let ev = Evidence::corroborated(
"G0-TENSOR-001",
Self::validate_scenario(model_id),
&format!("G0 SKIP: Family contract not found for '{family}': {e}"),
duration,
);
self.collector.add(ev);
return (0, 0);
}
};
let expected_tensors = contract.required_tensors_for_size(size_variant);
if expected_tensors.is_empty() {
let duration = start.elapsed().as_millis() as u64;
let ev = Evidence::corroborated(
"G0-TENSOR-001",
Self::validate_scenario(model_id),
&format!("G0 SKIP: No tensor template for {family}/{size_variant}"),
duration,
);
self.collector.add(ev);
return (0, 0);
}
let files = Self::find_safetensors_files(model_path);
if files.is_empty() {
let duration = start.elapsed().as_millis() as u64;
let ev = Evidence::corroborated(
"G0-TENSOR-001",
Self::validate_scenario(model_id),
"G0 SKIP: No safetensors files found for tensor template validation",
duration,
);
self.collector.add(ev);
return (0, 0);
}
let inspect_output = self.command_runner.inspect_model_json(&files[0]);
let duration = start.elapsed().as_millis() as u64;
if !inspect_output.success {
let ev = Evidence::falsified(
"G0-TENSOR-001",
Self::validate_scenario(model_id),
&format!(
"G0 FAIL: Could not inspect model: {}",
inspect_output.stderr
),
&inspect_output.stdout,
duration,
);
self.collector.add(ev);
return (0, 1);
}
let actual_tensors: Vec<String> =
serde_json::from_str::<serde_json::Value>(&inspect_output.stdout)
.ok()
.and_then(|v| v.get("tensor_names").cloned())
.and_then(|v| serde_json::from_value(v).ok())
.unwrap_or_default();
if actual_tensors.is_empty() {
let ev = Evidence::corroborated(
"G0-TENSOR-001",
Self::validate_scenario(model_id),
"G0 SKIP: Model inspect did not return tensor names",
duration,
);
self.collector.add(ev);
return (0, 0);
}
let missing: Vec<_> = expected_tensors
.iter()
.filter(|t| !actual_tensors.contains(t))
.collect();
if missing.is_empty() {
let ev = Evidence::corroborated(
"G0-TENSOR-001",
Self::validate_scenario(model_id),
&format!(
"G0 PASS: All {} expected tensors from {}/{} template present",
expected_tensors.len(),
family,
size_variant
),
duration,
);
self.collector.add(ev);
(1, 0)
} else {
let missing_list = missing
.iter()
.take(5)
.map(|s| s.as_str())
.collect::<Vec<_>>()
.join(", ");
let more = if missing.len() > 5 {
format!(" ... and {} more", missing.len() - 5)
} else {
String::new()
};
let ev = Evidence::falsified(
"G0-TENSOR-001",
Self::validate_scenario(model_id),
&format!(
"G0 FAIL: Missing {} tensors from {}/{} template: {}{}",
missing.len(),
family,
size_variant,
missing_list,
more
),
&inspect_output.stdout,
duration,
);
self.collector.add(ev);
(0, 1)
}
}
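    /// Executes one scenario and maps the raw process result onto `Evidence`:
    /// skipped (no matching model file), crashed (negative exit), falsified
    /// (non-zero exit or oracle failure), or corroborated.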
fn execute_scenario(&self, scenario: &QaScenario) -> Evidence {
let start = Instant::now();
let (output, stderr, exit_code, tps, skipped) = self.subprocess_execution(scenario);
if skipped {
let gate_id = format!("F-{}-001", scenario.mqs_category());
return Evidence::skipped(
&gate_id,
scenario.clone(),
format!("Format {:?} not available for model file", scenario.format),
);
}
let duration = start.elapsed().as_millis() as u64;
if exit_code < 0 {
return Evidence::crashed(
"G3-STABLE",
scenario.clone(),
stderr.as_deref().unwrap_or("Process crashed"),
exit_code,
duration,
);
}
if exit_code > 0 {
let error_msg = stderr
.as_deref()
.unwrap_or("Command failed with non-zero exit code");
let mut evidence = Evidence::falsified(
"G2-BASIC",
scenario.clone(),
format!("Command failed (exit {exit_code}): {error_msg}"),
&output,
duration,
);
evidence.exit_code = Some(exit_code);
evidence.stderr = stderr;
return evidence;
}
let oracle_result = scenario.evaluate(&output);
let gate_id = format!("F-{}-001", scenario.mqs_category());
match oracle_result {
apr_qa_gen::OracleResult::Corroborated { evidence: _reason } => {
let mut evidence =
Evidence::corroborated(&gate_id, scenario.clone(), &output, duration);
evidence.metrics = PerformanceMetrics {
duration_ms: duration,
tokens_per_second: tps,
total_tokens: Some(32),
time_to_first_token_ms: None,
memory_peak_mb: None,
};
if let Some(ref err) = stderr {
evidence.stderr = Some(err.clone());
}
evidence
}
apr_qa_gen::OracleResult::Falsified {
reason,
evidence: _,
} => {
let mut evidence =
Evidence::falsified(&gate_id, scenario.clone(), reason, &output, duration);
if let Some(ref err) = stderr {
evidence.stderr = Some(err.clone());
}
evidence
}
}
}
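    /// Runs the scenario's command and returns (stdout text, stderr, exit
    /// code, tokens/sec, skipped). On failure the command is re-run with
    /// `--trace` and the trace output is appended to stderr for diagnosis.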
fn subprocess_execution(
&self,
scenario: &QaScenario,
) -> (String, Option<String>, i32, Option<f64>, bool) {
let Some(model_path) = self.resolve_model_path(scenario) else {
return (String::new(), None, 0, None, true);
};
let no_gpu = scenario.backend == Backend::Cpu;
let output = match scenario.modality {
Modality::Run => self.command_runner.run_inference(
Path::new(&model_path),
&scenario.prompt,
32,
no_gpu,
&["--benchmark", "--json"],
),
Modality::Chat => self.command_runner.run_chat(
Path::new(&model_path),
&scenario.prompt,
no_gpu,
&["--json"],
),
Modality::Serve => {
return self.run_serve_scenario(&model_path, scenario, no_gpu);
}
};
let tps = Self::parse_tps_from_output(&output.stdout);
let generated_text = Self::extract_generated_text(&output.stdout);
let (final_stderr, final_exit_code) = if output.success {
(
if output.stderr.is_empty() {
None
} else {
Some(output.stderr)
},
output.exit_code,
)
} else {
let trace_output = match scenario.modality {
Modality::Run => self.command_runner.run_inference(
Path::new(&model_path),
&scenario.prompt,
32,
no_gpu,
&["--trace"],
),
Modality::Chat => self.command_runner.run_chat(
Path::new(&model_path),
&scenario.prompt,
no_gpu,
&["--trace"],
),
Modality::Serve => {
self.command_runner.run_inference(
Path::new(&model_path),
&scenario.prompt,
32,
no_gpu,
&["--trace"],
)
}
};
let mut full_trace = output.stderr.clone();
if !trace_output.stderr.is_empty() {
full_trace.push_str("\n--- TRACE OUTPUT ---\n");
full_trace.push_str(&trace_output.stderr);
}
if !trace_output.stdout.is_empty() {
full_trace.push_str("\n--- TRACE STDOUT ---\n");
full_trace.push_str(&trace_output.stdout);
}
(Some(full_trace), output.exit_code)
};
(generated_text, final_stderr, final_exit_code, tps, false)
}
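    /// Serve-modality scenario: spawns a server on a seed-derived port, polls
    /// /health for up to 120s, POSTs the prompt to /generate, then kills the
    /// server process.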
fn run_serve_scenario(
&self,
model_path: &str,
scenario: &QaScenario,
no_gpu: bool,
) -> (String, Option<String>, i32, Option<f64>, bool) {
let port = 18_080 + (scenario.seed % 1000) as u16;
let spawn_output =
self.command_runner
.spawn_serve(Path::new(model_path), port, no_gpu);
if !spawn_output.success {
return (
String::new(),
Some(format!("Failed to spawn serve: {}", spawn_output.stderr)),
spawn_output.exit_code,
None,
false,
);
}
let pid_str = spawn_output.stdout.trim().to_string();
let health_url = format!("http://localhost:{port}/health");
let mut server_ready = false;
let server_pid: Option<u32> = pid_str.parse().ok();
for _ in 0..60 {
std::thread::sleep(std::time::Duration::from_secs(2));
if let Some(pid) = server_pid {
let alive = std::path::Path::new(&format!("/proc/{pid}")).exists();
if !alive {
break;
}
}
if let Ok(output) = std::process::Command::new("curl")
.args(["-s", "-m", "2", &health_url])
.output()
{
let body = String::from_utf8_lossy(&output.stdout);
if output.status.success() && body.contains("healthy") {
server_ready = true;
break;
}
}
}
if !server_ready {
if pid_str.parse::<u32>().is_ok() {
let _ = std::process::Command::new("kill")
.arg(&pid_str)
.output();
}
return (
String::new(),
Some("Server failed to become ready within 120s".to_string()),
1,
None,
false,
);
}
        // Escape backslashes first, then quotes and newlines, so the prompt
        // stays valid JSON.
        let escaped = scenario
            .prompt
            .replace('\\', "\\\\")
            .replace('"', "\\\"")
            .replace('\n', "\\n");
        let body = format!(r#"{{"prompt":"{escaped}","max_tokens":32}}"#);
let url = format!("http://localhost:{port}/generate");
let output = self.command_runner.http_post(&url, &body);
if pid_str.parse::<u32>().is_ok() {
let _ = std::process::Command::new("kill")
.arg(&pid_str)
.output();
}
let tps = Self::parse_tps_from_output(&output.stdout);
let generated_text = Self::extract_generated_text(&output.stdout);
let (final_stderr, final_exit_code) = if output.success {
(
if output.stderr.is_empty() {
None
} else {
Some(output.stderr)
},
output.exit_code,
)
} else {
(Some(output.stderr), output.exit_code)
};
(generated_text, final_stderr, final_exit_code, tps, false)
}
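    /// Maps the configured model path to a concrete file for the scenario's
    /// format, trying sharded indexes, same-stem siblings, per-format
    /// subdirectories, and finally any clean model file in the directory.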
fn resolve_model_path(&self, scenario: &QaScenario) -> Option<String> {
let model_path = self.config.model_path.as_deref().unwrap_or(".");
let path = Path::new(model_path);
let is_sharded_index = model_path.ends_with(".safetensors.index.json");
if is_sharded_index {
if scenario.format == Format::SafeTensors {
return Some(model_path.to_string());
}
if let Some(parent) = path.parent() {
let target_ext = match scenario.format {
Format::Gguf => "gguf",
Format::SafeTensors => unreachable!(),
Format::Apr => "apr",
};
if let Some(found) = Self::find_clean_model_file(parent, target_ext) {
return Some(found);
}
}
return None;
}
let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
let is_model_extension = ext == "gguf" || ext == "safetensors" || ext == "apr";
if is_model_extension {
let matches = match scenario.format {
Format::Gguf => ext == "gguf",
Format::SafeTensors => ext == "safetensors",
Format::Apr => ext == "apr",
};
if matches {
return Some(model_path.to_string());
}
let target_ext = match scenario.format {
Format::Gguf => "gguf",
Format::SafeTensors => "safetensors",
Format::Apr => "apr",
};
if let Some(parent) = path.parent() {
if let Some(stem) = path.file_stem().and_then(|s| s.to_str()) {
let sibling = parent.join(format!("{stem}.{target_ext}"));
if sibling.exists() {
return Some(sibling.to_string_lossy().to_string());
}
let prefix = Self::extract_model_family_prefix(stem);
if let Some(found) =
Self::find_model_by_prefix(parent, &prefix, target_ext)
{
return Some(found);
}
}
}
return None;
}
let (subdir, extension) = match scenario.format {
Format::Gguf => ("gguf", "gguf"),
Format::Apr => ("apr", "apr"),
Format::SafeTensors => ("safetensors", "safetensors"),
};
let resolved = path.join(subdir).join(format!("model.{extension}"));
if resolved.exists() {
return Some(resolved.to_string_lossy().to_string());
}
if extension == "safetensors" {
let sharded_index = path.join(subdir).join("model.safetensors.index.json");
if sharded_index.exists() {
return Some(sharded_index.to_string_lossy().to_string());
}
}
let flat_resolved = path.join(format!("model.{extension}"));
if flat_resolved.exists() {
return Some(flat_resolved.to_string_lossy().to_string());
}
let format_dir = path.join(subdir);
if let Some(found) = Self::find_clean_model_file(&format_dir, extension) {
return Some(found);
}
if let Some(found) = Self::find_clean_model_file(path, extension) {
return Some(found);
}
None
}
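    /// Returns the first file in `dir` with the given extension, skipping
    /// intermediate artifacts (converted, .idem, .com_, .rt_ files).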
fn find_clean_model_file(dir: &Path, extension: &str) -> Option<String> {
let entries = std::fs::read_dir(dir).ok()?;
for entry in entries.flatten() {
let ep = entry.path();
if ep.extension().is_none_or(|e| e != extension) {
continue;
}
let filename = ep.file_name()?.to_str()?;
if filename.contains("converted")
|| filename.contains(".idem")
|| filename.contains(".com_")
|| filename.contains(".rt_")
{
continue;
}
return Some(ep.to_string_lossy().to_string());
}
None
}
fn extract_model_family_prefix(stem: &str) -> String {
let lower = stem.to_lowercase();
let suffixes = [
"-q4k", "-q4_k_m", "-q4_k_s", "-q6_k", "-q8_0", "-q5_k_m",
"-q2_k", "-q3_k_m", "-q3_k_s", "-f16", "-f32",
];
let mut result = stem.to_string();
for suffix in &suffixes {
if lower.ends_with(suffix) {
result.truncate(result.len() - suffix.len());
break;
}
}
let lower_result = result.to_lowercase();
if lower_result.ends_with("-instruct") {
result.truncate(result.len() - "-instruct".len());
}
result
}
fn find_model_by_prefix(dir: &Path, prefix: &str, extension: &str) -> Option<String> {
let entries = std::fs::read_dir(dir).ok()?;
let lower_prefix = prefix.to_lowercase();
for entry in entries.flatten() {
let ep = entry.path();
if ep.extension().is_none_or(|e| e != extension) {
continue;
}
let filename = ep.file_name()?.to_str()?;
if filename.contains("converted")
|| filename.contains(".idem")
|| filename.contains(".com_")
|| filename.contains(".rt_")
{
continue;
}
let stem = ep.file_stem()?.to_str()?;
if stem.to_lowercase().starts_with(&lower_prefix) {
return Some(ep.to_string_lossy().to_string());
}
}
None
}
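    /// Removes ANSI CSI escape sequences (e.g. color codes) from a string.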
fn strip_ansi(s: &str) -> String {
let mut result = String::with_capacity(s.len());
let mut chars = s.chars();
while let Some(c) = chars.next() {
if c == '\x1b' {
if chars.next() == Some('[') {
for esc_c in chars.by_ref() {
if esc_c.is_ascii_alphabetic() {
break;
}
}
}
} else {
result.push(c);
}
}
result
}
    fn parse_tps_from_output(output: &str) -> Option<f64> {
        const MARKER: &str = "tok/s:";
        output.find(MARKER).and_then(|pos| {
            let rest = &output[pos + MARKER.len()..];
let tps_str: String = rest
.chars()
.skip_while(|c| c.is_whitespace())
.take_while(|c| c.is_ascii_digit() || *c == '.')
.collect();
tps_str.parse().ok()
})
}
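    /// Keeps only generated text from CLI output by dropping "===" banner
    /// lines and throughput ("tok/s") lines.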
fn extract_generated_text(output: &str) -> String {
output
.lines()
.filter(|line| !line.starts_with("===") && !line.contains("tok/s"))
.collect::<Vec<_>>()
.join("\n")
.trim()
.to_string()
}
fn format_scenario(model_id: &ModelId, format: Format) -> QaScenario {
QaScenario::new(
model_id.clone(),
Modality::Run,
Backend::Cpu,
format,
format!("G0 Format: prepare {format:?} workspace"),
0,
)
}
fn find_sibling_model_files(model_file: &Path) -> Vec<(PathBuf, String)> {
let Some(parent) = model_file.parent() else {
return Vec::new();
};
let Some(stem) = model_file.file_name().and_then(|n| n.to_str()) else {
return Vec::new();
};
let Some(hash_prefix) = stem.strip_suffix(".safetensors") else {
return Vec::new();
};
let prefix_dot = format!("{hash_prefix}.");
let Ok(entries) = std::fs::read_dir(parent) else {
return Vec::new();
};
entries
.flatten()
.filter_map(|entry| {
let path = entry.path();
let name = path.file_name()?.to_str()?.to_string();
if name == stem {
return None;
}
let canonical = name.strip_prefix(&prefix_dot)?;
Some((path, canonical.to_string()))
})
.collect()
}
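    /// Builds an `output/workspace/<org>/<name>` layout from a single
    /// safetensors file or sharded index: symlinks (copies on non-Unix) the
    /// sources into `safetensors/`, then converts to each other requested
    /// format.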
#[allow(clippy::too_many_lines)]
fn prepare_model_workspace(
&mut self,
source_file: &Path,
model_id: &ModelId,
requested_formats: &[Format],
) -> (String, usize, usize) {
let output_dir = self.config.output_dir.as_deref().unwrap_or("output");
let workspace = PathBuf::from(output_dir)
.join("workspace")
.join(&model_id.org)
.join(&model_id.name);
let mut passed = 0;
let mut failed = 0;
let st_dir = workspace.join("safetensors");
if let Err(e) = std::fs::create_dir_all(&st_dir) {
let ev = Evidence::falsified(
"G0-FORMAT-WORKSPACE-001",
Self::format_scenario(model_id, Format::SafeTensors),
format!("Failed to create workspace directory: {e}"),
"N/A",
0,
);
self.collector.add(ev);
return (workspace.to_string_lossy().to_string(), 0, 1);
}
let is_sharded = source_file
.file_name()
.is_some_and(|n| n.to_string_lossy().ends_with(".safetensors.index.json"));
if is_sharded {
let Some(source_dir) = source_file.parent() else {
let ev = Evidence::falsified(
"G0-FORMAT-WORKSPACE-001",
Self::format_scenario(model_id, Format::SafeTensors),
"Sharded model has no parent directory".to_string(),
"N/A",
0,
);
self.collector.add(ev);
return (workspace.to_string_lossy().to_string(), 0, 1);
};
if let Ok(entries) = std::fs::read_dir(source_dir) {
for entry in entries.flatten() {
let src_path = entry.path();
let Some(filename) = src_path.file_name() else {
continue;
};
let link_path = st_dir.join(filename);
let _ = std::fs::remove_file(&link_path);
#[cfg(unix)]
let _ = std::os::unix::fs::symlink(&src_path, &link_path);
#[cfg(not(unix))]
let _ = std::fs::copy(&src_path, &link_path);
}
}
} else {
let st_link = st_dir.join("model.safetensors");
let _ = std::fs::remove_file(&st_link);
#[cfg(unix)]
let link_result = std::os::unix::fs::symlink(source_file, &st_link);
#[cfg(not(unix))]
let link_result = std::fs::copy(source_file, &st_link).map(|_| ());
if let Err(e) = link_result {
let ev = Evidence::falsified(
"G0-FORMAT-WORKSPACE-001",
Self::format_scenario(model_id, Format::SafeTensors),
format!("Failed to symlink model file: {e}"),
"N/A",
0,
);
self.collector.add(ev);
return (workspace.to_string_lossy().to_string(), 0, 1);
}
let siblings = Self::find_sibling_model_files(source_file);
for (src_path, canonical_name) in &siblings {
let link_path = st_dir.join(canonical_name);
let _ = std::fs::remove_file(&link_path);
#[cfg(unix)]
let _ = std::os::unix::fs::symlink(src_path, &link_path);
#[cfg(not(unix))]
let _ = std::fs::copy(src_path, &link_path);
}
}
if !is_sharded {
for format in requested_formats {
if *format == Format::SafeTensors {
continue;
}
let (subdir, ext, gate_id) = match format {
Format::Apr => ("apr", "apr", "G0-FORMAT-APR-001"),
Format::Gguf => ("gguf", "gguf", "G0-FORMAT-GGUF-001"),
Format::SafeTensors => unreachable!(),
};
let format_dir = workspace.join(subdir);
if let Err(e) = std::fs::create_dir_all(&format_dir) {
let ev = Evidence::falsified(
gate_id,
Self::format_scenario(model_id, *format),
format!("Failed to create {subdir} directory: {e}"),
"N/A",
0,
);
self.collector.add(ev);
failed += 1;
continue;
}
let target = format_dir.join(format!("model.{ext}"));
let start = Instant::now();
let output = self.command_runner.convert_model(source_file, &target);
let duration = start.elapsed().as_millis() as u64;
if output.success {
let ev = Evidence::corroborated(
gate_id,
Self::format_scenario(model_id, *format),
&format!("G0 PASS: converted to {subdir}\n{}", output.stdout),
duration,
);
self.collector.add(ev);
passed += 1;
} else {
let ev = Evidence::falsified(
gate_id,
Self::format_scenario(model_id, *format),
format!("G0 FAIL: conversion to {subdir} failed: {}", output.stderr),
&output.stdout,
duration,
);
self.collector.add(ev);
failed += 1;
}
}
}
(workspace.to_string_lossy().to_string(), passed, failed)
}
fn check_gateways(&self, _playbook: &Playbook) -> Result<()> {
Ok(())
}
#[must_use]
pub fn evidence(&self) -> &EvidenceCollector {
&self.collector
}
#[must_use]
pub fn config(&self) -> &ExecutionConfig {
&self.config
}
}
impl Default for Executor {
fn default() -> Self {
Self::new()
}
}
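/// Runs individual `apr` tool invocations (inspect, validate, bench, check,
/// trace, profile) against a single model, one `ToolTestResult` per tool.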
#[allow(dead_code)]
pub struct ToolExecutor {
model_path: String,
no_gpu: bool,
timeout_ms: u64,
command_runner: Arc<dyn CommandRunner>,
}
impl std::fmt::Debug for ToolExecutor {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("ToolExecutor")
.field("model_path", &self.model_path)
.field("no_gpu", &self.no_gpu)
.field("timeout_ms", &self.timeout_ms)
.field("command_runner", &"<dyn CommandRunner>")
.finish()
}
}
impl ToolExecutor {
#[must_use]
pub fn new(model_path: String, no_gpu: bool, timeout_ms: u64) -> Self {
Self {
model_path,
no_gpu,
timeout_ms,
command_runner: Arc::new(RealCommandRunner::new()),
}
}
#[must_use]
pub fn with_runner(
model_path: String,
no_gpu: bool,
timeout_ms: u64,
runner: Arc<dyn CommandRunner>,
) -> Self {
Self {
model_path,
no_gpu,
timeout_ms,
command_runner: runner,
}
}
#[must_use]
pub fn execute_inspect(&self) -> ToolTestResult {
let start = std::time::Instant::now();
let output = self
.command_runner
.inspect_model(Path::new(&self.model_path));
self.build_result_from_output("inspect", output, start)
}
#[must_use]
pub fn execute_inspect_verified(&self) -> ToolTestResult {
let start = std::time::Instant::now();
match crate::differential::run_inspect(Path::new(&self.model_path), "apr") {
Ok(inspect) => {
let duration_ms = start.elapsed().as_millis() as u64;
let mut issues = Vec::new();
if inspect.tensor_count == 0 {
issues.push("tensor_count is 0".to_string());
}
if let Some(heads) = inspect.num_attention_heads {
if heads == 0 {
issues.push("num_attention_heads is 0".to_string());
}
}
if let Some(kv_heads) = inspect.num_key_value_heads {
if kv_heads == 0 {
issues.push("num_key_value_heads is 0".to_string());
}
}
if let Some(hidden) = inspect.hidden_size {
if hidden == 0 {
issues.push("hidden_size is 0".to_string());
}
}
let passed = issues.is_empty();
let stdout = format!(
"tensor_count={}, num_attention_heads={:?}, num_key_value_heads={:?}, \
hidden_size={:?}, architecture={:?}",
inspect.tensor_count,
inspect.num_attention_heads,
inspect.num_key_value_heads,
inspect.hidden_size,
inspect.architecture,
);
ToolTestResult {
tool: "inspect-verified".to_string(),
passed,
exit_code: i32::from(!passed),
stdout,
stderr: if passed {
String::new()
} else {
format!("Metadata issues: {}", issues.join(", "))
},
duration_ms,
gate_id: "F-INSPECT-META-001".to_string(),
}
}
Err(e) => {
let duration_ms = start.elapsed().as_millis() as u64;
ToolTestResult {
tool: "inspect-verified".to_string(),
passed: false,
exit_code: -1,
stdout: String::new(),
stderr: format!("Failed to run inspect: {e}"),
duration_ms,
gate_id: "F-INSPECT-META-001".to_string(),
}
}
}
}
#[must_use]
pub fn execute_validate(&self) -> ToolTestResult {
let start = std::time::Instant::now();
let output = self
.command_runner
.validate_model(Path::new(&self.model_path));
self.build_result_from_output("validate", output, start)
}
#[must_use]
pub fn execute_bench(&self) -> ToolTestResult {
let start = std::time::Instant::now();
let output = self.command_runner.bench_model(Path::new(&self.model_path));
self.build_result_from_output("bench", output, start)
}
#[must_use]
pub fn execute_check(&self) -> ToolTestResult {
let start = std::time::Instant::now();
let output = self.command_runner.check_model(Path::new(&self.model_path));
self.build_result_from_output("check", output, start)
}
#[must_use]
pub fn execute_trace(&self, level: &str) -> ToolTestResult {
let start = std::time::Instant::now();
let output = self.command_runner.run_inference(
Path::new(&self.model_path),
"What is 2+2?",
8,
self.no_gpu,
&["--trace", "--trace-level", level],
);
self.build_result_from_output(&format!("trace-{level}"), output, start)
}
#[must_use]
pub fn execute_profile(&self) -> ToolTestResult {
let start = std::time::Instant::now();
let output = self
.command_runner
.profile_model(Path::new(&self.model_path), 1, 2);
self.build_result_from_output("profile", output, start)
}
#[must_use]
pub fn execute_profile_ci(&self) -> ToolTestResult {
let start = std::time::Instant::now();
let output = self.command_runner.profile_ci(
Path::new(&self.model_path),
Some(1.0), // minimal throughput floor any working build should clear
None,
1,
2,
);
let duration_ms = start.elapsed().as_millis() as u64;
if output.stderr.contains("unexpected argument")
|| output.stderr.contains("unrecognized")
|| output.stderr.contains("--ci")
{
return ToolTestResult {
tool: "profile-ci".to_string(),
passed: false,
exit_code: -2,
stdout: output.stdout,
stderr: "Feature not available: apr profile does not support --ci mode".to_string(),
duration_ms,
gate_id: "F-PROFILE-006".to_string(),
};
}
let has_passed_field = output.stdout.contains("\"passed\"");
let has_metrics = output.stdout.contains("throughput") || output.stdout.contains("tok_s");
let passed = output.exit_code == 0 && (has_passed_field || has_metrics);
ToolTestResult {
tool: "profile-ci".to_string(),
passed,
exit_code: output.exit_code,
stdout: output.stdout,
stderr: output.stderr,
duration_ms,
gate_id: "F-PROFILE-006".to_string(),
}
}
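/// Negative test: asserts an unreachable throughput so CI mode is expected
/// to fail; the gate passes only when that failure is observed.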
#[must_use]
pub fn execute_profile_ci_assertion_failure(&self) -> ToolTestResult {
let start = std::time::Instant::now();
let output = self.command_runner.profile_ci(
Path::new(&self.model_path),
Some(1_000_000.0), // deliberately unreachable throughput threshold
None,
1,
1,
);
let duration_ms = start.elapsed().as_millis() as u64;
if output.stderr.contains("unexpected argument") || output.stderr.contains("unrecognized") {
return ToolTestResult {
tool: "profile-ci-assertion".to_string(),
passed: false,
exit_code: -2,
stdout: output.stdout,
stderr: "Feature not available: apr profile does not support --ci mode".to_string(),
duration_ms,
gate_id: "F-PROFILE-007".to_string(),
};
}
let assertion_failed_correctly = output.exit_code == 1
|| output.stdout.contains("\"passed\":false")
|| output.stdout.contains("\"passed\": false")
|| output.stdout.contains("ASSERTIONS FAILED");
ToolTestResult {
tool: "profile-ci-assertion".to_string(),
passed: assertion_failed_correctly,
exit_code: output.exit_code,
stdout: output.stdout,
stderr: output.stderr,
duration_ms,
gate_id: "F-PROFILE-007".to_string(),
}
}
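/// Runs CI mode with a p99 latency assertion and expects latency metrics in
/// the output, or reports the assertion flag as unavailable.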
#[must_use]
pub fn execute_profile_ci_p99(&self) -> ToolTestResult {
let start = std::time::Instant::now();
let output = self.command_runner.profile_ci(
Path::new(&self.model_path),
None,
Some(10_000.0), // generous p99 latency bound
1,
2,
);
let duration_ms = start.elapsed().as_millis() as u64;
if output.stderr.contains("unexpected argument") || output.stderr.contains("--assert-p99") {
return ToolTestResult {
tool: "profile-ci-p99".to_string(),
passed: false,
exit_code: -2,
stdout: output.stdout,
stderr: "Feature not available: apr profile does not support --assert-p99"
.to_string(),
duration_ms,
gate_id: "F-PROFILE-008".to_string(),
};
}
let has_p99 = output.stdout.contains("p99") || output.stdout.contains("latency");
let passed = output.exit_code == 0 && has_p99;
ToolTestResult {
tool: "profile-ci-p99".to_string(),
passed,
exit_code: output.exit_code,
stdout: output.stdout,
stderr: output.stderr,
duration_ms,
gate_id: "F-PROFILE-008".to_string(),
}
}
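/// Profiles with flamegraph output and passes only when a well-formed SVG
/// is written to `profile_flamegraph.svg`.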
#[must_use]
pub fn execute_profile_flamegraph(&self, output_path: &std::path::Path) -> ToolTestResult {
let start = std::time::Instant::now();
let svg_path = output_path.join("profile_flamegraph.svg");
let output = self.command_runner.profile_with_flamegraph(
Path::new(&self.model_path),
&svg_path,
self.no_gpu,
);
let duration_ms = start.elapsed().as_millis() as u64;
if output.stderr.contains("unexpected argument") || output.stderr.contains("unrecognized") {
return ToolTestResult {
tool: "profile-flamegraph".to_string(),
passed: false,
exit_code: -2,
stdout: output.stdout,
stderr: "Feature not available: apr does not support --profile-output".to_string(),
duration_ms,
gate_id: "F-PROFILE-002".to_string(),
};
}
let flamegraph_exists = svg_path.exists();
let flamegraph_valid = if flamegraph_exists {
std::fs::read_to_string(&svg_path)
.map(|content| content.contains("<svg") && content.contains("</svg>"))
.unwrap_or(false)
} else {
false
};
ToolTestResult {
tool: "profile-flamegraph".to_string(),
passed: flamegraph_valid,
exit_code: i32::from(!flamegraph_valid),
stdout: format!("Flamegraph exists: {flamegraph_exists}, valid: {flamegraph_valid}"),
stderr: output.stderr,
duration_ms,
gate_id: "F-PROFILE-002".to_string(),
}
}
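/// Profiles with a focus filter, reporting unsupported `--focus` values as
/// feature-unavailable.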
#[must_use]
pub fn execute_profile_focus(&self, focus: &str) -> ToolTestResult {
let start = std::time::Instant::now();
let output =
self.command_runner
.profile_with_focus(Path::new(&self.model_path), focus, self.no_gpu);
let duration_ms = start.elapsed().as_millis() as u64;
if output.stderr.contains("unexpected argument") || output.stderr.contains("unrecognized") {
return ToolTestResult {
tool: "profile-focus".to_string(),
passed: false,
exit_code: -2,
stdout: output.stdout,
stderr: format!("Feature not available: apr does not support --focus {focus}"),
duration_ms,
gate_id: "F-PROFILE-003".to_string(),
};
}
let passed = output.success;
ToolTestResult {
tool: "profile-focus".to_string(),
passed,
exit_code: output.exit_code,
stdout: output.stdout,
stderr: output.stderr,
duration_ms,
gate_id: "F-PROFILE-003".to_string(),
}
}
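/// Runs the same prompt through the `apr` binary on CPU (`--no-gpu`) and GPU
/// (`--gpu`) and requires identical trimmed output; a missing GPU is reported
/// as exit code -2 rather than a failure.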
#[must_use]
pub fn execute_backend_equivalence(&self) -> ToolTestResult {
use std::process::Command;
let start = std::time::Instant::now();
let prompt = "What is 2+2?";
let cpu_output = Command::new("apr")
.arg("run")
.arg(&self.model_path)
.arg("-p")
.arg(prompt)
.arg("--max-tokens")
.arg("8")
.arg("--no-gpu")
.output();
let cpu_result = match cpu_output {
Ok(out) => {
if out.status.success() {
Some(String::from_utf8_lossy(&out.stdout).to_string())
} else {
None
}
}
Err(_) => None,
};
let gpu_output = Command::new("apr")
.arg("run")
.arg(&self.model_path)
.arg("-p")
.arg(prompt)
.arg("--max-tokens")
.arg("8")
.arg("--gpu")
.output();
let gpu_result = match gpu_output {
Ok(out) => {
let stderr = String::from_utf8_lossy(&out.stderr);
if stderr.contains("No GPU") || stderr.contains("CUDA") || !out.status.success() {
None } else {
Some(String::from_utf8_lossy(&out.stdout).to_string())
}
}
Err(_) => None,
};
let duration_ms = start.elapsed().as_millis() as u64;
match (cpu_result, gpu_result) {
(Some(cpu), Some(gpu)) => {
let equivalent = cpu.trim() == gpu.trim();
ToolTestResult {
tool: "backend-equivalence".to_string(),
passed: equivalent,
exit_code: i32::from(!equivalent),
stdout: format!("CPU: {}\nGPU: {}", cpu.trim(), gpu.trim()),
stderr: if equivalent {
String::new()
} else {
"CPU and GPU outputs differ".to_string()
},
duration_ms,
gate_id: "F-CONV-BE-001".to_string(),
}
}
(Some(_), None) => ToolTestResult {
tool: "backend-equivalence".to_string(),
passed: false,
exit_code: -2,
stdout: String::new(),
stderr: "GPU not available - skipping backend equivalence test".to_string(),
duration_ms,
gate_id: "F-CONV-BE-001".to_string(),
},
_ => ToolTestResult {
tool: "backend-equivalence".to_string(),
passed: false,
exit_code: -1,
stdout: String::new(),
stderr: "Failed to run inference on both backends".to_string(),
duration_ms,
gate_id: "F-CONV-BE-001".to_string(),
},
}
}
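/// Spawns `apr serve` on a fixed port (18080), waits for a "Listening" log
/// line (falling back to a short sleep), probes `/health` and
/// `/v1/chat/completions` via `curl`, then kills the server.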
#[must_use]
pub fn execute_serve_lifecycle(&self) -> ToolTestResult {
use std::io::{BufRead, BufReader};
use std::process::{Command, Stdio};
use std::time::Duration;
let start = std::time::Instant::now();
let port = 18080;
let mut server_cmd = Command::new("apr");
server_cmd
.arg("serve")
.arg(&self.model_path)
.arg("--port")
.arg(port.to_string())
.stdout(Stdio::piped())
.stderr(Stdio::piped());
if self.no_gpu {
server_cmd.arg("--no-gpu");
}
let mut server = match server_cmd.spawn() {
Ok(child) => child,
Err(e) => {
return ToolTestResult {
tool: "serve-lifecycle".to_string(),
passed: false,
exit_code: -1,
stdout: String::new(),
stderr: format!("Failed to start server: {e}"),
duration_ms: start.elapsed().as_millis() as u64,
gate_id: "F-INTEG-003".to_string(),
};
}
};
let stderr = server.stderr.take();
let ready = stderr.map_or_else(
|| {
std::thread::sleep(Duration::from_secs(3));
true
},
|stderr| {
let reader = BufReader::new(stderr);
let mut ready = false;
for line in reader.lines().take(20).flatten() {
if line.contains("Listening") || line.contains("listening") {
ready = true;
break;
}
}
ready
},
);
if !ready {
std::thread::sleep(Duration::from_secs(2));
}
let health_result = Command::new("curl")
.arg("-sf")
.arg(format!("http://localhost:{port}/health"))
.arg("--connect-timeout")
.arg("5")
.output();
let health_ok = health_result.map(|o| o.status.success()).unwrap_or(false);
let inference_result = Command::new("curl")
.arg("-sf")
.arg("-X")
.arg("POST")
.arg(format!("http://localhost:{port}/v1/chat/completions"))
.arg("-H")
.arg("Content-Type: application/json")
.arg("-d")
.arg(r#"{"messages":[{"role":"user","content":"Hi"}],"max_tokens":5}"#)
.arg("--connect-timeout")
.arg("10")
.output();
let inference_ok = inference_result
.map(|o| o.status.success())
.unwrap_or(false);
let _ = server.kill();
let _ = server.wait();
let duration_ms = start.elapsed().as_millis() as u64;
let passed = health_ok && inference_ok;
let stdout = format!(
"Health check: {}\nInference: {}",
if health_ok { "OK" } else { "FAILED" },
if inference_ok { "OK" } else { "FAILED" }
);
let stderr = if passed {
String::new()
} else {
format!("Serve lifecycle incomplete: health={health_ok}, inference={inference_ok}")
};
ToolTestResult {
tool: "serve-lifecycle".to_string(),
passed,
exit_code: i32::from(!passed),
stdout,
stderr,
duration_ms,
gate_id: "F-INTEG-003".to_string(),
}
}
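/// Runs the full tool suite without the serve lifecycle test.
///
/// A minimal usage sketch (assuming a local GGUF model file exists):
///
/// ```ignore
/// let executor = ToolExecutor::new("model.gguf".to_string(), true, 60_000);
/// for result in executor.execute_all() {
///     println!("{} -> {}", result.tool, result.passed);
/// }
/// ```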
#[must_use]
pub fn execute_all(&self) -> Vec<ToolTestResult> {
self.execute_all_with_serve(false)
}
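/// Runs inspect, inspect-verified, validate, check, bench, every trace level,
/// and the profile variants; optionally appends the serve lifecycle test.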
#[must_use]
pub fn execute_all_with_serve(&self, include_serve: bool) -> Vec<ToolTestResult> {
let mut results = vec![
self.execute_inspect(),
self.execute_inspect_verified(),
self.execute_validate(),
self.execute_check(),
self.execute_bench(),
];
for level in &["none", "basic", "layer", "payload"] {
results.push(self.execute_trace(level));
}
results.push(self.execute_profile());
results.push(self.execute_profile_ci());
results.push(self.execute_profile_ci_assertion_failure());
results.push(self.execute_profile_ci_p99());
if include_serve {
results.push(self.execute_serve_lifecycle());
}
results
}
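/// Maps a raw `CommandOutput` onto a `ToolTestResult`, deriving the gate id
/// from the tool name (e.g. `inspect` -> `F-INSPECT-001`).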
fn build_result_from_output(
&self,
tool: &str,
output: crate::command::CommandOutput,
start: std::time::Instant,
) -> ToolTestResult {
let duration_ms = start.elapsed().as_millis() as u64;
ToolTestResult {
tool: tool.to_string(),
passed: output.success,
exit_code: output.exit_code,
stdout: output.stdout,
stderr: output.stderr,
duration_ms,
gate_id: format!("F-{}-001", tool.to_uppercase().replace('-', "_")),
}
}
}
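/// Outcome of a single CLI tool invocation, tagged with the QA gate it
/// exercises.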
#[derive(Debug, Clone)]
pub struct ToolTestResult {
pub tool: String,
pub passed: bool,
pub exit_code: i32,
pub stdout: String,
pub stderr: String,
pub duration_ms: u64,
pub gate_id: String,
}
impl ToolTestResult {
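/// Converts the result into `Evidence`: corroborated on pass, falsified with
/// the exit code and stderr on failure.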
#[must_use]
pub fn to_evidence(&self, model_id: &ModelId) -> Evidence {
let scenario = QaScenario::new(
model_id.clone(),
Modality::Run,
Backend::Cpu,
Format::Gguf,
format!("apr {} test", self.tool),
0,
);
if self.passed {
Evidence::corroborated(&self.gate_id, scenario, &self.stdout, self.duration_ms)
} else {
Evidence::falsified(
&self.gate_id,
scenario,
format!("Exit code: {}, stderr: {}", self.exit_code, self.stderr),
&self.stdout,
self.duration_ms,
)
}
}
}
}
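/// Aggregate outcome of executing a playbook, including collected evidence.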
#[derive(Debug, Clone)]
pub struct ExecutionResult {
pub playbook_name: String,
pub total_scenarios: usize,
pub passed: usize,
pub failed: usize,
pub skipped: usize,
pub duration_ms: u64,
pub gateway_failed: Option<String>,
pub evidence: EvidenceCollector,
}
impl ExecutionResult {
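/// True when no gateway failed and no scenario failed.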
#[must_use]
pub fn is_success(&self) -> bool {
self.gateway_failed.is_none() && self.failed == 0
}
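/// Percentage of scenarios that passed; `0.0` when no scenarios ran.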
#[must_use]
pub fn pass_rate(&self) -> f64 {
if self.total_scenarios == 0 {
return 0.0;
}
(self.passed as f64 / self.total_scenarios as f64) * 100.0
}
}
#[cfg(test)]
mod tests {
use super::*;
use apr_qa_gen::{Backend, Format, Modality, ModelId, QaScenario};
fn test_scenario() -> QaScenario {
QaScenario::new(
ModelId::new("test", "model"),
Modality::Run,
Backend::Cpu,
Format::Gguf,
"2+2=".to_string(),
42,
)
}
fn test_playbook() -> Playbook {
let yaml = r#"
name: test-playbook
version: "1.0.0"
model:
hf_repo: "test/model"
formats: [gguf]
test_matrix:
modalities: [run]
backends: [cpu]
scenario_count: 5
"#;
Playbook::from_yaml(yaml).expect("Failed to parse")
}
fn create_test_model_file(format: Format) -> (tempfile::TempDir, String) {
let tmp = tempfile::tempdir().unwrap();
let filename = match format {
Format::Gguf => "model.gguf",
Format::Apr => "model.apr",
Format::SafeTensors => "model.safetensors",
};
let file_path = tmp.path().join(filename);
std::fs::write(&file_path, b"fake model data").unwrap();
let path = file_path.to_string_lossy().to_string();
(tmp, path)
}
#[test]
fn test_executor_dry_run() {
let mock_runner = MockCommandRunner::new();
let config = ExecutionConfig {
dry_run: true,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let playbook = test_playbook();
let result = executor.execute(&playbook).expect("Execution failed");
assert_eq!(result.skipped, 5);
assert!(result.passed >= 1);
}
#[test]
fn test_execution_result_pass_rate() {
let result = ExecutionResult {
playbook_name: "test".to_string(),
total_scenarios: 100,
passed: 95,
failed: 5,
skipped: 0,
duration_ms: 1000,
gateway_failed: None,
evidence: EvidenceCollector::new(),
};
assert!((result.pass_rate() - 95.0).abs() < f64::EPSILON);
}
#[test]
fn test_failure_policy_stop_on_first() {
let config = ExecutionConfig {
failure_policy: FailurePolicy::StopOnFirst,
..Default::default()
};
let executor = Executor::with_config(config);
assert_eq!(executor.config.failure_policy, FailurePolicy::StopOnFirst);
}
#[test]
fn test_execution_config_default() {
let config = ExecutionConfig::default();
assert_eq!(config.failure_policy, FailurePolicy::StopOnP0);
assert_eq!(config.default_timeout_ms, 60_000);
assert_eq!(config.max_workers, 4);
assert!(!config.dry_run);
}
#[test]
fn test_executor_default() {
let executor = Executor::default();
assert_eq!(executor.config.failure_policy, FailurePolicy::StopOnP0);
}
#[test]
fn test_executor_evidence() {
let executor = Executor::new();
let evidence = executor.evidence();
assert_eq!(evidence.all().len(), 0);
}
#[test]
fn test_execution_result_is_success() {
let success = ExecutionResult {
playbook_name: "test".to_string(),
total_scenarios: 10,
passed: 10,
failed: 0,
skipped: 0,
duration_ms: 100,
gateway_failed: None,
evidence: EvidenceCollector::new(),
};
assert!(success.is_success());
let with_failures = ExecutionResult {
playbook_name: "test".to_string(),
total_scenarios: 10,
passed: 8,
failed: 2,
skipped: 0,
duration_ms: 100,
gateway_failed: None,
evidence: EvidenceCollector::new(),
};
assert!(!with_failures.is_success());
let with_gateway_failure = ExecutionResult {
playbook_name: "test".to_string(),
total_scenarios: 10,
passed: 0,
failed: 0,
skipped: 0,
duration_ms: 100,
gateway_failed: Some("G1 failed".to_string()),
evidence: EvidenceCollector::new(),
};
assert!(!with_gateway_failure.is_success());
}
#[test]
fn test_execution_result_pass_rate_zero() {
let result = ExecutionResult {
playbook_name: "test".to_string(),
total_scenarios: 0,
passed: 0,
failed: 0,
skipped: 0,
duration_ms: 0,
gateway_failed: None,
evidence: EvidenceCollector::new(),
};
assert!((result.pass_rate() - 0.0).abs() < f64::EPSILON);
}
#[test]
fn test_failure_policy_default() {
let policy = FailurePolicy::default();
assert_eq!(policy, FailurePolicy::StopOnP0);
}
#[test]
fn test_failure_policy_debug() {
let policy = FailurePolicy::CollectAll;
let debug_str = format!("{policy:?}");
assert!(debug_str.contains("CollectAll"));
}
#[test]
fn test_executor_with_collect_all_policy() {
let config = ExecutionConfig {
failure_policy: FailurePolicy::CollectAll,
..Default::default()
};
let executor = Executor::with_config(config);
assert_eq!(executor.config.failure_policy, FailurePolicy::CollectAll);
}
#[test]
fn test_executor_with_stop_on_p0_policy() {
let config = ExecutionConfig {
failure_policy: FailurePolicy::StopOnP0,
..Default::default()
};
let executor = Executor::with_config(config);
assert_eq!(executor.config.failure_policy, FailurePolicy::StopOnP0);
}
#[test]
fn test_executor_config_clone() {
let config = ExecutionConfig::default();
let cloned = config.clone();
assert_eq!(cloned.failure_policy, config.failure_policy);
assert_eq!(cloned.max_workers, config.max_workers);
}
#[test]
fn test_execution_result_clone() {
let result = ExecutionResult {
playbook_name: "test".to_string(),
total_scenarios: 10,
passed: 10,
failed: 0,
skipped: 0,
duration_ms: 100,
gateway_failed: None,
evidence: EvidenceCollector::new(),
};
let cloned = result.clone();
assert_eq!(cloned.playbook_name, result.playbook_name);
assert_eq!(cloned.total_scenarios, result.total_scenarios);
}
#[test]
fn test_check_gateways() {
let executor = Executor::new();
let playbook = test_playbook();
let result = executor.check_gateways(&playbook);
assert!(result.is_ok());
}
#[test]
fn test_executor_debug() {
let executor = Executor::new();
let debug_str = format!("{executor:?}");
assert!(debug_str.contains("Executor"));
}
#[test]
fn test_execution_config_debug() {
let config = ExecutionConfig::default();
let debug_str = format!("{config:?}");
assert!(debug_str.contains("ExecutionConfig"));
}
#[test]
fn test_execution_result_debug() {
let result = ExecutionResult {
playbook_name: "test".to_string(),
total_scenarios: 10,
passed: 10,
failed: 0,
skipped: 0,
duration_ms: 100,
gateway_failed: None,
evidence: EvidenceCollector::new(),
};
let debug_str = format!("{result:?}");
assert!(debug_str.contains("ExecutionResult"));
}
#[test]
fn test_failure_policy_eq() {
assert_eq!(FailurePolicy::StopOnFirst, FailurePolicy::StopOnFirst);
assert_ne!(FailurePolicy::StopOnFirst, FailurePolicy::CollectAll);
}
#[test]
fn test_failure_policy_clone() {
let policy = FailurePolicy::StopOnP0;
let cloned = policy;
assert_eq!(policy, cloned);
}
#[test]
fn test_failure_policy_fail_fast() {
let policy = FailurePolicy::FailFast;
assert!(policy.emit_diagnostic());
assert!(policy.stops_on_any_failure());
}
#[test]
fn test_failure_policy_emit_diagnostic() {
assert!(FailurePolicy::FailFast.emit_diagnostic());
assert!(!FailurePolicy::StopOnFirst.emit_diagnostic());
assert!(!FailurePolicy::StopOnP0.emit_diagnostic());
assert!(!FailurePolicy::CollectAll.emit_diagnostic());
}
#[test]
fn test_failure_policy_stops_on_any_failure() {
assert!(FailurePolicy::FailFast.stops_on_any_failure());
assert!(FailurePolicy::StopOnFirst.stops_on_any_failure());
assert!(!FailurePolicy::StopOnP0.stops_on_any_failure());
assert!(!FailurePolicy::CollectAll.stops_on_any_failure());
}
#[test]
fn test_executor_custom_timeout() {
let config = ExecutionConfig {
default_timeout_ms: 30_000,
..Default::default()
};
let executor = Executor::with_config(config);
assert_eq!(executor.config.default_timeout_ms, 30_000);
}
#[test]
fn test_executor_custom_workers() {
let config = ExecutionConfig {
max_workers: 8,
..Default::default()
};
let executor = Executor::with_config(config);
assert_eq!(executor.config.max_workers, 8);
}
#[test]
fn test_tool_test_result_to_evidence_passed() {
let result = ToolTestResult {
tool: "inspect".to_string(),
passed: true,
exit_code: 0,
stdout: "Model info...".to_string(),
stderr: String::new(),
duration_ms: 100,
gate_id: "F-INSPECT-001".to_string(),
};
let model_id = ModelId::new("test", "model");
let evidence = result.to_evidence(&model_id);
assert!(evidence.outcome.is_pass());
assert_eq!(evidence.gate_id, "F-INSPECT-001");
}
#[test]
fn test_tool_test_result_to_evidence_failed() {
let result = ToolTestResult {
tool: "validate".to_string(),
passed: false,
exit_code: 5,
stdout: String::new(),
stderr: "Validation failed".to_string(),
duration_ms: 50,
gate_id: "F-VALIDATE-001".to_string(),
};
let model_id = ModelId::new("test", "model");
let evidence = result.to_evidence(&model_id);
assert!(evidence.outcome.is_fail());
assert!(!evidence.reason.is_empty());
}
#[test]
fn test_tool_test_result_clone() {
let result = ToolTestResult {
tool: "bench".to_string(),
passed: true,
exit_code: 0,
stdout: "Benchmark output".to_string(),
stderr: String::new(),
duration_ms: 500,
gate_id: "F-BENCH-001".to_string(),
};
let cloned = result.clone();
assert_eq!(cloned.tool, result.tool);
assert_eq!(cloned.passed, result.passed);
assert_eq!(cloned.exit_code, result.exit_code);
}
#[test]
fn test_tool_test_result_debug() {
let result = ToolTestResult {
tool: "profile".to_string(),
passed: true,
exit_code: 0,
stdout: String::new(),
stderr: String::new(),
duration_ms: 1000,
gate_id: "F-PROFILE-001".to_string(),
};
let debug_str = format!("{result:?}");
assert!(debug_str.contains("ToolTestResult"));
assert!(debug_str.contains("profile"));
}
#[test]
fn test_tool_executor_new() {
let executor = ToolExecutor::new("/path/to/model.gguf".to_string(), true, 60_000);
assert!(executor.no_gpu);
}
#[test]
fn test_execution_config_no_gpu() {
let config = ExecutionConfig {
no_gpu: true,
..Default::default()
};
assert!(config.no_gpu);
}
#[test]
fn test_execution_config_conversion_tests() {
let config = ExecutionConfig::default();
assert!(config.run_conversion_tests);
let config_disabled = ExecutionConfig {
run_conversion_tests: false,
..Default::default()
};
assert!(!config_disabled.run_conversion_tests);
}
#[test]
fn test_execution_result_with_skipped() {
let result = ExecutionResult {
playbook_name: "test".to_string(),
total_scenarios: 10,
passed: 5,
failed: 2,
skipped: 3,
duration_ms: 100,
gateway_failed: None,
evidence: EvidenceCollector::new(),
};
assert_eq!(result.skipped, 3);
let executed = result.passed + result.failed;
assert_eq!(executed, 7);
}
#[test]
fn test_executor_config_method() {
let executor = Executor::new();
let config = executor.config();
assert_eq!(config.failure_policy, FailurePolicy::StopOnP0);
}
#[test]
fn test_execution_config_differential_defaults() {
let config = ExecutionConfig::default();
assert!(config.run_differential_tests);
assert!(config.run_trace_payload);
assert!(!config.run_profile_ci);
}
#[test]
fn test_execution_config_differential_custom() {
let config = ExecutionConfig {
run_differential_tests: false,
run_profile_ci: true,
run_trace_payload: false,
..Default::default()
};
assert!(!config.run_differential_tests);
assert!(config.run_profile_ci);
assert!(!config.run_trace_payload);
}
#[test]
fn test_parse_tps_from_output_valid() {
let output = "Some text tok/s: 12.34 more text";
let tps = Executor::parse_tps_from_output(output);
assert!(tps.is_some());
assert!((tps.unwrap() - 12.34).abs() < f64::EPSILON);
}
#[test]
fn test_parse_tps_from_output_with_whitespace() {
let output = "tok/s: 45.67";
let tps = Executor::parse_tps_from_output(output);
assert!(tps.is_some());
assert!((tps.unwrap() - 45.67).abs() < f64::EPSILON);
}
#[test]
fn test_parse_tps_from_output_integer() {
let output = "tok/s: 100";
let tps = Executor::parse_tps_from_output(output);
assert!(tps.is_some());
assert!((tps.unwrap() - 100.0).abs() < f64::EPSILON);
}
#[test]
fn test_parse_tps_from_output_not_found() {
let output = "no tokens per second here";
let tps = Executor::parse_tps_from_output(output);
assert!(tps.is_none());
}
#[test]
fn test_parse_tps_from_output_empty() {
let output = "";
let tps = Executor::parse_tps_from_output(output);
assert!(tps.is_none());
}
#[test]
fn test_parse_tps_from_output_invalid_number() {
let output = "tok/s: abc";
let tps = Executor::parse_tps_from_output(output);
assert!(tps.is_none());
}
#[test]
fn test_extract_generated_text_simple() {
let output = "Hello world\nThis is text";
let result = Executor::extract_generated_text(output);
assert_eq!(result, "Hello world\nThis is text");
}
#[test]
fn test_extract_generated_text_filters_separator() {
let output = "Generated text\n=== BENCHMARK ===\nMore stuff";
let result = Executor::extract_generated_text(output);
assert!(!result.contains("==="));
assert!(result.contains("Generated text"));
}
#[test]
fn test_extract_generated_text_filters_tps() {
let output = "Hello world\ntok/s: 12.34\nAfter tps";
let result = Executor::extract_generated_text(output);
assert!(!result.contains("tok/s"));
assert!(result.contains("Hello world"));
assert!(result.contains("After tps"));
}
#[test]
fn test_extract_generated_text_empty() {
let output = "";
let result = Executor::extract_generated_text(output);
assert!(result.is_empty());
}
#[test]
fn test_extract_generated_text_only_filtered() {
let output = "=== START ===\ntok/s: 10\n=== END ===";
let result = Executor::extract_generated_text(output);
assert!(result.is_empty());
}
#[test]
fn test_extract_output_text_simple() {
let output = "Some header\nOutput:\nThe answer is 4\nCompleted in 1.2s";
let result = Executor::extract_output_text(output);
assert_eq!(result, "The answer is 4");
}
#[test]
fn test_extract_output_text_multiline() {
let output = "Header\nOutput:\nLine 1\nLine 2\nLine 3\nCompleted in 1s";
let result = Executor::extract_output_text(output);
assert_eq!(result, "Line 1 Line 2 Line 3");
}
#[test]
fn test_extract_output_text_no_output_marker() {
let output = "Just some text without Output marker";
let result = Executor::extract_output_text(output);
assert!(result.is_empty());
}
#[test]
fn test_extract_output_text_empty() {
let output = "";
let result = Executor::extract_output_text(output);
assert!(result.is_empty());
}
#[test]
fn test_extract_output_text_empty_output() {
let output = "Header\nOutput:\nCompleted in 1s";
let result = Executor::extract_output_text(output);
assert!(result.is_empty());
}
#[test]
fn test_extract_output_text_stops_at_empty_line() {
let output = "Header\nOutput:\nThe answer\n\nMore text after blank";
let result = Executor::extract_output_text(output);
assert_eq!(result, "The answer");
}
#[test]
fn test_golden_scenario_creation() {
let model_id = ModelId::new("test", "model");
let scenario = Executor::golden_scenario(&model_id);
assert_eq!(scenario.model.org, "test");
assert_eq!(scenario.model.name, "model");
assert_eq!(scenario.modality, Modality::Run);
assert_eq!(scenario.backend, Backend::Cpu);
assert_eq!(scenario.format, Format::Apr);
assert!(scenario.prompt.contains("Golden Rule"));
}
#[test]
fn test_execution_config_golden_rule_default() {
let config = ExecutionConfig::default();
assert!(config.run_golden_rule_test);
assert!(config.golden_reference_path.is_none());
}
#[test]
fn test_execution_config_golden_rule_custom() {
let config = ExecutionConfig {
run_golden_rule_test: false,
golden_reference_path: Some("/path/to/reference.json".to_string()),
..Default::default()
};
assert!(!config.run_golden_rule_test);
assert_eq!(
config.golden_reference_path.as_deref(),
Some("/path/to/reference.json")
);
}
#[test]
fn test_tool_executor_fields() {
let executor = ToolExecutor::new("/path/model.gguf".to_string(), true, 30_000);
assert_eq!(executor.model_path, "/path/model.gguf");
assert!(executor.no_gpu);
assert_eq!(executor.timeout_ms, 30_000);
}
#[test]
fn test_tool_executor_no_gpu_false() {
let executor = ToolExecutor::new("model.gguf".to_string(), false, 60_000);
assert!(!executor.no_gpu);
}
#[test]
fn test_tool_test_result_gate_id() {
let result = ToolTestResult {
tool: "custom-tool".to_string(),
passed: true,
exit_code: 0,
stdout: String::new(),
stderr: String::new(),
duration_ms: 100,
gate_id: "F-CUSTOM-001".to_string(),
};
assert_eq!(result.gate_id, "F-CUSTOM-001");
}
#[test]
fn test_execution_result_fields() {
let result = ExecutionResult {
playbook_name: "my-playbook".to_string(),
total_scenarios: 50,
passed: 45,
failed: 3,
skipped: 2,
duration_ms: 5000,
gateway_failed: None,
evidence: EvidenceCollector::new(),
};
assert_eq!(result.playbook_name, "my-playbook");
assert_eq!(result.total_scenarios, 50);
assert_eq!(result.passed, 45);
assert_eq!(result.failed, 3);
assert_eq!(result.skipped, 2);
assert_eq!(result.duration_ms, 5000);
}
#[test]
fn test_failure_policy_copy() {
let policy = FailurePolicy::CollectAll;
let copied: FailurePolicy = policy;
assert_eq!(copied, FailurePolicy::CollectAll);
}
#[test]
fn test_extract_output_text_with_trailing_content() {
let output =
"Prefix\nOutput:\nAnswer is 4\nMore answer text\nCompleted in 2.5s\nExtra stuff";
let result = Executor::extract_output_text(output);
assert_eq!(result, "Answer is 4 More answer text");
}
#[test]
fn test_extract_generated_text_mixed_content() {
let output = "Line 1\n=== SEPARATOR ===\nLine 2\ntok/s: 50.0\nLine 3";
let result = Executor::extract_generated_text(output);
assert!(result.contains("Line 1"));
assert!(result.contains("Line 2"));
assert!(result.contains("Line 3"));
assert!(!result.contains("==="));
assert!(!result.contains("tok/s"));
}
#[test]
fn test_parse_tps_from_output_at_end() {
let output = "All output finished tok/s: 99.9";
let tps = Executor::parse_tps_from_output(output);
assert!(tps.is_some());
assert!((tps.unwrap() - 99.9).abs() < 0.01);
}
#[test]
fn test_parse_tps_from_output_multiline() {
let output = "Line 1\nLine 2\ntok/s: 25.5\nLine 4";
let tps = Executor::parse_tps_from_output(output);
assert!(tps.is_some());
assert!((tps.unwrap() - 25.5).abs() < f64::EPSILON);
}
#[test]
fn test_extract_output_text_output_at_end() {
let output = "Header info\nOutput:\nFinal answer here";
let result = Executor::extract_output_text(output);
assert_eq!(result, "Final answer here");
}
#[test]
fn test_execution_result_with_gateway_failure() {
let result = ExecutionResult {
playbook_name: "test".to_string(),
total_scenarios: 10,
passed: 0,
failed: 10,
skipped: 0,
duration_ms: 100,
gateway_failed: Some("G1: Model failed to load".to_string()),
evidence: EvidenceCollector::new(),
};
assert!(!result.is_success());
assert!(result.gateway_failed.is_some());
assert!(result.gateway_failed.as_ref().unwrap().contains("G1"));
}
#[test]
fn test_execution_config_all_fields() {
let config = ExecutionConfig {
failure_policy: FailurePolicy::CollectAll,
default_timeout_ms: 30_000,
max_workers: 2,
dry_run: true,
model_path: Some("/path/to/model.gguf".to_string()),
no_gpu: true,
run_conversion_tests: false,
run_differential_tests: false,
run_profile_ci: true,
run_trace_payload: false,
run_golden_rule_test: false,
golden_reference_path: Some("/path/to/ref.json".to_string()),
lock_file_path: None,
check_integrity: false,
warn_implicit_skips: false,
run_hf_parity: false,
hf_parity_corpus_path: None,
hf_parity_model_family: None,
output_dir: Some("test_output".to_string()),
run_contract_tests: false,
run_ollama_parity: false,
};
assert_eq!(config.failure_policy, FailurePolicy::CollectAll);
assert!(config.dry_run);
assert!(config.no_gpu);
assert!(!config.run_conversion_tests);
assert!(!config.run_differential_tests);
assert!(config.run_profile_ci);
assert!(!config.run_contract_tests);
}
#[test]
fn test_tool_test_result_fields_comprehensive() {
let result = ToolTestResult {
tool: "custom-test".to_string(),
passed: false,
exit_code: 127,
stdout: "stdout content".to_string(),
stderr: "error: command not found".to_string(),
duration_ms: 150,
gate_id: "F-CUSTOM-001".to_string(),
};
assert_eq!(result.tool, "custom-test");
assert!(!result.passed);
assert_eq!(result.exit_code, 127);
assert!(!result.stdout.is_empty());
assert!(!result.stderr.is_empty());
}
#[test]
fn test_golden_scenario_prompt_content() {
let model_id = ModelId::new("org", "name");
let scenario = Executor::golden_scenario(&model_id);
assert!(scenario.prompt.contains("Golden Rule"));
assert!(scenario.prompt.contains("convert"));
assert!(scenario.prompt.contains("inference"));
}
#[test]
fn test_executor_with_custom_timeout_and_workers() {
let config = ExecutionConfig {
default_timeout_ms: 120_000,
max_workers: 16,
..Default::default()
};
let executor = Executor::with_config(config);
assert_eq!(executor.config().default_timeout_ms, 120_000);
assert_eq!(executor.config().max_workers, 16);
}
#[test]
fn test_execution_result_pass_rate_partial() {
let result = ExecutionResult {
playbook_name: "test".to_string(),
total_scenarios: 3,
passed: 1,
failed: 2,
skipped: 0,
duration_ms: 100,
gateway_failed: None,
evidence: EvidenceCollector::new(),
};
let rate = result.pass_rate();
assert!((rate - 100.0 / 3.0).abs() < 0.01);
}
#[test]
fn test_tool_test_result_to_evidence_with_content() {
let result = ToolTestResult {
tool: "validate".to_string(),
passed: true,
exit_code: 0,
stdout: "Model validated successfully".to_string(),
stderr: String::new(),
duration_ms: 200,
gate_id: "F-VALIDATE-001".to_string(),
};
let model_id = ModelId::new("org", "model");
let evidence = result.to_evidence(&model_id);
assert!(evidence.outcome.is_pass());
assert!(evidence.output.contains("validated"));
}
#[test]
fn test_tool_test_result_with_zero_duration() {
let result = ToolTestResult {
tool: "fast-test".to_string(),
passed: true,
exit_code: 0,
stdout: "OK".to_string(),
stderr: String::new(),
duration_ms: 0,
gate_id: "F-FAST-001".to_string(),
};
assert_eq!(result.duration_ms, 0);
}
#[test]
fn test_extract_output_text_preserves_content() {
let output = "Info\nOutput:\n First line\n Second line \n Third line\nCompleted in 1s";
let result = Executor::extract_output_text(output);
assert!(result.contains("First line"));
assert!(result.contains("Second line"));
assert!(result.contains("Third line"));
}
use crate::command::MockCommandRunner;
#[test]
fn test_executor_with_mock_runner_subprocess_execution() {
let (_tmp, model_path) = create_test_model_file(Format::Gguf);
let mock_runner = MockCommandRunner::new()
.with_tps(42.0)
.with_inference_response("The answer is 4.");
let config = ExecutionConfig {
model_path: Some(model_path),
..Default::default()
};
let executor = Executor::with_runner(config, Arc::new(mock_runner));
let scenario = QaScenario::new(
ModelId::new("test", "model"),
Modality::Run,
Backend::Cpu,
Format::Gguf,
"What is 2+2?".to_string(),
0,
);
let (output, stderr, exit_code, tps, skipped) = executor.subprocess_execution(&scenario);
assert!(!skipped);
assert!(output.contains("4") || output.is_empty()); assert!(stderr.is_none_or(|s| s.is_empty()));
assert_eq!(exit_code, 0);
let _ = tps;
}
#[test]
fn test_executor_with_mock_runner_inference_failure() {
let (_tmp, model_path) = create_test_model_file(Format::Gguf);
let mock_runner = MockCommandRunner::new().with_inference_failure();
let config = ExecutionConfig {
model_path: Some(model_path),
..Default::default()
};
let executor = Executor::with_runner(config, Arc::new(mock_runner));
let scenario = QaScenario::new(
ModelId::new("test", "model"),
Modality::Run,
Backend::Cpu,
Format::Gguf,
"What is 2+2?".to_string(),
0,
);
let (_, stderr, exit_code, _, _) = executor.subprocess_execution(&scenario);
assert_eq!(exit_code, 1);
assert!(stderr.is_some());
}
#[test]
fn test_executor_with_mock_runner_execute_scenario() {
let mock_runner = MockCommandRunner::new()
.with_tps(30.0)
.with_inference_response("The answer is 4.");
let config = ExecutionConfig {
model_path: Some("/test/model.gguf".to_string()),
..Default::default()
};
let executor = Executor::with_runner(config, Arc::new(mock_runner));
let scenario = QaScenario::new(
ModelId::new("test", "model"),
Modality::Run,
Backend::Cpu,
Format::Gguf,
"What is 2+2?".to_string(),
0,
);
let evidence = executor.execute_scenario(&scenario);
assert!(!evidence.id.is_empty());
assert!(!evidence.gate_id.is_empty());
}
#[test]
fn test_executor_with_mock_runner_golden_rule_test() {
let mock_runner = MockCommandRunner::new()
.with_tps(25.0)
.with_inference_response("Output:\nThe answer is 4\nCompleted in 1s");
let config = ExecutionConfig {
model_path: Some("/test/model.gguf".to_string()),
run_golden_rule_test: true,
run_conversion_tests: false,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let model_id = ModelId::new("test", "model");
let (passed, failed) =
executor.run_golden_rule_test(std::path::Path::new("/test/model.gguf"), &model_id);
assert_eq!(passed + failed, 1);
}
#[test]
fn test_executor_with_mock_runner_golden_rule_conversion_failure() {
let mock_runner = MockCommandRunner::new()
.with_convert_failure()
.with_inference_response("Output:\nThe answer is 4\nCompleted in 1s");
let config = ExecutionConfig {
model_path: Some("/test/model.gguf".to_string()),
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let model_id = ModelId::new("test", "model");
let (passed, failed) =
executor.run_golden_rule_test(std::path::Path::new("/test/model.gguf"), &model_id);
assert_eq!(passed, 0);
assert_eq!(failed, 1);
assert!(!executor.collector.all().is_empty());
}
#[test]
fn test_executor_with_mock_runner_golden_rule_inference_failure() {
let mock_runner = MockCommandRunner::new().with_inference_failure();
let config = ExecutionConfig {
model_path: Some("/test/model.gguf".to_string()),
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let model_id = ModelId::new("test", "model");
let (passed, failed) =
executor.run_golden_rule_test(std::path::Path::new("/test/model.gguf"), &model_id);
assert_eq!(passed, 0);
assert_eq!(failed, 1);
}
#[test]
fn test_tool_executor_with_mock_runner_inspect() {
let mock_runner = MockCommandRunner::new();
let executor = ToolExecutor::with_runner(
"/test/model.gguf".to_string(),
true,
60_000,
Arc::new(mock_runner),
);
let result = executor.execute_inspect();
assert!(result.passed);
assert_eq!(result.exit_code, 0);
assert!(result.stdout.contains("GGUF"));
}
#[test]
fn test_tool_executor_with_mock_runner_validate() {
let mock_runner = MockCommandRunner::new();
let executor = ToolExecutor::with_runner(
"/test/model.gguf".to_string(),
false,
60_000,
Arc::new(mock_runner),
);
let result = executor.execute_validate();
assert!(result.passed);
assert_eq!(result.exit_code, 0);
}
#[test]
fn test_tool_executor_with_mock_runner_bench() {
let mock_runner = MockCommandRunner::new().with_tps(50.0);
let executor = ToolExecutor::with_runner(
"/test/model.gguf".to_string(),
true,
60_000,
Arc::new(mock_runner),
);
let result = executor.execute_bench();
assert!(result.passed);
assert_eq!(result.exit_code, 0);
assert!(result.stdout.contains("50.0"));
}
#[test]
fn test_tool_executor_with_mock_runner_check() {
let mock_runner = MockCommandRunner::new();
let executor = ToolExecutor::with_runner(
"/test/model.gguf".to_string(),
false,
60_000,
Arc::new(mock_runner),
);
let result = executor.execute_check();
assert!(result.passed);
assert_eq!(result.exit_code, 0);
}
#[test]
fn test_tool_executor_with_mock_runner_trace() {
let mock_runner = MockCommandRunner::new().with_tps(25.0);
let executor = ToolExecutor::with_runner(
"/test/model.gguf".to_string(),
true,
60_000,
Arc::new(mock_runner),
);
let result = executor.execute_trace("layer");
assert!(result.passed);
assert_eq!(result.exit_code, 0);
assert!(result.tool.contains("trace"));
}
#[test]
fn test_tool_executor_with_mock_runner_profile() {
let mock_runner = MockCommandRunner::new().with_tps(35.0);
let executor = ToolExecutor::with_runner(
"/test/model.gguf".to_string(),
false,
60_000,
Arc::new(mock_runner),
);
let result = executor.execute_profile();
assert!(result.passed);
assert_eq!(result.exit_code, 0);
assert!(result.stdout.contains("throughput"));
}
#[test]
fn test_tool_executor_with_mock_runner_profile_ci() {
let mock_runner = MockCommandRunner::new().with_tps(20.0);
let executor = ToolExecutor::with_runner(
"/test/model.gguf".to_string(),
false,
60_000,
Arc::new(mock_runner),
);
let result = executor.execute_profile_ci();
assert!(result.passed);
assert!(result.stdout.contains("passed"));
}
#[test]
fn test_tool_executor_with_mock_runner_profile_ci_assertion_failure() {
let mock_runner = MockCommandRunner::new().with_tps(5.0);
let executor = ToolExecutor::with_runner(
"/test/model.gguf".to_string(),
false,
60_000,
Arc::new(mock_runner),
);
let result = executor.execute_profile_ci_assertion_failure();
// The gate passes precisely because the CI assertion failed as intended.
assert!(result.passed);
assert!(result.stdout.contains("\"passed\":false"));
}
#[test]
fn test_tool_executor_with_mock_runner_profile_ci_p99() {
let mock_runner = MockCommandRunner::new().with_tps(30.0);
let executor = ToolExecutor::with_runner(
"/test/model.gguf".to_string(),
false,
60_000,
Arc::new(mock_runner),
);
let result = executor.execute_profile_ci_p99();
assert!(result.passed);
assert!(result.stdout.contains("latency"));
}
#[test]
fn test_tool_executor_with_runner_debug() {
let mock_runner = MockCommandRunner::new();
let executor = ToolExecutor::with_runner(
"/test/model.gguf".to_string(),
true,
60_000,
Arc::new(mock_runner),
);
let debug_str = format!("{executor:?}");
assert!(debug_str.contains("ToolExecutor"));
assert!(debug_str.contains("model_path"));
}
#[test]
fn test_executor_with_runner_debug() {
let mock_runner = MockCommandRunner::new();
let config = ExecutionConfig::default();
let executor = Executor::with_runner(config, Arc::new(mock_runner));
let debug_str = format!("{executor:?}");
assert!(debug_str.contains("Executor"));
assert!(debug_str.contains("config"));
}
#[test]
fn test_executor_subprocess_execution_no_gpu() {
let mock_runner = MockCommandRunner::new();
let config = ExecutionConfig {
model_path: Some("/test/model.gguf".to_string()),
no_gpu: true,
..Default::default()
};
let executor = Executor::with_runner(config, Arc::new(mock_runner));
let scenario = QaScenario::new(
ModelId::new("test", "model"),
Modality::Run,
Backend::Cpu,
Format::Gguf,
"Test prompt".to_string(),
0,
);
let (_, _, exit_code, _, _) = executor.subprocess_execution(&scenario);
assert_eq!(exit_code, 0);
}
#[test]
fn test_executor_execute_playbook_with_subprocess_mode() {
let mock_runner = MockCommandRunner::new()
.with_tps(25.0)
.with_inference_response("The answer is 4.");
let config = ExecutionConfig {
model_path: Some("/test/model.gguf".to_string()),
run_conversion_tests: false,
run_differential_tests: false,
run_golden_rule_test: false,
run_trace_payload: false,
run_profile_ci: false,
run_contract_tests: false,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let yaml = r#"
name: test-subprocess
version: "1.0.0"
model:
hf_repo: "test/model"
formats: [gguf]
test_matrix:
modalities: [run]
backends: [cpu]
scenario_count: 3
"#;
let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
let result = executor.execute(&playbook).expect("Execution failed");
assert_eq!(result.total_scenarios, 3);
assert!(result.passed > 0 || result.failed > 0);
}
#[test]
fn test_build_result_from_output() {
let mock_runner = MockCommandRunner::new();
let executor = ToolExecutor::with_runner(
"/test/model.gguf".to_string(),
false,
60_000,
Arc::new(mock_runner),
);
let output = crate::command::CommandOutput::success("test output");
let start = std::time::Instant::now();
let result = executor.build_result_from_output("test-tool", output, start);
assert!(result.passed);
assert_eq!(result.exit_code, 0);
assert_eq!(result.tool, "test-tool");
assert_eq!(result.gate_id, "F-TEST_TOOL-001");
}
#[test]
fn test_build_result_from_output_failure() {
let mock_runner = MockCommandRunner::new();
let executor = ToolExecutor::with_runner(
"/test/model.gguf".to_string(),
false,
60_000,
Arc::new(mock_runner),
);
let output = crate::command::CommandOutput::failure(1, "error message");
let start = std::time::Instant::now();
let result = executor.build_result_from_output("failed-tool", output, start);
assert!(!result.passed);
assert_eq!(result.exit_code, 1);
assert_eq!(result.stderr, "error message");
}
#[test]
fn test_tool_executor_execute_all() {
let mock_runner = MockCommandRunner::new().with_tps(30.0);
let executor = ToolExecutor::with_runner(
"/test/model.gguf".to_string(),
true,
60_000,
Arc::new(mock_runner),
);
let results = executor.execute_all();
assert!(results.len() >= 12);
let passed_count = results.iter().filter(|r| r.passed).count();
assert!(passed_count > 0);
}
#[test]
fn test_tool_executor_execute_all_with_serve_false() {
let mock_runner = MockCommandRunner::new().with_tps(30.0);
let executor = ToolExecutor::with_runner(
"/test/model.gguf".to_string(),
false,
60_000,
Arc::new(mock_runner),
);
let results = executor.execute_all_with_serve(false);
assert!(results.len() >= 12);
}
#[test]
fn test_executor_execute_scenario_crash() {
let mock_runner = MockCommandRunner::new().with_crash();
let config = ExecutionConfig {
model_path: Some("/test/model.gguf".to_string()),
..Default::default()
};
let executor = Executor::with_runner(config, Arc::new(mock_runner));
let scenario = QaScenario::new(
ModelId::new("test", "model"),
Modality::Run,
Backend::Cpu,
Format::Gguf,
"What is 2+2?".to_string(),
0,
);
let evidence = executor.execute_scenario(&scenario);
assert!(evidence.outcome.is_fail());
assert_eq!(evidence.gate_id, "G3-STABLE");
}
#[test]
fn test_executor_run_conversion_tests_success() {
let mock_runner = MockCommandRunner::new();
let config = ExecutionConfig {
model_path: Some("/test/model.gguf".to_string()),
run_conversion_tests: true,
no_gpu: true,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let model_id = ModelId::new("test", "model");
let (passed, failed) =
executor.run_conversion_tests(std::path::Path::new("/test/model.gguf"), &model_id);
// Smoke test: counts depend on the mock's conversion coverage, so none are asserted.
let _ = (passed, failed);
}
#[test]
fn test_executor_execute_scenario_with_stderr() {
let mock_runner =
MockCommandRunner::new().with_inference_response_and_stderr("Output: 4", "Warning");
let config = ExecutionConfig {
model_path: Some("/test/model.gguf".to_string()),
..Default::default()
};
let executor = Executor::with_runner(config, Arc::new(mock_runner));
let scenario = QaScenario::new(
ModelId::new("test", "model"),
Modality::Run,
Backend::Cpu,
Format::Gguf,
"What is 2+2?".to_string(),
0,
);
let evidence = executor.execute_scenario(&scenario);
// stderr capture is runner-dependent; exercising the code path is the point here.
let _ = evidence.stderr;
}
#[test]
fn test_executor_execute_with_conversion_and_golden() {
let mock_runner = MockCommandRunner::new()
.with_tps(25.0)
.with_inference_response("Output:\nThe answer is 4\nCompleted in 1s");
let config = ExecutionConfig {
model_path: Some("/test/model.gguf".to_string()),
run_conversion_tests: true,
run_golden_rule_test: true,
no_gpu: true,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let yaml = r#"
name: test-full
version: "1.0.0"
model:
hf_repo: "test/model"
formats: [gguf]
test_matrix:
modalities: [run]
backends: [cpu]
scenario_count: 2
"#;
let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
let result = executor.execute(&playbook).expect("Execution failed");
assert!(result.total_scenarios >= 2);
}
#[test]
fn test_executor_golden_rule_output_differs() {
let mock_runner = MockCommandRunner::new()
.with_inference_response("Output:\nThe answer is 4\nCompleted in 1s");
let config = ExecutionConfig::default();
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let model_id = ModelId::new("test", "model");
let (passed, failed) =
executor.run_golden_rule_test(std::path::Path::new("/test/model.gguf"), &model_id);
assert_eq!(passed, 1);
assert_eq!(failed, 0);
}
#[test]
fn test_executor_subprocess_with_tps_parsing() {
let mock_runner = MockCommandRunner::new().with_tps(42.5);
let config = ExecutionConfig {
model_path: Some("/test/model.gguf".to_string()),
..Default::default()
};
let executor = Executor::with_runner(config, Arc::new(mock_runner));
let scenario = test_scenario();
let (_, _, _, tps, _) = executor.subprocess_execution(&scenario);
assert!(tps.is_some());
assert!((tps.unwrap() - 42.5).abs() < f64::EPSILON);
}
#[test]
fn test_tool_test_result_to_evidence_gate_id() {
let result = ToolTestResult {
tool: "special".to_string(),
passed: true,
exit_code: 0,
stdout: "OK".to_string(),
stderr: String::new(),
duration_ms: 50,
gate_id: "F-SPECIAL-TEST-001".to_string(),
};
let model_id = ModelId::new("org", "name");
let evidence = result.to_evidence(&model_id);
assert_eq!(evidence.gate_id, "F-SPECIAL-TEST-001");
assert_eq!(evidence.scenario.model.org, "org");
assert_eq!(evidence.scenario.model.name, "name");
}
#[test]
fn test_execution_result_evidence_collector() {
let mut collector = EvidenceCollector::new();
let evidence = Evidence::corroborated("F-TEST-001", test_scenario(), "Test output", 100);
collector.add(evidence);
let result = ExecutionResult {
playbook_name: "test".to_string(),
total_scenarios: 1,
passed: 1,
failed: 0,
skipped: 0,
duration_ms: 100,
gateway_failed: None,
evidence: collector,
};
assert_eq!(result.evidence.all().len(), 1);
}
#[test]
fn test_executor_execute_scenario_with_metrics() {
let mock_runner = MockCommandRunner::new()
.with_tps(75.5)
.with_inference_response("The answer is 4.");
let config = ExecutionConfig {
model_path: Some("/test/model.gguf".to_string()),
..Default::default()
};
let executor = Executor::with_runner(config, Arc::new(mock_runner));
let scenario = test_scenario();
let evidence = executor.execute_scenario(&scenario);
// Duration varies by environment; just confirm metrics are populated.
let _ = evidence.metrics.duration_ms;
}
#[test]
fn test_extract_output_text_with_whitespace_lines() {
let output = "Header\nOutput:\n \nActual content\n \nCompleted in 1s";
let result = Executor::extract_output_text(output);
assert!(result.contains("Actual content"));
}
#[test]
fn test_extract_output_text_only_header() {
let output = "Only Header no Output marker";
let result = Executor::extract_output_text(output);
assert!(result.is_empty());
}
#[test]
fn test_parse_tps_from_output_multiple_colons() {
let output = "Info: tok/s: 88.8 more info";
let tps = Executor::parse_tps_from_output(output);
assert!(tps.is_some());
assert!((tps.unwrap() - 88.8).abs() < f64::EPSILON);
}
#[test]
fn test_tool_executor_trace_all_levels() {
let mock_runner = MockCommandRunner::new();
let executor = ToolExecutor::with_runner(
"/test/model.gguf".to_string(),
false,
60_000,
Arc::new(mock_runner),
);
for level in &["none", "basic", "layer", "payload"] {
let result = executor.execute_trace(level);
assert!(result.passed);
assert!(result.tool.contains("trace"));
assert!(result.tool.contains(level));
}
}
#[test]
fn test_execution_config_partial_override() {
let config = ExecutionConfig {
dry_run: true,
max_workers: 1,
..Default::default()
};
assert!(config.dry_run);
assert_eq!(config.max_workers, 1);
assert!(config.run_conversion_tests);
assert!(config.run_golden_rule_test);
}
#[test]
fn test_executor_evidence_after_execute() {
let mock_runner = MockCommandRunner::new().with_inference_response("The answer is 4.");
let config = ExecutionConfig {
model_path: Some("/test/model.gguf".to_string()),
run_conversion_tests: false,
run_golden_rule_test: false,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let yaml = r#"
name: evidence-test
version: "1.0.0"
model:
hf_repo: "test/model"
formats: [gguf]
test_matrix:
modalities: [run]
backends: [cpu]
scenario_count: 3
"#;
let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
let _ = executor.execute(&playbook).expect("Execution failed");
assert!(!executor.evidence().all().is_empty());
}
#[test]
fn test_tool_executor_gate_id_format() {
let mock_runner = MockCommandRunner::new();
let executor = ToolExecutor::with_runner(
"/test/model.gguf".to_string(),
false,
60_000,
Arc::new(mock_runner),
);
let result = executor.execute_inspect();
assert_eq!(result.gate_id, "F-INSPECT-001");
let result = executor.execute_validate();
assert_eq!(result.gate_id, "F-VALIDATE-001");
let result = executor.execute_bench();
assert_eq!(result.gate_id, "F-BENCH-001");
let result = executor.execute_check();
assert_eq!(result.gate_id, "F-CHECK-001");
let result = executor.execute_profile();
assert_eq!(result.gate_id, "F-PROFILE-001");
}
#[test]
fn test_tool_executor_profile_ci_feature_unavailable() {
let mock_runner = MockCommandRunner::new().with_profile_ci_unavailable();
let executor = ToolExecutor::with_runner(
"/test/model.gguf".to_string(),
false,
60_000,
Arc::new(mock_runner),
);
let result = executor.execute_profile_ci();
assert!(!result.passed);
assert_eq!(result.exit_code, -2);
assert!(result.stderr.contains("Feature not available"));
assert_eq!(result.gate_id, "F-PROFILE-006");
}
#[test]
fn test_tool_executor_profile_ci_assertion_unavailable() {
let mock_runner = MockCommandRunner::new().with_profile_ci_unavailable();
let executor = ToolExecutor::with_runner(
"/test/model.gguf".to_string(),
false,
60_000,
Arc::new(mock_runner),
);
let result = executor.execute_profile_ci_assertion_failure();
assert!(!result.passed);
assert_eq!(result.exit_code, -2);
assert_eq!(result.gate_id, "F-PROFILE-007");
}
#[test]
fn test_tool_executor_profile_ci_p99_unavailable() {
let mock_runner = MockCommandRunner::new().with_profile_ci_unavailable();
let executor = ToolExecutor::with_runner(
"/test/model.gguf".to_string(),
false,
60_000,
Arc::new(mock_runner),
);
let result = executor.execute_profile_ci_p99();
assert!(!result.passed);
assert_eq!(result.exit_code, -2);
assert_eq!(result.gate_id, "F-PROFILE-008");
}
#[test]
fn test_tool_executor_inspect_failure() {
let mock_runner = MockCommandRunner::new().with_inspect_failure();
let executor = ToolExecutor::with_runner(
"/test/model.gguf".to_string(),
false,
60_000,
Arc::new(mock_runner),
);
let result = executor.execute_inspect();
assert!(!result.passed);
assert_eq!(result.exit_code, 1);
}
#[test]
fn test_tool_executor_validate_failure() {
let mock_runner = MockCommandRunner::new().with_validate_failure();
let executor = ToolExecutor::with_runner(
"/test/model.gguf".to_string(),
false,
60_000,
Arc::new(mock_runner),
);
let result = executor.execute_validate();
assert!(!result.passed);
assert_eq!(result.exit_code, 1);
}
#[test]
fn test_tool_executor_bench_failure() {
let mock_runner = MockCommandRunner::new().with_bench_failure();
let executor = ToolExecutor::with_runner(
"/test/model.gguf".to_string(),
false,
60_000,
Arc::new(mock_runner),
);
let result = executor.execute_bench();
assert!(!result.passed);
assert_eq!(result.exit_code, 1);
}
#[test]
fn test_tool_executor_check_failure() {
let mock_runner = MockCommandRunner::new().with_check_failure();
let executor = ToolExecutor::with_runner(
"/test/model.gguf".to_string(),
false,
60_000,
Arc::new(mock_runner),
);
let result = executor.execute_check();
assert!(!result.passed);
assert_eq!(result.exit_code, 1);
}
#[test]
fn test_tool_executor_profile_failure() {
let mock_runner = MockCommandRunner::new().with_profile_failure();
let executor = ToolExecutor::with_runner(
"/test/model.gguf".to_string(),
false,
60_000,
Arc::new(mock_runner),
);
let result = executor.execute_profile();
assert!(!result.passed);
assert_eq!(result.exit_code, 1);
}
#[test]
fn test_tool_executor_trace_failure() {
let mock_runner = MockCommandRunner::new().with_inference_failure();
let executor = ToolExecutor::with_runner(
"/test/model.gguf".to_string(),
false,
60_000,
Arc::new(mock_runner),
);
let result = executor.execute_trace("layer");
assert!(!result.passed);
assert_eq!(result.exit_code, 1);
}
#[test]
fn test_tool_executor_profile_ci_passes_with_metrics() {
let mock_runner = MockCommandRunner::new().with_tps(100.0);
let executor = ToolExecutor::with_runner(
"/test/model.gguf".to_string(),
false,
60_000,
Arc::new(mock_runner),
);
let result = executor.execute_profile_ci();
assert!(result.passed);
assert!(result.stdout.contains("throughput"));
}
#[test]
fn test_tool_executor_with_no_gpu_true() {
let mock_runner = MockCommandRunner::new();
let executor = ToolExecutor::with_runner(
"/test/model.gguf".to_string(),
true, // no_gpu
30_000,
Arc::new(mock_runner),
);
let debug_str = format!("{executor:?}");
assert!(debug_str.contains("no_gpu: true"));
}
#[test]
fn test_tool_executor_execute_trace_levels() {
let mock_runner = MockCommandRunner::new();
let executor = ToolExecutor::with_runner(
"/test/model.gguf".to_string(),
false,
60_000,
Arc::new(mock_runner),
);
let result_layer = executor.execute_trace("layer");
assert!(result_layer.tool.contains("trace-layer"));
let result_op = executor.execute_trace("op");
assert!(result_op.tool.contains("trace-op"));
let result_tensor = executor.execute_trace("tensor");
assert!(result_tensor.tool.contains("trace-tensor"));
}
#[test]
fn test_resolve_model_path_gguf() {
let temp_dir = tempfile::tempdir().unwrap();
let gguf_dir = temp_dir.path().join("gguf");
std::fs::create_dir_all(&gguf_dir).unwrap();
std::fs::write(gguf_dir.join("model.gguf"), b"fake").unwrap();
let config = ExecutionConfig {
model_path: Some(temp_dir.path().to_string_lossy().to_string()),
..Default::default()
};
let executor = Executor::with_config(config);
let scenario = QaScenario::new(
ModelId::new("test", "model"),
Modality::Run,
Backend::Cpu,
Format::Gguf,
"test".to_string(),
0,
);
let path = executor.resolve_model_path(&scenario);
assert!(path.unwrap().contains("gguf"));
}
#[test]
fn test_resolve_model_path_apr() {
let temp_dir = tempfile::tempdir().unwrap();
let apr_dir = temp_dir.path().join("apr");
std::fs::create_dir_all(&apr_dir).unwrap();
std::fs::write(apr_dir.join("model.apr"), b"fake").unwrap();
let config = ExecutionConfig {
model_path: Some(temp_dir.path().to_string_lossy().to_string()),
..Default::default()
};
let executor = Executor::with_config(config);
let scenario = QaScenario::new(
ModelId::new("test", "model"),
Modality::Run,
Backend::Cpu,
Format::Apr,
"test".to_string(),
0,
);
let path = executor.resolve_model_path(&scenario);
assert!(path.unwrap().contains("apr"));
}
#[test]
fn test_resolve_model_path_safetensors() {
let temp_dir = tempfile::tempdir().unwrap();
let st_dir = temp_dir.path().join("safetensors");
std::fs::create_dir_all(&st_dir).unwrap();
std::fs::write(st_dir.join("model.safetensors"), b"fake").unwrap();
let config = ExecutionConfig {
model_path: Some(temp_dir.path().to_string_lossy().to_string()),
..Default::default()
};
let executor = Executor::with_config(config);
let scenario = QaScenario::new(
ModelId::new("test", "model"),
Modality::Run,
Backend::Cpu,
Format::SafeTensors,
"test".to_string(),
0,
);
let path = executor.resolve_model_path(&scenario);
assert!(path.unwrap().contains("safetensors"));
}
#[test]
fn test_resolve_model_path_no_cache() {
let config = ExecutionConfig {
model_path: None,
..Default::default()
};
let executor = Executor::with_config(config);
let scenario = QaScenario::new(
ModelId::new("test", "model"),
Modality::Run,
Backend::Cpu,
Format::Gguf,
"test".to_string(),
0,
);
let path = executor.resolve_model_path(&scenario);
assert!(path.is_none());
}
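// Dry-run execution never dispatches scenarios to the runner: every generated
// scenario is reported as skipped, while at least one non-scenario check still
// passes (hence `passed >= 1` below).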
#[test]
fn test_executor_execute_dry_run() {
let mock_runner = MockCommandRunner::new();
let config = ExecutionConfig {
dry_run: true,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let yaml = r#"
name: dry-run-test
version: "1.0.0"
model:
  hf_repo: "test/model"
  formats: [gguf]
test_matrix:
  modalities: [run]
  backends: [cpu]
  scenario_count: 3
"#;
let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
let result = executor.execute(&playbook).expect("Execution failed");
assert_eq!(result.skipped, 3);
assert!(result.passed >= 1);
}
#[test]
fn test_executor_execute_with_stop_on_first_policy() {
let mock_runner = MockCommandRunner::new().with_inference_failure();
let config = ExecutionConfig {
model_path: Some("/test/model.gguf".to_string()),
failure_policy: FailurePolicy::StopOnFirst,
run_conversion_tests: false,
run_golden_rule_test: false,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let yaml = r#"
name: stop-on-first-test
version: "1.0.0"
model:
  hf_repo: "test/model"
  formats: [gguf]
test_matrix:
  modalities: [run]
  backends: [cpu]
  scenario_count: 5
"#;
let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
let result = executor.execute(&playbook).expect("Execution failed");
assert_eq!(result.failed, 1);
}
#[test]
fn test_executor_execute_with_collect_all_policy() {
let mock_runner = MockCommandRunner::new().with_inference_failure();
let config = ExecutionConfig {
model_path: Some("/test/model.gguf".to_string()),
failure_policy: FailurePolicy::CollectAll,
run_conversion_tests: false,
run_golden_rule_test: false,
run_contract_tests: false,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let yaml = r#"
name: collect-all-test
version: "1.0.0"
model:
  hf_repo: "test/model"
  formats: [gguf]
test_matrix:
  modalities: [run]
  backends: [cpu]
  scenario_count: 3
"#;
let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
let result = executor.execute(&playbook).expect("Execution failed");
assert_eq!(result.failed, 3);
}
#[test]
fn test_executor_default_impl() {
let executor = Executor::default();
assert_eq!(executor.config().max_workers, 4);
assert!(!executor.config().dry_run);
}
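// Output-parsing helpers: parse_tps_from_output scans stdout for a
// `tok/s: <value>` line, while extract_generated_text drops banner (`===`)
// and metric lines, keeping only the generated text.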
#[test]
fn test_parse_tps_from_output_with_tps() {
let output = "Info: Loading model\ntok/s: 42.5\nDone";
let tps = Executor::parse_tps_from_output(output);
assert!(tps.is_some());
assert!((tps.unwrap() - 42.5).abs() < 0.01);
}
#[test]
fn test_parse_tps_from_output_no_tps() {
let output = "Some random output without tok/s";
let tps = Executor::parse_tps_from_output(output);
assert!(tps.is_none());
}
#[test]
fn test_extract_generated_text() {
let output = "=== Model Info ===\nThis is generated text\ntok/s: 30.0";
let text = Executor::extract_generated_text(output);
assert!(text.contains("This is generated text"));
assert!(!text.contains("tok/s"));
assert!(!text.contains("==="));
}
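// extract_output_text keeps the lines between the `Output:` marker and the
// trailing `Completed ...` line, tolerating blank lines in between.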
#[test]
fn test_extract_output_text_multiline_detailed() {
let output = "Some prefix\nOutput:\nLine 1\nLine 2\nLine 3\nCompleted in 1s";
let text = Executor::extract_output_text(output);
assert!(text.contains("Line 1"));
assert!(text.contains("Line 2"));
assert!(text.contains("Line 3"));
}
#[test]
fn test_extract_output_text_with_empty_lines() {
let output = "Output:\nActual output here\n\nCompleted";
let text = Executor::extract_output_text(output);
assert!(text.contains("Actual output here"));
}
#[test]
fn test_failure_policy_default_is_stop_on_p0() {
let policy = FailurePolicy::default();
assert_eq!(policy, FailurePolicy::StopOnP0);
}
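// A minimal companion check (a sketch relying only on the PartialEq/Debug
// derives already exercised above): the failure policies used throughout
// these tests are distinct variants.
#[test]
fn test_failure_policy_variants_are_distinct() {
assert_ne!(FailurePolicy::StopOnFirst, FailurePolicy::CollectAll);
assert_ne!(FailurePolicy::StopOnP0, FailurePolicy::StopOnFirst);
assert_ne!(FailurePolicy::StopOnP0, FailurePolicy::CollectAll);
}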
#[test]
fn test_execution_config_debug_display() {
let config = ExecutionConfig::default();
let debug_str = format!("{config:?}");
assert!(debug_str.contains("ExecutionConfig"));
assert!(debug_str.contains("failure_policy"));
}
#[test]
fn test_tool_test_result_all_fields() {
let result = ToolTestResult {
tool: "test-tool".to_string(),
passed: true,
exit_code: 0,
stdout: "stdout".to_string(),
stderr: String::new(),
duration_ms: 100,
gate_id: "F-TEST-001".to_string(),
};
assert_eq!(result.tool, "test-tool");
assert!(result.passed);
assert_eq!(result.gate_id, "F-TEST-001");
}
#[test]
fn test_executor_evidence_accessor() {
let executor = Executor::new();
let evidence = executor.evidence();
assert_eq!(evidence.total(), 0);
}
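// ExecutionResult::is_success is falsified by a single failed scenario, no
// matter how many passed.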
#[test]
fn test_execution_result_is_success_false_due_to_failed() {
let result = ExecutionResult {
playbook_name: "test".to_string(),
total_scenarios: 10,
passed: 9,
failed: 1,
skipped: 0,
duration_ms: 100,
gateway_failed: None,
evidence: EvidenceCollector::new(),
};
assert!(!result.is_success());
}
#[test]
fn test_execution_result_is_success_when_all_pass() {
let result = ExecutionResult {
playbook_name: "test".to_string(),
total_scenarios: 10,
passed: 10,
failed: 0,
skipped: 0,
duration_ms: 100,
gateway_failed: None,
evidence: EvidenceCollector::new(),
};
assert!(result.is_success());
}
#[test]
fn test_tool_test_result_to_evidence_when_failed() {
let result = ToolTestResult {
tool: "validate".to_string(),
passed: false,
exit_code: 1,
stdout: String::new(),
stderr: "Validation failed".to_string(),
duration_ms: 200,
gate_id: "F-VALIDATE-001".to_string(),
};
let model_id = ModelId::new("org", "model");
let evidence = result.to_evidence(&model_id);
assert!(!evidence.outcome.is_pass());
assert!(evidence.reason.contains("Validation failed") || evidence.output.is_empty());
}
#[test]
fn test_executor_with_mock_runner_trace_failure_case() {
let mock_runner = MockCommandRunner::new().with_inference_failure();
let config = ExecutionConfig {
model_path: Some("/test/model.gguf".to_string()),
..Default::default()
};
let executor = Executor::with_runner(config, Arc::new(mock_runner));
let scenario = QaScenario::new(
ModelId::new("test", "model"),
Modality::Run,
Backend::Cpu,
Format::Gguf,
"What is 2+2?".to_string(),
0,
);
let (_, stderr, exit_code, _, _) = executor.subprocess_execution(&scenario);
assert_eq!(exit_code, 1);
assert!(stderr.is_some());
}
#[test]
fn test_resolve_model_path_apr_format() {
let tmp = tempfile::tempdir().unwrap();
let apr_dir = tmp.path().join("apr");
std::fs::create_dir_all(&apr_dir).unwrap();
std::fs::write(apr_dir.join("model.apr"), b"fake apr").unwrap();
let config = ExecutionConfig {
model_path: Some(tmp.path().to_string_lossy().to_string()),
..Default::default()
};
let executor = Executor::with_config(config);
let scenario = QaScenario::new(
ModelId::new("test", "model"),
Modality::Run,
Backend::Cpu,
Format::Apr,
"test".to_string(),
0,
);
let path = executor.resolve_model_path(&scenario);
assert!(path.is_some());
assert!(path.unwrap().contains("apr"));
}
#[test]
fn test_resolve_model_path_safetensors_format() {
let tmp = tempfile::tempdir().unwrap();
let st_dir = tmp.path().join("safetensors");
std::fs::create_dir_all(&st_dir).unwrap();
std::fs::write(st_dir.join("model.safetensors"), b"fake st").unwrap();
let config = ExecutionConfig {
model_path: Some(tmp.path().to_string_lossy().to_string()),
..Default::default()
};
let executor = Executor::with_config(config);
let scenario = QaScenario::new(
ModelId::new("test", "model"),
Modality::Run,
Backend::Cpu,
Format::SafeTensors,
"test".to_string(),
0,
);
let path = executor.resolve_model_path(&scenario);
assert!(path.is_some());
assert!(path.unwrap().contains("safetensors"));
}
#[test]
fn test_resolve_model_path_gguf_format() {
let tmp = tempfile::tempdir().unwrap();
let gguf_dir = tmp.path().join("gguf");
std::fs::create_dir_all(&gguf_dir).unwrap();
std::fs::write(gguf_dir.join("model.gguf"), b"fake gguf").unwrap();
let config = ExecutionConfig {
model_path: Some(tmp.path().to_string_lossy().to_string()),
..Default::default()
};
let executor = Executor::with_config(config);
let scenario = QaScenario::new(
ModelId::new("test", "model"),
Modality::Run,
Backend::Cpu,
Format::Gguf,
"test".to_string(),
0,
);
let path = executor.resolve_model_path(&scenario);
assert!(path.is_some());
assert!(path.unwrap().contains("gguf"));
}
#[test]
fn test_resolve_model_path_no_model_path() {
let config = ExecutionConfig {
model_path: None,
..Default::default()
};
let executor = Executor::with_config(config);
let scenario = QaScenario::new(
ModelId::new("test", "model"),
Modality::Run,
Backend::Cpu,
Format::Gguf,
"test".to_string(),
0,
);
let path = executor.resolve_model_path(&scenario);
assert!(path.is_none());
}
#[test]
fn test_executor_subprocess_execution_apr() {
let mock_runner = MockCommandRunner::new().with_inference_response("The answer is 4.");
let config = ExecutionConfig {
model_path: Some("/test/cache".to_string()),
..Default::default()
};
let executor = Executor::with_runner(config, Arc::new(mock_runner));
let scenario_apr = QaScenario::new(
ModelId::new("test", "model"),
Modality::Run,
Backend::Cpu,
Format::Apr,
"What is 2+2?".to_string(),
0,
);
let (_, _, exit_code, _, _) = executor.subprocess_execution(&scenario_apr);
assert_eq!(exit_code, 0);
}
#[test]
fn test_executor_subprocess_execution_safetensors() {
let mock_runner = MockCommandRunner::new().with_inference_response("The answer is 4.");
let config = ExecutionConfig {
model_path: Some("/test/cache".to_string()),
..Default::default()
};
let executor = Executor::with_runner(config, Arc::new(mock_runner));
let scenario = QaScenario::new(
ModelId::new("test", "model"),
Modality::Run,
Backend::Cpu,
Format::SafeTensors,
"What is 2+2?".to_string(),
0,
);
let (_, _, exit_code, _, _) = executor.subprocess_execution(&scenario);
assert_eq!(exit_code, 0);
}
#[test]
fn test_execute_scenario_with_exit_code_failure() {
let mock_runner = MockCommandRunner::new().with_exit_code(5);
let config = ExecutionConfig {
model_path: Some("/test/model.gguf".to_string()),
..Default::default()
};
let executor = Executor::with_runner(config, Arc::new(mock_runner));
let scenario = QaScenario::new(
ModelId::new("test", "model"),
Modality::Run,
Backend::Cpu,
Format::Gguf,
"What is 2+2?".to_string(),
0,
);
let evidence = executor.execute_scenario(&scenario);
assert!(evidence.outcome.is_fail());
assert!(evidence.exit_code.is_some());
assert_eq!(evidence.exit_code.unwrap(), 5);
}
#[test]
fn test_execute_scenario_with_stderr_corroborated() {
let mock_runner = MockCommandRunner::new()
.with_inference_response_and_stderr("The answer is 4.", "Some warning");
let config = ExecutionConfig {
model_path: Some("/test/model.gguf".to_string()),
..Default::default()
};
let executor = Executor::with_runner(config, Arc::new(mock_runner));
let scenario = QaScenario::new(
ModelId::new("test", "model"),
Modality::Run,
Backend::Cpu,
Format::Gguf,
"What is 2+2?".to_string(),
0,
);
let evidence = executor.execute_scenario(&scenario);
assert!(evidence.outcome.is_pass());
}
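// Smoke coverage for the conversion path with no_gpu set: the pass/fail counts
// are deliberately discarded, so this only asserts that the call completes.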
#[test]
fn test_executor_run_conversion_tests_no_gpu() {
let mock_runner = MockCommandRunner::new();
let config = ExecutionConfig {
model_path: Some("/test/model.gguf".to_string()),
run_conversion_tests: true,
no_gpu: true,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let model_id = ModelId::new("test", "model");
let (passed, failed) =
executor.run_conversion_tests(std::path::Path::new("/test/model.gguf"), &model_id);
let _ = (passed, failed);
}
#[test]
fn test_executor_execute_with_stop_on_first_failure() {
let mock_runner = MockCommandRunner::new().with_inference_failure();
let config = ExecutionConfig {
model_path: Some("/test/model.gguf".to_string()),
failure_policy: FailurePolicy::StopOnFirst,
run_conversion_tests: false,
run_golden_rule_test: false,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let yaml = r#"
name: stop-on-first-test
version: "1.0.0"
model:
  hf_repo: "test/model"
  formats: [gguf]
test_matrix:
  modalities: [run]
  backends: [cpu]
  scenario_count: 5
"#;
let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
let result = executor.execute(&playbook).expect("Execution failed");
assert!(result.failed >= 1);
let executed = result.passed + result.failed;
assert!(executed <= result.total_scenarios);
}
#[test]
fn test_executor_execute_with_collect_all_failures() {
let mock_runner = MockCommandRunner::new().with_inference_failure();
let config = ExecutionConfig {
model_path: Some("/test/model.gguf".to_string()),
failure_policy: FailurePolicy::CollectAll,
run_conversion_tests: false,
run_golden_rule_test: false,
run_contract_tests: false,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let yaml = r#"
name: collect-all-test
version: "1.0.0"
model:
  hf_repo: "test/model"
  formats: [gguf]
test_matrix:
  modalities: [run]
  backends: [cpu]
  scenario_count: 3
"#;
let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
let result = executor.execute(&playbook).expect("Execution failed");
assert_eq!(result.failed, 3);
assert_eq!(result.total_scenarios, 3);
}
#[test]
fn test_executor_stop_on_p0_with_p0_gate() {
let mock_runner = MockCommandRunner::new()
.with_inference_failure()
.with_exit_code(1);
let config = ExecutionConfig {
model_path: Some("/test/model.gguf".to_string()),
failure_policy: FailurePolicy::StopOnP0,
run_conversion_tests: false,
run_golden_rule_test: false,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let yaml = r#"
name: p0-test
version: "1.0.0"
model:
  hf_repo: "test/model"
  formats: [gguf]
test_matrix:
  modalities: [run]
  backends: [cpu]
  scenario_count: 5
"#;
let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
let result = executor.execute(&playbook).expect("Execution failed");
assert!(result.failed >= 1);
}
#[test]
fn test_executor_run_conversion_tests_default_config() {
let mock_runner = MockCommandRunner::new();
let config = ExecutionConfig {
model_path: Some("/test/model.gguf".to_string()),
run_conversion_tests: true,
run_golden_rule_test: false,
no_gpu: false,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let yaml = r#"
name: conv-default-test
version: "1.0.0"
model:
  hf_repo: "test/model"
  formats: [gguf]
test_matrix:
  modalities: [run]
  backends: [cpu]
  scenario_count: 1
"#;
let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
let result = executor.execute(&playbook).expect("Execution failed");
assert!(result.total_scenarios >= 1);
}
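// Golden-rule gate: after converting the source model to .apr, the converted
// model must load and produce output equivalent to the source. This runner
// succeeds for the source model but fails inference for any path containing
// ".apr", so the gate must record a failure.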
#[test]
#[allow(clippy::too_many_lines)]
fn test_executor_golden_rule_converted_inference_fails() {
use crate::command::CommandOutput;
struct ConvertedFailRunner;
impl CommandRunner for ConvertedFailRunner {
fn run_inference(
&self,
model_path: &Path,
_prompt: &str,
_max_tokens: u32,
_no_gpu: bool,
_extra_args: &[&str],
) -> CommandOutput {
if model_path.to_string_lossy().contains(".apr") {
CommandOutput {
stdout: String::new(),
stderr: "Failed to load converted model".to_string(),
exit_code: 1,
success: false,
}
} else {
CommandOutput {
stdout: "Output:\nThe answer is 4.\nCompleted in 100ms".to_string(),
stderr: String::new(),
exit_code: 0,
success: true,
}
}
}
fn convert_model(&self, _source: &Path, _target: &Path) -> CommandOutput {
CommandOutput {
stdout: "Conversion complete".to_string(),
stderr: String::new(),
exit_code: 0,
success: true,
}
}
fn inspect_model(&self, _path: &Path) -> CommandOutput {
CommandOutput::success("")
}
fn validate_model(&self, _path: &Path) -> CommandOutput {
CommandOutput::success("")
}
fn bench_model(&self, _path: &Path) -> CommandOutput {
CommandOutput::success("")
}
fn check_model(&self, _path: &Path) -> CommandOutput {
CommandOutput::success("")
}
fn profile_model(&self, _path: &Path, _warmup: u32, _measure: u32) -> CommandOutput {
CommandOutput::success("")
}
fn profile_ci(
&self,
_path: &Path,
_min_throughput: Option<f64>,
_max_p99: Option<f64>,
_warmup: u32,
_measure: u32,
) -> CommandOutput {
CommandOutput::success("")
}
fn diff_tensors(&self, _model_a: &Path, _model_b: &Path, _json: bool) -> CommandOutput {
CommandOutput::success("")
}
fn compare_inference(
&self,
_model_a: &Path,
_model_b: &Path,
_prompt: &str,
_max_tokens: u32,
_tolerance: f64,
) -> CommandOutput {
CommandOutput::success("")
}
fn profile_with_flamegraph(
&self,
_model_path: &Path,
_output_path: &Path,
_no_gpu: bool,
) -> CommandOutput {
CommandOutput::success("")
}
fn profile_with_focus(
&self,
_model_path: &Path,
_focus: &str,
_no_gpu: bool,
) -> CommandOutput {
CommandOutput::success("")
}
fn fingerprint_model(&self, _path: &Path, _json: bool) -> CommandOutput {
CommandOutput::success("")
}
fn validate_stats(&self, _a: &Path, _b: &Path) -> CommandOutput {
CommandOutput::success("")
}
fn validate_model_strict(&self, _path: &Path) -> CommandOutput {
CommandOutput::success(r#"{"valid":true,"tensors_checked":100,"issues":[]}"#)
}
fn pull_model(&self, _hf_repo: &str) -> CommandOutput {
CommandOutput::success("Path: /mock/model.safetensors")
}
fn inspect_model_json(&self, _model_path: &Path) -> CommandOutput {
CommandOutput::success(
r#"{"format":"SafeTensors","tensor_count":10,"tensor_names":[]}"#,
)
}
fn run_ollama_inference(
&self,
_model_tag: &str,
_prompt: &str,
_temperature: f64,
) -> CommandOutput {
CommandOutput::success("Output:\nThe answer is 4.\nCompleted in 1.0s")
}
fn pull_ollama_model(&self, _model_tag: &str) -> CommandOutput {
CommandOutput::success("pulling manifest... done")
}
fn create_ollama_model(&self, _: &str, _: &Path) -> CommandOutput {
CommandOutput::success("creating model... done")
}
fn serve_model(&self, _: &Path, _: u16) -> CommandOutput {
CommandOutput::success(r#"{"status":"listening"}"#)
}
fn http_get(&self, _: &str) -> CommandOutput {
CommandOutput::success(r#"{"models":[]}"#)
}
fn profile_memory(&self, _: &Path) -> CommandOutput {
CommandOutput::success(r#"{"peak_rss_mb":1024}"#)
}
fn run_chat(
&self,
_model_path: &Path,
_prompt: &str,
_no_gpu: bool,
_extra_args: &[&str],
) -> CommandOutput {
CommandOutput::success("Chat output")
}
fn http_post(&self, _url: &str, _body: &str) -> CommandOutput {
CommandOutput::success("{}")
}
fn spawn_serve(&self, _model_path: &Path, _port: u16, _no_gpu: bool) -> CommandOutput {
CommandOutput::success("12345")
}
}
let config = ExecutionConfig {
model_path: Some("/test/model.gguf".to_string()),
run_conversion_tests: false,
run_golden_rule_test: true,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(ConvertedFailRunner));
let yaml = r#"
name: golden-conv-fail
version: "1.0.0"
model:
  hf_repo: "test/model"
  formats: [gguf]
test_matrix:
  modalities: [run]
  backends: [cpu]
  scenario_count: 1
"#;
let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
let result = executor.execute(&playbook).expect("Execution failed");
assert!(result.failed >= 1);
}
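// Golden-rule divergence variant: conversion and both inferences succeed, but
// the converted model answers "5" where the source answers "4", so the
// output-equivalence check must flag the mismatch.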
#[test]
#[allow(clippy::too_many_lines)]
fn test_executor_golden_rule_output_differs_with_data() {
use crate::command::CommandOutput;
struct DiffOutputRunner;
impl CommandRunner for DiffOutputRunner {
fn run_inference(
&self,
model_path: &Path,
_prompt: &str,
_max_tokens: u32,
_no_gpu: bool,
_extra_args: &[&str],
) -> CommandOutput {
if model_path.to_string_lossy().contains(".apr") {
CommandOutput {
stdout: "Output:\nThe answer is 5.\nCompleted in 100ms".to_string(),
stderr: String::new(),
exit_code: 0,
success: true,
}
} else {
CommandOutput {
stdout: "Output:\nThe answer is 4.\nCompleted in 100ms".to_string(),
stderr: String::new(),
exit_code: 0,
success: true,
}
}
}
fn convert_model(&self, _source: &Path, _target: &Path) -> CommandOutput {
CommandOutput {
stdout: "ok".to_string(),
stderr: String::new(),
exit_code: 0,
success: true,
}
}
fn inspect_model(&self, _path: &Path) -> CommandOutput {
CommandOutput::success("")
}
fn validate_model(&self, _path: &Path) -> CommandOutput {
CommandOutput::success("")
}
fn bench_model(&self, _path: &Path) -> CommandOutput {
CommandOutput::success("")
}
fn check_model(&self, _path: &Path) -> CommandOutput {
CommandOutput::success("")
}
fn profile_model(&self, _path: &Path, _warmup: u32, _measure: u32) -> CommandOutput {
CommandOutput::success("")
}
fn profile_ci(
&self,
_path: &Path,
_min_throughput: Option<f64>,
_max_p99: Option<f64>,
_warmup: u32,
_measure: u32,
) -> CommandOutput {
CommandOutput::success("")
}
fn diff_tensors(&self, _model_a: &Path, _model_b: &Path, _json: bool) -> CommandOutput {
CommandOutput::success("")
}
fn compare_inference(
&self,
_model_a: &Path,
_model_b: &Path,
_prompt: &str,
_max_tokens: u32,
_tolerance: f64,
) -> CommandOutput {
CommandOutput::success("")
}
fn profile_with_flamegraph(
&self,
_model_path: &Path,
_output_path: &Path,
_no_gpu: bool,
) -> CommandOutput {
CommandOutput::success("")
}
fn profile_with_focus(
&self,
_model_path: &Path,
_focus: &str,
_no_gpu: bool,
) -> CommandOutput {
CommandOutput::success("")
}
fn fingerprint_model(&self, _path: &Path, _json: bool) -> CommandOutput {
CommandOutput::success("")
}
fn validate_stats(&self, _a: &Path, _b: &Path) -> CommandOutput {
CommandOutput::success("")
}
fn validate_model_strict(&self, _path: &Path) -> CommandOutput {
CommandOutput::success(r#"{"valid":true,"tensors_checked":100,"issues":[]}"#)
}
fn pull_model(&self, _hf_repo: &str) -> CommandOutput {
CommandOutput::success("Path: /mock/model.safetensors")
}
fn inspect_model_json(&self, _model_path: &Path) -> CommandOutput {
CommandOutput::success(
r#"{"format":"SafeTensors","tensor_count":10,"tensor_names":[]}"#,
)
}
fn run_ollama_inference(
&self,
_model_tag: &str,
_prompt: &str,
_temperature: f64,
) -> CommandOutput {
CommandOutput::success("Output:\nThe answer is 4.\nCompleted in 1.0s")
}
fn pull_ollama_model(&self, _model_tag: &str) -> CommandOutput {
CommandOutput::success("pulling manifest... done")
}
fn create_ollama_model(&self, _: &str, _: &Path) -> CommandOutput {
CommandOutput::success("creating model... done")
}
fn serve_model(&self, _: &Path, _: u16) -> CommandOutput {
CommandOutput::success(r#"{"status":"listening"}"#)
}
fn http_get(&self, _: &str) -> CommandOutput {
CommandOutput::success(r#"{"models":[]}"#)
}
fn profile_memory(&self, _: &Path) -> CommandOutput {
CommandOutput::success(r#"{"peak_rss_mb":1024}"#)
}
fn run_chat(
&self,
_model_path: &Path,
_prompt: &str,
_no_gpu: bool,
_extra_args: &[&str],
) -> CommandOutput {
CommandOutput::success("Chat output")
}
fn http_post(&self, _url: &str, _body: &str) -> CommandOutput {
CommandOutput::success("{}")
}
fn spawn_serve(&self, _model_path: &Path, _port: u16, _no_gpu: bool) -> CommandOutput {
CommandOutput::success("12345")
}
}
let config = ExecutionConfig {
model_path: Some("/test/model.gguf".to_string()),
run_conversion_tests: false,
run_golden_rule_test: true,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(DiffOutputRunner));
let yaml = r#"
name: golden-diff
version: "1.0.0"
model:
  hf_repo: "test/model"
  formats: [gguf]
test_matrix:
  modalities: [run]
  backends: [cpu]
  scenario_count: 1
"#;
let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
let result = executor.execute(&playbook).expect("Execution failed");
assert!(result.failed >= 1);
}
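// This runner fails plain inference but succeeds whenever `--trace` appears in
// extra_args, exercising any diagnostic trace re-run the executor performs
// after a failure; the guarded assertion below only inspects trace markers
// when evidence stderr is present.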
#[test]
#[allow(clippy::too_many_lines)]
fn test_executor_subprocess_trace_with_stdout() {
use crate::command::CommandOutput;
struct TraceStdoutRunner;
impl CommandRunner for TraceStdoutRunner {
fn run_inference(
&self,
_model_path: &Path,
_prompt: &str,
_max_tokens: u32,
_no_gpu: bool,
extra_args: &[&str],
) -> CommandOutput {
if extra_args.contains(&"--trace") {
CommandOutput {
stdout: "trace data: layer 0 attention".to_string(),
stderr: "TRACE: model loading details".to_string(),
exit_code: 0,
success: true,
}
} else {
CommandOutput {
stdout: String::new(),
stderr: "inference error occurred".to_string(),
exit_code: 1,
success: false,
}
}
}
fn convert_model(&self, _source: &Path, _target: &Path) -> CommandOutput {
CommandOutput::success("")
}
fn inspect_model(&self, _path: &Path) -> CommandOutput {
CommandOutput::success("")
}
fn validate_model(&self, _path: &Path) -> CommandOutput {
CommandOutput::success("")
}
fn bench_model(&self, _path: &Path) -> CommandOutput {
CommandOutput::success("")
}
fn check_model(&self, _path: &Path) -> CommandOutput {
CommandOutput::success("")
}
fn profile_model(&self, _path: &Path, _warmup: u32, _measure: u32) -> CommandOutput {
CommandOutput::success("")
}
fn profile_ci(
&self,
_path: &Path,
_min_throughput: Option<f64>,
_max_p99: Option<f64>,
_warmup: u32,
_measure: u32,
) -> CommandOutput {
CommandOutput::success("")
}
fn diff_tensors(&self, _model_a: &Path, _model_b: &Path, _json: bool) -> CommandOutput {
CommandOutput::success("")
}
fn compare_inference(
&self,
_model_a: &Path,
_model_b: &Path,
_prompt: &str,
_max_tokens: u32,
_tolerance: f64,
) -> CommandOutput {
CommandOutput::success("")
}
fn profile_with_flamegraph(
&self,
_model_path: &Path,
_output_path: &Path,
_no_gpu: bool,
) -> CommandOutput {
CommandOutput::success("")
}
fn profile_with_focus(
&self,
_model_path: &Path,
_focus: &str,
_no_gpu: bool,
) -> CommandOutput {
CommandOutput::success("")
}
fn fingerprint_model(&self, _path: &Path, _json: bool) -> CommandOutput {
CommandOutput::success("")
}
fn validate_stats(&self, _a: &Path, _b: &Path) -> CommandOutput {
CommandOutput::success("")
}
fn validate_model_strict(&self, _path: &Path) -> CommandOutput {
CommandOutput::success(r#"{"valid":true,"tensors_checked":100,"issues":[]}"#)
}
fn pull_model(&self, _hf_repo: &str) -> CommandOutput {
CommandOutput::success("Path: /mock/model.safetensors")
}
fn inspect_model_json(&self, _model_path: &Path) -> CommandOutput {
CommandOutput::success(
r#"{"format":"SafeTensors","tensor_count":10,"tensor_names":[]}"#,
)
}
fn run_ollama_inference(
&self,
_model_tag: &str,
_prompt: &str,
_temperature: f64,
) -> CommandOutput {
CommandOutput::success("Output:\nThe answer is 4.\nCompleted in 1.0s")
}
fn pull_ollama_model(&self, _model_tag: &str) -> CommandOutput {
CommandOutput::success("pulling manifest... done")
}
fn create_ollama_model(&self, _: &str, _: &Path) -> CommandOutput {
CommandOutput::success("creating model... done")
}
fn serve_model(&self, _: &Path, _: u16) -> CommandOutput {
CommandOutput::success(r#"{"status":"listening"}"#)
}
fn http_get(&self, _: &str) -> CommandOutput {
CommandOutput::success(r#"{"models":[]}"#)
}
fn profile_memory(&self, _: &Path) -> CommandOutput {
CommandOutput::success(r#"{"peak_rss_mb":1024}"#)
}
fn run_chat(
&self,
_model_path: &Path,
_prompt: &str,
_no_gpu: bool,
_extra_args: &[&str],
) -> CommandOutput {
CommandOutput::success("Chat output")
}
fn http_post(&self, _url: &str, _body: &str) -> CommandOutput {
CommandOutput::success("{}")
}
fn spawn_serve(&self, _model_path: &Path, _port: u16, _no_gpu: bool) -> CommandOutput {
CommandOutput::success("12345")
}
}
let config = ExecutionConfig {
model_path: Some("/test/model.gguf".to_string()),
run_conversion_tests: false,
run_golden_rule_test: false,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(TraceStdoutRunner));
let yaml = r#"
name: trace-stdout-test
version: "1.0.0"
model:
  hf_repo: "test/model"
  formats: [gguf]
test_matrix:
  modalities: [run]
  backends: [cpu]
  scenario_count: 1
"#;
let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
let result = executor.execute(&playbook).expect("Execution failed");
assert!(result.failed >= 1);
let evidence = executor.evidence().all();
assert!(!evidence.is_empty());
let last = &evidence[evidence.len() - 1];
if let Some(ref stderr) = last.stderr {
assert!(stderr.contains("TRACE STDOUT") || stderr.contains("trace"));
}
}
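// Resolution order within a format directory: a file named `model.<ext>` is
// preferred, and otherwise any file with the matching extension is accepted.
// A model_path pointing directly at a file resolves only for scenarios whose
// format matches that file's extension; other formats are skipped.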
#[test]
fn test_resolve_model_path_fallback_to_extension() {
let temp_dir = tempfile::tempdir().unwrap();
let gguf_dir = temp_dir.path().join("gguf");
std::fs::create_dir_all(&gguf_dir).unwrap();
let alt_model = gguf_dir.join("custom-name.gguf");
std::fs::write(&alt_model, b"fake model").unwrap();
let config = ExecutionConfig {
model_path: Some(temp_dir.path().to_string_lossy().to_string()),
..Default::default()
};
let executor = Executor::with_config(config);
let scenario = apr_qa_gen::QaScenario::new(
apr_qa_gen::ModelId::new("test", "model"),
apr_qa_gen::Modality::Run,
apr_qa_gen::Backend::Cpu,
apr_qa_gen::Format::Gguf,
"test prompt".to_string(),
0,
);
let path = executor.resolve_model_path(&scenario);
assert!(path.unwrap().contains("custom-name.gguf"));
}
#[test]
fn test_resolve_model_path_prefers_model_dot_ext() {
let temp_dir = tempfile::tempdir().unwrap();
let apr_dir = temp_dir.path().join("apr");
std::fs::create_dir_all(&apr_dir).unwrap();
let model_file = apr_dir.join("model.apr");
std::fs::write(&model_file, b"fake model").unwrap();
let config = ExecutionConfig {
model_path: Some(temp_dir.path().to_string_lossy().to_string()),
..Default::default()
};
let executor = Executor::with_config(config);
let scenario = apr_qa_gen::QaScenario::new(
apr_qa_gen::ModelId::new("test", "model"),
apr_qa_gen::Modality::Run,
apr_qa_gen::Backend::Cpu,
apr_qa_gen::Format::Apr,
"test prompt".to_string(),
0,
);
let path = executor.resolve_model_path(&scenario);
assert!(path.unwrap().contains("model.apr"));
}
#[test]
fn test_resolve_model_path_file_matching_format() {
let temp_dir = tempfile::tempdir().unwrap();
let model_file = temp_dir.path().join("abc123.safetensors");
std::fs::write(&model_file, b"fake model data").unwrap();
let config = ExecutionConfig {
model_path: Some(model_file.to_string_lossy().to_string()),
..Default::default()
};
let executor = Executor::with_config(config);
let scenario = QaScenario::new(
ModelId::new("test", "model"),
Modality::Run,
Backend::Cpu,
Format::SafeTensors,
"test".to_string(),
0,
);
let path = executor.resolve_model_path(&scenario);
assert!(path.is_some());
assert!(path.unwrap().contains("abc123.safetensors"));
}
#[test]
fn test_resolve_model_path_file_nonmatching_format() {
let temp_dir = tempfile::tempdir().unwrap();
let model_file = temp_dir.path().join("abc123.safetensors");
std::fs::write(&model_file, b"fake model data").unwrap();
let config = ExecutionConfig {
model_path: Some(model_file.to_string_lossy().to_string()),
..Default::default()
};
let executor = Executor::with_config(config);
let scenario_gguf = QaScenario::new(
ModelId::new("test", "model"),
Modality::Run,
Backend::Cpu,
Format::Gguf,
"test".to_string(),
0,
);
assert!(executor.resolve_model_path(&scenario_gguf).is_none());
let scenario_apr = QaScenario::new(
ModelId::new("test", "model"),
Modality::Run,
Backend::Cpu,
Format::Apr,
"test".to_string(),
0,
);
assert!(executor.resolve_model_path(&scenario_apr).is_none());
}
#[test]
fn test_resolve_model_path_file_gguf() {
let temp_dir = tempfile::tempdir().unwrap();
let model_file = temp_dir.path().join("hash123.gguf");
std::fs::write(&model_file, b"fake gguf").unwrap();
let config = ExecutionConfig {
model_path: Some(model_file.to_string_lossy().to_string()),
..Default::default()
};
let executor = Executor::with_config(config);
let scenario = QaScenario::new(
ModelId::new("test", "model"),
Modality::Run,
Backend::Cpu,
Format::Gguf,
"test".to_string(),
0,
);
let path = executor.resolve_model_path(&scenario);
assert!(path.is_some());
assert!(path.unwrap().contains("hash123.gguf"));
}
#[test]
fn test_execute_scenario_skips_nonmatching_format() {
let temp_dir = tempfile::tempdir().unwrap();
let model_file = temp_dir.path().join("abc123.safetensors");
std::fs::write(&model_file, b"fake model").unwrap();
let mock_runner = MockCommandRunner::new().with_inference_response("The answer is 4.");
let config = ExecutionConfig {
model_path: Some(model_file.to_string_lossy().to_string()),
..Default::default()
};
let executor = Executor::with_runner(config, Arc::new(mock_runner));
let scenario = QaScenario::new(
ModelId::new("test", "model"),
Modality::Run,
Backend::Cpu,
Format::Gguf,
"2+2=".to_string(),
42,
);
let evidence = executor.execute_scenario(&scenario);
assert_eq!(evidence.outcome, Outcome::Skipped);
assert!(evidence.reason.contains("Format"));
}
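// find_safetensors_dir maps a `.safetensors` file to its parent directory and
// returns None for files of other formats.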
#[test]
fn test_find_safetensors_dir_file_mode() {
let temp_dir = tempfile::tempdir().unwrap();
let st_file = temp_dir.path().join("model.safetensors");
std::fs::write(&st_file, b"fake").unwrap();
let result = Executor::find_safetensors_dir(&st_file);
assert!(result.is_some());
assert_eq!(result.unwrap(), temp_dir.path());
let gguf_file = temp_dir.path().join("model.gguf");
std::fs::write(&gguf_file, b"fake").unwrap();
let result = Executor::find_safetensors_dir(&gguf_file);
assert!(result.is_none());
}
#[test]
fn test_subprocess_execution_skip_flag() {
let temp_dir = tempfile::tempdir().unwrap();
let model_file = temp_dir.path().join("abc.safetensors");
std::fs::write(&model_file, b"fake").unwrap();
let mock_runner = MockCommandRunner::new().with_inference_response("The answer is 4.");
let config = ExecutionConfig {
model_path: Some(model_file.to_string_lossy().to_string()),
..Default::default()
};
let executor = Executor::with_runner(config, Arc::new(mock_runner));
let scenario_st = QaScenario::new(
ModelId::new("test", "model"),
Modality::Run,
Backend::Cpu,
Format::SafeTensors,
"test".to_string(),
0,
);
let (_, _, _, _, skipped) = executor.subprocess_execution(&scenario_st);
assert!(!skipped);
let scenario_gguf = QaScenario::new(
ModelId::new("test", "model"),
Modality::Run,
Backend::Cpu,
Format::Gguf,
"test".to_string(),
0,
);
let (_, _, _, _, skipped) = executor.subprocess_execution(&scenario_gguf);
assert!(skipped);
}
#[test]
fn test_executor_corroborated_with_stderr() {
let mock_runner = MockCommandRunner::new()
.with_inference_response_and_stderr("The answer is 4.", "Warning: some benign warning");
let config = ExecutionConfig {
model_path: Some("/test/model.gguf".to_string()),
run_conversion_tests: false,
run_golden_rule_test: false,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let yaml = r#"
name: stderr-test
version: "1.0.0"
model:
  hf_repo: "test/model"
  formats: [gguf]
test_matrix:
  modalities: [run]
  backends: [cpu]
  scenario_count: 1
"#;
let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
let _result = executor.execute(&playbook).expect("Execution failed");
let evidence = executor.evidence().all();
assert!(!evidence.is_empty());
let ev = evidence
.iter()
.find(|e| e.stderr.is_some())
.expect("should have evidence with stderr");
assert!(ev.stderr.as_ref().unwrap().contains("Warning"));
}
#[test]
fn test_executor_falsified_with_stderr() {
let mock_runner = MockCommandRunner::new()
.with_inference_response_and_stderr("", "Error: model failed")
.with_exit_code(1);
let config = ExecutionConfig {
model_path: Some("/test/model.gguf".to_string()),
run_conversion_tests: false,
run_golden_rule_test: false,
failure_policy: FailurePolicy::CollectAll,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let yaml = r#"
name: falsified-stderr
version: "1.0.0"
model:
  hf_repo: "test/model"
  formats: [gguf]
test_matrix:
  modalities: [run]
  backends: [cpu]
  scenario_count: 1
"#;
let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
let result = executor.execute(&playbook).expect("Execution failed");
assert!(result.failed >= 1);
let evidence = executor.evidence().all();
let ev = evidence
.iter()
.find(|e| e.stderr.is_some())
.expect("should have evidence with stderr");
assert!(ev.stderr.is_some());
}
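// Flamegraph gate F-PROFILE-002 passes only when a well-formed
// `profile_flamegraph.svg` exists in the output directory; a successful mock
// run without the artifact, or an artifact that is not valid SVG, both fail.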
#[test]
fn test_execute_profile_flamegraph_no_apr() {
let executor = ToolExecutor::new("test-model.gguf".to_string(), true, 5000);
let temp_dir = tempfile::tempdir().unwrap();
let result = executor.execute_profile_flamegraph(temp_dir.path());
assert!(!result.passed);
assert_eq!(result.tool, "profile-flamegraph");
assert_eq!(result.gate_id, "F-PROFILE-002");
}
#[test]
fn test_execute_profile_flamegraph_with_mock_success() {
let mock_runner = MockCommandRunner::new();
let executor = ToolExecutor::with_runner(
"test-model.gguf".to_string(),
true,
5000,
Arc::new(mock_runner),
);
let temp_dir = tempfile::tempdir().unwrap();
let result = executor.execute_profile_flamegraph(temp_dir.path());
assert_eq!(result.tool, "profile-flamegraph");
assert_eq!(result.gate_id, "F-PROFILE-002");
assert!(!result.passed);
}
#[test]
fn test_execute_profile_flamegraph_with_svg_file() {
let mock_runner = MockCommandRunner::new();
let executor = ToolExecutor::with_runner(
"test-model.gguf".to_string(),
false,
5000,
Arc::new(mock_runner),
);
let temp_dir = tempfile::tempdir().unwrap();
let svg_path = temp_dir.path().join("profile_flamegraph.svg");
std::fs::write(&svg_path, "<svg><rect/></svg>").unwrap();
let result = executor.execute_profile_flamegraph(temp_dir.path());
assert!(result.passed);
assert!(result.stdout.contains("valid: true"));
}
#[test]
fn test_execute_profile_flamegraph_with_invalid_svg() {
let mock_runner = MockCommandRunner::new();
let executor = ToolExecutor::with_runner(
"test-model.gguf".to_string(),
true,
5000,
Arc::new(mock_runner),
);
let temp_dir = tempfile::tempdir().unwrap();
let svg_path = temp_dir.path().join("profile_flamegraph.svg");
std::fs::write(&svg_path, "not a valid svg at all").unwrap();
let result = executor.execute_profile_flamegraph(temp_dir.path());
assert!(!result.passed);
assert!(result.stdout.contains("valid: false"));
}
#[test]
fn test_execute_profile_flamegraph_unsupported() {
let mock_runner = MockCommandRunner::new().with_profile_flamegraph_failure();
let executor = ToolExecutor::with_runner(
"test-model.gguf".to_string(),
true,
5000,
Arc::new(mock_runner),
);
let temp_dir = tempfile::tempdir().unwrap();
let result = executor.execute_profile_flamegraph(temp_dir.path());
assert!(!result.passed);
}
#[test]
fn test_execute_profile_focus_no_apr() {
let executor = ToolExecutor::new("test-model.gguf".to_string(), true, 5000);
let result = executor.execute_profile_focus("attention");
assert!(!result.passed);
assert_eq!(result.tool, "profile-focus");
assert_eq!(result.gate_id, "F-PROFILE-003");
}
#[test]
fn test_execute_profile_focus_with_mock_success() {
let mock_runner = MockCommandRunner::new();
let executor = ToolExecutor::with_runner(
"test-model.gguf".to_string(),
false,
5000,
Arc::new(mock_runner),
);
let result = executor.execute_profile_focus("attention");
assert!(result.passed);
assert_eq!(result.tool, "profile-focus");
assert_eq!(result.gate_id, "F-PROFILE-003");
}
#[test]
fn test_execute_profile_focus_unsupported() {
let mock_runner = MockCommandRunner::new().with_profile_focus_failure();
let executor = ToolExecutor::with_runner(
"test-model.gguf".to_string(),
true,
5000,
Arc::new(mock_runner),
);
let result = executor.execute_profile_focus("attention");
assert!(!result.passed);
}
#[test]
fn test_execute_backend_equivalence_no_apr() {
let executor = ToolExecutor::new("test-model.gguf".to_string(), false, 5000);
let result = executor.execute_backend_equivalence();
assert!(!result.passed);
assert_eq!(result.tool, "backend-equivalence");
assert_eq!(result.gate_id, "F-CONV-BE-001");
}
#[test]
fn test_execute_serve_lifecycle_no_apr() {
let executor = ToolExecutor::new("test-model.gguf".to_string(), true, 5000);
let result = executor.execute_serve_lifecycle();
assert!(!result.passed);
assert_eq!(result.tool, "serve-lifecycle");
assert_eq!(result.gate_id, "F-INTEG-003");
}
#[test]
fn test_execute_all_excludes_serve_lifecycle() {
let mock_runner = MockCommandRunner::new();
let executor = ToolExecutor::with_runner(
"test-model.gguf".to_string(),
true,
5000,
Arc::new(mock_runner),
);
let results = executor.execute_all();
assert!(!results.is_empty());
assert!(!results.iter().any(|r| r.tool == "serve-lifecycle"));
}
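// Infrastructure-failure path: the model file does not exist on disk, so the
// conversion pipeline cannot run even though every runner call would succeed.
// The direct stub calls at the end keep the otherwise-unused mock methods
// covered.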
#[test]
#[allow(clippy::too_many_lines)]
fn test_executor_conversion_infrastructure_failure() {
use crate::command::CommandOutput;
struct FailingConversionRunner;
impl CommandRunner for FailingConversionRunner {
fn run_inference(
&self,
_model_path: &Path,
_prompt: &str,
_max_tokens: u32,
_no_gpu: bool,
_extra_args: &[&str],
) -> CommandOutput {
CommandOutput {
stdout: "The answer is 4.".to_string(),
stderr: String::new(),
exit_code: 0,
success: true,
}
}
fn convert_model(&self, _source: &Path, _target: &Path) -> CommandOutput {
CommandOutput::success("")
}
fn inspect_model(&self, _path: &Path) -> CommandOutput {
CommandOutput::success("")
}
fn validate_model(&self, _path: &Path) -> CommandOutput {
CommandOutput::success("")
}
fn bench_model(&self, _path: &Path) -> CommandOutput {
CommandOutput::success("")
}
fn check_model(&self, _path: &Path) -> CommandOutput {
CommandOutput::success("")
}
fn profile_model(&self, _path: &Path, _warmup: u32, _measure: u32) -> CommandOutput {
CommandOutput::success("")
}
fn profile_ci(
&self,
_path: &Path,
_min_throughput: Option<f64>,
_max_p99: Option<f64>,
_warmup: u32,
_measure: u32,
) -> CommandOutput {
CommandOutput::success("")
}
fn diff_tensors(&self, _model_a: &Path, _model_b: &Path, _json: bool) -> CommandOutput {
CommandOutput::success("")
}
fn compare_inference(
&self,
_model_a: &Path,
_model_b: &Path,
_prompt: &str,
_max_tokens: u32,
_tolerance: f64,
) -> CommandOutput {
CommandOutput::success("")
}
fn profile_with_flamegraph(
&self,
_model_path: &Path,
_output_path: &Path,
_no_gpu: bool,
) -> CommandOutput {
CommandOutput::success("")
}
fn profile_with_focus(
&self,
_model_path: &Path,
_focus: &str,
_no_gpu: bool,
) -> CommandOutput {
CommandOutput::success("")
}
fn fingerprint_model(&self, _path: &Path, _json: bool) -> CommandOutput {
CommandOutput::success("")
}
fn validate_stats(&self, _a: &Path, _b: &Path) -> CommandOutput {
CommandOutput::success("")
}
fn validate_model_strict(&self, _path: &Path) -> CommandOutput {
CommandOutput::success(r#"{"valid":true,"tensors_checked":100,"issues":[]}"#)
}
fn pull_model(&self, _hf_repo: &str) -> CommandOutput {
CommandOutput::success("Path: /mock/model.safetensors")
}
fn inspect_model_json(&self, _model_path: &Path) -> CommandOutput {
CommandOutput::success(
r#"{"format":"SafeTensors","tensor_count":10,"tensor_names":[]}"#,
)
}
fn run_ollama_inference(
&self,
_model_tag: &str,
_prompt: &str,
_temperature: f64,
) -> CommandOutput {
CommandOutput::success("Output:\nThe answer is 4.\nCompleted in 1.0s")
}
fn pull_ollama_model(&self, _model_tag: &str) -> CommandOutput {
CommandOutput::success("pulling manifest... done")
}
fn create_ollama_model(&self, _: &str, _: &Path) -> CommandOutput {
CommandOutput::success("creating model... done")
}
fn serve_model(&self, _: &Path, _: u16) -> CommandOutput {
CommandOutput::success(r#"{"status":"listening"}"#)
}
fn http_get(&self, _: &str) -> CommandOutput {
CommandOutput::success(r#"{"models":[]}"#)
}
fn profile_memory(&self, _: &Path) -> CommandOutput {
CommandOutput::success(r#"{"peak_rss_mb":1024}"#)
}
fn run_chat(
&self,
_model_path: &Path,
_prompt: &str,
_no_gpu: bool,
_extra_args: &[&str],
) -> CommandOutput {
CommandOutput::success("Chat output")
}
fn http_post(&self, _url: &str, _body: &str) -> CommandOutput {
CommandOutput::success("{}")
}
fn spawn_serve(&self, _model_path: &Path, _port: u16, _no_gpu: bool) -> CommandOutput {
CommandOutput::success("12345")
}
}
let config = ExecutionConfig {
model_path: Some("/nonexistent/model.gguf".to_string()),
run_conversion_tests: true,
run_golden_rule_test: false,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(FailingConversionRunner));
let yaml = r#"
name: conv-infra-fail
version: "1.0.0"
model:
  hf_repo: "test/model"
  formats: [gguf]
test_matrix:
  modalities: [run]
  backends: [cpu]
  scenario_count: 1
"#;
let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
let result = executor.execute(&playbook).expect("Execution failed");
assert!(result.total_scenarios >= 1);
let runner = FailingConversionRunner;
let p = Path::new("/dev/null");
assert!(runner.validate_model(p).success);
assert!(runner.bench_model(p).success);
assert!(runner.check_model(p).success);
assert!(runner.profile_model(p, 1, 1).success);
assert!(runner.profile_ci(p, None, None, 1, 1).success);
assert!(runner.diff_tensors(p, p, false).success);
assert!(runner.compare_inference(p, p, "", 1, 0.0).success);
assert!(runner.profile_with_flamegraph(p, p, false).success);
assert!(runner.profile_with_focus(p, "", false).success);
assert!(runner.fingerprint_model(p, false).success);
assert!(runner.validate_stats(p, p).success);
}
#[test]
fn test_find_safetensors_dir_with_subdir() {
use tempfile::TempDir;
let dir = TempDir::new().expect("create temp dir");
let st_dir = dir.path().join("safetensors");
std::fs::create_dir(&st_dir).expect("create safetensors dir");
std::fs::write(st_dir.join("model.safetensors"), "test").expect("write file");
let result = Executor::find_safetensors_dir(dir.path());
assert!(result.is_some());
assert_eq!(result.unwrap(), st_dir);
}
#[test]
fn test_find_safetensors_dir_direct() {
use tempfile::TempDir;
let dir = TempDir::new().expect("create temp dir");
std::fs::write(dir.path().join("model.safetensors"), "test").expect("write file");
let result = Executor::find_safetensors_dir(dir.path());
assert!(result.is_some());
assert_eq!(result.unwrap(), dir.path());
}
#[test]
fn test_find_safetensors_dir_none() {
use tempfile::TempDir;
let dir = TempDir::new().expect("create temp dir");
let result = Executor::find_safetensors_dir(dir.path());
assert!(result.is_none());
}
#[test]
fn test_has_safetensors_files_true() {
use tempfile::TempDir;
let dir = TempDir::new().expect("create temp dir");
std::fs::write(dir.path().join("model.safetensors"), "test").expect("write file");
assert!(Executor::has_safetensors_files(dir.path()));
}
#[test]
fn test_has_safetensors_files_false() {
use tempfile::TempDir;
let dir = TempDir::new().expect("create temp dir");
std::fs::write(dir.path().join("model.gguf"), "test").expect("write file");
assert!(!Executor::has_safetensors_files(dir.path()));
}
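// A small additional sketch, assuming has_safetensors_files only requires at
// least one `.safetensors` entry: a directory mixing formats still counts.
#[test]
fn test_has_safetensors_files_mixed_dir() {
use tempfile::TempDir;
let dir = TempDir::new().expect("create temp dir");
std::fs::write(dir.path().join("model.gguf"), "test").expect("write file");
std::fs::write(dir.path().join("model.safetensors"), "test").expect("write file");
assert!(Executor::has_safetensors_files(dir.path()));
}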
#[test]
fn test_has_safetensors_files_nonexistent_dir() {
let nonexistent = std::path::Path::new("/nonexistent/path/xyz123");
assert!(!Executor::has_safetensors_files(nonexistent));
}
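// G0 gateway checks run ahead of the scenario matrix: pull (G0-PULL-001),
// validate (G0-VALIDATE-001), and integrity (G0-INTEGRITY-*). A gateway
// failure sets `gateway_failed` and fails the remaining scenarios.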
#[test]
fn test_validate_scenario_creation() {
let model_id = ModelId::new("test", "model");
let scenario = Executor::validate_scenario(&model_id);
assert_eq!(scenario.model.org, "test");
assert_eq!(scenario.model.name, "model");
assert_eq!(scenario.format, Format::SafeTensors);
assert!(scenario.prompt.contains("G0 Validate"));
}
#[test]
fn test_pull_scenario_creation() {
let model_id = ModelId::new("test", "model");
let scenario = Executor::pull_scenario(&model_id);
assert_eq!(scenario.model.org, "test");
assert_eq!(scenario.model.name, "model");
assert_eq!(scenario.format, Format::SafeTensors);
assert!(scenario.prompt.contains("G0 Pull"));
}
#[test]
fn test_g0_pull_pass() {
let mock_runner = MockCommandRunner::new();
let config = ExecutionConfig {
model_path: Some("/test/model.gguf".to_string()),
run_conversion_tests: false,
run_golden_rule_test: false,
run_contract_tests: false,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let model_id = ModelId::new("test", "model");
let (passed, failed, pulled_path) = executor.run_g0_pull_check("test/model", &model_id);
assert_eq!(passed, 1);
assert_eq!(failed, 0);
assert_eq!(pulled_path.as_deref(), Some("/mock/model.safetensors"));
let evidence = executor.evidence().all();
let pull_ev = evidence
.iter()
.find(|e| e.gate_id == "G0-PULL-001")
.expect("should have G0-PULL evidence");
assert!(pull_ev.outcome.is_pass());
assert!(pull_ev.output.contains("G0 PASS"));
}
#[test]
fn test_g0_pull_fail() {
let mock_runner = MockCommandRunner::new().with_pull_failure();
let config = ExecutionConfig {
model_path: Some("/test/model.gguf".to_string()),
run_conversion_tests: false,
run_golden_rule_test: false,
run_contract_tests: false,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let model_id = ModelId::new("test", "model");
let (passed, failed, pulled_path) = executor.run_g0_pull_check("test/model", &model_id);
assert_eq!(passed, 0);
assert_eq!(failed, 1);
assert!(pulled_path.is_none());
let evidence = executor.evidence().all();
let pull_ev = evidence
.iter()
.find(|e| e.gate_id == "G0-PULL-001")
.expect("should have G0-PULL evidence");
assert!(!pull_ev.outcome.is_pass());
assert!(pull_ev.reason.contains("G0 FAIL"));
}
#[test]
fn test_g0_pull_fail_stops_execution() {
let mock_runner = MockCommandRunner::new().with_pull_failure();
let config = ExecutionConfig {
model_path: None,
run_conversion_tests: true,
run_golden_rule_test: true,
run_contract_tests: true,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let yaml = r#"
name: pull-fail-test
version: "1.0.0"
model:
  hf_repo: "test/model"
  formats: [gguf]
test_matrix:
  modalities: [run]
  backends: [cpu]
  scenario_count: 3
"#;
let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
let result = executor.execute(&playbook).expect("Execution failed");
assert!(result.gateway_failed.is_some());
assert!(
result
.gateway_failed
.as_ref()
.unwrap()
.contains("G0-PULL-001")
);
assert_eq!(result.passed, 0);
assert_eq!(result.failed, 4);
}
#[test]
fn test_g0_pull_sets_model_path() {
let mock_runner =
MockCommandRunner::new().with_pull_model_path("/pulled/model.safetensors");
let config = ExecutionConfig {
model_path: None,
run_conversion_tests: false,
run_golden_rule_test: false,
run_contract_tests: false,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let yaml = r#"
name: pull-set-path-test
version: "1.0.0"
model:
  hf_repo: "test/model"
  formats: [gguf]
test_matrix:
  modalities: [run]
  backends: [cpu]
  scenario_count: 1
"#;
let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
let result = executor.execute(&playbook).expect("Execution failed");
assert!(result.gateway_failed.is_none());
assert!(result.passed >= 1);
}
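/// Builds a temp directory with the `safetensors/model.safetensors` layout
/// expected by the G0 validate checks below.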
fn make_temp_model_dir() -> tempfile::TempDir {
let dir = tempfile::TempDir::new().expect("create temp dir");
let st_dir = dir.path().join("safetensors");
std::fs::create_dir_all(&st_dir).expect("mkdir safetensors");
std::fs::write(st_dir.join("model.safetensors"), b"fake").expect("write");
dir
}
#[test]
fn test_g0_validate_pass() {
let mock_runner = MockCommandRunner::new();
let dir = make_temp_model_dir();
let config = ExecutionConfig {
model_path: Some(dir.path().to_string_lossy().to_string()),
run_conversion_tests: false,
run_golden_rule_test: false,
run_contract_tests: false,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let model_id = ModelId::new("test", "model");
let (passed, failed) = executor.run_g0_validate_check(dir.path(), &model_id);
assert_eq!(passed, 1);
assert_eq!(failed, 0);
let evidence = executor.evidence().all();
let validate_ev = evidence
.iter()
.find(|e| e.gate_id == "G0-VALIDATE-001")
.expect("should have G0-VALIDATE evidence");
assert!(validate_ev.outcome.is_pass());
assert!(validate_ev.output.contains("G0 PASS"));
}
#[test]
fn test_g0_validate_fail_corrupt_model() {
let mock_runner = MockCommandRunner::new().with_validate_strict_failure();
let dir = make_temp_model_dir();
let config = ExecutionConfig {
model_path: Some(dir.path().to_string_lossy().to_string()),
run_conversion_tests: false,
run_golden_rule_test: false,
run_contract_tests: false,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let model_id = ModelId::new("test", "model");
let (passed, failed) = executor.run_g0_validate_check(dir.path(), &model_id);
assert_eq!(passed, 0);
assert_eq!(failed, 1);
let evidence = executor.evidence().all();
let validate_ev = evidence
.iter()
.find(|e| e.gate_id == "G0-VALIDATE-001")
.expect("should have G0-VALIDATE evidence");
assert!(!validate_ev.outcome.is_pass());
assert!(validate_ev.reason.contains("G0 FAIL"));
}
#[test]
fn test_g0_validate_fail_stops_execution() {
let mock_runner = MockCommandRunner::new().with_validate_strict_failure();
let dir = make_temp_model_dir();
let config = ExecutionConfig {
model_path: Some(dir.path().to_string_lossy().to_string()),
run_conversion_tests: true,
run_golden_rule_test: true,
run_contract_tests: true,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let yaml = r#"
name: validate-fail-test
version: "1.0.0"
model:
  hf_repo: "test/model"
  formats: [gguf]
test_matrix:
  modalities: [run]
  backends: [cpu]
  scenario_count: 3
"#;
let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
let result = executor.execute(&playbook).expect("Execution failed");
assert!(result.gateway_failed.is_some());
assert!(
result
.gateway_failed
.as_ref()
.unwrap()
.contains("G0-VALIDATE-001")
);
assert_eq!(result.passed, 0);
assert_eq!(result.failed, 4);
}
#[test]
fn test_g0_validate_pass_continues_execution() {
let mock_runner = MockCommandRunner::new();
let dir = make_temp_model_dir();
let config = ExecutionConfig {
model_path: Some(dir.path().to_string_lossy().to_string()),
run_conversion_tests: false,
run_golden_rule_test: false,
run_contract_tests: false,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let yaml = r#"
name: validate-pass-test
version: "1.0.0"
model:
  hf_repo: "test/model"
  formats: [gguf]
test_matrix:
  modalities: [run]
  backends: [cpu]
  scenario_count: 1
"#;
let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
let result = executor.execute(&playbook).expect("Execution failed");
assert!(result.gateway_failed.is_none());
assert!(result.total_scenarios >= 2);
assert!(result.passed >= 1);
}
#[test]
fn test_g0_validate_no_model_path() {
let mock_runner = MockCommandRunner::new();
let config = ExecutionConfig {
model_path: None,
run_conversion_tests: false,
run_golden_rule_test: false,
run_contract_tests: false,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let yaml = r#"
name: no-model-path-test
version: "1.0.0"
model:
  hf_repo: "test/model"
  formats: [gguf]
test_matrix:
  modalities: [run]
  backends: [cpu]
  scenario_count: 1
"#;
let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
let result = executor.execute(&playbook).expect("Execution failed");
assert!(result.gateway_failed.is_none());
assert_eq!(result.total_scenarios, 2);
}
#[test]
fn test_g0_validate_no_safetensors_files() {
let dir = tempfile::TempDir::new().expect("create temp dir");
let mock_runner = MockCommandRunner::new();
let config = ExecutionConfig {
model_path: Some(dir.path().to_string_lossy().to_string()),
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let model_id = ModelId::new("test", "model");
let (passed, failed) = executor.run_g0_validate_check(dir.path(), &model_id);
assert_eq!(passed, 0);
assert_eq!(failed, 0);
}
#[test]
fn test_g0_validate_multiple_shards() {
let dir = tempfile::TempDir::new().expect("create temp dir");
let st_dir = dir.path().join("safetensors");
std::fs::create_dir_all(&st_dir).expect("mkdir");
std::fs::write(st_dir.join("model-00001-of-00002.safetensors"), b"shard1").expect("write");
std::fs::write(st_dir.join("model-00002-of-00002.safetensors"), b"shard2").expect("write");
let mock_runner = MockCommandRunner::new();
let config = ExecutionConfig {
model_path: Some(dir.path().to_string_lossy().to_string()),
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let model_id = ModelId::new("test", "model");
let (passed, failed) = executor.run_g0_validate_check(dir.path(), &model_id);
assert_eq!(passed, 2);
assert_eq!(failed, 0);
}
#[test]
fn test_find_safetensors_files_single_file() {
let dir = tempfile::TempDir::new().expect("create temp dir");
let file = dir.path().join("model.safetensors");
std::fs::write(&file, b"test").expect("write");
let files = Executor::find_safetensors_files(&file);
assert_eq!(files.len(), 1);
assert_eq!(files[0], file);
}
#[test]
fn test_find_safetensors_files_non_safetensors() {
let dir = tempfile::TempDir::new().expect("create temp dir");
let file = dir.path().join("model.gguf");
std::fs::write(&file, b"test").expect("write");
let files = Executor::find_safetensors_files(&file);
assert!(files.is_empty());
}
#[test]
fn test_find_safetensors_files_directory() {
let dir = make_temp_model_dir();
let files = Executor::find_safetensors_files(dir.path());
assert_eq!(files.len(), 1);
}
#[test]
fn test_integrity_scenario_creation() {
let model_id = ModelId::new("test", "model");
let scenario = Executor::integrity_scenario(&model_id);
assert_eq!(scenario.model.org, "test");
assert_eq!(scenario.model.name, "model");
assert_eq!(scenario.format, Format::SafeTensors);
assert!(scenario.prompt.contains("G0"));
}
#[test]
fn test_run_g0_integrity_check_no_safetensors() {
use tempfile::TempDir;
let dir = TempDir::new().expect("create temp dir");
let mut executor = Executor::new();
let model_id = ModelId::new("test", "model");
let (passed, failed) = executor.run_g0_integrity_check(dir.path(), &model_id);
assert_eq!(passed, 0);
assert_eq!(failed, 0);
}
#[test]
fn test_run_g0_integrity_check_missing_config() {
use tempfile::TempDir;
let dir = TempDir::new().expect("create temp dir");
create_mock_safetensors_for_test(dir.path(), 24, 896, 151_936);
let mut executor = Executor::new();
let model_id = ModelId::new("test", "model");
let (passed, failed) = executor.run_g0_integrity_check(dir.path(), &model_id);
assert_eq!(passed, 0);
assert!(failed > 0);
let evidence = executor.evidence();
assert!(
evidence
.all()
.iter()
.any(|e| e.gate_id.starts_with("G0-INTEGRITY"))
);
}
#[test]
fn test_run_g0_integrity_check_pass() {
use tempfile::TempDir;
let dir = TempDir::new().expect("create temp dir");
create_test_config_for_executor(dir.path(), 24, 896, 151_936);
create_mock_safetensors_for_test(dir.path(), 24, 896, 151_936);
let mut executor = Executor::new();
let model_id = ModelId::new("test", "model");
let (passed, failed) = executor.run_g0_integrity_check(dir.path(), &model_id);
assert_eq!(passed, 1);
assert_eq!(failed, 0);
let evidence = executor.evidence();
assert!(
evidence
.all()
.iter()
.any(|e| { e.gate_id.starts_with("G0-INTEGRITY") && e.outcome.is_pass() })
);
}
#[test]
fn test_run_g0_integrity_check_layer_mismatch() {
use tempfile::TempDir;
let dir = TempDir::new().expect("create temp dir");
create_test_config_for_executor(dir.path(), 14, 896, 151_936);
create_mock_safetensors_for_test(dir.path(), 24, 896, 151_936);
let mut executor = Executor::new();
let model_id = ModelId::new("test", "model");
let (passed, failed) = executor.run_g0_integrity_check(dir.path(), &model_id);
assert_eq!(passed, 0);
assert!(failed > 0);
let evidence = executor.evidence();
assert!(evidence.all().iter().any(|e| e.gate_id.contains("LAYERS")));
}
fn create_test_config_for_executor(
dir: &std::path::Path,
layers: usize,
hidden: usize,
vocab: usize,
) {
let config = format!(
r#"{{"num_hidden_layers": {layers}, "hidden_size": {hidden}, "vocab_size": {vocab}}}"#
);
std::fs::write(dir.join("config.json"), config).expect("write config");
}
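// Builds a minimal single-file safetensors fixture: an embedding tensor shaped
// [vocab, hidden] plus one q_proj tensor per layer, sized from the given config values.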
#[allow(clippy::items_after_statements)]
fn create_mock_safetensors_for_test(
dir: &std::path::Path,
layers: usize,
hidden: usize,
vocab: usize,
) {
let mut header_obj = serde_json::Map::new();
let mut embed_info = serde_json::Map::new();
embed_info.insert("shape".to_string(), serde_json::json!([vocab, hidden]));
embed_info.insert(
"dtype".to_string(),
serde_json::Value::String("F32".to_string()),
);
embed_info.insert(
"data_offsets".to_string(),
serde_json::json!([0, vocab * hidden * 4]),
);
header_obj.insert(
"model.embed_tokens.weight".to_string(),
serde_json::Value::Object(embed_info),
);
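// One q_proj tensor per layer, so the layer count is recoverable from the header's tensor names.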
for i in 0..layers {
let mut layer_info = serde_json::Map::new();
layer_info.insert("shape".to_string(), serde_json::json!([hidden, hidden]));
layer_info.insert(
"dtype".to_string(),
serde_json::Value::String("F32".to_string()),
);
layer_info.insert("data_offsets".to_string(), serde_json::json!([0, 0]));
header_obj.insert(
format!("model.layers.{i}.self_attn.q_proj.weight"),
serde_json::Value::Object(layer_info),
);
}
let header_json = serde_json::to_string(&header_obj).expect("serialize header");
let header_bytes = header_json.as_bytes();
let header_len = header_bytes.len() as u64;
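// safetensors container layout: 8-byte little-endian header length, the JSON header, then raw tensor bytes (zero-filled here).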
let path = dir.join("model.safetensors");
let mut file = std::fs::File::create(path).expect("create safetensors");
use std::io::Write;
file.write_all(&header_len.to_le_bytes())
.expect("write len");
file.write_all(header_bytes).expect("write header");
file.write_all(&[0u8; 1024]).expect("write data");
}
#[test]
fn test_execute_all_with_serve_true() {
let mock_runner = MockCommandRunner::new();
let executor = ToolExecutor::with_runner(
"test-model.gguf".to_string(),
true,
5000,
Arc::new(mock_runner),
);
let results = executor.execute_all_with_serve(true);
assert!(!results.is_empty());
assert!(results.iter().any(|r| r.tool == "serve-lifecycle"));
}
#[test]
fn test_run_g0_integrity_check_hidden_mismatch() {
use tempfile::TempDir;
let dir = TempDir::new().expect("create temp dir");
create_test_config_for_executor(dir.path(), 24, 1024, 151_936);
create_mock_safetensors_for_test(dir.path(), 24, 896, 151_936);
let mut executor = Executor::new();
let model_id = ModelId::new("test", "model");
let (passed, failed) = executor.run_g0_integrity_check(dir.path(), &model_id);
assert_eq!(passed, 0);
assert!(failed > 0);
let evidence = executor.evidence();
assert!(evidence.all().iter().any(|e| e.gate_id.contains("HIDDEN")));
}
#[test]
fn test_run_g0_integrity_check_vocab_mismatch() {
use tempfile::TempDir;
let dir = TempDir::new().expect("create temp dir");
create_test_config_for_executor(dir.path(), 24, 896, 200_000);
create_mock_safetensors_for_test(dir.path(), 24, 896, 151_936);
let mut executor = Executor::new();
let model_id = ModelId::new("test", "model");
let (passed, failed) = executor.run_g0_integrity_check(dir.path(), &model_id);
assert_eq!(passed, 0);
assert!(failed > 0);
let evidence = executor.evidence();
assert!(evidence.all().iter().any(|e| e.gate_id.contains("VOCAB")));
}
#[test]
fn test_run_g0_layout_check_no_contract() {
use tempfile::TempDir;
let dir = TempDir::new().expect("create temp dir");
let mut executor = Executor::new();
let model_id = ModelId::new("test", "model");
let (passed, failed) = executor.run_g0_layout_check(dir.path(), &model_id);
assert_eq!(passed, 0);
assert_eq!(failed, 0);
}
#[test]
fn test_run_g0_layout_check_model_not_found() {
use tempfile::TempDir;
let dir = TempDir::new().expect("create temp dir");
let contract_path = dir.path().join("tensor-layout-v1.yaml");
std::fs::write(
&contract_path,
r#"
metadata:
version: "1.0"
created: "2026-01-01"
updated: "2026-01-01"
author: "test"
description: "test"
formats: {}
kernel:
signature: "test"
weight_shape: "[out, in]"
computation: "y = Wx"
byte_calculation: "out * in"
block_sizes: {}
QK_K: 256
tensors: {}
validation_rules: []
"#,
)
.expect("write contract");
let nonexistent_path = dir.path().join("does_not_exist.safetensors");
let contract =
crate::layout_contract::load_contract_from(&contract_path).expect("load contract");
let result = crate::layout_contract::validate_model(&nonexistent_path, &contract)
.expect("validation should return result");
assert!(!result.passed);
assert!(!result.critical_failures.is_empty());
}
#[test]
fn test_layout_scenario_creation() {
let model_id = ModelId::new("test", "model");
let scenario = Executor::layout_scenario(&model_id);
assert_eq!(
scenario.prompt,
"G0 Layout: tensor shape contract validation"
);
assert_eq!(scenario.format, Format::SafeTensors);
assert_eq!(scenario.backend, Backend::Cpu);
assert_eq!(scenario.modality, Modality::Run);
}
#[test]
fn test_format_tensor_failure_with_expected_and_actual() {
let tensor_result = crate::layout_contract::TensorValidationResult {
tensor_name: "lm_head.weight".to_string(),
rule_id: "F-LAYOUT-CONTRACT-002".to_string(),
passed: false,
details: "Shape mismatch".to_string(),
expected: Some("[vocab, hidden]".to_string()),
actual: Some("[hidden, vocab]".to_string()),
};
let formatted = Executor::format_tensor_failure(&tensor_result);
assert!(formatted.contains("F-LAYOUT-CONTRACT-002"));
assert!(formatted.contains("Shape mismatch"));
assert!(formatted.contains("Expected: [vocab, hidden]"));
assert!(formatted.contains("Actual: [hidden, vocab]"));
}
#[test]
fn test_format_tensor_failure_without_expected() {
let tensor_result = crate::layout_contract::TensorValidationResult {
tensor_name: "test.weight".to_string(),
rule_id: "F-LAYOUT-CONTRACT-001".to_string(),
passed: false,
details: "Missing transpose".to_string(),
expected: None,
actual: None,
};
let formatted = Executor::format_tensor_failure(&tensor_result);
assert!(formatted.contains("F-LAYOUT-CONTRACT-001"));
assert!(formatted.contains("Missing transpose"));
assert!(!formatted.contains("Expected:"));
assert!(!formatted.contains("Actual:"));
}
#[test]
fn test_execute_inspect_verified_nonexistent_model() {
let executor =
ToolExecutor::new("/nonexistent/path/to/model.gguf".to_string(), false, 5000);
let result = executor.execute_inspect_verified();
assert!(!result.passed);
assert_eq!(result.gate_id, "F-INSPECT-META-001");
assert!(result.exit_code != 0);
}
#[test]
fn test_execute_scenario_stop_on_p0_gate() {
let mock_runner = MockCommandRunner::new()
.with_inference_failure()
.with_exit_code(1);
let config = ExecutionConfig {
model_path: Some("/test/model.gguf".to_string()),
failure_policy: FailurePolicy::StopOnP0,
run_conversion_tests: false,
run_golden_rule_test: false,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let yaml = r#"
name: p0-stop
version: "1.0.0"
model:
hf_repo: "test/model"
formats: [gguf]
test_matrix:
modalities: [run]
backends: [cpu]
scenario_count: 3
"#;
let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
let result = executor.execute(&playbook).expect("Execution failed");
assert!(result.failed >= 1);
}
#[test]
fn test_execute_scenario_corroborated_with_stderr_via_playbook() {
let mock_runner = MockCommandRunner::new()
.with_inference_response_and_stderr("correct", "warning: low memory");
let config = ExecutionConfig {
model_path: Some("/test/model.gguf".to_string()),
run_conversion_tests: false,
run_golden_rule_test: false,
failure_policy: FailurePolicy::CollectAll,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let yaml = r#"
name: corroborated-stderr
version: "1.0.0"
model:
hf_repo: "test/model"
formats: [gguf]
test_matrix:
modalities: [run]
backends: [cpu]
scenario_count: 1
"#;
let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
let result = executor.execute(&playbook).expect("Execution failed");
assert!(result.passed >= 1);
let evidence = executor.evidence().all();
assert!(
evidence
.iter()
.any(|e| e.outcome.is_pass() && e.stderr.is_some()),
"should have corroborated evidence with stderr"
);
}
#[test]
fn test_run_conversion_tests_single_file_model() {
let dir = tempfile::tempdir().expect("create temp dir");
let model_path = dir.path().join("model.gguf");
std::fs::write(&model_path, b"fake model").expect("write model");
let config = ExecutionConfig {
model_path: Some(model_path.to_string_lossy().to_string()),
run_conversion_tests: true,
..Default::default()
};
let mut executor = Executor::with_config(config);
let model_id = ModelId::new("test", "model");
let (passed, failed) = executor.run_conversion_tests(&model_path, &model_id);
assert_eq!(passed, 0);
assert_eq!(failed, 0);
}
#[test]
fn test_run_golden_rule_single_file_model() {
let dir = tempfile::tempdir().expect("create temp dir");
let model_path = dir.path().join("model.gguf");
std::fs::write(&model_path, b"fake model").expect("write model");
let config = ExecutionConfig {
model_path: Some(model_path.to_string_lossy().to_string()),
run_golden_rule_test: true,
..Default::default()
};
let mut executor = Executor::with_config(config);
let model_id = ModelId::new("test", "model");
let (passed, failed) = executor.run_golden_rule_test(&model_path, &model_id);
assert_eq!(passed, 0);
assert_eq!(failed, 0);
}
#[test]
fn test_integrity_check_refuses_on_mismatch() {
use crate::playbook::{PlaybookLockEntry, PlaybookLockFile, save_lock_file};
use std::collections::HashMap;
let dir = tempfile::tempdir().expect("create temp dir");
let lock_path = dir.path().join("playbook.lock.yaml");
let mut entries = HashMap::new();
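// An all-zero SHA-256 can never match the playbook's real digest, so the integrity gate must refuse to execute it.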
entries.insert(
"integrity-test".to_string(),
PlaybookLockEntry {
sha256: "0000000000000000000000000000000000000000000000000000000000000000"
.to_string(),
locked_fields: vec!["name".to_string()],
},
);
let lock_file = PlaybookLockFile { entries };
save_lock_file(&lock_file, &lock_path).expect("save lock");
let config = ExecutionConfig {
check_integrity: true,
lock_file_path: Some(lock_path.to_string_lossy().to_string()),
run_conversion_tests: false,
run_golden_rule_test: false,
..Default::default()
};
let mut executor = Executor::with_config(config);
let yaml = r#"
name: integrity-test
version: "1.0.0"
model:
hf_repo: "test/model"
formats: [gguf]
test_matrix:
modalities: [run]
backends: [cpu]
scenario_count: 1
"#;
let playbook = Playbook::from_yaml(yaml).expect("parse");
let result = executor.execute(&playbook).expect("execute");
assert!(result.gateway_failed.is_some() || result.failed > 0);
}
#[test]
fn test_integrity_check_disabled_by_default() {
let config = ExecutionConfig {
run_conversion_tests: false,
run_golden_rule_test: false,
..Default::default()
};
assert!(!config.check_integrity);
assert!(config.lock_file_path.is_none());
let mock_runner = MockCommandRunner::new();
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let yaml = r#"
name: no-integrity
version: "1.0.0"
model:
hf_repo: "test/model"
formats: [gguf]
test_matrix:
modalities: [run]
backends: [cpu]
scenario_count: 1
"#;
let playbook = Playbook::from_yaml(yaml).expect("parse");
let result = executor.execute(&playbook).expect("execute");
assert!(result.gateway_failed.is_none());
}
#[test]
fn test_integrity_check_missing_lock_file_warns() {
let mock_runner = MockCommandRunner::new();
let config = ExecutionConfig {
check_integrity: true,
lock_file_path: Some("/nonexistent/playbook.lock.yaml".to_string()),
run_conversion_tests: false,
run_golden_rule_test: false,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let yaml = r#"
name: missing-lock
version: "1.0.0"
model:
hf_repo: "test/model"
formats: [gguf]
test_matrix:
modalities: [run]
backends: [cpu]
scenario_count: 1
"#;
let playbook = Playbook::from_yaml(yaml).expect("parse");
let result = executor.execute(&playbook).expect("execute");
assert!(result.gateway_failed.is_none());
}
#[test]
fn test_warn_implicit_skips_flag() {
let mock_runner = MockCommandRunner::new();
let config = ExecutionConfig {
warn_implicit_skips: true,
run_conversion_tests: false,
run_golden_rule_test: false,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let yaml = r#"
name: skip-warn-test
version: "1.0.0"
model:
hf_repo: "test/model"
formats: [gguf]
test_matrix:
modalities: [run]
backends: [cpu]
scenario_count: 1
"#;
let playbook = Playbook::from_yaml(yaml).expect("parse");
let result = executor.execute(&playbook).expect("execute");
assert!(result.gateway_failed.is_none());
}
#[test]
fn test_backward_compat_new_flags_off() {
let config = ExecutionConfig::default();
assert!(!config.check_integrity);
assert!(!config.warn_implicit_skips);
assert!(config.lock_file_path.is_none());
}
#[test]
fn test_hf_parity_disabled_by_default() {
let config = ExecutionConfig::default();
assert!(!config.run_hf_parity);
assert!(config.hf_parity_corpus_path.is_none());
assert!(config.hf_parity_model_family.is_none());
}
#[test]
fn test_hf_parity_skipped_when_missing_config() {
let mock_runner = MockCommandRunner::new();
let config = ExecutionConfig {
run_hf_parity: true,
hf_parity_corpus_path: None,
hf_parity_model_family: None,
run_conversion_tests: false,
run_golden_rule_test: false,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let yaml = r#"
name: hf-parity-test
version: "1.0.0"
model:
hf_repo: "test/model"
formats: [gguf]
test_matrix:
modalities: [run]
backends: [cpu]
scenario_count: 1
"#;
let playbook = Playbook::from_yaml(yaml).expect("parse");
let result = executor.execute(&playbook).expect("execute");
assert!(result.gateway_failed.is_none());
let has_skip_evidence = result
.evidence
.all()
.iter()
.any(|e| e.gate_id == "F-HF-PARITY-SKIP");
assert!(has_skip_evidence, "Expected F-HF-PARITY-SKIP evidence");
}
#[test]
fn test_hf_parity_skipped_when_manifest_missing() {
let mock_runner = MockCommandRunner::new();
let config = ExecutionConfig {
run_hf_parity: true,
hf_parity_corpus_path: Some("/nonexistent/corpus".to_string()),
hf_parity_model_family: Some("nonexistent-model/v1".to_string()),
run_conversion_tests: false,
run_golden_rule_test: false,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let yaml = r#"
name: hf-parity-missing-test
version: "1.0.0"
model:
hf_repo: "test/model"
formats: [gguf]
test_matrix:
modalities: [run]
backends: [cpu]
scenario_count: 1
"#;
let playbook = Playbook::from_yaml(yaml).expect("parse");
let result = executor.execute(&playbook).expect("execute");
assert!(
result.failed >= 1,
"Expected at least 1 failed test for missing manifest"
);
let has_parity_evidence = result
.evidence
.all()
.iter()
.any(|e| e.gate_id == "F-HF-PARITY-001");
assert!(
has_parity_evidence,
"Expected F-HF-PARITY-001 evidence for missing manifest"
);
}
#[test]
fn test_workspace_creates_directory_structure() {
let dir = tempfile::tempdir().expect("create temp dir");
let output_dir = dir.path().join("output");
let model_file = dir.path().join("abc123.safetensors");
std::fs::write(&model_file, b"fake-safetensors-content").expect("write model");
let mock_runner = MockCommandRunner::new();
let config = ExecutionConfig {
output_dir: Some(output_dir.to_string_lossy().to_string()),
run_conversion_tests: false,
run_golden_rule_test: false,
run_contract_tests: false,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let model_id = ModelId::new("test", "model");
let formats = vec![Format::SafeTensors, Format::Apr];
let (workspace, passed, _failed) =
executor.prepare_model_workspace(&model_file, &model_id, &formats);
let ws_path = Path::new(&workspace);
assert!(ws_path.exists(), "Workspace directory should exist");
let st_dir = ws_path.join("safetensors");
assert!(st_dir.exists(), "safetensors subdir should exist");
let st_link = st_dir.join("model.safetensors");
assert!(st_link.exists(), "model.safetensors symlink should exist");
let apr_dir = ws_path.join("apr");
assert!(apr_dir.exists(), "apr subdir should exist");
assert!(passed >= 1, "At least one format conversion should pass");
}
#[test]
fn test_workspace_symlinks_config_files() {
let dir = tempfile::tempdir().expect("create temp dir");
let output_dir = dir.path().join("output");
let model_file = dir.path().join("abc123.safetensors");
std::fs::write(&model_file, b"fake-model").expect("write model");
std::fs::write(
dir.path().join("abc123.config.json"),
r#"{"num_hidden_layers": 24}"#,
)
.expect("write config");
std::fs::write(
dir.path().join("abc123.tokenizer.json"),
r#"{"version": "1.0"}"#,
)
.expect("write tokenizer");
let mock_runner = MockCommandRunner::new();
let config = ExecutionConfig {
output_dir: Some(output_dir.to_string_lossy().to_string()),
run_conversion_tests: false,
run_golden_rule_test: false,
run_contract_tests: false,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let model_id = ModelId::new("test", "model");
let formats = vec![Format::SafeTensors];
let (workspace, _passed, _failed) =
executor.prepare_model_workspace(&model_file, &model_id, &formats);
let ws_path = Path::new(&workspace);
let st_dir = ws_path.join("safetensors");
assert!(
st_dir.join("config.json").exists(),
"config.json should be symlinked"
);
assert!(
st_dir.join("tokenizer.json").exists(),
"tokenizer.json should be symlinked"
);
}
#[test]
fn test_workspace_conversion_failure_nonfatal() {
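// Three formats are requested, but safetensors is the source (symlinked, not converted),
// so only the APR and GGUF conversions can fail; the workspace itself must still be created.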
let dir = tempfile::tempdir().expect("create temp dir");
let output_dir = dir.path().join("output");
let model_file = dir.path().join("test.safetensors");
std::fs::write(&model_file, b"fake-model").expect("write model");
let mock_runner = MockCommandRunner::new().with_convert_failure();
let config = ExecutionConfig {
output_dir: Some(output_dir.to_string_lossy().to_string()),
run_conversion_tests: false,
run_golden_rule_test: false,
run_contract_tests: false,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let model_id = ModelId::new("test", "model");
let formats = vec![Format::SafeTensors, Format::Apr, Format::Gguf];
let (workspace, passed, failed) =
executor.prepare_model_workspace(&model_file, &model_id, &formats);
assert!(
Path::new(&workspace).exists(),
"Workspace should exist even with conversion failures"
);
assert!(
Path::new(&workspace).join("safetensors").exists(),
"safetensors dir should exist"
);
assert_eq!(passed, 0, "No conversions should pass");
assert_eq!(failed, 2, "Both APR and GGUF conversions should fail");
let evidence = executor.evidence().all();
let apr_evidence = evidence.iter().any(|e| e.gate_id == "G0-FORMAT-APR-001");
let gguf_evidence = evidence.iter().any(|e| e.gate_id == "G0-FORMAT-GGUF-001");
assert!(apr_evidence, "Should have G0-FORMAT-APR-001 evidence");
assert!(gguf_evidence, "Should have G0-FORMAT-GGUF-001 evidence");
}
#[test]
fn test_workspace_skipped_for_directory() {
let mock_runner = MockCommandRunner::new();
let config = ExecutionConfig {
model_path: Some("/some/directory/path".to_string()),
run_conversion_tests: false,
run_golden_rule_test: false,
run_contract_tests: false,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let yaml = r#"
name: workspace-skip-test
version: "1.0.0"
model:
hf_repo: "test/model"
formats: [safetensors, apr]
test_matrix:
modalities: [run]
backends: [cpu]
scenario_count: 1
"#;
let playbook = Playbook::from_yaml(yaml).expect("parse");
let result = executor.execute(&playbook).expect("execute");
let has_format_evidence = result
.evidence
.all()
.iter()
.any(|e| e.gate_id.starts_with("G0-FORMAT"));
assert!(
!has_format_evidence,
"No G0-FORMAT evidence expected for directory model path"
);
}
#[test]
fn test_workspace_evidence_emitted() {
let dir = tempfile::tempdir().expect("create temp dir");
let output_dir = dir.path().join("output");
let model_file = dir.path().join("test.safetensors");
std::fs::write(&model_file, b"fake-model").expect("write model");
let mock_runner = MockCommandRunner::new();
let config = ExecutionConfig {
output_dir: Some(output_dir.to_string_lossy().to_string()),
run_conversion_tests: false,
run_golden_rule_test: false,
run_contract_tests: false,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let model_id = ModelId::new("test", "model");
let formats = vec![Format::SafeTensors, Format::Apr, Format::Gguf];
let (_workspace, passed, failed) =
executor.prepare_model_workspace(&model_file, &model_id, &formats);
assert_eq!(passed + failed, 2, "Should have evidence for APR and GGUF");
let evidence = executor.evidence().all();
let format_evidence_count = evidence
.iter()
.filter(|e| e.gate_id.starts_with("G0-FORMAT"))
.count();
assert_eq!(
format_evidence_count, 2,
"Should have 2 G0-FORMAT evidence entries"
);
}
#[test]
fn test_find_sibling_model_files() {
let dir = tempfile::tempdir().expect("create temp dir");
let model_file = dir.path().join("abc123.safetensors");
std::fs::write(&model_file, b"model").expect("write");
std::fs::write(dir.path().join("abc123.config.json"), b"config").expect("write");
std::fs::write(dir.path().join("abc123.tokenizer.json"), b"tokenizer").expect("write");
std::fs::write(dir.path().join("def456.safetensors"), b"other").expect("write");
std::fs::write(dir.path().join("def456.config.json"), b"other-config").expect("write");
let siblings = Executor::find_sibling_model_files(&model_file);
assert_eq!(siblings.len(), 2, "Should find exactly 2 sibling files");
let canonical_names: Vec<&str> = siblings.iter().map(|(_, n)| n.as_str()).collect();
assert!(
canonical_names.contains(&"config.json"),
"Should find config.json"
);
assert!(
canonical_names.contains(&"tokenizer.json"),
"Should find tokenizer.json"
);
}
#[test]
fn test_find_sibling_model_files_no_siblings() {
let dir = tempfile::tempdir().expect("create temp dir");
let model_file = dir.path().join("lonely.safetensors");
std::fs::write(&model_file, b"model").expect("write");
let siblings = Executor::find_sibling_model_files(&model_file);
assert!(siblings.is_empty(), "Should find no siblings");
}
#[test]
fn test_find_sibling_model_files_non_safetensors() {
let dir = tempfile::tempdir().expect("create temp dir");
let model_file = dir.path().join("model.gguf");
std::fs::write(&model_file, b"model").expect("write");
let siblings = Executor::find_sibling_model_files(&model_file);
assert!(
siblings.is_empty(),
"Should return empty for non-safetensors files"
);
}
#[test]
fn test_workspace_execute_integration_with_single_file() {
let dir = tempfile::tempdir().expect("create temp dir");
let output_dir = dir.path().join("output");
let model_file = dir.path().join("test.safetensors");
std::fs::write(&model_file, b"fake-model").expect("write model");
let mock_runner =
MockCommandRunner::new().with_pull_model_path(model_file.to_string_lossy().to_string());
let config = ExecutionConfig {
model_path: Some(model_file.to_string_lossy().to_string()),
output_dir: Some(output_dir.to_string_lossy().to_string()),
run_conversion_tests: false,
run_golden_rule_test: false,
run_contract_tests: false,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let yaml = r#"
name: workspace-integration
version: "1.0.0"
model:
hf_repo: "test/model"
formats: [safetensors, apr]
test_matrix:
modalities: [run]
backends: [cpu]
scenario_count: 1
"#;
let playbook = Playbook::from_yaml(yaml).expect("parse");
let result = executor.execute(&playbook).expect("execute");
let final_model_path = executor.config().model_path.as_deref().unwrap_or("");
assert!(
final_model_path.contains("workspace"),
"model_path should point to workspace: {final_model_path}"
);
assert!(
!final_model_path.ends_with(".safetensors"),
"model_path should not be a file: {final_model_path}"
);
let has_format_evidence = result
.evidence
.all()
.iter()
.any(|e| e.gate_id.starts_with("G0-FORMAT"));
assert!(
has_format_evidence,
"Should have G0-FORMAT evidence for APR conversion"
);
}
#[test]
fn test_g0_tensor_no_family_configured() {
let mock_runner = MockCommandRunner::new();
let dir = make_temp_model_dir();
let config = ExecutionConfig {
model_path: Some(dir.path().to_string_lossy().to_string()),
run_conversion_tests: false,
run_golden_rule_test: false,
run_contract_tests: false,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let yaml = r#"
name: no-family-test
version: "1.0.0"
model:
hf_repo: "test/model"
formats: [safetensors]
test_matrix:
modalities: [run]
backends: [cpu]
scenario_count: 1
"#;
let playbook = Playbook::from_yaml(yaml).expect("parse");
let result = executor.execute(&playbook).expect("execute");
let has_tensor_evidence = result
.evidence
.all()
.iter()
.any(|e| e.gate_id == "G0-TENSOR-001");
assert!(
!has_tensor_evidence,
"Should NOT have G0-TENSOR evidence when family not configured"
);
}
#[test]
fn test_g0_tensor_family_contract_not_found() {
let mock_runner = MockCommandRunner::new();
let dir = make_temp_model_dir();
let config = ExecutionConfig {
model_path: Some(dir.path().to_string_lossy().to_string()),
run_conversion_tests: false,
run_golden_rule_test: false,
run_contract_tests: false,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let model_id = ModelId::new("test", "model");
let (passed, failed) = executor.run_g0_tensor_template_check(
dir.path(),
&model_id,
"nonexistent-family",
"1b",
Some("/nonexistent/path"),
);
assert_eq!(passed, 0);
assert_eq!(failed, 0);
let evidence = executor.evidence().all();
let tensor_ev = evidence
.iter()
.find(|e| e.gate_id == "G0-TENSOR-001")
.expect("should have G0-TENSOR evidence");
assert!(tensor_ev.output.contains("G0 SKIP"));
assert!(tensor_ev.output.contains("Family contract not found"));
}
#[test]
fn test_g0_tensor_no_safetensors_files() {
let mock_runner = MockCommandRunner::new();
let dir = tempfile::TempDir::new().expect("create temp dir");
let config = ExecutionConfig {
model_path: Some(dir.path().to_string_lossy().to_string()),
run_conversion_tests: false,
run_golden_rule_test: false,
run_contract_tests: false,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let model_id = ModelId::new("test", "model");
let (passed, failed) = executor.run_g0_tensor_template_check(
dir.path(),
&model_id,
"qwen2",
"0.5b",
Some("/nonexistent/path"), );
assert_eq!(passed, 0);
assert_eq!(failed, 0);
}
#[test]
fn test_g0_tensor_inspect_returns_empty_names() {
let mock_runner = MockCommandRunner::new().with_tensor_names(vec![]);
let dir = make_temp_model_dir();
let config = ExecutionConfig {
model_path: Some(dir.path().to_string_lossy().to_string()),
run_conversion_tests: false,
run_golden_rule_test: false,
run_contract_tests: false,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let model_id = ModelId::new("test", "model");
let (passed, failed) = executor.run_g0_tensor_template_check(
dir.path(),
&model_id,
"qwen2",
"0.5b",
Some("/nonexistent/path"),
);
assert_eq!(passed, 0);
assert_eq!(failed, 0);
}
#[test]
fn test_g0_tensor_inspect_failure() {
let mock_runner = MockCommandRunner::new().with_inspect_json_failure();
let dir = make_temp_model_dir();
let contracts_dir = tempfile::TempDir::new().expect("create contracts dir");
let family_yaml = r#"
family: testfamily
size_variants:
1b:
parameters: "1B"
hidden_dim: 1024
num_layers: 12
num_heads: 8
tensor_template:
embedding: "embed.weight"
"#;
std::fs::write(contracts_dir.path().join("testfamily.yaml"), family_yaml)
.expect("write family yaml");
let config = ExecutionConfig {
model_path: Some(dir.path().to_string_lossy().to_string()),
run_conversion_tests: false,
run_golden_rule_test: false,
run_contract_tests: false,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let model_id = ModelId::new("test", "model");
let (passed, failed) = executor.run_g0_tensor_template_check(
dir.path(),
&model_id,
"testfamily",
"1b",
Some(contracts_dir.path().to_str().expect("path")),
);
assert_eq!(passed, 0);
assert_eq!(failed, 1);
let evidence = executor.evidence().all();
let tensor_ev = evidence
.iter()
.find(|e| e.gate_id == "G0-TENSOR-001")
.expect("should have G0-TENSOR evidence");
assert!(tensor_ev.reason.contains("G0 FAIL"));
assert!(tensor_ev.reason.contains("Could not inspect"));
}
#[test]
fn test_g0_tensor_all_tensors_present() {
let mock_runner = MockCommandRunner::new().with_tensor_names(vec![
"embed.weight".to_string(),
"model.layers.0.self_attn.q_proj.weight".to_string(),
]);
let dir = make_temp_model_dir();
let contracts_dir = tempfile::TempDir::new().expect("create contracts dir");
let family_yaml = r#"
family: testfamily
size_variants:
1b:
parameters: "1B"
hidden_dim: 1024
num_layers: 1
num_heads: 8
tensor_template:
embedding: "embed.weight"
"#;
std::fs::write(contracts_dir.path().join("testfamily.yaml"), family_yaml)
.expect("write family yaml");
let config = ExecutionConfig {
model_path: Some(dir.path().to_string_lossy().to_string()),
run_conversion_tests: false,
run_golden_rule_test: false,
run_contract_tests: false,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let model_id = ModelId::new("test", "model");
let (passed, failed) = executor.run_g0_tensor_template_check(
dir.path(),
&model_id,
"testfamily",
"1b",
Some(contracts_dir.path().to_str().expect("path")),
);
assert_eq!(passed, 1);
assert_eq!(failed, 0);
let evidence = executor.evidence().all();
let tensor_ev = evidence
.iter()
.find(|e| e.gate_id == "G0-TENSOR-001")
.expect("should have G0-TENSOR evidence");
assert!(tensor_ev.output.contains("G0 PASS"));
}
#[test]
fn test_g0_tensor_missing_tensors() {
let mock_runner = MockCommandRunner::new().with_tensor_names(vec![
"some.other.tensor".to_string(), ]);
let dir = make_temp_model_dir();
let contracts_dir = tempfile::TempDir::new().expect("create contracts dir");
let family_yaml = r#"
family: testfamily
size_variants:
1b:
parameters: "1B"
hidden_dim: 1024
num_layers: 1
num_heads: 8
tensor_template:
embedding: "embed.weight"
"#;
std::fs::write(contracts_dir.path().join("testfamily.yaml"), family_yaml)
.expect("write family yaml");
let config = ExecutionConfig {
model_path: Some(dir.path().to_string_lossy().to_string()),
run_conversion_tests: false,
run_golden_rule_test: false,
run_contract_tests: false,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(mock_runner));
let model_id = ModelId::new("test", "model");
let (passed, failed) = executor.run_g0_tensor_template_check(
dir.path(),
&model_id,
"testfamily",
"1b",
Some(contracts_dir.path().to_str().expect("path")),
);
assert_eq!(passed, 0);
assert_eq!(failed, 1);
let evidence = executor.evidence().all();
let tensor_ev = evidence
.iter()
.find(|e| e.gate_id == "G0-TENSOR-001")
.expect("should have G0-TENSOR evidence");
assert!(tensor_ev.reason.contains("G0 FAIL"));
assert!(tensor_ev.reason.contains("Missing"));
assert!(tensor_ev.reason.contains("embed.weight"));
}
#[test]
fn test_parse_timing_ms_standard() {
let output = "Output:\nHello\nCompleted in 1.5s\ntok/s: 25.0";
assert!((parse_timing_ms(output).unwrap() - 1500.0).abs() < 0.1);
}
#[test]
fn test_parse_timing_ms_no_timing() {
let output = "Just some output without timing";
assert!(parse_timing_ms(output).is_none());
}
#[test]
fn test_parse_timing_ms_zero() {
let output = "Completed in 0.0s";
assert!(parse_timing_ms(output).unwrap().abs() < 0.1);
}
#[test]
fn test_parse_throughput_json() {
let output = r#"{"throughput_tps":25.0,"latency_p50_ms":78.2}"#;
assert!((parse_throughput(output).unwrap() - 25.0).abs() < 0.1);
}
#[test]
fn test_parse_throughput_no_match() {
let output = "no json here";
assert!(parse_throughput(output).is_none());
}
#[test]
fn test_parse_throughput_integer() {
let output = r#"{"throughput_tps":100,"other":0}"#;
assert!((parse_throughput(output).unwrap() - 100.0).abs() < 0.1);
}
#[test]
fn test_ollama_parity_ttft_comparison() {
let runner = MockCommandRunner::new().with_inference_response("Hello world");
let runner = Arc::new(runner);
let config = ExecutionConfig {
run_ollama_parity: true,
model_path: Some("/mock/model".to_string()),
..Default::default()
};
let mut executor = Executor::with_runner(config, runner);
let yaml = r#"
name: test-ollama-ttft
version: "1.0.0"
model:
hf_repo: "test/model"
test_matrix:
modalities: [run]
backends: [cpu]
scenario_count: 1
ollama_parity:
enabled: true
model_tag: "test:latest"
prompts: ["What is 2+2?"]
temperature: 0.0
"#;
let playbook: Playbook = serde_yaml::from_str(yaml).unwrap();
let (passed, failed) =
executor.run_ollama_parity_tests(Path::new("/mock/model"), &playbook);
assert!(
passed + failed >= 2,
"Expected at least 2 evidence items, got passed={passed} failed={failed}"
);
}
#[test]
fn test_ollama_gguf_loadability_success() {
let runner = Arc::new(MockCommandRunner::new());
let config = ExecutionConfig {
run_ollama_parity: true,
model_path: Some("/mock/model".to_string()),
..Default::default()
};
let mut executor = Executor::with_runner(config, runner);
let yaml = r#"
name: test-ollama-gguf
version: "1.0.0"
model:
hf_repo: "test/model"
test_matrix:
modalities: [run]
backends: [cpu]
scenario_count: 1
ollama_parity:
enabled: true
prompts: ["test"]
"#;
let playbook: Playbook = serde_yaml::from_str(yaml).unwrap();
let (passed, _failed) =
executor.run_ollama_parity_tests(Path::new("/mock/model"), &playbook);
assert!(passed >= 3, "Expected at least 3 passes, got {passed}");
let evidence = executor.evidence().all();
assert!(evidence.iter().any(|e| e.gate_id == "F-OLLAMA-005"));
}
#[test]
fn test_ollama_gguf_loadability_failure() {
let runner = Arc::new(MockCommandRunner::new().with_ollama_create_failure());
let config = ExecutionConfig {
run_ollama_parity: true,
model_path: Some("/mock/model".to_string()),
..Default::default()
};
let mut executor = Executor::with_runner(config, runner);
let yaml = r#"
name: test-ollama-gguf-fail
version: "1.0.0"
model:
hf_repo: "test/model"
test_matrix:
modalities: [run]
backends: [cpu]
scenario_count: 1
ollama_parity:
enabled: true
prompts: ["test"]
"#;
let playbook: Playbook = serde_yaml::from_str(yaml).unwrap();
let (_passed, failed) =
executor.run_ollama_parity_tests(Path::new("/mock/model"), &playbook);
assert!(
failed >= 1,
"Expected at least 1 failure for create failure"
);
let evidence = executor.evidence().all();
let gguf_ev = evidence
.iter()
.find(|e| e.gate_id == "F-OLLAMA-005")
.unwrap();
assert!(!gguf_ev.outcome.is_pass());
}
#[test]
fn test_ollama_api_parity_success() {
let runner = Arc::new(MockCommandRunner::new());
let config = ExecutionConfig {
run_ollama_parity: true,
model_path: Some("/mock/model".to_string()),
..Default::default()
};
let mut executor = Executor::with_runner(config, runner);
let yaml = r#"
name: test-ollama-api
version: "1.0.0"
model:
hf_repo: "test/model"
test_matrix:
modalities: [run]
backends: [cpu]
scenario_count: 1
ollama_parity:
enabled: true
prompts: ["test"]
"#;
let playbook: Playbook = serde_yaml::from_str(yaml).unwrap();
let (passed, _failed) =
executor.run_ollama_parity_tests(Path::new("/mock/model"), &playbook);
assert!(passed >= 1);
let evidence = executor.evidence().all();
assert!(evidence.iter().any(|e| e.gate_id == "F-OLLAMA-004"));
}
#[test]
fn test_ollama_api_parity_failure() {
let runner = Arc::new(MockCommandRunner::new().with_http_get_failure());
let config = ExecutionConfig {
run_ollama_parity: true,
model_path: Some("/mock/model".to_string()),
..Default::default()
};
let mut executor = Executor::with_runner(config, runner);
let yaml = r#"
name: test-ollama-api-fail
version: "1.0.0"
model:
hf_repo: "test/model"
test_matrix:
modalities: [run]
backends: [cpu]
scenario_count: 1
ollama_parity:
enabled: true
prompts: ["test"]
"#;
let playbook: Playbook = serde_yaml::from_str(yaml).unwrap();
let (_passed, failed) =
executor.run_ollama_parity_tests(Path::new("/mock/model"), &playbook);
assert!(failed >= 1);
let evidence = executor.evidence().all();
let api_ev = evidence
.iter()
.find(|e| e.gate_id == "F-OLLAMA-004")
.unwrap();
assert!(!api_ev.outcome.is_pass());
}
#[test]
fn test_perf_003_gpu_cpu_ratio() {
let runner = Arc::new(MockCommandRunner::new().with_tps(50.0));
let config = ExecutionConfig {
run_profile_ci: true,
model_path: Some("/mock/model".to_string()),
..Default::default()
};
let mut executor = Executor::with_runner(config, runner);
let yaml = r#"
name: test-perf-003
version: "1.0.0"
model:
hf_repo: "test/model"
test_matrix:
modalities: [run]
backends: [cpu, gpu]
scenario_count: 1
profile_ci:
enabled: true
warmup: 1
measure: 2
formats: [safetensors]
backends: [cpu, gpu]
"#;
let playbook: Playbook = serde_yaml::from_str(yaml).unwrap();
let model_id = playbook.model_id();
let (passed, _failed) =
executor.run_perf_gates(Path::new("/mock/model"), &model_id, &playbook);
assert!(passed >= 2, "Expected at least 2 passes, got {passed}");
let evidence = executor.evidence().all();
assert!(evidence.iter().any(|e| e.gate_id == "F-PERF-003"));
assert!(evidence.iter().any(|e| e.gate_id == "F-PERF-005"));
}
#[test]
fn test_perf_005_memory_profiling_failure() {
let runner = Arc::new(MockCommandRunner::new().with_profile_memory_failure());
let config = ExecutionConfig {
run_profile_ci: true,
model_path: Some("/mock/model".to_string()),
..Default::default()
};
let mut executor = Executor::with_runner(config, runner);
let yaml = r#"
name: test-perf-005-fail
version: "1.0.0"
model:
hf_repo: "test/model"
test_matrix:
modalities: [run]
backends: [cpu]
scenario_count: 1
profile_ci:
enabled: true
warmup: 1
measure: 2
backends: [cpu]
"#;
let playbook: Playbook = serde_yaml::from_str(yaml).unwrap();
let model_id = playbook.model_id();
let (_passed, failed) =
executor.run_perf_gates(Path::new("/mock/model"), &model_id, &playbook);
assert!(failed >= 1);
let evidence = executor.evidence().all();
let mem_ev = evidence.iter().find(|e| e.gate_id == "F-PERF-005").unwrap();
assert!(!mem_ev.outcome.is_pass());
}
#[test]
fn test_execute_with_ollama_parity_enabled() {
let runner =
MockCommandRunner::new().with_inference_response("Output:\nHello\nCompleted in 0.5s");
let config = ExecutionConfig {
run_ollama_parity: true,
model_path: Some("/mock/model".to_string()),
no_gpu: true,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(runner));
let yaml = r#"
name: test-ollama-integration
version: "1.0.0"
model:
hf_repo: "test/model"
test_matrix:
modalities: [run]
backends: [cpu]
scenario_count: 1
ollama_parity:
enabled: true
prompts: ["What is 2+2?"]
"#;
let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
let result = executor.execute(&playbook).expect("Execution failed");
assert!(result.total_scenarios >= 1);
let evidence = executor.evidence().all();
assert!(evidence.iter().any(|e| e.gate_id == "F-OLLAMA-001"));
}
#[test]
fn test_execute_with_profile_ci_perf_gates() {
let runner = MockCommandRunner::new()
.with_tps(50.0)
.with_inference_response("Output:\nHello\nCompleted in 0.5s");
let config = ExecutionConfig {
run_profile_ci: true,
model_path: Some("/mock/model".to_string()),
no_gpu: true,
..Default::default()
};
let mut executor = Executor::with_runner(config, Arc::new(runner));
let yaml = r#"
name: test-perf-integration
version: "1.0.0"
model:
hf_repo: "test/model"
test_matrix:
modalities: [run]
backends: [cpu]
scenario_count: 1
profile_ci:
enabled: true
warmup: 1
measure: 2
formats: [safetensors]
backends: [cpu, gpu]
"#;
let playbook = Playbook::from_yaml(yaml).expect("Failed to parse");
let result = executor.execute(&playbook).expect("Execution failed");
assert!(result.total_scenarios >= 1);
let evidence = executor.evidence().all();
assert!(evidence.iter().any(|e| e.gate_id == "F-PERF-003"));
assert!(evidence.iter().any(|e| e.gate_id == "F-PERF-005"));
}
#[test]
fn test_resolve_model_path_file_sibling_gguf() {
let temp_dir = tempfile::tempdir().unwrap();
let st_file = temp_dir.path().join("model.safetensors");
let gguf_file = temp_dir.path().join("model.gguf");
std::fs::write(&st_file, b"fake safetensors").unwrap();
std::fs::write(&gguf_file, b"fake gguf").unwrap();
let config = ExecutionConfig {
model_path: Some(st_file.to_string_lossy().to_string()),
..Default::default()
};
let executor = Executor::with_config(config);
let scenario = QaScenario::new(
ModelId::new("test", "model"),
Modality::Run,
Backend::Cpu,
Format::Gguf,
"test".to_string(),
0,
);
let path = executor.resolve_model_path(&scenario);
assert!(path.is_some(), "Should find sibling .gguf file");
assert!(path.unwrap().contains("model.gguf"));
}
#[test]
fn test_resolve_model_path_file_sibling_apr() {
let temp_dir = tempfile::tempdir().unwrap();
let gguf_file = temp_dir.path().join("model.gguf");
let apr_file = temp_dir.path().join("model.apr");
std::fs::write(&gguf_file, b"fake gguf").unwrap();
std::fs::write(&apr_file, b"fake apr").unwrap();
let config = ExecutionConfig {
model_path: Some(gguf_file.to_string_lossy().to_string()),
..Default::default()
};
let executor = Executor::with_config(config);
let scenario = QaScenario::new(
ModelId::new("test", "model"),
Modality::Run,
Backend::Cpu,
Format::Apr,
"test".to_string(),
0,
);
let path = executor.resolve_model_path(&scenario);
assert!(path.is_some(), "Should find sibling .apr file");
assert!(path.unwrap().contains("model.apr"));
}
#[test]
fn test_resolve_model_path_file_sibling_not_found() {
let temp_dir = tempfile::tempdir().unwrap();
let st_file = temp_dir.path().join("model.safetensors");
std::fs::write(&st_file, b"fake safetensors").unwrap();
let config = ExecutionConfig {
model_path: Some(st_file.to_string_lossy().to_string()),
..Default::default()
};
let executor = Executor::with_config(config);
let scenario = QaScenario::new(
ModelId::new("test", "model"),
Modality::Run,
Backend::Cpu,
Format::Gguf,
"test".to_string(),
0,
);
assert!(
executor.resolve_model_path(&scenario).is_none(),
"No sibling .gguf exists, should return None"
);
}
#[test]
fn test_resolve_model_path_file_sibling_fallback_different_stem() {
let temp_dir = tempfile::tempdir().unwrap();
let st_file = temp_dir.path().join("abc123.safetensors");
let gguf_file = temp_dir.path().join("other-name.gguf");
std::fs::write(&st_file, b"fake safetensors").unwrap();
std::fs::write(&gguf_file, b"fake gguf").unwrap();
let config = ExecutionConfig {
model_path: Some(st_file.to_string_lossy().to_string()),
..Default::default()
};
let executor = Executor::with_config(config);
let scenario = QaScenario::new(
ModelId::new("test", "model"),
Modality::Run,
Backend::Cpu,
Format::Gguf,
"test".to_string(),
0,
);
let path = executor.resolve_model_path(&scenario);
assert!(
path.is_none(),
"Should NOT match unrelated model family"
);
}
#[test]
fn test_resolve_model_path_file_sibling_prefix_match() {
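// The .gguf filename carries a quantization suffix (-q4k); resolution should still find the
// .apr sibling by matching on the shared model-family prefix.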
let temp_dir = tempfile::tempdir().unwrap();
let gguf_file = temp_dir.path().join("qwen2.5-coder-7b-instruct-q4k.gguf");
let apr_file = temp_dir.path().join("qwen2.5-coder-7b-instruct.apr");
std::fs::write(&gguf_file, b"fake gguf").unwrap();
std::fs::write(&apr_file, b"fake apr").unwrap();
let config = ExecutionConfig {
model_path: Some(gguf_file.to_string_lossy().to_string()),
..Default::default()
};
let executor = Executor::with_config(config);
let scenario = QaScenario::new(
ModelId::new("test", "model"),
Modality::Run,
Backend::Cpu,
Format::Apr,
"test".to_string(),
0,
);
let path = executor.resolve_model_path(&scenario);
assert!(
path.is_some(),
"Should find APR via model family prefix match"
);
assert!(path.unwrap().contains("qwen2.5-coder-7b-instruct.apr"));
}
#[test]
fn test_subprocess_execution_chat_modality() {
let runner = MockCommandRunner::new();
let config = ExecutionConfig {
model_path: Some("/mock/model.gguf".to_string()),
..Default::default()
};
let executor = Executor::with_runner(config, Arc::new(runner));
let scenario = QaScenario::new(
ModelId::new("test", "model"),
Modality::Chat,
Backend::Cpu,
Format::Gguf,
"What is 2+2?".to_string(),
0,
);
let (text, stderr, exit_code, _tps, skipped) = executor.subprocess_execution(&scenario);
assert!(!skipped, "Chat scenario should not be skipped");
assert_eq!(exit_code, 0);
assert!(stderr.is_none() || stderr.as_deref() == Some(""));
assert!(text.contains("4"), "Chat should return arithmetic answer");
}
#[test]
fn test_subprocess_execution_serve_modality() {
let runner = MockCommandRunner::new();
let config = ExecutionConfig {
model_path: Some("/mock/model.gguf".to_string()),
..Default::default()
};
let executor = Executor::with_runner(config, Arc::new(runner));
let scenario = QaScenario::new(
ModelId::new("test", "model"),
Modality::Serve,
Backend::Cpu,
Format::Gguf,
"What is 2+2?".to_string(),
0,
);
let (_text, _stderr, _exit_code, _tps, skipped) =
executor.subprocess_execution(&scenario);
assert!(!skipped, "Serve scenario should not be skipped");
}
#[test]
fn test_subprocess_execution_gpu_backend() {
let runner = MockCommandRunner::new();
let config = ExecutionConfig {
model_path: Some("/mock/model.gguf".to_string()),
no_gpu: true,
..Default::default()
};
let executor = Executor::with_runner(config, Arc::new(runner));
let scenario = QaScenario::new(
ModelId::new("test", "model"),
Modality::Run,
Backend::Gpu,
Format::Gguf,
"test".to_string(),
0,
);
let (_text, _stderr, exit_code, _tps, skipped) =
executor.subprocess_execution(&scenario);
assert!(!skipped);
assert_eq!(exit_code, 0);
}
}