fn run_metadata_plausibility_gate(path: &Path, config: &QaConfig) -> Result<GateResult> {
let start = Instant::now();
if !config.json && config.verbose {
println!(
"{}",
"Running metadata plausibility validation (Bug 210)...".yellow()
);
}
let data = std::fs::read(path)
.map_err(|e| CliError::ValidationFailed(format!("Failed to read model: {e}")))?;
if data.len() < 4 {
let duration = start.elapsed();
return Ok(GateResult::failed(
"metadata_plausibility",
"File too small for metadata extraction",
None,
None,
duration,
));
}
let (architecture, rope_theta, max_pos, rms_norm_eps) = extract_model_metadata(&data, path)?;
let mut violations: Vec<String> = Vec::new();
let mut checks_passed = 0usize;
check_rope_theta(
architecture.as_deref(),
rope_theta,
&data,
&mut violations,
&mut checks_passed,
);
check_max_position_embeddings(max_pos, &mut violations, &mut checks_passed);
check_rms_norm_eps(rms_norm_eps, &mut violations, &mut checks_passed);
check_arch_theta_cross_validation(
architecture.as_ref(),
rope_theta,
&mut violations,
&mut checks_passed,
);
let duration = start.elapsed();
if violations.is_empty() {
Ok(GateResult::passed(
"metadata_plausibility",
&format!(
"{checks_passed} metadata checks passed (arch={}, rope_theta={}, max_pos={})",
architecture.as_deref().unwrap_or("unknown"),
rope_theta.map_or("none".to_string(), |t| format!("{t}")),
max_pos.map_or("none".to_string(), |p| format!("{p}")),
),
Some(checks_passed as f64),
Some(0.0),
duration,
))
} else {
Ok(GateResult::failed(
"metadata_plausibility",
&format!(
"{} metadata violation(s): {}",
violations.len(),
violations.join("; ")
),
Some(violations.len() as f64),
Some(0.0),
duration,
))
}
}
fn rope_theta_range(arch: Option<&str>) -> (f64, f64, &'static str) {
match arch {
Some("qwen2" | "qwen2.5" | "qwen") => (100_000.0, f64::MAX, "expected ~1000000.0 (100x too low, will produce garbage)"),
Some("llama" | "llama2" | "llama3") => (1000.0, 10_000_000.0, "expected 10000-500000"),
_ => (100.0, 100_000_000.0, "outside plausible range [100, 100M]"),
}
}
fn check_rope_theta(
arch: Option<&str>,
rope_theta: Option<f32>,
data: &[u8],
violations: &mut Vec<String>,
checks_passed: &mut usize,
) {
let Some(theta) = rope_theta else {
if data.len() >= 4 && &data[0..4] == b"GGUF" {
*checks_passed += 1;
} else {
violations.push("rope_theta missing from APR metadata".to_string());
}
return;
};
let theta_f64 = f64::from(theta);
let (min, max, msg) = rope_theta_range(arch);
if theta_f64 >= min && theta_f64 <= max {
*checks_passed += 1;
} else {
violations.push(format!("rope_theta={theta} for {} — {msg}", arch.unwrap_or("unknown")));
}
}
fn check_max_position_embeddings(
max_pos: Option<usize>,
violations: &mut Vec<String>,
checks_passed: &mut usize,
) {
if let Some(val) = max_pos {
if (128..=1_048_576).contains(&val) {
*checks_passed += 1;
} else {
violations.push(format!(
"max_position_embeddings={val} outside plausible range [128, 1M]"
));
}
} else {
*checks_passed += 1;
}
}
fn check_rms_norm_eps(
rms_norm_eps: Option<f32>,
violations: &mut Vec<String>,
checks_passed: &mut usize,
) {
if let Some(eps) = rms_norm_eps {
let eps_f64 = f64::from(eps);
if eps_f64 <= 0.0 || eps_f64 > 0.01 {
violations.push(format!(
"rms_norm_eps={eps} outside plausible range (0, 0.01]"
));
} else {
*checks_passed += 1;
}
} else {
*checks_passed += 1;
}
}
fn check_arch_theta_cross_validation(
architecture: Option<&String>,
rope_theta: Option<f32>,
violations: &mut Vec<String>,
checks_passed: &mut usize,
) {
if let (Some(arch), Some(theta)) = (architecture, rope_theta) {
let theta_f64 = f64::from(theta);
let suspicious = matches!(arch.as_str(), "qwen2" | "qwen2.5" | "qwen")
&& (theta_f64 - 10000.0).abs() < 1.0;
if suspicious {
violations.push(format!(
"CRITICAL: {arch} with rope_theta=10000.0 — \
likely missing config.json (Bug 210)"
));
} else {
*checks_passed += 1;
}
} else {
*checks_passed += 1;
}
}
type ModelMetadata = (Option<String>, Option<f32>, Option<usize>, Option<f32>);
fn extract_model_metadata(data: &[u8], path: &Path) -> Result<ModelMetadata> {
let magic = &data[0..4];
if magic == b"GGUF" {
let reader = aprender::format::gguf::reader::GgufReader::from_bytes(data.to_vec())
.map_err(|e| CliError::ValidationFailed(format!("GGUF parse failed: {e}")))?;
let arch = reader.architecture();
let rope_theta = reader.rope_theta();
let max_pos = reader.context_length();
let rms_norm_eps = reader.rms_norm_eps();
Ok((arch, rope_theta, max_pos, rms_norm_eps))
} else if &magic[0..3] == b"APR" || magic == b"APRN" {
use aprender::format::v2::AprV2Reader;
let reader = AprV2Reader::from_bytes(data)
.map_err(|e| CliError::ValidationFailed(format!("APR parse failed: {e}")))?;
let meta = reader.metadata();
let _ = path;
Ok((
meta.architecture.clone(),
meta.rope_theta,
meta.max_position_embeddings,
meta.rms_norm_eps,
))
} else {
let config_path = {
#[cfg(feature = "inference")]
{
realizar::safetensors::find_sibling_file(path, "config.json")
}
#[cfg(not(feature = "inference"))]
{
path.parent()
.map(|dir| dir.join("config.json"))
.filter(|p| p.exists())
}
};
if let Some(config_path) = config_path {
let config_str = std::fs::read_to_string(&config_path)
.map_err(|e| CliError::ValidationFailed(format!("config.json read failed: {e}")))?;
let arch = extract_json_string(&config_str, "model_type");
let rope_theta = extract_json_f32(&config_str, "rope_theta");
let max_pos = extract_json_usize(&config_str, "max_position_embeddings");
let rms_norm_eps = extract_json_f32(&config_str, "rms_norm_eps");
Ok((arch, rope_theta, max_pos, rms_norm_eps))
} else {
Ok((None, None, None, None))
}
}
}
fn extract_json_string(json: &str, key: &str) -> Option<String> {
let pattern = format!("\"{key}\"");
let idx = json.find(&pattern)?;
let after_key = &json[idx + pattern.len()..];
let after_colon = after_key.find(':').map(|i| &after_key[i + 1..])?;
let trimmed = after_colon.trim_start();
if trimmed.starts_with('"') {
let start = 1;
let end = trimmed[start..].find('"')?;
Some(trimmed[start..start + end].to_string())
} else {
None
}
}
fn extract_json_f32(json: &str, key: &str) -> Option<f32> {
let pattern = format!("\"{key}\"");
let idx = json.find(&pattern)?;
let after_key = &json[idx + pattern.len()..];
let after_colon = after_key.find(':').map(|i| &after_key[i + 1..])?;
let trimmed = after_colon.trim_start();
let end = trimmed.find([',', '}', '\n'])?;
trimmed[..end].trim().parse::<f32>().ok()
}
fn extract_json_usize(json: &str, key: &str) -> Option<usize> {
let pattern = format!("\"{key}\"");
let idx = json.find(&pattern)?;
let after_key = &json[idx + pattern.len()..];
let after_colon = after_key.find(':').map(|i| &after_key[i + 1..])?;
let trimmed = after_colon.trim_start();
let end = trimmed.find([',', '}', '\n'])?;
trimmed[..end].trim().parse::<usize>().ok()
}
#[derive(Debug, Clone)]
pub enum OutputVerification {
Pass,
Fail {
reason: String,
},
}
pub fn verify_output(
output: &str,
test_id: &str,
expected_patterns: &[&str],
) -> OutputVerification {
if output.trim().is_empty() {
return OutputVerification::Fail {
reason: format!("{test_id}: Empty output"),
};
}
let garbage_patterns = ["\u{FFFD}", "[UNK]", "akunji", "olumbia"];
for pattern in &garbage_patterns {
if output.contains(pattern) {
return OutputVerification::Fail {
reason: format!("{test_id}: Garbage detected: '{pattern}'"),
};
}
}
let null_count = output.bytes().filter(|&b| b == 0).count();
if null_count > 0 {
return OutputVerification::Fail {
reason: format!("{test_id}: {null_count} null bytes detected (BPE artifact)"),
};
}
if !expected_patterns.is_empty() {
let found = expected_patterns
.iter()
.any(|p| output.to_lowercase().contains(&p.to_lowercase()));
if !found {
return OutputVerification::Fail {
reason: format!(
"{test_id}: Expected one of {:?}, got: '{}'",
expected_patterns,
output.chars().take(100).collect::<String>()
),
};
}
}
OutputVerification::Pass
}
pub fn strip_thinking_blocks(output: &str) -> String {
let mut result = output.to_string();
while let (Some(start), Some(end)) = (result.find("<think>"), result.find("</think>")) {
if end > start {
result = format!("{}{}", &result[..start], &result[end + "</think>".len()..]);
} else {
break;
}
}
if let Some(start) = result.find("<think>") {
result.truncate(start);
}
result.trim().to_string()
}
#[cfg_attr(coverage_nightly, coverage(off))]
#[cfg(all(feature = "inference", feature = "cuda"))]
fn validate_gpu_golden_output(
mapped: &realizar::gguf::MappedGGUFModel,
prompt_tokens: &[u32],
gen_config: &realizar::gguf::QuantizedGenerateConfig,
gguf: &realizar::gguf::GGUFModel,
expected_patterns: &[&str],
config: &QaConfig,
) -> Result<Option<String>> {
use realizar::gguf::{OwnedQuantizedModel, OwnedQuantizedModelCuda};
let model = OwnedQuantizedModel::from_mapped(mapped)
.map_err(|e| CliError::ValidationFailed(format!("Model failed: {e}")))?;
match OwnedQuantizedModelCuda::new(model, 0) {
Ok(mut cuda_model) => match cuda_model.generate_gpu_resident(prompt_tokens, gen_config) {
Ok(gpu_tokens) => {
let gpu_text = gguf.decode(&gpu_tokens);
let gpu_answer = strip_thinking_blocks(&gpu_text); if let OutputVerification::Fail { reason } =
verify_output(&gpu_answer, "golden_output_gpu", expected_patterns)
{
return Ok(Some(format!("GPU output failed (CPU passed): {reason}")));
}
}
Err(e) => {
if !config.json && config.verbose {
println!("{}", format!("GPU golden output skipped: {e}").yellow());
}
}
},
Err(e) => {
if !config.json && config.verbose {
println!("{}", format!("CUDA init skipped: {e}").yellow());
}
}
}
Ok(None)
}
#[cfg(feature = "inference")]
fn golden_output_apr(path: &Path, prompt: &str, max_tokens: usize) -> Result<(Vec<u32>, String)> {
use realizar::apr::AprV2Model;
use realizar::apr_transformer::{AprTransformer, GenerateConfig};
let apr_model = AprV2Model::load(path)
.map_err(|e| CliError::ValidationFailed(format!("Failed to load APR: {e}")))?;
let tokenizer = apr_model
.load_embedded_bpe_tokenizer()
.ok_or_else(|| CliError::ValidationFailed("APR missing embedded tokenizer".to_string()))?;
let transformer = AprTransformer::from_apr_file(path)
.map_err(|e| CliError::ValidationFailed(format!("Failed to load APR transformer: {e}")))?;
let prompt_tokens = tokenizer.encode(prompt);
let gen_config = GenerateConfig {
max_tokens,
temperature: 0.0,
top_k: 1,
..Default::default()
};
let tokens = transformer
.generate_with_cache(&prompt_tokens, &gen_config)
.map_err(|e| CliError::ValidationFailed(format!("Generation failed: {e}")))?;
let text = tokenizer.decode(&tokens);
Ok((tokens, text))
}