use crate::error::CliError;
use crate::output;
use aprender::format::HEADER_SIZE;
use colored::Colorize;
use serde::Serialize;
use std::collections::BTreeMap;
use std::fs::File;
use std::io::{BufReader, Read, Seek, SeekFrom};
use std::path::Path;
/// Per-layer trace record: captured tensor statistics for one model layer
/// plus any anomaly messages derived from them. Serializable for JSON
/// trace output.
#[derive(Serialize, Clone)]
pub(crate) struct LayerTrace {
    /// Layer name (tensor/module identifier).
    pub name: String,
    /// Position of the layer in the model, when known.
    pub index: Option<usize>,
    /// Statistics of the layer's input activations, when captured.
    pub input_stats: Option<TensorStats>,
    /// Statistics of the layer's output activations, when captured.
    pub output_stats: Option<TensorStats>,
    /// Statistics of the layer's weights, when captured.
    pub weight_stats: Option<TensorStats>,
    /// Human-readable anomaly messages for this layer.
    pub anomalies: Vec<String>,
}
/// Summary statistics for a single tensor's `f32` contents.
///
/// Finite-value statistics (`mean`, `std`, `l2_norm`, `min`, `max`,
/// `max_abs`) exclude NaN/Inf entries, which are tallied separately in
/// `nan_count` / `inf_count` (see `TensorStats::from_slice`).
#[derive(Serialize, Clone)]
#[allow(dead_code)]
pub(crate) struct TensorStats {
    /// Total number of elements, including NaN/Inf entries.
    pub count: usize,
    /// Mean of the finite elements (0.0 when there are none).
    pub mean: f32,
    /// Population standard deviation of the finite elements.
    pub std: f32,
    /// Euclidean (L2) norm of the finite elements.
    pub l2_norm: f32,
    /// Smallest finite element (0.0 when there are none).
    pub min: f32,
    /// Largest finite element (0.0 when there are none).
    pub max: f32,
    /// Largest absolute value among the finite elements.
    pub max_abs: f32,
    /// Number of NaN entries.
    pub nan_count: usize,
    /// Number of +/-Inf entries.
    pub inf_count: usize,
}
impl TensorStats {
    /// Computes summary statistics over a tensor's raw `f32` data.
    ///
    /// NaN and Inf entries are counted separately and excluded from the
    /// running sums, so `mean`/`std`/`l2_norm`/`min`/`max`/`max_abs`
    /// reflect only finite values. An empty slice yields an all-zero
    /// `TensorStats`.
    #[allow(dead_code, clippy::cast_lossless)]
    pub(crate) fn from_slice(data: &[f32]) -> Self {
        let count = data.len();
        if count == 0 {
            return Self {
                count: 0,
                mean: 0.0,
                std: 0.0,
                l2_norm: 0.0,
                min: 0.0,
                max: 0.0,
                max_abs: 0.0,
                nan_count: 0,
                inf_count: 0,
            };
        }
        // Accumulate in f64 to limit rounding error over large tensors.
        let mut sum = 0.0_f64;
        let mut sum_sq = 0.0_f64;
        let mut min = f32::INFINITY;
        let mut max = f32::NEG_INFINITY;
        let mut max_abs = 0.0_f32;
        let mut nan_count = 0;
        let mut inf_count = 0;
        for &v in data {
            if v.is_nan() {
                nan_count += 1;
                continue;
            }
            if v.is_infinite() {
                inf_count += 1;
                continue;
            }
            sum += v as f64;
            sum_sq += (v as f64) * (v as f64);
            min = min.min(v);
            max = max.max(v);
            max_abs = max_abs.max(v.abs());
        }
        let valid_count = count - nan_count - inf_count;
        let mean = if valid_count > 0 {
            (sum / valid_count as f64) as f32
        } else {
            0.0
        };
        // Population variance E[x^2] - E[x]^2, clamped at 0 to absorb
        // floating-point cancellation.
        let variance = if valid_count > 1 {
            ((sum_sq / valid_count as f64) - (mean as f64).powi(2)).max(0.0)
        } else {
            0.0
        };
        // BUGFIX: take sqrt in f64 *before* narrowing to f32. The previous
        // `(sum_sq as f32).sqrt()` saturated to +inf whenever the f64
        // sum-of-squares exceeded f32::MAX (~3.4e38), reporting an infinite
        // l2_norm even when the true norm fits easily in f32. Same fix for
        // the variance/std path.
        let std = variance.sqrt() as f32;
        let l2_norm = sum_sq.sqrt() as f32;
        Self {
            count,
            mean,
            std,
            l2_norm,
            // If every element was NaN/Inf, min/max never updated; report 0.
            min: if min.is_finite() { min } else { 0.0 },
            max: if max.is_finite() { max } else { 0.0 },
            max_abs,
            nan_count,
            inf_count,
        }
    }

    /// Flags suspicious statistics for the tensor/layer `name`: NaN or Inf
    /// presence, near-zero variance, unusually large magnitudes
    /// (`max_abs > 100`), and large mean bias (`|mean| > 10`). Returns one
    /// human-readable message per detected anomaly.
    #[allow(dead_code)]
    pub(crate) fn detect_anomalies(&self, name: &str) -> Vec<String> {
        let mut anomalies = Vec::new();
        if self.nan_count > 0 {
            anomalies.push(format!(
                "{name}: {}/{} NaN values",
                self.nan_count, self.count
            ));
        }
        if self.inf_count > 0 {
            anomalies.push(format!(
                "{name}: {}/{} Inf values",
                self.inf_count, self.count
            ));
        }
        if self.std < 1e-8 && self.count > 1 {
            anomalies.push(format!("{name}: near-zero variance (std={:.2e})", self.std));
        }
        if self.max_abs > 100.0 {
            anomalies.push(format!(
                "{name}: large values (max_abs={:.2})",
                self.max_abs
            ));
        }
        if self.mean.abs() > 10.0 {
            anomalies.push(format!("{name}: large mean bias ({:.4})", self.mean));
        }
        anomalies
    }
}
/// Top-level JSON payload for a file-based trace report.
/// NOTE(review): not constructed anywhere in this chunk — presumably used
/// by the `include!`d trace modules at the bottom of the file; confirm.
#[derive(Serialize)]
struct TraceResult {
    /// Path of the traced model file.
    file: String,
    /// Detected container format (e.g. GGUF/APR/SafeTensors).
    format: String,
    /// Per-layer statistics and anomalies.
    layers: Vec<LayerTrace>,
    /// Aggregate counts across all layers.
    summary: TraceSummary,
}
/// Aggregate counts attached to a `TraceResult`.
#[derive(Serialize)]
struct TraceSummary {
    /// Number of layers traced.
    total_layers: usize,
    /// Total parameter count across traced tensors.
    total_parameters: usize,
    /// Number of anomaly messages collected.
    anomaly_count: usize,
    /// The anomaly messages themselves.
    anomalies: Vec<String>,
}
/// Backward-compatible wrapper: dispatches the special trace modes with
/// JSON output disabled. Returns `Some(result)` when a special mode fully
/// handled the request, or `None` when the caller should proceed with the
/// default trace path.
fn handle_special_modes(
    path: &Path,
    reference: Option<&Path>,
    payload: bool,
    diff: bool,
    interactive: bool,
) -> Option<Result<(), CliError>> {
    handle_special_modes_with_json(path, reference, payload, diff, interactive, false)
}
/// Dispatches the mutually-exclusive special trace modes.
///
/// Precedence: `interactive` > `payload` > `diff`. Returns `Some(result)`
/// when a mode fully handled the request; `None` means the caller should
/// continue with the default trace flow (diff mode only prints and still
/// falls through).
fn handle_special_modes_with_json(
    path: &Path,
    reference: Option<&Path>,
    payload: bool,
    diff: bool,
    interactive: bool,
    json: bool,
) -> Option<Result<(), CliError>> {
    // Interactive TUI short-circuits everything else.
    if interactive {
        println!("Starting interactive trace (TUI) for {}", path.display());
        println!("(TUI mode not yet fully implemented)");
        return Some(Ok(()));
    }
    // Payload mode runs a full traced forward pass, JSON or human-readable.
    if payload {
        let outcome = if json {
            run_traced_inference_json(path)
        } else {
            run_traced_inference(path)
        };
        return Some(outcome);
    }
    // Diff mode only reports; the request still falls through to the caller.
    if diff {
        match reference {
            Some(ref_path) => println!(
                "Diffing trace between {} and {}",
                path.display(),
                ref_path.display()
            ),
            None => println!("Diff mode requires --reference"),
        }
    }
    None
}
/// Resolves `path` to a local model file.
///
/// Plain filesystem paths are echoed and returned unchanged; `hf://org/repo`
/// (optionally `/file`) URIs are parsed and downloaded from HuggingFace.
fn resolve_model_path(path: &Path) -> Result<std::path::PathBuf, CliError> {
    use super::run::{download_hf_model, ModelSource};
    let raw = path.to_string_lossy();
    // Anything that is not an hf:// URI is treated as a local path.
    if !raw.starts_with("hf://") {
        println!("Model: {}", path.display());
        println!();
        return Ok(path.to_path_buf());
    }
    // Non-HuggingFace sources fall back to the original path unchanged.
    let ModelSource::HuggingFace { org, repo, file } = ModelSource::parse(&raw)? else {
        return Ok(path.to_path_buf());
    };
    let suffix = file.as_ref().map(|f| format!("/{}", f)).unwrap_or_default();
    println!("Model: hf://{}/{}{}", org, repo, suffix);
    println!();
    eprintln!("{}", "Downloading from HuggingFace...".yellow());
    download_hf_model(&org, &repo, file.as_deref())
}
fn preflight_contract_check(local_path: &Path) {
use aprender::format::rosetta::RosettaStone;
let rosetta = RosettaStone::new();
match rosetta.validate(local_path) {
Ok(report) => {
let contract_failures: Vec<String> = report
.tensors
.iter()
.flat_map(|t| t.failures.iter().map(move |f| format!("{}: {}", t.name, f)))
.collect();
if contract_failures.is_empty() {
println!(
"{}",
format!(
"Contract: {} tensors pass PMAT-235 gates",
report.tensor_count
)
.green()
);
} else {
println!(
"{}",
format!(
"Contract: {} violations in {} tensors",
contract_failures.len(),
report.failed_tensor_count
)
.red()
.bold()
);
for failure in contract_failures.iter().take(5) {
println!(" {}", failure.red());
}
if contract_failures.len() > 5 {
println!(" ... and {} more", contract_failures.len() - 5);
}
println!();
println!(
"{}",
"WARNING: Contract violations may cause garbage output."
.yellow()
.bold()
);
}
}
Err(e) => {
println!("{}", format!("Contract: validation skipped ({e})").yellow());
}
}
println!();
}
/// Routes traced inference on the file extension (case-insensitive).
/// Unknown or missing extensions produce `CliError::InvalidFormat`; the
/// error message reports the extension in its original casing.
fn dispatch_by_format(local_path: &Path) -> Result<(), CliError> {
    let ext = local_path
        .extension()
        .and_then(std::ffi::OsStr::to_str)
        .unwrap_or("");
    let normalized = ext.to_lowercase();
    match normalized.as_str() {
        "gguf" => run_traced_inference_gguf(local_path),
        "apr" => run_traced_inference_apr(local_path),
        "safetensors" => run_traced_inference_safetensors(local_path),
        _ => Err(CliError::InvalidFormat(format!(
            "Unknown format: {}. Supported: .gguf, .apr, .safetensors",
            ext
        ))),
    }
}
/// Entry point for traced inference: prints the section header, resolves
/// `hf://` paths to a local file, runs the contract preflight, then
/// dispatches on file extension (.gguf / .apr / .safetensors).
fn run_traced_inference(path: &Path) -> Result<(), CliError> {
    output::section("Traced Inference (APR-TRACE-001)");
    let local_path = resolve_model_path(path)?;
    preflight_contract_check(&local_path);
    dispatch_by_format(&local_path)
}
/// Traced inference for GGUF models (requires the `inference` feature).
///
/// Loads the model, prints its configuration, runs a traced forward pass on
/// a fixed test prompt (per-layer activation statistics), then performs a
/// short greedy generation and applies a garbage-output heuristic to the
/// decoded text. qwen3_moe models use a dedicated traced-forward path and
/// skip generation entirely.
#[cfg(feature = "inference")]
fn run_traced_inference_gguf(path: &Path) -> Result<(), CliError> {
    use colored::Colorize;
    use realizar::gguf::{MappedGGUFModel, OwnedQuantizedModel, QuantizedGenerateConfig};
    println!("{}", "Format: GGUF (quantized)".cyan());
    println!();
    let mapped = MappedGGUFModel::from_path(path)
        .map_err(|e| CliError::ModelLoadFailed(format!("Failed to load GGUF: {e}")))?;
    let model = OwnedQuantizedModel::from_mapped(&mapped)
        .map_err(|e| CliError::ModelLoadFailed(format!("Failed to create quantized model: {e}")))?;
    let config = model.config();
    println!("Architecture: {}", config.architecture);
    println!(" Layers: {}", config.num_layers);
    println!(" Hidden dim: {}", config.hidden_dim);
    println!(" Vocab size: {}", config.vocab_size);
    println!(
        " Heads: {} (KV: {})",
        config.num_heads, config.num_kv_heads
    );
    println!();
    // Fixed smoke-test prompt; falls back to token id 1 if encoding fails.
    // NOTE(review): confirm id 1 is a sensible fallback for every tokenizer.
    let test_prompt = "What is 2+2?";
    let test_tokens = mapped
        .model
        .encode(test_prompt)
        .unwrap_or_else(|| vec![1u32]);
    println!("{}", format!("Test prompt: {:?}", test_prompt).cyan());
    println!("{}", format!("Encoded tokens: {:?}", test_tokens).cyan());
    println!();
    println!("{}", "FORWARD PASS (with layer tracing):".green().bold());
    // qwen3_moe is matched via the canonical name and the raw spellings
    // that appear in GGUF metadata.
    let canonical_arch = realizar::tensor_names::normalize_architecture(&config.architecture);
    let raw_arch_lower = config.architecture.to_lowercase();
    let is_qwen3_moe = canonical_arch == "qwen3_moe"
        || raw_arch_lower == "qwen3moe"
        || raw_arch_lower == "qwen3_moe";
    let trace_result = if is_qwen3_moe {
        run_qwen3_moe_traced_forward(&mapped, &model, &test_tokens)
    } else {
        model.forward_traced(&test_tokens)
    };
    match trace_result {
        Ok(trace) => {
            println!();
            println!("{}", "EMBEDDING:".cyan().bold());
            print_activation_stats_colored(" ", &trace.embed_stats);
            print_layer_activations(&trace.layer_activations);
            println!();
            println!("{}", "FINAL LAYER NORM:".cyan().bold());
            print_activation_stats(" ", &trace.final_norm_stats);
            print_logit_predictions(&trace.logits);
            print_trace_summary(&trace.layer_activations, &trace.logits);
        }
        Err(e) => {
            // Tracing is best-effort: some architectures do not support it.
            eprintln!(
                "{}",
                format!("forward_traced unavailable for this GGUF: {e}").yellow()
            );
            println!(" Layer-by-layer tracing not available (e.g., encoder-decoder model).");
        }
    }
    println!();
    if is_qwen3_moe {
        println!(
            "{}",
            "GENERATION: skipped for qwen3_moe (use `apr run` for text generation)".yellow()
        );
        println!();
        return Ok(());
    }
    println!("{}", "GENERATION (max 8 tokens):".green().bold());
    // Greedy decode (temperature 0, top-1) so the output is deterministic.
    let gen_config = QuantizedGenerateConfig {
        max_tokens: 8,
        temperature: 0.0,
        top_k: 1,
        ..Default::default()
    };
    let output_tokens = model
        .generate_with_cache(&test_tokens, &gen_config)
        .map_err(|e| CliError::InferenceFailed(format!("Generation failed: {e}")))?;
    // The returned sequence includes the prompt; keep only the new tokens.
    let generated = &output_tokens[test_tokens.len()..];
    println!(" Generated token IDs: {:?}", generated);
    println!();
    println!("{}", "TOKEN-BY-TOKEN DECODE:".green().bold());
    for (i, &token_id) in generated.iter().enumerate() {
        let decoded = mapped.model.decode(&[token_id]);
        let is_garbage = is_likely_garbage(&decoded);
        if is_garbage {
            println!(
                " {}. token_id={} → {:?} {}",
                i + 1,
                token_id,
                decoded,
                "⚠ GARBAGE".red().bold()
            );
        } else {
            println!(" {}. token_id={} → {:?}", i + 1, token_id, decoded);
        }
    }
    let full_decoded = mapped.model.decode(generated);
    println!();
    println!("{}", "FULL OUTPUT:".green().bold());
    println!(" {:?}", full_decoded);
    println!();
    // Heuristic triage for the most common quantized-inference failure modes.
    if is_likely_garbage(&full_decoded) {
        println!("{}", "⚠ GARBAGE OUTPUT DETECTED!".red().bold());
        println!();
        println!("Likely causes:");
        println!(" 1. LAYOUT-001: Column-major vs row-major kernel mismatch");
        println!(" 2. Weight tensor corruption during loading");
        println!(" 3. Tokenizer vocabulary mismatch");
        println!();
        println!("Debug steps:");
        println!(" 1. Check if SafeTensors produces correct output (same model)");
        println!(" 2. Compare token IDs between GGUF and SafeTensors");
        println!(" 3. Verify quantization type is supported");
    } else {
        println!("{}", "✓ Output appears reasonable".green());
    }
    Ok(())
}
/// JSON variant of GGUF traced inference (requires the `inference` feature).
///
/// Runs the same traced forward pass as `run_traced_inference_gguf` but
/// emits one pretty-printed JSON document (model config, per-layer
/// activation stats, logits summary with top-5 tokens) and performs no
/// generation.
#[cfg(feature = "inference")]
fn run_traced_inference_json(path: &Path) -> Result<(), CliError> {
    use realizar::gguf::{MappedGGUFModel, OwnedQuantizedModel};
    use serde_json::json;
    // Resolve hf:// URIs to a local download; plain paths pass through.
    // NOTE(review): resolve_model_path prints status lines to stdout, so for
    // hf:// inputs those lines precede the JSON document — confirm intended.
    let local_path: std::path::PathBuf = if path.to_string_lossy().starts_with("hf://") {
        resolve_model_path(path)?
    } else {
        path.to_path_buf()
    };
    let mapped = MappedGGUFModel::from_path(&local_path)
        .map_err(|e| CliError::ModelLoadFailed(format!("Failed to load GGUF: {e}")))?;
    let model = OwnedQuantizedModel::from_mapped(&mapped)
        .map_err(|e| CliError::ModelLoadFailed(format!("Failed to create quantized model: {e}")))?;
    let config = model.config();
    // Same fixed smoke-test prompt as the human-readable path.
    let test_prompt = "What is 2+2?";
    let test_tokens = mapped
        .model
        .encode(test_prompt)
        .unwrap_or_else(|| vec![1u32]);
    let canonical_arch = realizar::tensor_names::normalize_architecture(&config.architecture);
    let raw_arch_lower = config.architecture.to_lowercase();
    let is_qwen3_moe = canonical_arch == "qwen3_moe"
        || raw_arch_lower == "qwen3moe"
        || raw_arch_lower == "qwen3_moe";
    let trace = if is_qwen3_moe {
        run_qwen3_moe_traced_forward(&mapped, &model, &test_tokens)
    } else {
        model.forward_traced(&test_tokens)
    }
    .map_err(|e| CliError::InferenceFailed(format!("forward_traced: {e}")))?;
    // Shared JSON shape for every ActivationStats snapshot in the output.
    let stats_to_json = |s: &realizar::apr_transformer::ActivationStats| {
        json!({
            "min": s.min, "max": s.max, "mean": s.mean, "std_dev": s.std_dev,
            "nan_count": s.nan_count, "inf_count": s.inf_count,
            "zero_count": s.zero_count, "count": s.count,
        })
    };
    let layers_json: Vec<_> = trace
        .layer_activations
        .iter()
        .map(|la| {
            json!({
                "layer_idx": la.layer_idx,
                "attn_norm": stats_to_json(&la.attn_norm_stats),
                "qkv": stats_to_json(&la.qkv_stats),
                "attn_out": stats_to_json(&la.attn_out_stats),
                "ffn_norm": stats_to_json(&la.ffn_norm_stats),
                "ffn_out": stats_to_json(&la.ffn_out_stats),
                "output": stats_to_json(&la.output_stats),
            })
        })
        .collect();
    let logits_l2: f32 = trace.logits.iter().map(|v| v * v).sum::<f32>().sqrt();
    // Top-5 logits by value; NaN comparisons fall back to Equal so the
    // sort stays total.
    let mut indexed: Vec<(usize, f32)> = trace.logits.iter().copied().enumerate().collect();
    indexed.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
    let top_k: Vec<_> = indexed
        .iter()
        .take(5)
        .map(|(i, v)| json!({ "token_id": *i, "logit": *v }))
        .collect();
    let arch_label = if is_qwen3_moe {
        format!("GGUF ({})", config.architecture)
    } else {
        "GGUF (quantized)".to_string()
    };
    let out = json!({
        "format": arch_label,
        "architecture": config.architecture,
        "num_layers": config.num_layers,
        "hidden_dim": config.hidden_dim,
        "vocab_size": config.vocab_size,
        "num_heads": config.num_heads,
        "num_kv_heads": config.num_kv_heads,
        "prompt": test_prompt,
        "encoded_tokens": test_tokens,
        "embedding": stats_to_json(&trace.embed_stats),
        "layers": layers_json,
        "final_norm": stats_to_json(&trace.final_norm_stats),
        "logits_stats": stats_to_json(&trace.logits_stats),
        "logits": {
            "vocab_size": trace.logits.len(),
            "l2_norm": logits_l2,
            "top_k": top_k,
        },
    });
    println!(
        "{}",
        serde_json::to_string_pretty(&out)
            .map_err(|e| { CliError::InferenceFailed(format!("JSON serialization: {e}")) })?
    );
    Ok(())
}
/// Stub compiled when the `inference` feature is disabled: always reports
/// that traced inference is unavailable.
#[cfg(not(feature = "inference"))]
fn run_traced_inference_json(_path: &Path) -> Result<(), CliError> {
    let msg = String::from("Traced inference requires the 'inference' feature.");
    Err(CliError::FeatureDisabled(msg))
}
/// Traced forward pass for qwen3_moe GGUF models.
///
/// Reads the three MoE hyperparameters (expert count, experts-per-token,
/// expert FFN width) from GGUF metadata, loads every layer's MoE weights,
/// then delegates to `forward_qwen3_moe_traced`. A missing metadata key is
/// reported as an `InvalidShape` error naming the key.
#[cfg(feature = "inference")]
fn run_qwen3_moe_traced_forward(
    mapped: &realizar::gguf::MappedGGUFModel,
    model: &realizar::gguf::OwnedQuantizedModel,
    test_tokens: &[u32],
) -> realizar::error::Result<realizar::apr_transformer::ForwardTrace> {
    // All three metadata lookups share the same error shape.
    let missing = |key: &str| realizar::error::RealizarError::InvalidShape {
        reason: format!("qwen3_moe trace: missing '{key}' in GGUF metadata"),
    };
    let num_experts = mapped
        .model
        .expert_count()
        .ok_or_else(|| missing("expert_count"))?;
    let num_experts_per_tok = mapped
        .model
        .expert_used_count()
        .ok_or_else(|| missing("expert_used_count"))?;
    let moe_intermediate = mapped
        .model
        .expert_feed_forward_length()
        .ok_or_else(|| missing("expert_feed_forward_length"))?;
    let data = mapped.data();
    // Load per-layer MoE weights, propagating the first failure.
    let moe_layers = (0..model.config().num_layers)
        .map(|layer_idx| {
            realizar::gguf::qwen3_moe_load::load_qwen3_moe_layer(&mapped.model, data, layer_idx)
        })
        .collect::<realizar::error::Result<Vec<_>>>()?;
    model.forward_qwen3_moe_traced(
        test_tokens,
        &moe_layers,
        num_experts,
        num_experts_per_tok,
        moe_intermediate,
        data,
    )
}
/// Stub compiled when the `inference` feature is disabled: GGUF traced
/// inference is unavailable and the error explains how to enable it.
#[cfg(not(feature = "inference"))]
fn run_traced_inference_gguf(_path: &Path) -> Result<(), CliError> {
    let msg = String::from(
        "Traced inference for GGUF models requires the 'inference' feature. Build with --features inference",
    );
    Err(CliError::FeatureDisabled(msg))
}
// Textual includes: these files are spliced into this module so they share
// its imports and private items. Presumably they supply the helpers called
// above but not defined here (e.g. `run_traced_inference_apr`,
// `run_traced_inference_safetensors`, `is_likely_garbage`,
// `print_layer_activations`) — verify before moving code between files.
include!("vector_stats.rs");
include!("trace_likely_has_repeated.rs");
include!("layer.rs");
include!("trace_05.rs");