use std::process;
use clap::Parser;
use llama_rs::gguf::{GgufFile, MetadataValue};
// Command-line arguments for the GGUF inspector.
//
// This header is deliberately a plain `//` comment: the command's `about`
// text is set explicitly in the attribute below and should stay the single
// source of the top-level description. The `///` comments on the fields are
// picked up by clap's derive macro and shown as per-argument help text.
#[derive(Parser)]
#[command(name = "llama-rs-inspect", about = "Inspect GGUF file metadata and tensor layout")]
struct Cli {
    /// Path to the GGUF file to inspect
    path: String,

    /// List every tensor with its index, name, dtype and dimensions
    #[arg(long)]
    tensors: bool,

    /// Only show metadata entries whose key contains this substring
    #[arg(long)]
    filter: Option<String>,

    /// Largest array (in elements) printed inline; longer arrays are shown as an element count
    #[arg(long, default_value_t = 64)]
    array_limit: usize,

    /// Dump the leading values (up to 512) of the named tensor; values are printed for F32 tensors only
    #[arg(long)]
    dump_tensor: Option<String>,
}
/// Renders a single metadata value as a human-readable string.
///
/// Scalars are printed as `<value> (<type>)`. Strings are quoted and cut to
/// at most 120 bytes. Arrays with at most `array_limit` elements are expanded
/// element by element (recursively, with the same limit); longer arrays are
/// summarized as `[<n> elements]`.
fn format_value(val: &MetadataValue, array_limit: usize) -> String {
    /// Upper bound, in bytes, on how much of a string value is displayed.
    const MAX_STRING_BYTES: usize = 120;

    match val {
        MetadataValue::Uint8(v) => format!("{v} (u8)"),
        MetadataValue::Int8(v) => format!("{v} (i8)"),
        MetadataValue::Uint16(v) => format!("{v} (u16)"),
        MetadataValue::Int16(v) => format!("{v} (i16)"),
        MetadataValue::Uint32(v) => format!("{v} (u32)"),
        MetadataValue::Int32(v) => format!("{v} (i32)"),
        MetadataValue::Float32(v) => format!("{v} (f32)"),
        MetadataValue::Float64(v) => format!("{v} (f64)"),
        MetadataValue::Uint64(v) => format!("{v} (u64)"),
        MetadataValue::Int64(v) => format!("{v} (i64)"),
        MetadataValue::Bool(v) => format!("{v} (bool)"),
        MetadataValue::String(s) => {
            let truncated = if s.len() > MAX_STRING_BYTES {
                // Slicing a `str` at a byte offset that falls inside a
                // multi-byte UTF-8 sequence panics, so back up to the nearest
                // character boundary. Offset 0 is always a boundary, so the
                // loop terminates.
                let mut end = MAX_STRING_BYTES;
                while !s.is_char_boundary(end) {
                    end -= 1;
                }
                &s[..end]
            } else {
                s.as_str()
            };
            format!("\"{truncated}\" (string)")
        }
        MetadataValue::Array(a) => {
            if a.values.len() <= array_limit {
                let items: Vec<String> = a
                    .values
                    .iter()
                    .map(|v| format_value(v, array_limit))
                    .collect();
                format!("[{}]", items.join(", "))
            } else {
                format!("[{} elements]", a.values.len())
            }
        }
    }
}
/// Entry point of the inspector.
///
/// Opens the GGUF file named on the command line (exiting with status 1 if
/// that fails), prints the header summary and the metadata entries sorted by
/// key, then optionally dumps one tensor (`--dump-tensor`) and/or lists all
/// tensors (`--tensors`).
fn main() {
    /// Maximum number of tensor values printed by `--dump-tensor`.
    const MAX_DUMP_VALUES: usize = 512;
    /// Size in bytes of one `f32` value.
    const F32_SIZE: usize = std::mem::size_of::<f32>();

    let cli = Cli::parse();
    let gguf = match GgufFile::open(&cli.path) {
        Ok(g) => g,
        Err(e) => {
            eprintln!("Failed to open {}: {e}", cli.path);
            process::exit(1);
        }
    };

    let header = &gguf.data.header;
    println!(
        "GGUF v{}, {} tensors, {} metadata entries",
        header.version, header.tensor_count, header.metadata_kv_count
    );
    println!();

    // Sort the keys so the listing is printed in a stable, alphabetical order.
    let mut keys: Vec<&String> = gguf.data.metadata.keys().collect();
    keys.sort();
    println!("--- Metadata ---");
    for key in &keys {
        // `--filter` is a substring match on the key, not a prefix match.
        if let Some(ref needle) = cli.filter {
            if !key.contains(needle.as_str()) {
                continue;
            }
        }
        let val = &gguf.data.metadata[key.as_str()];
        println!(" {key} = {}", format_value(val, cli.array_limit));
    }

    if let Some(ref name) = cli.dump_tensor {
        if let Some(info) = gguf.data.get_tensor(name) {
            let dims: Vec<String> = info.dims.iter().map(|d| d.to_string()).collect();
            println!();
            println!(
                "--- Tensor: {} {:?} [{}] ---",
                info.name,
                info.dtype,
                dims.join(", ")
            );
            if let Some(data) = gguf.tensor_data(name) {
                if info.dtype == llama_rs::gguf::GgmlType::F32 {
                    // Decode the bytes four at a time instead of reinterpreting
                    // the slice as `&[f32]`: a reinterpreting cast panics when
                    // the bytes are not 4-byte aligned or the length is not a
                    // multiple of 4. Native byte order is used, which matches
                    // what an in-place reinterpretation would produce.
                    let total = data.len() / F32_SIZE;
                    let shown = total.min(MAX_DUMP_VALUES);
                    for (i, chunk) in data.chunks_exact(F32_SIZE).take(shown).enumerate() {
                        let v = f32::from_ne_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
                        println!(" [{i:>4}] {v:.8}");
                    }
                    if total > shown {
                        println!(" ... ({} more values)", total - shown);
                    }
                    let trailing = data.len() % F32_SIZE;
                    if trailing != 0 {
                        eprintln!(
                            "Warning: tensor '{}' has {} trailing byte(s) that do not form a full f32",
                            name, trailing
                        );
                    }
                } else {
                    println!(" (dtype {:?}, {} bytes — F32 dump not available)", info.dtype, data.len());
                }
            } else {
                // The tensor is listed but its bytes could not be obtained;
                // say so rather than printing an empty section.
                eprintln!("Tensor '{}' has no data available", name);
            }
        } else {
            eprintln!("Tensor '{}' not found", name);
        }
    }

    if cli.tensors {
        println!();
        println!("--- Tensors ({}) ---", gguf.data.tensors.len());
        for (i, t) in gguf.data.tensors.iter().enumerate() {
            let dims: Vec<String> = t.dims.iter().map(|d| d.to_string()).collect();
            println!(
                " [{i:>4}] {:<60} {:?} [{}]",
                t.name,
                t.dtype,
                dims.join(", ")
            );
        }
    }
}