use std::path::PathBuf;
use std::process;
use clap::{Parser, Subcommand};
use llama_rs::diagnostics::{
compare, find_profile_for_architecture, generate_profile, list_builtin_profile_names,
load_builtin_profile, load_profile_file, render_json, render_text, GgufSource, ModelSource,
ReportContext, SafetensorsSource,
};
use llama_rs::gguf::{GgufFile, MetadataValue};
// Top-level command line for `llama-rs-inspect`.
//
// Two invocation styles are supported: the legacy flat form
// (`llama-rs-inspect <path> [flags]`) whose fields live directly on this
// struct, and the subcommand form captured in `command`. When a subcommand is
// given, the flat fields on this struct are not consulted by `main`.
//
// The field doc comments below are rendered by clap as `--help` text; the
// struct itself deliberately uses plain comments so the explicit `about`
// attribute stays the single source of the program description.
#[derive(Parser)]
#[command(name = "llama-rs-inspect")]
#[command(about = "Inspect and diagnose GGUF / SafeTensors model files")]
struct Cli {
    /// Path to a GGUF file to inspect (used when no subcommand is given)
    path: Option<String>,
    /// Also list every tensor (name, dtype, dimensions) after the metadata
    #[arg(long)]
    tensors: bool,
    /// Only show metadata keys that contain this substring
    #[arg(long)]
    filter: Option<String>,
    /// Arrays with more elements than this are summarised as an element count
    #[arg(long, default_value_t = 64)]
    array_limit: usize,
    /// Dump the named tensor instead of printing metadata
    #[arg(long)]
    dump_tensor: Option<String>,
    #[command(subcommand)]
    command: Option<Command>,
}
// Subcommands of `llama-rs-inspect`.
//
// Doc comments on variants and fields are rendered by clap as the subcommand
// descriptions and argument help in `--help` output.
#[derive(Subcommand)]
enum Command {
    /// Print the header summary and metadata of a GGUF file
    Metadata {
        /// Path to the GGUF file
        path: String,
        /// Also list every tensor (name, dtype, dimensions) after the metadata
        #[arg(long)]
        tensors: bool,
        /// Only show metadata keys that contain this substring
        #[arg(long)]
        filter: Option<String>,
        /// Arrays with more elements than this are summarised as an element count
        #[arg(long, default_value_t = 64)]
        array_limit: usize,
    },
    /// Print the values of one tensor from a GGUF file (F32 tensors only, first 512 values)
    Dump {
        /// Path to the GGUF file
        path: String,
        /// Name of the tensor to dump
        #[arg(long)]
        tensor: String,
    },
    /// List the names of the built-in profiles
    ListProfiles,
    /// Compare a model against a profile and print a report
    DiffProfile {
        /// Path to a GGUF file, a `.safetensors` file, or a directory (opened as SafeTensors)
        path: String,
        /// Built-in profile name or path to a `.toml` profile file; when omitted, the profile
        /// is looked up from the architecture the model declares
        #[arg(long)]
        against: Option<String>,
        /// Emit the report as pretty-printed JSON instead of text
        #[arg(long)]
        json: bool,
    },
    /// Generate a profile from a model and print it or write it to a file
    GenerateProfile {
        /// Path to a GGUF file, a `.safetensors` file, or a directory (opened as SafeTensors)
        path: String,
        /// Write the generated profile to this file instead of standard output
        #[arg(long)]
        out: Option<PathBuf>,
        /// Profile name; defaults to the model's declared architecture, or "generated"
        #[arg(long)]
        name: Option<String>,
    },
}
/// Program entry point.
///
/// Parses the command line, runs the selected action and terminates the
/// process with the status code that action returned. Without a subcommand the
/// legacy flat form is used: a bare path prints metadata, or dumps a tensor
/// when `--dump-tensor` is present. With neither a subcommand nor a path, a
/// usage hint is written to stderr and the exit status is 1.
fn main() {
    let cli = Cli::parse();

    let exit_code = if let Some(command) = cli.command {
        match command {
            Command::ListProfiles => run_list_profiles(),
            Command::Dump { path, tensor } => run_dump(&path, &tensor),
            Command::Metadata {
                path,
                tensors,
                filter,
                array_limit,
            } => run_metadata(&path, tensors, filter.as_deref(), array_limit),
            Command::GenerateProfile { path, out, name } => {
                run_generate_profile(&path, out.as_deref(), name.as_deref())
            }
            Command::DiffProfile {
                path,
                against,
                json,
            } => run_diff_profile(&path, against.as_deref(), json),
        }
    } else if let Some(model_path) = cli.path.as_deref() {
        // Legacy flat invocation: `--dump-tensor` takes priority over metadata.
        match cli.dump_tensor.as_deref() {
            Some(tensor_name) => run_dump(model_path, tensor_name),
            None => run_metadata(
                model_path,
                cli.tensors,
                cli.filter.as_deref(),
                cli.array_limit,
            ),
        }
    } else {
        eprintln!(
            "Usage: llama-rs-inspect <path> [--tensors] [--filter STR] [--dump-tensor NAME]\n or: llama-rs-inspect <subcommand> --help"
        );
        1
    };

    process::exit(exit_code);
}
/// Renders a metadata value as a single human-readable string.
///
/// Scalars are printed followed by their type tag, e.g. `42 (u32)`.
/// Strings are quoted and limited to at most 120 bytes; the cut is moved back
/// to the nearest UTF-8 character boundary so multi-byte text never causes a
/// slicing panic. Arrays with at most `array_limit` elements are rendered
/// element by element (recursively); longer arrays are summarised as
/// `[N elements]`.
fn format_value(val: &MetadataValue, array_limit: usize) -> String {
    // Upper bound, in bytes, on how much of a string value is displayed.
    const MAX_STRING_BYTES: usize = 120;

    match val {
        MetadataValue::Uint8(v) => format!("{v} (u8)"),
        MetadataValue::Int8(v) => format!("{v} (i8)"),
        MetadataValue::Uint16(v) => format!("{v} (u16)"),
        MetadataValue::Int16(v) => format!("{v} (i16)"),
        MetadataValue::Uint32(v) => format!("{v} (u32)"),
        MetadataValue::Int32(v) => format!("{v} (i32)"),
        MetadataValue::Float32(v) => format!("{v} (f32)"),
        MetadataValue::Float64(v) => format!("{v} (f64)"),
        MetadataValue::Uint64(v) => format!("{v} (u64)"),
        MetadataValue::Int64(v) => format!("{v} (i64)"),
        MetadataValue::Bool(v) => format!("{v} (bool)"),
        MetadataValue::String(s) => {
            let truncated = if s.len() > MAX_STRING_BYTES {
                // Byte 120 may fall inside a multi-byte character; slicing
                // there would panic, so back off to the closest boundary.
                // Index 0 is always a boundary, so the search cannot fail.
                let cut = (0..=MAX_STRING_BYTES)
                    .rev()
                    .find(|&idx| s.is_char_boundary(idx))
                    .unwrap_or(0);
                &s[..cut]
            } else {
                s.as_str()
            };
            format!("\"{truncated}\" (string)")
        }
        MetadataValue::Array(a) => {
            if a.values.len() <= array_limit {
                let items: Vec<String> = a
                    .values
                    .iter()
                    .map(|v| format_value(v, array_limit))
                    .collect();
                format!("[{}]", items.join(", "))
            } else {
                format!("[{} elements]", a.values.len())
            }
        }
    }
}
/// Prints the header summary and the sorted metadata of the GGUF file at
/// `path`, followed by the tensor list when `tensors` is set.
///
/// When `filter` is given, only keys containing that substring are shown.
/// `array_limit` is forwarded to `format_value` for array rendering.
/// Returns 0 on success, or 1 (after reporting on stderr) when the file
/// cannot be opened.
fn run_metadata(path: &str, tensors: bool, filter: Option<&str>, array_limit: usize) -> i32 {
    let gguf = match GgufFile::open(path) {
        Err(e) => {
            eprintln!("Failed to open {path}: {e}");
            return 1;
        }
        Ok(file) => file,
    };

    let hdr = &gguf.data.header;
    println!(
        "GGUF v{}, {} tensors, {} metadata entries",
        hdr.version, hdr.tensor_count, hdr.metadata_kv_count
    );
    println!();

    // Keys are sorted so the listing is stable from run to run.
    let mut sorted_keys: Vec<&String> = gguf.data.metadata.keys().collect();
    sorted_keys.sort();

    println!("--- Metadata ---");
    for key in sorted_keys {
        let excluded = filter.is_some_and(|needle| !key.contains(needle));
        if excluded {
            continue;
        }
        let rendered = format_value(&gguf.data.metadata[key.as_str()], array_limit);
        println!(" {key} = {}", rendered);
    }

    if !tensors {
        return 0;
    }

    println!();
    println!("--- Tensors ({}) ---", gguf.data.tensors.len());
    for (i, t) in gguf.data.tensors.iter().enumerate() {
        let shape = t
            .dims
            .iter()
            .map(|d| d.to_string())
            .collect::<Vec<String>>()
            .join(", ");
        println!(" [{i:>4}] {:<60} {:?} [{}]", t.name, t.dtype, shape);
    }
    0
}
/// Prints a header line for `tensor` from the GGUF file at `path` and, for F32
/// tensors, up to the first 512 values.
///
/// Non-F32 tensors only get a note with their dtype and byte size.
///
/// Returns 0 on success. Returns 1 after reporting on stderr when the file
/// cannot be opened, the tensor is not found, its data cannot be obtained, or
/// the bytes cannot be reinterpreted as `f32` (wrong alignment or a length
/// that is not a multiple of four) — the last case previously panicked.
fn run_dump(path: &str, tensor: &str) -> i32 {
    // How many leading values of an F32 tensor are printed.
    const MAX_VALUES: usize = 512;

    let gguf = match GgufFile::open(path) {
        Ok(g) => g,
        Err(e) => {
            eprintln!("Failed to open {path}: {e}");
            return 1;
        }
    };
    let Some(info) = gguf.data.get_tensor(tensor) else {
        eprintln!("Tensor '{tensor}' not found");
        return 1;
    };

    let dims: Vec<String> = info.dims.iter().map(|d| d.to_string()).collect();
    println!(
        "--- Tensor: {} {:?} [{}] ---",
        info.name,
        info.dtype,
        dims.join(", ")
    );

    // The tensor is listed but its bytes are unavailable: report it rather
    // than exiting successfully with nothing dumped.
    let Some(data) = gguf.tensor_data(tensor) else {
        eprintln!("Tensor '{tensor}' has no readable data in {path}");
        return 1;
    };

    if info.dtype != llama_rs::gguf::GgmlType::F32 {
        println!(
            " (dtype {:?}, {} bytes — F32 dump not available)",
            info.dtype,
            data.len()
        );
        return 0;
    }

    // `cast_slice` panics on misaligned or oddly sized input; a diagnostic
    // tool pointed at a possibly damaged file should fail gracefully instead.
    let floats: &[f32] = match bytemuck::try_cast_slice(data) {
        Ok(values) => values,
        Err(e) => {
            eprintln!("Tensor '{tensor}' data cannot be read as f32: {e:?}");
            return 1;
        }
    };

    let n = floats.len().min(MAX_VALUES);
    for (i, &v) in floats[..n].iter().enumerate() {
        println!(" [{i:>4}] {v:.8}");
    }
    if floats.len() > n {
        println!(" ... ({} more values)", floats.len() - n);
    }
    0
}
/// Prints each built-in profile name on its own line, or a placeholder line
/// when there are none. Always returns 0.
fn run_list_profiles() -> i32 {
    let builtin = list_builtin_profile_names();
    if !builtin.is_empty() {
        for profile_name in builtin {
            println!("{profile_name}");
        }
    } else {
        println!("(no built-in profiles)");
    }
    0
}
/// Compares the model at `path` against a profile and prints the report.
///
/// The profile comes from `against` when given; otherwise it is resolved from
/// the model itself (see `resolve_profile_for_diff`). The report is printed as
/// pretty JSON when `json` is set, as text otherwise. Returns the exit code of
/// the report's verdict, or the failure code from whichever step failed.
fn run_diff_profile(path: &str, against: Option<&str>, json: bool) -> i32 {
    let source = match open_source(path) {
        Err(code) => return code,
        Ok(opened) => opened,
    };

    let (arch_source, format_kind) = detect_source_labels(path);
    let ctx = ReportContext {
        file_path: path,
        arch_source,
        format_kind,
    };

    let profile = match resolve_profile_for_diff(against, source.as_ref()) {
        Err(code) => return code,
        Ok(resolved) => resolved,
    };

    let report = compare(source.as_ref(), profile.as_ref(), &ctx);

    if !json {
        // The text rendering is emitted exactly as produced, so `print!`.
        print!("{}", render_text(&report));
        return report.verdict.exit_code();
    }

    match serde_json::to_string_pretty(&render_json(&report)) {
        Ok(s) => println!("{s}"),
        Err(e) => {
            eprintln!("JSON serialization failed: {e}");
            return 1;
        }
    }
    report.verdict.exit_code()
}
/// Chooses the profile a diff should be run against.
///
/// * With `against` naming a profile reference (built-in name or `.toml`
///   file), that profile is loaded.
/// * With any other `against` value, an error is printed (model-vs-model
///   diffing is not implemented) and `Err(1)` is returned.
/// * Without `against`, the profile is looked up from the architecture that
///   `source` declares; `Ok(None)` is returned when it declares none.
///
/// Every `Err` carries the process exit code to use; the message has already
/// been written to stderr.
fn resolve_profile_for_diff(
    against: Option<&str>,
    source: &dyn ModelSource,
) -> Result<Option<llama_rs::diagnostics::Profile>, i32> {
    let Some(token) = against else {
        // No explicit target: fall back to the model's declared architecture.
        return match source.declared_architecture() {
            None => Ok(None),
            Some(arch) => find_profile_for_architecture(&arch).map_err(|e| {
                eprintln!("Failed to look up profile for architecture `{arch}`: {e}");
                1
            }),
        };
    };

    if !is_profile_reference(token) {
        eprintln!(
            "pairwise model-vs-model diff is not implemented in v1; use --against <profile-name> or <file.toml>"
        );
        return Err(1);
    }

    resolve_profile(token).map(Some)
}
/// Generates a profile from the model at `path`.
///
/// The profile name is `name` when given, else the architecture the model
/// declares, else `"generated"`. The result is written to `out` when given
/// (with a confirmation on stderr) or printed to stdout otherwise. Returns 0
/// on success, the failure code from opening the model, or 1 when the output
/// file cannot be written.
fn run_generate_profile(
    path: &str,
    out: Option<&std::path::Path>,
    name: Option<&str>,
) -> i32 {
    let source = match open_source(path) {
        Err(code) => return code,
        Ok(opened) => opened,
    };

    let profile_name = match name {
        Some(explicit) => explicit.to_string(),
        None => source
            .declared_architecture()
            .unwrap_or_else(|| "generated".to_string()),
    };

    let toml = generate_profile(source.as_ref(), &profile_name);

    let Some(destination) = out else {
        print!("{toml}");
        return 0;
    };

    if let Err(e) = std::fs::write(destination, &toml) {
        eprintln!("Failed to write {}: {e}", destination.display());
        return 1;
    }
    eprintln!("Wrote {}", destination.display());
    0
}
/// Opens `path` as a model source.
///
/// Directories and files with a `safetensors` extension are opened with
/// `SafetensorsSource`; everything else is opened with `GgufSource`. On
/// failure the error is written to stderr and `Err(1)` is returned.
fn open_source(path: &str) -> Result<Box<dyn ModelSource>, i32> {
    // Shared failure reporting for both back ends.
    let report_failure = |err: &dyn std::fmt::Display| -> i32 {
        eprintln!("Failed to open {path}: {err}");
        1
    };

    let location = std::path::Path::new(path);
    let treat_as_safetensors =
        location.is_dir() || location.extension().is_some_and(|ext| ext == "safetensors");

    if !treat_as_safetensors {
        return GgufSource::open(path)
            .map(|src| Box::new(src) as Box<dyn ModelSource>)
            .map_err(|e| report_failure(&e));
    }

    SafetensorsSource::open(path)
        .map(|src| Box::new(src) as Box<dyn ModelSource>)
        .map_err(|e| report_failure(&e))
}
/// Returns the `(architecture key label, format label)` pair for `path`.
///
/// Directories and files with a `safetensors` extension yield
/// `("model_type", "safetensors")`; anything else yields
/// `("general.architecture", "gguf")`. The extension comparison is
/// case-sensitive.
fn detect_source_labels(path: &str) -> (&'static str, &'static str) {
    let location = std::path::Path::new(path);
    let has_safetensors_ext = location
        .extension()
        .is_some_and(|ext| ext == "safetensors");

    if !location.is_dir() && !has_safetensors_ext {
        return ("general.architecture", "gguf");
    }
    ("model_type", "safetensors")
}
/// Reports whether `token` refers to a profile: either it ends with `.toml`
/// or it equals one of the built-in profile names.
fn is_profile_reference(token: &str) -> bool {
    // The built-in list is only consulted when the suffix test fails.
    token.ends_with(".toml") || list_builtin_profile_names().contains(&token)
}
/// Loads the profile named by `token`.
///
/// Tokens ending in `.toml` are loaded as profile files; all others are
/// loaded as built-in profiles. On failure the error is written to stderr and
/// `Err(1)` is returned.
fn resolve_profile(token: &str) -> Result<llama_rs::diagnostics::Profile, i32> {
    let is_file = token.ends_with(".toml");

    if !is_file {
        return match load_builtin_profile(token) {
            Ok(profile) => Ok(profile),
            Err(e) => {
                eprintln!("Failed to load built-in profile {token}: {e}");
                Err(1)
            }
        };
    }

    match load_profile_file(std::path::Path::new(token)) {
        Ok(profile) => Ok(profile),
        Err(e) => {
            eprintln!("Failed to load profile {token}: {e}");
            Err(1)
        }
    }
}