llama-rs 0.15.0

A high-performance Rust implementation of llama.cpp - LLM inference engine with full GGUF support
Documentation
//! Inspect GGUF file metadata and tensor layout.

use std::process;

use clap::Parser;
use llama_rs::gguf::{GgufFile, MetadataValue};

// Command-line arguments for the inspector, parsed in `main` via `Cli::parse()`.
//
// NOTE(review): the `///` comments on the fields below appear to double as the
// per-argument help text generated by clap's derive — treat them as user-facing
// strings and confirm against the clap documentation before rewording them.
#[derive(Parser)]
#[command(name = "llama-rs-inspect", about = "Inspect GGUF file metadata and tensor layout")]
struct Cli {
    // Handed unchanged to `GgufFile::open` in `main`; the process exits with
    // status 1 if opening fails.
    /// Path to the GGUF file
    path: String,

    // When set, `main` prints one line per tensor (index, name, dtype, dims)
    // after the metadata section.
    /// Show tensor names, types, and shapes
    #[arg(long)]
    tensors: bool,

    // Matched with `str::contains` against each metadata key, so the match is
    // case-sensitive and may occur anywhere in the key. It does not affect the
    // tensor listing.
    /// Only show metadata keys containing this substring
    #[arg(long)]
    filter: Option<String>,

    // Forwarded to `format_value`: arrays with `len <= array_limit` are expanded,
    // longer ones are shown as "[N elements]". Because the comparison is `<=`,
    // an empty array is still printed as `[]` even when the limit is 0.
    /// Maximum number of array elements to expand inline (0 = always collapse)
    #[arg(long, default_value_t = 64)]
    array_limit: usize,

    // Looked up with `get_tensor` in `main`. Only F32 tensors are dumped (at most
    // the first 512 values); for other dtypes just the dtype and byte count are
    // printed.
    /// Dump raw F32 values of a specific tensor (by name)
    #[arg(long)]
    dump_tensor: Option<String>,
}

/// Renders a metadata value as a single human-readable fragment.
///
/// * Scalars are printed followed by a type tag, e.g. `42 (u32)` or `true (bool)`.
/// * Strings are quoted and cut to at most 120 bytes. The cut is moved back to the
///   nearest UTF-8 character boundary so multi-byte text never causes a panic.
/// * Arrays with at most `array_limit` elements are expanded recursively as
///   `[a, b, ...]`; longer arrays are summarised as `[N elements]`.
///
/// `array_limit` is applied at every nesting level.
fn format_value(val: &MetadataValue, array_limit: usize) -> String {
    // Upper bound, in bytes, on how much of a string value is shown.
    const MAX_STRING_BYTES: usize = 120;

    match val {
        MetadataValue::Uint8(v) => format!("{v} (u8)"),
        MetadataValue::Int8(v) => format!("{v} (i8)"),
        MetadataValue::Uint16(v) => format!("{v} (u16)"),
        MetadataValue::Int16(v) => format!("{v} (i16)"),
        MetadataValue::Uint32(v) => format!("{v} (u32)"),
        MetadataValue::Int32(v) => format!("{v} (i32)"),
        MetadataValue::Float32(v) => format!("{v} (f32)"),
        MetadataValue::Float64(v) => format!("{v} (f64)"),
        MetadataValue::Uint64(v) => format!("{v} (u64)"),
        MetadataValue::Int64(v) => format!("{v} (i64)"),
        MetadataValue::Bool(v) => format!("{v} (bool)"),
        MetadataValue::String(s) => {
            let text = s.as_str();
            // Slicing a `str` at a byte offset inside a multi-byte character panics,
            // so back off until the cut lands on a boundary. Offset 0 is always a
            // boundary, which guarantees termination.
            let mut end = text.len().min(MAX_STRING_BYTES);
            while !text.is_char_boundary(end) {
                end -= 1;
            }
            format!("\"{}\" (string)", &text[..end])
        }
        MetadataValue::Array(a) => {
            if a.values.len() <= array_limit {
                let items: Vec<String> = a
                    .values
                    .iter()
                    .map(|v| format_value(v, array_limit))
                    .collect();
                format!("[{}]", items.join(", "))
            } else {
                format!("[{} elements]", a.values.len())
            }
        }
    }
}

/// Entry point for the `llama-rs-inspect` tool.
///
/// Opens the GGUF file named on the command line, prints a one-line header summary
/// and the metadata entries sorted by key (optionally restricted by `--filter`),
/// then, if requested, dumps the values of one F32 tensor and/or lists every tensor.
///
/// Exits with status 1 if the file cannot be opened. If the tensor requested with
/// `--dump-tensor` cannot be dumped (unknown name, no data available, or data that
/// cannot be viewed as `f32`), the problem is reported on stderr and the process
/// exits with status 1 after all remaining output has been produced.
fn main() {
    let cli = Cli::parse();

    let gguf = match GgufFile::open(&cli.path) {
        Ok(g) => g,
        Err(e) => {
            eprintln!("Failed to open {}: {e}", cli.path);
            process::exit(1);
        }
    };

    // Set when a requested operation fails; the exit is deferred so that the rest
    // of the report (e.g. the tensor listing) is still printed.
    let mut failed = false;

    let header = &gguf.data.header;
    println!(
        "GGUF v{}, {} tensors, {} metadata entries",
        header.version, header.tensor_count, header.metadata_kv_count
    );
    println!();

    // Sort the keys so the output is stable regardless of the map's iteration order.
    let mut keys: Vec<&String> = gguf.data.metadata.keys().collect();
    keys.sort();

    println!("--- Metadata ---");
    for key in &keys {
        // The filter is a plain substring match anywhere in the key.
        if let Some(ref needle) = cli.filter {
            if !key.contains(needle.as_str()) {
                continue;
            }
        }
        let val = &gguf.data.metadata[key.as_str()];
        println!("  {key} = {}", format_value(val, cli.array_limit));
    }

    if let Some(ref name) = cli.dump_tensor {
        if let Some(info) = gguf.data.get_tensor(name) {
            let dims: Vec<String> = info.dims.iter().map(|d| d.to_string()).collect();
            println!();
            println!(
                "--- Tensor: {} {:?} [{}] ---",
                info.name,
                info.dtype,
                dims.join(", ")
            );
            if let Some(data) = gguf.tensor_data(name) {
                if info.dtype == llama_rs::gguf::GgmlType::F32 {
                    // `cast_slice` panics on a misaligned buffer or a length that is
                    // not a multiple of 4; a malformed file must not crash the
                    // inspector, so use the fallible variant instead.
                    let cast: Result<&[f32], _> = bytemuck::try_cast_slice(data);
                    match cast {
                        Ok(floats) => {
                            // Cap the dump so huge tensors do not flood the terminal.
                            let n = floats.len().min(512);
                            for (i, &v) in floats[..n].iter().enumerate() {
                                println!("  [{i:>4}] {v:.8}");
                            }
                            if floats.len() > n {
                                println!("  ... ({} more values)", floats.len() - n);
                            }
                        }
                        Err(e) => {
                            eprintln!("Cannot interpret data of tensor '{}' as F32: {e:?}", name);
                            failed = true;
                        }
                    }
                } else {
                    println!("  (dtype {:?}, {} bytes — F32 dump not available)", info.dtype, data.len());
                }
            } else {
                // Previously this case printed nothing at all.
                eprintln!("No data available for tensor '{}'", name);
                failed = true;
            }
        } else {
            eprintln!("Tensor '{}' not found", name);
            failed = true;
        }
    }

    if cli.tensors {
        println!();
        println!("--- Tensors ({}) ---", gguf.data.tensors.len());
        for (i, t) in gguf.data.tensors.iter().enumerate() {
            let dims: Vec<String> = t.dims.iter().map(|d| d.to_string()).collect();
            println!(
                "  [{i:>4}] {:<60} {:?} [{}]",
                t.name,
                t.dtype,
                dims.join(", ")
            );
        }
    }

    if failed {
        process::exit(1);
    }
}