// llama-rs 0.16.0

//! `llama-rs-inspect` — inspect and diagnose model files.
//!
//! Legacy default (no subcommand): print GGUF metadata + tensor list.
//!
//! Subcommands:
//!   metadata         — explicit form of the legacy default.
//!   dump             — dump raw F32 values of a single tensor.
//!   list-profiles    — list every built-in architecture profile.
//!   diff-profile     — compare a model against a profile.
//!   generate-profile — scaffold a profile TOML from a model.

use std::path::PathBuf;
use std::process;

use clap::{Parser, Subcommand};
use llama_rs::diagnostics::{
    compare, find_profile_for_architecture, generate_profile, list_builtin_profile_names,
    load_builtin_profile, load_profile_file, render_json, render_text, GgufSource, ModelSource,
    ReportContext, SafetensorsSource,
};
use llama_rs::gguf::{GgufFile, MetadataValue};

// Top-level command line for the tool.
//
// The positional `path` and the flags declared directly on this struct drive
// the legacy (no-subcommand) mode only: `main` reads them solely when
// `command` is `None`, so they have no effect when a subcommand is given.
// The `///` comments below are left untouched because clap turns them into
// `--help` text.
#[derive(Parser)]
#[command(name = "llama-rs-inspect")]
#[command(about = "Inspect and diagnose GGUF / SafeTensors model files")]
struct Cli {
    /// Legacy mode: path to a GGUF file. When given without a
    /// subcommand, prints metadata and optionally the tensor list.
    path: Option<String>,

    /// Show tensor names, types, and shapes (legacy mode).
    #[arg(long)]
    tensors: bool,

    /// Only show metadata keys containing this substring (legacy mode).
    #[arg(long)]
    filter: Option<String>,

    /// Maximum array elements to expand inline (legacy mode).
    #[arg(long, default_value_t = 64)]
    array_limit: usize,

    /// Dump raw F32 values of a specific tensor (legacy mode).
    // When set, `main` dumps this tensor instead of printing metadata.
    #[arg(long)]
    dump_tensor: Option<String>,

    // Optional subcommand; when present, `main` dispatches on it and never
    // consults the legacy fields above.
    #[command(subcommand)]
    command: Option<Command>,
}

// Subcommands of the tool. Each variant is dispatched by `main` to the
// matching `run_*` function. Doc comments on variants and fields double as
// clap `--help` text, so every argument carries one.
#[derive(Subcommand)]
enum Command {
    /// Print GGUF metadata and (optionally) the tensor list.
    Metadata {
        /// Path to a GGUF file.
        path: String,

        /// Show tensor names, types, and shapes.
        #[arg(long)]
        tensors: bool,

        /// Only show metadata keys containing this substring.
        #[arg(long)]
        filter: Option<String>,

        /// Maximum array elements to expand inline.
        #[arg(long, default_value_t = 64)]
        array_limit: usize,
    },

    /// Dump raw F32 values of a single tensor from a GGUF file.
    Dump {
        /// Path to a GGUF file.
        path: String,

        /// Name of the tensor to dump.
        #[arg(long)]
        tensor: String,
    },

    /// List every built-in architecture profile.
    ListProfiles,

    /// Compare a model against a profile.
    DiffProfile {
        /// Path to a model file (GGUF or SafeTensors).
        path: String,

        /// Profile name (built-in) or path to a TOML profile. When
        /// omitted, the profile is auto-matched from the model's
        /// declared architecture.
        #[arg(long)]
        against: Option<String>,

        /// Emit JSON instead of the human-readable report.
        #[arg(long)]
        json: bool,
    },

    /// Scaffold a profile TOML from a model file.
    GenerateProfile {
        /// Path to a model file (GGUF or SafeTensors).
        path: String,

        /// Output path (defaults to stdout).
        #[arg(long)]
        out: Option<PathBuf>,

        /// Profile name to use in the generated TOML.
        #[arg(long)]
        name: Option<String>,
    },
}

fn main() {
    let cli = Cli::parse();

    let exit_code = match cli.command {
        Some(Command::Metadata {
            path,
            tensors,
            filter,
            array_limit,
        }) => run_metadata(&path, tensors, filter.as_deref(), array_limit),
        Some(Command::Dump { path, tensor }) => run_dump(&path, &tensor),
        Some(Command::ListProfiles) => run_list_profiles(),
        Some(Command::DiffProfile {
            path,
            against,
            json,
        }) => run_diff_profile(&path, against.as_deref(), json),
        Some(Command::GenerateProfile { path, out, name }) => {
            run_generate_profile(&path, out.as_deref(), name.as_deref())
        }
        None => match cli.path.as_deref() {
            Some(p) => {
                if let Some(tname) = cli.dump_tensor.as_deref() {
                    run_dump(p, tname)
                } else {
                    run_metadata(p, cli.tensors, cli.filter.as_deref(), cli.array_limit)
                }
            }
            None => {
                eprintln!(
                    "Usage: llama-rs-inspect <path> [--tensors] [--filter STR] [--dump-tensor NAME]\n   or: llama-rs-inspect <subcommand> --help"
                );
                1
            }
        },
    };

    process::exit(exit_code);
}

/// Render a single metadata value as human-readable text.
///
/// Scalars are printed as `<value> (<type>)`. Strings are quoted and cut to
/// at most 120 bytes; the cut is moved back to the nearest UTF-8 character
/// boundary so that multi-byte text can never trigger a slicing panic.
/// Arrays holding at most `array_limit` elements are expanded recursively;
/// longer arrays are summarised as `[N elements]`.
fn format_value(val: &MetadataValue, array_limit: usize) -> String {
    /// Upper bound, in bytes, on how much of a string value is printed.
    const MAX_STRING_BYTES: usize = 120;

    match val {
        MetadataValue::Uint8(v) => format!("{v} (u8)"),
        MetadataValue::Int8(v) => format!("{v} (i8)"),
        MetadataValue::Uint16(v) => format!("{v} (u16)"),
        MetadataValue::Int16(v) => format!("{v} (i16)"),
        MetadataValue::Uint32(v) => format!("{v} (u32)"),
        MetadataValue::Int32(v) => format!("{v} (i32)"),
        MetadataValue::Float32(v) => format!("{v} (f32)"),
        MetadataValue::Float64(v) => format!("{v} (f64)"),
        MetadataValue::Uint64(v) => format!("{v} (u64)"),
        MetadataValue::Int64(v) => format!("{v} (i64)"),
        MetadataValue::Bool(v) => format!("{v} (bool)"),
        MetadataValue::String(s) => {
            let truncated = if s.len() > MAX_STRING_BYTES {
                // Byte index 120 may fall inside a multi-byte character;
                // slicing there would panic. Index 0 is always a boundary,
                // so this loop terminates.
                let mut end = MAX_STRING_BYTES;
                while !s.is_char_boundary(end) {
                    end -= 1;
                }
                &s[..end]
            } else {
                s.as_str()
            };
            format!("\"{truncated}\" (string)")
        }
        MetadataValue::Array(a) => {
            if a.values.len() <= array_limit {
                let items: Vec<String> = a
                    .values
                    .iter()
                    .map(|v| format_value(v, array_limit))
                    .collect();
                format!("[{}]", items.join(", "))
            } else {
                format!("[{} elements]", a.values.len())
            }
        }
    }
}

/// Print the header summary and metadata of the GGUF file at `path`, plus the
/// tensor table when `tensors` is set.
///
/// Metadata keys are listed in sorted order. When `filter` is given, only
/// keys containing that substring are shown. `array_limit` is forwarded to
/// `format_value` to decide which arrays are expanded inline.
///
/// Returns 0 on success, or 1 (after reporting on stderr) if the file cannot
/// be opened.
fn run_metadata(path: &str, tensors: bool, filter: Option<&str>, array_limit: usize) -> i32 {
    let gguf = match GgufFile::open(path) {
        Err(e) => {
            eprintln!("Failed to open {path}: {e}");
            return 1;
        }
        Ok(file) => file,
    };

    let hdr = &gguf.data.header;
    println!(
        "GGUF v{}, {} tensors, {} metadata entries",
        hdr.version, hdr.tensor_count, hdr.metadata_kv_count
    );
    println!();

    let mut sorted_keys: Vec<&String> = gguf.data.metadata.keys().collect();
    sorted_keys.sort_unstable();

    // A key is shown when there is no filter or when it contains the filter text.
    let is_wanted = |candidate: &str| match filter {
        Some(needle) => candidate.contains(needle),
        None => true,
    };

    println!("--- Metadata ---");
    for key in sorted_keys.into_iter().filter(|k| is_wanted(k.as_str())) {
        let val = &gguf.data.metadata[key.as_str()];
        println!("  {key} = {}", format_value(val, array_limit));
    }

    if !tensors {
        return 0;
    }

    println!();
    println!("--- Tensors ({}) ---", gguf.data.tensors.len());
    for (i, t) in gguf.data.tensors.iter().enumerate() {
        let shape = t
            .dims
            .iter()
            .map(ToString::to_string)
            .collect::<Vec<_>>()
            .join(", ");
        println!("  [{i:>4}] {:<60} {:?} [{}]", t.name, t.dtype, shape);
    }
    0
}

/// Print the first values of one tensor from the GGUF file at `path`.
///
/// A header line with the tensor's name, dtype and dimensions is always
/// printed once the tensor is found. For F32 tensors up to 512 values follow,
/// with a note on how many were omitted; for any other dtype only the byte
/// size is reported.
///
/// Values are decoded from the raw bytes four at a time using the host's
/// native byte order, which imposes no alignment or length requirement on the
/// underlying buffer (a reinterpreting slice cast would panic on either).
///
/// Returns 0 on success and 1 (after reporting on stderr) when the file
/// cannot be opened, the tensor is unknown, or the tensor has no data.
fn run_dump(path: &str, tensor: &str) -> i32 {
    /// Maximum number of values printed before the rest is elided.
    const MAX_VALUES: usize = 512;
    /// Size in bytes of one encoded `f32`.
    const F32_BYTES: usize = std::mem::size_of::<f32>();

    let gguf = match GgufFile::open(path) {
        Ok(g) => g,
        Err(e) => {
            eprintln!("Failed to open {path}: {e}");
            return 1;
        }
    };

    let Some(info) = gguf.data.get_tensor(tensor) else {
        eprintln!("Tensor '{tensor}' not found");
        return 1;
    };
    let dims: Vec<String> = info.dims.iter().map(|d| d.to_string()).collect();
    println!(
        "--- Tensor: {} {:?} [{}] ---",
        info.name,
        info.dtype,
        dims.join(", ")
    );

    // A listed tensor without readable data is an error, not a silent success.
    let Some(data) = gguf.tensor_data(tensor) else {
        eprintln!("Tensor '{tensor}' has no data in {path}");
        return 1;
    };

    if info.dtype != llama_rs::gguf::GgmlType::F32 {
        println!(
            "  (dtype {:?}, {} bytes — F32 dump not available)",
            info.dtype,
            data.len()
        );
        return 0;
    }

    let total = data.len() / F32_BYTES;
    let values = data
        .chunks_exact(F32_BYTES)
        .map(|c| f32::from_ne_bytes([c[0], c[1], c[2], c[3]]));
    for (i, v) in values.take(MAX_VALUES).enumerate() {
        println!("  [{i:>4}] {v:.8}");
    }
    if total > MAX_VALUES {
        println!("  ... ({} more values)", total - MAX_VALUES);
    }

    // Bytes that do not form a whole f32 are reported rather than dropped silently.
    let trailing = data.len() % F32_BYTES;
    if trailing != 0 {
        eprintln!("warning: tensor '{tensor}' has {trailing} trailing byte(s) that do not form an f32");
    }
    0
}

/// Print the name of every built-in profile, one per line, or a placeholder
/// line when there are none. Always returns 0.
fn run_list_profiles() -> i32 {
    let profiles = list_builtin_profile_names();
    if !profiles.is_empty() {
        for name in profiles {
            println!("{name}");
        }
    } else {
        println!("(no built-in profiles)");
    }
    0
}

/// Compare the model at `path` against a profile and print the report.
///
/// `against` selects the profile (see `resolve_profile_for_diff`); `json`
/// switches the output from the text rendering to pretty-printed JSON.
///
/// Returns the exit code derived from the report's verdict, or the failure
/// code produced while opening the model, resolving the profile, or
/// serialising the JSON.
fn run_diff_profile(path: &str, against: Option<&str>, json: bool) -> i32 {
    // Inner worker so failures can propagate with `?`; both the `Ok` and the
    // `Err` side carry a process exit code.
    fn diff(path: &str, against: Option<&str>, json: bool) -> Result<i32, i32> {
        let model = open_source(path)?;

        let (arch_source, format_kind) = detect_source_labels(path);
        let ctx = ReportContext {
            file_path: path,
            arch_source,
            format_kind,
        };

        let profile = resolve_profile_for_diff(against, model.as_ref())?;
        let report = compare(model.as_ref(), profile.as_ref(), &ctx);

        if !json {
            print!("{}", render_text(&report));
        } else {
            let value = render_json(&report);
            let s = serde_json::to_string_pretty(&value).map_err(|e| {
                eprintln!("JSON serialization failed: {e}");
                1
            })?;
            println!("{s}");
        }

        Ok(report.verdict.exit_code())
    }

    match diff(path, against, json) {
        Ok(code) | Err(code) => code,
    }
}

/// Pick the profile a diff should run against, if there is one.
///
/// With an explicit `against` token, the token must be a profile reference
/// (built-in name or `.toml` path); anything else is rejected because
/// model-vs-model diffs are not supported. Without a token, the profile is
/// looked up from the architecture the model declares.
///
/// - `Ok(Some(profile))`: compare against this profile.
/// - `Ok(None)`: no profile applies — produce an inventory-only report.
/// - `Err(code)`: the request cannot be satisfied; exit with this code.
fn resolve_profile_for_diff(
    against: Option<&str>,
    source: &dyn ModelSource,
) -> Result<Option<llama_rs::diagnostics::Profile>, i32> {
    if let Some(token) = against {
        if !is_profile_reference(token) {
            eprintln!(
                "pairwise model-vs-model diff is not implemented in v1; use --against <profile-name> or <file.toml>"
            );
            return Err(1);
        }
        return resolve_profile(token).map(Some);
    }

    // No explicit profile: fall back to the model's declared architecture.
    match source.declared_architecture() {
        None => Ok(None),
        Some(arch) => find_profile_for_architecture(&arch).map_err(|e| {
            eprintln!("Failed to look up profile for architecture `{arch}`: {e}");
            1
        }),
    }
}

/// Generate a profile TOML from the model at `path`.
///
/// The profile is named after `name` when given, otherwise after the model's
/// declared architecture, otherwise `"generated"`. The TOML is written to
/// `out` when provided (with a confirmation on stderr) and to stdout
/// otherwise.
///
/// Returns 0 on success, or a non-zero code if the model cannot be opened or
/// the output file cannot be written.
fn run_generate_profile(
    path: &str,
    out: Option<&std::path::Path>,
    name: Option<&str>,
) -> i32 {
    let source = match open_source(path) {
        Err(code) => return code,
        Ok(opened) => opened,
    };

    let profile_name = match name {
        Some(explicit) => explicit.to_string(),
        None => source
            .declared_architecture()
            .unwrap_or_else(|| "generated".to_string()),
    };
    let toml = generate_profile(source.as_ref(), &profile_name);

    let Some(p) = out else {
        print!("{toml}");
        return 0;
    };

    if let Err(e) = std::fs::write(p, &toml) {
        eprintln!("Failed to write {}: {e}", p.display());
        return 1;
    }
    eprintln!("Wrote {}", p.display());
    0
}

/// Open `path` as a model source.
///
/// Directories and files with a `.safetensors` extension are opened through
/// `SafetensorsSource`; everything else goes through `GgufSource`. On failure
/// the error is reported on stderr and `Err(1)` is returned.
fn open_source(path: &str) -> Result<Box<dyn ModelSource>, i32> {
    let location = std::path::Path::new(path);
    let use_safetensors = location.is_dir()
        || matches!(location.extension(), Some(ext) if ext == "safetensors");

    // Normalise both loaders to one result shape so the error is reported once.
    let opened: Result<Box<dyn ModelSource>, String> = if use_safetensors {
        SafetensorsSource::open(path)
            .map(|src| Box::new(src) as Box<dyn ModelSource>)
            .map_err(|err| err.to_string())
    } else {
        GgufSource::open(path)
            .map(|src| Box::new(src) as Box<dyn ModelSource>)
            .map_err(|err| err.to_string())
    };

    opened.map_err(|e| {
        eprintln!("Failed to open {path}: {e}");
        1
    })
}

/// Return the `(architecture key, format name)` label pair for `path`.
///
/// Directories and files with a `.safetensors` extension yield
/// `("model_type", "safetensors")`; every other path yields
/// `("general.architecture", "gguf")`. The extension check is exact and
/// case-sensitive.
fn detect_source_labels(path: &str) -> (&'static str, &'static str) {
    let location = std::path::Path::new(path);
    let has_safetensors_ext = matches!(location.extension(), Some(ext) if ext == "safetensors");

    if location.is_dir() || has_safetensors_ext {
        return ("model_type", "safetensors");
    }
    ("general.architecture", "gguf")
}

/// Report whether `token` refers to a profile: either it ends in `.toml`
/// (treated as a profile file path) or it equals one of the built-in profile
/// names. The built-in list is consulted only when the suffix check fails.
fn is_profile_reference(token: &str) -> bool {
    token.ends_with(".toml") || list_builtin_profile_names().contains(&token)
}

/// Load the profile that `token` refers to.
///
/// A token ending in `.toml` is loaded as a profile file; any other token is
/// loaded as a built-in profile name. On failure the error is reported on
/// stderr and `Err(1)` is returned.
fn resolve_profile(token: &str) -> Result<llama_rs::diagnostics::Profile, i32> {
    // Build the failure message per loader, then report it in one place.
    let loaded = if token.ends_with(".toml") {
        load_profile_file(std::path::Path::new(token))
            .map_err(|e| format!("Failed to load profile {token}: {e}"))
    } else {
        load_builtin_profile(token)
            .map_err(|e| format!("Failed to load built-in profile {token}: {e}"))
    };

    loaded.map_err(|message| {
        eprintln!("{message}");
        1
    })
}