#![cfg(feature = "cli")]
#![allow(clippy::panic, clippy::unwrap_used, clippy::expect_used)]
use std::process::Command;
/// Creates a [`Command`] that targets the `hf-fetch-model` binary.
///
/// The executable path is taken at compile time from the
/// `CARGO_BIN_EXE_hf-fetch-model` environment variable.
fn hf_fm() -> Command {
    let binary = env!("CARGO_BIN_EXE_hf-fetch-model");
    Command::new(binary)
}
/// Runs `cmd` to completion and returns `(stdout, stderr, success)`.
///
/// Both captured streams are decoded as UTF-8 lossily, so invalid byte
/// sequences never cause a failure here.
///
/// # Panics
///
/// Panics if the process cannot be executed.
fn run(cmd: &mut Command) -> (String, String, bool) {
    /// Lossily converts captured bytes into an owned `String`.
    fn decode(bytes: &[u8]) -> String {
        String::from_utf8_lossy(bytes).into_owned()
    }

    let captured = cmd
        .output()
        .expect("failed to execute hf-fetch-model binary");
    let exited_ok = captured.status.success();
    (decode(&captured.stdout), decode(&captured.stderr), exited_ok)
}
/// `--help` must contain the "Downloads all files" description.
#[test]
fn help_shows_download_description() {
    let mut cmd = hf_fm();
    cmd.arg("--help");
    let (stdout, stderr, ok) = run(&mut cmd);
    assert!(ok, "--help failed: {stderr}");

    let describes_download = stdout.contains("Downloads all files");
    assert!(
        describes_download,
        "help should describe default download behavior, got:\n{stdout}"
    );
}
/// `--help` must mention the `list-files` subcommand.
#[test]
fn help_shows_list_files_subcommand() {
    let mut cmd = hf_fm();
    cmd.arg("--help");
    let (stdout, stderr, ok) = run(&mut cmd);
    assert!(ok, "--help failed: {stderr}");

    let lists_subcommand = stdout.contains("list-files");
    assert!(
        lists_subcommand,
        "help should list the list-files subcommand, got:\n{stdout}"
    );
}
/// `--help` must contain the crate version taken from `CARGO_PKG_VERSION`.
#[test]
fn help_shows_version_number() {
    let version = env!("CARGO_PKG_VERSION");

    let mut cmd = hf_fm();
    cmd.arg("--help");
    let (stdout, stderr, ok) = run(&mut cmd);
    assert!(ok, "--help failed: {stderr}");

    let shows_version = stdout.contains(version);
    assert!(
        shows_version,
        "help should contain version {version}, got:\n{stdout}"
    );
}
/// `--help` must mention the `pth` preset.
#[test]
fn help_shows_pth_preset() {
    let mut cmd = hf_fm();
    cmd.arg("--help");
    let (stdout, stderr, ok) = run(&mut cmd);
    assert!(ok, "--help failed: {stderr}");

    let mentions_pth = stdout.contains("pth");
    assert!(
        mentions_pth,
        "help should mention pth preset, got:\n{stdout}"
    );
}
/// `--help` must list the `--dry-run` flag.
#[test]
fn help_shows_dry_run_flag() {
    let mut cmd = hf_fm();
    cmd.arg("--help");
    let (stdout, stderr, ok) = run(&mut cmd);
    assert!(ok, "--help failed: {stderr}");

    let lists_flag = stdout.contains("--dry-run");
    assert!(
        lists_flag,
        "help should list the --dry-run flag, got:\n{stdout}"
    );
}
/// `--help` must list the `--flat` flag.
#[test]
fn help_shows_flat_flag() {
    let mut cmd = hf_fm();
    cmd.arg("--help");
    let (stdout, stderr, ok) = run(&mut cmd);
    assert!(ok, "--help failed: {stderr}");

    let lists_flag = stdout.contains("--flat");
    assert!(lists_flag, "help should show --flat flag, got:\n{stdout}");
}
/// `download-file --help` must list the `--flat` flag.
#[test]
fn download_file_help_shows_flat_flag() {
    let mut cmd = hf_fm();
    cmd.arg("download-file").arg("--help");
    let (stdout, stderr, ok) = run(&mut cmd);
    assert!(ok, "download-file --help failed: {stderr}");

    let lists_flag = stdout.contains("--flat");
    assert!(
        lists_flag,
        "download-file help should show --flat flag, got:\n{stdout}"
    );
}
/// `download-file --help` must mention "glob".
#[test]
fn download_file_help_mentions_glob() {
    let mut cmd = hf_fm();
    cmd.arg("download-file").arg("--help");
    let (stdout, stderr, ok) = run(&mut cmd);
    assert!(ok, "download-file --help failed: {stderr}");

    let mentions_glob = stdout.contains("glob");
    assert!(
        mentions_glob,
        "download-file help should mention glob support, got:\n{stdout}"
    );
}
/// `--help` must list both timeout flags.
#[test]
fn help_shows_timeout_flags() {
    let mut cmd = hf_fm();
    cmd.arg("--help");
    let (stdout, stderr, ok) = run(&mut cmd);
    assert!(ok, "--help failed: {stderr}");

    let timeout_flags = ["--timeout-per-file-secs", "--timeout-total-secs"];
    for flag in timeout_flags {
        let listed = stdout.contains(flag);
        assert!(listed, "help should show {flag} flag, got:\n{stdout}");
    }
}
/// `download-file --help` must list both timeout flags.
#[test]
fn download_file_help_shows_timeout_flags() {
    let mut cmd = hf_fm();
    cmd.arg("download-file").arg("--help");
    let (stdout, stderr, ok) = run(&mut cmd);
    assert!(ok, "download-file --help failed: {stderr}");

    let timeout_flags = ["--timeout-per-file-secs", "--timeout-total-secs"];
    for flag in timeout_flags {
        let listed = stdout.contains(flag);
        assert!(
            listed,
            "download-file help should show {flag} flag, got:\n{stdout}"
        );
    }
}
/// Numeric values for both timeout flags must not trigger an argument-parsing
/// error on stderr.
///
/// Only stderr is inspected; the exit status of the dry run is ignored.
#[test]
fn timeout_flags_accept_numeric_values() {
    let mut cmd = hf_fm();
    cmd.args(["--timeout-per-file-secs", "1800"])
        .args(["--timeout-total-secs", "3600"])
        .arg("--dry-run")
        .arg("julien-c/dummy-unknown");
    let (_, stderr, _) = run(&mut cmd);

    let rejected_value = stderr.contains("error: invalid value");
    assert!(
        !rejected_value,
        "timeout flags should parse cleanly, got:\n{stderr}"
    );

    let unknown_argument = stderr.contains("error: unexpected argument");
    assert!(
        !unknown_argument,
        "timeout flags should be recognized, got:\n{stderr}"
    );
}
/// `list-files --help` must list its four flags and mention "partial".
#[test]
fn list_files_help_shows_all_flags() {
    let mut cmd = hf_fm();
    cmd.arg("list-files").arg("--help");
    let (stdout, stderr, ok) = run(&mut cmd);
    assert!(ok, "list-files --help failed: {stderr}");

    let expected_flags = ["--no-checksum", "--show-cached", "--filter", "--preset"];
    for flag in expected_flags {
        let listed = stdout.contains(flag);
        assert!(
            listed,
            "list-files help should contain {flag}, got:\n{stdout}"
        );
    }

    let describes_partial = stdout.contains("partial");
    assert!(
        describes_partial,
        "list-files help should describe partial state, got:\n{stdout}"
    );
}
/// `list-files` with a repo id lacking a slash must fail and mention
/// `org/model` on stderr.
#[test]
fn list_files_invalid_repo_format() {
    let mut cmd = hf_fm();
    cmd.arg("list-files").arg("noSlash");
    let (_, stderr, succeeded) = run(&mut cmd);
    assert!(!succeeded, "list-files with invalid repo should fail");

    let names_format = stderr.contains("org/model");
    assert!(
        names_format,
        "error should mention expected format, got:\n{stderr}"
    );
}
/// `--dry-run` with a repo id lacking a slash must fail and mention
/// `org/model` on stderr.
#[test]
fn dry_run_invalid_repo_format() {
    let mut cmd = hf_fm();
    cmd.arg("noSlash").arg("--dry-run");
    let (_, stderr, succeeded) = run(&mut cmd);
    assert!(!succeeded, "--dry-run with invalid repo should fail");

    let names_format = stderr.contains("org/model");
    assert!(
        names_format,
        "error should mention expected format, got:\n{stderr}"
    );
}
/// `list-files` on a repo that does not exist must fail with a
/// "not found", "401" or "Unauthorized" message on stderr.
#[test]
fn list_files_nonexistent_repo() {
    let mut cmd = hf_fm();
    cmd.arg("list-files").arg("fake/nonexistent-repo-12345");
    let (_, stderr, succeeded) = run(&mut cmd);
    assert!(!succeeded, "list-files with nonexistent repo should fail");

    let reports_inaccessible = ["not found", "401", "Unauthorized"]
        .iter()
        .any(|needle| stderr.contains(*needle));
    assert!(
        reports_inaccessible,
        "error should indicate repo inaccessible, got:\n{stderr}"
    );
}
/// Default `list-files` output must list the expected files, a summary
/// containing "files" and "total", and the `SHA256` header.
#[test]
fn list_files_default_output() {
    let mut cmd = hf_fm();
    cmd.arg("list-files").arg("julien-c/dummy-unknown");
    let (stdout, stderr, ok) = run(&mut cmd);
    assert!(ok, "list-files failed: {stderr}");

    let has_config = stdout.contains("config.json");
    assert!(
        has_config,
        "output should contain config.json, got:\n{stdout}"
    );

    let has_weights = stdout.contains("pytorch_model.bin");
    assert!(
        has_weights,
        "output should contain pytorch_model.bin, got:\n{stdout}"
    );

    let has_summary = stdout.contains("files") && stdout.contains("total");
    assert!(
        has_summary,
        "output should contain summary with file count and total, got:\n{stdout}"
    );

    let has_checksum_header = stdout.contains("SHA256");
    assert!(
        has_checksum_header,
        "default output should contain SHA256 header, got:\n{stdout}"
    );
}
/// `list-files --no-checksum` must not print the `SHA256` header.
#[test]
fn list_files_no_checksum_hides_sha256() {
    let mut cmd = hf_fm();
    cmd.arg("list-files")
        .arg("julien-c/dummy-unknown")
        .arg("--no-checksum");
    let (stdout, stderr, ok) = run(&mut cmd);
    assert!(ok, "list-files --no-checksum failed: {stderr}");

    let shows_checksum = stdout.contains("SHA256");
    assert!(
        !shows_checksum,
        "--no-checksum should hide SHA256 header, got:\n{stdout}"
    );
}
/// `list-files --show-cached` must print a `Cached` header.
#[test]
fn list_files_show_cached_adds_column() {
    let mut cmd = hf_fm();
    cmd.arg("list-files")
        .arg("julien-c/dummy-unknown")
        .arg("--show-cached");
    let (stdout, stderr, ok) = run(&mut cmd);
    assert!(ok, "list-files --show-cached failed: {stderr}");

    let has_cached_header = stdout.contains("Cached");
    assert!(
        has_cached_header,
        "--show-cached should add Cached header, got:\n{stdout}"
    );
}
/// `list-files --show-cached` on a fully cached repo must show a check mark,
/// no "partial" marker, and a "cached" count in the summary.
///
/// The repo is downloaded first so the test does not depend on another test
/// (or an earlier run) having populated the cache.
#[test]
fn list_files_show_cached_marks_complete_files() {
    // Fixture: make sure the repo is fully cached before inspecting it.
    let (_, dl_stderr, dl_success) = run(hf_fm().args(["julien-c/dummy-unknown"]));
    assert!(
        dl_success,
        "download should succeed to populate cache: {dl_stderr}"
    );

    let (stdout, stderr, success) =
        run(hf_fm().args(["list-files", "julien-c/dummy-unknown", "--show-cached"]));
    assert!(success, "list-files --show-cached failed: {stderr}");
    assert!(
        stdout.contains('\u{2713}'),
        "cached files should show \u{2713} mark, got:\n{stdout}"
    );
    assert!(
        !stdout.contains("partial"),
        "fully cached files should not show 'partial', got:\n{stdout}"
    );
    assert!(
        stdout.contains("cached"),
        "summary should mention cached count, got:\n{stdout}"
    );
}
/// `list-files --filter "*.json"` must keep `config.json` and drop
/// `pytorch_model.bin`.
#[test]
fn list_files_filter_limits_output() {
    let mut cmd = hf_fm();
    cmd.arg("list-files")
        .arg("julien-c/dummy-unknown")
        .args(["--filter", "*.json"]);
    let (stdout, stderr, ok) = run(&mut cmd);
    assert!(ok, "list-files --filter failed: {stderr}");

    let keeps_json = stdout.contains("config.json");
    assert!(
        keeps_json,
        "filtered output should contain config.json, got:\n{stdout}"
    );

    let leaks_weights = stdout.contains("pytorch_model.bin");
    assert!(
        !leaks_weights,
        "filtered output should NOT contain pytorch_model.bin, got:\n{stdout}"
    );
}
/// `--dry-run` output must contain the `Repo:` and `Revision:` headers.
#[test]
fn dry_run_shows_repo_and_revision() {
    let mut cmd = hf_fm();
    cmd.arg("julien-c/dummy-unknown").arg("--dry-run");
    let (stdout, stderr, ok) = run(&mut cmd);
    assert!(ok, "--dry-run failed: {stderr}");

    let has_repo = stdout.contains("Repo:");
    assert!(has_repo, "dry-run should show Repo: header, got:\n{stdout}");

    let has_revision = stdout.contains("Revision:");
    assert!(
        has_revision,
        "dry-run should show Revision: header, got:\n{stdout}"
    );
}
/// `--dry-run` on an already cached repo must report nothing left to download.
///
/// The repo is downloaded first so the test does not depend on another test
/// (or an earlier run) having populated the cache.
#[test]
fn dry_run_cached_repo_shows_zero_download() {
    // Fixture: make sure the repo is fully cached before the dry run.
    let (_, dl_stderr, dl_success) = run(hf_fm().args(["julien-c/dummy-unknown"]));
    assert!(
        dl_success,
        "download should succeed to populate cache: {dl_stderr}"
    );

    let (stdout, stderr, success) = run(hf_fm().args(["julien-c/dummy-unknown", "--dry-run"]));
    assert!(success, "--dry-run failed: {stderr}");
    assert!(
        stdout.contains("0 to download"),
        "cached repo should show 0 to download, got:\n{stdout}"
    );
    assert!(
        stdout.contains("Download: 0 B"),
        "cached repo should show Download: 0 B, got:\n{stdout}"
    );
}
/// Every "chunk threshold" line of the dry-run plan must either say
/// "disabled" or contain no integer token above 10 000.
///
/// Tokens that do not parse as `u64` are ignored, and output with no
/// "chunk threshold" line passes.
#[test]
fn dry_run_no_astronomical_chunk_threshold() {
    let mut cmd = hf_fm();
    cmd.arg("mistralai/Ministral-3-3B-Instruct-2512")
        .args(["--preset", "safetensors"])
        .arg("--dry-run");
    let (stdout, stderr, ok) = run(&mut cmd);
    assert!(ok, "--dry-run failed: {stderr}");

    let threshold_lines = stdout.lines().filter(|l| l.contains("chunk threshold"));
    for line in threshold_lines {
        let has_oversized_number = line
            .split_whitespace()
            .filter_map(|token| token.parse::<u64>().ok())
            .any(|value| value > 10_000);
        let acceptable = line.contains("disabled") || !has_oversized_number;
        assert!(
            acceptable,
            "chunk threshold should be sane or disabled, got:\n{line}"
        );
    }
}
/// `--dry-run --filter "*.json"` must print a `Filter:` line, keep
/// `config.json` and drop `pytorch_model.bin`.
#[test]
fn dry_run_with_filter_shows_filter_info() {
    let mut cmd = hf_fm();
    cmd.arg("julien-c/dummy-unknown")
        .arg("--dry-run")
        .args(["--filter", "*.json"]);
    let (stdout, stderr, ok) = run(&mut cmd);
    assert!(ok, "--dry-run --filter failed: {stderr}");

    let has_filter_line = stdout.contains("Filter:");
    assert!(
        has_filter_line,
        "filtered dry-run should show Filter: line, got:\n{stdout}"
    );

    let keeps_json = stdout.contains("config.json");
    assert!(
        keeps_json,
        "filtered dry-run should contain config.json, got:\n{stdout}"
    );

    let leaks_weights = stdout.contains("pytorch_model.bin");
    assert!(
        !leaks_weights,
        "filtered dry-run should NOT contain pytorch_model.bin, got:\n{stdout}"
    );
}
/// `cache delete --help` must list the `--yes` flag.
#[test]
fn cache_delete_help_shows_flags() {
    let mut cmd = hf_fm();
    cmd.args(["cache", "delete"]).arg("--help");
    let (stdout, stderr, ok) = run(&mut cmd);
    assert!(ok, "cache delete --help failed: {stderr}");

    let lists_yes = stdout.contains("--yes");
    assert!(
        lists_yes,
        "cache delete help should contain --yes, got:\n{stdout}"
    );
}
/// `cache delete` on a repo that is not in the cache must fail with a
/// "not cached" message on stderr.
#[test]
fn cache_delete_nonexistent_repo() {
    let mut cmd = hf_fm();
    cmd.args(["cache", "delete"])
        .arg("nonexistent-org/nonexistent-model-xyz")
        .arg("--yes");
    let (_, stderr, succeeded) = run(&mut cmd);
    assert!(!succeeded, "cache delete of missing repo should fail");

    let reports_not_cached = stderr.contains("not cached");
    assert!(
        reports_not_cached,
        "cache delete should report not cached, got:\n{stderr}"
    );
}
/// `cache --help` must mention the `delete` subcommand.
#[test]
fn help_shows_cache_delete() {
    let mut cmd = hf_fm();
    cmd.arg("cache").arg("--help");
    let (stdout, stderr, ok) = run(&mut cmd);
    assert!(ok, "cache --help failed: {stderr}");

    let mentions_delete = stdout.contains("delete");
    assert!(
        mentions_delete,
        "cache help should mention delete subcommand, got:\n{stdout}"
    );
}
/// `--help` must mention the `cache` subcommand.
///
/// On failure the captured stderr is included in the message, matching the
/// other help tests in this file.
#[test]
fn help_shows_cache_subcommand() {
    let (stdout, stderr, success) = run(hf_fm().arg("--help"));
    assert!(success, "--help failed: {stderr}");
    assert!(
        stdout.contains("cache"),
        "help should mention cache subcommand, got:\n{stdout}"
    );
}
/// `cache clean-partial --help` must list `--yes` and `--dry-run`.
#[test]
fn cache_clean_partial_help_shows_flags() {
    let mut cmd = hf_fm();
    cmd.args(["cache", "clean-partial"]).arg("--help");
    let (stdout, stderr, ok) = run(&mut cmd);
    assert!(ok, "cache clean-partial --help failed: {stderr}");

    let expected_flags = ["--yes", "--dry-run"];
    for flag in expected_flags {
        let listed = stdout.contains(flag);
        assert!(
            listed,
            "cache clean-partial help should contain {flag}, got:\n{stdout}"
        );
    }
}
/// `cache clean-partial --yes` must succeed and print one of the two
/// "nothing to clean" messages.
#[test]
fn cache_clean_partial_no_partials() {
    let mut cmd = hf_fm();
    cmd.args(["cache", "clean-partial"]).arg("--yes");
    let (stdout, stderr, ok) = run(&mut cmd);
    assert!(ok, "cache clean-partial should succeed: {stderr}");

    let accepted_messages = ["No partial downloads found", "No HuggingFace cache found"];
    let reports_nothing = accepted_messages
        .iter()
        .any(|message| stdout.contains(*message));
    assert!(
        reports_nothing,
        "cache clean-partial should report no partials, got:\n{stdout}"
    );
}
/// `cache gc --help` must list every flag named below.
#[test]
fn cache_gc_help_shows_flags() {
    let mut cmd = hf_fm();
    cmd.args(["cache", "gc"]).arg("--help");
    let (stdout, stderr, ok) = run(&mut cmd);
    assert!(ok, "cache gc --help failed: {stderr}");

    let expected_flags = [
        "--older-than",
        "--max-size",
        "--except",
        "--dry-run",
        "--yes",
        "--list-kept",
    ];
    for flag in expected_flags {
        let listed = stdout.contains(flag);
        assert!(
            listed,
            "cache gc help should contain {flag}, got:\n{stdout}"
        );
    }
}
/// `cache --help` must mention the `gc` subcommand.
#[test]
fn help_shows_cache_gc() {
    let mut cmd = hf_fm();
    cmd.arg("cache").arg("--help");
    let (stdout, stderr, ok) = run(&mut cmd);
    assert!(ok, "cache --help failed: {stderr}");

    let mentions_gc = stdout.contains("gc");
    assert!(
        mentions_gc,
        "cache help should mention gc subcommand, got:\n{stdout}"
    );
}
/// Bare `cache gc` must fail and name both `--older-than` and `--max-size`
/// on stderr.
#[test]
fn cache_gc_requires_strategy() {
    let mut cmd = hf_fm();
    cmd.arg("cache").arg("gc");
    let (_, stderr, succeeded) = run(&mut cmd);
    assert!(!succeeded, "bare `cache gc` should be rejected by clap");

    let names_both_strategies = ["--older-than", "--max-size"]
        .iter()
        .all(|flag| stderr.contains(*flag));
    assert!(
        names_both_strategies,
        "cache gc with no flags should mention --older-than and --max-size, got:\n{stderr}"
    );
}
/// `cache gc --max-size 5GB` must fail with an error that mentions both
/// "decimal size unit" and "binary units".
#[test]
fn cache_gc_rejects_decimal_size() {
    let mut cmd = hf_fm();
    cmd.args(["cache", "gc"])
        .args(["--max-size", "5GB"])
        .arg("--dry-run");
    let (_, stderr, succeeded) = run(&mut cmd);
    assert!(!succeeded, "--max-size 5GB should be rejected");

    let explains_units = ["decimal size unit", "binary units"]
        .iter()
        .all(|phrase| stderr.contains(*phrase));
    assert!(
        explains_units,
        "cache gc should reject decimal units with helpful error, got:\n{stderr}"
    );
}
/// `cache gc --older-than 99999 --dry-run` must succeed and print one of the
/// three "nothing to evict" messages.
#[test]
fn cache_gc_dry_run_no_matches() {
    let mut cmd = hf_fm();
    cmd.args(["cache", "gc"])
        .args(["--older-than", "99999"])
        .arg("--dry-run");
    let (stdout, stderr, ok) = run(&mut cmd);
    assert!(ok, "cache gc --dry-run should succeed: {stderr}");

    let accepted_messages = [
        "No repos matched eviction criteria",
        "No models in cache",
        "No HuggingFace cache found",
    ];
    let reports_no_match = accepted_messages
        .iter()
        .any(|message| stdout.contains(*message));
    assert!(
        reports_no_match,
        "cache gc --older-than 99999 should report no matches, got:\n{stdout}"
    );
}
/// `cache verify --help` must list `--revision` and `--token`.
#[test]
fn cache_verify_help_shows_flags() {
    let mut cmd = hf_fm();
    cmd.args(["cache", "verify"]).arg("--help");
    let (stdout, stderr, ok) = run(&mut cmd);
    assert!(ok, "cache verify --help failed: {stderr}");

    let expected_flags = ["--revision", "--token"];
    for flag in expected_flags {
        let listed = stdout.contains(flag);
        assert!(
            listed,
            "cache verify help should contain {flag}, got:\n{stdout}"
        );
    }
}
/// `cache --help` must mention the `verify` subcommand.
#[test]
fn help_shows_cache_verify() {
    let mut cmd = hf_fm();
    cmd.arg("cache").arg("--help");
    let (stdout, stderr, ok) = run(&mut cmd);
    assert!(ok, "cache --help failed: {stderr}");

    let mentions_verify = stdout.contains("verify");
    assert!(
        mentions_verify,
        "cache help should mention verify subcommand, got:\n{stdout}"
    );
}
/// `cache verify` on a repo that is not in the cache must fail with a
/// "not cached" message on stderr.
#[test]
fn cache_verify_nonexistent_repo() {
    let mut cmd = hf_fm();
    cmd.args(["cache", "verify"])
        .arg("nonexistent-org/nonexistent-model-xyz");
    let (_, stderr, succeeded) = run(&mut cmd);
    assert!(!succeeded, "cache verify of missing repo should fail");

    let reports_not_cached = stderr.contains("not cached");
    assert!(
        reports_not_cached,
        "cache verify should report not cached, got:\n{stderr}"
    );
}
/// Downloads `julien-c/dummy-unknown`, then checks that `cache verify`
/// succeeds and prints the expected per-file markers and footer.
///
/// The fixture download now reports its stderr on failure so a broken
/// download can be diagnosed from the test output.
#[test]
fn cache_verify_against_dummy_unknown() {
    // Fixture: populate the cache so there is something to verify.
    let (_, dl_stderr, dl_success) = run(hf_fm().args(["julien-c/dummy-unknown"]));
    assert!(
        dl_success,
        "download should succeed to populate cache: {dl_stderr}"
    );

    let (stdout, stderr, success) =
        run(hf_fm().args(["cache", "verify", "julien-c/dummy-unknown"]));
    assert!(success, "cache verify should succeed: {stderr}\n{stdout}");
    assert!(
        stdout.contains("SHA256 OK") || stdout.contains("no LFS hash"),
        "cache verify should show OK or no-LFS-hash markers, got:\n{stdout}"
    );
    assert!(
        stdout.contains("SHA256 OK") && stdout.contains("skipped"),
        "cache verify footer should mention OK and skipped counts, got:\n{stdout}"
    );
}
/// `--help` must mention the `du` subcommand.
///
/// On failure the captured stderr is included in the message, matching the
/// other help tests in this file.
#[test]
fn help_shows_du_subcommand() {
    let (stdout, stderr, success) = run(hf_fm().arg("--help"));
    assert!(success, "--help failed: {stderr}");
    assert!(
        stdout.contains("du"),
        "help should mention du subcommand, got:\n{stdout}"
    );
}
/// `du` must succeed and print either a total line or the empty-cache
/// message; when a total is present the column headers must be too.
#[test]
fn du_summary_lists_cached_repos() {
    let mut cmd = hf_fm();
    cmd.arg("du");
    let (stdout, stderr, ok) = run(&mut cmd);
    assert!(ok, "du should succeed: {stderr}");

    let has_total = stdout.contains("total");
    assert!(
        has_total || stdout.contains("No models found"),
        "du should show total or empty message, got:\n{stdout}"
    );

    if !has_total {
        return;
    }
    let has_headers = stdout.contains('#') && stdout.contains("SIZE") && stdout.contains("REPO");
    assert!(
        has_headers,
        "du should show numbered column headers, got:\n{stdout}"
    );
}
/// Downloads `julien-c/dummy-unknown`, then checks that `du <repo>` prints
/// the repo header, column headers, `config.json` and a total line.
///
/// The fixture download now reports its stderr on failure so a broken
/// download can be diagnosed from the test output.
#[test]
fn du_repo_shows_files() {
    // Fixture: populate the cache so the repo has files to list.
    let (_, dl_stderr, dl_success) = run(hf_fm().args(["julien-c/dummy-unknown"]));
    assert!(
        dl_success,
        "download should succeed to populate cache: {dl_stderr}"
    );

    let (stdout, stderr, success) = run(hf_fm().args(["du", "julien-c/dummy-unknown"]));
    assert!(success, "du repo should succeed: {stderr}");
    assert!(
        stdout.contains("julien-c/dummy-unknown:"),
        "du repo should show repo name header, got:\n{stdout}"
    );
    assert!(
        stdout.contains('#') && stdout.contains("SIZE") && stdout.contains("FILE"),
        "du repo should show numbered column headers, got:\n{stdout}"
    );
    assert!(
        stdout.contains("config.json"),
        "du repo should list config.json, got:\n{stdout}"
    );
    assert!(
        stdout.contains("total"),
        "du repo should show total line, got:\n{stdout}"
    );
}
/// Downloads `julien-c/dummy-unknown`, then checks that `du 1` prints a
/// per-file listing with a `FILE` header and a total line.
///
/// The fixture download now reports its stderr on failure so a broken
/// download can be diagnosed from the test output.
#[test]
fn du_numeric_index_drills_down() {
    // Fixture: guarantee at least one cached repo so index 1 exists.
    let (_, dl_stderr, dl_success) = run(hf_fm().args(["julien-c/dummy-unknown"]));
    assert!(
        dl_success,
        "download should succeed to populate cache: {dl_stderr}"
    );

    let (stdout, stderr, success) = run(hf_fm().args(["du", "1"]));
    assert!(success, "du 1 should succeed: {stderr}");
    assert!(
        stdout.contains("total"),
        "du 1 should show per-file total, got:\n{stdout}"
    );
    assert!(
        stdout.contains("FILE"),
        "du 1 should show file column header, got:\n{stdout}"
    );
}
/// `du 99999` must fail with an "out of range" message on stderr.
#[test]
fn du_invalid_index_fails() {
    let mut cmd = hf_fm();
    cmd.arg("du").arg("99999");
    let (_, stderr, succeeded) = run(&mut cmd);
    assert!(!succeeded, "du 99999 should fail");

    let reports_range = stderr.contains("out of range");
    assert!(
        reports_range,
        "du 99999 should report out of range, got:\n{stderr}"
    );
}
/// `du` on a repo that is not cached must succeed and print
/// "No cached files found".
#[test]
fn du_nonexistent_repo_shows_empty() {
    let mut cmd = hf_fm();
    cmd.arg("du").arg("nonexistent-org/nonexistent-model-xyz");
    let (stdout, stderr, ok) = run(&mut cmd);
    assert!(ok, "du for missing repo should succeed: {stderr}");

    let reports_empty = stdout.contains("No cached files found");
    assert!(
        reports_empty,
        "du for missing repo should say no files found, got:\n{stdout}"
    );
}
/// Downloads `julien-c/dummy-unknown`, then checks that `du --tree` starts
/// with the cache path, draws box-drawing connectors and prints a totals line.
///
/// The fixture download now reports its stderr on failure so a broken
/// download can be diagnosed from the test output.
#[test]
fn du_tree_succeeds() {
    // Fixture: populate the cache so the tree has something to render.
    let (_, dl_stderr, dl_success) = run(hf_fm().args(["julien-c/dummy-unknown"]));
    assert!(
        dl_success,
        "download for du-tree fixture should succeed: {dl_stderr}"
    );

    let (stdout, stderr, success) = run(hf_fm().args(["du", "--tree"]));
    assert!(success, "du --tree should succeed: {stderr}");
    assert!(
        stdout.starts_with("Cache: "),
        "du --tree should announce the cache path, got:\n{stdout}"
    );
    assert!(
        stdout.contains("\u{251c}\u{2500}\u{2500} ")
            || stdout.contains("\u{2514}\u{2500}\u{2500} "),
        "du --tree should render box-drawing connectors, got:\n{stdout}"
    );
    assert!(
        stdout.contains("total ("),
        "du --tree should print a totals line, got:\n{stdout}"
    );
}
/// Downloads `julien-c/dummy-unknown`, then checks that `du --tree --age`
/// succeeds and its output contains "hour", "day" or "month".
///
/// The fixture download now reports its stderr on failure so a broken
/// download can be diagnosed from the test output.
#[test]
fn du_tree_with_age_succeeds() {
    // Fixture: populate the cache so the tree has something to render.
    let (_, dl_stderr, dl_success) = run(hf_fm().args(["julien-c/dummy-unknown"]));
    assert!(
        dl_success,
        "download for du-tree fixture should succeed: {dl_stderr}"
    );

    let (stdout, stderr, success) = run(hf_fm().args(["du", "--tree", "--age"]));
    assert!(success, "du --tree --age should succeed: {stderr}");
    assert!(
        stdout.contains("hour") || stdout.contains("day") || stdout.contains("month"),
        "du --tree --age should include a relative age, got:\n{stdout}"
    );
}
/// `du --tree <REPO>` must fail with a conflict message on stderr.
#[test]
fn du_tree_conflicts_with_repo_arg() {
    let mut cmd = hf_fm();
    cmd.arg("du").arg("--tree").arg("julien-c/dummy-unknown");
    let (_, stderr, succeeded) = run(&mut cmd);
    assert!(!succeeded, "du --tree <REPO> should be rejected by clap");

    let reports_conflict = ["cannot be used with", "conflict"]
        .iter()
        .any(|phrase| stderr.contains(*phrase));
    assert!(
        reports_conflict,
        "du --tree <REPO> should report a clap conflict, got:\n{stderr}"
    );
}
/// `--help` must mention the `inspect` subcommand.
///
/// On failure the captured stderr is included in the message, matching the
/// other help tests in this file.
#[test]
fn help_shows_inspect_subcommand() {
    let (stdout, stderr, success) = run(hf_fm().arg("--help"));
    assert!(success, "--help failed: {stderr}");
    assert!(
        stdout.contains("inspect"),
        "help should mention inspect subcommand, got:\n{stdout}"
    );
}
/// Looks in `~/.cache/huggingface/hub` for a cached model snapshot that
/// directly contains a `.safetensors` file.
///
/// Cache directories are expected to be named `models--<org>--<name>`; the
/// repo id is rebuilt as `<org>/<name>` by splitting at the first `--` after
/// the prefix. Directories that do not match are skipped.
///
/// Returns `Some((repo_id, filename))` for the first match in directory
/// iteration order, or `None` when there is no home directory, the cache
/// directory is missing or unreadable, or nothing matches.
///
/// Directory entries that cannot be read are skipped instead of aborting the
/// whole scan, so one bad entry no longer hides valid repos listed after it.
fn find_cached_safetensors_repo() -> Option<(String, String)> {
    let cache_dir = dirs::home_dir()?.join(".cache/huggingface/hub");
    if !cache_dir.exists() {
        return None;
    }

    // `.flatten()` drops per-entry I/O errors rather than returning `None`.
    for entry in std::fs::read_dir(&cache_dir).ok()?.flatten() {
        let dir_name = entry.file_name().to_string_lossy().into_owned();
        let Some((org, name)) = dir_name
            .strip_prefix("models--")
            .and_then(|rest| rest.split_once("--"))
        else {
            continue;
        };
        let repo_id = format!("{org}/{name}");

        let Ok(snapshots) = std::fs::read_dir(entry.path().join("snapshots")) else {
            continue;
        };
        for snap in snapshots.flatten() {
            let snap_path = snap.path();
            if !snap_path.is_dir() {
                continue;
            }
            let Ok(files) = std::fs::read_dir(&snap_path) else {
                continue;
            };
            let first_match = files
                .flatten()
                .map(|file| file.file_name().to_string_lossy().into_owned())
                .find(|fname| fname.ends_with(".safetensors"));
            if let Some(fname) = first_match {
                return Some((repo_id, fname));
            }
        }
    }
    None
}
/// Collects the repo id of every cached model under
/// `~/.cache/huggingface/hub` that has at least one snapshot directly
/// containing a `.safetensors` file.
///
/// Cache directories are expected to be named `models--<org>--<name>`; the
/// repo id is rebuilt as `<org>/<name>` by splitting at the first `--` after
/// the prefix. Each repo appears at most once, in directory iteration order.
///
/// Returns an empty vector when there is no home directory or the cache
/// directory is missing or unreadable. Unreadable entries are skipped.
fn find_all_cached_safetensors_repos() -> Vec<String> {
    let Some(home) = dirs::home_dir() else {
        return Vec::new();
    };
    let cache_dir = home.join(".cache/huggingface/hub");
    if !cache_dir.exists() {
        return Vec::new();
    }
    let Ok(entries) = std::fs::read_dir(&cache_dir) else {
        return Vec::new();
    };

    let mut found = Vec::new();
    for entry in entries.flatten() {
        let dir_name = entry.file_name().to_string_lossy().into_owned();
        let Some((org, name)) = dir_name
            .strip_prefix("models--")
            .and_then(|rest| rest.split_once("--"))
        else {
            continue;
        };

        let Ok(snapshots) = std::fs::read_dir(entry.path().join("snapshots")) else {
            continue;
        };
        // Stop at the first snapshot that holds a safetensors file.
        let has_safetensors = snapshots
            .flatten()
            .map(|snap| snap.path())
            .filter(|snap_path| snap_path.is_dir())
            .any(|snap_path| match std::fs::read_dir(&snap_path) {
                Ok(files) => files.flatten().any(|file| {
                    file.file_name()
                        .to_string_lossy()
                        .ends_with(".safetensors")
                }),
                Err(_) => false,
            });
        if has_safetensors {
            found.push(format!("{org}/{name}"));
        }
    }
    found
}
/// Looks in `~/.cache/huggingface/hub` for a cached `.safetensors` file whose
/// header text contains the string `__metadata__`.
///
/// Cache directories are expected to be named `models--<org>--<name>`; the
/// repo id is rebuilt as `<org>/<name>` by splitting at the first `--` after
/// the prefix. Directories that do not match are skipped.
///
/// Returns `Some((repo_id, filename))` for the first match in directory
/// iteration order, or `None` when there is no home directory, the cache
/// directory is missing or unreadable, or nothing matches.
///
/// Directory entries that cannot be read are skipped instead of aborting the
/// whole scan, so one bad entry no longer hides valid repos listed after it.
fn find_cached_safetensors_with_metadata() -> Option<(String, String)> {
    /// Reports whether the header of the file at `path` mentions `__metadata__`.
    ///
    /// The first 8 bytes are read as a little-endian `u64` length, then that
    /// many bytes are read and searched as UTF-8 text. Any I/O failure,
    /// invalid UTF-8, or a length above 10 000 000 bytes yields `false`.
    fn header_mentions_metadata(path: &std::path::Path) -> bool {
        use std::io::Read;

        // Guards against allocating a huge buffer for a bogus length prefix.
        const MAX_HEADER_BYTES: usize = 10_000_000;

        let Ok(mut f) = std::fs::File::open(path) else {
            return false;
        };
        let mut len_buf = [0u8; 8];
        if f.read_exact(&mut len_buf).is_err() {
            return false;
        }
        let Ok(header_size) = usize::try_from(u64::from_le_bytes(len_buf)) else {
            return false;
        };
        if header_size > MAX_HEADER_BYTES {
            return false;
        }
        let mut json_buf = vec![0u8; header_size];
        if f.read_exact(&mut json_buf).is_err() {
            return false;
        }
        matches!(
            std::str::from_utf8(&json_buf),
            Ok(text) if text.contains("__metadata__")
        )
    }

    let cache_dir = dirs::home_dir()?.join(".cache/huggingface/hub");
    if !cache_dir.exists() {
        return None;
    }

    // `.flatten()` drops per-entry I/O errors rather than returning `None`.
    for entry in std::fs::read_dir(&cache_dir).ok()?.flatten() {
        let dir_name = entry.file_name().to_string_lossy().into_owned();
        let Some((org, name)) = dir_name
            .strip_prefix("models--")
            .and_then(|rest| rest.split_once("--"))
        else {
            continue;
        };
        let repo_id = format!("{org}/{name}");

        let Ok(snapshots) = std::fs::read_dir(entry.path().join("snapshots")) else {
            continue;
        };
        for snap in snapshots.flatten() {
            let snap_path = snap.path();
            if !snap_path.is_dir() {
                continue;
            }
            let Ok(files) = std::fs::read_dir(&snap_path) else {
                continue;
            };
            for file in files.flatten() {
                let fname = file.file_name().to_string_lossy().into_owned();
                if fname.ends_with(".safetensors") && header_mentions_metadata(&file.path()) {
                    return Some((repo_id, fname));
                }
            }
        }
    }
    None
}
/// Looks in `~/.cache/huggingface/hub` for a cached model that has a snapshot
/// containing `model.safetensors.index.json`.
///
/// Cache directories are expected to be named `models--<org>--<name>`; the
/// repo id is rebuilt as `<org>/<name>` by splitting at the first `--` after
/// the prefix. Directories that do not match are skipped.
///
/// Returns the repo id of the first match in directory iteration order, or
/// `None` when there is no home directory, the cache directory is missing or
/// unreadable, or nothing matches.
///
/// Directory entries that cannot be read are skipped instead of aborting the
/// whole scan, so one bad entry no longer hides valid repos listed after it.
fn find_cached_sharded_repo() -> Option<String> {
    let cache_dir = dirs::home_dir()?.join(".cache/huggingface/hub");
    if !cache_dir.exists() {
        return None;
    }

    // `.flatten()` drops per-entry I/O errors rather than returning `None`.
    for entry in std::fs::read_dir(&cache_dir).ok()?.flatten() {
        let dir_name = entry.file_name().to_string_lossy().into_owned();
        let Some((org, name)) = dir_name
            .strip_prefix("models--")
            .and_then(|rest| rest.split_once("--"))
        else {
            continue;
        };

        let Ok(snapshots) = std::fs::read_dir(entry.path().join("snapshots")) else {
            continue;
        };
        let has_shard_index = snapshots
            .flatten()
            .map(|snap| snap.path())
            .filter(|snap_path| snap_path.is_dir())
            .any(|snap_path| snap_path.join("model.safetensors.index.json").exists());
        if has_shard_index {
            return Some(format!("{org}/{name}"));
        }
    }
    None
}
/// `inspect <repo> <file> --cached` must succeed and print the cached source
/// line, the tensor table headers and a tensor count.
///
/// Skipped (with a message on stderr) when no cached safetensors repo exists.
#[test]
fn inspect_cached_single_file() {
    let (repo_id, filename) = match find_cached_safetensors_repo() {
        Some(found) => found,
        None => {
            eprintln!("SKIP: no cached safetensors repo found");
            return;
        }
    };

    let mut cmd = hf_fm();
    cmd.arg("inspect")
        .arg(&repo_id)
        .arg(&filename)
        .arg("--cached");
    let (stdout, stderr, ok) = run(&mut cmd);
    assert!(
        ok,
        "inspect --cached should succeed for {repo_id}/{filename}: {stderr}"
    );

    let reports_cached = stdout.contains("Source: cached");
    assert!(
        reports_cached,
        "should report cached source, got:\n{stdout}"
    );

    let has_table_headers = ["Tensor", "Dtype", "Shape"]
        .iter()
        .all(|header| stdout.contains(*header));
    assert!(
        has_table_headers,
        "should show tensor table headers, got:\n{stdout}"
    );

    let has_summary = stdout.contains("tensors");
    assert!(
        has_summary,
        "should show tensor count summary, got:\n{stdout}"
    );
}
/// `inspect --cached --json` must succeed and emit the `"tensors"` and
/// `"header_size"` fields.
///
/// Skipped (with a message on stderr) when no cached safetensors repo exists.
#[test]
fn inspect_cached_json_output() {
    let (repo_id, filename) = match find_cached_safetensors_repo() {
        Some(found) => found,
        None => {
            eprintln!("SKIP: no cached safetensors repo found");
            return;
        }
    };

    let mut cmd = hf_fm();
    cmd.arg("inspect")
        .arg(&repo_id)
        .arg(&filename)
        .args(["--cached", "--json"]);
    let (stdout, stderr, ok) = run(&mut cmd);
    assert!(ok, "inspect --cached --json should succeed: {stderr}");

    let has_tensors_field = stdout.contains("\"tensors\"");
    assert!(
        has_tensors_field,
        "JSON should contain tensors field, got:\n{stdout}"
    );

    let has_header_size_field = stdout.contains("\"header_size\"");
    assert!(
        has_header_size_field,
        "JSON should contain header_size field, got:\n{stdout}"
    );
}
/// `inspect --cached --no-metadata` must succeed and omit the `Metadata:`
/// line.
///
/// Skipped (with a message on stderr) when no cached safetensors repo exists.
#[test]
fn inspect_cached_no_metadata() {
    let (repo_id, filename) = match find_cached_safetensors_repo() {
        Some(found) => found,
        None => {
            eprintln!("SKIP: no cached safetensors repo found");
            return;
        }
    };

    let mut cmd = hf_fm();
    cmd.arg("inspect")
        .arg(&repo_id)
        .arg(&filename)
        .args(["--cached", "--no-metadata"]);
    let (stdout, stderr, ok) = run(&mut cmd);
    assert!(
        ok,
        "inspect --cached --no-metadata should succeed: {stderr}"
    );

    let shows_metadata = stdout.contains("Metadata:");
    assert!(
        !shows_metadata,
        "--no-metadata should suppress Metadata line, got:\n{stdout}"
    );
}
/// `inspect <repo> --cached` (no filename) must succeed and mention tensors.
///
/// Skipped (with a message on stderr) when no cached safetensors repo exists.
#[test]
fn inspect_cached_repo_summary() {
    let repo_id = match find_cached_safetensors_repo() {
        Some((repo_id, _)) => repo_id,
        None => {
            eprintln!("SKIP: no cached safetensors repo found");
            return;
        }
    };

    let mut cmd = hf_fm();
    cmd.arg("inspect").arg(&repo_id).arg("--cached");
    let (stdout, stderr, ok) = run(&mut cmd);
    assert!(
        ok,
        "inspect --cached repo summary should succeed: {stderr}"
    );

    let mentions_tensors = ["tensors", "Tensors"]
        .iter()
        .any(|word| stdout.contains(*word));
    assert!(
        mentions_tensors,
        "should mention tensors in output, got:\n{stdout}"
    );
}
/// For a cached file whose header mentions `__metadata__`, `inspect --cached`
/// must print a `Metadata:` line that contains an `=`.
///
/// Skipped (with a message on stderr) when no such cached file exists.
#[test]
fn inspect_cached_metadata_present() {
    let (repo_id, filename) = match find_cached_safetensors_with_metadata() {
        Some(found) => found,
        None => {
            eprintln!("SKIP: no cached safetensors file with __metadata__ found");
            return;
        }
    };

    let mut cmd = hf_fm();
    cmd.arg("inspect")
        .arg(&repo_id)
        .arg(&filename)
        .arg("--cached");
    let (stdout, stderr, ok) = run(&mut cmd);
    assert!(
        ok,
        "inspect --cached should succeed for {repo_id}/{filename}: {stderr}"
    );

    let has_metadata_line = stdout.contains("Metadata:");
    assert!(
        has_metadata_line,
        "output should contain Metadata: line by default, got:\n{stdout}"
    );

    // The assertion above guarantees a matching line exists.
    let meta_line = stdout
        .lines()
        .find(|candidate| candidate.contains("Metadata:"))
        .unwrap();
    let has_pairs = meta_line.contains('=');
    assert!(
        has_pairs,
        "Metadata line should contain key=value pairs, got: {meta_line}"
    );
}
/// `inspect <repo> --cached` on a sharded model must report the shard index
/// source, a shard count and a `Hint:` line.
///
/// Skipped (with a message on stderr) when no cached sharded model exists.
#[test]
fn inspect_cached_sharded_model() {
    let repo_id = match find_cached_sharded_repo() {
        Some(repo_id) => repo_id,
        None => {
            eprintln!("SKIP: no cached sharded safetensors model found");
            return;
        }
    };

    let mut cmd = hf_fm();
    cmd.arg("inspect").arg(&repo_id).arg("--cached");
    let (stdout, stderr, ok) = run(&mut cmd);
    assert!(
        ok,
        "inspect --cached sharded model should succeed: {stderr}"
    );

    let names_shard_index = stdout.contains("shard index");
    assert!(
        names_shard_index,
        "sharded model should show shard index source, got:\n{stdout}"
    );

    let shows_shard_count = ["shards", "shard,"]
        .iter()
        .any(|word| stdout.contains(*word));
    assert!(
        shows_shard_count,
        "should show shard count, got:\n{stdout}"
    );

    let shows_hint = stdout.contains("Hint:");
    assert!(
        shows_hint,
        "should show per-tensor detail hint, got:\n{stdout}"
    );
}
/// `inspect --cached --dtypes` on a sharded model must print an aggregated
/// histogram and must not print the per-file hint.
///
/// Skipped (with a message on stderr) when no cached sharded model exists.
#[test]
fn inspect_cached_sharded_dtypes_aggregates() {
    let repo_id = match find_cached_sharded_repo() {
        Some(repo_id) => repo_id,
        None => {
            eprintln!("SKIP: no cached sharded safetensors model found");
            return;
        }
    };

    let mut cmd = hf_fm();
    cmd.arg("inspect")
        .arg(&repo_id)
        .args(["--cached", "--dtypes"]);
    let (stdout, stderr, ok) = run(&mut cmd);
    assert!(
        ok,
        "inspect --cached --dtypes sharded model should succeed: {stderr}"
    );

    let is_aggregated = stdout.contains("aggregated across");
    assert!(
        is_aggregated,
        "sharded --dtypes should report aggregated source, got:\n{stdout}"
    );

    let has_histogram_columns = ["Dtype", "Tensors", "Params"]
        .iter()
        .all(|column| stdout.contains(*column));
    assert!(
        has_histogram_columns,
        "sharded --dtypes should show histogram columns, got:\n{stdout}"
    );

    let prints_per_file_hint = stdout.contains("Hint: use `hf-fm inspect");
    assert!(
        !prints_per_file_hint,
        "aggregated --dtypes should NOT print the per-file hint, got:\n{stdout}"
    );
}
/// `inspect --cached --tree` on a sharded model must report an aggregated
/// source and draw box-drawing connectors.
///
/// Skipped (with a message on stderr) when no cached sharded model exists.
#[test]
fn inspect_cached_sharded_tree_aggregates() {
    let repo_id = match find_cached_sharded_repo() {
        Some(repo_id) => repo_id,
        None => {
            eprintln!("SKIP: no cached sharded safetensors model found");
            return;
        }
    };

    let mut cmd = hf_fm();
    cmd.arg("inspect")
        .arg(&repo_id)
        .args(["--cached", "--tree"]);
    let (stdout, stderr, ok) = run(&mut cmd);
    assert!(
        ok,
        "inspect --cached --tree sharded model should succeed: {stderr}"
    );

    let is_aggregated = stdout.contains("aggregated across");
    assert!(
        is_aggregated,
        "sharded --tree should report aggregated source, got:\n{stdout}"
    );

    let draws_connectors = ["├──", "└──"]
        .iter()
        .any(|connector| stdout.contains(*connector));
    assert!(
        draws_connectors,
        "sharded --tree should render box-drawing connectors, got:\n{stdout}"
    );
}
/// `inspect --cached --dtypes --json` on a sharded model must emit the
/// `"dtypes"` and `"total_tensors"` fields.
///
/// Skipped (with a message on stderr) when no cached sharded model exists.
#[test]
fn inspect_cached_sharded_dtypes_json_aggregates() {
    let repo_id = match find_cached_sharded_repo() {
        Some(repo_id) => repo_id,
        None => {
            eprintln!("SKIP: no cached sharded safetensors model found");
            return;
        }
    };

    let mut cmd = hf_fm();
    cmd.arg("inspect")
        .arg(&repo_id)
        .args(["--cached", "--dtypes", "--json"]);
    let (stdout, stderr, ok) = run(&mut cmd);
    assert!(
        ok,
        "inspect --cached --dtypes --json sharded model should succeed: {stderr}"
    );

    let has_expected_fields = ["\"dtypes\"", "\"total_tensors\""]
        .iter()
        .all(|field| stdout.contains(*field));
    assert!(
        has_expected_fields,
        "JSON should contain dtypes + total_tensors fields, got:\n{stdout}"
    );
}
/// `inspect --cached --limit 3` on a sharded model must report an aggregated
/// source, a `Shard` column and the active limit.
///
/// Skipped (with a message on stderr) when no cached sharded model exists.
#[test]
fn inspect_cached_sharded_limit_shows_shard_column() {
    let repo_id = match find_cached_sharded_repo() {
        Some(repo_id) => repo_id,
        None => {
            eprintln!("SKIP: no cached sharded safetensors model found");
            return;
        }
    };

    let mut cmd = hf_fm();
    cmd.arg("inspect")
        .arg(&repo_id)
        .arg("--cached")
        .args(["--limit", "3"]);
    let (stdout, stderr, ok) = run(&mut cmd);
    assert!(
        ok,
        "inspect --cached --limit sharded model should succeed: {stderr}"
    );

    let is_aggregated = stdout.contains("aggregated across");
    assert!(
        is_aggregated,
        "sharded --limit should report aggregated source, got:\n{stdout}"
    );

    let has_shard_column = stdout.contains("Shard");
    assert!(
        has_shard_column,
        "sharded --limit table should include a Shard column, got:\n{stdout}"
    );

    let mentions_limit = stdout.contains("limit: 3");
    assert!(
        mentions_limit,
        "footer should mention the active limit, got:\n{stdout}"
    );
}
/// `inspect --cached --filter embed` must only list rows containing "embed"
/// and must print a filtered summary.
///
/// The unfiltered run is done first as a baseline; its stderr is now included
/// in the failure message so a broken baseline can be diagnosed.
///
/// Skipped (with a message on stderr) when no cached safetensors repo exists.
#[test]
fn inspect_cached_filter() {
    let Some((repo_id, filename)) = find_cached_safetensors_repo() else {
        eprintln!("SKIP: no cached safetensors repo found");
        return;
    };

    // Baseline: the unfiltered listing must work and show tensors.
    let (stdout_all, stderr_all, success) =
        run(hf_fm().args(["inspect", &repo_id, &filename, "--cached"]));
    assert!(success, "inspect --cached should succeed: {stderr_all}");
    assert!(
        stdout_all.contains("tensor"),
        "unfiltered output should show tensors"
    );

    let (stdout, stderr, success) = run(hf_fm().args([
        "inspect", &repo_id, &filename, "--cached", "--filter", "embed",
    ]));
    assert!(
        success,
        "inspect --cached --filter should succeed: {stderr}"
    );

    // Lines that are not tensor rows: blanks, header fields, the table
    // header, the rule line, and summary lines.
    let is_non_row = |trimmed: &str| {
        trimmed.is_empty()
            || trimmed.starts_with("Repo:")
            || trimmed.starts_with("File:")
            || trimmed.starts_with("Source:")
            || trimmed.starts_with("Header:")
            || trimmed.starts_with("Metadata:")
            || trimmed.starts_with("Tensor")
            || trimmed.starts_with('\u{2500}')
            || trimmed.contains("tensor")
            || trimmed.contains("params")
    };
    for trimmed in stdout.lines().map(str::trim).filter(|l| !is_non_row(l)) {
        assert!(
            trimmed.contains("embed"),
            "filtered line should contain 'embed': {trimmed}"
        );
    }

    assert!(
        stdout.contains('/'),
        "filtered summary should show filtered/total format, got:\n{stdout}"
    );
    assert!(
        stdout.contains("filter:"),
        "filtered summary should mention filter, got:\n{stdout}"
    );
}
/// `--help` must mention the `diff` subcommand.
///
/// On failure the captured stderr is included in the message, matching the
/// other help tests in this file.
#[test]
fn help_shows_diff_subcommand() {
    let (stdout, stderr, success) = run(hf_fm().arg("--help"));
    assert!(success, "--help failed: {stderr}");
    assert!(
        stdout.contains("diff"),
        "help should mention diff subcommand, got:\n{stdout}"
    );
}
/// Diffing a cached repo against itself must report zero `only-A`, `only-B`
/// and `differ` entries and a non-zero `Matching:` count.
///
/// Skipped (with a message on stderr) when no cached safetensors repo exists.
#[test]
fn diff_cached_identical_model() {
    let repo_id = match find_cached_safetensors_repo() {
        Some((repo_id, _)) => repo_id,
        None => {
            eprintln!("SKIP: no cached safetensors repo found");
            return;
        }
    };

    let mut cmd = hf_fm();
    cmd.arg("diff")
        .arg(&repo_id)
        .arg(&repo_id)
        .arg("--cached");
    let (stdout, stderr, ok) = run(&mut cmd);
    assert!(ok, "diff --cached self-diff should succeed: {stderr}");

    let no_only_a = stdout.contains("only-A: 0");
    assert!(
        no_only_a,
        "self-diff should have 0 only-A, got:\n{stdout}"
    );

    let no_only_b = stdout.contains("only-B: 0");
    assert!(
        no_only_b,
        "self-diff should have 0 only-B, got:\n{stdout}"
    );

    let no_differ = stdout.contains("differ: 0");
    assert!(
        no_differ,
        "self-diff should have 0 differ, got:\n{stdout}"
    );

    let has_matches = stdout.contains("Matching:") && !stdout.contains("Matching: 0");
    assert!(
        has_matches,
        "self-diff should have matching tensors, got:\n{stdout}"
    );
}
/// Diffing two different cached repos must succeed and label both sides.
///
/// Skipped (with a message on stderr) when fewer than two cached safetensors
/// repos exist.
#[test]
fn diff_cached_different_models() {
    let repos = find_all_cached_safetensors_repos();
    let [repo_a, repo_b, ..] = repos.as_slice() else {
        eprintln!("SKIP: need at least 2 cached safetensors repos for diff test");
        return;
    };

    let mut cmd = hf_fm();
    cmd.arg("diff").arg(repo_a).arg(repo_b).arg("--cached");
    let (stdout, stderr, ok) = run(&mut cmd);
    assert!(
        ok,
        "diff --cached different models should succeed: {stderr}"
    );

    let label_a = format!("A: {repo_a}");
    assert!(
        stdout.contains(&label_a),
        "should show repo A label, got:\n{stdout}"
    );

    let label_b = format!("B: {repo_b}");
    assert!(
        stdout.contains(&label_b),
        "should show repo B label, got:\n{stdout}"
    );

    let has_summary = stdout.contains("A:") && stdout.contains("tensors");
    assert!(has_summary, "should show summary line, got:\n{stdout}");
}