#![cfg(feature = "cli")]
#![allow(clippy::panic, clippy::unwrap_used, clippy::expect_used)]
use std::process::Command;
/// Builds a `Command` pointing at the compiled `hf-fetch-model` binary under test.
fn hf_fm() -> Command {
    let bin = env!("CARGO_BIN_EXE_hf-fetch-model");
    Command::new(bin)
}
/// Executes `cmd` to completion, returning `(stdout, stderr, exit-success)`.
/// Output bytes are decoded lossily so non-UTF-8 output cannot panic the test.
fn run(cmd: &mut Command) -> (String, String, bool) {
    let output = cmd.output().expect("failed to execute hf-fetch-model binary");
    let stdout = String::from_utf8_lossy(&output.stdout).into_owned();
    let stderr = String::from_utf8_lossy(&output.stderr).into_owned();
    let success = output.status.success();
    (stdout, stderr, success)
}
/// Top-level `--help` must describe the default (download-everything) behavior.
#[test]
fn help_shows_download_description() {
    let mut cmd = hf_fm();
    cmd.arg("--help");
    let (stdout, stderr, success) = run(&mut cmd);
    assert!(success, "--help failed: {stderr}");
    assert!(
        stdout.contains("Downloads all files"),
        "help should describe default download behavior, got:\n{stdout}"
    );
}
/// Top-level `--help` must advertise the `list-files` subcommand.
#[test]
fn help_shows_list_files_subcommand() {
    let mut cmd = hf_fm();
    cmd.arg("--help");
    let (stdout, stderr, success) = run(&mut cmd);
    assert!(success, "--help failed: {stderr}");
    assert!(
        stdout.contains("list-files"),
        "help should list the list-files subcommand, got:\n{stdout}"
    );
}
/// Top-level `--help` must embed the crate version from Cargo metadata.
#[test]
fn help_shows_version_number() {
    let mut cmd = hf_fm();
    cmd.arg("--help");
    let (stdout, stderr, success) = run(&mut cmd);
    assert!(success, "--help failed: {stderr}");
    // Compile-time version of THIS test crate; binary and tests share the package.
    let version = env!("CARGO_PKG_VERSION");
    assert!(
        stdout.contains(version),
        "help should contain version {version}, got:\n{stdout}"
    );
}
/// Top-level `--help` must mention the `pth` preset.
#[test]
fn help_shows_pth_preset() {
    let mut cmd = hf_fm();
    cmd.arg("--help");
    let (stdout, stderr, success) = run(&mut cmd);
    assert!(success, "--help failed: {stderr}");
    assert!(
        stdout.contains("pth"),
        "help should mention pth preset, got:\n{stdout}"
    );
}
/// Top-level `--help` must list the `--dry-run` flag.
#[test]
fn help_shows_dry_run_flag() {
    let mut cmd = hf_fm();
    cmd.arg("--help");
    let (stdout, stderr, success) = run(&mut cmd);
    assert!(success, "--help failed: {stderr}");
    assert!(
        stdout.contains("--dry-run"),
        "help should list the --dry-run flag, got:\n{stdout}"
    );
}
/// Top-level `--help` must show the `--flat` flag.
#[test]
fn help_shows_flat_flag() {
    let mut cmd = hf_fm();
    cmd.arg("--help");
    let (stdout, stderr, success) = run(&mut cmd);
    assert!(success, "--help failed: {stderr}");
    assert!(
        stdout.contains("--flat"),
        "help should show --flat flag, got:\n{stdout}"
    );
}
/// `download-file --help` must show the `--flat` flag.
#[test]
fn download_file_help_shows_flat_flag() {
    let mut cmd = hf_fm();
    cmd.args(["download-file", "--help"]);
    let (stdout, stderr, success) = run(&mut cmd);
    assert!(success, "download-file --help failed: {stderr}");
    assert!(
        stdout.contains("--flat"),
        "download-file help should show --flat flag, got:\n{stdout}"
    );
}
/// `download-file --help` must mention glob-pattern support.
#[test]
fn download_file_help_mentions_glob() {
    let mut cmd = hf_fm();
    cmd.args(["download-file", "--help"]);
    let (stdout, stderr, success) = run(&mut cmd);
    assert!(success, "download-file --help failed: {stderr}");
    assert!(
        stdout.contains("glob"),
        "download-file help should mention glob support, got:\n{stdout}"
    );
}
/// `list-files --help` must document every supported flag and describe the
/// "partial" cache state.
#[test]
fn list_files_help_shows_all_flags() {
    let mut cmd = hf_fm();
    cmd.args(["list-files", "--help"]);
    let (stdout, stderr, success) = run(&mut cmd);
    assert!(success, "list-files --help failed: {stderr}");
    let expected_flags = ["--no-checksum", "--show-cached", "--filter", "--preset"];
    for flag in expected_flags {
        assert!(
            stdout.contains(flag),
            "list-files help should contain {flag}, got:\n{stdout}"
        );
    }
    assert!(
        stdout.contains("partial"),
        "list-files help should describe partial state, got:\n{stdout}"
    );
}
/// A repo id without a slash must be rejected with a hint about `org/model`.
#[test]
fn list_files_invalid_repo_format() {
    let mut cmd = hf_fm();
    cmd.args(["list-files", "noSlash"]);
    let (_stdout, stderr, success) = run(&mut cmd);
    assert!(!success, "list-files with invalid repo should fail");
    assert!(
        stderr.contains("org/model"),
        "error should mention expected format, got:\n{stderr}"
    );
}
/// `--dry-run` with a malformed repo id must fail and hint at `org/model`.
#[test]
fn dry_run_invalid_repo_format() {
    let mut cmd = hf_fm();
    cmd.args(["noSlash", "--dry-run"]);
    let (_stdout, stderr, success) = run(&mut cmd);
    assert!(!success, "--dry-run with invalid repo should fail");
    assert!(
        stderr.contains("org/model"),
        "error should mention expected format, got:\n{stderr}"
    );
}
/// `list-files` against a repo that does not exist on the Hub must fail with
/// an error indicating the repo is inaccessible (404 or auth-gated).
#[test]
fn list_files_nonexistent_repo() {
    let mut cmd = hf_fm();
    cmd.args(["list-files", "fake/nonexistent-repo-12345"]);
    let (_stdout, stderr, success) = run(&mut cmd);
    assert!(!success, "list-files with nonexistent repo should fail");
    let inaccessible = stderr.contains("not found")
        || stderr.contains("401")
        || stderr.contains("Unauthorized");
    assert!(
        inaccessible,
        "error should indicate repo inaccessible, got:\n{stderr}"
    );
}
/// Default `list-files` output: file names, a count/size summary line, and a
/// SHA256 column header.
#[test]
fn list_files_default_output() {
    let mut cmd = hf_fm();
    cmd.args(["list-files", "julien-c/dummy-unknown"]);
    let (stdout, stderr, success) = run(&mut cmd);
    assert!(success, "list-files failed: {stderr}");
    assert!(
        stdout.contains("config.json"),
        "output should contain config.json, got:\n{stdout}"
    );
    assert!(
        stdout.contains("pytorch_model.bin"),
        "output should contain pytorch_model.bin, got:\n{stdout}"
    );
    assert!(
        stdout.contains("files") && stdout.contains("total"),
        "output should contain summary with file count and total, got:\n{stdout}"
    );
    assert!(
        stdout.contains("SHA256"),
        "default output should contain SHA256 header, got:\n{stdout}"
    );
}
/// `--no-checksum` must remove the SHA256 column entirely.
#[test]
fn list_files_no_checksum_hides_sha256() {
    let mut cmd = hf_fm();
    cmd.args(["list-files", "julien-c/dummy-unknown", "--no-checksum"]);
    let (stdout, stderr, success) = run(&mut cmd);
    assert!(success, "list-files --no-checksum failed: {stderr}");
    assert!(
        !stdout.contains("SHA256"),
        "--no-checksum should hide SHA256 header, got:\n{stdout}"
    );
}
/// `--show-cached` must add a "Cached" column header to the listing.
#[test]
fn list_files_show_cached_adds_column() {
    let mut cmd = hf_fm();
    cmd.args(["list-files", "julien-c/dummy-unknown", "--show-cached"]);
    let (stdout, stderr, success) = run(&mut cmd);
    assert!(success, "list-files --show-cached failed: {stderr}");
    assert!(
        stdout.contains("Cached"),
        "--show-cached should add Cached header, got:\n{stdout}"
    );
}
/// Fully cached files must be marked with a check mark (✓), never "partial",
/// and the summary must report a cached count.
/// NOTE(review): assumes the repo was fully downloaded by an earlier test run
/// or prior cache state — confirm test ordering/independence.
#[test]
fn list_files_show_cached_marks_complete_files() {
    let mut cmd = hf_fm();
    cmd.args(["list-files", "julien-c/dummy-unknown", "--show-cached"]);
    let (stdout, stderr, success) = run(&mut cmd);
    assert!(success, "list-files --show-cached failed: {stderr}");
    assert!(
        stdout.contains('\u{2713}'),
        "cached files should show \u{2713} mark, got:\n{stdout}"
    );
    assert!(
        !stdout.contains("partial"),
        "fully cached files should not show 'partial', got:\n{stdout}"
    );
    assert!(
        stdout.contains("cached"),
        "summary should mention cached count, got:\n{stdout}"
    );
}
/// `--filter '*.json'` must include JSON files and exclude everything else.
#[test]
fn list_files_filter_limits_output() {
    let mut cmd = hf_fm();
    cmd.args(["list-files", "julien-c/dummy-unknown", "--filter", "*.json"]);
    let (stdout, stderr, success) = run(&mut cmd);
    assert!(success, "list-files --filter failed: {stderr}");
    assert!(
        stdout.contains("config.json"),
        "filtered output should contain config.json, got:\n{stdout}"
    );
    assert!(
        !stdout.contains("pytorch_model.bin"),
        "filtered output should NOT contain pytorch_model.bin, got:\n{stdout}"
    );
}
/// `--dry-run` output must include `Repo:` and `Revision:` header lines.
#[test]
fn dry_run_shows_repo_and_revision() {
    let mut cmd = hf_fm();
    cmd.args(["julien-c/dummy-unknown", "--dry-run"]);
    let (stdout, stderr, success) = run(&mut cmd);
    assert!(success, "--dry-run failed: {stderr}");
    assert!(
        stdout.contains("Repo:"),
        "dry-run should show Repo: header, got:\n{stdout}"
    );
    assert!(
        stdout.contains("Revision:"),
        "dry-run should show Revision: header, got:\n{stdout}"
    );
}
/// For a fully cached repo, `--dry-run` must report nothing left to download.
/// NOTE(review): assumes the repo is already fully cached — confirm ordering.
#[test]
fn dry_run_cached_repo_shows_zero_download() {
    let mut cmd = hf_fm();
    cmd.args(["julien-c/dummy-unknown", "--dry-run"]);
    let (stdout, stderr, success) = run(&mut cmd);
    assert!(success, "--dry-run failed: {stderr}");
    assert!(
        stdout.contains("0 to download"),
        "cached repo should show 0 to download, got:\n{stdout}"
    );
    assert!(
        stdout.contains("Download: 0 B"),
        "cached repo should show Download: 0 B, got:\n{stdout}"
    );
}
/// Regression guard: any "chunk threshold" line in dry-run output must either
/// say "disabled" or contain only plausibly sized integers (<= 10_000).
#[test]
fn dry_run_no_astronomical_chunk_threshold() {
    let mut cmd = hf_fm();
    cmd.args([
        "mistralai/Ministral-3-3B-Instruct-2512",
        "--preset",
        "safetensors",
        "--dry-run",
    ]);
    let (stdout, stderr, success) = run(&mut cmd);
    assert!(success, "--dry-run failed: {stderr}");
    for line in stdout.lines().filter(|l| l.contains("chunk threshold")) {
        let disabled = line.contains("disabled");
        // Every whitespace-separated token that parses as a u64 must be small;
        // vacuously true when no token parses as an integer.
        let sane = line
            .split_whitespace()
            .filter_map(|word| word.parse::<u64>().ok())
            .all(|value| value <= 10_000);
        assert!(
            disabled || sane,
            "chunk threshold should be sane or disabled, got:\n{line}"
        );
    }
}
/// `--dry-run --filter` must print a `Filter:` line and respect the glob.
#[test]
fn dry_run_with_filter_shows_filter_info() {
    let mut cmd = hf_fm();
    cmd.args(["julien-c/dummy-unknown", "--dry-run", "--filter", "*.json"]);
    let (stdout, stderr, success) = run(&mut cmd);
    assert!(success, "--dry-run --filter failed: {stderr}");
    assert!(
        stdout.contains("Filter:"),
        "filtered dry-run should show Filter: line, got:\n{stdout}"
    );
    assert!(
        stdout.contains("config.json"),
        "filtered dry-run should contain config.json, got:\n{stdout}"
    );
    assert!(
        !stdout.contains("pytorch_model.bin"),
        "filtered dry-run should NOT contain pytorch_model.bin, got:\n{stdout}"
    );
}
/// Top-level `--help` must mention the `du` subcommand.
#[test]
fn help_shows_du_subcommand() {
    let mut cmd = hf_fm();
    cmd.arg("--help");
    let (stdout, _stderr, success) = run(&mut cmd);
    assert!(success, "help should succeed");
    assert!(
        stdout.contains("du"),
        "help should mention du subcommand, got:\n{stdout}"
    );
}
/// `du` with no arguments must print either a total or an empty-cache message.
#[test]
fn du_summary_lists_cached_repos() {
    let mut cmd = hf_fm();
    cmd.args(["du"]);
    let (stdout, stderr, success) = run(&mut cmd);
    assert!(success, "du should succeed: {stderr}");
    let has_expected_output = stdout.contains("total") || stdout.contains("No models found");
    assert!(
        has_expected_output,
        "du should show total or empty message, got:\n{stdout}"
    );
}
/// `du <repo>` must list cached files and a total line. Downloads the repo
/// first so the cache is guaranteed to be populated.
#[test]
fn du_repo_shows_files() {
    let mut dl_cmd = hf_fm();
    dl_cmd.args(["julien-c/dummy-unknown"]);
    let (_, _, dl_success) = run(&mut dl_cmd);
    assert!(dl_success, "download should succeed to populate cache");
    let mut du_cmd = hf_fm();
    du_cmd.args(["du", "julien-c/dummy-unknown"]);
    let (stdout, stderr, success) = run(&mut du_cmd);
    assert!(success, "du repo should succeed: {stderr}");
    assert!(
        stdout.contains("config.json"),
        "du repo should list config.json, got:\n{stdout}"
    );
    assert!(
        stdout.contains("total"),
        "du repo should show total line, got:\n{stdout}"
    );
}
/// `du` for a repo that is not in the cache must succeed and say so.
#[test]
fn du_nonexistent_repo_shows_empty() {
    let mut cmd = hf_fm();
    cmd.args(["du", "nonexistent-org/nonexistent-model-xyz"]);
    let (stdout, stderr, success) = run(&mut cmd);
    assert!(success, "du for missing repo should succeed: {stderr}");
    assert!(
        stdout.contains("No cached files found"),
        "du for missing repo should say no files found, got:\n{stdout}"
    );
}
/// Top-level `--help` must mention the `inspect` subcommand.
#[test]
fn help_shows_inspect_subcommand() {
    let mut cmd = hf_fm();
    cmd.arg("--help");
    let (stdout, _stderr, success) = run(&mut cmd);
    assert!(success, "help should succeed");
    assert!(
        stdout.contains("inspect"),
        "help should mention inspect subcommand, got:\n{stdout}"
    );
}
/// Scans `~/.cache/huggingface/hub` and returns the first cached repo
/// (`org/name`) together with a `.safetensors` filename found in one of its
/// snapshot directories, or `None` when nothing qualifies.
///
/// Fix: a single unreadable directory entry no longer aborts the whole scan.
/// The old `let entry = entry.ok()?;` returned `None` on the first I/O error;
/// erroneous entries are now skipped via `.flatten()`, matching the error
/// handling already used by `find_all_cached_safetensors_repos`.
fn find_cached_safetensors_repo() -> Option<(String, String)> {
    let cache_dir = dirs::home_dir()?.join(".cache/huggingface/hub");
    if !cache_dir.exists() {
        return None;
    }
    for entry in std::fs::read_dir(&cache_dir).ok()?.flatten() {
        let dir_name = entry.file_name().to_string_lossy().to_string();
        let Some(repo_part) = dir_name.strip_prefix("models--") else {
            continue;
        };
        // Cache dirs encode "org/name" as "org--name"; split on the first "--".
        let repo_id = match repo_part.find("--") {
            Some(pos) => {
                let (org, name_with_sep) = repo_part.split_at(pos);
                let name = name_with_sep.get(2..).unwrap_or_default();
                format!("{org}/{name}")
            }
            None => continue,
        };
        let snapshots_dir = entry.path().join("snapshots");
        let Ok(snapshots) = std::fs::read_dir(&snapshots_dir) else {
            continue;
        };
        for snap in snapshots.flatten() {
            if !snap.path().is_dir() {
                continue;
            }
            let Ok(files) = std::fs::read_dir(snap.path()) else {
                continue;
            };
            for file in files.flatten() {
                let fname = file.file_name().to_string_lossy().to_string();
                if fname.ends_with(".safetensors") {
                    return Some((repo_id, fname));
                }
            }
        }
    }
    None
}
/// Scans `~/.cache/huggingface/hub` and returns every cached repo
/// (`org/name`) that has at least one `.safetensors` file in some snapshot
/// directory. Unreadable entries are skipped; errors never abort the scan.
fn find_all_cached_safetensors_repos() -> Vec<String> {
    let mut repos = Vec::new();
    let Some(home) = dirs::home_dir() else {
        return repos;
    };
    let cache_dir = home.join(".cache/huggingface/hub");
    if !cache_dir.exists() {
        return repos;
    }
    let Ok(entries) = std::fs::read_dir(&cache_dir) else {
        return repos;
    };
    for entry in entries.flatten() {
        let dir_name = entry.file_name().to_string_lossy().to_string();
        let Some(repo_part) = dir_name.strip_prefix("models--") else {
            continue;
        };
        // Cache dirs encode "org/name" as "org--name"; split on the first "--".
        let Some(pos) = repo_part.find("--") else {
            continue;
        };
        let (org, name_with_sep) = repo_part.split_at(pos);
        let name = name_with_sep.get(2..).unwrap_or_default();
        let repo_id = format!("{org}/{name}");
        let snapshots_dir = entry.path().join("snapshots");
        let Ok(snapshots) = std::fs::read_dir(&snapshots_dir) else {
            continue;
        };
        // A repo qualifies if any snapshot directory holds a .safetensors file.
        let has_safetensors = snapshots
            .flatten()
            .filter(|snap| snap.path().is_dir())
            .any(|snap| {
                std::fs::read_dir(snap.path())
                    .map(|files| {
                        files
                            .flatten()
                            .any(|f| f.file_name().to_string_lossy().ends_with(".safetensors"))
                    })
                    .unwrap_or(false)
            });
        if has_safetensors {
            repos.push(repo_id);
        }
    }
    repos
}
/// Like `find_cached_safetensors_repo`, but only returns a file whose
/// safetensors JSON header contains a `__metadata__` entry.
///
/// Reads the 8-byte little-endian header length, then the JSON header itself,
/// skipping files whose header is unreadable or implausibly large (> 10 MB).
///
/// Fix: a single unreadable directory entry no longer aborts the whole scan.
/// The old `let entry = entry.ok()?;` returned `None` on the first I/O error;
/// erroneous entries are now skipped via `.flatten()`, matching
/// `find_all_cached_safetensors_repos`.
fn find_cached_safetensors_with_metadata() -> Option<(String, String)> {
    use std::io::Read;
    let cache_dir = dirs::home_dir()?.join(".cache/huggingface/hub");
    if !cache_dir.exists() {
        return None;
    }
    for entry in std::fs::read_dir(&cache_dir).ok()?.flatten() {
        let dir_name = entry.file_name().to_string_lossy().to_string();
        let Some(repo_part) = dir_name.strip_prefix("models--") else {
            continue;
        };
        // Cache dirs encode "org/name" as "org--name"; split on the first "--".
        let repo_id = match repo_part.find("--") {
            Some(pos) => {
                let (org, name_with_sep) = repo_part.split_at(pos);
                let name = name_with_sep.get(2..).unwrap_or_default();
                format!("{org}/{name}")
            }
            None => continue,
        };
        let snapshots_dir = entry.path().join("snapshots");
        let Ok(snapshots) = std::fs::read_dir(&snapshots_dir) else {
            continue;
        };
        for snap in snapshots.flatten() {
            if !snap.path().is_dir() {
                continue;
            }
            let Ok(files) = std::fs::read_dir(snap.path()) else {
                continue;
            };
            for file in files.flatten() {
                let fname = file.file_name().to_string_lossy().to_string();
                if !fname.ends_with(".safetensors") {
                    continue;
                }
                let Ok(mut f) = std::fs::File::open(file.path()) else {
                    continue;
                };
                // Safetensors layout: u64 LE header length, then JSON header.
                let mut len_buf = [0u8; 8];
                if f.read_exact(&mut len_buf).is_err() {
                    continue;
                }
                let Ok(header_size) = usize::try_from(u64::from_le_bytes(len_buf)) else {
                    continue;
                };
                // Guard against corrupt files claiming an absurd header size.
                if header_size > 10_000_000 {
                    continue;
                }
                let mut json_buf = vec![0u8; header_size];
                if f.read_exact(&mut json_buf).is_err() {
                    continue;
                }
                if let Ok(text) = std::str::from_utf8(&json_buf) {
                    if text.contains("__metadata__") {
                        return Some((repo_id, fname));
                    }
                }
            }
        }
    }
    None
}
/// Returns the first cached repo (`org/name`) whose snapshot contains a
/// `model.safetensors.index.json` (i.e. a sharded safetensors model), or
/// `None` when no such repo is cached.
///
/// Fix: a single unreadable directory entry no longer aborts the whole scan.
/// The old `let entry = entry.ok()?;` returned `None` on the first I/O error;
/// erroneous entries are now skipped via `.flatten()`, matching
/// `find_all_cached_safetensors_repos`.
fn find_cached_sharded_repo() -> Option<String> {
    let cache_dir = dirs::home_dir()?.join(".cache/huggingface/hub");
    if !cache_dir.exists() {
        return None;
    }
    for entry in std::fs::read_dir(&cache_dir).ok()?.flatten() {
        let dir_name = entry.file_name().to_string_lossy().to_string();
        let Some(repo_part) = dir_name.strip_prefix("models--") else {
            continue;
        };
        // Cache dirs encode "org/name" as "org--name"; split on the first "--".
        let repo_id = match repo_part.find("--") {
            Some(pos) => {
                let (org, name_with_sep) = repo_part.split_at(pos);
                let name = name_with_sep.get(2..).unwrap_or_default();
                format!("{org}/{name}")
            }
            None => continue,
        };
        let snapshots_dir = entry.path().join("snapshots");
        let Ok(snapshots) = std::fs::read_dir(&snapshots_dir) else {
            continue;
        };
        for snap in snapshots.flatten() {
            if !snap.path().is_dir() {
                continue;
            }
            if snap.path().join("model.safetensors.index.json").exists() {
                return Some(repo_id);
            }
        }
    }
    None
}
/// `inspect <repo> <file> --cached` must print the cached source line, the
/// tensor table headers, and a tensor-count summary. Skips when no cached
/// safetensors file is available.
#[test]
fn inspect_cached_single_file() {
    let Some((repo_id, filename)) = find_cached_safetensors_repo() else {
        eprintln!("SKIP: no cached safetensors repo found");
        return;
    };
    let (stdout, stderr, success) = run(hf_fm().args(["inspect", &repo_id, &filename, "--cached"]));
    // Fix: include the actual filename in the failure message instead of the
    // misleading hard-coded "(unknown)" placeholder.
    assert!(
        success,
        "inspect --cached should succeed for {repo_id}/{filename}: {stderr}"
    );
    assert!(
        stdout.contains("Source: cached"),
        "should report cached source, got:\n{stdout}"
    );
    assert!(
        stdout.contains("Tensor") && stdout.contains("Dtype") && stdout.contains("Shape"),
        "should show tensor table headers, got:\n{stdout}"
    );
    assert!(
        stdout.contains("tensors"),
        "should show tensor count summary, got:\n{stdout}"
    );
}
/// `inspect --json` output must contain the `tensors` and `header_size` fields.
#[test]
fn inspect_cached_json_output() {
    let Some((repo_id, filename)) = find_cached_safetensors_repo() else {
        eprintln!("SKIP: no cached safetensors repo found");
        return;
    };
    let mut cmd = hf_fm();
    cmd.args(["inspect", &repo_id, &filename, "--cached", "--json"]);
    let (stdout, stderr, success) = run(&mut cmd);
    assert!(success, "inspect --cached --json should succeed: {stderr}");
    assert!(
        stdout.contains("\"tensors\""),
        "JSON should contain tensors field, got:\n{stdout}"
    );
    assert!(
        stdout.contains("\"header_size\""),
        "JSON should contain header_size field, got:\n{stdout}"
    );
}
/// `--no-metadata` must suppress the `Metadata:` line from inspect output.
#[test]
fn inspect_cached_no_metadata() {
    let Some((repo_id, filename)) = find_cached_safetensors_repo() else {
        eprintln!("SKIP: no cached safetensors repo found");
        return;
    };
    let mut cmd = hf_fm();
    cmd.args(["inspect", &repo_id, &filename, "--cached", "--no-metadata"]);
    let (stdout, stderr, success) = run(&mut cmd);
    assert!(
        success,
        "inspect --cached --no-metadata should succeed: {stderr}"
    );
    assert!(
        !stdout.contains("Metadata:"),
        "--no-metadata should suppress Metadata line, got:\n{stdout}"
    );
}
/// `inspect <repo> --cached` (no filename) must produce a tensor summary.
#[test]
fn inspect_cached_repo_summary() {
    let Some((repo_id, _filename)) = find_cached_safetensors_repo() else {
        eprintln!("SKIP: no cached safetensors repo found");
        return;
    };
    let mut cmd = hf_fm();
    cmd.args(["inspect", &repo_id, "--cached"]);
    let (stdout, stderr, success) = run(&mut cmd);
    assert!(
        success,
        "inspect --cached repo summary should succeed: {stderr}"
    );
    let mentions_tensors = stdout.contains("tensors") || stdout.contains("Tensors");
    assert!(
        mentions_tensors,
        "should mention tensors in output, got:\n{stdout}"
    );
}
/// When the safetensors header carries `__metadata__`, default inspect output
/// must include a `Metadata:` line with key=value pairs. Skips when no such
/// cached file exists.
#[test]
fn inspect_cached_metadata_present() {
    let Some((repo_id, filename)) = find_cached_safetensors_with_metadata() else {
        eprintln!("SKIP: no cached safetensors file with __metadata__ found");
        return;
    };
    let (stdout, stderr, success) = run(hf_fm().args(["inspect", &repo_id, &filename, "--cached"]));
    // Fix: include the actual filename in the failure message instead of the
    // misleading hard-coded "(unknown)" placeholder.
    assert!(
        success,
        "inspect --cached should succeed for {repo_id}/{filename}: {stderr}"
    );
    assert!(
        stdout.contains("Metadata:"),
        "output should contain Metadata: line by default, got:\n{stdout}"
    );
    // Safe: the assert above guarantees at least one matching line exists.
    let meta_line = stdout.lines().find(|l| l.contains("Metadata:")).unwrap();
    assert!(
        meta_line.contains('='),
        "Metadata line should contain key=value pairs, got: {meta_line}"
    );
}
/// Inspecting a sharded model must report the shard-index source, a shard
/// count, and a hint about per-tensor detail.
#[test]
fn inspect_cached_sharded_model() {
    let Some(repo_id) = find_cached_sharded_repo() else {
        eprintln!("SKIP: no cached sharded safetensors model found");
        return;
    };
    let mut cmd = hf_fm();
    cmd.args(["inspect", &repo_id, "--cached"]);
    let (stdout, stderr, success) = run(&mut cmd);
    assert!(
        success,
        "inspect --cached sharded model should succeed: {stderr}"
    );
    assert!(
        stdout.contains("shard index"),
        "sharded model should show shard index source, got:\n{stdout}"
    );
    let shows_shard_count = stdout.contains("shards") || stdout.contains("shard,");
    assert!(
        shows_shard_count,
        "should show shard count, got:\n{stdout}"
    );
    assert!(
        stdout.contains("Hint:"),
        "should show per-tensor detail hint, got:\n{stdout}"
    );
}
/// `inspect --filter embed` must limit tensor rows to those matching the
/// filter term while keeping headers and summary lines, and the summary must
/// show a filtered/total count and mention the filter.
#[test]
fn inspect_cached_filter() {
    let Some((repo_id, filename)) = find_cached_safetensors_repo() else {
        eprintln!("SKIP: no cached safetensors repo found");
        return;
    };
    // Baseline: unfiltered output should contain tensor rows at all.
    let mut all_cmd = hf_fm();
    all_cmd.args(["inspect", &repo_id, &filename, "--cached"]);
    let (stdout_all, _stderr, success) = run(&mut all_cmd);
    assert!(success, "inspect --cached should succeed");
    assert!(stdout_all.contains("tensor"), "unfiltered output should show tensors");
    let mut filtered_cmd = hf_fm();
    filtered_cmd.args([
        "inspect", &repo_id, &filename, "--cached", "--filter", "embed",
    ]);
    let (stdout, stderr, success) = run(&mut filtered_cmd);
    assert!(
        success,
        "inspect --cached --filter should succeed: {stderr}"
    );
    // Header/summary lines are exempt from the filter check; everything else
    // must match the filter term.
    let is_boilerplate = |t: &str| {
        t.is_empty()
            || t.starts_with("Repo:")
            || t.starts_with("File:")
            || t.starts_with("Source:")
            || t.starts_with("Header:")
            || t.starts_with("Metadata:")
            || t.starts_with("Tensor")
            || t.starts_with('\u{2500}')
            || t.contains("tensor")
            || t.contains("params")
    };
    for line in stdout.lines() {
        let trimmed = line.trim();
        if is_boilerplate(trimmed) {
            continue;
        }
        assert!(
            trimmed.contains("embed"),
            "filtered line should contain 'embed': {trimmed}"
        );
    }
    assert!(
        stdout.contains('/'),
        "filtered summary should show filtered/total format, got:\n{stdout}"
    );
    assert!(
        stdout.contains("filter:"),
        "filtered summary should mention filter, got:\n{stdout}"
    );
}
/// Top-level `--help` must mention the `diff` subcommand.
#[test]
fn help_shows_diff_subcommand() {
    let mut cmd = hf_fm();
    cmd.arg("--help");
    let (stdout, _stderr, success) = run(&mut cmd);
    assert!(success, "help should succeed");
    assert!(
        stdout.contains("diff"),
        "help should mention diff subcommand, got:\n{stdout}"
    );
}
/// Diffing a model against itself must report zero only-A/only-B/differ and a
/// non-zero matching count.
#[test]
fn diff_cached_identical_model() {
    let Some((repo_id, _filename)) = find_cached_safetensors_repo() else {
        eprintln!("SKIP: no cached safetensors repo found");
        return;
    };
    let mut cmd = hf_fm();
    cmd.args(["diff", &repo_id, &repo_id, "--cached"]);
    let (stdout, stderr, success) = run(&mut cmd);
    assert!(success, "diff --cached self-diff should succeed: {stderr}");
    for (needle, label) in [
        ("only-A: 0", "only-A"),
        ("only-B: 0", "only-B"),
        ("differ: 0", "differ"),
    ] {
        assert!(
            stdout.contains(needle),
            "self-diff should have 0 {label}, got:\n{stdout}"
        );
    }
    assert!(
        stdout.contains("Matching:") && !stdout.contains("Matching: 0"),
        "self-diff should have matching tensors, got:\n{stdout}"
    );
}
/// Diffing two distinct cached models must label both sides and print a
/// summary line. Skips unless at least two safetensors repos are cached.
#[test]
fn diff_cached_different_models() {
    let repos = find_all_cached_safetensors_repos();
    if repos.len() < 2 {
        eprintln!("SKIP: need at least 2 cached safetensors repos for diff test");
        return;
    }
    let (repo_a, repo_b) = (&repos[0], &repos[1]);
    let mut cmd = hf_fm();
    cmd.args(["diff", repo_a.as_str(), repo_b.as_str(), "--cached"]);
    let (stdout, stderr, success) = run(&mut cmd);
    assert!(
        success,
        "diff --cached different models should succeed: {stderr}"
    );
    assert!(
        stdout.contains(&format!("A: {repo_a}")),
        "should show repo A label, got:\n{stdout}"
    );
    assert!(
        stdout.contains(&format!("B: {repo_b}")),
        "should show repo B label, got:\n{stdout}"
    );
    assert!(
        stdout.contains("A:") && stdout.contains("tensors"),
        "should show summary line, got:\n{stdout}"
    );
}