1use std::collections::BTreeMap;
11use std::path::{Path, PathBuf};
12
13use crate::error::FetchError;
14
/// Maps a cache folder name like `models--org--name` back to the repo id
/// `org/name`; returns `None` for folders that are not model caches.
fn repo_id_from_folder_name(dir_name: &str) -> Option<String> {
    let rest = dir_name.strip_prefix("models--")?;

    // The first `--` separates org from repo name; single-segment repos
    // (no org) keep the remainder as-is.
    let repo_id = match rest.split_once("--") {
        Some((org, name)) => format!("{org}/{name}"),
        None => rest.to_owned(),
    };

    Some(repo_id)
}
33
34pub fn hf_cache_dir() -> Result<PathBuf, FetchError> {
44 if let Ok(home) = std::env::var("HF_HOME") {
45 let mut path = PathBuf::from(home);
46 path.push("hub");
47 return Ok(path);
48 }
49
50 let home = dirs::home_dir().ok_or_else(|| FetchError::Io {
51 path: PathBuf::from("~"),
52 source: std::io::Error::new(std::io::ErrorKind::NotFound, "home directory not found"),
53 })?;
54
55 let mut path = home;
56 path.push(".cache");
57 path.push("huggingface");
58 path.push("hub");
59 Ok(path)
60}
61
62pub fn list_cached_families() -> Result<BTreeMap<String, Vec<String>>, FetchError> {
76 let cache_dir = hf_cache_dir()?;
77
78 if !cache_dir.exists() {
79 return Ok(BTreeMap::new());
80 }
81
82 let entries = std::fs::read_dir(&cache_dir).map_err(|e| FetchError::Io {
83 path: cache_dir.clone(),
84 source: e,
85 })?;
86
87 let mut families: BTreeMap<String, Vec<String>> = BTreeMap::new();
88
89 for entry in entries {
90 let Ok(entry) = entry else { continue };
91
92 let dir_name = entry.file_name();
93 let dir_str = dir_name.to_string_lossy();
95
96 let Some(repo_id) = repo_id_from_folder_name(&dir_str) else {
97 continue;
98 };
99
100 let snapshots_dir = crate::cache_layout::snapshots_dir(&entry.path());
102 if !snapshots_dir.exists() {
103 continue;
104 }
105
106 if let Some(model_type) = find_model_type_in_snapshots(&snapshots_dir) {
107 families.entry(model_type).or_default().push(repo_id);
108 }
109 }
110
111 for repos in families.values_mut() {
113 repos.sort();
114 }
115
116 Ok(families)
117}
118
119fn find_model_type_in_snapshots(snapshots_dir: &std::path::Path) -> Option<String> {
123 let snapshots = std::fs::read_dir(snapshots_dir).ok()?;
124
125 for snap_entry in snapshots {
126 let Ok(snap_entry) = snap_entry else { continue };
127 let config_path = snap_entry.path().join("config.json");
128
129 if !config_path.exists() {
130 continue;
131 }
132
133 if let Some(model_type) = extract_model_type(&config_path) {
134 return Some(model_type);
135 }
136 }
137
138 None
139}
140
141fn extract_model_type(config_path: &std::path::Path) -> Option<String> {
143 let contents = std::fs::read_to_string(config_path).ok()?;
144 let value: serde_json::Value = serde_json::from_str(contents.as_str()).ok()?;
146 value.get("model_type")?.as_str().map(String::from)
148}
149
/// Local cache state of a single remote file.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub enum FileStatus {
    /// Present locally with at least the expected size (or the expected size
    /// is unknown).
    Complete {
        /// Size of the local file in bytes.
        local_size: u64,
    },
    /// A download exists but is smaller than the remote size.
    Partial {
        /// Bytes available locally so far.
        local_size: u64,
        /// Remote size in bytes (0 when unknown).
        expected_size: u64,
    },
    /// Not present locally.
    Missing {
        /// Remote size in bytes (0 when unknown).
        expected_size: u64,
    },
}
174
/// Per-file cache status report for one repo, produced by `repo_status`.
#[derive(Debug, Clone)]
pub struct RepoStatus {
    /// Repo id in `org/name` (or single-segment) form.
    pub repo_id: String,
    /// Commit hash the local ref file for the revision points at, if present.
    pub commit_hash: Option<String>,
    /// Root of this repo's cache folder.
    pub cache_path: PathBuf,
    /// `(remote filename, status)` pairs, sorted by filename.
    pub files: Vec<(String, FileStatus)>,
}
187
188impl RepoStatus {
189 #[must_use]
191 pub fn complete_count(&self) -> usize {
192 self.files
193 .iter()
194 .filter(|(_, s)| matches!(s, FileStatus::Complete { .. }))
195 .count()
196 }
197
198 #[must_use]
200 pub fn partial_count(&self) -> usize {
201 self.files
202 .iter()
203 .filter(|(_, s)| matches!(s, FileStatus::Partial { .. }))
204 .count()
205 }
206
207 #[must_use]
209 pub fn missing_count(&self) -> usize {
210 self.files
211 .iter()
212 .filter(|(_, s)| matches!(s, FileStatus::Missing { .. }))
213 .count()
214 }
215}
216
/// Compares the remote file listing of `repo_id` (at `revision`, defaulting
/// to `"main"`) against the local HF cache and reports a per-file status.
///
/// Requires network access: the remote listing comes from
/// `crate::repo::list_repo_files_with_metadata`; `token` is forwarded to the
/// HTTP client (presumably for private repos — confirm against `build_client`).
///
/// # Errors
/// Returns `FetchError` when the cache dir cannot be resolved, the client
/// cannot be built, or the remote listing fails.
pub async fn repo_status(
    repo_id: &str,
    token: Option<&str>,
    revision: Option<&str>,
) -> Result<RepoStatus, FetchError> {
    let revision = revision.unwrap_or("main");
    let cache_dir = hf_cache_dir()?;
    let repo_dir = crate::cache_layout::repo_dir(&cache_dir, repo_id);

    // Commit hash recorded locally for this revision, if the ref file exists.
    let commit_hash = read_ref(&repo_dir, revision);

    let client = crate::chunked::build_client(token)?;
    let remote_files =
        crate::repo::list_repo_files_with_metadata(repo_id, token, Some(revision), &client).await?;

    // Without a known commit hash there is no snapshot directory to inspect,
    // so every file will be reported Missing below.
    let snapshot_dir = commit_hash
        .as_deref()
        .map(|hash| crate::cache_layout::snapshot_dir(&repo_dir, hash));

    let blobs_dir = crate::cache_layout::blobs_dir(&repo_dir);
    let has_any_partial = has_partial_blob(&blobs_dir);

    let mut files: Vec<(String, FileStatus)> = Vec::with_capacity(remote_files.len());

    for remote in &remote_files {
        // A remote size of None becomes 0, which also disables the
        // "local smaller than expected" Partial check below.
        let expected_size = remote.size.unwrap_or(0);

        let local_path = snapshot_dir
            .as_ref()
            .map(|dir| dir.join(remote.filename.as_str()));

        let status = if let Some(ref path) = local_path {
            if path.exists() {
                let local_size = std::fs::metadata(path).map_or(0, |m| m.len());

                if expected_size > 0 && local_size < expected_size {
                    FileStatus::Partial {
                        local_size,
                        expected_size,
                    }
                } else {
                    FileStatus::Complete { local_size }
                }
            } else if has_any_partial {
                // NOTE(review): heuristic — any `.chunked.part` blob in this
                // repo marks EVERY missing file as Partial, and the size of
                // the first part blob found is reported for each of them; the
                // part file cannot be attributed to a specific remote file
                // from here. Confirm this is the intended UX.
                let part_size = find_partial_blob_size(&blobs_dir);
                FileStatus::Partial {
                    local_size: part_size,
                    expected_size,
                }
            } else {
                FileStatus::Missing { expected_size }
            }
        } else {
            FileStatus::Missing { expected_size }
        };

        files.push((remote.filename.clone(), status));
    }

    // Deterministic, name-sorted output.
    files.sort_by(|(a, _), (b, _)| a.cmp(b));

    Ok(RepoStatus {
        repo_id: repo_id.to_owned(),
        commit_hash,
        cache_path: repo_dir,
        files,
    })
}
317
/// Aggregate cache statistics for one cached repo.
#[derive(Debug, Clone)]
pub struct CachedModelSummary {
    /// Repo id reconstructed from the cache folder name.
    pub repo_id: String,
    /// Number of files found under the repo's snapshot directories.
    pub file_count: usize,
    /// Total size in bytes of those snapshot files.
    pub total_size: u64,
    /// Whether an in-progress `.chunked.part` blob exists for this repo.
    pub has_partial: bool,
    /// Most recent modification time seen among snapshot files, if any.
    pub last_modified: Option<std::time::SystemTime>,
}
334
335pub fn cache_summary() -> Result<Vec<CachedModelSummary>, FetchError> {
344 let cache_dir = hf_cache_dir()?;
345
346 if !cache_dir.exists() {
347 return Ok(Vec::new());
348 }
349
350 let entries = std::fs::read_dir(&cache_dir).map_err(|e| FetchError::Io {
351 path: cache_dir.clone(),
352 source: e,
353 })?;
354
355 let mut summaries: Vec<CachedModelSummary> = Vec::new();
356
357 for entry in entries {
358 let Ok(entry) = entry else { continue };
359 let dir_name = entry.file_name();
360 let dir_str = dir_name.to_string_lossy();
362
363 let Some(repo_id) = repo_id_from_folder_name(&dir_str) else {
364 continue;
365 };
366
367 let repo_dir = entry.path();
368
369 let (file_count, total_size, last_modified) = count_snapshot_files(&repo_dir);
371
372 let has_partial = find_partial_blob_size(&crate::cache_layout::blobs_dir(&repo_dir)) > 0;
374
375 summaries.push(CachedModelSummary {
376 repo_id,
377 file_count,
378 total_size,
379 has_partial,
380 last_modified,
381 });
382 }
383
384 summaries.sort_by(|a, b| a.repo_id.cmp(&b.repo_id));
385
386 Ok(summaries)
387}
388
389pub fn repo_disk_usage(repo_id: &str) -> Result<(usize, u64), FetchError> {
398 let cache_dir = hf_cache_dir()?;
399 let repo_dir = crate::cache_layout::repo_dir(&cache_dir, repo_id);
400 let (file_count, total_size, _) = count_snapshot_files(&repo_dir);
401 Ok((file_count, total_size))
402}
403
404pub fn repo_has_partial(repo_id: &str) -> Result<bool, FetchError> {
413 let cache_dir = hf_cache_dir()?;
414 let repo_dir = crate::cache_layout::repo_dir(&cache_dir, repo_id);
415 let blobs_dir = crate::cache_layout::blobs_dir(&repo_dir);
416 Ok(find_partial_blob_size(&blobs_dir) > 0)
417}
418
419fn count_snapshot_files(repo_dir: &Path) -> (usize, u64, Option<std::time::SystemTime>) {
422 let snapshots_dir = crate::cache_layout::snapshots_dir(repo_dir);
423 let Ok(snapshots) = std::fs::read_dir(snapshots_dir) else {
424 return (0, 0, None);
425 };
426
427 let mut file_count: usize = 0;
428 let mut total_size: u64 = 0;
429 let mut latest: Option<std::time::SystemTime> = None;
430
431 for snap_entry in snapshots {
432 let Ok(snap_entry) = snap_entry else { continue };
433 let snap_path = snap_entry.path();
434 if !snap_path.is_dir() {
435 continue;
436 }
437 count_files_recursive(&snap_path, &mut file_count, &mut total_size, &mut latest);
438 }
439
440 (file_count, total_size, latest)
441}
442
/// Recursively accumulates, for every file under `dir`: the file count, the
/// total byte size, and the latest modification time.
/// Files whose metadata cannot be read still count toward `count` but
/// contribute nothing to `total` or `latest`.
fn count_files_recursive(
    dir: &Path,
    count: &mut usize,
    total: &mut u64,
    latest: &mut Option<std::time::SystemTime>,
) {
    let Ok(entries) = std::fs::read_dir(dir) else {
        return;
    };

    for entry in entries.flatten() {
        let path = entry.path();
        if path.is_dir() {
            count_files_recursive(&path, count, total, latest);
            continue;
        }

        *count += 1;
        if let Ok(meta) = entry.metadata() {
            *total += meta.len();
            if let Ok(modified) = meta.modified() {
                // Keep the newest timestamp seen so far.
                if latest.map_or(true, |prev| modified > prev) {
                    *latest = Some(modified);
                }
            }
        }
    }
}
474
475#[must_use]
480pub fn read_ref(repo_dir: &Path, revision: &str) -> Option<String> {
481 let ref_path = crate::cache_layout::ref_path(repo_dir, revision);
482 std::fs::read_to_string(ref_path)
483 .ok()
484 .map(|s| s.trim().to_owned())
486 .filter(|s| !s.is_empty())
487}
488
489fn has_partial_blob(blobs_dir: &Path) -> bool {
495 find_partial_blob_size(blobs_dir) > 0
496}
497
/// Returns the size of the first `.chunked.part` blob found in `blobs_dir`,
/// or 0 when the directory is unreadable, no partial exists, or its metadata
/// cannot be read.
fn find_partial_blob_size(blobs_dir: &Path) -> u64 {
    std::fs::read_dir(blobs_dir)
        .into_iter()
        .flatten()
        .flatten()
        .find(|entry| entry.file_name().to_string_lossy().ends_with(".chunked.part"))
        .map_or(0, |entry| entry.metadata().map_or(0, |m| m.len()))
}
515
/// One in-progress (`.chunked.part`) download found in a repo's blobs dir.
#[derive(Debug, Clone)]
pub struct PartialFile {
    /// Repo the partial blob belongs to.
    pub repo_id: String,
    /// Name of the partial blob file (ends with `.chunked.part`).
    pub filename: String,
    /// Full path to the partial blob.
    pub path: PathBuf,
    /// Current size of the partial blob in bytes (0 if metadata failed).
    pub size: u64,
}
528
529pub fn find_partial_files(repo_filter: Option<&str>) -> Result<Vec<PartialFile>, FetchError> {
540 let cache_dir = hf_cache_dir()?;
541
542 if !cache_dir.exists() {
543 return Ok(Vec::new());
544 }
545
546 let entries = std::fs::read_dir(&cache_dir).map_err(|e| FetchError::Io {
547 path: cache_dir.clone(),
549 source: e,
550 })?;
551
552 let mut partials: Vec<PartialFile> = Vec::new();
553
554 for entry in entries {
555 let Ok(entry) = entry else { continue };
556 let dir_name = entry.file_name();
557 let dir_str = dir_name.to_string_lossy();
559
560 let Some(repo_id) = repo_id_from_folder_name(&dir_str) else {
561 continue;
562 };
563
564 if let Some(filter) = repo_filter {
567 if repo_id.as_str() != filter {
568 continue;
569 }
570 }
571
572 let blobs_dir = crate::cache_layout::blobs_dir(&entry.path());
573 let Ok(blob_entries) = std::fs::read_dir(&blobs_dir) else {
574 continue;
575 };
576
577 for blob_entry in blob_entries {
578 let Ok(blob_entry) = blob_entry else { continue };
579 let name = blob_entry.file_name();
580 let name_str = name.to_string_lossy();
582 if name_str.ends_with(".chunked.part") {
583 let size = blob_entry.metadata().map_or(0, |m| m.len());
584 partials.push(PartialFile {
585 repo_id: repo_id.clone(),
587 filename: name_str.to_string(),
589 path: blob_entry.path(),
590 size,
591 });
592 }
593 }
594 }
595
596 Ok(partials)
597}
598
/// Size entry for one file inside a repo's snapshots.
#[derive(Debug, Clone)]
pub struct CacheFileUsage {
    /// Path relative to the snapshot root, joined with `/`.
    pub filename: String,
    /// File size in bytes (0 if metadata could not be read).
    pub size: u64,
}
607
608pub fn cache_repo_usage(repo_id: &str) -> Result<Vec<CacheFileUsage>, FetchError> {
620 let cache_dir = hf_cache_dir()?;
621 let repo_dir = crate::cache_layout::repo_dir(&cache_dir, repo_id);
622
623 if !repo_dir.exists() {
624 return Ok(Vec::new());
625 }
626
627 let snapshots_dir = crate::cache_layout::snapshots_dir(&repo_dir);
628 let Ok(snapshots) = std::fs::read_dir(&snapshots_dir) else {
629 return Ok(Vec::new());
630 };
631
632 let mut files: Vec<CacheFileUsage> = Vec::new();
633
634 for snap_entry in snapshots {
635 let Ok(snap_entry) = snap_entry else { continue };
636 let snap_path = snap_entry.path();
637 if !snap_path.is_dir() {
638 continue;
639 }
640 collect_snapshot_files(&snap_path, "", &mut files);
641 }
642
643 files.sort_by_key(|f| std::cmp::Reverse(f.size));
644
645 Ok(files)
646}
647
648fn collect_snapshot_files(dir: &Path, prefix: &str, files: &mut Vec<CacheFileUsage>) {
653 let Ok(entries) = std::fs::read_dir(dir) else {
654 return;
655 };
656
657 for entry in entries {
658 let Ok(entry) = entry else { continue };
659 let path = entry.path();
660 let name = entry.file_name().to_string_lossy().to_string();
662
663 if path.is_dir() {
664 let child_prefix = if prefix.is_empty() {
665 name
666 } else {
667 format!("{prefix}/{name}")
668 };
669 collect_snapshot_files(&path, &child_prefix, files);
670 } else {
671 let filename = if prefix.is_empty() {
672 name
673 } else {
674 format!("{prefix}/{name}")
675 };
676 let size = entry.metadata().map_or(0, |m| m.len());
677 files.push(CacheFileUsage { filename, size });
678 }
679 }
680}