Skip to main content

hf_fetch_model/
repo.rs

1// SPDX-License-Identifier: MIT OR Apache-2.0
2
3//! Repository file listing via the `HuggingFace` API.
4//!
5//! This module provides functions to list all files in a `HuggingFace` model
6//! repository, using the `hf-hub` crate's `info()` API and optionally
7//! fetching extended metadata (sizes and SHA256 hashes) via a direct HTTP call.
8
9use hf_hub::api::tokio::ApiRepo;
10use serde::Deserialize;
11
12use crate::error::FetchError;
13
/// A single file entry in a `HuggingFace` repository listing.
///
/// Only `filename` is always present; `size` and `sha256` are `None` when
/// the listing source did not report them.
///
/// Entries compare by value (`PartialEq`/`Eq`), so two listings can be
/// checked for equality directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RepoFile {
    /// The relative path of the file within the repository.
    pub filename: String,
    /// File size in bytes (if known from API metadata).
    pub size: Option<u64>,
    /// SHA256 hex digest (if the file is stored in LFS).
    pub sha256: Option<String>,
}
24
25/// Lists all files in the given repository.
26///
27/// # Errors
28///
29/// Returns [`FetchError::Api`] if the `HuggingFace` API request fails.
30/// Returns [`FetchError::RepoNotFound`] if the repository does not exist.
31pub async fn list_repo_files(repo: &ApiRepo, repo_id: String) -> Result<Vec<RepoFile>, FetchError> {
32    let info = repo.info().await.map_err(|e| {
33        // BORROW: explicit .to_string() for error message inspection
34        let msg = e.to_string();
35        if msg.contains("404") {
36            FetchError::RepoNotFound { repo_id }
37        } else {
38            FetchError::Api(e)
39        }
40    })?;
41
42    let files = info
43        .siblings
44        .into_iter()
45        .map(|s| RepoFile {
46            filename: s.rfilename,
47            size: None,
48            sha256: None,
49        })
50        .collect();
51
52    Ok(files)
53}
54
55// --- Direct HF API metadata (for SHA256 and file sizes) ---
56
57/// Raw JSON sibling entry from the `HuggingFace` API.
58#[derive(Debug, Deserialize)]
59struct ApiSibling {
60    rfilename: String,
61    #[serde(default)]
62    size: Option<u64>,
63    #[serde(default)]
64    lfs: Option<ApiLfs>,
65}
66
67/// LFS metadata attached to a sibling entry.
68#[derive(Debug, Deserialize)]
69struct ApiLfs {
70    sha256: String,
71    size: u64,
72}
73
74/// Raw JSON response from `GET /api/models/{repo_id}`.
75#[derive(Debug, Deserialize)]
76struct ApiModelInfo {
77    siblings: Vec<ApiSibling>,
78}
79
80/// Fetches extended file metadata (sizes and SHA256 hashes) via the `HuggingFace` REST API.
81///
82/// This makes a direct HTTP call to `https://huggingface.co/api/models/{repo_id}?blobs=true`
83/// to retrieve file sizes and LFS metadata that `hf-hub`'s `info()` does not expose.
84///
85/// # Errors
86///
87/// Returns [`FetchError::Http`] if the HTTP request fails.
88/// Returns [`FetchError::RepoNotFound`] if the repository does not exist.
89pub async fn list_repo_files_with_metadata(
90    repo_id: &str,
91    token: Option<&str>,
92    revision: Option<&str>,
93) -> Result<Vec<RepoFile>, FetchError> {
94    let mut url = format!("https://huggingface.co/api/models/{repo_id}?blobs=true");
95    if let Some(rev) = revision {
96        url = format!("{url}&revision={rev}");
97    }
98
99    let client = reqwest::Client::new();
100    // BORROW: explicit .as_str() instead of Deref coercion
101    let mut request = client.get(url.as_str());
102    if let Some(t) = token {
103        request = request.bearer_auth(t);
104    }
105
106    let response = request
107        .send()
108        .await
109        .map_err(|e| FetchError::Http(e.to_string()))?;
110
111    if response.status() == reqwest::StatusCode::NOT_FOUND {
112        return Err(FetchError::RepoNotFound {
113            repo_id: repo_id.to_owned(),
114        });
115    }
116
117    if !response.status().is_success() {
118        return Err(FetchError::Http(format!(
119            "HF API returned status {}",
120            response.status()
121        )));
122    }
123
124    let info: ApiModelInfo = response
125        .json()
126        .await
127        .map_err(|e| FetchError::Http(e.to_string()))?;
128
129    let files = info
130        .siblings
131        .into_iter()
132        .map(|s| {
133            let (size, sha256) = match s.lfs {
134                Some(lfs) => (Some(lfs.size), Some(lfs.sha256)),
135                None => (s.size, None),
136            };
137            RepoFile {
138                filename: s.rfilename,
139                size,
140                sha256,
141            }
142        })
143        .collect();
144
145    Ok(files)
146}