Skip to main content

hf_fetch_model/
lib.rs

1// SPDX-License-Identifier: MIT OR Apache-2.0
2
3//! # hf-fetch-model
4//!
5//! Fast `HuggingFace` model downloads for Rust.
6//!
7//! An embeddable library for downloading `HuggingFace` model repositories
8//! with maximum throughput. Wraps [`hf_hub`] and adds repo-level orchestration.
9//!
10//! ## Quick Start
11//!
12//! ```rust,no_run
13//! # async fn example() -> Result<(), hf_fetch_model::FetchError> {
14//! let outcome = hf_fetch_model::download("julien-c/dummy-unknown".to_owned()).await?;
15//! println!("Model at: {}", outcome.inner().display());
16//! # Ok(())
17//! # }
18//! ```
19//!
20//! ## Configured Download
21//!
22//! ```rust,no_run
23//! # async fn example() -> Result<(), hf_fetch_model::FetchError> {
24//! use hf_fetch_model::FetchConfig;
25//!
26//! let config = FetchConfig::builder()
27//!     .filter("*.safetensors")
28//!     .filter("*.json")
29//!     .on_progress(|e| {
30//!         println!("{}: {:.1}%", e.filename, e.percent);
31//!     })
32//!     .build()?;
33//!
34//! let outcome = hf_fetch_model::download_with_config(
35//!     "google/gemma-2-2b".to_owned(),
36//!     &config,
37//! ).await?;
38//! // outcome.is_cached() tells you if it came from local cache
39//! let path = outcome.into_inner();
40//! # Ok(())
41//! # }
42//! ```
43//!
44//! ## `HuggingFace` Cache
45//!
46//! Downloaded files are stored in the standard `HuggingFace` cache directory
47//! (`~/.cache/huggingface/hub/`), ensuring compatibility with Python tooling.
48//!
49//! ## Authentication
50//!
51//! Set the `HF_TOKEN` environment variable to access private or gated models,
52//! or use [`FetchConfig::builder().token()`](FetchConfigBuilder::token).
53
54pub mod cache;
55pub mod checksum;
56mod chunked;
57pub mod config;
58pub mod discover;
59pub mod download;
60pub mod error;
61pub mod inspect;
62pub mod plan;
63pub mod progress;
64pub mod repo;
65mod retry;
66
67pub use config::{
68    compile_glob_patterns, file_matches, has_glob_chars, FetchConfig, FetchConfigBuilder, Filter,
69};
70pub use discover::{GateStatus, ModelCardMetadata, SearchResult};
71pub use download::DownloadOutcome;
72pub use error::{FetchError, FileFailure};
73pub use inspect::AdapterConfig;
74pub use plan::{download_plan, DownloadPlan, FilePlan};
75pub use progress::ProgressEvent;
76
77use std::collections::HashMap;
78use std::path::PathBuf;
79
80use hf_hub::{Repo, RepoType};
81
82/// Downloads all files from a `HuggingFace` model repository.
83///
84/// Uses high-throughput mode for maximum download speed, including
85/// auto-tuned concurrency, chunked multi-connection downloads for large
86/// files, and plan-optimized settings based on file size distribution.
87/// Files are stored in the standard `HuggingFace` cache layout
88/// (`~/.cache/huggingface/hub/`).
89///
90/// Authentication is handled via the `HF_TOKEN` environment variable when set.
91///
92/// For filtering, progress, and other options, use [`download_with_config()`].
93///
94/// # Arguments
95///
96/// * `repo_id` — The repository identifier (e.g., `"google/gemma-2-2b-it"`).
97///
98/// # Returns
99///
100/// The path to the snapshot directory containing all downloaded files.
101///
102/// # Errors
103///
104/// * [`FetchError::Api`] — if the `HuggingFace` API or download fails (includes auth failures).
105/// * [`FetchError::RepoNotFound`] — if the repository does not exist.
106/// * [`FetchError::InvalidPattern`] — if the default config fails to build (should not happen).
107pub async fn download(repo_id: String) -> Result<DownloadOutcome<PathBuf>, FetchError> {
108    let config = FetchConfig::builder().build()?;
109    download_with_config(repo_id, &config).await
110}
111
112/// Downloads files from a `HuggingFace` model repository using the given configuration.
113///
114/// Supports filtering, progress reporting, custom revision, authentication,
115/// and concurrency settings via [`FetchConfig`].
116///
117/// # Arguments
118///
119/// * `repo_id` — The repository identifier (e.g., `"google/gemma-2-2b-it"`).
120/// * `config` — Download configuration (see [`FetchConfig::builder()`]).
121///
122/// # Returns
123///
124/// The path to the snapshot directory containing all downloaded files.
125///
126/// # Errors
127///
128/// * [`FetchError::Api`] — if the `HuggingFace` API or download fails (includes auth failures).
129/// * [`FetchError::RepoNotFound`] — if the repository does not exist.
130pub async fn download_with_config(
131    repo_id: String,
132    config: &FetchConfig,
133) -> Result<DownloadOutcome<PathBuf>, FetchError> {
134    let mut builder = hf_hub::api::tokio::ApiBuilder::new().high();
135
136    if let Some(ref token) = config.token {
137        // BORROW: explicit .clone() to pass owned String
138        builder = builder.with_token(Some(token.clone()));
139    }
140
141    if let Some(ref dir) = config.output_dir {
142        // BORROW: explicit .clone() for owned PathBuf
143        builder = builder.with_cache_dir(dir.clone());
144    }
145
146    let api = builder.build().map_err(FetchError::Api)?;
147
148    let hf_repo = match config.revision {
149        Some(ref rev) => {
150            // BORROW: explicit .clone() for owned String arguments
151            Repo::with_revision(repo_id.clone(), RepoType::Model, rev.clone())
152        }
153        None => Repo::new(repo_id.clone(), RepoType::Model),
154    };
155
156    let repo = api.repo(hf_repo);
157    download::download_all_files(repo, repo_id, Some(config)).await
158}
159
160/// Blocking version of [`download()`] for non-async callers.
161///
162/// Creates a Tokio runtime internally. Do not call from within
163/// an existing async context (use [`download()`] instead).
164///
165/// # Errors
166///
167/// Same as [`download()`].
168pub fn download_blocking(repo_id: String) -> Result<DownloadOutcome<PathBuf>, FetchError> {
169    let rt = tokio::runtime::Runtime::new().map_err(|e| FetchError::Io {
170        path: PathBuf::from("<runtime>"),
171        source: e,
172    })?;
173    rt.block_on(download(repo_id))
174}
175
176/// Blocking version of [`download_with_config()`] for non-async callers.
177///
178/// Creates a Tokio runtime internally. Do not call from within
179/// an existing async context (use [`download_with_config()`] instead).
180///
181/// # Errors
182///
183/// Same as [`download_with_config()`].
184pub fn download_with_config_blocking(
185    repo_id: String,
186    config: &FetchConfig,
187) -> Result<DownloadOutcome<PathBuf>, FetchError> {
188    let rt = tokio::runtime::Runtime::new().map_err(|e| FetchError::Io {
189        path: PathBuf::from("<runtime>"),
190        source: e,
191    })?;
192    rt.block_on(download_with_config(repo_id, config))
193}
194
195/// Downloads all files from a `HuggingFace` model repository and returns
196/// a filename → path map.
197///
198/// Each key is the relative filename within the repository (e.g.,
199/// `"config.json"`, `"model.safetensors"`), and each value is the
200/// absolute local path to the downloaded file.
201///
202/// Uses the same high-throughput defaults as [`download()`]: auto-tuned
203/// concurrency and chunked multi-connection downloads for large files.
204///
205/// For filtering, progress, and other options, use
206/// [`download_files_with_config()`].
207///
208/// # Arguments
209///
210/// * `repo_id` — The repository identifier (e.g., `"google/gemma-2-2b-it"`).
211///
212/// # Errors
213///
214/// * [`FetchError::Api`] — if the `HuggingFace` API or download fails (includes auth failures).
215/// * [`FetchError::RepoNotFound`] — if the repository does not exist.
216/// * [`FetchError::InvalidPattern`] — if the default config fails to build (should not happen).
217pub async fn download_files(
218    repo_id: String,
219) -> Result<DownloadOutcome<HashMap<String, PathBuf>>, FetchError> {
220    let config = FetchConfig::builder().build()?;
221    download_files_with_config(repo_id, &config).await
222}
223
224/// Downloads files from a `HuggingFace` model repository using the given
225/// configuration and returns a filename → path map.
226///
227/// Each key is the relative filename within the repository (e.g.,
228/// `"config.json"`, `"model.safetensors"`), and each value is the
229/// absolute local path to the downloaded file.
230///
231/// # Arguments
232///
233/// * `repo_id` — The repository identifier (e.g., `"google/gemma-2-2b-it"`).
234/// * `config` — Download configuration (see [`FetchConfig::builder()`]).
235///
236/// # Errors
237///
238/// * [`FetchError::Api`] — if the `HuggingFace` API or download fails (includes auth failures).
239/// * [`FetchError::RepoNotFound`] — if the repository does not exist.
240pub async fn download_files_with_config(
241    repo_id: String,
242    config: &FetchConfig,
243) -> Result<DownloadOutcome<HashMap<String, PathBuf>>, FetchError> {
244    let mut builder = hf_hub::api::tokio::ApiBuilder::new().high();
245
246    if let Some(ref token) = config.token {
247        // BORROW: explicit .clone() to pass owned String
248        builder = builder.with_token(Some(token.clone()));
249    }
250
251    if let Some(ref dir) = config.output_dir {
252        // BORROW: explicit .clone() for owned PathBuf
253        builder = builder.with_cache_dir(dir.clone());
254    }
255
256    let api = builder.build().map_err(FetchError::Api)?;
257
258    let hf_repo = match config.revision {
259        Some(ref rev) => {
260            // BORROW: explicit .clone() for owned String arguments
261            Repo::with_revision(repo_id.clone(), RepoType::Model, rev.clone())
262        }
263        None => Repo::new(repo_id.clone(), RepoType::Model),
264    };
265
266    let repo = api.repo(hf_repo);
267    download::download_all_files_map(repo, repo_id, Some(config)).await
268}
269
270/// Blocking version of [`download_files()`] for non-async callers.
271///
272/// Creates a Tokio runtime internally. Do not call from within
273/// an existing async context (use [`download_files()`] instead).
274///
275/// # Errors
276///
277/// Same as [`download_files()`].
278pub fn download_files_blocking(
279    repo_id: String,
280) -> Result<DownloadOutcome<HashMap<String, PathBuf>>, FetchError> {
281    let rt = tokio::runtime::Runtime::new().map_err(|e| FetchError::Io {
282        path: PathBuf::from("<runtime>"),
283        source: e,
284    })?;
285    rt.block_on(download_files(repo_id))
286}
287
288/// Downloads a single file from a `HuggingFace` model repository.
289///
290/// Returns the local cache path. If the file is already cached (and
291/// checksums match when `verify_checksums` is enabled), the download
292/// is skipped and the cached path is returned immediately.
293///
294/// Files at or above [`FetchConfig`]'s `chunk_threshold` (auto-tuned by
295/// the download plan optimizer, or 100 MiB fallback) are downloaded using
296/// multiple parallel HTTP Range connections (`connections_per_file`,
297/// auto-tuned or 8 fallback). Smaller files use a single connection.
298///
299/// # Arguments
300///
301/// * `repo_id` — Repository identifier (e.g., `"mntss/clt-gemma-2-2b-426k"`).
302/// * `filename` — Exact filename within the repository (e.g., `"W_enc_5.safetensors"`).
303/// * `config` — Shared configuration for auth, progress, checksums, retries, and chunking.
304///
305/// # Errors
306///
307/// * [`FetchError::Http`] — if the file does not exist in the repository.
308/// * [`FetchError::Api`] — on download failure (after retries).
309/// * [`FetchError::Checksum`] — if verification is enabled and fails.
310pub async fn download_file(
311    repo_id: String,
312    filename: &str,
313    config: &FetchConfig,
314) -> Result<DownloadOutcome<PathBuf>, FetchError> {
315    let mut builder = hf_hub::api::tokio::ApiBuilder::new().high();
316
317    if let Some(ref token) = config.token {
318        // BORROW: explicit .clone() to pass owned String
319        builder = builder.with_token(Some(token.clone()));
320    }
321
322    if let Some(ref dir) = config.output_dir {
323        // BORROW: explicit .clone() for owned PathBuf
324        builder = builder.with_cache_dir(dir.clone());
325    }
326
327    let api = builder.build().map_err(FetchError::Api)?;
328
329    let hf_repo = match config.revision {
330        Some(ref rev) => {
331            // BORROW: explicit .clone() for owned String arguments
332            Repo::with_revision(repo_id.clone(), RepoType::Model, rev.clone())
333        }
334        None => Repo::new(repo_id.clone(), RepoType::Model),
335    };
336
337    let repo = api.repo(hf_repo);
338    download::download_file_by_name(repo, repo_id, filename, config).await
339}
340
341/// Blocking version of [`download_file()`] for non-async callers.
342///
343/// Creates a Tokio runtime internally. Do not call from within
344/// an existing async context (use [`download_file()`] instead).
345///
346/// # Errors
347///
348/// Same as [`download_file()`].
349pub fn download_file_blocking(
350    repo_id: String,
351    filename: &str,
352    config: &FetchConfig,
353) -> Result<DownloadOutcome<PathBuf>, FetchError> {
354    let rt = tokio::runtime::Runtime::new().map_err(|e| FetchError::Io {
355        path: PathBuf::from("<runtime>"),
356        source: e,
357    })?;
358    rt.block_on(download_file(repo_id, filename, config))
359}
360
361/// Blocking version of [`download_files_with_config()`] for non-async callers.
362///
363/// Creates a Tokio runtime internally. Do not call from within
364/// an existing async context (use [`download_files_with_config()`] instead).
365///
366/// # Errors
367///
368/// Same as [`download_files_with_config()`].
369pub fn download_files_with_config_blocking(
370    repo_id: String,
371    config: &FetchConfig,
372) -> Result<DownloadOutcome<HashMap<String, PathBuf>>, FetchError> {
373    let rt = tokio::runtime::Runtime::new().map_err(|e| FetchError::Io {
374        path: PathBuf::from("<runtime>"),
375        source: e,
376    })?;
377    rt.block_on(download_files_with_config(repo_id, config))
378}
379
380/// Downloads files according to an existing [`DownloadPlan`].
381///
382/// Only uncached files in the plan are downloaded. The `config` controls
383/// authentication, progress, timeouts, and performance settings.
384/// Use [`DownloadPlan::recommended_config()`] to compute an optimized config,
385/// or override specific fields via [`DownloadPlan::recommended_config_builder()`].
386///
387/// # Errors
388///
389/// Returns [`FetchError::Io`] if the cache directory cannot be resolved.
390/// Same error conditions as [`download_with_config()`] for the download itself.
391pub async fn download_with_plan(
392    plan: &DownloadPlan,
393    config: &FetchConfig,
394) -> Result<DownloadOutcome<PathBuf>, FetchError> {
395    if plan.fully_cached() {
396        // Resolve snapshot path from cache and return immediately.
397        let cache_dir = config
398            .output_dir
399            .clone()
400            .map_or_else(cache::hf_cache_dir, Ok)?;
401        let repo_folder = format!("models--{}", plan.repo_id.replace('/', "--"));
402        let snapshot_dir = cache_dir
403            .join(&repo_folder)
404            .join("snapshots")
405            .join(&plan.revision);
406        return Ok(DownloadOutcome::Cached(snapshot_dir));
407    }
408
409    // Delegate to the standard download path which will re-check cache
410    // internally. The plan's value is the dry-run preview and the
411    // recommended config computed by the caller.
412    // BORROW: explicit .clone() for owned String argument
413    download_with_config(plan.repo_id.clone(), config).await
414}
415
416/// Blocking version of [`download_with_plan()`] for non-async callers.
417///
418/// Creates a Tokio runtime internally. Do not call from within
419/// an existing async context (use [`download_with_plan()`] instead).
420///
421/// # Errors
422///
423/// Same as [`download_with_plan()`].
424pub fn download_with_plan_blocking(
425    plan: &DownloadPlan,
426    config: &FetchConfig,
427) -> Result<DownloadOutcome<PathBuf>, FetchError> {
428    let rt = tokio::runtime::Runtime::new().map_err(|e| FetchError::Io {
429        path: PathBuf::from("<runtime>"),
430        source: e,
431    })?;
432    rt.block_on(download_with_plan(plan, config))
433}