alimentar/hf_hub/mod.rs
1//! HuggingFace Hub dataset importer.
2//!
3//! Provides functionality to import datasets from the HuggingFace Hub.
4//! Supports downloading parquet files directly from HF datasets.
5//!
6//! # Example
7//!
8//! ```no_run
9//! use alimentar::{hf_hub::HfDataset, Dataset};
10//!
11//! // Import a dataset from HuggingFace Hub
12//! let hf = HfDataset::builder("squad")
13//!     .revision("main")
14//!     .split("train")
15//!     .build()
16//!     .unwrap();
17//!
18//! let dataset = hf.download().unwrap();
19//! println!("Loaded {} rows", dataset.len());
20//! ```
21
22mod download;
23mod upload;
24pub mod validation;
25
26#[cfg(test)]
27mod tests;
28
29// Re-export download types (the public `pub use` follows the test-only item below)
30// Internal re-exports for tests
31#[cfg(test)]
32pub(crate) use download::default_cache_dir;
33pub use download::{list_dataset_files, DatasetInfo, HfDataset, HfDatasetBuilder};
34#[cfg(test)]
35pub(crate) use upload::HF_API_URL;
36// Re-export upload types (items in the first group require the `hf-hub` feature)
37#[cfg(feature = "hf-hub")]
38pub use upload::{
39 build_lfs_batch_request, build_lfs_preupload_request, build_ndjson_lfs_commit,
40 build_ndjson_upload_payload, compute_sha256, is_binary_file,
41};
42pub use upload::{HfPublisher, HfPublisherBuilder};
43// Re-export validation types
44pub use validation::{
45 DatasetCardValidator, ValidationError, VALID_LICENSES, VALID_SIZE_CATEGORIES,
46 VALID_TASK_CATEGORIES,
47};