entrenar/hf_pipeline/dataset/mod.rs
1//! HuggingFace Dataset Fetcher and Collator
2//!
3//! Provides dataset loading and batching for distillation training.
4//!
5//! # Features
6//!
7//! - Streaming support for large datasets
8//! - Parquet file loading
9//! - Dynamic padding and batching
10//! - Teacher output caching
11
12mod batch;
13mod cache;
14mod collator;
15mod dataset_impl;
16mod example;
17mod fetcher;
18mod options;
19mod split;
20
21#[cfg(test)]
22mod tests;
23
// The submodules above are private; re-export their public types here so
// callers import them from this module path and the public API stays stable.
25pub use batch::Batch;
26pub use cache::{CacheStats, TeacherCache};
27pub use collator::DistillationCollator;
28pub use dataset_impl::Dataset;
29pub use example::Example;
30pub use fetcher::HfDatasetFetcher;
31pub use options::DatasetOptions;
32pub use split::Split;