use std::path::PathBuf;
use super::split::Split;
/// Options bundle describing which dataset split to use and how to read it.
///
/// Every field is public, so values can be set directly or through the
/// chainable setter methods defined on this type.
///
/// NOTE(review): the code that consumes these options is not in this file;
/// the per-field notes below marked "presumably" are inferred from the field
/// names and should be confirmed against the loader.
#[derive(Debug, Clone)]
pub struct DatasetOptions {
/// The [`Split`] these options refer to.
pub split: Split,
/// Optional example count; `None` means no count was set.
/// Presumably an upper bound on how many examples are read — confirm.
pub max_examples: Option<usize>,
/// Streaming flag. Presumably selects incremental reading over loading
/// everything up front — confirm.
pub streaming: bool,
/// Whether shuffling is requested.
pub shuffle: bool,
/// Optional seed value; `None` means no seed was set.
/// Presumably seeds the shuffle order — confirm.
pub seed: Option<u64>,
/// Optional cache directory path; `None` means no directory was set.
pub cache_dir: Option<PathBuf>,
}
impl Default for DatasetOptions {
fn default() -> Self {
Self {
split: Split::Train,
max_examples: None,
streaming: false,
shuffle: true,
seed: Some(42),
cache_dir: None,
}
}
}
impl DatasetOptions {
    /// Preset for the training split; exactly the [`Default`] configuration.
    #[must_use]
    pub fn train() -> Self {
        <Self as Default>::default()
    }

    /// Preset for the validation split: the default configuration with
    /// `split` set to `Split::Validation` and `shuffle` turned off.
    #[must_use]
    pub fn validation() -> Self {
        let base = Self::default();
        Self { split: Split::Validation, ..base }.shuffle(false)
    }

    /// Preset for the test split: the default configuration with `split`
    /// set to `Split::Test` and `shuffle` turned off.
    #[must_use]
    pub fn test() -> Self {
        let base = Self::default();
        Self { split: Split::Test, ..base }.shuffle(false)
    }

    /// Returns these options with `max_examples` set to `Some(n)`.
    ///
    /// All other fields are carried over unchanged.
    #[must_use]
    pub fn max_examples(self, n: usize) -> Self {
        Self { max_examples: Some(n), ..self }
    }

    /// Returns these options with the `streaming` flag set to `enabled`.
    ///
    /// All other fields are carried over unchanged.
    #[must_use]
    pub fn streaming(self, enabled: bool) -> Self {
        Self { streaming: enabled, ..self }
    }

    /// Returns these options with the `shuffle` flag set to `enabled`.
    ///
    /// All other fields are carried over unchanged.
    #[must_use]
    pub fn shuffle(self, enabled: bool) -> Self {
        Self { shuffle: enabled, ..self }
    }

    /// Returns these options with `seed` set to `Some(seed)`.
    ///
    /// All other fields are carried over unchanged.
    #[must_use]
    pub fn seed(self, seed: u64) -> Self {
        Self { seed: Some(seed), ..self }
    }
}