Skip to main content

entrenar/hf_pipeline/config/
dataset.rs

1//! Dataset configuration
2
3use serde::{Deserialize, Serialize};
4
5/// Dataset configuration
6#[derive(Debug, Clone, Serialize, Deserialize)]
7pub struct DatasetConfig {
8    /// Dataset ID or path
9    pub path: String,
10    /// Maximum sequence length
11    #[serde(default = "default_max_seq_length")]
12    pub max_seq_length: usize,
13    /// Maximum training examples (None = all)
14    pub max_train_examples: Option<usize>,
15    /// Maximum validation examples
16    pub max_eval_examples: Option<usize>,
17}
18
/// Supplies the fallback value for `DatasetConfig::max_seq_length`.
///
/// Referenced by name from that field's `#[serde(default = "...")]` attribute,
/// so serde calls it whenever the field is missing from the input.
fn default_max_seq_length() -> usize {
    // Named constant so the fallback is self-describing at the return site.
    const FALLBACK_MAX_SEQ_LENGTH: usize = 512;
    FALLBACK_MAX_SEQ_LENGTH
}