entrenar/hf_pipeline/config/dataset.rs
//! Dataset configuration
2
3use serde::{Deserialize, Serialize};
4
5/// Dataset configuration
6#[derive(Debug, Clone, Serialize, Deserialize)]
7pub struct DatasetConfig {
8 /// Dataset ID or path
9 pub path: String,
10 /// Maximum sequence length
11 #[serde(default = "default_max_seq_length")]
12 pub max_seq_length: usize,
13 /// Maximum training examples (None = all)
14 pub max_train_examples: Option<usize>,
15 /// Maximum validation examples
16 pub max_eval_examples: Option<usize>,
17}
18
/// Default provider for `DatasetConfig::max_seq_length`.
///
/// Referenced by name from the `#[serde(default = "default_max_seq_length")]`
/// attribute on that field, so it must keep this exact name and a
/// zero-argument signature returning `usize`.
///
/// Returns `512`.
fn default_max_seq_length() -> usize {
    512
}