Skip to main content

entrenar/hf_pipeline/dataset/
example.rs

//! A single tokenized dataset example and its builder-style constructors.
2
/// A single example from a dataset.
///
/// Holds the token IDs together with a per-position attention mask, plus
/// optional labels and the optional source text.
///
/// Besides `Debug` and `Clone`, the type derives `PartialEq`/`Eq` so examples
/// can be compared directly (e.g. with `assert_eq!`), and `Default`, which
/// yields an example with empty vectors and no labels or text.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct Example {
    /// Input token IDs.
    pub input_ids: Vec<u32>,
    /// Attention mask (1 = attend, 0 = ignore).
    pub attention_mask: Vec<u8>,
    /// Target labels (for supervised learning); `None` when absent.
    pub labels: Option<Vec<u32>>,
    /// Text content (if available).
    pub text: Option<String>,
}
15
16impl Example {
17    /// Create new example from token IDs
18    #[must_use]
19    pub fn from_tokens(input_ids: Vec<u32>) -> Self {
20        let len = input_ids.len();
21        Self { input_ids, attention_mask: vec![1; len], labels: None, text: None }
22    }
23
24    /// Set labels
25    #[must_use]
26    pub fn with_labels(mut self, labels: Vec<u32>) -> Self {
27        self.labels = Some(labels);
28        self
29    }
30
31    /// Set text
32    #[must_use]
33    pub fn with_text(mut self, text: impl Into<String>) -> Self {
34        self.text = Some(text.into());
35        self
36    }
37
38    /// Get sequence length
39    #[must_use]
40    pub fn len(&self) -> usize {
41        self.input_ids.len()
42    }
43
44    /// Check if empty
45    #[must_use]
46    pub fn is_empty(&self) -> bool {
47        self.input_ids.is_empty()
48    }
49}