entrenar/hf_pipeline/dataset/
dataset_impl.rs1use super::example::Example;
4
5pub struct Dataset {
7 name: String,
9 examples: Vec<Example>,
11 position: usize,
13}
14
15impl Dataset {
16 #[must_use]
18 pub fn new(name: impl Into<String>, examples: Vec<Example>) -> Self {
19 Self { name: name.into(), examples, position: 0 }
20 }
21
22 #[must_use]
24 pub fn mock(num_examples: usize, seq_len: usize) -> Self {
25 let examples: Vec<Example> = (0..num_examples)
26 .map(|i| {
27 Example::from_tokens((0..seq_len).map(|j| ((i + j) % 30000) as u32).collect())
28 .with_labels((0..seq_len).map(|j| ((i + j + 1) % 30000) as u32).collect())
29 })
30 .collect();
31
32 Self::new("mock_dataset", examples)
33 }
34
35 #[must_use]
37 pub fn name(&self) -> &str {
38 &self.name
39 }
40
41 #[must_use]
43 pub fn len(&self) -> usize {
44 self.examples.len()
45 }
46
47 #[must_use]
49 pub fn is_empty(&self) -> bool {
50 self.examples.is_empty()
51 }
52
53 #[must_use]
55 pub fn get(&self, index: usize) -> Option<&Example> {
56 self.examples.get(index)
57 }
58
59 #[must_use]
61 pub fn examples(&self) -> &[Example] {
62 &self.examples
63 }
64
65 pub fn reset(&mut self) {
67 self.position = 0;
68 }
69
70 pub fn shuffle(&mut self, seed: u64) {
72 use rand::prelude::*;
73 let mut rng = StdRng::seed_from_u64(seed);
74 self.examples.shuffle(&mut rng);
75 }
76
77 #[must_use]
79 pub fn take(mut self, n: usize) -> Self {
80 self.examples.truncate(n);
81 self
82 }
83}
84
85impl Iterator for Dataset {
86 type Item = Example;
87
88 fn next(&mut self) -> Option<Self::Item> {
89 if self.position < self.examples.len() {
90 let example = self.examples[self.position].clone();
91 self.position += 1;
92 Some(example)
93 } else {
94 None
95 }
96 }
97}