// datasynth_core/traits/generator.rs

//! Core Generator trait for data generation.
//!
//! Defines the interface that all data generators must implement,
//! supporting both batch and streaming generation patterns.

// Error types are available via crate::error if needed

/// Core trait for all data generators.
///
/// Generators produce synthetic data items based on configuration and
/// statistical distributions. They support deterministic generation
/// via seeding for reproducibility.
pub trait Generator {
    /// The type of items this generator produces.
    type Item: Clone + Send;

    /// The configuration type for this generator.
    type Config: Clone + Send + Sync;

    /// Initialize the generator with configuration and seed.
    ///
    /// The seed ensures deterministic, reproducible generation.
    fn new(config: Self::Config, seed: u64) -> Self
    where
        Self: Sized;

    /// Generate a single item.
    fn generate_one(&mut self) -> Self::Item;

    /// Generate a batch of `count` items.
    ///
    /// The default implementation calls [`Generator::generate_one`] exactly
    /// `count` times and collects the results in generation order.
    fn generate_batch(&mut self, count: usize) -> Vec<Self::Item> {
        (0..count).map(|_| self.generate_one()).collect()
    }

    /// Borrow the generator as an iterator yielding at most `count` items.
    ///
    /// Useful for lazy evaluation and streaming: no item is generated until
    /// the returned iterator is advanced.
    fn generate_iter(&mut self, count: usize) -> GeneratorIterator<'_, Self>
    where
        Self: Sized,
    {
        GeneratorIterator {
            generator: self,
            remaining: count,
        }
    }

    /// Reset the generator to initial state (same seed).
    ///
    /// After reset, the generator will produce the same sequence of items.
    fn reset(&mut self);

    /// Get the current generation count.
    fn count(&self) -> u64;

    /// Get the seed used by this generator.
    fn seed(&self) -> u64;
}

/// Iterator adapter for generators.
///
/// Created by [`Generator::generate_iter`]. Holds a mutable borrow of the
/// generator and yields one freshly generated item per call to `next`
/// until the requested number of items has been produced.
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct GeneratorIterator<'a, G: Generator> {
    generator: &'a mut G,
    remaining: usize,
}

impl<G: Generator> Iterator for GeneratorIterator<'_, G> {
    type Item = G::Item;

    fn next(&mut self) -> Option<Self::Item> {
        // `checked_sub` yields `None` once the budget is exhausted, so the
        // generator is never touched after the final item.
        self.remaining = self.remaining.checked_sub(1)?;
        Some(self.generator.generate_one())
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        (self.remaining, Some(self.remaining))
    }
}

impl<G: Generator> ExactSizeIterator for GeneratorIterator<'_, G> {}

// Once `remaining` reaches zero `next` keeps returning `None`, so the
// iterator can advertise itself as fused.
impl<G: Generator> std::iter::FusedIterator for GeneratorIterator<'_, G> {}
87/// Trait for generators that can be parallelized.
88///
89/// Allows splitting a generator into multiple independent generators
90/// for parallel execution.
91pub trait ParallelGenerator: Generator + Sized {
92    /// Split the generator into multiple independent generators.
93    ///
94    /// Each split generator will produce a portion of the total items.
95    /// The splits should be deterministic based on the original seed.
96    fn split(self, parts: usize) -> Vec<Self>;
97
98    /// Merge results from parallel execution.
99    ///
100    /// Combines results from multiple generators into a single sequence.
101    fn merge_results(results: Vec<Vec<Self::Item>>) -> Vec<Self::Item> {
102        results.into_iter().flatten().collect()
103    }
104}
/// Progress information for long-running generation.
#[derive(Debug, Clone)]
pub struct GenerationProgress {
    /// Total items to generate.
    pub total: u64,
    /// Items generated so far.
    pub completed: u64,
    /// Items per second throughput.
    pub items_per_second: f64,
    /// Estimated seconds remaining.
    pub eta_seconds: Option<u64>,
    /// Current phase/stage description.
    pub phase: String,
}

impl GenerationProgress {
    /// Create a new progress tracker for `total` items.
    ///
    /// Starts with nothing completed, zero throughput, no ETA and an
    /// empty phase description.
    pub fn new(total: u64) -> Self {
        Self {
            total,
            completed: 0,
            items_per_second: 0.0,
            eta_seconds: None,
            phase: String::new(),
        }
    }

    /// Get progress as a fraction in the range `0.0..=1.0`.
    ///
    /// Returns `1.0` when `total` is zero (there is nothing left to do).
    /// Because the fields are public, `completed` may exceed `total`; the
    /// result is clamped so it never goes above `1.0`.
    pub fn percentage(&self) -> f64 {
        if self.total == 0 {
            1.0
        } else {
            (self.completed as f64 / self.total as f64).min(1.0)
        }
    }

    /// Check if generation is complete, i.e. `completed >= total`.
    pub fn is_complete(&self) -> bool {
        self.completed >= self.total
    }
}
148/// Trait for components that can report progress.
149pub trait ProgressReporter {
150    /// Report current progress.
151    fn report_progress(&self, progress: &GenerationProgress);
152}
153
154/// No-op progress reporter for when progress tracking is not needed.
155pub struct NoopProgressReporter;
156
157impl ProgressReporter for NoopProgressReporter {
158    fn report_progress(&self, _progress: &GenerationProgress) {}
159}
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal deterministic generator used to exercise the trait's
    /// default methods.
    ///
    /// Produces a linear-congruential sequence that starts from the seed.
    struct SimpleGenerator {
        seed: u64,
        count: u64,
        value: u64,
    }

    impl Generator for SimpleGenerator {
        type Item = u64;
        type Config = ();

        fn new(_config: Self::Config, seed: u64) -> Self {
            Self {
                seed,
                count: 0,
                value: seed,
            }
        }

        fn generate_one(&mut self) -> Self::Item {
            self.count += 1;
            // Wrapping arithmetic: overflow is expected for this step.
            self.value = self.value.wrapping_mul(6364136223846793005).wrapping_add(1);
            self.value
        }

        fn reset(&mut self) {
            self.count = 0;
            self.value = self.seed;
        }

        fn count(&self) -> u64 {
            self.count
        }

        fn seed(&self) -> u64 {
            self.seed
        }
    }

    // Locals are named `generator` rather than `gen`, which is a reserved
    // keyword in the 2024 edition.

    #[test]
    fn test_generator_batch() {
        let mut generator = SimpleGenerator::new((), 42);
        let batch = generator.generate_batch(10);
        assert_eq!(batch.len(), 10);
        assert_eq!(generator.count(), 10);
        assert_eq!(generator.seed(), 42);
    }

    #[test]
    fn test_generator_determinism() {
        let mut first = SimpleGenerator::new((), 42);
        let mut second = SimpleGenerator::new((), 42);

        for _ in 0..100 {
            assert_eq!(first.generate_one(), second.generate_one());
        }
    }

    #[test]
    fn test_generator_reset() {
        let mut generator = SimpleGenerator::new((), 42);
        let first_run: Vec<_> = generator.generate_iter(10).collect();
        assert_eq!(generator.count(), 10);

        generator.reset();
        assert_eq!(generator.count(), 0);
        let second_run: Vec<_> = generator.generate_iter(10).collect();

        assert_eq!(first_run, second_run);
    }

    #[test]
    fn test_generator_iter_is_bounded() {
        let mut generator = SimpleGenerator::new((), 42);
        let mut items = generator.generate_iter(2);
        assert_eq!(items.size_hint(), (2, Some(2)));
        assert!(items.next().is_some());
        assert!(items.next().is_some());
        assert!(items.next().is_none());
        // Exhausting the iterator must not generate extra items.
        assert_eq!(generator.count(), 2);
    }
}