mockforge_data/
generator.rs1use crate::Result;
4use crate::{
5 faker::EnhancedFaker,
6 rag::{RagConfig, RagEngine},
7 schema::SchemaDefinition,
8 DataConfig, GenerationResult,
9};
10use std::time::Instant;
11
12#[derive(Debug)]
14pub struct DataGenerator {
15 schema: SchemaDefinition,
17 config: DataConfig,
19 faker: EnhancedFaker,
21 seeded_rng: Option<rand::rngs::StdRng>,
23 rag_engine: Option<RagEngine>,
25}
26
27impl DataGenerator {
28 pub fn new(schema: SchemaDefinition, config: DataConfig) -> Result<Self> {
30 let faker = EnhancedFaker::new();
31 let seeded_rng = if let Some(seed) = config.seed {
32 use rand::SeedableRng;
33 Some(rand::rngs::StdRng::seed_from_u64(seed))
34 } else {
35 None
36 };
37
38 let rag_engine = if config.rag_enabled {
40 let rag_config = RagConfig::default();
41 let mut engine = RagEngine::new(rag_config);
42 engine.add_schema(&schema)?;
44 Some(engine)
45 } else {
46 None
47 };
48
49 Ok(Self {
50 schema,
51 config,
52 faker,
53 seeded_rng,
54 rag_engine,
55 })
56 }
57
58 pub async fn generate(&mut self) -> Result<GenerationResult> {
60 let start_time = Instant::now();
61
62 if let Some(rag_engine) = &mut self.rag_engine {
64 let data = rag_engine.generate_with_rag(&self.schema, &self.config).await?;
65 let generation_time = start_time.elapsed().as_millis();
66 Ok(GenerationResult::new(data, generation_time))
67 } else {
68 let mut data = Vec::with_capacity(self.config.rows);
70
71 for _ in 0..self.config.rows {
72 let row = self.schema.generate_row(&mut self.faker)?;
73 data.push(row);
74 }
75
76 let generation_time = start_time.elapsed().as_millis();
77 Ok(GenerationResult::new(data, generation_time))
78 }
79 }
80
81 pub async fn generate_with_relationships(
83 &mut self,
84 related_schemas: &[SchemaDefinition],
85 ) -> Result<GenerationResult> {
86 let start_time = Instant::now();
87
88 let schema_map: std::collections::HashMap<String, &SchemaDefinition> =
90 related_schemas.iter().map(|s| (s.name.clone(), s)).collect();
91
92 let mut data = Vec::with_capacity(self.config.rows);
93
94 for _ in 0..self.config.rows {
95 let mut row = self.schema.generate_row(&mut self.faker)?;
96
97 for relationship in self.schema.relationships.values() {
99 if let Some(target_schema) = schema_map.get(&relationship.target_schema) {
100 let related_row = target_schema.generate_row(&mut self.faker)?;
102
103 if let Some(related_obj) = related_row.as_object() {
105 if let Some(fk_value) = related_obj.get("id") {
106 if let Some(row_obj) = row.as_object_mut() {
108 row_obj.insert(relationship.foreign_key.clone(), fk_value.clone());
109 }
110 }
111 }
112 }
113 }
114
115 data.push(row);
116 }
117
118 let generation_time = start_time.elapsed().as_millis();
119
120 Ok(GenerationResult::new(data, generation_time))
121 }
122
123 pub fn generate_single(&mut self) -> Result<serde_json::Value> {
125 self.schema.generate_row(&mut self.faker)
126 }
127
128 pub fn schema(&self) -> &SchemaDefinition {
130 &self.schema
131 }
132
133 pub fn config(&self) -> &DataConfig {
135 &self.config
136 }
137
138 pub fn update_config(&mut self, config: DataConfig) -> Result<()> {
140 self.config = config.clone();
141
142 if let Some(seed) = self.config.seed {
144 use rand::SeedableRng;
145 self.seeded_rng = Some(rand::rngs::StdRng::seed_from_u64(seed));
146 } else {
147 self.seeded_rng = None;
148 }
149
150 if config.rag_enabled {
152 if self.rag_engine.is_none() {
153 let rag_config = RagConfig::default();
154 let mut engine = RagEngine::new(rag_config);
155 engine.add_schema(&self.schema)?;
156 self.rag_engine = Some(engine);
157 }
158 } else {
159 self.rag_engine = None;
160 }
161
162 Ok(())
163 }
164
165 pub fn configure_rag(&mut self, rag_config: RagConfig) -> Result<()> {
167 if let Some(engine) = &mut self.rag_engine {
168 engine.update_config(rag_config);
169 } else {
170 let mut engine = RagEngine::new(rag_config);
171 engine.add_schema(&self.schema)?;
172 self.rag_engine = Some(engine);
173 }
174 Ok(())
175 }
176
177 pub fn rag_engine(&self) -> Option<&RagEngine> {
179 self.rag_engine.as_ref()
180 }
181
182 pub fn rag_engine_mut(&mut self) -> Option<&mut RagEngine> {
184 self.rag_engine.as_mut()
185 }
186}
187
188#[derive(Debug)]
190pub struct BatchGenerator {
191 generators: Vec<DataGenerator>,
193 #[allow(dead_code)]
195 config: DataConfig,
196}
197
198impl BatchGenerator {
199 pub fn new(schemas: Vec<SchemaDefinition>, config: DataConfig) -> Result<Self> {
201 let mut generators = Vec::new();
202
203 for schema in schemas {
204 let generator = DataGenerator::new(schema, config.clone())?;
205 generators.push(generator);
206 }
207
208 Ok(Self { generators, config })
209 }
210
211 pub async fn generate_batch(&mut self) -> Result<Vec<GenerationResult>> {
213 let mut results = Vec::new();
214
215 for generator in &mut self.generators {
216 let result = generator.generate().await?;
217 results.push(result);
218 }
219
220 Ok(results)
221 }
222
223 pub async fn generate_with_relationships(&mut self) -> Result<Vec<GenerationResult>> {
225 let mut results = Vec::new();
226 let schemas: Vec<SchemaDefinition> =
227 self.generators.iter().map(|g| g.schema().clone()).collect();
228
229 for generator in &mut self.generators {
230 let result = generator.generate_with_relationships(&schemas).await?;
231 results.push(result);
232 }
233
234 Ok(results)
235 }
236
237 pub fn schemas(&self) -> Vec<&SchemaDefinition> {
239 self.generators.iter().map(|g| g.schema()).collect()
240 }
241}
242
243pub mod utils {
245 use super::*;
246 use crate::Result;
247
248 pub async fn generate_sample_data(
250 schema_name: &str,
251 fields: Vec<(&str, &str)>,
252 rows: usize,
253 ) -> Result<GenerationResult> {
254 let mut schema = SchemaDefinition::new(schema_name.to_string());
255
256 for (field_name, field_type) in fields {
257 let field =
258 crate::schema::FieldDefinition::new(field_name.to_string(), field_type.to_string());
259 schema = schema.with_field(field);
260 }
261
262 let config = DataConfig {
263 rows,
264 ..Default::default()
265 };
266
267 let mut generator = DataGenerator::new(schema, config)?;
268 generator.generate().await
269 }
270
271 pub async fn generate_users(count: usize) -> Result<GenerationResult> {
273 let schema = crate::schema::templates::user_schema();
274 let config = DataConfig {
275 rows: count,
276 ..Default::default()
277 };
278
279 let mut generator = DataGenerator::new(schema, config)?;
280 generator.generate().await
281 }
282
283 pub async fn generate_products(count: usize) -> Result<GenerationResult> {
285 let schema = crate::schema::templates::product_schema();
286 let config = DataConfig {
287 rows: count,
288 ..Default::default()
289 };
290
291 let mut generator = DataGenerator::new(schema, config)?;
292 generator.generate().await
293 }
294
295 pub async fn generate_orders_with_users(
297 order_count: usize,
298 user_count: usize,
299 ) -> Result<Vec<GenerationResult>> {
300 let user_schema = crate::schema::templates::user_schema();
301 let order_schema = crate::schema::templates::order_schema();
302
303 let config = DataConfig {
304 rows: order_count,
305 ..Default::default()
306 };
307
308 let mut batch_generator = BatchGenerator::new(vec![user_schema, order_schema], config)?;
309
310 if let Some(order_generator) = batch_generator.generators.get_mut(1) {
312 let order_config = DataConfig {
313 rows: order_count,
314 ..Default::default()
315 };
316 order_generator.update_config(order_config)?;
317 }
318
319 if let Some(user_generator) = batch_generator.generators.get_mut(0) {
321 let user_config = DataConfig {
322 rows: user_count,
323 ..Default::default()
324 };
325 user_generator.update_config(user_config)?;
326 }
327
328 batch_generator.generate_with_relationships().await
329 }
330}
331
#[cfg(test)]
mod tests {
    use super::*;
    use crate::schema::templates;

    /// A generator can be built from a template schema and the default config.
    #[test]
    fn test_data_generator_new() {
        let outcome = DataGenerator::new(templates::user_schema(), DataConfig::default());
        assert!(outcome.is_ok());
    }

    /// A generator can be built when a seed is configured.
    #[test]
    fn test_data_generator_with_seed() {
        let seeded = DataConfig {
            rows: 10,
            seed: Some(42),
            ..Default::default()
        };

        let outcome = DataGenerator::new(templates::user_schema(), seeded);
        assert!(outcome.is_ok());
    }

    /// A batch can be built from a single schema.
    #[test]
    fn test_batch_generator_new() {
        let outcome = BatchGenerator::new(vec![templates::user_schema()], DataConfig::default());
        assert!(outcome.is_ok());
    }

    /// A batch holds exactly one generator per supplied schema.
    #[test]
    fn test_batch_generator_multiple_schemas() {
        let schemas = vec![templates::user_schema(), templates::product_schema()];

        let outcome = BatchGenerator::new(schemas, DataConfig::default());
        assert!(outcome.is_ok());

        if let Ok(batch) = outcome {
            assert_eq!(batch.generators.len(), 2);
        }
    }

    /// Replacing the config on an existing generator succeeds.
    #[test]
    fn test_data_generator_update_config() {
        let mut generator =
            DataGenerator::new(templates::user_schema(), DataConfig::default()).unwrap();

        let replacement = DataConfig {
            rows: 50,
            ..Default::default()
        };

        assert!(generator.update_config(replacement).is_ok());
    }
}