mockforge_data/
generator.rs1use crate::Result;
4use crate::{
5 faker::EnhancedFaker,
6 rag::{RagConfig, RagEngine},
7 schema::SchemaDefinition,
8 DataConfig, GenerationResult,
9};
10use std::time::Instant;
11
12#[derive(Debug)]
14pub struct DataGenerator {
15 schema: SchemaDefinition,
17 config: DataConfig,
19 faker: EnhancedFaker,
21 seeded_rng: Option<rand::rngs::StdRng>,
23 rag_engine: Option<RagEngine>,
25}
26
27impl DataGenerator {
28 pub fn new(schema: SchemaDefinition, config: DataConfig) -> Result<Self> {
30 let faker = EnhancedFaker::new();
31 let seeded_rng = if let Some(seed) = config.seed {
32 use rand::SeedableRng;
33 Some(rand::rngs::StdRng::seed_from_u64(seed))
34 } else {
35 None
36 };
37
38 let rag_engine = if config.rag_enabled {
40 let rag_config = RagConfig::default();
41 let mut engine = RagEngine::new(rag_config);
42 engine.add_schema(&schema)?;
44 Some(engine)
45 } else {
46 None
47 };
48
49 Ok(Self {
50 schema,
51 config,
52 faker,
53 seeded_rng,
54 rag_engine,
55 })
56 }
57
58 pub async fn generate(&mut self) -> Result<GenerationResult> {
60 let start_time = Instant::now();
61
62 if let Some(rag_engine) = &mut self.rag_engine {
64 let data = rag_engine.generate_with_rag(&self.schema, &self.config).await?;
65 let generation_time = start_time.elapsed().as_millis();
66 Ok(GenerationResult::new(data, generation_time))
67 } else {
68 let mut data = Vec::with_capacity(self.config.rows);
70
71 for _ in 0..self.config.rows {
72 let row = self.schema.generate_row(&mut self.faker)?;
73 data.push(row);
74 }
75
76 let generation_time = start_time.elapsed().as_millis();
77 Ok(GenerationResult::new(data, generation_time))
78 }
79 }
80
81 pub async fn generate_with_relationships(
83 &mut self,
84 related_schemas: &[SchemaDefinition],
85 ) -> Result<GenerationResult> {
86 let start_time = Instant::now();
87
88 let schema_map: std::collections::HashMap<String, &SchemaDefinition> =
90 related_schemas.iter().map(|s| (s.name.clone(), s)).collect();
91
92 let mut data = Vec::with_capacity(self.config.rows);
93
94 for _ in 0..self.config.rows {
95 let mut row = self.schema.generate_row(&mut self.faker)?;
96
97 for relationship in self.schema.relationships.values() {
99 if let Some(target_schema) = schema_map.get(&relationship.target_schema) {
100 let related_row = target_schema.generate_row(&mut self.faker)?;
102
103 if let Some(related_obj) = related_row.as_object() {
105 if let Some(fk_value) = related_obj.get("id") {
106 if let Some(row_obj) = row.as_object_mut() {
108 row_obj.insert(relationship.foreign_key.clone(), fk_value.clone());
109 }
110 }
111 }
112 }
113 }
114
115 data.push(row);
116 }
117
118 let generation_time = start_time.elapsed().as_millis();
119
120 Ok(GenerationResult::new(data, generation_time))
121 }
122
123 pub fn generate_single(&mut self) -> Result<serde_json::Value> {
125 self.schema.generate_row(&mut self.faker)
126 }
127
128 pub fn schema(&self) -> &SchemaDefinition {
130 &self.schema
131 }
132
133 pub fn config(&self) -> &DataConfig {
135 &self.config
136 }
137
138 pub fn update_config(&mut self, config: DataConfig) -> Result<()> {
140 self.config = config.clone();
141
142 if let Some(seed) = self.config.seed {
144 use rand::SeedableRng;
145 self.seeded_rng = Some(rand::rngs::StdRng::seed_from_u64(seed));
146 } else {
147 self.seeded_rng = None;
148 }
149
150 if config.rag_enabled {
152 if self.rag_engine.is_none() {
153 let rag_config = RagConfig::default();
154 let mut engine = RagEngine::new(rag_config);
155 engine.add_schema(&self.schema)?;
156 self.rag_engine = Some(engine);
157 }
158 } else {
159 self.rag_engine = None;
160 }
161
162 Ok(())
163 }
164
165 pub fn configure_rag(&mut self, rag_config: RagConfig) -> Result<()> {
167 if let Some(engine) = &mut self.rag_engine {
168 engine.update_config(rag_config);
169 } else {
170 let mut engine = RagEngine::new(rag_config);
171 engine.add_schema(&self.schema)?;
172 self.rag_engine = Some(engine);
173 }
174 Ok(())
175 }
176
177 pub fn rag_engine(&self) -> Option<&RagEngine> {
179 self.rag_engine.as_ref()
180 }
181
182 pub fn rag_engine_mut(&mut self) -> Option<&mut RagEngine> {
184 self.rag_engine.as_mut()
185 }
186}
187
188#[derive(Debug)]
190pub struct BatchGenerator {
191 generators: Vec<DataGenerator>,
193}
194
195impl BatchGenerator {
196 pub fn new(schemas: Vec<SchemaDefinition>, config: DataConfig) -> Result<Self> {
198 let mut generators = Vec::new();
199
200 for schema in schemas {
201 let generator = DataGenerator::new(schema, config.clone())?;
202 generators.push(generator);
203 }
204
205 Ok(Self { generators })
206 }
207
208 pub async fn generate_batch(&mut self) -> Result<Vec<GenerationResult>> {
210 let mut results = Vec::new();
211
212 for generator in &mut self.generators {
213 let result = generator.generate().await?;
214 results.push(result);
215 }
216
217 Ok(results)
218 }
219
220 pub async fn generate_with_relationships(&mut self) -> Result<Vec<GenerationResult>> {
222 let mut results = Vec::new();
223 let schemas: Vec<SchemaDefinition> =
224 self.generators.iter().map(|g| g.schema().clone()).collect();
225
226 for generator in &mut self.generators {
227 let result = generator.generate_with_relationships(&schemas).await?;
228 results.push(result);
229 }
230
231 Ok(results)
232 }
233
234 pub fn schemas(&self) -> Vec<&SchemaDefinition> {
236 self.generators.iter().map(|g| g.schema()).collect()
237 }
238}
239
240pub mod utils {
242 use super::*;
243 use crate::Result;
244
245 pub async fn generate_sample_data(
247 schema_name: &str,
248 fields: Vec<(&str, &str)>,
249 rows: usize,
250 ) -> Result<GenerationResult> {
251 let mut schema = SchemaDefinition::new(schema_name.to_string());
252
253 for (field_name, field_type) in fields {
254 let field =
255 crate::schema::FieldDefinition::new(field_name.to_string(), field_type.to_string());
256 schema = schema.with_field(field);
257 }
258
259 let config = DataConfig {
260 rows,
261 ..Default::default()
262 };
263
264 let mut generator = DataGenerator::new(schema, config)?;
265 generator.generate().await
266 }
267
268 pub async fn generate_users(count: usize) -> Result<GenerationResult> {
270 let schema = crate::schema::templates::user_schema();
271 let config = DataConfig {
272 rows: count,
273 ..Default::default()
274 };
275
276 let mut generator = DataGenerator::new(schema, config)?;
277 generator.generate().await
278 }
279
280 pub async fn generate_products(count: usize) -> Result<GenerationResult> {
282 let schema = crate::schema::templates::product_schema();
283 let config = DataConfig {
284 rows: count,
285 ..Default::default()
286 };
287
288 let mut generator = DataGenerator::new(schema, config)?;
289 generator.generate().await
290 }
291
292 pub async fn generate_orders_with_users(
294 order_count: usize,
295 user_count: usize,
296 ) -> Result<Vec<GenerationResult>> {
297 let user_schema = crate::schema::templates::user_schema();
298 let order_schema = crate::schema::templates::order_schema();
299
300 let config = DataConfig {
301 rows: order_count,
302 ..Default::default()
303 };
304
305 let mut batch_generator = BatchGenerator::new(vec![user_schema, order_schema], config)?;
306
307 if let Some(order_generator) = batch_generator.generators.get_mut(1) {
309 let order_config = DataConfig {
310 rows: order_count,
311 ..Default::default()
312 };
313 order_generator.update_config(order_config)?;
314 }
315
316 if let Some(user_generator) = batch_generator.generators.get_mut(0) {
318 let user_config = DataConfig {
319 rows: user_count,
320 ..Default::default()
321 };
322 user_generator.update_config(user_config)?;
323 }
324
325 batch_generator.generate_with_relationships().await
326 }
327}
328
#[cfg(test)]
mod tests {
    use super::*;
    use crate::schema::templates;

    /// A generator can be built from the user template with default settings.
    #[test]
    fn test_data_generator_new() {
        let built = DataGenerator::new(templates::user_schema(), DataConfig::default());
        assert!(built.is_ok());
    }

    /// Supplying a seed does not prevent construction.
    #[test]
    fn test_data_generator_with_seed() {
        let seeded = DataConfig {
            rows: 10,
            seed: Some(42),
            ..Default::default()
        };

        let built = DataGenerator::new(templates::user_schema(), seeded);
        assert!(built.is_ok());
    }

    /// A batch with a single schema can be built.
    #[test]
    fn test_batch_generator_new() {
        let built = BatchGenerator::new(vec![templates::user_schema()], DataConfig::default());
        assert!(built.is_ok());
    }

    /// A batch holds exactly one generator per schema.
    #[test]
    fn test_batch_generator_multiple_schemas() {
        let built = BatchGenerator::new(
            vec![templates::user_schema(), templates::product_schema()],
            DataConfig::default(),
        );
        assert!(built.is_ok());

        if let Ok(batch) = built {
            assert_eq!(batch.generators.len(), 2);
        }
    }

    /// Replacing the configuration of an existing generator succeeds.
    #[test]
    fn test_data_generator_update_config() {
        let mut generator =
            DataGenerator::new(templates::user_schema(), DataConfig::default()).unwrap();

        let outcome = generator.update_config(DataConfig {
            rows: 50,
            ..Default::default()
        });
        assert!(outcome.is_ok());
    }
}