use crate::Result;
use crate::{
faker::EnhancedFaker,
rag::{RagConfig, RagEngine},
schema::SchemaDefinition,
DataConfig, GenerationResult,
};
use std::time::Instant;
/// Generates synthetic rows for a single schema.
///
/// Rows come either from the schema's own `generate_row` (driven by the
/// embedded faker) or, when a RAG engine is present, from
/// `RagEngine::generate_with_rag`.
#[derive(Debug)]
pub struct DataGenerator {
/// Schema whose rows this generator produces.
schema: SchemaDefinition,
/// Generation settings; this file reads `rows`, `seed` and `rag_enabled`.
config: DataConfig,
/// Faker instance handed to `SchemaDefinition::generate_row` for every row.
faker: EnhancedFaker,
/// RNG seeded from `config.seed`, or `None` when no seed is configured.
/// NOTE(review): nothing visible in this file reads this field, so the seed
/// does not appear to influence generated rows here — confirm whether the
/// faker is supposed to be driven by it.
seeded_rng: Option<rand::rngs::StdRng>,
/// RAG engine; `Some` when created by `new`/`update_config` with
/// `config.rag_enabled` set, or by `configure_rag`.
rag_engine: Option<RagEngine>,
}
impl DataGenerator {
    /// Creates a generator for `schema` driven by `config`.
    ///
    /// A seeded RNG is stored when `config.seed` is set. When
    /// `config.rag_enabled` is true, a RAG engine using `RagConfig::default()`
    /// is created and `schema` is registered with it.
    ///
    /// # Errors
    ///
    /// Propagates the error returned by `RagEngine::add_schema`.
    pub fn new(schema: SchemaDefinition, config: DataConfig) -> Result<Self> {
        let faker = EnhancedFaker::new();
        let seeded_rng = Self::build_rng(config.seed);
        let rag_engine = if config.rag_enabled {
            Some(Self::build_rag_engine(&schema, RagConfig::default())?)
        } else {
            None
        };
        Ok(Self {
            schema,
            config,
            faker,
            seeded_rng,
            rag_engine,
        })
    }

    /// Builds the optional RNG for a configured seed.
    fn build_rng(seed: Option<u64>) -> Option<rand::rngs::StdRng> {
        use rand::SeedableRng;
        seed.map(rand::rngs::StdRng::seed_from_u64)
    }

    /// Creates a RAG engine from `rag_config` and registers `schema` with it.
    fn build_rag_engine(schema: &SchemaDefinition, rag_config: RagConfig) -> Result<RagEngine> {
        let mut engine = RagEngine::new(rag_config);
        engine.add_schema(schema)?;
        Ok(engine)
    }

    /// Generates `config.rows` rows and reports the elapsed time in milliseconds.
    ///
    /// When a RAG engine is present the whole generation is delegated to
    /// `RagEngine::generate_with_rag`; otherwise each row is produced by
    /// `SchemaDefinition::generate_row` using the embedded faker.
    ///
    /// # Errors
    ///
    /// Propagates the first error from the RAG engine or from row generation.
    pub async fn generate(&mut self) -> Result<GenerationResult> {
        let start_time = Instant::now();

        if let Some(rag_engine) = self.rag_engine.as_mut() {
            let data = rag_engine
                .generate_with_rag(&self.schema, &self.config)
                .await?;
            return Ok(GenerationResult::new(data, start_time.elapsed().as_millis()));
        }

        let mut data = Vec::with_capacity(self.config.rows);
        for _ in 0..self.config.rows {
            data.push(self.schema.generate_row(&mut self.faker)?);
        }
        Ok(GenerationResult::new(data, start_time.elapsed().as_millis()))
    }

    /// Generates `config.rows` rows and fills in foreign keys for every
    /// relationship whose target schema appears in `related_schemas`.
    ///
    /// For each such relationship a fresh row of the target schema is generated
    /// and its `"id"` value is copied into the row under the relationship's
    /// `foreign_key` name. Relationships whose target is not in
    /// `related_schemas`, target rows without an `"id"`, and non-object rows are
    /// skipped silently. This path never uses the RAG engine.
    ///
    /// NOTE(review): the foreign key comes from a throw-away target row, not
    /// from rows returned by any other generator, so it is not guaranteed to
    /// reference an existing record.
    ///
    /// # Errors
    ///
    /// Propagates the first error from `SchemaDefinition::generate_row`.
    pub async fn generate_with_relationships(
        &mut self,
        related_schemas: &[SchemaDefinition],
    ) -> Result<GenerationResult> {
        let start_time = Instant::now();

        // Borrow the names as keys; the map never outlives `related_schemas`.
        let schema_map: std::collections::HashMap<&str, &SchemaDefinition> = related_schemas
            .iter()
            .map(|s| (s.name.as_str(), s))
            .collect();

        let mut data = Vec::with_capacity(self.config.rows);
        for _ in 0..self.config.rows {
            let mut row = self.schema.generate_row(&mut self.faker)?;

            for relationship in self.schema.relationships.values() {
                let target_schema = match schema_map.get(relationship.target_schema.as_str()) {
                    Some(target_schema) => target_schema,
                    None => continue,
                };

                let related_row = target_schema.generate_row(&mut self.faker)?;
                let fk_value = related_row.as_object().and_then(|obj| obj.get("id"));

                if let (Some(fk_value), Some(row_obj)) = (fk_value, row.as_object_mut()) {
                    row_obj.insert(relationship.foreign_key.clone(), fk_value.clone());
                }
            }

            data.push(row);
        }

        Ok(GenerationResult::new(data, start_time.elapsed().as_millis()))
    }

    /// Generates one row directly from the schema, bypassing the RAG engine.
    ///
    /// # Errors
    ///
    /// Propagates the error from `SchemaDefinition::generate_row`.
    pub fn generate_single(&mut self) -> Result<serde_json::Value> {
        self.schema.generate_row(&mut self.faker)
    }

    /// Returns the schema this generator produces rows for.
    pub fn schema(&self) -> &SchemaDefinition {
        &self.schema
    }

    /// Returns the current generation settings.
    pub fn config(&self) -> &DataConfig {
        &self.config
    }

    /// Replaces the generation settings.
    ///
    /// The seeded RNG is rebuilt from the new seed. If `config.rag_enabled` is
    /// true an existing RAG engine is kept, or a default one is created when
    /// none exists; if it is false any existing engine is dropped.
    ///
    /// # Errors
    ///
    /// Propagates the error from `RagEngine::add_schema`. In that case the
    /// generator is left exactly as it was before the call.
    pub fn update_config(&mut self, config: DataConfig) -> Result<()> {
        // Do the only fallible step first so a failure cannot leave the
        // generator with `rag_enabled == true` but no engine.
        let new_engine = if config.rag_enabled && self.rag_engine.is_none() {
            Some(Self::build_rag_engine(&self.schema, RagConfig::default())?)
        } else {
            None
        };

        self.seeded_rng = Self::build_rng(config.seed);
        if !config.rag_enabled {
            self.rag_engine = None;
        } else if let Some(engine) = new_engine {
            self.rag_engine = Some(engine);
        }
        self.config = config;
        Ok(())
    }

    /// Applies `rag_config` to the RAG engine, creating the engine (and
    /// registering the schema with it) if none exists yet.
    ///
    /// Note that this does not touch `config.rag_enabled`.
    ///
    /// # Errors
    ///
    /// Propagates the error from `RagEngine::add_schema` when a new engine has
    /// to be created.
    pub fn configure_rag(&mut self, rag_config: RagConfig) -> Result<()> {
        if let Some(engine) = self.rag_engine.as_mut() {
            engine.update_config(rag_config);
        } else {
            self.rag_engine = Some(Self::build_rag_engine(&self.schema, rag_config)?);
        }
        Ok(())
    }

    /// Returns the RAG engine, if one is configured.
    pub fn rag_engine(&self) -> Option<&RagEngine> {
        self.rag_engine.as_ref()
    }

    /// Returns the RAG engine mutably, if one is configured.
    pub fn rag_engine_mut(&mut self) -> Option<&mut RagEngine> {
        self.rag_engine.as_mut()
    }
}
/// Runs several [`DataGenerator`]s, one per schema, as a batch.
#[derive(Debug)]
pub struct BatchGenerator {
/// One generator per schema, in the order the schemas were supplied to `new`.
generators: Vec<DataGenerator>,
}
impl BatchGenerator {
    /// Builds one [`DataGenerator`] per schema, each with its own clone of `config`.
    ///
    /// # Errors
    ///
    /// Stops at, and returns, the first error from `DataGenerator::new`.
    pub fn new(schemas: Vec<SchemaDefinition>, config: DataConfig) -> Result<Self> {
        let generators = schemas
            .into_iter()
            .map(|schema| DataGenerator::new(schema, config.clone()))
            .collect::<Result<Vec<_>>>()?;
        Ok(Self { generators })
    }

    /// Runs every generator in order and returns one result per schema.
    ///
    /// # Errors
    ///
    /// Stops at, and returns, the first error from `DataGenerator::generate`.
    pub async fn generate_batch(&mut self) -> Result<Vec<GenerationResult>> {
        let mut outputs = Vec::with_capacity(self.generators.len());
        for generator in self.generators.iter_mut() {
            outputs.push(generator.generate().await?);
        }
        Ok(outputs)
    }

    /// Runs every generator in order with relationship handling, offering the
    /// schemas of all generators in the batch as possible relationship targets.
    ///
    /// # Errors
    ///
    /// Stops at, and returns, the first error from
    /// `DataGenerator::generate_with_relationships`.
    pub async fn generate_with_relationships(&mut self) -> Result<Vec<GenerationResult>> {
        // The schemas are cloned up front because the generators are borrowed
        // mutably while they run.
        let all_schemas: Vec<SchemaDefinition> = self
            .generators
            .iter()
            .map(DataGenerator::schema)
            .cloned()
            .collect();

        let mut outputs = Vec::with_capacity(self.generators.len());
        for generator in self.generators.iter_mut() {
            outputs.push(generator.generate_with_relationships(&all_schemas).await?);
        }
        Ok(outputs)
    }

    /// Returns the schema of each generator, in batch order.
    pub fn schemas(&self) -> Vec<&SchemaDefinition> {
        self.generators.iter().map(DataGenerator::schema).collect()
    }
}
pub mod utils {
use super::*;
use crate::Result;
pub async fn generate_sample_data(
schema_name: &str,
fields: Vec<(&str, &str)>,
rows: usize,
) -> Result<GenerationResult> {
let mut schema = SchemaDefinition::new(schema_name.to_string());
for (field_name, field_type) in fields {
let field =
crate::schema::FieldDefinition::new(field_name.to_string(), field_type.to_string());
schema = schema.with_field(field);
}
let config = DataConfig {
rows,
..Default::default()
};
let mut generator = DataGenerator::new(schema, config)?;
generator.generate().await
}
pub async fn generate_users(count: usize) -> Result<GenerationResult> {
let schema = crate::schema::templates::user_schema();
let config = DataConfig {
rows: count,
..Default::default()
};
let mut generator = DataGenerator::new(schema, config)?;
generator.generate().await
}
pub async fn generate_products(count: usize) -> Result<GenerationResult> {
let schema = crate::schema::templates::product_schema();
let config = DataConfig {
rows: count,
..Default::default()
};
let mut generator = DataGenerator::new(schema, config)?;
generator.generate().await
}
pub async fn generate_orders_with_users(
order_count: usize,
user_count: usize,
) -> Result<Vec<GenerationResult>> {
let user_schema = crate::schema::templates::user_schema();
let order_schema = crate::schema::templates::order_schema();
let config = DataConfig {
rows: order_count,
..Default::default()
};
let mut batch_generator = BatchGenerator::new(vec![user_schema, order_schema], config)?;
if let Some(order_generator) = batch_generator.generators.get_mut(1) {
let order_config = DataConfig {
rows: order_count,
..Default::default()
};
order_generator.update_config(order_config)?;
}
if let Some(user_generator) = batch_generator.generators.get_mut(0) {
let user_config = DataConfig {
rows: user_count,
..Default::default()
};
user_generator.update_config(user_config)?;
}
batch_generator.generate_with_relationships().await
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::schema::templates;

    /// A generator can be built from the user template with default settings.
    #[test]
    fn test_data_generator_new() {
        let generator = DataGenerator::new(templates::user_schema(), DataConfig::default());
        assert!(generator.is_ok());
    }

    /// Supplying a seed does not prevent construction.
    #[test]
    fn test_data_generator_with_seed() {
        let seeded_config = DataConfig {
            rows: 10,
            seed: Some(42),
            ..Default::default()
        };
        let generator = DataGenerator::new(templates::user_schema(), seeded_config);
        assert!(generator.is_ok());
    }

    /// A batch with a single schema can be built.
    #[test]
    fn test_batch_generator_new() {
        let batch = BatchGenerator::new(vec![templates::user_schema()], DataConfig::default());
        assert!(batch.is_ok());
    }

    /// A batch holds exactly one generator per supplied schema.
    #[test]
    fn test_batch_generator_multiple_schemas() {
        let schemas = vec![templates::user_schema(), templates::product_schema()];
        let batch = BatchGenerator::new(schemas, DataConfig::default());
        assert!(batch.is_ok());
        let generator_count = batch.map(|b| b.generators.len()).ok();
        assert_eq!(generator_count, Some(2));
    }

    /// Replacing the configuration of an existing generator succeeds.
    #[test]
    fn test_data_generator_update_config() {
        let mut generator =
            DataGenerator::new(templates::user_schema(), DataConfig::default()).unwrap();
        let outcome = generator.update_config(DataConfig {
            rows: 50,
            ..Default::default()
        });
        assert!(outcome.is_ok());
    }
}