mod schema;
mod generator;
mod sorter;
use clap::{Parser, Subcommand};
use sqlx::postgres::PgPoolOptions;
use crate::generator::Generator;
use std::time::Instant;
// Top-level CLI definition for the synthdb binary.
// The three command attributes are folded into a single #[command(...)]
// attribute — clap treats this identically to separate attributes.
#[derive(Parser)]
#[command(name = "synthdb", about = "Production-Ready Synthetic Data Engine", version = "0.1.0")]
struct Cli {
    // Required subcommand; dispatch happens in main().
    #[command(subcommand)]
    command: Commands,
}
// Subcommands accepted by synthdb. Currently only `clone`, which inspects a
// live Postgres database and emits a synthetic seed file.
// All #[arg] attributes (flag names, defaults) are behavioral and unchanged.
#[derive(Subcommand)]
enum Commands {
    Clone {
        // Postgres connection string for the source database.
        #[arg(short, long)]
        url: String,
        // Path of the SQL dump to write.
        #[arg(short, long, default_value = "seed.sql")]
        output: String,
        // Number of synthetic rows to generate per table.
        #[arg(short, long, default_value = "1000")]
        rows: usize,
        // Percentage of source data to sample during analysis.
        #[arg(long, default_value = "20")]
        sample_percent: u8,
        // Degree of parallelism for database work.
        #[arg(long, default_value = "4")]
        concurrency: usize,
        // Target Postgres schema to inspect.
        #[arg(long, default_value = "public")]
        schema: String,
        // Analyze and print the plan without generating data.
        #[arg(long)]
        dry_run: bool,
    },
}
/// Entry point: parses the CLI, connects to Postgres, extracts and
/// topologically sorts the schema, then either prints the plan (--dry-run)
/// or generates a synthetic SQL dump.
///
/// # Errors
/// Returns an error on invalid flag values, connection failure, schema
/// extraction/sorting failure, or dump-writing failure.
#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let cli = Cli::parse();
    match cli.command {
        Commands::Clone { url, output, rows, sample_percent, concurrency, schema: _, dry_run } => {
            let start = Instant::now();

            // Validate flag values up front, before spending time on a
            // database connection. Previously --sample-percent accepted any
            // u8 (e.g. 200%) without complaint.
            anyhow::ensure!(
                (1..=100).contains(&sample_percent),
                "--sample-percent must be between 1 and 100 (got {})",
                sample_percent
            );
            anyhow::ensure!(concurrency >= 1, "--concurrency must be at least 1");

            if dry_run {
                println!("🐫 Dry run active. Analyzing schema only...");
            } else {
                println!("🚀 Connecting to database...");
            }

            // --concurrency was previously accepted but ignored while the pool
            // size was hard-coded to 5; use it to size the pool. The usize ->
            // u32 conversion is checked rather than an `as` cast.
            let pool_size: u32 = concurrency
                .try_into()
                .map_err(|_| anyhow::anyhow!("--concurrency value {} is too large", concurrency))?;
            // A live connection is required even for --dry-run: the execution
            // plan below is derived from the actual database schema.
            let pool = PgPoolOptions::new()
                .max_connections(pool_size)
                .connect(&url)
                .await?;

            // NOTE(review): sample_percent is only displayed here; it is not
            // passed to extract_schema — TODO wire it through when sampling is
            // implemented. The --schema flag is likewise still unused.
            println!("🔍 Analyzing schema & sampling data ({}%)...", sample_percent);
            let raw_schema = schema::extract_schema(&pool).await?;
            println!("✅ Found {} tables. Calculating dependencies...", raw_schema.len());

            // Sort tables so referenced (parent) tables are inserted first.
            let sorted_schema = sorter::sort_tables(raw_schema)?;
            println!("✅ Dependencies resolved. Insertion order determined.");

            if dry_run {
                println!("📋 Execution Plan (Dry Run):");
                for (i, table) in sorted_schema.iter().enumerate() {
                    println!(" {}. {}", i + 1, table.table_name);
                }
                println!("✨ Dry run complete.");
                return Ok(());
            }

            println!("🔨 Generating synthetic data...");
            let mut generator = Generator::new(sorted_schema);
            generator.generate_sql_dump(&output, rows)?;
            println!("✨ Done in {:.2?}! Saved to {}", start.elapsed(), output);
        }
    }
    Ok(())
}