use assert_cmd::Command;
use predicates::prelude::*;
use std::fs;
use std::time::Duration;
use tempfile::TempDir;
/// Value passed to `generate --memory-limit` by `synth_data_generate`.
// NOTE(review): the unit is not visible in this file (presumably MB) — confirm against the CLI.
const TEST_MEMORY_LIMIT: &str = "512";
/// Value passed to `generate --max-threads` by `synth_data_generate`.
const TEST_MAX_THREADS: &str = "1";
/// Timeout, in seconds, applied to every command built by `synth_data`.
const TEST_TIMEOUT_SECS: u64 = 300;
/// Builds a [`Command`] for the `datasynth-data` binary with the suite-wide
/// timeout (`TEST_TIMEOUT_SECS` seconds) already applied.
///
/// # Panics
///
/// Panics if `Command::cargo_bin` cannot produce a command for the binary.
// NOTE(review): the `deprecated` lint is presumably silenced because
// `Command::cargo_bin` is marked deprecated in the assert_cmd version in use —
// confirm before removing the attribute.
#[allow(deprecated)]
fn synth_data() -> Command {
    let limit = Duration::from_secs(TEST_TIMEOUT_SECS);
    let mut command = Command::cargo_bin("datasynth-data").unwrap();
    command.timeout(limit);
    command
}
/// Builds a `datasynth-data generate` command that already carries the
/// suite's `--memory-limit` and `--max-threads` arguments.
///
/// Callers append the remaining `generate` arguments before asserting.
fn synth_data_generate() -> Command {
    let mut command = synth_data();
    command.args([
        "generate",
        "--memory-limit",
        TEST_MEMORY_LIMIT,
        "--max-threads",
        TEST_MAX_THREADS,
    ]);
    command
}
/// End-to-end workflow: `init` writes a config file, `validate` accepts it,
/// and `generate` run against it creates the output directory.
#[test]
fn test_full_workflow_init_validate_generate() {
    let workspace = TempDir::new().unwrap();
    let config_path = workspace.path().join("workflow_config.yaml");
    let output_dir = workspace.path().join("output");
    let config_arg = config_path.to_str().unwrap();
    let output_arg = output_dir.to_str().unwrap();

    // Step 1: create the configuration.
    synth_data()
        .args(["init", "-o", config_arg, "-i", "manufacturing", "-c", "small"])
        .assert()
        .success();
    assert!(config_path.exists(), "Config file should be created");

    // Step 2: the freshly written configuration must validate.
    synth_data()
        .args(["validate", "-c", config_arg])
        .assert()
        .success();

    // Step 3: generate data from it with a fixed seed.
    synth_data_generate()
        .args(["-c", config_arg, "-o", output_arg, "-s", "42"])
        .assert()
        .success();
    assert!(output_dir.exists(), "Output directory should be created");
}
/// For every industry preset, `init -i <preset>` must succeed and the
/// resulting config must pass `validate`.
#[test]
fn test_all_industry_presets_workflow() {
    for preset in [
        "manufacturing",
        "retail",
        "healthcare",
        "technology",
        "financial_services",
    ] {
        // A fresh temp dir per preset keeps the runs independent.
        let workspace = TempDir::new().unwrap();
        let config_path = workspace.path().join(format!("{}_config.yaml", preset));
        let config_arg = config_path.to_str().unwrap();

        synth_data()
            .args(["init", "-o", config_arg, "-i", preset])
            .assert()
            .success();

        synth_data()
            .args(["validate", "-c", config_arg])
            .assert()
            .success();
    }
}
/// For every complexity level, `init -c <level>` must succeed, the config
/// must validate, and the written file must mention the level by name.
#[test]
fn test_all_complexity_levels() {
    for level in ["small", "medium", "large"] {
        let workspace = TempDir::new().unwrap();
        let config_path = workspace.path().join(format!("{}_config.yaml", level));
        let config_arg = config_path.to_str().unwrap();

        synth_data()
            .args(["init", "-o", config_arg, "-c", level])
            .assert()
            .success();

        synth_data()
            .args(["validate", "-c", config_arg])
            .assert()
            .success();

        // The level name is expected to appear verbatim in the file text.
        let yaml_text = fs::read_to_string(&config_path).unwrap();
        let mentions_level = yaml_text.contains(level);
        assert!(
            mentions_level,
            "Config should contain complexity level: {}",
            level
        );
    }
}
/// `generate --demo` must write `journal_entries.json`, and that file must
/// parse as JSON.
#[test]
fn test_generated_json_is_valid() {
    let workspace = TempDir::new().unwrap();
    let output_dir = workspace.path().join("output");

    synth_data_generate()
        .args(["--demo", "-o", output_dir.to_str().unwrap()])
        .assert()
        .success();

    let journal_path = output_dir.join("journal_entries.json");
    assert!(
        journal_path.exists(),
        "Journal entries JSON should be generated"
    );

    let raw = fs::read_to_string(&journal_path).unwrap();
    let parse_outcome = serde_json::from_str::<serde_json::Value>(&raw);
    assert!(
        parse_outcome.is_ok(),
        "Journal entries should be valid JSON"
    );
}
/// `generate --demo` with a fixed seed must create the output directory and
/// place a `journal_entries.json` entry directly inside it.
#[test]
fn test_generated_output_structure() {
    let workspace = TempDir::new().unwrap();
    let output_dir = workspace.path().join("output");

    synth_data_generate()
        .args(["--demo", "-o", output_dir.to_str().unwrap(), "-s", "12345"])
        .assert()
        .success();
    assert!(output_dir.exists(), "Output directory should exist");

    // Collect the names of the top-level entries; unreadable entries are skipped.
    let files: Vec<String> = fs::read_dir(&output_dir)
        .unwrap()
        .flatten()
        .map(|entry| entry.file_name().to_string_lossy().into_owned())
        .collect();
    println!("Generated files: {:?}", files);

    let has_journal = files.iter().any(|name| name == "journal_entries.json");
    assert!(has_journal, "Should have journal_entries.json");
}
/// Two `generate --demo` runs with the same seed must write byte-identical
/// `sample_entries.json` files. Ignored by default.
// NOTE(review): other tests in this file check `journal_entries.json`; confirm
// that `sample_entries.json` is actually produced by `--demo`.
#[test]
#[ignore]
fn test_deterministic_generation_with_seed() {
    let first_root = TempDir::new().unwrap();
    let second_root = TempDir::new().unwrap();
    let first_out = first_root.path().join("output");
    let second_out = second_root.path().join("output");

    // Run the generator twice, once per output directory, with one seed.
    for out_dir in [&first_out, &second_out] {
        synth_data_generate()
            .args(["--demo", "-o", out_dir.to_str().unwrap(), "-s", "99999"])
            .assert()
            .success();
    }

    let first_text = fs::read_to_string(first_out.join("sample_entries.json")).unwrap();
    let second_text = fs::read_to_string(second_out.join("sample_entries.json")).unwrap();
    assert_eq!(
        first_text, second_text,
        "Same seed should produce identical output"
    );
}
/// Two `generate --demo` runs with different seeds must write differing
/// `sample_entries.json` files. Ignored by default.
#[test]
#[ignore]
fn test_different_seeds_different_output() {
    let first_root = TempDir::new().unwrap();
    let second_root = TempDir::new().unwrap();
    let first_out = first_root.path().join("output");
    let second_out = second_root.path().join("output");

    // Same invocation for both runs; only the target directory and seed vary.
    for (out_dir, seed) in [(&first_out, "11111"), (&second_out, "22222")] {
        synth_data_generate()
            .args(["--demo", "-o", out_dir.to_str().unwrap(), "-s", seed])
            .assert()
            .success();
    }

    let first_text = fs::read_to_string(first_out.join("sample_entries.json")).unwrap();
    let second_text = fs::read_to_string(second_out.join("sample_entries.json")).unwrap();
    assert_ne!(
        first_text, second_text,
        "Different seeds should produce different output"
    );
}
/// A config produced by `init` and then edited (here: `global.seed` set to
/// 42) must still pass `validate` and be usable by `generate`.
/// Ignored by default.
///
/// The test panics if the generated config has no `global` section, instead
/// of silently skipping the edit and validating the unmodified file.
#[test]
#[ignore]
fn test_config_modification_and_regenerate() {
    let temp_dir = TempDir::new().unwrap();
    let config_path = temp_dir.path().join("modify_config.yaml");
    let output_dir = temp_dir.path().join("output");

    synth_data()
        .arg("init")
        .arg("-o")
        .arg(&config_path)
        .assert()
        .success();

    // Load the generated YAML, override the seed, and write it back.
    let content = fs::read_to_string(&config_path).unwrap();
    let mut config: serde_yaml::Value = serde_yaml::from_str(&content).unwrap();
    let global = config
        .get_mut("global")
        .expect("generated config should contain a `global` section");
    global["seed"] = serde_yaml::Value::Number(serde_yaml::Number::from(42));
    fs::write(&config_path, serde_yaml::to_string(&config).unwrap()).unwrap();

    synth_data()
        .arg("validate")
        .arg("-c")
        .arg(&config_path)
        .assert()
        .success();

    synth_data_generate()
        .arg("-c")
        .arg(&config_path)
        .arg("-o")
        .arg(&output_dir)
        .assert()
        .success();
    assert!(output_dir.exists(), "Should generate with modified config");
}
/// `validate` must exit with a failure status for a config whose `global`
/// section holds a non-numeric seed and an unknown field.
#[test]
fn test_invalid_config_handling() {
    let broken_yaml = "global:\n seed: invalid_value\n bogus: field";

    let workspace = TempDir::new().unwrap();
    let config_path = workspace.path().join("invalid_config.yaml");
    fs::write(&config_path, broken_yaml).unwrap();

    synth_data()
        .args(["validate", "-c", config_path.to_str().unwrap()])
        .assert()
        .failure();
}
/// `generate` must exit with a failure status when the `-c` config file does
/// not exist.
///
/// Both the missing config path and the output path live inside a fresh
/// temporary directory, so the test does not depend on absolute paths such as
/// `/tmp` and cannot leave files in a shared location if the command
/// unexpectedly succeeds.
#[test]
fn test_missing_config_handling() {
    let temp_dir = TempDir::new().unwrap();
    // Nothing is ever created at this path, so it is guaranteed to be absent.
    let missing_config = temp_dir.path().join("nonexistent").join("config.yaml");
    let output_dir = temp_dir.path().join("output");

    synth_data_generate()
        .arg("-c")
        .arg(&missing_config)
        .arg("-o")
        .arg(&output_dir)
        .assert()
        .failure();
}
/// `generate --demo` must exit with a failure status when the output
/// directory cannot be created.
///
/// The output path is nested under a regular file, so no directory can be
/// created there on any platform or under any user, unlike a path below
/// `/proc`, which is only unwritable on Linux.
#[test]
fn test_invalid_output_directory() {
    let temp_dir = TempDir::new().unwrap();

    // A plain file standing where a parent directory would be needed.
    let blocker = temp_dir.path().join("not_a_directory");
    fs::write(&blocker, b"").unwrap();
    let output_dir = blocker.join("invalid_output_dir");

    synth_data_generate()
        .arg("--demo")
        .arg("-o")
        .arg(&output_dir)
        .assert()
        .failure();
}
/// A config extended with a second company must still pass `validate`.
///
/// The first company's `volume_weight` is set to 0.7 and a second company
/// with weight 0.3 is appended. The test panics if the generated config has
/// no `companies` sequence or the sequence is empty, instead of silently
/// validating the unmodified file.
#[test]
fn test_multi_company_config_validation() {
    let temp_dir = TempDir::new().unwrap();
    let config_path = temp_dir.path().join("multi_company.yaml");

    synth_data()
        .arg("init")
        .arg("-o")
        .arg(&config_path)
        .assert()
        .success();

    let content = fs::read_to_string(&config_path).unwrap();
    let mut config: serde_yaml::Value = serde_yaml::from_str(&content).unwrap();

    let second_company = serde_yaml::from_str::<serde_yaml::Value>(
        r#"
code: "2000"
name: "Subsidiary Company"
currency: "EUR"
country: "DE"
annual_transaction_volume: "ten_k"
volume_weight: 0.3
fiscal_year_variant: "K4"
"#,
    )
    .unwrap();

    // One lookup serves both edits.
    let companies = config
        .get_mut("companies")
        .and_then(|c| c.as_sequence_mut())
        .expect("generated config should contain a `companies` sequence");

    let first = companies
        .first_mut()
        .expect("generated config should contain at least one company");
    first["volume_weight"] = serde_yaml::Value::Number(serde_yaml::Number::from(0.7f64));
    companies.push(second_company);

    fs::write(&config_path, serde_yaml::to_string(&config).unwrap()).unwrap();

    synth_data()
        .arg("validate")
        .arg("-c")
        .arg(&config_path)
        .assert()
        .success();
}
/// A config written by `init` must deserialize into
/// `datasynth_config::GeneratorConfig`, serialize back to YAML, and
/// deserialize again from that YAML.
#[test]
fn test_config_roundtrip() {
    let workspace = TempDir::new().unwrap();
    let config_path = workspace.path().join("roundtrip_config.yaml");

    synth_data()
        .args([
            "init",
            "-o",
            config_path.to_str().unwrap(),
            "-i",
            "manufacturing",
            "-c",
            "medium",
        ])
        .assert()
        .success();

    let yaml_text = fs::read_to_string(&config_path).unwrap();

    // file -> struct -> YAML -> struct; each hop must succeed.
    let typed = serde_yaml::from_str::<datasynth_config::GeneratorConfig>(&yaml_text)
        .expect("Should parse as GeneratorConfig");
    let reserialized = serde_yaml::to_string(&typed).expect("Should serialize back");
    let _roundtripped = serde_yaml::from_str::<datasynth_config::GeneratorConfig>(&reserialized)
        .expect("Should parse after roundtrip");
}
/// `generate --demo` must succeed and finish in under 20 seconds of
/// wall-clock time, measured around the whole command invocation.
/// Ignored by default.
#[test]
#[ignore]
fn test_demo_generation_performance() {
    let budget = Duration::from_secs(20);
    let workspace = TempDir::new().unwrap();
    let output_dir = workspace.path().join("output");

    let timer = std::time::Instant::now();
    synth_data_generate()
        .args(["--demo", "-o", output_dir.to_str().unwrap()])
        .assert()
        .success();
    let elapsed = timer.elapsed();

    println!("Demo generation completed in {:?}", elapsed);
    assert!(
        elapsed < budget,
        "Demo generation should complete in under 20 seconds"
    );
}
/// `-v info` must succeed and print text containing "Industry Presets" on
/// stdout.
#[test]
fn test_verbose_mode() {
    let expected_heading = predicate::str::contains("Industry Presets");

    synth_data()
        .args(["-v", "info"])
        .assert()
        .success()
        .stdout(expected_heading);
}
/// The long `--verbose` flag must be accepted in front of `info`.
#[test]
fn test_verbose_long_form() {
    synth_data()
        .args(["--verbose", "info"])
        .assert()
        .success();
}
/// Every subcommand must accept `--help`, exit successfully, and print text
/// containing "Usage:" on stdout.
#[test]
fn test_subcommand_help() {
    for name in ["generate", "init", "validate", "info"] {
        synth_data()
            .args([name, "--help"])
            .assert()
            .success()
            .stdout(predicate::str::contains("Usage:"));
    }
}