use assert_cmd::Command;
use std::fs;
use std::path::Path;
use std::time::Duration;
use tempfile::TempDir;
/// Timeout, in seconds, applied to every CLI process spawned via `synth_data_bin`.
const TEST_TIMEOUT_SECS: u64 = 60;
/// Returns a [`Command`] for the `datasynth-data` binary with a timeout of
/// `TEST_TIMEOUT_SECS` seconds already applied.
///
/// # Panics
/// Panics with "datasynth-data binary must build" when `Command::cargo_bin`
/// returns an error.
// NOTE(review): the allow presumably silences a deprecation on
// `Command::cargo_bin` in the assert_cmd version in use — TODO confirm.
#[allow(deprecated)]
fn synth_data_bin() -> Command {
    let limit = Duration::from_secs(TEST_TIMEOUT_SECS);
    let mut bin = Command::cargo_bin("datasynth-data").expect("datasynth-data binary must build");
    bin.timeout(limit);
    bin
}
/// Writes a fixed group-config YAML document (one entity, `TEST_PARENT`) to `path`.
///
/// # Panics
/// Panics with "write minimal group config" if the file cannot be written.
fn write_minimal_group_config(path: &Path) {
    // The payload is runtime data: keep it byte-for-byte as is.
    const GROUP_CONFIG_YAML: &str = r#"
id: "TEST_GROUP_CLI_2024_Q1"
name: "Test group for CLI tests"
presentation_currency: "CHF"
period:
start_date: "2024-01-01"
length: quarterly
seed: 42
scoping_profiles:
significant:
row_budget: 1000
ownership:
parent_entity_code: TEST_PARENT
entities:
- code: TEST_PARENT
country: CH
functional_currency: CHF
scoping_profile: significant
consolidation_method: parent
fx:
base_currency: CHF
rate_source: inline
rates: {}
policy:
balance_sheet: closing
income_statement: average
equity: historical
audit:
group_materiality:
basis: revenue
percent: 0.01
"#;
    fs::write(path, GROUP_CONFIG_YAML).expect("write minimal group config");
}
/// Writes a fixed generator-config YAML document (one company, `ACME`) to `path`.
///
/// # Panics
/// Panics with "write minimal generator config" if the file cannot be written.
fn write_minimal_generator_config(path: &Path) {
    // The payload is runtime data: keep it byte-for-byte as is.
    const GENERATOR_CONFIG_YAML: &str = r#"
global:
industry: manufacturing
period_months: 1
start_date: "2024-01-01"
seed: 42
companies:
- code: ACME
name: Acme Inc
chart_of_accounts:
complexity: small
output:
output_directory: "./out"
"#;
    fs::write(path, GENERATOR_CONFIG_YAML).expect("write minimal generator config");
}
/// `group manifest` on the minimal group config must succeed and write a JSON
/// manifest carrying the group id, the presentation currency, exactly one
/// entity (`TEST_PARENT`) and a non-empty shard plan.
#[test]
fn group_manifest_happy_path() {
    let workdir = TempDir::new().expect("tempdir");
    let config_file = workdir.path().join("group.yaml");
    let manifest_file = workdir.path().join("manifest.json");
    write_minimal_group_config(&config_file);

    let config_arg = config_file.to_str().unwrap();
    let manifest_arg = manifest_file.to_str().unwrap();
    synth_data_bin()
        .args(["group", "manifest", "--config", config_arg, "--out", manifest_arg])
        .assert()
        .success();

    assert!(manifest_file.exists(), "manifest.json must be written");
    let raw = fs::read(&manifest_file).expect("read manifest");
    let manifest: serde_json::Value =
        serde_json::from_slice(&raw).expect("manifest must parse as JSON");

    // Header fields echo the config.
    assert_eq!(manifest["group_id"], "TEST_GROUP_CLI_2024_Q1");
    assert_eq!(manifest["presentation_currency"], "CHF");

    // The single configured entity shows up in the ownership graph.
    let entity_list = manifest["ownership_graph"]["entities"]
        .as_array()
        .expect("ownership_graph.entities must be an array");
    assert_eq!(entity_list.len(), 1);
    assert_eq!(entity_list[0]["code"], "TEST_PARENT");

    let shard_list = manifest["shard_plan"]["shards"]
        .as_array()
        .expect("shard_plan.shards must be an array");
    assert!(!shard_list.is_empty(), "shard plan must contain >=1 shard");
}
/// `group manifest` on a config containing only `id` and `name` must exit
/// non-zero, mention "parse" or "missing" on stderr, and leave no manifest file.
#[test]
fn group_manifest_invalid_config_fails() {
    let workdir = TempDir::new().expect("tempdir");
    let config_file = workdir.path().join("bad.yaml");
    let manifest_file = workdir.path().join("manifest.json");
    fs::write(&config_file, "id: BAD\nname: missing-fields\n").expect("write bad config");

    let config_arg = config_file.to_str().unwrap();
    let manifest_arg = manifest_file.to_str().unwrap();
    let output = synth_data_bin()
        .args(["group", "manifest", "--config", config_arg, "--out", manifest_arg])
        .output()
        .expect("run datasynth-data");
    let stderr = String::from_utf8_lossy(&output.stderr);

    assert!(
        !output.status.success(),
        "invalid config must produce a non-zero exit"
    );
    let mentions_cause = ["parse", "missing"]
        .iter()
        .any(|needle| stderr.contains(*needle));
    assert!(
        mentions_cause,
        "stderr should mention parse / missing field; got: {stderr}"
    );
    assert!(
        !manifest_file.exists(),
        "manifest.json must not be written on failure"
    );
}
/// After a successful `group manifest`, `group shard` with a shard id that is
/// not in the plan must exit with code 2 and report both the bogus id and the
/// text "valid ids" on stderr.
#[test]
fn group_shard_unknown_shard_id_fails_fast() {
    let workdir = TempDir::new().expect("tempdir");
    let config_file = workdir.path().join("group.yaml");
    let manifest_file = workdir.path().join("manifest.json");
    let shard_dir = workdir.path().join("shard_out");
    write_minimal_group_config(&config_file);

    let config_arg = config_file.to_str().unwrap();
    let manifest_arg = manifest_file.to_str().unwrap();

    // Step 1: produce a real manifest to run the shard command against.
    synth_data_bin()
        .args(["group", "manifest", "--config", config_arg, "--out", manifest_arg])
        .assert()
        .success();

    // Step 2: ask for a shard that does not exist.
    let output = synth_data_bin()
        .args([
            "group",
            "shard",
            "--manifest",
            manifest_arg,
            "--shard-id",
            "S_NOT_REAL",
            "--out",
            shard_dir.to_str().unwrap(),
        ])
        .output()
        .expect("run datasynth-data");
    let stderr = String::from_utf8_lossy(&output.stderr);

    assert_eq!(
        output.status.code().expect("exit code"),
        2,
        "unknown shard_id must exit 2; stderr={}",
        stderr
    );
    assert!(
        stderr.contains("S_NOT_REAL"),
        "stderr should echo the bogus shard id; got: {stderr}"
    );
    assert!(
        stderr.contains("valid ids"),
        "stderr should list the valid ids; got: {stderr}"
    );
}
/// `generate` given a group-shaped config whose `parent_entity_code` names an
/// entity that is not listed must fail, and stderr must mention either
/// `parent_entity_code` or `NOT_DECLARED`.
#[test]
fn generate_auto_detect_group_config_dispatches_into_group() {
    // The payload is runtime data: keep it byte-for-byte as is.
    const GROUP_YAML: &str = r#"
id: "AUTO_DETECT_TEST"
presentation_currency: "CHF"
period: { start_date: "2024-01-01", length: quarterly }
seed: 1
ownership:
parent_entity_code: NOT_DECLARED
entities: []
fx:
base_currency: CHF
rate_source: inline
rates: {}
policy: { balance_sheet: closing, income_statement: average, equity: historical }
"#;

    let workdir = TempDir::new().expect("tempdir");
    let config_file = workdir.path().join("group.yaml");
    let output_dir = workdir.path().join("out");
    fs::write(&config_file, GROUP_YAML).expect("write group cfg");

    let config_arg = config_file.to_str().unwrap();
    let output_arg = output_dir.to_str().unwrap();
    let output = synth_data_bin()
        .args(["generate", "--config", config_arg, "--output", output_arg])
        .output()
        .expect("run datasynth-data");
    let stderr = String::from_utf8_lossy(&output.stderr);
    let stdout = String::from_utf8_lossy(&output.stdout);

    assert!(
        !output.status.success(),
        "must fail since validate rejects the config; stderr={stderr}",
    );
    let names_validator_error = ["parent_entity_code", "NOT_DECLARED"]
        .iter()
        .any(|needle| stderr.contains(*needle));
    assert!(
        names_validator_error,
        "auto-detected group config should surface group-validator errors; \
         stdout={stdout}, stderr={stderr}",
    );
}
/// `generate` given a `global`/`companies` style config must fail, and the
/// failure must not mention `parent_entity_code` on stderr.
#[test]
fn generate_auto_detect_passthrough_for_non_group_config() {
    // The payload is runtime data: keep it byte-for-byte as is.
    const SINGLE_YAML: &str = r#"
global:
industry: manufacturing
period_months: 1
start_date: "2024-01-01"
companies:
- code: ACME
name: Acme Inc
"#;

    let workdir = TempDir::new().expect("tempdir");
    let config_file = workdir.path().join("single.yaml");
    let output_dir = workdir.path().join("out");
    fs::write(&config_file, SINGLE_YAML).expect("write single cfg");

    let config_arg = config_file.to_str().unwrap();
    let output_arg = output_dir.to_str().unwrap();
    let output = synth_data_bin()
        .args(["generate", "--config", config_arg, "--output", output_arg])
        .output()
        .expect("run datasynth-data");
    let stderr = String::from_utf8_lossy(&output.stderr);

    assert!(
        !output.status.success(),
        "must fail since the GeneratorConfig parse is incomplete; stderr={stderr}"
    );
    let looks_like_group_error = stderr.contains("parent_entity_code");
    assert!(
        !looks_like_group_error,
        "non-group config must not be auto-detected as a group config; stderr={stderr}"
    );
}
/// `group generate-chain` with a periods file containing `[]` must exit
/// non-zero and say on stderr that the periods list is empty.
#[test]
fn group_generate_chain_rejects_empty_periods() {
    let workdir = TempDir::new().expect("tempdir");
    let config_file = workdir.path().join("group.yaml");
    let periods_file = workdir.path().join("periods.json");
    let chain_dir = workdir.path().join("chain_out");
    write_minimal_group_config(&config_file);
    fs::write(&periods_file, "[]").expect("write empty periods");

    let config_arg = config_file.to_str().unwrap();
    let periods_arg = periods_file.to_str().unwrap();
    let out_arg = chain_dir.to_str().unwrap();
    let output = synth_data_bin()
        .args([
            "group",
            "generate-chain",
            "--config",
            config_arg,
            "--periods",
            periods_arg,
            "--out",
            out_arg,
        ])
        .output()
        .expect("run datasynth-data");
    let stderr = String::from_utf8_lossy(&output.stderr);

    assert!(
        !output.status.success(),
        "empty periods array must produce a non-zero exit"
    );
    let mentions_empty = ["at least one entry", "must be non-empty"]
        .iter()
        .any(|needle| stderr.contains(*needle));
    assert!(
        mentions_empty,
        "stderr should mention empty periods; got: {stderr}"
    );
}
/// `group generate-chain` with a periods file holding an array of plain
/// strings must exit non-zero and mention `PeriodChainSpec` or "parse" on stderr.
#[test]
fn group_generate_chain_rejects_malformed_periods() {
    let workdir = TempDir::new().expect("tempdir");
    let config_file = workdir.path().join("group.yaml");
    let periods_file = workdir.path().join("periods.json");
    let chain_dir = workdir.path().join("chain_out");
    write_minimal_group_config(&config_file);
    fs::write(&periods_file, r#"["not", "a", "spec"]"#).expect("write bad periods");

    let config_arg = config_file.to_str().unwrap();
    let periods_arg = periods_file.to_str().unwrap();
    let out_arg = chain_dir.to_str().unwrap();
    let output = synth_data_bin()
        .args([
            "group",
            "generate-chain",
            "--config",
            config_arg,
            "--periods",
            periods_arg,
            "--out",
            out_arg,
        ])
        .output()
        .expect("run datasynth-data");
    let stderr = String::from_utf8_lossy(&output.stderr);

    assert!(
        !output.status.success(),
        "malformed periods JSON must produce a non-zero exit"
    );
    let mentions_parse = ["PeriodChainSpec", "parse"]
        .iter()
        .any(|needle| stderr.contains(*needle));
    assert!(
        mentions_parse,
        "stderr should mention parse failure; got: {stderr}"
    );
}
/// Builds a manifest, runs `group shard` for the first shard in its plan, and
/// checks that `entities/TEST_PARENT` exists under the shard output directory.
/// Ignored by default; see the `#[ignore]` reason.
#[test]
#[ignore = "v5.0: drives orchestrator end-to-end (~17 GiB RSS per entity); run manually with --ignored"]
fn group_shard_happy_path_ignored() {
    let workdir = TempDir::new().expect("tempdir");
    let config_file = workdir.path().join("group.yaml");
    let manifest_file = workdir.path().join("manifest.json");
    let shard_dir = workdir.path().join("shard_out");
    write_minimal_group_config(&config_file);

    let config_arg = config_file.to_str().unwrap();
    let manifest_arg = manifest_file.to_str().unwrap();
    synth_data_bin()
        .args(["group", "manifest", "--config", config_arg, "--out", manifest_arg])
        .assert()
        .success();

    // Pick the first shard id out of the freshly written manifest.
    let manifest_bytes = fs::read(&manifest_file).expect("read manifest");
    let manifest_json: serde_json::Value =
        serde_json::from_slice(&manifest_bytes).expect("parse manifest");
    let shard_id = manifest_json["shard_plan"]["shards"][0]["shard_id"]
        .as_str()
        .expect("shard plan must have at least one shard");

    synth_data_bin()
        .args([
            "group",
            "shard",
            "--manifest",
            manifest_arg,
            "--shard-id",
            shard_id,
            "--out",
            shard_dir.to_str().unwrap(),
        ])
        .assert()
        .success();

    let entity_dir = shard_dir.join("entities").join("TEST_PARENT");
    assert!(
        entity_dir.exists(),
        "shard runner must create entities/TEST_PARENT/"
    );
}
/// Runs `group manifest`, then `group shard` for the first planned shard, then
/// `group aggregate`, and checks that
/// `consolidated/consolidated_financial_statements.json` is emitted.
/// Ignored by default; see the `#[ignore]` reason.
#[test]
#[ignore = "v5.0: requires shard-runner output (~17 GiB RSS); run manually with --ignored"]
fn group_aggregate_happy_path_ignored() {
    let workdir = TempDir::new().expect("tempdir");
    let config_file = workdir.path().join("group.yaml");
    let manifest_file = workdir.path().join("manifest.json");
    let shards_root = workdir.path().join("shards");
    let aggregate_dir = workdir.path().join("aggregate_out");
    write_minimal_group_config(&config_file);

    let config_arg = config_file.to_str().unwrap();
    let manifest_arg = manifest_file.to_str().unwrap();

    // Stage 1: manifest.
    synth_data_bin()
        .args(["group", "manifest", "--config", config_arg, "--out", manifest_arg])
        .assert()
        .success();

    let manifest_bytes = fs::read(&manifest_file).expect("read manifest");
    let manifest_json: serde_json::Value =
        serde_json::from_slice(&manifest_bytes).expect("parse manifest");
    let shard_id = manifest_json["shard_plan"]["shards"][0]["shard_id"]
        .as_str()
        .expect("shard plan must have at least one shard");

    // Stage 2: run the first shard into the shared shards directory.
    let shards_arg = shards_root.to_str().unwrap();
    synth_data_bin()
        .args([
            "group",
            "shard",
            "--manifest",
            manifest_arg,
            "--shard-id",
            shard_id,
            "--out",
            shards_arg,
        ])
        .assert()
        .success();

    // Stage 3: aggregate the shard output.
    synth_data_bin()
        .args([
            "group",
            "aggregate",
            "--manifest",
            manifest_arg,
            "--shards-dir",
            shards_arg,
            "--out",
            aggregate_dir.to_str().unwrap(),
        ])
        .assert()
        .success();

    let statements = aggregate_dir
        .join("consolidated")
        .join("consolidated_financial_statements.json");
    assert!(
        statements.exists(),
        "aggregate must emit consolidated_financial_statements.json"
    );
}
/// `group generate --no-parallel-shards` on the minimal group config must
/// succeed and leave both `manifest.json` and
/// `consolidated/consolidated_financial_statements.json` in the output directory.
/// Ignored by default; see the `#[ignore]` reason.
#[test]
#[ignore = "v5.0: drives orchestrator + aggregate end-to-end (~17 GiB RSS); run manually with --ignored"]
fn group_generate_happy_path_ignored() {
    let workdir = TempDir::new().expect("tempdir");
    let config_file = workdir.path().join("group.yaml");
    let output_dir = workdir.path().join("standalone_out");
    write_minimal_group_config(&config_file);

    let config_arg = config_file.to_str().unwrap();
    let output_arg = output_dir.to_str().unwrap();
    synth_data_bin()
        .args([
            "group",
            "generate",
            "--config",
            config_arg,
            "--out",
            output_arg,
            "--no-parallel-shards",
        ])
        .assert()
        .success();

    let manifest_file = output_dir.join("manifest.json");
    assert!(manifest_file.exists());
    let statements = output_dir
        .join("consolidated")
        .join("consolidated_financial_statements.json");
    assert!(statements.exists());
}
/// Plain `generate` on the minimal group config must succeed and leave
/// `manifest.json` plus a `consolidated` entry in the output directory.
/// Ignored by default; see the `#[ignore]` reason.
#[test]
#[ignore = "v5.0: drives orchestrator + aggregate end-to-end (~17 GiB RSS); run manually with --ignored"]
fn generate_auto_detect_group_config_runs_standalone_ignored() {
    let workdir = TempDir::new().expect("tempdir");
    let config_file = workdir.path().join("group.yaml");
    let output_dir = workdir.path().join("auto_detect_out");
    write_minimal_group_config(&config_file);

    let config_arg = config_file.to_str().unwrap();
    let output_arg = output_dir.to_str().unwrap();
    synth_data_bin()
        .args(["generate", "--config", config_arg, "--output", output_arg])
        .assert()
        .success();

    let manifest_file = output_dir.join("manifest.json");
    assert!(manifest_file.exists());
    let consolidated_dir = output_dir.join("consolidated");
    assert!(consolidated_dir.exists());
}
/// Calls `write_minimal_generator_config` and checks that the target file exists.
// NOTE(review): presumably this exists so the helper has a caller and is not
// reported as dead code — confirm before removing.
#[test]
fn _helper_is_used() {
    let workdir = TempDir::new().expect("tempdir");
    let config_file = workdir.path().join("single.yaml");
    write_minimal_generator_config(&config_file);
    let was_written = config_file.exists();
    assert!(was_written);
}