use chrono::Datelike;
use datasynth_config::schema::TransactionVolume;
use datasynth_runtime::{EnhancedOrchestrator, PhaseConfig};
use datasynth_test_utils::assertions::check_benford_distribution;
use datasynth_test_utils::fixtures::{fraud_enabled_config, minimal_config, multi_company_config};
use rust_decimal::Decimal;
use std::collections::{HashMap, HashSet};
#[test]
fn test_benford_compliance_basic() {
let mut config = minimal_config();
config.global.seed = Some(42);
config.global.period_months = 6;
config.companies[0].annual_transaction_volume = TransactionVolume::TenK;
let phase_config = PhaseConfig {
generate_master_data: false,
generate_document_flows: false,
generate_journal_entries: true,
inject_anomalies: false,
show_progress: false,
..Default::default()
};
let mut orchestrator =
EnhancedOrchestrator::new(config, phase_config).expect("Failed to create orchestrator");
let result = orchestrator.generate().expect("Generation failed");
let amounts: Vec<Decimal> = result
.journal_entries
.iter()
.flat_map(|entry| {
entry
.lines
.iter()
.map(|l| l.debit_amount + l.credit_amount)
.filter(|&a| a > Decimal::ZERO)
})
.collect();
assert!(
amounts.len() >= 100,
"Need at least 100 amounts for Benford test, got {}",
amounts.len()
);
let (chi_squared, passes) = check_benford_distribution(&amounts);
println!(
"Benford's Law test: n={}, chi-squared={:.2}, strict_pass={}",
amounts.len(),
chi_squared,
passes
);
assert!(
chi_squared < 200.0,
"Benford's Law test failed with chi-squared={:.2}. Expected reasonable compliance (< 200)",
chi_squared
);
}
#[test]
fn test_benford_compliance_with_fraud() {
let mut config = fraud_enabled_config();
config.global.seed = Some(123);
config.global.period_months = 6;
config.fraud.fraud_rate = 0.02;
let phase_config = PhaseConfig {
generate_master_data: false,
generate_document_flows: false,
generate_journal_entries: true,
inject_anomalies: true,
show_progress: false,
..Default::default()
};
let mut orchestrator =
EnhancedOrchestrator::new(config, phase_config).expect("Failed to create orchestrator");
let result = orchestrator.generate().expect("Generation failed");
let clean_amounts: Vec<Decimal> = result
.journal_entries
.iter()
.filter(|e| !e.header.is_fraud)
.flat_map(|entry| {
entry
.lines
.iter()
.map(|l| l.debit_amount + l.credit_amount)
.filter(|&a| a > Decimal::ZERO)
})
.collect();
if clean_amounts.len() >= 100 {
let (chi_squared, passes) = check_benford_distribution(&clean_amounts);
println!(
"Benford with fraud test: n={}, chi-squared={:.2}, strict_pass={}",
clean_amounts.len(),
chi_squared,
passes
);
assert!(
chi_squared < 200.0,
"Clean transactions should show reasonable Benford compliance, chi-squared={:.2}",
chi_squared
);
}
}
#[test]
fn test_benford_second_digit_analysis() {
let mut config = minimal_config();
config.global.seed = Some(456);
config.global.period_months = 12;
config.companies[0].annual_transaction_volume = TransactionVolume::TenK;
let phase_config = PhaseConfig {
generate_master_data: false,
generate_document_flows: false,
generate_journal_entries: true,
inject_anomalies: false,
show_progress: false,
..Default::default()
};
let mut orchestrator =
EnhancedOrchestrator::new(config, phase_config).expect("Failed to create orchestrator");
let result = orchestrator.generate().expect("Generation failed");
let second_digits: Vec<u32> = result
.journal_entries
.iter()
.flat_map(|entry| entry.lines.iter())
.filter_map(|line| {
let amount = line.debit_amount + line.credit_amount;
if amount > Decimal::ZERO {
let s = amount.to_string();
let digits: Vec<char> = s.chars().filter(|c| c.is_ascii_digit()).collect();
if digits.len() >= 2 && digits[0] != '0' {
digits[1].to_digit(10)
} else {
None
}
} else {
None
}
})
.collect();
if second_digits.len() >= 100 {
let mut counts = [0u32; 10];
for d in &second_digits {
counts[*d as usize] += 1;
}
let total = second_digits.len() as f64;
for (digit, count) in counts.iter().enumerate() {
let freq = *count as f64 / total;
assert!(
(0.05..=0.20).contains(&freq),
"Second digit {} has unusual frequency {:.2}%",
digit,
freq * 100.0
);
}
}
}
#[test]
fn test_balance_equation_holds() {
let mut config = minimal_config();
config.global.seed = Some(789);
config.global.period_months = 3;
config.balance.generate_opening_balances = true;
config.balance.validate_balance_equation = true;
let phase_config = PhaseConfig {
generate_master_data: true,
generate_document_flows: false,
generate_journal_entries: true,
inject_anomalies: false,
show_progress: false,
..Default::default()
};
let mut orchestrator =
EnhancedOrchestrator::new(config, phase_config).expect("Failed to create orchestrator");
let result = orchestrator.generate().expect("Generation failed");
for entry in &result.journal_entries {
let total_debits: Decimal = entry.lines.iter().map(|l| l.debit_amount).sum();
let total_credits: Decimal = entry.lines.iter().map(|l| l.credit_amount).sum();
assert_eq!(
total_debits, total_credits,
"Entry {} is not balanced",
entry.header.document_id
);
}
let mut account_balances: HashMap<String, Decimal> = HashMap::new();
for entry in &result.journal_entries {
for line in &entry.lines {
let balance = account_balances.entry(line.gl_account.clone()).or_default();
*balance += line.debit_amount - line.credit_amount;
}
}
assert!(!account_balances.is_empty(), "Should have account balances");
}
#[test]
fn test_multi_company_balance_coherence() {
let mut config = multi_company_config();
config.global.seed = Some(101112);
config.global.period_months = 3;
let phase_config = PhaseConfig {
generate_master_data: false,
generate_document_flows: false,
generate_journal_entries: true,
inject_anomalies: false,
show_progress: false,
..Default::default()
};
let mut orchestrator = EnhancedOrchestrator::new(config.clone(), phase_config)
.expect("Failed to create orchestrator");
let result = orchestrator.generate().expect("Generation failed");
let mut entries_by_company: HashMap<String, Vec<_>> = HashMap::new();
for entry in &result.journal_entries {
entries_by_company
.entry(entry.header.company_code.clone())
.or_default()
.push(entry);
}
for (company, entries) in &entries_by_company {
for entry in entries {
let total_debits: Decimal = entry.lines.iter().map(|l| l.debit_amount).sum();
let total_credits: Decimal = entry.lines.iter().map(|l| l.credit_amount).sum();
assert_eq!(
total_debits, total_credits,
"Company {} entry {} is not balanced",
company, entry.header.document_id
);
}
}
let company_codes: HashSet<_> = entries_by_company.keys().cloned().collect();
for company in &config.companies {
assert!(
company_codes.contains(&company.code),
"Missing entries for company {}",
company.code
);
}
}
#[test]
fn test_p2p_chain_completeness() {
let mut config = minimal_config();
config.global.seed = Some(131415);
config.global.period_months = 3;
config.master_data.vendors.count = 5;
config.document_flows.p2p.enabled = true;
config.document_flows.p2p.three_way_match_rate = 1.0; config.document_flows.generate_document_references = true;
let phase_config = PhaseConfig {
generate_master_data: true,
generate_document_flows: true,
generate_journal_entries: true,
inject_anomalies: false,
show_progress: false,
..Default::default()
};
let mut orchestrator =
EnhancedOrchestrator::new(config, phase_config).expect("Failed to create orchestrator");
let result = orchestrator.generate().expect("Generation failed");
let doc_flow_entries: Vec<_> = result
.journal_entries
.iter()
.filter(|e| e.header.business_process == Some(datasynth_core::models::BusinessProcess::P2P))
.collect();
if !doc_flow_entries.is_empty() {
let doc_types: HashSet<_> = doc_flow_entries
.iter()
.map(|e| e.header.document_type.as_str())
.collect();
let with_reference: usize = doc_flow_entries
.iter()
.filter(|e| e.header.reference.is_some())
.count();
println!(
"P2P chain test: {} flow entries, {} with references, types: {:?}",
doc_flow_entries.len(),
with_reference,
doc_types
);
}
}
#[test]
fn test_o2c_chain_completeness() {
let mut config = minimal_config();
config.global.seed = Some(161718);
config.global.period_months = 3;
config.master_data.customers.count = 5;
config.document_flows.o2c.enabled = true;
config.document_flows.generate_document_references = true;
let phase_config = PhaseConfig {
generate_master_data: true,
generate_document_flows: true,
generate_journal_entries: true,
inject_anomalies: false,
show_progress: false,
..Default::default()
};
let mut orchestrator =
EnhancedOrchestrator::new(config, phase_config).expect("Failed to create orchestrator");
let result = orchestrator.generate().expect("Generation failed");
let doc_flow_entries: Vec<_> = result
.journal_entries
.iter()
.filter(|e| e.header.business_process == Some(datasynth_core::models::BusinessProcess::O2C))
.collect();
if !doc_flow_entries.is_empty() {
for entry in &doc_flow_entries {
let total_debits: Decimal = entry.lines.iter().map(|l| l.debit_amount).sum();
let total_credits: Decimal = entry.lines.iter().map(|l| l.credit_amount).sum();
assert_eq!(
total_debits, total_credits,
"Document flow entry {} is not balanced",
entry.header.document_id
);
}
}
}
#[test]
fn test_document_reference_integrity() {
let mut config = minimal_config();
config.global.seed = Some(192021);
config.global.period_months = 3;
config.master_data.vendors.count = 5;
config.master_data.customers.count = 5;
config.document_flows.p2p.enabled = true;
config.document_flows.o2c.enabled = true;
config.document_flows.generate_document_references = true;
let phase_config = PhaseConfig {
generate_master_data: true,
generate_document_flows: true,
generate_journal_entries: true,
inject_anomalies: false,
show_progress: false,
..Default::default()
};
let mut orchestrator =
EnhancedOrchestrator::new(config, phase_config).expect("Failed to create orchestrator");
let result = orchestrator.generate().expect("Generation failed");
let all_doc_ids: HashSet<_> = result
.journal_entries
.iter()
.map(|e| e.header.document_id.to_string())
.collect();
let entries_with_refs = result
.journal_entries
.iter()
.filter(|e| e.header.reference.is_some())
.count();
println!(
"Document reference test: {} total entries, {} with references",
result.journal_entries.len(),
entries_with_refs
);
assert_eq!(
all_doc_ids.len(),
result.journal_entries.len(),
"Should have unique document IDs"
);
}
#[test]
fn test_master_data_referential_integrity() {
let mut config = minimal_config();
config.global.seed = Some(222324);
config.global.period_months = 3;
config.master_data.vendors.count = 10;
config.master_data.customers.count = 10;
config.master_data.materials.count = 20;
let phase_config = PhaseConfig {
generate_master_data: true,
generate_document_flows: true,
generate_journal_entries: true,
inject_anomalies: false,
show_progress: false,
..Default::default()
};
let mut orchestrator =
EnhancedOrchestrator::new(config, phase_config).expect("Failed to create orchestrator");
let result = orchestrator.generate().expect("Generation failed");
let config_company_codes: HashSet<_> = minimal_config()
.companies
.iter()
.map(|c| c.code.clone())
.collect();
for entry in &result.journal_entries {
assert!(
config_company_codes.contains(&entry.header.company_code),
"Entry references unknown company: {}",
entry.header.company_code
);
}
for entry in &result.journal_entries {
assert!(
entry.header.fiscal_period >= 1 && entry.header.fiscal_period <= 12,
"Invalid fiscal period: {}",
entry.header.fiscal_period
);
}
}
#[test]
fn test_document_flow_consistency() {
let mut config = minimal_config();
config.global.seed = Some(252627);
config.global.period_months = 6;
config.master_data.vendors.count = 5;
config.master_data.customers.count = 5;
config.document_flows.p2p.enabled = true;
config.document_flows.o2c.enabled = true;
let phase_config = PhaseConfig {
generate_master_data: true,
generate_document_flows: true,
generate_journal_entries: true,
inject_anomalies: false,
show_progress: false,
..Default::default()
};
let mut orchestrator =
EnhancedOrchestrator::new(config, phase_config).expect("Failed to create orchestrator");
let result = orchestrator.generate().expect("Generation failed");
let mut by_process: HashMap<Option<_>, Vec<_>> = HashMap::new();
for entry in &result.journal_entries {
by_process
.entry(entry.header.business_process)
.or_default()
.push(entry);
}
println!(
"Document flow test: {} different business processes",
by_process.len()
);
for (process, entries) in &by_process {
for entry in entries {
let total_debits: Decimal = entry.lines.iter().map(|l| l.debit_amount).sum();
let total_credits: Decimal = entry.lines.iter().map(|l| l.credit_amount).sum();
assert_eq!(
total_debits, total_credits,
"Process {:?} entry {} is not balanced",
process, entry.header.document_id
);
}
}
}
#[test]
fn test_orphan_detection() {
let mut config = minimal_config();
config.global.seed = Some(282930);
config.global.period_months = 3;
let phase_config = PhaseConfig {
generate_master_data: true,
generate_document_flows: false,
generate_journal_entries: true,
inject_anomalies: false,
show_progress: false,
..Default::default()
};
let mut orchestrator =
EnhancedOrchestrator::new(config, phase_config).expect("Failed to create orchestrator");
let result = orchestrator.generate().expect("Generation failed");
for entry in &result.journal_entries {
assert!(
!entry.header.company_code.is_empty(),
"Missing company code"
);
assert!(
entry.header.fiscal_year > 0,
"Invalid fiscal year: {}",
entry.header.fiscal_year
);
assert!(
!entry.lines.is_empty(),
"Entry {} has no lines",
entry.header.document_id
);
for line in &entry.lines {
assert!(!line.gl_account.is_empty(), "Line missing GL account");
}
}
}
#[test]
fn test_cascade_anomaly_tracking() {
let mut config = fraud_enabled_config();
config.global.seed = Some(313233);
config.global.period_months = 3;
config.fraud.enabled = true;
config.fraud.fraud_rate = 0.05;
let phase_config = PhaseConfig {
generate_master_data: true,
generate_document_flows: false,
generate_journal_entries: true,
inject_anomalies: true,
show_progress: false,
..Default::default()
};
let mut orchestrator =
EnhancedOrchestrator::new(config, phase_config).expect("Failed to create orchestrator");
let result = orchestrator.generate().expect("Generation failed");
let fraud_count = result
.journal_entries
.iter()
.filter(|e| e.header.is_fraud)
.count();
let total_count = result.journal_entries.len();
if total_count >= 100 {
let observed_rate = fraud_count as f64 / total_count as f64;
assert!(
observed_rate < 0.15, "Fraud rate too high: {:.2}% (expected ~5%)",
observed_rate * 100.0
);
}
for entry in result.journal_entries.iter().filter(|e| e.header.is_fraud) {
assert!(
entry.header.fraud_type.is_some(),
"Fraud entry {} missing fraud_type",
entry.header.document_id
);
}
}
#[test]
fn test_line_item_distribution() {
let mut config = minimal_config();
config.global.seed = Some(343536);
config.global.period_months = 6;
config.companies[0].annual_transaction_volume = TransactionVolume::TenK;
let phase_config = PhaseConfig {
generate_master_data: false,
generate_document_flows: false,
generate_journal_entries: true,
inject_anomalies: false,
show_progress: false,
..Default::default()
};
let mut orchestrator =
EnhancedOrchestrator::new(config, phase_config).expect("Failed to create orchestrator");
let result = orchestrator.generate().expect("Generation failed");
let mut line_count_freq: HashMap<usize, usize> = HashMap::new();
for entry in &result.journal_entries {
*line_count_freq.entry(entry.lines.len()).or_default() += 1;
}
let total = result.journal_entries.len() as f64;
let two_line_count = *line_count_freq.get(&2).unwrap_or(&0) as f64;
let two_line_pct = two_line_count / total;
if total >= 100.0 {
assert!(
(0.20..=0.80).contains(&two_line_pct),
"Two-line entry percentage {:.1}% is outside expected range",
two_line_pct * 100.0
);
}
let even_count = line_count_freq
.iter()
.filter(|(k, _)| **k % 2 == 0)
.map(|(_, v)| *v)
.sum::<usize>();
let even_pct = even_count as f64 / total;
println!(
"Line item distribution: {:.1}% even line counts",
even_pct * 100.0
);
}
#[test]
fn test_amount_distribution() {
let mut config = minimal_config();
config.global.seed = Some(373839);
config.global.period_months = 6;
config.companies[0].annual_transaction_volume = TransactionVolume::TenK;
let phase_config = PhaseConfig {
generate_master_data: false,
generate_document_flows: false,
generate_journal_entries: true,
inject_anomalies: false,
show_progress: false,
..Default::default()
};
let mut orchestrator = EnhancedOrchestrator::new(config.clone(), phase_config)
.expect("Failed to create orchestrator");
let result = orchestrator.generate().expect("Generation failed");
let amounts: Vec<f64> = result
.journal_entries
.iter()
.flat_map(|entry| entry.lines.iter())
.map(|line| {
let amt = line.debit_amount + line.credit_amount;
amt.try_into().unwrap_or(0.0f64)
})
.filter(|&a| a > 0.0)
.collect();
if amounts.len() >= 100 {
let min_configured = config.transactions.amounts.min_amount;
let max_configured = config.transactions.amounts.max_amount;
for amount in &amounts {
assert!(
*amount >= min_configured && *amount <= max_configured,
"Amount {} outside configured range [{}, {}]",
amount,
min_configured,
max_configured
);
}
let mean = amounts.iter().sum::<f64>() / amounts.len() as f64;
let variance =
amounts.iter().map(|a| (a - mean).powi(2)).sum::<f64>() / amounts.len() as f64;
let std_dev = variance.sqrt();
println!(
"Amount distribution: n={}, mean={:.2}, std_dev={:.2}",
amounts.len(),
mean,
std_dev
);
}
}
#[test]
fn test_seasonality_month_end_spike() {
let mut config = minimal_config();
config.global.seed = Some(404142);
config.global.period_months = 6;
config.companies[0].annual_transaction_volume = TransactionVolume::TenK;
config.transactions.seasonality.month_end_spike = true;
config.transactions.seasonality.month_end_multiplier = 2.5;
let phase_config = PhaseConfig {
generate_master_data: false,
generate_document_flows: false,
generate_journal_entries: true,
inject_anomalies: false,
show_progress: false,
..Default::default()
};
let mut orchestrator =
EnhancedOrchestrator::new(config, phase_config).expect("Failed to create orchestrator");
let result = orchestrator.generate().expect("Generation failed");
let mut by_day: HashMap<u32, usize> = HashMap::new();
for entry in &result.journal_entries {
let day = entry.header.posting_date.day();
*by_day.entry(day).or_default() += 1;
}
let early_total: usize = (1..=25).filter_map(|d| by_day.get(&d)).sum();
let late_total: usize = (26..=31).filter_map(|d| by_day.get(&d)).sum();
let early_avg = early_total as f64 / 25.0;
let late_avg = late_total as f64 / 6.0;
if early_total > 100 && late_total > 10 {
println!(
"Seasonality test: early avg={:.1}, late avg={:.1}, ratio={:.2}",
early_avg,
late_avg,
late_avg / early_avg.max(1.0)
);
}
}
#[test]
fn test_fiscal_period_distribution() {
let mut config = minimal_config();
config.global.seed = Some(434445);
config.global.period_months = 12;
config.companies[0].annual_transaction_volume = TransactionVolume::TenK;
let phase_config = PhaseConfig {
generate_master_data: false,
generate_document_flows: false,
generate_journal_entries: true,
inject_anomalies: false,
show_progress: false,
..Default::default()
};
let mut orchestrator =
EnhancedOrchestrator::new(config, phase_config).expect("Failed to create orchestrator");
let result = orchestrator.generate().expect("Generation failed");
let mut by_period: HashMap<u8, usize> = HashMap::new();
for entry in &result.journal_entries {
*by_period.entry(entry.header.fiscal_period).or_default() += 1;
}
assert!(
by_period.len() >= 10, "Should have entries across most periods, got {}",
by_period.len()
);
for period in 1..=12 {
let count = by_period.get(&period).copied().unwrap_or(0);
println!("Period {}: {} entries", period, count);
}
}