use chrono::{Datelike, NaiveDate, Timelike};
use datasynth_core::utils::seeded_rng;
use rand::prelude::*;
use rand_chacha::ChaCha8Rng;
use rust_decimal::prelude::*;
use rust_decimal::Decimal;
use std::sync::Arc;
use tracing::debug;
use datasynth_config::schema::{
FraudConfig, GeneratorConfig, TemplateConfig, TemporalPatternsConfig, TransactionConfig,
};
use datasynth_core::distributions::{
BusinessDayCalculator, CrossDayConfig, DriftAdjustments, DriftConfig, DriftController,
EventType, LagDistribution, PeriodEndConfig, PeriodEndDynamics, PeriodEndModel,
ProcessingLagCalculator, ProcessingLagConfig, *,
};
use datasynth_core::models::*;
use datasynth_core::templates::{
descriptions::DescriptionContext, DescriptionGenerator, ReferenceGenerator, ReferenceType,
};
use datasynth_core::traits::Generator;
use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
use datasynth_core::CountryPack;
use crate::company_selector::WeightedCompanySelector;
use crate::user_generator::{UserGenerator, UserGeneratorConfig};
/// Stateful generator of `JournalEntry` values.
///
/// Holds the seeded random source, the samplers used for line counts, amounts and
/// timestamps, the master-data pools used for descriptions, and the feature switches
/// (fraud, persona errors, approval, batching, temporal patterns) consulted while
/// building each entry.
pub struct JournalEntryGenerator {
/// Primary random source; the constructor initialises it with `seeded_rng(seed, 0)`.
rng: ChaCha8Rng,
/// Seed supplied at construction; reused to derive the user-generator seed.
seed: u64,
/// Transaction settings the samplers were configured from.
config: TransactionConfig,
/// Shared chart of accounts; used to look up account descriptions.
coa: Arc<ChartOfAccounts>,
/// Company codes entries may be generated for.
companies: Vec<String>,
/// Picks the company code for each entry.
company_selector: WeightedCompanySelector,
/// Samples the debit/credit line counts of an entry.
line_sampler: LineItemSampler,
/// Samples entry totals and splits them across lines.
amount_sampler: AmountSampler,
/// Samples posting dates and times.
temporal_sampler: TemporalSampler,
/// First date of the generation window.
start_date: NaiveDate,
/// Last date of the generation window.
end_date: NaiveDate,
/// Number of entries generated so far (incremented once per entry).
count: u64,
/// Source of deterministic document ids.
uuid_factory: DeterministicUuidFactory,
/// Optional pool of users; populated by the constructor or `with_country_pack_names`.
user_pool: Option<UserPool>,
/// Produces header and line texts.
description_generator: DescriptionGenerator,
/// Produces document reference numbers.
reference_generator: ReferenceGenerator,
/// Template settings (names, descriptions, references).
template_config: TemplateConfig,
/// Vendors available for P2P descriptions; starts as `VendorPool::standard()`.
vendor_pool: VendorPool,
/// Customers available for O2C descriptions; starts as `CustomerPool::standard()`.
customer_pool: CustomerPool,
/// Materials supplied through `with_materials`, if any.
material_pool: Option<MaterialPool>,
/// Set to `true` once non-empty vendors, customers or materials were supplied.
using_real_master_data: bool,
/// Fraud settings (enabled flag, rate, type distribution).
fraud_config: FraudConfig,
/// Whether persona-based human errors may be injected; constructor default is `true`.
persona_errors_enabled: bool,
/// Whether the approval workflow step runs; constructor default is `true`.
approval_enabled: bool,
/// Constructor default is 10000.
approval_threshold: rust_decimal::Decimal,
/// Constructor default is 0.10.
sod_violation_rate: f64,
/// State of an in-progress batch of similar entries, if any.
batch_state: Option<BatchState>,
/// Constructor default is `None`.
drift_controller: Option<DriftController>,
/// Used to move posting dates onto business days; constructor default is a US calendar.
business_day_calculator: Option<BusinessDayCalculator>,
/// Set when processing lags are enabled in the temporal patterns config.
processing_lag_calculator: Option<ProcessingLagCalculator>,
/// The temporal patterns config last applied through a `with_*temporal*` builder.
temporal_patterns_config: Option<TemporalPatternsConfig>,
}
/// Snapshot of the entry that started a batch, reused to build the follow-up entries.
#[derive(Clone)]
struct BatchState {
/// GL account of the first line of the entry that started the batch.
base_account_number: String,
/// Total debit of the entry that started the batch.
base_amount: rust_decimal::Decimal,
/// Business process of the entry that started the batch.
base_business_process: Option<BusinessProcess>,
/// Posting date shared by all entries in the batch.
base_posting_date: NaiveDate,
/// Countdown of batch entries; decremented at the start of each batched entry.
remaining: u8,
}
impl JournalEntryGenerator {
pub fn new_with_params(
config: TransactionConfig,
coa: Arc<ChartOfAccounts>,
companies: Vec<String>,
start_date: NaiveDate,
end_date: NaiveDate,
seed: u64,
) -> Self {
Self::new_with_full_config(
config,
coa,
companies,
start_date,
end_date,
seed,
TemplateConfig::default(),
None,
)
}
#[allow(clippy::too_many_arguments)]
pub fn new_with_full_config(
config: TransactionConfig,
coa: Arc<ChartOfAccounts>,
companies: Vec<String>,
start_date: NaiveDate,
end_date: NaiveDate,
seed: u64,
template_config: TemplateConfig,
user_pool: Option<UserPool>,
) -> Self {
let user_pool = user_pool.or_else(|| {
if template_config.names.generate_realistic_names {
let user_gen_config = UserGeneratorConfig {
culture_distribution: vec![
(
datasynth_core::templates::NameCulture::WesternUs,
template_config.names.culture_distribution.western_us,
),
(
datasynth_core::templates::NameCulture::Hispanic,
template_config.names.culture_distribution.hispanic,
),
(
datasynth_core::templates::NameCulture::German,
template_config.names.culture_distribution.german,
),
(
datasynth_core::templates::NameCulture::French,
template_config.names.culture_distribution.french,
),
(
datasynth_core::templates::NameCulture::Chinese,
template_config.names.culture_distribution.chinese,
),
(
datasynth_core::templates::NameCulture::Japanese,
template_config.names.culture_distribution.japanese,
),
(
datasynth_core::templates::NameCulture::Indian,
template_config.names.culture_distribution.indian,
),
],
email_domain: template_config.names.email_domain.clone(),
generate_realistic_names: true,
};
let mut user_gen = UserGenerator::with_config(seed + 100, user_gen_config);
Some(user_gen.generate_standard(&companies))
} else {
None
}
});
let mut ref_gen = ReferenceGenerator::new(
start_date.year(),
companies
.first()
.map(std::string::String::as_str)
.unwrap_or("1000"),
);
ref_gen.set_prefix(
ReferenceType::Invoice,
&template_config.references.invoice_prefix,
);
ref_gen.set_prefix(
ReferenceType::PurchaseOrder,
&template_config.references.po_prefix,
);
ref_gen.set_prefix(
ReferenceType::SalesOrder,
&template_config.references.so_prefix,
);
let company_selector = WeightedCompanySelector::uniform(companies.clone());
Self {
rng: seeded_rng(seed, 0),
seed,
config: config.clone(),
coa,
companies,
company_selector,
line_sampler: LineItemSampler::with_config(
seed + 1,
config.line_item_distribution.clone(),
config.even_odd_distribution.clone(),
config.debit_credit_distribution.clone(),
),
amount_sampler: AmountSampler::with_config(seed + 2, config.amounts.clone()),
temporal_sampler: TemporalSampler::with_config(
seed + 3,
config.seasonality.clone(),
WorkingHoursConfig::default(),
Vec::new(),
),
start_date,
end_date,
count: 0,
uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::JournalEntry),
user_pool,
description_generator: DescriptionGenerator::new(),
reference_generator: ref_gen,
template_config,
vendor_pool: VendorPool::standard(),
customer_pool: CustomerPool::standard(),
material_pool: None,
using_real_master_data: false,
fraud_config: FraudConfig::default(),
persona_errors_enabled: true, approval_enabled: true, approval_threshold: rust_decimal::Decimal::new(10000, 0), sod_violation_rate: 0.10, batch_state: None,
drift_controller: None,
business_day_calculator: Some(BusinessDayCalculator::new(HolidayCalendar::new(
Region::US,
start_date.year(),
))),
processing_lag_calculator: None,
temporal_patterns_config: None,
}
}
/// Builds a generator from a complete `GeneratorConfig`.
///
/// Company codes, transaction settings and templates are taken from
/// `full_config`; the company selector and fraud settings are then replaced
/// with the configured ones, and temporal patterns are applied when enabled.
pub fn from_generator_config(
    full_config: &GeneratorConfig,
    coa: Arc<ChartOfAccounts>,
    start_date: NaiveDate,
    end_date: NaiveDate,
    seed: u64,
) -> Self {
    let company_codes: Vec<String> = full_config
        .companies
        .iter()
        .map(|company| company.code.clone())
        .collect();
    let weighted_selector = WeightedCompanySelector::from_configs(&full_config.companies);

    let mut generator = Self::new_with_full_config(
        full_config.transactions.clone(),
        coa,
        company_codes,
        start_date,
        end_date,
        seed,
        full_config.templates.clone(),
        None,
    );
    generator.company_selector = weighted_selector;
    generator.fraud_config = full_config.fraud.clone();

    if !full_config.temporal_patterns.enabled {
        return generator;
    }
    generator.with_temporal_patterns(full_config.temporal_patterns.clone(), seed)
}
/// Applies temporal pattern settings (business days, processing lags, period-end dynamics).
///
/// * Business days: the calendar is built for the first configured region only
///   (US when none is listed) and for `start_date`'s year.
/// * Processing lags: a lag calculator is created with `seed`.
/// * Period-end dynamics are installed when the model is not "flat" (the default
///   when unset) or when the month-end peak multiplier is set to something other
///   than 1.0.
///
/// The config is stored on the generator in every case.
pub fn with_temporal_patterns(mut self, config: TemporalPatternsConfig, seed: u64) -> Self {
    if config.business_days.enabled {
        let region = match config.calendars.regions.first() {
            Some(code) => Self::parse_region(code),
            None => Region::US,
        };
        let holidays = HolidayCalendar::new(region, self.start_date.year());
        self.business_day_calculator = Some(BusinessDayCalculator::new(holidays));
    }

    if config.processing_lags.enabled {
        let lag_settings = Self::convert_processing_lag_config(&config.processing_lags);
        let calculator = ProcessingLagCalculator::with_config(seed, lag_settings);
        self.processing_lag_calculator = Some(calculator);
    }

    let non_flat_model = config.period_end.model.as_deref().unwrap_or("flat") != "flat";
    let month_end_peak_set = config
        .period_end
        .month_end
        .as_ref()
        .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0);
    if non_flat_model || month_end_peak_set {
        self.temporal_sampler
            .set_period_end_dynamics(Self::convert_period_end_config(&config.period_end));
    }

    self.temporal_patterns_config = Some(config);
    self
}
/// Same as `with_temporal_patterns`, but the holiday calendar comes from a country pack.
///
/// The calendar is built with `HolidayCalendar::from_country_pack` for
/// `start_date`'s year; the configured region list is not consulted here.
pub fn with_country_pack_temporal(
    mut self,
    config: TemporalPatternsConfig,
    seed: u64,
    pack: &CountryPack,
) -> Self {
    if config.business_days.enabled {
        let holidays = HolidayCalendar::from_country_pack(pack, self.start_date.year());
        self.business_day_calculator = Some(BusinessDayCalculator::new(holidays));
    }

    if config.processing_lags.enabled {
        let lag_settings = Self::convert_processing_lag_config(&config.processing_lags);
        let calculator = ProcessingLagCalculator::with_config(seed, lag_settings);
        self.processing_lag_calculator = Some(calculator);
    }

    let non_flat_model = config.period_end.model.as_deref().unwrap_or("flat") != "flat";
    let month_end_peak_set = config
        .period_end
        .month_end
        .as_ref()
        .is_some_and(|m| m.peak_multiplier.unwrap_or(1.0) != 1.0);
    if non_flat_model || month_end_peak_set {
        self.temporal_sampler
            .set_period_end_dynamics(Self::convert_period_end_config(&config.period_end));
    }

    self.temporal_patterns_config = Some(config);
    self
}
/// Translates the schema-level processing-lag settings into the core `ProcessingLagConfig`.
///
/// Each configured per-event lag becomes a log-normal `LagDistribution` (with the
/// optional min/max hour bounds applied); events without a configured lag keep
/// whatever `ProcessingLagConfig::default()` provides. Cross-day posting settings
/// are copied over when present.
fn convert_processing_lag_config(
    schema: &datasynth_config::schema::ProcessingLagSchemaConfig,
) -> ProcessingLagConfig {
    let to_distribution = |lag: &datasynth_config::schema::LagDistributionSchemaConfig| {
        let mut dist = LagDistribution::log_normal(lag.mu, lag.sigma);
        if let Some(lower) = lag.min_hours {
            dist.min_lag_hours = lower;
        }
        if let Some(upper) = lag.max_hours {
            dist.max_lag_hours = upper;
        }
        dist
    };

    let mut config = ProcessingLagConfig {
        enabled: schema.enabled,
        ..Default::default()
    };

    // Event type paired with its optional schema entry, in the original insertion order.
    let configured_lags = [
        (EventType::SalesOrder, schema.sales_order_lag.as_ref()),
        (EventType::PurchaseOrder, schema.purchase_order_lag.as_ref()),
        (EventType::GoodsReceipt, schema.goods_receipt_lag.as_ref()),
        (EventType::InvoiceReceipt, schema.invoice_receipt_lag.as_ref()),
        (EventType::InvoiceIssue, schema.invoice_issue_lag.as_ref()),
        (EventType::Payment, schema.payment_lag.as_ref()),
        (EventType::JournalEntry, schema.journal_entry_lag.as_ref()),
    ];
    for (event, maybe_lag) in configured_lags {
        if let Some(lag) = maybe_lag {
            config.event_lags.insert(event, to_distribution(lag));
        }
    }

    if let Some(cross_day) = schema.cross_day_posting.as_ref() {
        config.cross_day = CrossDayConfig {
            enabled: cross_day.enabled,
            probability_by_hour: cross_day.probability_by_hour.clone(),
            ..Default::default()
        };
    }

    config
}
/// Translates the schema-level period-end settings into `PeriodEndDynamics`.
///
/// The model name applies to month-, quarter- and year-end alike; the three
/// periods differ only in their default peak multiplier (2.0, 3.5 and 5.0).
/// A period with no schema entry gets an enabled exponential model with
/// start day -10, base 1.0, the period's default peak and decay 0.3.
///
/// NOTE(review): an unset model name is treated as "exponential" here, while the
/// callers treat an unset name as "flat" when deciding whether to call this
/// function - confirm the mismatch is intended.
fn convert_period_end_config(
    schema: &datasynth_config::schema::PeriodEndSchemaConfig,
) -> PeriodEndDynamics {
    let model_name = schema.model.as_deref().unwrap_or("exponential");

    let build = |period: Option<&datasynth_config::schema::PeriodEndModelSchemaConfig>,
                 default_peak: f64|
     -> PeriodEndConfig {
        let Some(p) = period else {
            return PeriodEndConfig {
                enabled: true,
                model: PeriodEndModel::ExponentialAcceleration {
                    start_day: -10,
                    base_multiplier: 1.0,
                    peak_multiplier: default_peak,
                    decay_rate: 0.3,
                },
                additional_multiplier: 1.0,
            };
        };

        let peak = p.peak_multiplier.unwrap_or(default_peak);
        let model = if model_name == "flat" {
            PeriodEndModel::FlatMultiplier { multiplier: peak }
        } else if model_name == "extended_crunch" {
            PeriodEndModel::ExtendedCrunch {
                start_day: p.start_day.unwrap_or(-10),
                sustained_high_days: p.sustained_high_days.unwrap_or(3),
                peak_multiplier: peak,
                // Not configurable through the schema.
                ramp_up_days: 3,
            }
        } else {
            PeriodEndModel::ExponentialAcceleration {
                start_day: p.start_day.unwrap_or(-10),
                base_multiplier: p.base_multiplier.unwrap_or(1.0),
                peak_multiplier: peak,
                decay_rate: p.decay_rate.unwrap_or(0.3),
            }
        };

        PeriodEndConfig {
            enabled: true,
            model,
            additional_multiplier: p.additional_multiplier.unwrap_or(1.0),
        }
    };

    let month_end = build(schema.month_end.as_ref(), 2.0);
    let quarter_end = build(schema.quarter_end.as_ref(), 3.5);
    let year_end = build(schema.year_end.as_ref(), 5.0);
    PeriodEndDynamics::new(month_end, quarter_end, year_end)
}
fn parse_region(region_str: &str) -> Region {
match region_str.to_uppercase().as_str() {
"US" => Region::US,
"DE" => Region::DE,
"GB" => Region::GB,
"CN" => Region::CN,
"JP" => Region::JP,
"IN" => Region::IN,
"BR" => Region::BR,
"MX" => Region::MX,
"AU" => Region::AU,
"SG" => Region::SG,
"KR" => Region::KR,
"FR" => Region::FR,
"IT" => Region::IT,
"ES" => Region::ES,
"CA" => Region::CA,
_ => Region::US,
}
}
/// Replaces the selector used to pick a company code for each entry.
pub fn set_company_selector(&mut self, selector: WeightedCompanySelector) {
self.company_selector = selector;
}
/// Returns the selector currently used to pick company codes.
pub fn company_selector(&self) -> &WeightedCompanySelector {
&self.company_selector
}
/// Replaces the fraud settings in place (non-consuming counterpart of `with_fraud_config`).
pub fn set_fraud_config(&mut self, config: FraudConfig) {
self.fraud_config = config;
}
/// Uses the given vendors instead of the standard vendor pool.
///
/// An empty slice leaves the generator untouched; otherwise the pool is rebuilt
/// from a copy of `vendors` and the generator is flagged as using real master data.
pub fn with_vendors(mut self, vendors: &[Vendor]) -> Self {
    if vendors.is_empty() {
        return self;
    }
    let pool = VendorPool::from_vendors(vendors.to_vec());
    self.vendor_pool = pool;
    self.using_real_master_data = true;
    self
}
/// Uses the given customers instead of the standard customer pool.
///
/// An empty slice leaves the generator untouched; otherwise the pool is rebuilt
/// from a copy of `customers` and the generator is flagged as using real master data.
pub fn with_customers(mut self, customers: &[Customer]) -> Self {
    if customers.is_empty() {
        return self;
    }
    let pool = CustomerPool::from_customers(customers.to_vec());
    self.customer_pool = pool;
    self.using_real_master_data = true;
    self
}
/// Supplies a material pool built from the given materials.
///
/// An empty slice leaves the generator untouched (the pool stays as it was);
/// otherwise the pool is built from a copy of `materials` and the generator is
/// flagged as using real master data.
pub fn with_materials(mut self, materials: &[Material]) -> Self {
    if materials.is_empty() {
        return self;
    }
    let pool = MaterialPool::from_materials(materials.to_vec());
    self.material_pool = Some(pool);
    self.using_real_master_data = true;
    self
}
/// Applies vendors, customers and materials in one call.
///
/// Equivalent to calling `with_vendors`, `with_customers` and `with_materials`
/// in that order; empty slices are ignored by each of them.
pub fn with_master_data(
    self,
    vendors: &[Vendor],
    customers: &[Customer],
    materials: &[Material],
) -> Self {
    let with_vendor_pool = self.with_vendors(vendors);
    let with_customer_pool = with_vendor_pool.with_customers(customers);
    with_customer_pool.with_materials(materials)
}
/// Regenerates the user pool with names drawn from a country pack.
///
/// The name generator and the e-mail domain both come from `pack`; the culture
/// distribution is left empty because the name generator is passed in directly.
/// The user generator is seeded with the generator seed offset by 100, using
/// wrapping arithmetic so a seed near `u64::MAX` cannot overflow and panic in
/// debug builds.
pub fn with_country_pack_names(mut self, pack: &CountryPack) -> Self {
    let name_gen =
        datasynth_core::templates::MultiCultureNameGenerator::from_country_pack(pack);
    let config = UserGeneratorConfig {
        culture_distribution: Vec::new(),
        email_domain: name_gen.email_domain().to_string(),
        generate_realistic_names: true,
    };
    let mut user_gen =
        UserGenerator::with_name_generator(self.seed.wrapping_add(100), config, name_gen);
    self.user_pool = Some(user_gen.generate_standard(&self.companies));
    self
}
/// Returns `true` once non-empty vendors, customers or materials have been supplied.
pub fn is_using_real_master_data(&self) -> bool {
self.using_real_master_data
}
/// Decides whether the next entry is fraudulent and, if so, of which type.
///
/// Returns `None` when fraud is disabled (no random number is drawn in that
/// case) or when the drawn value is at or above the configured fraud rate.
fn determine_fraud(&mut self) -> Option<FraudType> {
    if !self.fraud_config.enabled {
        return None;
    }
    let roll = self.rng.random::<f64>();
    let is_clean = roll >= self.fraud_config.fraud_rate;
    if is_clean {
        None
    } else {
        Some(self.select_fraud_type())
    }
}
/// Draws a fraud type according to the configured type distribution.
///
/// One uniform value is drawn and compared against the running sum of seven
/// weights, in the order listed below. `DuplicatePayment` is the fallback when
/// the value is not below that sum; its own weight is not read here.
fn select_fraud_type(&mut self) -> FraudType {
    let roll: f64 = self.rng.random();
    let weights = &self.fraud_config.fraud_type_distribution;
    let ladder = [
        (weights.suspense_account_abuse, FraudType::SuspenseAccountAbuse),
        (weights.fictitious_transaction, FraudType::FictitiousTransaction),
        (weights.revenue_manipulation, FraudType::RevenueManipulation),
        (weights.expense_capitalization, FraudType::ExpenseCapitalization),
        (weights.split_transaction, FraudType::SplitTransaction),
        (weights.timing_anomaly, FraudType::TimingAnomaly),
        (weights.unauthorized_access, FraudType::UnauthorizedAccess),
    ];

    let mut running_total = 0.0;
    for (weight, fraud_type) in ladder {
        running_total += weight;
        if roll < running_total {
            return fraud_type;
        }
    }
    FraudType::DuplicatePayment
}
fn fraud_type_to_amount_pattern(&self, fraud_type: FraudType) -> FraudAmountPattern {
match fraud_type {
FraudType::SplitTransaction | FraudType::JustBelowThreshold => {
FraudAmountPattern::ThresholdAdjacent
}
FraudType::FictitiousTransaction
| FraudType::FictitiousEntry
| FraudType::SuspenseAccountAbuse
| FraudType::RoundDollarManipulation => FraudAmountPattern::ObviousRoundNumbers,
FraudType::RevenueManipulation
| FraudType::ExpenseCapitalization
| FraudType::ImproperCapitalization
| FraudType::ReserveManipulation
| FraudType::UnauthorizedAccess
| FraudType::PrematureRevenue
| FraudType::UnderstatedLiabilities
| FraudType::OverstatedAssets
| FraudType::ChannelStuffing => FraudAmountPattern::StatisticallyImprobable,
FraudType::DuplicatePayment
| FraudType::TimingAnomaly
| FraudType::SelfApproval
| FraudType::ExceededApprovalLimit
| FraudType::SegregationOfDutiesViolation
| FraudType::UnauthorizedApproval
| FraudType::CollusiveApproval
| FraudType::FictitiousVendor
| FraudType::ShellCompanyPayment
| FraudType::Kickback
| FraudType::KickbackScheme
| FraudType::InvoiceManipulation
| FraudType::AssetMisappropriation
| FraudType::InventoryTheft
| FraudType::GhostEmployee => FraudAmountPattern::Normal,
FraudType::ImproperRevenueRecognition
| FraudType::ImproperPoAllocation
| FraudType::VariableConsiderationManipulation
| FraudType::ContractModificationMisstatement => {
FraudAmountPattern::StatisticallyImprobable
}
FraudType::LeaseClassificationManipulation
| FraudType::OffBalanceSheetLease
| FraudType::LeaseLiabilityUnderstatement
| FraudType::RouAssetMisstatement => FraudAmountPattern::StatisticallyImprobable,
FraudType::FairValueHierarchyManipulation
| FraudType::Level3InputManipulation
| FraudType::ValuationTechniqueManipulation => {
FraudAmountPattern::StatisticallyImprobable
}
FraudType::DelayedImpairment
| FraudType::ImpairmentTestAvoidance
| FraudType::CashFlowProjectionManipulation
| FraudType::ImproperImpairmentReversal => FraudAmountPattern::StatisticallyImprobable,
FraudType::BidRigging
| FraudType::PhantomVendorContract
| FraudType::ConflictOfInterestSourcing => FraudAmountPattern::Normal,
FraudType::SplitContractThreshold => FraudAmountPattern::ThresholdAdjacent,
FraudType::GhostEmployeePayroll
| FraudType::PayrollInflation
| FraudType::DuplicateExpenseReport
| FraudType::FictitiousExpense => FraudAmountPattern::Normal,
FraudType::SplitExpenseToAvoidApproval => FraudAmountPattern::ThresholdAdjacent,
FraudType::RevenueTimingManipulation => FraudAmountPattern::StatisticallyImprobable,
FraudType::QuotePriceOverride => FraudAmountPattern::Normal,
}
}
/// Returns the next document id from the deterministic UUID factory.
#[inline]
fn generate_deterministic_uuid(&self) -> uuid::Uuid {
self.uuid_factory.next()
}
/// Cost centers assigned by `enrich_line_items` to lines whose GL account starts with '5' or '6'.
const COST_CENTER_POOL: &'static [&'static str] =
&["CC1000", "CC2000", "CC3000", "CC4000", "CC5000"];
fn enrich_line_items(&self, entry: &mut JournalEntry) {
let posting_date = entry.header.posting_date;
let company_code = &entry.header.company_code;
let header_text = entry.header.header_text.clone();
let business_process = entry.header.business_process;
let doc_id_bytes = entry.header.document_id.as_bytes();
let mut cc_seed: usize = 0;
for &b in doc_id_bytes {
cc_seed = cc_seed.wrapping_add(b as usize);
}
for (i, line) in entry.lines.iter_mut().enumerate() {
if line.account_description.is_none() {
line.account_description = self
.coa
.get_account(&line.gl_account)
.map(|a| a.short_description.clone());
}
if line.cost_center.is_none() {
let first_char = line.gl_account.chars().next().unwrap_or('0');
if first_char == '5' || first_char == '6' {
let idx = cc_seed.wrapping_add(i) % Self::COST_CENTER_POOL.len();
line.cost_center = Some(Self::COST_CENTER_POOL[idx].to_string());
}
}
if line.profit_center.is_none() {
let suffix = match business_process {
Some(BusinessProcess::P2P) => "-P2P",
Some(BusinessProcess::O2C) => "-O2C",
Some(BusinessProcess::R2R) => "-R2R",
Some(BusinessProcess::H2R) => "-H2R",
_ => "",
};
line.profit_center = Some(format!("PC-{company_code}{suffix}"));
}
if line.line_text.is_none() {
line.line_text = header_text.clone();
}
if line.value_date.is_none()
&& (line.gl_account.starts_with("1100") || line.gl_account.starts_with("2000"))
{
line.value_date = Some(posting_date);
}
if line.assignment.is_none() {
if line.gl_account.starts_with("2000") {
if let Some(ref ht) = header_text {
if let Some(vendor_part) = ht.rsplit(" - ").next() {
if vendor_part.starts_with("V-")
|| vendor_part.starts_with("VENDOR")
|| vendor_part.starts_with("Vendor")
{
line.assignment = Some(vendor_part.to_string());
}
}
}
} else if line.gl_account.starts_with("1100") {
if let Some(ref ht) = header_text {
if let Some(customer_part) = ht.rsplit(" - ").next() {
if customer_part.starts_with("C-")
|| customer_part.starts_with("CUST")
|| customer_part.starts_with("Customer")
{
line.assignment = Some(customer_part.to_string());
}
}
}
}
}
}
}
/// Produces the next journal entry.
///
/// When a batch is in progress (`batch_state` with `remaining > 0`) the call is
/// delegated to `generate_batched_entry`. Otherwise a fresh entry is built:
/// header fields first (date, company, source, process, fraud flag, user,
/// source system, references), then debit lines and credit lines whose amounts
/// are each sampled to sum to the same total, followed by line enrichment,
/// optional persona-error injection, approval handling and a chance to start a
/// new batch.
///
/// NOTE: the samplers and `self.rng` are consumed sequentially, so the order of
/// the calls below determines the output for a given seed.
pub fn generate(&mut self) -> JournalEntry {
debug!(
count = self.count,
companies = self.companies.len(),
start_date = %self.start_date,
end_date = %self.end_date,
"Generating journal entry"
);
// Continue an in-progress batch before starting anything new.
if let Some(ref state) = self.batch_state {
if state.remaining > 0 {
return self.generate_batched_entry();
}
}
self.count += 1;
let document_id = self.generate_deterministic_uuid();
let mut posting_date = self
.temporal_sampler
.sample_date(self.start_date, self.end_date);
// Move non-business days forward; if that leaves the window, fall back to a
// business day derived from `end_date`.
// NOTE(review): the meaning of the boolean arguments is defined by
// `BusinessDayCalculator` - presumably "inclusive"; confirm.
if let Some(ref calc) = self.business_day_calculator {
if !calc.is_business_day(posting_date) {
posting_date = calc.next_business_day(posting_date, false);
if posting_date > self.end_date {
posting_date = calc.prev_business_day(self.end_date, true);
}
}
}
let company_code = self.company_selector.select(&mut self.rng).to_string();
let line_spec = self.line_sampler.sample();
let source = self.select_source();
// Automated and recurring sources skip the human-only steps further down.
let is_automated = matches!(
source,
TransactionSource::Automated | TransactionSource::Recurring
);
let business_process = self.select_business_process();
let fraud_type = self.determine_fraud();
let is_fraud = fraud_type.is_some();
let time = self.temporal_sampler.sample_time(!is_automated);
let created_at = posting_date.and_time(time).and_utc();
let (created_by, user_persona) = self.select_user(is_automated);
let mut header =
JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
header.created_at = created_at;
header.source = source;
header.created_by = created_by;
header.user_persona = user_persona;
header.business_process = Some(business_process);
header.document_type = Self::document_type_for_process(business_process).to_string();
header.is_fraud = is_fraud;
header.fraud_type = fraud_type;
let is_manual = matches!(source, TransactionSource::Manual);
header.is_manual = is_manual;
// Source system: manual entries are 70% "manual" / 30% "spreadsheet"; all others
// are split 40% SAP-FI, 20% SAP-MM, 20% SAP-SD, 15% interface, 5% SAP-HR.
header.source_system = if is_manual {
if self.rng.random::<f64>() < 0.70 {
"manual".to_string()
} else {
"spreadsheet".to_string()
}
} else {
let roll: f64 = self.rng.random();
if roll < 0.40 {
"SAP-FI".to_string()
} else if roll < 0.60 {
"SAP-MM".to_string()
} else if roll < 0.80 {
"SAP-SD".to_string()
} else if roll < 0.95 {
"interface".to_string()
} else {
"SAP-HR".to_string()
}
};
// Flagged when the posting date is after the 25th of the month containing `end_date`.
let is_post_close = posting_date.month() == self.end_date.month()
&& posting_date.year() == self.end_date.year()
&& posting_date.day() > 25;
header.is_post_close = is_post_close;
// Manual entries are created at the sampled time on the posting date; all others
// are created 0-3 days earlier at a random time between 08:00:00 and 17:59:59.
let created_date = if is_manual {
posting_date.and_hms_opt(time.hour().min(23), time.minute(), time.second())
} else {
let lag_days = self.rng.random_range(0i64..=3);
let created_naive_date = posting_date
.checked_sub_signed(chrono::Duration::days(lag_days))
.unwrap_or(posting_date);
created_naive_date.and_hms_opt(
self.rng.random_range(8u32..=17),
self.rng.random_range(0u32..=59),
self.rng.random_range(0u32..=59),
)
};
header.created_date = created_date;
// Description context: P2P entries get a vendor name, O2C entries a customer name.
let mut context =
DescriptionContext::with_period(posting_date.month(), posting_date.year());
match business_process {
BusinessProcess::P2P => {
if let Some(vendor) = self.vendor_pool.random_vendor(&mut self.rng) {
context.vendor_name = Some(vendor.name.clone());
}
}
BusinessProcess::O2C => {
if let Some(customer) = self.customer_pool.random_customer(&mut self.rng) {
context.customer_name = Some(customer.name.clone());
}
}
_ => {}
}
if self.template_config.descriptions.generate_header_text {
header.header_text = Some(self.description_generator.generate_header_text(
business_process,
&context,
&mut self.rng,
));
}
if self.template_config.references.generate_references {
header.reference = Some(
self.reference_generator
.generate_for_process_year(business_process, posting_date.year()),
);
}
// Source document: parsed from the reference when possible, otherwise
// `DocumentRef::Manual` for manual entries and `None` for everything else.
header.source_document = header
.reference
.as_deref()
.and_then(DocumentRef::parse)
.or_else(|| {
if header.source == TransactionSource::Manual {
Some(DocumentRef::Manual)
} else {
None
}
});
let mut entry = JournalEntry::new(header);
// Fraudulent entries sample their total from a fraud-specific amount pattern.
let base_amount = if let Some(ft) = fraud_type {
let pattern = self.fraud_type_to_amount_pattern(ft);
self.amount_sampler.sample_fraud(pattern)
} else {
self.amount_sampler.sample()
};
// NOTE(review): `seasonal_factor` is only applied when `amount_mean_multiplier`
// differs from 1.0 - confirm this is intended.
let drift_adjusted_amount = {
let drift = self.get_drift_adjustments(posting_date);
if drift.amount_mean_multiplier != 1.0 {
let multiplier = drift.amount_mean_multiplier * drift.seasonal_factor;
let adjusted = base_amount.to_f64().unwrap_or(1.0) * multiplier;
Decimal::from_f64_retain(adjusted).unwrap_or(base_amount)
} else {
base_amount
}
};
// Only non-automated entries get human-style rounding / jitter on the total.
let total_amount = if is_automated {
drift_adjusted_amount } else {
self.apply_human_variation(drift_adjusted_amount)
};
// Debit side: `debit_count` amounts sampled to sum to `total_amount`, numbered from 1.
let debit_amounts = self
.amount_sampler
.sample_summing_to(line_spec.debit_count, total_amount);
for (i, amount) in debit_amounts.into_iter().enumerate() {
let account_number = self.select_debit_account().account_number.clone();
let mut line = JournalEntryLine::debit(
entry.header.document_id,
(i + 1) as u32,
account_number.clone(),
amount,
);
if self.template_config.descriptions.generate_line_text {
line.line_text = Some(self.description_generator.generate_line_text(
&account_number,
&context,
&mut self.rng,
));
}
entry.add_line(line);
}
// Credit side: sampled to the same total; line numbers continue after the debits.
let credit_amounts = self
.amount_sampler
.sample_summing_to(line_spec.credit_count, total_amount);
for (i, amount) in credit_amounts.into_iter().enumerate() {
let account_number = self.select_credit_account().account_number.clone();
let mut line = JournalEntryLine::credit(
entry.header.document_id,
(line_spec.debit_count + i + 1) as u32,
account_number.clone(),
amount,
);
if self.template_config.descriptions.generate_line_text {
line.line_text = Some(self.description_generator.generate_line_text(
&account_number,
&context,
&mut self.rng,
));
}
entry.add_line(line);
}
// Post-processing: fill optional line fields, then the optional error / approval steps.
self.enrich_line_items(&mut entry);
if self.persona_errors_enabled && !is_automated {
self.maybe_inject_persona_error(&mut entry);
}
if self.approval_enabled {
self.maybe_apply_approval_workflow(&mut entry, posting_date);
}
self.populate_approval_fields(&mut entry, posting_date);
// May arm `batch_state` so that following calls produce similar entries.
self.maybe_start_batch(&entry);
entry
}
/// Enables or disables injection of persona-based human errors.
pub fn with_persona_errors(mut self, enabled: bool) -> Self {
self.persona_errors_enabled = enabled;
self
}
/// Replaces the fraud settings (builder-style counterpart of `set_fraud_config`).
pub fn with_fraud_config(mut self, config: FraudConfig) -> Self {
self.fraud_config = config;
self
}
/// Returns whether persona-based human errors may be injected.
pub fn persona_errors_enabled(&self) -> bool {
self.persona_errors_enabled
}
/// Passing `false` discards any batch currently in progress; passing `true` changes nothing.
///
/// NOTE(review): no flag is stored, so `maybe_start_batch` can still start new
/// batches after `with_batching(false)` - confirm whether that is intended.
pub fn with_batching(mut self, enabled: bool) -> Self {
if !enabled {
self.batch_state = None;
}
self
}
/// Always returns `true`, regardless of any earlier `with_batching` call.
pub fn batching_enabled(&self) -> bool {
true
}
/// Possibly arms a batch of follow-up entries modelled on `entry`.
///
/// Entries with source `Automated` and fraudulent entries never start a batch
/// (and consume no random numbers). Otherwise a batch starts unless the drawn
/// value exceeds 0.15; its `remaining` counter is drawn from `2..7`, and the
/// base account is the first line's GL account (empty if there are no lines).
fn maybe_start_batch(&mut self, entry: &JournalEntry) {
    let header = &entry.header;
    let ineligible = header.source == TransactionSource::Automated || header.is_fraud;
    if ineligible {
        return;
    }
    if self.rng.random::<f64>() > 0.15 {
        return;
    }

    let remaining: u8 = self.rng.random_range(2..7);
    let base_account_number = match entry.lines.first() {
        Some(first_line) => first_line.gl_account.clone(),
        None => String::new(),
    };
    self.batch_state = Some(BatchState {
        base_account_number,
        base_amount: entry.total_debit(),
        base_business_process: header.business_process,
        base_posting_date: header.posting_date,
        remaining,
    });
}
/// Produces one follow-up entry of the batch described by `batch_state`.
///
/// The entry is always a manual two-line posting (one debit on the batch's base
/// account, one credit on a freshly selected account) on the batch's posting
/// date, with an amount within roughly +/-15% of the batch's base amount
/// (rounded to 2 decimals, at least 1). The batch is cleared once its counter,
/// after this call's decrement, is 1 or less.
///
/// If no batch state exists a warning is logged and a standard entry is generated.
fn generate_batched_entry(&mut self) -> JournalEntry {
use rust_decimal::Decimal;
// Decrement first; the clone below therefore sees the already-reduced counter.
if let Some(ref mut state) = self.batch_state {
state.remaining = state.remaining.saturating_sub(1);
}
let Some(batch) = self.batch_state.clone() else {
tracing::warn!(
"generate_batched_entry called without batch_state; generating standard entry"
);
self.batch_state = None;
return self.generate();
};
let posting_date = batch.base_posting_date;
self.count += 1;
let document_id = self.generate_deterministic_uuid();
let company_code = self.company_selector.select(&mut self.rng).to_string();
// NOTE(review): this value is never read; the two lines are built explicitly below.
let _line_spec = LineItemSpec {
total_count: 2,
debit_count: 1,
credit_count: 1,
split_type: DebitCreditSplit::Equal,
};
let source = TransactionSource::Manual;
// Falls back to R2R when the batch did not record a business process.
let business_process = batch.base_business_process.unwrap_or(BusinessProcess::R2R);
let time = self.temporal_sampler.sample_time(true);
let created_at = posting_date.and_time(time).and_utc();
let (created_by, user_persona) = self.select_user(false);
let mut header =
JournalEntryHeader::with_deterministic_id(company_code, posting_date, document_id);
header.created_at = created_at;
header.source = source;
header.created_by = created_by;
header.user_persona = user_persona;
header.business_process = Some(business_process);
header.document_type = Self::document_type_for_process(business_process).to_string();
header.source_document = Some(DocumentRef::Manual);
header.is_manual = true;
// Same 70% / 30% manual vs. spreadsheet split as manual entries in `generate`.
header.source_system = if self.rng.random::<f64>() < 0.70 {
"manual".to_string()
} else {
"spreadsheet".to_string()
};
header.is_post_close = posting_date.month() == self.end_date.month()
&& posting_date.year() == self.end_date.year()
&& posting_date.day() > 25;
header.created_date =
posting_date.and_hms_opt(time.hour().min(23), time.minute(), time.second());
// Vary the base amount by a factor drawn from [-0.15, 0.15); a failed f64 -> Decimal
// conversion falls back to zero, i.e. no variation.
let variation = self.rng.random_range(-0.15..0.15);
let varied_amount =
batch.base_amount * (Decimal::ONE + Decimal::try_from(variation).unwrap_or_default());
let total_amount = varied_amount.round_dp(2).max(Decimal::from(1));
let mut entry = JournalEntry::new(header);
let debit_line = JournalEntryLine::debit(
entry.header.document_id,
1,
batch.base_account_number.clone(),
total_amount,
);
entry.add_line(debit_line);
let credit_account = self.select_credit_account().account_number.clone();
let credit_line =
JournalEntryLine::credit(entry.header.document_id, 2, credit_account, total_amount);
entry.add_line(credit_line);
self.enrich_line_items(&mut entry);
if self.persona_errors_enabled {
self.maybe_inject_persona_error(&mut entry);
}
if self.approval_enabled {
self.maybe_apply_approval_workflow(&mut entry, posting_date);
}
self.populate_approval_fields(&mut entry, posting_date);
// `batch` holds the post-decrement counter, so this ends the batch after the last entry.
if batch.remaining <= 1 {
self.batch_state = None;
}
entry
}
/// Possibly injects a human error, depending on the persona recorded on the entry.
///
/// The persona is recognised by a case-insensitive keyword search in
/// `header.user_persona` (first match wins, in the order listed); entries with
/// no recognised persona are left alone without drawing a random number. The
/// persona's base error rate is scaled by `apply_stress_factors`, and an error
/// is injected when the drawn value is below the scaled rate.
fn maybe_inject_persona_error(&mut self, entry: &mut JournalEntry) {
    let lowered = entry.header.user_persona.to_lowercase();
    let recognised = [
        ("junior", UserPersona::JuniorAccountant),
        ("senior", UserPersona::SeniorAccountant),
        ("controller", UserPersona::Controller),
        ("manager", UserPersona::Manager),
        ("executive", UserPersona::Executive),
    ]
    .into_iter()
    .find(|(keyword, _)| lowered.contains(*keyword))
    .map(|(_, persona)| persona);
    let Some(persona) = recognised else {
        return;
    };

    let error_rate = self.apply_stress_factors(persona.error_rate(), entry.header.posting_date);
    let roll = self.rng.random::<f64>();
    if roll >= error_rate {
        return;
    }
    self.inject_human_error(entry, persona);
}
/// Scales a base error rate by calendar-driven stress factors.
///
/// * 28 December or later: rate x 2.0, capped at 0.5 (weekday ignored).
/// * Day 28 or later in March, June or September: rate x 1.75, capped at 0.4 (weekday ignored).
/// * Otherwise: x 1.5 from day 28 on, then x 1.2 on Mondays or x 1.3 on Fridays,
///   capped at 0.4.
fn apply_stress_factors(&self, base_rate: f64, posting_date: chrono::NaiveDate) -> f64 {
    use chrono::Datelike;

    let late_in_month = posting_date.day() >= 28;
    if late_in_month {
        match posting_date.month() {
            12 => return (base_rate * 2.0).min(0.5),
            3 | 6 | 9 => return (base_rate * 1.75).min(0.4),
            _ => {}
        }
    }

    // Multiplying by 1.0 leaves a value unchanged, so neutral factors are harmless.
    let month_end_factor = if late_in_month { 1.5 } else { 1.0 };
    let weekday_factor = match posting_date.weekday() {
        chrono::Weekday::Mon => 1.2,
        chrono::Weekday::Fri => 1.3,
        _ => 1.0,
    };
    (base_rate * month_end_factor * weekday_factor).min(0.4)
}
/// Applies the kind of adjustments a person might make when keying in an amount.
///
/// Amounts below 10 are returned unchanged without drawing random numbers.
/// Otherwise the amount is left unchanged when the first drawn value exceeds
/// 0.70; else one of four equally likely variations is applied:
/// a relative jitter in [-2%, 2%), rounding to the nearest 10, rounding to the
/// nearest 100 (only for amounts of at least 500), or an offset of
/// -1.00 to +0.99 (never going below zero).
fn apply_human_variation(&mut self, amount: rust_decimal::Decimal) -> rust_decimal::Decimal {
    use rust_decimal::Decimal;

    let too_small = amount < Decimal::from(10);
    if too_small {
        return amount;
    }
    let keep_exact = self.rng.random::<f64>() > 0.70;
    if keep_exact {
        return amount;
    }

    // Rounds `value` to the nearest multiple of `step`.
    let round_to_step = |value: Decimal, step: i32| {
        let step = Decimal::from(step);
        (value / step).round() * step
    };

    let variation_kind: u8 = self.rng.random_range(0..4);
    if variation_kind == 0 {
        let pct: f64 = self.rng.random_range(-0.02..0.02);
        let delta = amount * Decimal::try_from(pct).unwrap_or_default();
        (amount + delta).round_dp(2)
    } else if variation_kind == 1 {
        round_to_step(amount, 10)
    } else if variation_kind == 2 {
        if amount >= Decimal::from(500) {
            round_to_step(amount, 100)
        } else {
            amount
        }
    } else if variation_kind == 3 {
        let offset_cents: i64 = self.rng.random_range(-100..100);
        (amount + Decimal::new(offset_cents, 2))
            .max(Decimal::ZERO)
            .round_dp(2)
    } else {
        amount
    }
}
/// Offsets a change made to one side of an entry on the opposite side.
///
/// `impact` is added to the first line that carries a positive amount on the
/// side opposite to the modified one (first credit line when a debit was
/// modified, first debit line otherwise). Nothing happens when no such line exists.
fn rebalance_entry(entry: &mut JournalEntry, modified_was_debit: bool, impact: Decimal) {
    let counterpart = entry.lines.iter_mut().find(|line| {
        let opposite_amount = if modified_was_debit {
            line.credit_amount
        } else {
            line.debit_amount
        };
        opposite_amount > Decimal::ZERO
    });

    match counterpart {
        Some(line) if modified_was_debit => line.credit_amount += impact,
        Some(line) => line.debit_amount += impact,
        None => {}
    }
}
fn inject_human_error(&mut self, entry: &mut JournalEntry, persona: UserPersona) {
use rust_decimal::Decimal;
let error_type: u8 = match persona {
UserPersona::JuniorAccountant => {
self.rng.random_range(0..5)
}
UserPersona::SeniorAccountant => {
self.rng.random_range(0..3)
}
UserPersona::Controller | UserPersona::Manager => {
self.rng.random_range(3..5)
}
_ => return,
};
match error_type {
0 => {
if let Some(line) = entry.lines.get_mut(0) {
let is_debit = line.debit_amount > Decimal::ZERO;
let original_amount = if is_debit {
line.debit_amount
} else {
line.credit_amount
};
let s = original_amount.to_string();
if s.len() >= 2 {
let chars: Vec<char> = s.chars().collect();
let pos = self.rng.random_range(0..chars.len().saturating_sub(1));
if chars[pos].is_ascii_digit()
&& chars.get(pos + 1).is_some_and(char::is_ascii_digit)
{
let mut new_chars = chars;
new_chars.swap(pos, pos + 1);
if let Ok(new_amount) =
new_chars.into_iter().collect::<String>().parse::<Decimal>()
{
let impact = new_amount - original_amount;
if is_debit {
entry.lines[0].debit_amount = new_amount;
} else {
entry.lines[0].credit_amount = new_amount;
}
Self::rebalance_entry(entry, is_debit, impact);
entry.header.header_text = Some(
entry.header.header_text.clone().unwrap_or_default()
+ " [HUMAN_ERROR:TRANSPOSITION]",
);
}
}
}
}
}
1 => {
if let Some(line) = entry.lines.get_mut(0) {
let is_debit = line.debit_amount > Decimal::ZERO;
let original_amount = if is_debit {
line.debit_amount
} else {
line.credit_amount
};
let new_amount = original_amount * Decimal::new(10, 0);
let impact = new_amount - original_amount;
if is_debit {
entry.lines[0].debit_amount = new_amount;
} else {
entry.lines[0].credit_amount = new_amount;
}
Self::rebalance_entry(entry, is_debit, impact);
entry.header.header_text = Some(
entry.header.header_text.clone().unwrap_or_default()
+ " [HUMAN_ERROR:DECIMAL_SHIFT]",
);
}
}
2 => {
if let Some(ref mut text) = entry.header.header_text {
let typos = ["teh", "adn", "wiht", "taht", "recieve"];
let correct = ["the", "and", "with", "that", "receive"];
let idx = self.rng.random_range(0..typos.len());
if text.to_lowercase().contains(correct[idx]) {
*text = text.replace(correct[idx], typos[idx]);
*text = format!("{text} [HUMAN_ERROR:TYPO]");
}
}
}
3 => {
if let Some(line) = entry.lines.get_mut(0) {
let is_debit = line.debit_amount > Decimal::ZERO;
let original_amount = if is_debit {
line.debit_amount
} else {
line.credit_amount
};
let new_amount =
(original_amount / Decimal::new(100, 0)).round() * Decimal::new(100, 0);
let impact = new_amount - original_amount;
if is_debit {
entry.lines[0].debit_amount = new_amount;
} else {
entry.lines[0].credit_amount = new_amount;
}
Self::rebalance_entry(entry, is_debit, impact);
entry.header.header_text = Some(
entry.header.header_text.clone().unwrap_or_default()
+ " [HUMAN_ERROR:ROUNDED]",
);
}
}
4 => {
if entry.header.document_date == entry.header.posting_date {
let days_late = self.rng.random_range(5..15);
entry.header.document_date =
entry.header.posting_date - chrono::Duration::days(days_late);
entry.header.header_text = Some(
entry.header.header_text.clone().unwrap_or_default()
+ " [HUMAN_ERROR:LATE_POSTING]",
);
}
}
_ => {}
}
}
/// Attaches an approval workflow to `entry` based on its total debit amount.
///
/// Entries at or below `approval_threshold` receive an auto-approved workflow. Larger
/// entries are flagged as SOX relevant and get a simulated chain: a submit action at the
/// entry's creation time followed by one approval per required level (3 levels above
/// 100000, 2 above 50000, otherwise 1). Each approval lands 1 to 3 hours after the
/// previous step and is pushed forward a day at a time while it falls on a weekend.
///
/// `_posting_date` is currently unused.
fn maybe_apply_approval_workflow(
    &mut self,
    entry: &mut JournalEntry,
    _posting_date: NaiveDate,
) {
    use rust_decimal::Decimal;

    let amount = entry.total_debit();
    let needs_manual_approval = amount > self.approval_threshold;
    if !needs_manual_approval {
        entry.header.approval_workflow = Some(ApprovalWorkflow::auto_approved(
            entry.header.created_by.clone(),
            entry.header.user_persona.clone(),
            amount,
            entry.header.created_at,
        ));
        return;
    }

    entry.header.sox_relevant = true;

    let upper_tier = Decimal::new(100000, 0);
    let middle_tier = Decimal::new(50000, 0);
    let required_levels = match amount {
        a if a > upper_tier => 3,
        a if a > middle_tier => 2,
        _ => 1,
    };

    let mut workflow = ApprovalWorkflow::new(
        entry.header.created_by.clone(),
        entry.header.user_persona.clone(),
        amount,
    );
    workflow.required_levels = required_levels;

    // The preparer submits at the moment the entry was created.
    let submitted_at = entry.header.created_at;
    let preparer_persona = self.parse_persona(&entry.header.user_persona);
    workflow.actions.push(
        ApprovalAction::new(
            entry.header.created_by.clone(),
            entry.header.user_persona.clone(),
            preparer_persona,
            ApprovalActionType::Submit,
            0,
        )
        .with_timestamp(submitted_at),
    );
    workflow.status = ApprovalStatus::Pending;
    workflow.submitted_at = Some(submitted_at);

    let mut approved_at = submitted_at;
    for level in 1..=required_levels {
        // Draw order matters for reproducibility: delay first, approver second.
        let delay_hours = self.rng.random_range(1..4);
        approved_at += chrono::Duration::hours(delay_hours);
        while matches!(
            approved_at.weekday(),
            chrono::Weekday::Sat | chrono::Weekday::Sun
        ) {
            approved_at += chrono::Duration::days(1);
        }

        let (approver_id, approver_role) = self.select_approver(level);
        workflow.actions.push(
            ApprovalAction::new(
                approver_id.clone(),
                approver_role.to_string(),
                approver_role,
                ApprovalActionType::Approve,
                level,
            )
            .with_timestamp(approved_at),
        );
        workflow.current_level = level;
    }

    workflow.status = ApprovalStatus::Approved;
    workflow.approved_at = Some(approved_at);
    entry.header.approval_workflow = Some(workflow);
}
fn select_approver(&mut self, level: u8) -> (String, UserPersona) {
let persona = match level {
1 => UserPersona::Manager,
2 => UserPersona::Controller,
_ => UserPersona::Executive,
};
if let Some(ref pool) = self.user_pool {
if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
return (user.user_id.clone(), persona);
}
}
let approver_id = match persona {
UserPersona::Manager => format!("MGR{:04}", self.rng.random_range(1..100)),
UserPersona::Controller => format!("CTRL{:04}", self.rng.random_range(1..20)),
UserPersona::Executive => format!("EXEC{:04}", self.rng.random_range(1..10)),
_ => format!("USR{:04}", self.rng.random_range(1..1000)),
};
(approver_id, persona)
}
/// Maps a free-form persona label to a [`UserPersona`] by case-insensitive keyword search.
///
/// Keywords are checked in a fixed order (junior, senior, controller, manager, executive,
/// then automated/system); the first hit wins. Labels matching none of them fall back to
/// `UserPersona::JuniorAccountant`.
fn parse_persona(&self, persona_str: &str) -> UserPersona {
    let normalized = persona_str.to_lowercase();
    let mentions = |keyword: &str| normalized.contains(keyword);

    if mentions("junior") {
        UserPersona::JuniorAccountant
    } else if mentions("senior") {
        UserPersona::SeniorAccountant
    } else if mentions("controller") {
        UserPersona::Controller
    } else if mentions("manager") {
        UserPersona::Manager
    } else if mentions("executive") {
        UserPersona::Executive
    } else if mentions("automated") || mentions("system") {
        UserPersona::AutomatedSystem
    } else {
        UserPersona::JuniorAccountant
    }
}
pub fn with_approval(mut self, enabled: bool) -> Self {
self.approval_enabled = enabled;
self
}
pub fn with_approval_threshold(mut self, threshold: rust_decimal::Decimal) -> Self {
self.approval_threshold = threshold;
self
}
pub fn with_sod_violation_rate(mut self, rate: f64) -> Self {
self.sod_violation_rate = rate;
self
}
/// Copies approval results from the entry's workflow onto its header fields.
///
/// Does nothing (and draws no randomness) when the entry has no workflow. Otherwise the
/// most recent `Approve` action supplies `approved_by` and `approval_date`; without any
/// such action the workflow's preparer and `posting_date` are used instead. Afterwards,
/// with probability `sod_violation_rate`, the approver is replaced by the entry's creator
/// and the entry is marked as a preparer/approver SoD violation.
fn populate_approval_fields(&mut self, entry: &mut JournalEntry, posting_date: NaiveDate) {
    let Some(workflow) = entry.header.approval_workflow.as_ref() else {
        return;
    };

    let final_approval = workflow
        .actions
        .iter()
        .rfind(|action| matches!(action.action, ApprovalActionType::Approve));
    let (approver, approved_on) = match final_approval {
        Some(action) => (
            action.actor_id.clone(),
            action.action_timestamp.date_naive(),
        ),
        None => (workflow.preparer_id.clone(), posting_date),
    };
    entry.header.approved_by = Some(approver);
    entry.header.approval_date = Some(approved_on);

    let violates_sod = self.rng.random::<f64>() < self.sod_violation_rate;
    if violates_sod {
        entry.header.approved_by = Some(entry.header.created_by.clone());
        entry.header.sod_violation = true;
        entry.header.sod_conflict_type = Some(SodConflictType::PreparerApprover);
    }
}
pub fn with_drift_controller(mut self, controller: DriftController) -> Self {
self.drift_controller = Some(controller);
self
}
/// Builder: creates a drift controller from `config` when the config is enabled.
///
/// The controller is built with `seed` and the number of periods returned by
/// `calculate_total_periods`. A disabled config leaves the generator unchanged.
pub fn with_drift_config(mut self, config: DriftConfig, seed: u64) -> Self {
    if !config.enabled {
        return self;
    }
    let total_periods = self.calculate_total_periods();
    self.drift_controller = Some(DriftController::new(config, seed, total_periods));
    self
}
/// Counts the calendar months from `start_date` to `end_date`, inclusive of both ends.
///
/// The result is never less than 1, even when the end date precedes the start date.
fn calculate_total_periods(&self) -> u32 {
    let year_span = self.end_date.year() - self.start_date.year();
    let month_span = self.end_date.month() as i32 - self.start_date.month() as i32;
    let inclusive_months = year_span * 12 + month_span + 1;
    inclusive_months.max(1) as u32
}
/// Converts `date` to a zero-based month index relative to the month of `start_date`.
///
/// Dates in months before the start month map to period 0.
fn date_to_period(&self, date: NaiveDate) -> u32 {
    let year_offset = date.year() - self.start_date.year();
    let month_offset = date.month() as i32 - self.start_date.month() as i32;
    let months_since_start = year_offset * 12 + month_offset;
    months_since_start.max(0) as u32
}
fn get_drift_adjustments(&self, date: NaiveDate) -> DriftAdjustments {
if let Some(ref controller) = self.drift_controller {
let period = self.date_to_period(date);
controller.compute_adjustments(period)
} else {
DriftAdjustments::none()
}
}
#[inline]
fn select_user(&mut self, is_automated: bool) -> (String, String) {
if let Some(ref pool) = self.user_pool {
let persona = if is_automated {
UserPersona::AutomatedSystem
} else {
let roll: f64 = self.rng.random();
if roll < 0.4 {
UserPersona::JuniorAccountant
} else if roll < 0.7 {
UserPersona::SeniorAccountant
} else if roll < 0.85 {
UserPersona::Controller
} else {
UserPersona::Manager
}
};
if let Some(user) = pool.get_random_user(persona, &mut self.rng) {
return (user.user_id.clone(), user.persona.to_string());
}
}
if is_automated {
(
format!("BATCH{:04}", self.rng.random_range(1..=20)),
"automated_system".to_string(),
)
} else {
(
format!("USER{:04}", self.rng.random_range(1..=40)),
"senior_accountant".to_string(),
)
}
}
/// Draws a transaction source according to the configured source distribution.
///
/// A single uniform draw is compared against the cumulative weights of manual, automated
/// and recurring sources, in that order; anything beyond them is an adjustment.
#[inline]
fn select_source(&mut self) -> TransactionSource {
    let roll: f64 = self.rng.random();
    let dist = &self.config.source_distribution;

    // Cumulative cut-offs, accumulated left to right.
    let manual_cutoff = dist.manual;
    let automated_cutoff = manual_cutoff + dist.automated;
    let recurring_cutoff = automated_cutoff + dist.recurring;

    if roll < manual_cutoff {
        return TransactionSource::Manual;
    }
    if roll < automated_cutoff {
        return TransactionSource::Automated;
    }
    if roll < recurring_cutoff {
        return TransactionSource::Recurring;
    }
    TransactionSource::Adjustment
}
/// Returns the two-letter document type code used for a business process.
///
/// Processes without a dedicated code share `"SA"` with record-to-report.
#[inline]
fn document_type_for_process(process: BusinessProcess) -> &'static str {
    let code = match process {
        BusinessProcess::O2C => "DR",
        BusinessProcess::P2P => "KR",
        BusinessProcess::H2R => "HR",
        BusinessProcess::A2R => "AA",
        BusinessProcess::R2R => "SA",
        _ => "SA",
    };
    code
}
/// Draws a business process with fixed weights.
///
/// One uniform draw yields O2C 35%, P2P 30%, R2R 20%, H2R 10% and A2R 5%.
fn select_business_process(&mut self) -> BusinessProcess {
    match self.rng.random::<f64>() {
        roll if roll < 0.35 => BusinessProcess::O2C,
        roll if roll < 0.65 => BusinessProcess::P2P,
        roll if roll < 0.85 => BusinessProcess::R2R,
        roll if roll < 0.95 => BusinessProcess::H2R,
        _ => BusinessProcess::A2R,
    }
}
/// Picks a random account for the debit side of an entry.
///
/// One draw selects the account type (asset with probability 0.6, expense otherwise) and
/// a second draw picks uniformly among the chart's accounts of that type. Only the
/// selected type is queried, so no candidate list is built just to be thrown away.
///
/// When the chart has no account of the selected type, a warning is logged and the first
/// account of the chart is returned.
///
/// # Panics
///
/// The fallback indexes `coa.accounts[0]`, which panics if the chart is empty.
#[inline]
fn select_debit_account(&mut self) -> &GLAccount {
    let account_type = if self.rng.random::<f64>() < 0.6 {
        AccountType::Asset
    } else {
        AccountType::Expense
    };
    let candidates: Vec<_> = self.coa.get_accounts_by_type(account_type);
    candidates.choose(&mut self.rng).copied().unwrap_or_else(|| {
        tracing::warn!(
            "Account selection returned empty list, falling back to first COA account"
        );
        &self.coa.accounts[0]
    })
}
/// Picks a random account for the credit side of an entry.
///
/// One draw selects the account type (liability with probability 0.6, revenue otherwise)
/// and a second draw picks uniformly among the chart's accounts of that type. Only the
/// selected type is queried, so no candidate list is built just to be thrown away.
///
/// When the chart has no account of the selected type, a warning is logged and the first
/// account of the chart is returned.
///
/// # Panics
///
/// The fallback indexes `coa.accounts[0]`, which panics if the chart is empty.
#[inline]
fn select_credit_account(&mut self) -> &GLAccount {
    let account_type = if self.rng.random::<f64>() < 0.6 {
        AccountType::Liability
    } else {
        AccountType::Revenue
    };
    let candidates: Vec<_> = self.coa.get_accounts_by_type(account_type);
    candidates.choose(&mut self.rng).copied().unwrap_or_else(|| {
        tracing::warn!(
            "Account selection returned empty list, falling back to first COA account"
        );
        &self.coa.accounts[0]
    })
}
}
/// [`Generator`] implementation: construction from a config tuple, single-item
/// generation, and resetting to the initial seeded state.
impl Generator for JournalEntryGenerator {
    type Item = JournalEntry;
    /// `(transaction config, chart of accounts, company codes, start date, end date)`.
    type Config = (
        TransactionConfig,
        Arc<ChartOfAccounts>,
        Vec<String>,
        NaiveDate,
        NaiveDate,
    );

    /// Builds a generator by forwarding the tuple fields to `new_with_params`.
    fn new(config: Self::Config, seed: u64) -> Self {
        let (transaction_config, coa, companies, start_date, end_date) = config;
        Self::new_with_params(
            transaction_config,
            coa,
            companies,
            start_date,
            end_date,
            seed,
        )
    }

    /// Generates the next journal entry.
    fn generate_one(&mut self) -> Self::Item {
        self.generate()
    }

    /// Rewinds the generator so that generation restarts from its seed.
    ///
    /// Re-seeds the main RNG and the line, amount and temporal samplers (offsets 1 to 3
    /// from the seed), zeroes the counter, resets the UUID factory, drops any in-flight
    /// batch and rebuilds the reference generator with the configured prefixes.
    ///
    /// NOTE(review): other seeded helpers held by the generator (e.g. the processing lag
    /// calculator and drift controller) are not re-seeded here — confirm whether they
    /// carry state that should also be rewound.
    fn reset(&mut self) {
        self.rng = seeded_rng(self.seed, 0);
        // Wrapping arithmetic: a seed close to `u64::MAX` must not overflow.
        self.line_sampler.reset(self.seed.wrapping_add(1));
        self.amount_sampler.reset(self.seed.wrapping_add(2));
        self.temporal_sampler.reset(self.seed.wrapping_add(3));
        self.count = 0;
        self.uuid_factory.reset();
        // A batch started before the reset must not leak into the replayed sequence.
        self.batch_state = None;

        let primary_company = self
            .companies
            .first()
            .map(std::string::String::as_str)
            .unwrap_or("1000");
        let mut ref_gen = ReferenceGenerator::new(self.start_date.year(), primary_company);
        ref_gen.set_prefix(
            ReferenceType::Invoice,
            &self.template_config.references.invoice_prefix,
        );
        ref_gen.set_prefix(
            ReferenceType::PurchaseOrder,
            &self.template_config.references.po_prefix,
        );
        ref_gen.set_prefix(
            ReferenceType::SalesOrder,
            &self.template_config.references.so_prefix,
        );
        self.reference_generator = ref_gen;
    }

    /// Returns the generator's entry counter.
    fn count(&self) -> u64 {
        self.count
    }

    /// Seed this generator was created with.
    fn seed(&self) -> u64 {
        self.seed
    }
}
use datasynth_core::traits::ParallelGenerator;
/// [`ParallelGenerator`] implementation: splits one configured generator into
/// independently seeded partitions.
impl ParallelGenerator for JournalEntryGenerator {
    /// Produces `parts` generators (at least one) that share this generator's settings.
    ///
    /// Partition `i` is seeded with `seed + i * 0x9E3779B97F4A7C15` (wrapping). Each
    /// partition is built through `new_with_full_config` and then receives copies of the
    /// company selector, master-data pools, fraud/approval settings, temporal pattern
    /// configuration and drift controller, plus its own partitioned UUID factory.
    fn split(self, parts: usize) -> Vec<Self> {
        let partition_count = parts.max(1);
        let mut partitions = Vec::with_capacity(partition_count);

        for index in 0..partition_count {
            let sub_seed = self
                .seed
                .wrapping_add((index as u64).wrapping_mul(0x9E3779B97F4A7C15));

            let mut part = JournalEntryGenerator::new_with_full_config(
                self.config.clone(),
                Arc::clone(&self.coa),
                self.companies.clone(),
                self.start_date,
                self.end_date,
                sub_seed,
                self.template_config.clone(),
                self.user_pool.clone(),
            );

            // Settings applied through builders on the parent are carried over by hand.
            part.company_selector = self.company_selector.clone();
            part.vendor_pool = self.vendor_pool.clone();
            part.customer_pool = self.customer_pool.clone();
            part.material_pool = self.material_pool.clone();
            part.using_real_master_data = self.using_real_master_data;
            part.fraud_config = self.fraud_config.clone();
            part.persona_errors_enabled = self.persona_errors_enabled;
            part.approval_enabled = self.approval_enabled;
            part.approval_threshold = self.approval_threshold;
            part.sod_violation_rate = self.sod_violation_rate;

            // The partition index is narrowed with `as u8`, so it wraps past 255.
            part.uuid_factory = DeterministicUuidFactory::for_partition(
                sub_seed,
                GeneratorType::JournalEntry,
                index as u8,
            );

            if let Some(patterns) = self.temporal_patterns_config.as_ref() {
                part.temporal_patterns_config = Some(patterns.clone());
                if patterns.business_days.enabled {
                    if let Some(calendar) = self.business_day_calculator.as_ref() {
                        part.business_day_calculator = Some(calendar.clone());
                    }
                }
                if patterns.processing_lags.enabled {
                    // Lag calculators are rebuilt per partition with the partition seed.
                    let lag_config =
                        Self::convert_processing_lag_config(&patterns.processing_lags);
                    part.processing_lag_calculator =
                        Some(ProcessingLagCalculator::with_config(sub_seed, lag_config));
                }
            }

            if let Some(controller) = self.drift_controller.as_ref() {
                part.drift_controller = Some(controller.clone());
            }

            partitions.push(part);
        }

        partitions
    }
}
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use crate::ChartOfAccountsGenerator;

    /// Builds the small manufacturing chart of accounts (seed 42) used by every test.
    fn small_coa() -> Arc<ChartOfAccounts> {
        let mut coa_gen =
            ChartOfAccountsGenerator::new(CoAComplexity::Small, IndustrySector::Manufacturing, 42);
        Arc::new(coa_gen.generate())
    }

    /// Shorthand for a calendar date that is known to be valid.
    fn date(year: i32, month: u32, day: u32) -> NaiveDate {
        NaiveDate::from_ymd_opt(year, month, day).unwrap()
    }

    /// Default-config generator for company "1000" covering all of 2024, on a given chart.
    fn generator_2024_with_coa(coa: Arc<ChartOfAccounts>, seed: u64) -> JournalEntryGenerator {
        JournalEntryGenerator::new_with_params(
            TransactionConfig::default(),
            coa,
            vec!["1000".to_string()],
            date(2024, 1, 1),
            date(2024, 12, 31),
            seed,
        )
    }

    /// Default-config generator for company "1000" covering all of 2024 on the small chart.
    fn generator_2024(seed: u64) -> JournalEntryGenerator {
        generator_2024_with_coa(small_coa(), seed)
    }

    /// Entries without an injected human error must balance and have at least two lines.
    #[test]
    fn test_generate_balanced_entries() {
        let mut je_gen = generator_2024(42);
        let mut balanced_count = 0;
        for _ in 0..100 {
            let entry = je_gen.generate();
            let has_human_error = entry
                .header
                .header_text
                .as_deref()
                .is_some_and(|t| t.contains("[HUMAN_ERROR:"));
            if !has_human_error {
                assert!(
                    entry.is_balanced(),
                    "Entry {:?} is not balanced",
                    entry.header.document_id
                );
                balanced_count += 1;
            }
            assert!(entry.line_count() >= 2, "Entry has fewer than 2 lines");
        }
        assert!(
            balanced_count >= 80,
            "Expected at least 80 balanced entries, got {}",
            balanced_count
        );
    }

    /// Two generators with the same inputs and seed must produce the same entries.
    #[test]
    fn test_deterministic_generation() {
        let coa = small_coa();
        let mut gen1 = generator_2024_with_coa(Arc::clone(&coa), 42);
        let mut gen2 = generator_2024_with_coa(coa, 42);
        for _ in 0..50 {
            let e1 = gen1.generate();
            let e2 = gen2.generate();
            assert_eq!(e1.header.document_id, e2.header.document_id);
            assert_eq!(e1.total_debit(), e2.total_debit());
        }
    }

    /// With templates enabled, header text, reference and line text are all populated.
    #[test]
    fn test_templates_generate_descriptions() {
        let template_config = TemplateConfig {
            names: datasynth_config::schema::NameTemplateConfig {
                generate_realistic_names: true,
                email_domain: "test.com".to_string(),
                culture_distribution: datasynth_config::schema::CultureDistribution::default(),
            },
            descriptions: datasynth_config::schema::DescriptionTemplateConfig {
                generate_header_text: true,
                generate_line_text: true,
            },
            references: datasynth_config::schema::ReferenceTemplateConfig {
                generate_references: true,
                invoice_prefix: "TEST-INV".to_string(),
                po_prefix: "TEST-PO".to_string(),
                so_prefix: "TEST-SO".to_string(),
            },
        };
        let mut je_gen = JournalEntryGenerator::new_with_full_config(
            TransactionConfig::default(),
            small_coa(),
            vec!["1000".to_string()],
            date(2024, 1, 1),
            date(2024, 12, 31),
            42,
            template_config,
            None,
        )
        .with_persona_errors(false);
        for _ in 0..10 {
            let entry = je_gen.generate();
            assert!(
                entry.header.header_text.is_some(),
                "Header text should be populated"
            );
            assert!(
                entry.header.reference.is_some(),
                "Reference should be populated"
            );
            assert!(
                entry.header.business_process.is_some(),
                "Business process should be set"
            );
            for line in &entry.lines {
                assert!(line.line_text.is_some(), "Line text should be populated");
            }
            assert!(entry.is_balanced());
        }
    }

    /// A generator backed by a user pool always fills in `created_by`.
    #[test]
    fn test_user_pool_integration() {
        let coa = small_coa();
        let companies = vec!["1000".to_string()];
        let mut user_gen = crate::UserGenerator::new(42);
        let user_pool = user_gen.generate_standard(&companies);
        let mut je_gen = JournalEntryGenerator::new_with_full_config(
            TransactionConfig::default(),
            coa,
            companies,
            date(2024, 1, 1),
            date(2024, 12, 31),
            42,
            TemplateConfig::default(),
            Some(user_pool),
        );
        for _ in 0..20 {
            let entry = je_gen.generate();
            assert!(!entry.header.created_by.is_empty());
        }
    }

    /// Supplying vendors, customers and materials flips the real-master-data flag.
    #[test]
    fn test_master_data_connection() {
        let vendors = vec![
            Vendor::new("V-TEST-001", "Test Vendor Alpha", VendorType::Supplier),
            Vendor::new("V-TEST-002", "Test Vendor Beta", VendorType::Technology),
        ];
        let customers = vec![
            Customer::new("C-TEST-001", "Test Customer One", CustomerType::Corporate),
            Customer::new(
                "C-TEST-002",
                "Test Customer Two",
                CustomerType::SmallBusiness,
            ),
        ];
        let materials = vec![Material::new(
            "MAT-TEST-001",
            "Test Material A",
            MaterialType::RawMaterial,
        )];
        let generator = generator_2024(42);
        assert!(!generator.is_using_real_master_data());
        let generator_with_data = generator
            .with_vendors(&vendors)
            .with_customers(&customers)
            .with_materials(&materials);
        assert!(generator_with_data.is_using_real_master_data());
    }

    /// `with_master_data` is equivalent to supplying the three pools individually.
    #[test]
    fn test_with_master_data_convenience_method() {
        let vendors = vec![Vendor::new("V-001", "Vendor One", VendorType::Supplier)];
        let customers = vec![Customer::new(
            "C-001",
            "Customer One",
            CustomerType::Corporate,
        )];
        let materials = vec![Material::new(
            "MAT-001",
            "Material One",
            MaterialType::RawMaterial,
        )];
        let generator = generator_2024(42).with_master_data(&vendors, &customers, &materials);
        assert!(generator.is_using_real_master_data());
    }

    /// Period ends, Mondays and Fridays raise the error rate above a regular day.
    #[test]
    fn test_stress_factors_increase_error_rate() {
        let generator = generator_2024(42);
        let base_rate = 0.1;

        let regular_day = date(2024, 6, 15);
        let regular_rate = generator.apply_stress_factors(base_rate, regular_day);
        assert!(
            (regular_rate - base_rate).abs() < 0.01,
            "Regular day should have minimal stress factor adjustment"
        );

        let month_end = date(2024, 6, 29);
        let month_end_rate = generator.apply_stress_factors(base_rate, month_end);
        assert!(
            month_end_rate > regular_rate,
            "Month end should have higher error rate than regular day"
        );

        let year_end = date(2024, 12, 30);
        let year_end_rate = generator.apply_stress_factors(base_rate, year_end);
        assert!(
            year_end_rate > month_end_rate,
            "Year end should have highest error rate"
        );

        let friday = date(2024, 6, 14);
        let friday_rate = generator.apply_stress_factors(base_rate, friday);
        assert!(
            friday_rate > regular_rate,
            "Friday should have higher error rate than mid-week"
        );

        let monday = date(2024, 6, 17);
        let monday_rate = generator.apply_stress_factors(base_rate, monday);
        assert!(
            monday_rate > regular_rate,
            "Monday should have higher error rate than mid-week"
        );
    }

    /// Batched entries stay balanced and some posting dates occur more than once.
    #[test]
    fn test_batching_produces_similar_entries() {
        let mut je_gen = generator_2024(123).with_persona_errors(false);
        let entries: Vec<JournalEntry> = (0..200).map(|_| je_gen.generate()).collect();
        for entry in &entries {
            assert!(
                entry.is_balanced(),
                "All entries including batched should be balanced"
            );
        }
        let mut date_counts: std::collections::HashMap<NaiveDate, usize> =
            std::collections::HashMap::new();
        for entry in &entries {
            *date_counts.entry(entry.header.posting_date).or_insert(0) += 1;
        }
        let dates_with_multiple = date_counts.values().filter(|&&c| c > 1).count();
        assert!(
            dates_with_multiple > 0,
            "With batching, should see some dates with multiple entries"
        );
    }

    /// With business-day patterns enabled, no posting date falls on a weekend.
    #[test]
    fn test_temporal_patterns_business_days() {
        use datasynth_config::schema::{
            BusinessDaySchemaConfig, CalendarSchemaConfig, TemporalPatternsConfig,
        };
        let temporal_config = TemporalPatternsConfig {
            enabled: true,
            business_days: BusinessDaySchemaConfig {
                enabled: true,
                ..Default::default()
            },
            calendars: CalendarSchemaConfig {
                regions: vec!["US".to_string()],
                custom_holidays: vec![],
            },
            ..Default::default()
        };
        // This test covers only the first quarter, so it cannot use `generator_2024`.
        let mut je_gen = JournalEntryGenerator::new_with_params(
            TransactionConfig::default(),
            small_coa(),
            vec!["1000".to_string()],
            date(2024, 1, 1),
            date(2024, 3, 31),
            42,
        )
        .with_temporal_patterns(temporal_config, 42)
        .with_persona_errors(false);
        let entries: Vec<JournalEntry> = (0..100).map(|_| je_gen.generate()).collect();
        for entry in &entries {
            let weekday = entry.header.posting_date.weekday();
            assert!(
                weekday != chrono::Weekday::Sat && weekday != chrono::Weekday::Sun,
                "Posting date {:?} should not be a weekend",
                entry.header.posting_date
            );
        }
    }

    /// Even without temporal patterns, weekend postings stay below 5% of entries.
    #[test]
    fn test_default_generation_filters_weekends() {
        let mut je_gen = generator_2024(42).with_persona_errors(false);
        let total = 500;
        let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
        let weekend_count = entries
            .iter()
            .filter(|e| {
                let wd = e.header.posting_date.weekday();
                wd == chrono::Weekday::Sat || wd == chrono::Weekday::Sun
            })
            .count();
        let weekend_pct = weekend_count as f64 / total as f64;
        assert!(
            weekend_pct < 0.05,
            "Expected weekend entries <5% of total without temporal_patterns enabled, \
but got {:.1}% ({}/{})",
            weekend_pct * 100.0,
            weekend_count,
            total
        );
    }

    /// Document types vary with the business process instead of always being "SA".
    #[test]
    fn test_document_type_derived_from_business_process() {
        let mut je_gen = generator_2024(99)
            .with_persona_errors(false)
            .with_batching(false);
        let total = 200;
        let mut doc_types = std::collections::HashSet::new();
        let mut sa_count = 0_usize;
        for _ in 0..total {
            let entry = je_gen.generate();
            let dt = &entry.header.document_type;
            doc_types.insert(dt.clone());
            if dt == "SA" {
                sa_count += 1;
            }
        }
        assert!(
            doc_types.len() > 3,
            "Expected >3 distinct document types, got {} ({:?})",
            doc_types.len(),
            doc_types,
        );
        let sa_pct = sa_count as f64 / total as f64;
        assert!(
            sa_pct < 0.50,
            "Expected SA <50%, got {:.1}% ({}/{})",
            sa_pct * 100.0,
            sa_count,
            total,
        );
    }

    /// More than 95% of generated lines carry an account description.
    #[test]
    fn test_enrich_line_items_account_description() {
        let mut je_gen = generator_2024(42).with_persona_errors(false);
        let total = 200;
        let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
        let total_lines: usize = entries.iter().map(|e| e.lines.len()).sum();
        let lines_with_desc: usize = entries
            .iter()
            .flat_map(|e| &e.lines)
            .filter(|l| l.account_description.is_some())
            .count();
        let desc_pct = lines_with_desc as f64 / total_lines as f64;
        assert!(
            desc_pct > 0.95,
            "Expected >95% of lines to have account_description, got {:.1}% ({}/{})",
            desc_pct * 100.0,
            lines_with_desc,
            total_lines,
        );
    }

    /// More than 80% of lines on accounts starting with 5 or 6 carry a cost center.
    #[test]
    fn test_enrich_line_items_cost_center_for_expense_accounts() {
        let mut je_gen = generator_2024(42).with_persona_errors(false);
        let total = 300;
        let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
        let expense_lines: Vec<&JournalEntryLine> = entries
            .iter()
            .flat_map(|e| &e.lines)
            .filter(|l| matches!(l.gl_account.chars().next(), Some('5' | '6')))
            .collect();
        // The ratio is only meaningful when at least one such line was generated.
        if !expense_lines.is_empty() {
            let with_cc = expense_lines
                .iter()
                .filter(|l| l.cost_center.is_some())
                .count();
            let cc_pct = with_cc as f64 / expense_lines.len() as f64;
            assert!(
                cc_pct > 0.80,
                "Expected >80% of expense lines to have cost_center, got {:.1}% ({}/{})",
                cc_pct * 100.0,
                with_cc,
                expense_lines.len(),
            );
        }
    }

    /// More than 95% of lines carry both a profit center and line text.
    #[test]
    fn test_enrich_line_items_profit_center_and_line_text() {
        let mut je_gen = generator_2024(42).with_persona_errors(false);
        let total = 100;
        let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
        let total_lines: usize = entries.iter().map(|e| e.lines.len()).sum();
        let with_pc = entries
            .iter()
            .flat_map(|e| &e.lines)
            .filter(|l| l.profit_center.is_some())
            .count();
        let pc_pct = with_pc as f64 / total_lines as f64;
        assert!(
            pc_pct > 0.95,
            "Expected >95% of lines to have profit_center, got {:.1}% ({}/{})",
            pc_pct * 100.0,
            with_pc,
            total_lines,
        );
        let with_text = entries
            .iter()
            .flat_map(|e| &e.lines)
            .filter(|l| l.line_text.is_some())
            .count();
        let text_pct = with_text as f64 / total_lines as f64;
        assert!(
            text_pct > 0.95,
            "Expected >95% of lines to have line_text, got {:.1}% ({}/{})",
            text_pct * 100.0,
            with_text,
            total_lines,
        );
    }

    /// Every entry carries the audit fields: source system, creator and created date.
    #[test]
    fn test_je_has_audit_flags() {
        let mut je_gen = generator_2024(42).with_persona_errors(false);
        for _ in 0..100 {
            let entry = je_gen.generate();
            assert!(
                !entry.header.source_system.is_empty(),
                "source_system should be populated, got empty string"
            );
            assert!(
                !entry.header.created_by.is_empty(),
                "created_by should be populated"
            );
            assert!(
                entry.header.created_date.is_some(),
                "created_date should be populated"
            );
        }
    }

    /// The manual share is between 1% and 50%, and `is_manual` mirrors the source.
    #[test]
    fn test_manual_entry_rate() {
        let mut je_gen = generator_2024(42)
            .with_persona_errors(false)
            .with_batching(false);
        let total = 1000;
        let entries: Vec<JournalEntry> = (0..total).map(|_| je_gen.generate()).collect();
        let manual_count = entries.iter().filter(|e| e.header.is_manual).count();
        let manual_rate = manual_count as f64 / total as f64;
        assert!(
            manual_rate > 0.01 && manual_rate < 0.50,
            "Manual entry rate should be reasonable (1%-50%), got {:.1}% ({}/{})",
            manual_rate * 100.0,
            manual_count,
            total,
        );
        for entry in &entries {
            let source_is_manual = entry.header.source == TransactionSource::Manual;
            assert_eq!(
                entry.header.is_manual, source_is_manual,
                "is_manual should match source == Manual"
            );
        }
    }

    /// Manual entries, and only those, use the "manual" or "spreadsheet" source system.
    #[test]
    fn test_manual_source_consistency() {
        let mut je_gen = generator_2024(42)
            .with_persona_errors(false)
            .with_batching(false);
        for _ in 0..500 {
            let entry = je_gen.generate();
            if entry.header.is_manual {
                assert!(
                    entry.header.source_system == "manual"
                        || entry.header.source_system == "spreadsheet",
                    "Manual entry should have source_system 'manual' or 'spreadsheet', got '{}'",
                    entry.header.source_system,
                );
            } else {
                assert!(
                    entry.header.source_system != "manual"
                        && entry.header.source_system != "spreadsheet",
                    "Non-manual entry should not have source_system 'manual' or 'spreadsheet', got '{}'",
                    entry.header.source_system,
                );
            }
        }
    }

    /// An entry is never created after the date on which it is posted.
    #[test]
    fn test_created_date_before_posting() {
        let mut je_gen = generator_2024(42).with_persona_errors(false);
        for _ in 0..500 {
            let entry = je_gen.generate();
            if let Some(created_date) = entry.header.created_date {
                let created_naive_date = created_date.date();
                assert!(
                    created_naive_date <= entry.header.posting_date,
                    "created_date ({}) should be <= posting_date ({})",
                    created_naive_date,
                    entry.header.posting_date,
                );
            }
        }
    }
}