// datasynth-generators 2.2.0
//
// 50+ data generators covering GL, P2P, O2C, S2C, HR, manufacturing, audit, tax, treasury, and ESG
// Documentation
//! Integration tests for the Hire-to-Retire (H2R) pipeline.
//!
//! Verifies cross-generator coherence: employees generated by
//! `EmployeeGenerator` flow into payroll, time entries, expense reports,
//! and benefit enrollments with consistent IDs and valid data.

#![allow(clippy::unwrap_used)]

use std::collections::HashSet;

use chrono::{Datelike, NaiveDate};
use datasynth_config::schema::{ExpenseConfig, TimeAttendanceConfig};
use datasynth_generators::{
    BenefitEnrollmentGenerator, DepartmentDefinition, EmployeeGenerator, ExpenseReportGenerator,
    PayrollGenerator, TimeEntryGenerator,
};
use rust_decimal::Decimal;

// =============================================================================
// Helpers
// =============================================================================

/// Shorthand for building a `NaiveDate` from year / month / day literals.
///
/// # Panics
///
/// Panics when `NaiveDate::from_ymd_opt` returns `None` for the given
/// components.
fn date(y: i32, m: u32, d: u32) -> NaiveDate {
    let maybe_date = NaiveDate::from_ymd_opt(y, m, d);
    maybe_date.unwrap()
}

// =============================================================================
// Full pipeline coherence
// =============================================================================

/// Generate employees and feed them through every HR sub-generator, then
/// verify that all downstream records reference valid employee IDs.
///
/// The test builds a finance-department employee pool and runs the payroll,
/// time-entry, expense-report and benefit-enrollment generators over
/// 2025-01-01..=2025-03-31. Besides the ID cross-reference, each record type
/// gets basic sanity checks (positive amounts, in-range weekday dates,
/// expense totals matching their line items).
#[test]
fn test_hr_pipeline_coherence() {
    let seed = 42u64;
    let company_code = "C001";
    let start_date = date(2025, 1, 1);
    let end_date = date(2025, 3, 31);
    let currency = "USD";

    // ── Phase 1: Generate employees ──────────────────────────────────────
    let mut emp_gen = EmployeeGenerator::new(seed);
    let dept = DepartmentDefinition::finance(company_code);
    let pool = emp_gen.generate_department_pool(
        company_code,
        &dept,
        (date(2020, 1, 1), date(2024, 12, 31)),
    );

    let employees = &pool.employees;
    assert!(
        employees.len() >= 10,
        "Finance department should have at least 10 employees, got {}",
        employees.len()
    );

    // Lookup set used by every cross-reference assertion below.
    let employee_id_set: HashSet<&str> = employees.iter().map(|e| e.employee_id.as_str()).collect();

    // Derived collections consumed by downstream generators
    let employee_ids: Vec<String> = employees.iter().map(|e| e.employee_id.clone()).collect();
    let cost_center_ids: Vec<String> = employees
        .iter()
        .filter_map(|e| e.cost_center.clone())
        .collect();
    let employee_pairs: Vec<(String, String)> = employees
        .iter()
        .map(|e| (e.employee_id.clone(), e.display_name.clone()))
        .collect();
    let employees_with_salary: Vec<(String, Decimal, Option<String>, Option<String>)> = employees
        .iter()
        .map(|e| {
            (
                e.employee_id.clone(),
                Decimal::from(5000), // default monthly salary
                e.cost_center.clone(),
                e.department_id.clone(),
            )
        })
        .collect();

    // ── Phase 2: Payroll ─────────────────────────────────────────────────
    let mut payroll_gen =
        PayrollGenerator::new(seed + 1).with_pools(employee_ids.clone(), cost_center_ids.clone());

    let (run, line_items) = payroll_gen.generate(
        company_code,
        &employees_with_salary,
        start_date,
        end_date,
        currency,
    );

    assert!(!run.payroll_id.is_empty(), "Payroll run should have an ID");
    assert!(
        !line_items.is_empty(),
        "Payroll should produce at least one line item"
    );

    for item in &line_items {
        assert!(
            employee_id_set.contains(item.employee_id.as_str()),
            "Payroll line item references unknown employee: {}",
            item.employee_id
        );
        assert!(
            item.gross_pay > Decimal::ZERO,
            "Payroll gross pay should be positive for employee {}",
            item.employee_id
        );
        assert!(
            item.net_pay > Decimal::ZERO,
            "Payroll net pay should be positive for employee {}",
            item.employee_id
        );
    }

    // ── Phase 3: Time entries ────────────────────────────────────────────
    let mut time_gen =
        TimeEntryGenerator::new(seed + 2).with_pools(employee_ids.clone(), cost_center_ids.clone());

    let config_time = TimeAttendanceConfig::default();
    let time_entries = time_gen.generate(&employee_ids, start_date, end_date, &config_time);

    assert!(
        !time_entries.is_empty(),
        "Should generate time entries for employees"
    );

    for entry in &time_entries {
        assert!(
            employee_id_set.contains(entry.employee_id.as_str()),
            "Time entry references unknown employee: {}",
            entry.employee_id
        );
        // Every entry should have some hours (regular, overtime, PTO, or sick)
        let total_hours =
            entry.hours_regular + entry.hours_overtime + entry.hours_pto + entry.hours_sick;
        assert!(
            total_hours > 0.0,
            "Time entry for {} on {} should have hours > 0",
            entry.employee_id,
            entry.date
        );
        // Date should be within the period
        assert!(
            entry.date >= start_date && entry.date <= end_date,
            "Time entry date {} outside expected range [{}, {}]",
            entry.date,
            start_date,
            end_date
        );
        // No weekend entries
        let wd = entry.date.weekday();
        assert!(
            wd != chrono::Weekday::Sat && wd != chrono::Weekday::Sun,
            "Time entry on weekend: {} ({})",
            entry.date,
            wd
        );
    }

    // ── Phase 4: Expense reports ─────────────────────────────────────────
    // Last consumer of `cost_center_ids`, so the vector is moved rather than
    // cloned; `employee_ids` is still borrowed by `generate` below.
    let mut expense_gen =
        ExpenseReportGenerator::new(seed + 3).with_pools(employee_ids.clone(), cost_center_ids);

    let config_exp = ExpenseConfig::default();
    let reports = expense_gen.generate(&employee_ids, start_date, end_date, &config_exp);

    assert!(
        !reports.is_empty(),
        "Should generate expense reports for employees"
    );

    for report in &reports {
        assert!(
            employee_id_set.contains(report.employee_id.as_str()),
            "Expense report references unknown employee: {}",
            report.employee_id
        );
        assert!(
            report.total_amount > Decimal::ZERO,
            "Expense report total should be positive"
        );
        assert!(
            !report.line_items.is_empty(),
            "Expense report should have line items"
        );

        // Line item amounts should sum to report total
        let line_sum: Decimal = report.line_items.iter().map(|li| li.amount).sum();
        assert_eq!(
            report.total_amount, line_sum,
            "Expense total mismatch for report {}",
            report.report_id
        );

        // If approved, approved_by should reference a real employee
        if let Some(approver) = report.approved_by.as_ref() {
            assert!(
                employee_id_set.contains(approver.as_str()),
                "Expense approved_by references unknown employee: {}",
                approver
            );
        }
    }

    // ── Phase 5: Benefit enrollments ─────────────────────────────────────
    let mut benefit_gen = BenefitEnrollmentGenerator::new(seed + 4);
    // Enrollments are generated as of the first day of the test period.
    let enrollment_date = start_date;

    let enrollments =
        benefit_gen.generate(company_code, &employee_pairs, enrollment_date, currency);

    assert!(
        !enrollments.is_empty(),
        "Should generate benefit enrollments"
    );

    for enrollment in &enrollments {
        assert!(
            employee_id_set.contains(enrollment.employee_id.as_str()),
            "Benefit enrollment references unknown employee: {}",
            enrollment.employee_id
        );
        assert!(
            enrollment.employee_contribution > Decimal::ZERO,
            "Employee contribution should be positive for enrollment {}",
            enrollment.id
        );
        assert_eq!(
            enrollment.entity_code, company_code,
            "Enrollment entity code should match company"
        );
        assert_eq!(
            enrollment.currency, currency,
            "Enrollment currency should match"
        );
    }

    // ── Summary ──────────────────────────────────────────────────────────
    println!(
        "HR pipeline coherence OK: {} employees -> {} payroll items, {} time entries, {} expense reports, {} benefit enrollments",
        employees.len(),
        line_items.len(),
        time_entries.len(),
        reports.len(),
        enrollments.len()
    );
}

// =============================================================================
// Determinism
// =============================================================================

/// Two pipeline runs driven by the same seed must agree.
///
/// Each run is summarised as a tuple of: employee count, payroll run ID,
/// payroll line-item count, time-entry count and benefit-enrollment count.
/// Only those values are compared; individual record contents are not.
#[test]
fn test_hr_pipeline_deterministic() {
    let company_code = "C001";
    let seed = 99u64;
    let (start, end) = (date(2025, 1, 1), date(2025, 1, 31));

    // One full pass over the generators, reduced to a comparable summary.
    let generate = |s: u64| {
        let mut emp_gen = EmployeeGenerator::new(s);
        let dept = DepartmentDefinition::finance(company_code);
        let date_range = (date(2020, 1, 1), date(2024, 12, 31));
        let pool = emp_gen.generate_department_pool(company_code, &dept, date_range);

        // Build all three generator inputs in a single walk over the pool.
        let headcount = pool.employees.len();
        let mut employee_ids: Vec<String> = Vec::with_capacity(headcount);
        let mut employee_pairs: Vec<(String, String)> = Vec::with_capacity(headcount);
        let mut employees_with_salary: Vec<(String, Decimal, Option<String>, Option<String>)> =
            Vec::with_capacity(headcount);
        for emp in pool.employees.iter() {
            employee_ids.push(emp.employee_id.clone());
            employee_pairs.push((emp.employee_id.clone(), emp.display_name.clone()));
            employees_with_salary.push((
                emp.employee_id.clone(),
                Decimal::from(5000),
                emp.cost_center.clone(),
                emp.department_id.clone(),
            ));
        }

        // Payroll
        let mut payroll_gen = PayrollGenerator::new(s + 1);
        let (run, items) =
            payroll_gen.generate(company_code, &employees_with_salary, start, end, "USD");

        // Time entries
        let config_time = TimeAttendanceConfig::default();
        let mut time_gen = TimeEntryGenerator::new(s + 2);
        let time_entries = time_gen.generate(&employee_ids, start, end, &config_time);

        // Benefit enrollments
        let mut benefit_gen = BenefitEnrollmentGenerator::new(s + 4);
        let enrollments = benefit_gen.generate(company_code, &employee_pairs, start, "USD");

        let payroll_id = run.payroll_id;
        (
            headcount,
            payroll_id,
            items.len(),
            time_entries.len(),
            enrollments.len(),
        )
    };

    let first = generate(seed);
    let second = generate(seed);

    assert_eq!(
        first, second,
        "Deterministic runs should produce identical results"
    );
}

// =============================================================================
// Cross-reference checks: approved_by in time entries
// =============================================================================

/// When employee ID pools are provided, time entry approvers should come from
/// the pool.
///
/// Generates a procurement-department pool, hands its employee and cost-center
/// IDs to `TimeEntryGenerator::with_pools`, produces entries for June 2025 and
/// asserts that at least one entry is approved and that every `approved_by`
/// value is an ID from the generated pool.
#[test]
fn test_time_entry_approvers_from_pool() {
    let seed = 77u64;
    let company_code = "C001";

    let mut emp_gen = EmployeeGenerator::new(seed);
    let dept = DepartmentDefinition::procurement(company_code);
    let pool = emp_gen.generate_department_pool(
        company_code,
        &dept,
        (date(2020, 1, 1), date(2024, 12, 31)),
    );

    let employee_ids: Vec<String> = pool
        .employees
        .iter()
        .map(|e| e.employee_id.clone())
        .collect();
    // Borrow the IDs for lookups instead of cloning each one into the set.
    let employee_id_set: HashSet<&str> = employee_ids.iter().map(String::as_str).collect();
    let cost_center_ids: Vec<String> = pool
        .employees
        .iter()
        .filter_map(|e| e.cost_center.clone())
        .collect();

    let mut time_gen =
        TimeEntryGenerator::new(seed + 10).with_pools(employee_ids.clone(), cost_center_ids);

    let config = TimeAttendanceConfig::default();
    let entries = time_gen.generate(&employee_ids, date(2025, 6, 1), date(2025, 6, 30), &config);

    // Keep only the approver of each approved entry; unapproved entries drop out.
    let approvers: Vec<_> = entries
        .iter()
        .filter_map(|e| e.approved_by.as_ref())
        .collect();
    assert!(
        !approvers.is_empty(),
        "Some time entries should be approved"
    );

    for approver in approvers {
        assert!(
            employee_id_set.contains(approver.as_str()),
            "Time entry approver '{}' should come from the employee pool",
            approver
        );
    }
}