datasynth-core 5.36.0

Core domain models, traits, and distributions for synthetic enterprise data generation
Documentation
//! Deterministic generation clock.
//!
//! Synthetic generation must be byte-identical across runs with the same
//! `(config, seed)`. Many records stamp `created_at` / `updated_at` /
//! `generated_at` with wall-clock time, which breaks that guarantee. This
//! module provides a process-global deterministic epoch: while it is set,
//! [`now`] returns a fixed, seed-derived timestamp instead of the wall clock,
//! so every timestamp that routes through [`now`] becomes reproducible.
//!
//! The epoch is stored in a global atomic (not a `thread_local!`, unlike
//! [`crate::serde_decimal`]) because generation fans out across worker threads
//! under `parallel = true`; a thread-local set on the main thread would not be
//! visible to those workers.
//!
//! # Usage
//!
//! The orchestrator wraps a generation run in a [`DeterministicClockGuard`]
//! built from the run's seed; on drop the wall clock is restored. Outside a
//! guard, [`now`] is exactly `Utc::now()`.
//!
//! # Limitation
//!
//! The epoch is process-global, so two generation runs executing concurrently
//! in the same process would share it. CLI generation is sequential; callers
//! that generate concurrently in-process (e.g. a server) should serialize the
//! deterministic-clock scope or accept wall-clock timestamps.

use crate::uuid_factory::{DeterministicUuidFactory, GeneratorType};
use chrono::{DateTime, NaiveDate, Utc};
use std::sync::atomic::{AtomicBool, AtomicI64, AtomicU64, Ordering};
use uuid::Uuid;

/// Sentinel meaning "no deterministic epoch set" → use the real wall clock.
///
/// `i64::MIN` is used as the marker value stored in [`DET_EPOCH_MILLIS`];
/// [`set_deterministic_epoch`] writes it when called with `None`, and both
/// [`now`] and [`is_deterministic`] compare the stored value against it.
const UNSET: i64 = i64::MIN;

/// Deterministic epoch in milliseconds since the Unix epoch, or [`UNSET`].
///
/// Written by [`set_deterministic_epoch`] and read by [`now`] and
/// [`is_deterministic`]. It is a process-wide atomic, so every thread sees the
/// same value.
static DET_EPOCH_MILLIS: AtomicI64 = AtomicI64::new(UNSET);

// --- Deterministic UUID fallback (for `Uuid::now_v7()` sites) ---
//
// Many model constructors (e.g. `JournalEntryHeader::new`) mint a `now_v7`
// document id, which is wall-clock/random. When the deterministic context is
// active, [`next_document_id`] instead returns sequential seeded UUIDs from a
// dedicated namespace (sub-discriminator `0xFF`, so it never collides with the
// hot-path factory that uses the same seed + `GeneratorType::JournalEntry`).
//
// Determinism of these ids requires the *order* of `next_document_id()` calls
// to be reproducible. That holds for the sequential side-path phases that use
// `new()` (period close, balances, tax, standards, intercompany); the parallel
// hot JE path uses `with_deterministic_id` and does not draw from this counter.

// Sub-discriminator passed to `DeterministicUuidFactory::with_sub_discriminator`
// by `next_document_id`.
const DET_UUID_SUB: u8 = 0xFF;
// Whether the seeded fallback is on; toggled by `set_deterministic_uuids` and
// checked first by `next_document_id`.
static DET_UUID_ACTIVE: AtomicBool = AtomicBool::new(false);
// Seed handed to the UUID factory; stored by `set_deterministic_uuids(Some(_))`.
static DET_UUID_SEED: AtomicU64 = AtomicU64::new(0);
// Index of the next id to mint; reset to 0 on activation and incremented by
// each deterministic `next_document_id` call.
static DET_UUID_COUNTER: AtomicU64 = AtomicU64::new(0);

/// Install or remove the process-global deterministic epoch.
///
/// With `Some(epoch)`, [`now`] is pinned to that instant; with `None`, the
/// "unset" sentinel is stored and [`now`] goes back to the wall clock. The
/// epoch is kept as whole milliseconds since the Unix epoch, so any
/// sub-millisecond component of `epoch` is not preserved.
pub fn set_deterministic_epoch(epoch: Option<DateTime<Utc>>) {
    let stored_millis = match epoch {
        Some(instant) => instant.timestamp_millis(),
        None => UNSET,
    };
    DET_EPOCH_MILLIS.store(stored_millis, Ordering::SeqCst);
}

/// Reports whether a deterministic epoch is installed right now.
///
/// Returns `true` when the global epoch slot holds anything other than the
/// "unset" sentinel.
pub fn is_deterministic() -> bool {
    let stored_millis = DET_EPOCH_MILLIS.load(Ordering::SeqCst);
    stored_millis != UNSET
}

/// Timestamp source for generation code; call this instead of `Utc::now()`.
///
/// Returns the pinned deterministic epoch while one is set, and the real wall
/// clock otherwise. Should the stored millisecond value fail to convert back
/// into a `DateTime`, the wall clock is used as a fallback.
pub fn now() -> DateTime<Utc> {
    let pinned_millis = DET_EPOCH_MILLIS.load(Ordering::SeqCst);
    if pinned_millis != UNSET {
        return DateTime::from_timestamp_millis(pinned_millis).unwrap_or_else(Utc::now);
    }
    Utc::now()
}

/// Switch the deterministic UUID fallback for model constructors on or off.
///
/// `Some(seed)` records the seed, rewinds the id counter to zero and then
/// marks the fallback active, so [`next_document_id`] starts handing out the
/// seeded sequence from its beginning. `None` only clears the active flag,
/// which makes [`next_document_id`] return `Uuid::now_v7()` again.
pub fn set_deterministic_uuids(seed: Option<u64>) {
    if let Some(run_seed) = seed {
        // The flag is raised last, after seed and counter are in place.
        DET_UUID_SEED.store(run_seed, Ordering::SeqCst);
        DET_UUID_COUNTER.store(0, Ordering::SeqCst);
        DET_UUID_ACTIVE.store(true, Ordering::SeqCst);
    } else {
        DET_UUID_ACTIVE.store(false, Ordering::SeqCst);
    }
}

/// Mint a document id for a model constructor.
///
/// While the deterministic UUID context is active this returns the next id of
/// a seeded sequence (each call advances a global counter); otherwise it
/// returns `Uuid::now_v7()`. Generation model constructors should call this
/// rather than `Uuid::now_v7()` directly.
pub fn next_document_id() -> Uuid {
    if !DET_UUID_ACTIVE.load(Ordering::SeqCst) {
        return Uuid::now_v7();
    }

    // Claim a sequence index first, then read the seed it belongs to.
    let sequence_index = DET_UUID_COUNTER.fetch_add(1, Ordering::SeqCst);
    let run_seed = DET_UUID_SEED.load(Ordering::SeqCst);

    DeterministicUuidFactory::with_sub_discriminator(
        run_seed,
        GeneratorType::JournalEntry,
        DET_UUID_SUB,
    )
    .generate_at(sequence_index)
}

/// Compute the deterministic epoch for a run from its seed and start date.
///
/// The result is midnight UTC of `start_date` plus `seed % 86_400` seconds,
/// so it always falls within that calendar day and is reproducible for a
/// given `(seed, start_date)` pair. Seeds that are congruent modulo 86,400
/// map to the same epoch.
pub fn epoch_from_seed(seed: u64, start_date: NaiveDate) -> DateTime<Utc> {
    const SECS_PER_DAY: u64 = 86_400;

    let midnight_utc = start_date
        .and_hms_opt(0, 0, 0)
        .unwrap_or_default()
        .and_utc();
    // The remainder is below 86_400, so the cast to i64 cannot truncate.
    let seconds_into_day = (seed % SECS_PER_DAY) as i64;

    midnight_utc + chrono::Duration::seconds(seconds_into_day)
}

/// RAII guard: sets the deterministic epoch on construction and restores the
/// wall clock on drop. Hold it for the lifetime of a generation run.
///
/// A guard only tears down what it set up: the epoch is always cleared on
/// drop, while the seeded UUID fallback is cleared only by guards created
/// through [`DeterministicClockGuard::from_seed`], which is the constructor
/// that activates it.
#[must_use = "the deterministic clock is only active while the guard is alive"]
pub struct DeterministicClockGuard {
    /// `true` when this guard switched on the seeded UUID fallback and is
    /// therefore responsible for switching it off again.
    owns_uuid_context: bool,
}

impl DeterministicClockGuard {
    /// Activate the deterministic clock at `epoch` (timestamps only; the UUID
    /// fallback is left untouched, both now and when the guard is dropped).
    pub fn new(epoch: DateTime<Utc>) -> Self {
        set_deterministic_epoch(Some(epoch));
        Self {
            owns_uuid_context: false,
        }
    }

    /// Activate the full deterministic context — clock epoch + seeded UUID
    /// fallback — for a generation run keyed off `seed`/`start_date`.
    ///
    /// The epoch is derived with [`epoch_from_seed`]; the UUID sequence is
    /// restarted from its beginning for `seed`.
    pub fn from_seed(seed: u64, start_date: NaiveDate) -> Self {
        set_deterministic_epoch(Some(epoch_from_seed(seed, start_date)));
        set_deterministic_uuids(Some(seed));
        Self {
            owns_uuid_context: true,
        }
    }
}

impl Drop for DeterministicClockGuard {
    /// Restore the wall clock, and switch the UUID fallback off only if this
    /// guard was the one that switched it on.
    fn drop(&mut self) {
        set_deterministic_epoch(None);
        // A clock-only guard must not disable a UUID context it never
        // activated; doing so would silently turn later ids into `now_v7`.
        if self.owns_uuid_context {
            set_deterministic_uuids(None);
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::Mutex;

    /// Serializes the tests that mutate the process-global deterministic
    /// context (epoch + UUID seed). The cargo test runner executes a binary's
    /// tests on multiple threads, so without this lock one test's setup or
    /// teardown can land in the middle of another test's assertions — e.g. the
    /// UUID context being switched off between the two id sequences compared in
    /// `test_next_document_id_deterministic_under_context`, which would make the
    /// second sequence come from `Uuid::now_v7()` instead of the seed.
    /// Lock poisoning is ignored via `into_inner()` so one panicking test
    /// doesn't cascade into the other.
    static CLOCK_CTX_LOCK: Mutex<()> = Mutex::new(());

    #[test]
    fn test_epoch_from_seed_is_deterministic() {
        let start = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let midnight = start.and_hms_opt(0, 0, 0).unwrap().and_utc();

        // Same inputs → same epoch; a neighbouring seed → a different one.
        assert_eq!(epoch_from_seed(42, start), epoch_from_seed(42, start));
        assert_ne!(epoch_from_seed(42, start), epoch_from_seed(43, start));
        // Never earlier than midnight of the start date.
        assert!(epoch_from_seed(0, start) >= midnight);
    }

    #[test]
    fn test_next_document_id_deterministic_under_context() {
        let _ctx = CLOCK_CTX_LOCK.lock().unwrap_or_else(|e| e.into_inner());
        let draw_three = || -> Vec<Uuid> { (0..3).map(|_| next_document_id()).collect() };

        // Re-activating with the same seed must replay the same sequence.
        set_deterministic_uuids(Some(99));
        let first_run = draw_three();
        set_deterministic_uuids(Some(99));
        let second_run = draw_three();
        // Clean up before asserting so a failure does not leak the context.
        set_deterministic_uuids(None);

        assert_eq!(first_run, second_run, "same seed → identical id sequence");
        assert_ne!(first_run[0], first_run[1], "sequential ids are distinct");
        assert!(!first_run[0].is_nil());
    }

    // Holds CLOCK_CTX_LOCK as well: this test changes the global context too,
    // so it must not overlap with the UUID test.
    #[test]
    fn test_now_honors_epoch_and_guard_restores() {
        let _ctx = CLOCK_CTX_LOCK.lock().unwrap_or_else(|e| e.into_inner());
        let pinned = NaiveDate::from_ymd_opt(2024, 6, 7)
            .unwrap()
            .and_hms_opt(12, 0, 0)
            .unwrap()
            .and_utc();
        assert!(!is_deterministic(), "must start unset");

        let guard = DeterministicClockGuard::new(pinned);
        assert!(is_deterministic());
        assert_eq!(now(), pinned);
        assert_eq!(now(), pinned, "stable across calls");
        drop(guard);

        // With the guard gone the wall clock is back.
        assert!(!is_deterministic());
        assert_ne!(now(), pinned);
    }
}