Skip to main content

datasynth_core/
uuid_factory.rs

1//! Deterministic UUID generation factory for reproducible synthetic data.
2//!
3//! This module provides a centralized UUID generation system that ensures:
4//! - No collisions between different generator types
5//! - Reproducible output given the same seed
6//! - Thread-safe counter increments
7
8use std::sync::atomic::{AtomicU64, Ordering};
9use uuid::Uuid;
10
/// Namespace tag identifying which generator a UUID stream belongs to.
///
/// Every variant carries a fixed `u8` discriminant (`#[repr(u8)]`). Giving
/// each generator its own value keeps UUIDs produced by one generator apart
/// from those produced by another, so existing values must stay stable.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum GeneratorType {
    // ===== Core generators (0x01 - 0x14) =====
    /// Generator for journal entries.
    JournalEntry = 0x01,
    /// Generator for document flows (P2P/O2C).
    DocumentFlow = 0x02,
    /// Master data: vendor generator.
    Vendor = 0x03,
    /// Master data: customer generator.
    Customer = 0x04,
    /// Master data: material generator.
    Material = 0x05,
    /// Master data: asset generator.
    Asset = 0x06,
    /// Master data: employee generator.
    Employee = 0x07,
    /// Subledger: AR generator.
    ARSubledger = 0x08,
    /// Subledger: AP generator.
    APSubledger = 0x09,
    /// Subledger: FA generator.
    FASubledger = 0x0A,
    /// Subledger: inventory generator.
    InventorySubledger = 0x0B,
    /// Generator for intercompany data.
    Intercompany = 0x0C,
    /// Anomaly injection.
    Anomaly = 0x0D,
    /// Generator for period close.
    PeriodClose = 0x0E,
    /// Generator for FX rates.
    FxRate = 0x0F,
    /// Generator for accruals.
    Accrual = 0x10,
    /// Generator for depreciation.
    Depreciation = 0x11,
    /// Generator for controls.
    Control = 0x12,
    /// Generator for opening balances.
    OpeningBalance = 0x13,
    /// Generator for trial balances.
    TrialBalance = 0x14,

    // ===== Individual document types (0x20 - 0x27) =====
    /// Document: purchase order.
    PurchaseOrder = 0x20,
    /// Document: goods receipt.
    GoodsReceipt = 0x21,
    /// Document: vendor invoice.
    VendorInvoice = 0x22,
    /// Document: payment.
    Payment = 0x23,
    /// Document: sales order.
    SalesOrder = 0x24,
    /// Document: delivery.
    Delivery = 0x25,
    /// Document: customer invoice.
    CustomerInvoice = 0x26,
    /// Document: customer receipt.
    CustomerReceipt = 0x27,

    // ===== Enterprise Process Chain generators =====
    /// Generator for sourcing projects.
    SourcingProject = 0x28,
    /// Generator for RFx events.
    RfxEvent = 0x29,
    /// Generator for supplier bids.
    SupplierBid = 0x2A,
    /// Generator for procurement contracts.
    ProcurementContract = 0x2B,
    /// Generator for catalog items.
    CatalogItem = 0x2C,
    /// Generator for bank reconciliations.
    BankReconciliation = 0x2D,
    /// Generator for financial statements.
    FinancialStatement = 0x2E,
    /// Generator for payroll runs.
    PayrollRun = 0x2F,
    /// Generator for time entries.
    TimeEntry = 0x30,
    /// Generator for expense reports.
    ExpenseReport = 0x31,
    /// Generator for production orders.
    ProductionOrder = 0x32,
    /// Generator for cycle counts.
    CycleCount = 0x33,
    /// Generator for quality inspections.
    QualityInspection = 0x34,
    /// Generator for sales quotes.
    SalesQuote = 0x35,
    /// Generator for budget lines.
    BudgetLine = 0x36,
    /// Generator for revenue recognition contracts.
    RevenueRecognition = 0x37,
    /// Generator for impairment tests.
    ImpairmentTest = 0x38,
    /// Generator for management KPIs.
    Kpi = 0x39,
    /// Generator for tax codes / jurisdictions.
    Tax = 0x3A,
    /// Project accounting (cost lines, revenue, milestones, change orders, EVM).
    ProjectAccounting = 0x3B,
    /// ESG / sustainability (emissions, energy, water, waste, diversity, safety).
    Esg = 0x3C,
    /// Generator for supplier qualifications.
    SupplierQualification = 0x3D,
    /// Generator for supplier scorecards.
    SupplierScorecard = 0x3E,
    /// Generator for BOM components.
    BomComponent = 0x3F,
    /// Generator for inventory movements.
    InventoryMovement = 0x40,
    /// Generator for benefit enrollments.
    BenefitEnrollment = 0x41,
    /// Generator for disruption events.
    Disruption = 0x42,
    /// Generator for business combinations (IFRS 3 / ASC 805).
    BusinessCombination = 0x43,
    /// Generator for segment reporting (IFRS 8 / ASC 280).
    SegmentReport = 0x44,
    /// Generator for expected credit loss (IFRS 9 / ASC 326).
    ExpectedCreditLoss = 0x45,
    /// Generator for defined benefit pensions (IAS 19 / ASC 715).
    Pension = 0x46,
    /// Generator for provisions and contingencies (IAS 37 / ASC 450).
    Provision = 0x47,
    /// Generator for stock-based compensation (ASC 718 / IFRS 2).
    StockCompensation = 0x48,
    /// Generator for industry benchmarks (WI-3).
    IndustryBenchmark = 0x49,
    /// Generator for governance / board minutes (WI-5).
    Governance = 0x4A,
    /// Generator for organizational profiles (WI-6).
    OrganizationalProfile = 0x4B,
    /// Generator for IT controls: access logs and change management (WI-4).
    ItControls = 0x4C,
    /// Generator for management reports (WI-7).
    ManagementReport = 0x4D,
    /// Generator for prior-year comparative data (WI-2).
    PriorYear = 0x4E,
    /// Generator for legal documents (audit engagement support).
    LegalDocument = 0x4F,
    /// Generator for external expectations (ISA-520 substantive-analytics layer).
    ExternalExpectation = 0x50,
    /// Generator for evidence anchors (ISA-505 external-corroboration layer).
    EvidenceAnchor = 0x51,
    /// Scenario / counterfactual-intervention identifiers (paired-rollout engine).
    Scenario = 0x52,
}
160
161/// A factory for generating deterministic UUIDs that are guaranteed unique
162/// across different generator types within the same seed.
163///
164/// # UUID Structure (16 bytes)
165///
166/// ```text
167/// Bytes 0-5:   Seed (lower 48 bits)
168/// Byte  6:     Generator type discriminator
169/// Byte  7:     Version nibble (0x4_) | Sub-discriminator
170/// Bytes 8-15:  Counter (64-bit, with variant bits set)
171/// ```
172///
173/// # Thread Safety
174///
175/// The counter uses `AtomicU64` for thread-safe increments, allowing
176/// concurrent UUID generation from multiple threads.
177#[derive(Debug)]
178pub struct DeterministicUuidFactory {
179    seed: u64,
180    generator_type: GeneratorType,
181    counter: AtomicU64,
182    /// Optional sub-discriminator for further namespace separation
183    sub_discriminator: u8,
184}
185
186impl DeterministicUuidFactory {
187    /// Create a new UUID factory for a specific generator type.
188    ///
189    /// # Arguments
190    ///
191    /// * `seed` - The global seed for deterministic generation
192    /// * `generator_type` - The type of generator using this factory
193    ///
194    /// # Example
195    ///
196    /// ```
197    /// use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
198    ///
199    /// let factory = DeterministicUuidFactory::new(12345, GeneratorType::JournalEntry);
200    /// let uuid = factory.next();
201    /// ```
202    pub fn new(seed: u64, generator_type: GeneratorType) -> Self {
203        Self {
204            seed,
205            generator_type,
206            counter: AtomicU64::new(0),
207            sub_discriminator: 0,
208        }
209    }
210
211    /// Create a factory with a sub-discriminator for additional namespace separation.
212    ///
213    /// Useful when the same generator type needs multiple independent UUID streams.
214    pub fn with_sub_discriminator(
215        seed: u64,
216        generator_type: GeneratorType,
217        sub_discriminator: u8,
218    ) -> Self {
219        Self {
220            seed,
221            generator_type,
222            counter: AtomicU64::new(0),
223            sub_discriminator,
224        }
225    }
226
227    /// Create a factory starting from a specific counter value.
228    ///
229    /// Useful for resuming generation from a checkpoint or for partitioned
230    /// parallel generation where each thread gets a non-overlapping counter range.
231    pub fn with_counter(seed: u64, generator_type: GeneratorType, start_counter: u64) -> Self {
232        Self {
233            seed,
234            generator_type,
235            counter: AtomicU64::new(start_counter),
236            sub_discriminator: 0,
237        }
238    }
239
240    /// Create a factory for a specific partition in parallel generation.
241    ///
242    /// Each partition gets a unique sub-discriminator so that counters starting
243    /// from 0 in each partition still produce globally unique UUIDs. This avoids
244    /// atomic contention between threads since each partition has its own factory.
245    pub fn for_partition(seed: u64, generator_type: GeneratorType, partition_index: u8) -> Self {
246        Self {
247            seed,
248            generator_type,
249            counter: AtomicU64::new(0),
250            sub_discriminator: partition_index,
251        }
252    }
253
254    /// Generate the next UUID in the sequence.
255    ///
256    /// This method is thread-safe and can be called from multiple threads.
257    #[inline]
258    pub fn next(&self) -> Uuid {
259        let counter = self.counter.fetch_add(1, Ordering::Relaxed);
260        self.generate_uuid(counter)
261    }
262
263    /// Generate a UUID for a specific counter value without incrementing.
264    ///
265    /// Useful for deterministic regeneration of specific UUIDs.
266    pub fn generate_at(&self, counter: u64) -> Uuid {
267        self.generate_uuid(counter)
268    }
269
270    /// Get the current counter value.
271    pub fn current_counter(&self) -> u64 {
272        self.counter.load(Ordering::Relaxed)
273    }
274
275    /// Reset the counter to zero.
276    pub fn reset(&self) {
277        self.counter.store(0, Ordering::Relaxed);
278    }
279
280    /// Set the counter to a specific value.
281    pub fn set_counter(&self, value: u64) {
282        self.counter.store(value, Ordering::Relaxed);
283    }
284
285    /// Generate a UUID from the seed, generator type, and counter.
286    ///
287    /// Uses a simple hash-based approach to ensure uniqueness while maintaining
288    /// determinism. The hash function is designed to spread entropy across all
289    /// bytes while preserving the UUID v4 format.
290    #[inline]
291    fn generate_uuid(&self, counter: u64) -> Uuid {
292        // Create a unique input by combining all distinguishing factors
293        // Use FNV-1a style hashing for simplicity and determinism
294        let mut hash: u64 = 14695981039346656037; // FNV offset basis
295
296        // Mix in seed
297        for byte in self.seed.to_le_bytes() {
298            hash ^= byte as u64;
299            hash = hash.wrapping_mul(1099511628211); // FNV prime
300        }
301
302        // Mix in generator type
303        hash ^= self.generator_type as u64;
304        hash = hash.wrapping_mul(1099511628211);
305
306        // Mix in sub-discriminator
307        hash ^= self.sub_discriminator as u64;
308        hash = hash.wrapping_mul(1099511628211);
309
310        // Mix in counter (most important for uniqueness within same factory)
311        for byte in counter.to_le_bytes() {
312            hash ^= byte as u64;
313            hash = hash.wrapping_mul(1099511628211);
314        }
315
316        // Create second hash for remaining bytes
317        let mut hash2: u64 = hash;
318        hash2 ^= self.seed.rotate_left(32);
319        hash2 = hash2.wrapping_mul(1099511628211);
320        hash2 ^= counter.rotate_left(32);
321        hash2 = hash2.wrapping_mul(1099511628211);
322
323        let mut bytes = [0u8; 16];
324
325        // First 8 bytes from hash
326        bytes[0..8].copy_from_slice(&hash.to_le_bytes());
327        // Second 8 bytes from hash2
328        bytes[8..16].copy_from_slice(&hash2.to_le_bytes());
329
330        // Set UUID version 4 (bits 12-15 of time_hi_and_version)
331        // Byte 6: xxxx0100 -> set bits 4-7 to 0100
332        bytes[6] = (bytes[6] & 0x0f) | 0x40;
333
334        // Set variant to RFC 4122 (bits 6-7 of clock_seq_hi_and_reserved)
335        // Byte 8: 10xxxxxx -> set bits 6-7 to 10
336        bytes[8] = (bytes[8] & 0x3f) | 0x80;
337
338        Uuid::from_bytes(bytes)
339    }
340}
341
342impl Clone for DeterministicUuidFactory {
343    fn clone(&self) -> Self {
344        Self {
345            seed: self.seed,
346            generator_type: self.generator_type,
347            counter: AtomicU64::new(self.counter.load(Ordering::Relaxed)),
348            sub_discriminator: self.sub_discriminator,
349        }
350    }
351}
352
353/// A registry that manages multiple UUID factories for different generator types.
354///
355/// This ensures a single source of truth for UUID generation across the system.
356#[derive(Debug)]
357pub struct UuidFactoryRegistry {
358    seed: u64,
359    factories: std::collections::HashMap<GeneratorType, DeterministicUuidFactory>,
360}
361
362impl UuidFactoryRegistry {
363    /// Create a new registry with a global seed.
364    pub fn new(seed: u64) -> Self {
365        Self {
366            seed,
367            factories: std::collections::HashMap::new(),
368        }
369    }
370
371    /// Get or create a factory for a specific generator type.
372    pub fn get_factory(&mut self, generator_type: GeneratorType) -> &DeterministicUuidFactory {
373        self.factories
374            .entry(generator_type)
375            .or_insert_with(|| DeterministicUuidFactory::new(self.seed, generator_type))
376    }
377
378    /// Generate the next UUID for a specific generator type.
379    pub fn next_uuid(&mut self, generator_type: GeneratorType) -> Uuid {
380        self.get_factory(generator_type).next()
381    }
382
383    /// Reset all factories.
384    pub fn reset_all(&self) {
385        for factory in self.factories.values() {
386            factory.reset();
387        }
388    }
389
390    /// Get the current counter for a generator type.
391    pub fn get_counter(&self, generator_type: GeneratorType) -> Option<u64> {
392        self.factories
393            .get(&generator_type)
394            .map(DeterministicUuidFactory::current_counter)
395    }
396}
397
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;
    use std::thread;

    /// A single factory must not repeat a UUID over a long run.
    #[test]
    fn test_uuid_uniqueness_same_generator() {
        let factory = DeterministicUuidFactory::new(12345, GeneratorType::JournalEntry);

        let mut uuids = HashSet::new();
        for _ in 0..10000 {
            let uuid = factory.next();
            assert!(uuids.insert(uuid), "Duplicate UUID generated");
        }
    }

    /// Two generator types with the same seed must not overlap.
    #[test]
    fn test_uuid_uniqueness_different_generators() {
        let factory1 = DeterministicUuidFactory::new(12345, GeneratorType::JournalEntry);
        let factory2 = DeterministicUuidFactory::new(12345, GeneratorType::DocumentFlow);

        let mut uuids = HashSet::new();

        for _ in 0..5000 {
            let uuid1 = factory1.next();
            let uuid2 = factory2.next();
            assert!(uuids.insert(uuid1), "Duplicate UUID from JE generator");
            assert!(uuids.insert(uuid2), "Duplicate UUID from DocFlow generator");
        }
    }

    /// Identically configured factories must produce identical sequences.
    #[test]
    fn test_uuid_determinism() {
        let factory1 = DeterministicUuidFactory::new(12345, GeneratorType::JournalEntry);
        let factory2 = DeterministicUuidFactory::new(12345, GeneratorType::JournalEntry);

        for _ in 0..100 {
            assert_eq!(factory1.next(), factory2.next());
        }
    }

    /// Changing only the seed must change the output.
    #[test]
    fn test_uuid_different_seeds() {
        let factory1 = DeterministicUuidFactory::new(12345, GeneratorType::JournalEntry);
        let factory2 = DeterministicUuidFactory::new(67890, GeneratorType::JournalEntry);

        // Different seeds should produce different UUIDs
        assert_ne!(factory1.next(), factory2.next());
    }

    /// Threads sharing one factory must never receive the same UUID.
    #[test]
    fn test_thread_safety() {
        use std::sync::Arc;

        let factory = Arc::new(DeterministicUuidFactory::new(
            12345,
            GeneratorType::JournalEntry,
        ));
        let mut handles = vec![];

        for _ in 0..4 {
            let factory_clone = Arc::clone(&factory);
            handles.push(thread::spawn(move || {
                let mut uuids = Vec::new();
                for _ in 0..1000 {
                    uuids.push(factory_clone.next());
                }
                uuids
            }));
        }

        let mut all_uuids = HashSet::new();
        for handle in handles {
            let uuids = handle.join().unwrap();
            for uuid in uuids {
                assert!(all_uuids.insert(uuid), "Thread-generated UUID collision");
            }
        }

        assert_eq!(all_uuids.len(), 4000);
    }

    /// Factories differing only in sub-discriminator must diverge at the same
    /// counter value.
    #[test]
    fn test_sub_discriminator() {
        let factory1 =
            DeterministicUuidFactory::with_sub_discriminator(12345, GeneratorType::JournalEntry, 0);
        let factory2 =
            DeterministicUuidFactory::with_sub_discriminator(12345, GeneratorType::JournalEntry, 1);

        // Both factories are at counter 0, so only the sub-discriminator differs.
        let uuid1 = factory1.next();
        let uuid2 = factory2.next();

        assert_ne!(uuid1, uuid2);
    }

    /// `generate_at(n)` must equal the UUID that `next()` yields when the
    /// counter is `n`, and must not advance the counter itself.
    #[test]
    fn test_generate_at() {
        let factory = DeterministicUuidFactory::new(12345, GeneratorType::JournalEntry);

        // Generate at specific counter; the factory's counter must be untouched.
        let uuid_at_5 = factory.generate_at(5);
        assert_eq!(factory.current_counter(), 0);

        // Consume counters 0..=4 so the next draw uses counter 5.
        for _ in 0..5 {
            factory.next();
        }
        let uuid_sequential = factory.next();

        // The sequentially reached UUID is the one generate_at(5) predicted.
        assert_eq!(uuid_at_5, uuid_sequential);
        assert_eq!(factory.current_counter(), 6);

        // Regenerating afterwards is still stable.
        assert_eq!(uuid_at_5, factory.generate_at(5));
    }

    /// The registry hands out unique UUIDs and tracks counters per type.
    #[test]
    fn test_registry() {
        let mut registry = UuidFactoryRegistry::new(12345);

        let uuid1 = registry.next_uuid(GeneratorType::JournalEntry);
        let uuid2 = registry.next_uuid(GeneratorType::JournalEntry);
        let uuid3 = registry.next_uuid(GeneratorType::DocumentFlow);

        // All should be unique
        assert_ne!(uuid1, uuid2);
        assert_ne!(uuid1, uuid3);
        assert_ne!(uuid2, uuid3);

        // Counter should be tracked
        assert_eq!(registry.get_counter(GeneratorType::JournalEntry), Some(2));
        assert_eq!(registry.get_counter(GeneratorType::DocumentFlow), Some(1));
    }

    /// Generated UUIDs must report version 4 and the RFC 4122 variant.
    #[test]
    fn test_uuid_is_valid_v4() {
        let factory = DeterministicUuidFactory::new(12345, GeneratorType::JournalEntry);
        let uuid = factory.next();

        // Check version is 4
        assert_eq!(uuid.get_version_num(), 4);

        // Check variant is RFC 4122
        assert_eq!(uuid.get_variant(), uuid::Variant::RFC4122);
    }
}