Skip to main content

datasynth_core/
uuid_factory.rs

1//! Deterministic UUID generation factory for reproducible synthetic data.
2//!
3//! This module provides a centralized UUID generation system that ensures:
4//! - No collisions between different generator types
5//! - Reproducible output given the same seed
6//! - Thread-safe counter increments
7
8use std::sync::atomic::{AtomicU64, Ordering};
9use uuid::Uuid;
10
/// Identifies which generator a UUID stream belongs to.
///
/// Every variant carries a fixed `u8` discriminant. `DeterministicUuidFactory`
/// mixes that discriminant into its hash, so two factories that share a seed
/// but differ in generator type yield different UUID sequences. Renumbering an
/// existing variant therefore changes the UUIDs generated for it.
#[repr(u8)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum GeneratorType {
    // ----- Core generators -----
    /// Generator for journal entries.
    JournalEntry = 0x01,
    /// Generator for document flows (P2P / O2C).
    DocumentFlow = 0x02,
    /// Master data: vendor generator.
    Vendor = 0x03,
    /// Master data: customer generator.
    Customer = 0x04,
    /// Master data: material generator.
    Material = 0x05,
    /// Master data: asset generator.
    Asset = 0x06,
    /// Master data: employee generator.
    Employee = 0x07,
    /// Subledger: AR generator.
    ARSubledger = 0x08,
    /// Subledger: AP generator.
    APSubledger = 0x09,
    /// Subledger: FA generator.
    FASubledger = 0x0A,
    /// Subledger: inventory generator.
    InventorySubledger = 0x0B,
    /// Generator for intercompany data.
    Intercompany = 0x0C,
    /// Anomaly injection.
    Anomaly = 0x0D,
    /// Generator for period close.
    PeriodClose = 0x0E,
    /// Generator for FX rates.
    FxRate = 0x0F,
    /// Generator for accruals.
    Accrual = 0x10,
    /// Generator for depreciation.
    Depreciation = 0x11,
    /// Generator for controls.
    Control = 0x12,
    /// Generator for opening balances.
    OpeningBalance = 0x13,
    /// Generator for trial balances.
    TrialBalance = 0x14,

    // ----- Individual document types -----
    /// Purchase order document.
    PurchaseOrder = 0x20,
    /// Goods receipt document.
    GoodsReceipt = 0x21,
    /// Vendor invoice document.
    VendorInvoice = 0x22,
    /// Payment document.
    Payment = 0x23,
    /// Sales order document.
    SalesOrder = 0x24,
    /// Delivery document.
    Delivery = 0x25,
    /// Customer invoice document.
    CustomerInvoice = 0x26,
    /// Customer receipt document.
    CustomerReceipt = 0x27,

    // ===== Enterprise Process Chain generators =====
    /// Generator for sourcing projects.
    SourcingProject = 0x28,
    /// Generator for RFx events.
    RfxEvent = 0x29,
    /// Generator for supplier bids.
    SupplierBid = 0x2A,
    /// Generator for procurement contracts.
    ProcurementContract = 0x2B,
    /// Generator for catalog items.
    CatalogItem = 0x2C,
    /// Generator for bank reconciliations.
    BankReconciliation = 0x2D,
    /// Generator for financial statements.
    FinancialStatement = 0x2E,
    /// Generator for payroll runs.
    PayrollRun = 0x2F,
    /// Generator for time entries.
    TimeEntry = 0x30,
    /// Generator for expense reports.
    ExpenseReport = 0x31,
    /// Generator for production orders.
    ProductionOrder = 0x32,
    /// Generator for cycle counts.
    CycleCount = 0x33,
    /// Generator for quality inspections.
    QualityInspection = 0x34,
    /// Generator for sales quotes.
    SalesQuote = 0x35,
    /// Generator for budget lines.
    BudgetLine = 0x36,
    /// Generator for revenue recognition contracts.
    RevenueRecognition = 0x37,
    /// Generator for impairment tests.
    ImpairmentTest = 0x38,
    /// Generator for management KPIs.
    Kpi = 0x39,
}
110
111/// A factory for generating deterministic UUIDs that are guaranteed unique
112/// across different generator types within the same seed.
113///
114/// # UUID Structure (16 bytes)
115///
116/// ```text
117/// Bytes 0-5:   Seed (lower 48 bits)
118/// Byte  6:     Generator type discriminator
119/// Byte  7:     Version nibble (0x4_) | Sub-discriminator
120/// Bytes 8-15:  Counter (64-bit, with variant bits set)
121/// ```
122///
123/// # Thread Safety
124///
125/// The counter uses `AtomicU64` for thread-safe increments, allowing
126/// concurrent UUID generation from multiple threads.
127#[derive(Debug)]
128pub struct DeterministicUuidFactory {
129    seed: u64,
130    generator_type: GeneratorType,
131    counter: AtomicU64,
132    /// Optional sub-discriminator for further namespace separation
133    sub_discriminator: u8,
134}
135
136impl DeterministicUuidFactory {
137    /// Create a new UUID factory for a specific generator type.
138    ///
139    /// # Arguments
140    ///
141    /// * `seed` - The global seed for deterministic generation
142    /// * `generator_type` - The type of generator using this factory
143    ///
144    /// # Example
145    ///
146    /// ```
147    /// use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
148    ///
149    /// let factory = DeterministicUuidFactory::new(12345, GeneratorType::JournalEntry);
150    /// let uuid = factory.next();
151    /// ```
152    pub fn new(seed: u64, generator_type: GeneratorType) -> Self {
153        Self {
154            seed,
155            generator_type,
156            counter: AtomicU64::new(0),
157            sub_discriminator: 0,
158        }
159    }
160
161    /// Create a factory with a sub-discriminator for additional namespace separation.
162    ///
163    /// Useful when the same generator type needs multiple independent UUID streams.
164    pub fn with_sub_discriminator(
165        seed: u64,
166        generator_type: GeneratorType,
167        sub_discriminator: u8,
168    ) -> Self {
169        Self {
170            seed,
171            generator_type,
172            counter: AtomicU64::new(0),
173            sub_discriminator,
174        }
175    }
176
177    /// Create a factory starting from a specific counter value.
178    ///
179    /// Useful for resuming generation from a checkpoint.
180    pub fn with_counter(seed: u64, generator_type: GeneratorType, start_counter: u64) -> Self {
181        Self {
182            seed,
183            generator_type,
184            counter: AtomicU64::new(start_counter),
185            sub_discriminator: 0,
186        }
187    }
188
189    /// Generate the next UUID in the sequence.
190    ///
191    /// This method is thread-safe and can be called from multiple threads.
192    pub fn next(&self) -> Uuid {
193        let counter = self.counter.fetch_add(1, Ordering::Relaxed);
194        self.generate_uuid(counter)
195    }
196
197    /// Generate a UUID for a specific counter value without incrementing.
198    ///
199    /// Useful for deterministic regeneration of specific UUIDs.
200    pub fn generate_at(&self, counter: u64) -> Uuid {
201        self.generate_uuid(counter)
202    }
203
204    /// Get the current counter value.
205    pub fn current_counter(&self) -> u64 {
206        self.counter.load(Ordering::Relaxed)
207    }
208
209    /// Reset the counter to zero.
210    pub fn reset(&self) {
211        self.counter.store(0, Ordering::Relaxed);
212    }
213
214    /// Set the counter to a specific value.
215    pub fn set_counter(&self, value: u64) {
216        self.counter.store(value, Ordering::Relaxed);
217    }
218
219    /// Generate a UUID from the seed, generator type, and counter.
220    ///
221    /// Uses a simple hash-based approach to ensure uniqueness while maintaining
222    /// determinism. The hash function is designed to spread entropy across all
223    /// bytes while preserving the UUID v4 format.
224    fn generate_uuid(&self, counter: u64) -> Uuid {
225        // Create a unique input by combining all distinguishing factors
226        // Use FNV-1a style hashing for simplicity and determinism
227        let mut hash: u64 = 14695981039346656037; // FNV offset basis
228
229        // Mix in seed
230        for byte in self.seed.to_le_bytes() {
231            hash ^= byte as u64;
232            hash = hash.wrapping_mul(1099511628211); // FNV prime
233        }
234
235        // Mix in generator type
236        hash ^= self.generator_type as u64;
237        hash = hash.wrapping_mul(1099511628211);
238
239        // Mix in sub-discriminator
240        hash ^= self.sub_discriminator as u64;
241        hash = hash.wrapping_mul(1099511628211);
242
243        // Mix in counter (most important for uniqueness within same factory)
244        for byte in counter.to_le_bytes() {
245            hash ^= byte as u64;
246            hash = hash.wrapping_mul(1099511628211);
247        }
248
249        // Create second hash for remaining bytes
250        let mut hash2: u64 = hash;
251        hash2 ^= self.seed.rotate_left(32);
252        hash2 = hash2.wrapping_mul(1099511628211);
253        hash2 ^= counter.rotate_left(32);
254        hash2 = hash2.wrapping_mul(1099511628211);
255
256        let mut bytes = [0u8; 16];
257
258        // First 8 bytes from hash
259        bytes[0..8].copy_from_slice(&hash.to_le_bytes());
260        // Second 8 bytes from hash2
261        bytes[8..16].copy_from_slice(&hash2.to_le_bytes());
262
263        // Set UUID version 4 (bits 12-15 of time_hi_and_version)
264        // Byte 6: xxxx0100 -> set bits 4-7 to 0100
265        bytes[6] = (bytes[6] & 0x0f) | 0x40;
266
267        // Set variant to RFC 4122 (bits 6-7 of clock_seq_hi_and_reserved)
268        // Byte 8: 10xxxxxx -> set bits 6-7 to 10
269        bytes[8] = (bytes[8] & 0x3f) | 0x80;
270
271        Uuid::from_bytes(bytes)
272    }
273}
274
275impl Clone for DeterministicUuidFactory {
276    fn clone(&self) -> Self {
277        Self {
278            seed: self.seed,
279            generator_type: self.generator_type,
280            counter: AtomicU64::new(self.counter.load(Ordering::Relaxed)),
281            sub_discriminator: self.sub_discriminator,
282        }
283    }
284}
285
286/// A registry that manages multiple UUID factories for different generator types.
287///
288/// This ensures a single source of truth for UUID generation across the system.
289#[derive(Debug)]
290pub struct UuidFactoryRegistry {
291    seed: u64,
292    factories: std::collections::HashMap<GeneratorType, DeterministicUuidFactory>,
293}
294
295impl UuidFactoryRegistry {
296    /// Create a new registry with a global seed.
297    pub fn new(seed: u64) -> Self {
298        Self {
299            seed,
300            factories: std::collections::HashMap::new(),
301        }
302    }
303
304    /// Get or create a factory for a specific generator type.
305    pub fn get_factory(&mut self, generator_type: GeneratorType) -> &DeterministicUuidFactory {
306        self.factories
307            .entry(generator_type)
308            .or_insert_with(|| DeterministicUuidFactory::new(self.seed, generator_type))
309    }
310
311    /// Generate the next UUID for a specific generator type.
312    pub fn next_uuid(&mut self, generator_type: GeneratorType) -> Uuid {
313        self.get_factory(generator_type).next()
314    }
315
316    /// Reset all factories.
317    pub fn reset_all(&self) {
318        for factory in self.factories.values() {
319            factory.reset();
320        }
321    }
322
323    /// Get the current counter for a generator type.
324    pub fn get_counter(&self, generator_type: GeneratorType) -> Option<u64> {
325        self.factories
326            .get(&generator_type)
327            .map(|f| f.current_counter())
328    }
329}
330
/// Unit tests for the UUID factory and registry.
#[cfg(test)]
// `unwrap` is acceptable here: a panic is exactly how a test should fail.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use std::collections::HashSet;
    use std::thread;

    /// A single factory must not repeat a UUID over 10 000 draws.
    #[test]
    fn test_uuid_uniqueness_same_generator() {
        let factory = DeterministicUuidFactory::new(12345, GeneratorType::JournalEntry);

        let mut uuids = HashSet::new();
        for _ in 0..10000 {
            let uuid = factory.next();
            assert!(uuids.insert(uuid), "Duplicate UUID generated");
        }
    }

    /// Two factories with the same seed but different generator types must
    /// not produce overlapping UUIDs.
    #[test]
    fn test_uuid_uniqueness_different_generators() {
        let factory1 = DeterministicUuidFactory::new(12345, GeneratorType::JournalEntry);
        let factory2 = DeterministicUuidFactory::new(12345, GeneratorType::DocumentFlow);

        let mut uuids = HashSet::new();

        for _ in 0..5000 {
            let uuid1 = factory1.next();
            let uuid2 = factory2.next();
            assert!(uuids.insert(uuid1), "Duplicate UUID from JE generator");
            assert!(uuids.insert(uuid2), "Duplicate UUID from DocFlow generator");
        }
    }

    /// Identically configured factories must produce identical sequences.
    #[test]
    fn test_uuid_determinism() {
        let factory1 = DeterministicUuidFactory::new(12345, GeneratorType::JournalEntry);
        let factory2 = DeterministicUuidFactory::new(12345, GeneratorType::JournalEntry);

        for _ in 0..100 {
            assert_eq!(factory1.next(), factory2.next());
        }
    }

    /// Changing only the seed must change the output.
    #[test]
    fn test_uuid_different_seeds() {
        let factory1 = DeterministicUuidFactory::new(12345, GeneratorType::JournalEntry);
        let factory2 = DeterministicUuidFactory::new(67890, GeneratorType::JournalEntry);

        // Different seeds should produce different UUIDs
        assert_ne!(factory1.next(), factory2.next());
    }

    /// Four threads sharing one factory must receive 4 000 distinct UUIDs.
    #[test]
    fn test_thread_safety() {
        use std::sync::Arc;

        let factory = Arc::new(DeterministicUuidFactory::new(
            12345,
            GeneratorType::JournalEntry,
        ));
        let mut handles = vec![];

        for _ in 0..4 {
            let factory_clone = Arc::clone(&factory);
            handles.push(thread::spawn(move || {
                let mut uuids = Vec::new();
                for _ in 0..1000 {
                    uuids.push(factory_clone.next());
                }
                uuids
            }));
        }

        let mut all_uuids = HashSet::new();
        for handle in handles {
            let uuids = handle.join().unwrap();
            for uuid in uuids {
                assert!(all_uuids.insert(uuid), "Thread-generated UUID collision");
            }
        }

        assert_eq!(all_uuids.len(), 4000);
    }

    /// Changing only the sub-discriminator must change the output.
    #[test]
    fn test_sub_discriminator() {
        let factory1 =
            DeterministicUuidFactory::with_sub_discriminator(12345, GeneratorType::JournalEntry, 0);
        let factory2 =
            DeterministicUuidFactory::with_sub_discriminator(12345, GeneratorType::JournalEntry, 1);

        // Both factories are at counter 0, so only the sub-discriminator differs.
        let uuid1 = factory1.next();
        let uuid2 = factory2.next();

        assert_ne!(uuid1, uuid2);
    }

    /// `generate_at(n)` must equal the UUID that `next` yields at counter `n`,
    /// and must not advance the counter itself.
    #[test]
    fn test_generate_at() {
        let factory = DeterministicUuidFactory::new(12345, GeneratorType::JournalEntry);

        // Generate at specific counter
        let uuid_at_5 = factory.generate_at(5);
        assert_eq!(factory.current_counter(), 0);

        // Consume counters 0..=4 so the next draw uses counter 5
        for _ in 0..5 {
            factory.next();
        }
        let uuid_sequential = factory.next();

        // The sequentially generated UUID at counter 5 should match
        assert_eq!(uuid_at_5, uuid_sequential);
    }

    /// The registry must hand out unique UUIDs and track one counter per
    /// generator type.
    #[test]
    fn test_registry() {
        let mut registry = UuidFactoryRegistry::new(12345);

        let uuid1 = registry.next_uuid(GeneratorType::JournalEntry);
        let uuid2 = registry.next_uuid(GeneratorType::JournalEntry);
        let uuid3 = registry.next_uuid(GeneratorType::DocumentFlow);

        // All should be unique
        assert_ne!(uuid1, uuid2);
        assert_ne!(uuid1, uuid3);
        assert_ne!(uuid2, uuid3);

        // Counter should be tracked
        assert_eq!(registry.get_counter(GeneratorType::JournalEntry), Some(2));
        assert_eq!(registry.get_counter(GeneratorType::DocumentFlow), Some(1));
    }

    /// Generated UUIDs must report version 4 and the RFC 4122 variant.
    #[test]
    fn test_uuid_is_valid_v4() {
        let factory = DeterministicUuidFactory::new(12345, GeneratorType::JournalEntry);
        let uuid = factory.next();

        // Check version is 4
        assert_eq!(uuid.get_version_num(), 4);

        // Check variant is RFC 4122
        assert_eq!(uuid.get_variant(), uuid::Variant::RFC4122);
    }
}