// datasynth_core/uuid_factory.rs

//! Deterministic UUID generation factory for reproducible synthetic data.
//!
//! This module provides a centralized UUID generation system designed for:
//! - Distinct UUID streams for different generator types (the generator type
//!   is mixed into every UUID)
//! - Reproducible output given the same seed
//! - Thread-safe counter increments
7
8use std::sync::atomic::{AtomicU64, Ordering};
9use uuid::Uuid;
10
/// Identifies which generator a UUID stream belongs to.
///
/// Every variant has an explicit `u8` discriminant. That numeric value is mixed
/// into each generated UUID, so changing an existing value changes the UUIDs
/// produced for that generator.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum GeneratorType {
    // --- Generators: 0x01..=0x14 ---
    /// Journal entries.
    JournalEntry = 0x01,
    /// Document flows (P2P / O2C).
    DocumentFlow = 0x02,
    /// Master data: vendors.
    Vendor = 0x03,
    /// Master data: customers.
    Customer = 0x04,
    /// Master data: materials.
    Material = 0x05,
    /// Master data: assets.
    Asset = 0x06,
    /// Master data: employees.
    Employee = 0x07,
    /// Subledger: accounts receivable.
    ARSubledger = 0x08,
    /// Subledger: accounts payable.
    APSubledger = 0x09,
    /// Subledger: fixed assets.
    FASubledger = 0x0A,
    /// Subledger: inventory.
    InventorySubledger = 0x0B,
    /// Intercompany transactions.
    Intercompany = 0x0C,
    /// Injected anomalies.
    Anomaly = 0x0D,
    /// Period-close activities.
    PeriodClose = 0x0E,
    /// FX rates.
    FxRate = 0x0F,
    /// Accruals.
    Accrual = 0x10,
    /// Depreciation.
    Depreciation = 0x11,
    /// Controls.
    Control = 0x12,
    /// Opening balances.
    OpeningBalance = 0x13,
    /// Trial balances.
    TrialBalance = 0x14,

    // --- Individual document kinds: 0x20..=0x27 ---
    /// Purchase order documents.
    PurchaseOrder = 0x20,
    /// Goods receipt documents.
    GoodsReceipt = 0x21,
    /// Vendor invoice documents.
    VendorInvoice = 0x22,
    /// Payment documents.
    Payment = 0x23,
    /// Sales order documents.
    SalesOrder = 0x24,
    /// Delivery documents.
    Delivery = 0x25,
    /// Customer invoice documents.
    CustomerInvoice = 0x26,
    /// Customer receipt documents.
    CustomerReceipt = 0x27,
}
72
73/// A factory for generating deterministic UUIDs that are guaranteed unique
74/// across different generator types within the same seed.
75///
76/// # UUID Structure (16 bytes)
77///
78/// ```text
79/// Bytes 0-5:   Seed (lower 48 bits)
80/// Byte  6:     Generator type discriminator
81/// Byte  7:     Version nibble (0x4_) | Sub-discriminator
82/// Bytes 8-15:  Counter (64-bit, with variant bits set)
83/// ```
84///
85/// # Thread Safety
86///
87/// The counter uses `AtomicU64` for thread-safe increments, allowing
88/// concurrent UUID generation from multiple threads.
89#[derive(Debug)]
90pub struct DeterministicUuidFactory {
91    seed: u64,
92    generator_type: GeneratorType,
93    counter: AtomicU64,
94    /// Optional sub-discriminator for further namespace separation
95    sub_discriminator: u8,
96}
97
98impl DeterministicUuidFactory {
99    /// Create a new UUID factory for a specific generator type.
100    ///
101    /// # Arguments
102    ///
103    /// * `seed` - The global seed for deterministic generation
104    /// * `generator_type` - The type of generator using this factory
105    ///
106    /// # Example
107    ///
108    /// ```
109    /// use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
110    ///
111    /// let factory = DeterministicUuidFactory::new(12345, GeneratorType::JournalEntry);
112    /// let uuid = factory.next();
113    /// ```
114    pub fn new(seed: u64, generator_type: GeneratorType) -> Self {
115        Self {
116            seed,
117            generator_type,
118            counter: AtomicU64::new(0),
119            sub_discriminator: 0,
120        }
121    }
122
123    /// Create a factory with a sub-discriminator for additional namespace separation.
124    ///
125    /// Useful when the same generator type needs multiple independent UUID streams.
126    pub fn with_sub_discriminator(
127        seed: u64,
128        generator_type: GeneratorType,
129        sub_discriminator: u8,
130    ) -> Self {
131        Self {
132            seed,
133            generator_type,
134            counter: AtomicU64::new(0),
135            sub_discriminator,
136        }
137    }
138
139    /// Create a factory starting from a specific counter value.
140    ///
141    /// Useful for resuming generation from a checkpoint.
142    pub fn with_counter(seed: u64, generator_type: GeneratorType, start_counter: u64) -> Self {
143        Self {
144            seed,
145            generator_type,
146            counter: AtomicU64::new(start_counter),
147            sub_discriminator: 0,
148        }
149    }
150
151    /// Generate the next UUID in the sequence.
152    ///
153    /// This method is thread-safe and can be called from multiple threads.
154    pub fn next(&self) -> Uuid {
155        let counter = self.counter.fetch_add(1, Ordering::Relaxed);
156        self.generate_uuid(counter)
157    }
158
159    /// Generate a UUID for a specific counter value without incrementing.
160    ///
161    /// Useful for deterministic regeneration of specific UUIDs.
162    pub fn generate_at(&self, counter: u64) -> Uuid {
163        self.generate_uuid(counter)
164    }
165
166    /// Get the current counter value.
167    pub fn current_counter(&self) -> u64 {
168        self.counter.load(Ordering::Relaxed)
169    }
170
171    /// Reset the counter to zero.
172    pub fn reset(&self) {
173        self.counter.store(0, Ordering::Relaxed);
174    }
175
176    /// Set the counter to a specific value.
177    pub fn set_counter(&self, value: u64) {
178        self.counter.store(value, Ordering::Relaxed);
179    }
180
181    /// Generate a UUID from the seed, generator type, and counter.
182    ///
183    /// Uses a simple hash-based approach to ensure uniqueness while maintaining
184    /// determinism. The hash function is designed to spread entropy across all
185    /// bytes while preserving the UUID v4 format.
186    fn generate_uuid(&self, counter: u64) -> Uuid {
187        // Create a unique input by combining all distinguishing factors
188        // Use FNV-1a style hashing for simplicity and determinism
189        let mut hash: u64 = 14695981039346656037; // FNV offset basis
190
191        // Mix in seed
192        for byte in self.seed.to_le_bytes() {
193            hash ^= byte as u64;
194            hash = hash.wrapping_mul(1099511628211); // FNV prime
195        }
196
197        // Mix in generator type
198        hash ^= self.generator_type as u64;
199        hash = hash.wrapping_mul(1099511628211);
200
201        // Mix in sub-discriminator
202        hash ^= self.sub_discriminator as u64;
203        hash = hash.wrapping_mul(1099511628211);
204
205        // Mix in counter (most important for uniqueness within same factory)
206        for byte in counter.to_le_bytes() {
207            hash ^= byte as u64;
208            hash = hash.wrapping_mul(1099511628211);
209        }
210
211        // Create second hash for remaining bytes
212        let mut hash2: u64 = hash;
213        hash2 ^= self.seed.rotate_left(32);
214        hash2 = hash2.wrapping_mul(1099511628211);
215        hash2 ^= counter.rotate_left(32);
216        hash2 = hash2.wrapping_mul(1099511628211);
217
218        let mut bytes = [0u8; 16];
219
220        // First 8 bytes from hash
221        bytes[0..8].copy_from_slice(&hash.to_le_bytes());
222        // Second 8 bytes from hash2
223        bytes[8..16].copy_from_slice(&hash2.to_le_bytes());
224
225        // Set UUID version 4 (bits 12-15 of time_hi_and_version)
226        // Byte 6: xxxx0100 -> set bits 4-7 to 0100
227        bytes[6] = (bytes[6] & 0x0f) | 0x40;
228
229        // Set variant to RFC 4122 (bits 6-7 of clock_seq_hi_and_reserved)
230        // Byte 8: 10xxxxxx -> set bits 6-7 to 10
231        bytes[8] = (bytes[8] & 0x3f) | 0x80;
232
233        Uuid::from_bytes(bytes)
234    }
235}
236
237impl Clone for DeterministicUuidFactory {
238    fn clone(&self) -> Self {
239        Self {
240            seed: self.seed,
241            generator_type: self.generator_type,
242            counter: AtomicU64::new(self.counter.load(Ordering::Relaxed)),
243            sub_discriminator: self.sub_discriminator,
244        }
245    }
246}
247
248/// A registry that manages multiple UUID factories for different generator types.
249///
250/// This ensures a single source of truth for UUID generation across the system.
251#[derive(Debug)]
252pub struct UuidFactoryRegistry {
253    seed: u64,
254    factories: std::collections::HashMap<GeneratorType, DeterministicUuidFactory>,
255}
256
257impl UuidFactoryRegistry {
258    /// Create a new registry with a global seed.
259    pub fn new(seed: u64) -> Self {
260        Self {
261            seed,
262            factories: std::collections::HashMap::new(),
263        }
264    }
265
266    /// Get or create a factory for a specific generator type.
267    pub fn get_factory(&mut self, generator_type: GeneratorType) -> &DeterministicUuidFactory {
268        self.factories
269            .entry(generator_type)
270            .or_insert_with(|| DeterministicUuidFactory::new(self.seed, generator_type))
271    }
272
273    /// Generate the next UUID for a specific generator type.
274    pub fn next_uuid(&mut self, generator_type: GeneratorType) -> Uuid {
275        self.get_factory(generator_type).next()
276    }
277
278    /// Reset all factories.
279    pub fn reset_all(&self) {
280        for factory in self.factories.values() {
281            factory.reset();
282        }
283    }
284
285    /// Get the current counter for a generator type.
286    pub fn get_counter(&self, generator_type: GeneratorType) -> Option<u64> {
287        self.factories
288            .get(&generator_type)
289            .map(|f| f.current_counter())
290    }
291}
292
#[cfg(test)]
// `unwrap` is acceptable in tests: a failure should abort the test immediately.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use std::collections::HashSet;
    use std::thread;

    /// A single factory must not repeat a UUID over a long run.
    #[test]
    fn test_uuid_uniqueness_same_generator() {
        let factory = DeterministicUuidFactory::new(12345, GeneratorType::JournalEntry);

        let mut uuids = HashSet::new();
        for _ in 0..10000 {
            let uuid = factory.next();
            assert!(uuids.insert(uuid), "Duplicate UUID generated");
        }
    }

    /// Factories with the same seed but different generator types must not
    /// overlap.
    #[test]
    fn test_uuid_uniqueness_different_generators() {
        let factory1 = DeterministicUuidFactory::new(12345, GeneratorType::JournalEntry);
        let factory2 = DeterministicUuidFactory::new(12345, GeneratorType::DocumentFlow);

        let mut uuids = HashSet::new();

        for _ in 0..5000 {
            let uuid1 = factory1.next();
            let uuid2 = factory2.next();
            assert!(uuids.insert(uuid1), "Duplicate UUID from JE generator");
            assert!(uuids.insert(uuid2), "Duplicate UUID from DocFlow generator");
        }
    }

    /// Identically configured factories must produce identical sequences.
    #[test]
    fn test_uuid_determinism() {
        let factory1 = DeterministicUuidFactory::new(12345, GeneratorType::JournalEntry);
        let factory2 = DeterministicUuidFactory::new(12345, GeneratorType::JournalEntry);

        for _ in 0..100 {
            assert_eq!(factory1.next(), factory2.next());
        }
    }

    #[test]
    fn test_uuid_different_seeds() {
        let factory1 = DeterministicUuidFactory::new(12345, GeneratorType::JournalEntry);
        let factory2 = DeterministicUuidFactory::new(67890, GeneratorType::JournalEntry);

        // Different seeds should produce different UUIDs
        assert_ne!(factory1.next(), factory2.next());
    }

    /// Concurrent callers sharing one factory must each get distinct UUIDs.
    #[test]
    fn test_thread_safety() {
        use std::sync::Arc;

        let factory = Arc::new(DeterministicUuidFactory::new(
            12345,
            GeneratorType::JournalEntry,
        ));
        let mut handles = vec![];

        for _ in 0..4 {
            let factory_clone = Arc::clone(&factory);
            handles.push(thread::spawn(move || {
                let mut uuids = Vec::with_capacity(1000);
                for _ in 0..1000 {
                    uuids.push(factory_clone.next());
                }
                uuids
            }));
        }

        let mut all_uuids = HashSet::new();
        for handle in handles {
            let uuids = handle.join().unwrap();
            for uuid in uuids {
                assert!(all_uuids.insert(uuid), "Thread-generated UUID collision");
            }
        }

        assert_eq!(all_uuids.len(), 4000);
        assert_eq!(factory.current_counter(), 4000);
    }

    #[test]
    fn test_sub_discriminator() {
        let factory1 =
            DeterministicUuidFactory::with_sub_discriminator(12345, GeneratorType::JournalEntry, 0);
        let factory2 =
            DeterministicUuidFactory::with_sub_discriminator(12345, GeneratorType::JournalEntry, 1);

        // Same seed, type and counter (0); only the sub-discriminator differs.
        assert_ne!(factory1.next(), factory2.next());
    }

    /// `generate_at(n)` must equal the UUID that sequential generation yields
    /// for counter value `n`, and must not advance the counter itself.
    #[test]
    fn test_generate_at() {
        let factory = DeterministicUuidFactory::new(12345, GeneratorType::JournalEntry);

        // Generate at specific counter
        let uuid_at_5 = factory.generate_at(5);
        assert_eq!(factory.current_counter(), 0);

        // Consume counters 0..=4 so the next sequential call uses counter 5.
        for _ in 0..5 {
            factory.next();
        }
        let uuid_sequential = factory.next();

        // The sequentially generated UUID for counter 5 must match.
        assert_eq!(uuid_at_5, uuid_sequential);
        assert_eq!(factory.current_counter(), 6);
    }

    /// A factory resumed via `with_counter` continues the same sequence.
    #[test]
    fn test_with_counter_resumes_sequence() {
        let reference = DeterministicUuidFactory::new(12345, GeneratorType::JournalEntry);
        let resumed = DeterministicUuidFactory::with_counter(12345, GeneratorType::JournalEntry, 5);

        assert_eq!(resumed.next(), reference.generate_at(5));
        assert_eq!(resumed.next(), reference.generate_at(6));
    }

    #[test]
    fn test_registry() {
        let mut registry = UuidFactoryRegistry::new(12345);

        let uuid1 = registry.next_uuid(GeneratorType::JournalEntry);
        let uuid2 = registry.next_uuid(GeneratorType::JournalEntry);
        let uuid3 = registry.next_uuid(GeneratorType::DocumentFlow);

        // All should be unique
        assert_ne!(uuid1, uuid2);
        assert_ne!(uuid1, uuid3);
        assert_ne!(uuid2, uuid3);

        // Counter should be tracked
        assert_eq!(registry.get_counter(GeneratorType::JournalEntry), Some(2));
        assert_eq!(registry.get_counter(GeneratorType::DocumentFlow), Some(1));
        // No factory has been requested for this type yet.
        assert_eq!(registry.get_counter(GeneratorType::Vendor), None);
    }

    #[test]
    fn test_uuid_is_valid_v4() {
        let factory = DeterministicUuidFactory::new(12345, GeneratorType::JournalEntry);

        // Check a run of UUIDs, not just the first, so the version/variant
        // masking is exercised against many different hash values.
        for _ in 0..256 {
            let uuid = factory.next();

            // Check version is 4
            assert_eq!(uuid.get_version_num(), 4);

            // Check variant is RFC 4122
            assert_eq!(uuid.get_variant(), uuid::Variant::RFC4122);
        }
    }
}
438}