datasynth_core/uuid_factory.rs
1//! Deterministic UUID generation factory for reproducible synthetic data.
2//!
//! This module provides a centralized UUID generation system that offers:
//! - Namespace separation between generator types (the type discriminator is
//!   mixed into every UUID, making cross-generator collisions improbable)
//! - Reproducible output given the same seed
//! - Thread-safe counter increments
7
8use std::sync::atomic::{AtomicU64, Ordering};
9use uuid::Uuid;
10
/// Identifies which generator a UUID stream belongs to.
///
/// The numeric discriminant is one of the inputs hashed into every UUID, so
/// each variant gets its own namespace. The values are part of the
/// reproducibility contract: changing one changes every UUID produced for
/// that generator type.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum GeneratorType {
    // ===== Core generators (0x01-0x14) =====
    /// Journal entries.
    JournalEntry = 0x01,
    /// Document flows (P2P / O2C).
    DocumentFlow = 0x02,
    /// Master data: vendors.
    Vendor = 0x03,
    /// Master data: customers.
    Customer = 0x04,
    /// Master data: materials.
    Material = 0x05,
    /// Master data: assets.
    Asset = 0x06,
    /// Master data: employees.
    Employee = 0x07,
    /// Subledger: accounts receivable.
    ARSubledger = 0x08,
    /// Subledger: accounts payable.
    APSubledger = 0x09,
    /// Subledger: fixed assets.
    FASubledger = 0x0A,
    /// Subledger: inventory.
    InventorySubledger = 0x0B,
    /// Intercompany transactions.
    Intercompany = 0x0C,
    /// Injected anomalies.
    Anomaly = 0x0D,
    /// Period close.
    PeriodClose = 0x0E,
    /// FX rates.
    FxRate = 0x0F,
    /// Accruals.
    Accrual = 0x10,
    /// Depreciation.
    Depreciation = 0x11,
    /// Controls.
    Control = 0x12,
    /// Opening balances.
    OpeningBalance = 0x13,
    /// Trial balances.
    TrialBalance = 0x14,

    // ===== Document types (0x20-0x27) =====
    /// Purchase order documents.
    PurchaseOrder = 0x20,
    /// Goods receipt documents.
    GoodsReceipt = 0x21,
    /// Vendor invoice documents.
    VendorInvoice = 0x22,
    /// Payment documents.
    Payment = 0x23,
    /// Sales order documents.
    SalesOrder = 0x24,
    /// Delivery documents.
    Delivery = 0x25,
    /// Customer invoice documents.
    CustomerInvoice = 0x26,
    /// Customer receipt documents.
    CustomerReceipt = 0x27,

    // ===== Enterprise Process Chain generators =====
    /// Sourcing projects.
    SourcingProject = 0x28,
    /// RFx events.
    RfxEvent = 0x29,
    /// Supplier bids.
    SupplierBid = 0x2A,
    /// Procurement contracts.
    ProcurementContract = 0x2B,
    /// Catalog items.
    CatalogItem = 0x2C,
    /// Bank reconciliations.
    BankReconciliation = 0x2D,
    /// Financial statements.
    FinancialStatement = 0x2E,
    /// Payroll runs.
    PayrollRun = 0x2F,
    /// Time entries.
    TimeEntry = 0x30,
    /// Expense reports.
    ExpenseReport = 0x31,
    /// Production orders.
    ProductionOrder = 0x32,
    /// Cycle counts.
    CycleCount = 0x33,
    /// Quality inspections.
    QualityInspection = 0x34,
    /// Sales quotes.
    SalesQuote = 0x35,
    /// Budget lines.
    BudgetLine = 0x36,
    /// Revenue recognition contracts.
    RevenueRecognition = 0x37,
    /// Impairment tests.
    ImpairmentTest = 0x38,
    /// Management KPIs.
    Kpi = 0x39,
    /// Tax codes / jurisdictions.
    Tax = 0x3A,
    /// Project accounting (cost lines, revenue, milestones, change orders, EVM).
    ProjectAccounting = 0x3B,
    /// ESG / sustainability (emissions, energy, water, waste, diversity, safety).
    Esg = 0x3C,
    /// Supplier qualifications.
    SupplierQualification = 0x3D,
    /// Supplier scorecards.
    SupplierScorecard = 0x3E,
    /// BOM components.
    BomComponent = 0x3F,
    /// Inventory movements.
    InventoryMovement = 0x40,
    /// Benefit enrollments.
    BenefitEnrollment = 0x41,
    /// Disruption events.
    Disruption = 0x42,
    /// Business combinations (IFRS 3 / ASC 805).
    BusinessCombination = 0x43,
    /// Segment reporting (IFRS 8 / ASC 280).
    SegmentReport = 0x44,
    /// Expected credit loss (IFRS 9 / ASC 326).
    ExpectedCreditLoss = 0x45,
    /// Defined benefit pensions (IAS 19 / ASC 715).
    Pension = 0x46,
    /// Provisions and contingencies (IAS 37 / ASC 450).
    Provision = 0x47,
    /// Stock-based compensation (ASC 718 / IFRS 2).
    StockCompensation = 0x48,
    /// Industry benchmarks (WI-3).
    IndustryBenchmark = 0x49,
    /// Governance / board minutes (WI-5).
    Governance = 0x4A,
    /// Organizational profiles (WI-6).
    OrganizationalProfile = 0x4B,
    /// IT controls: access logs and change management (WI-4).
    ItControls = 0x4C,
    /// Management reports (WI-7).
    ManagementReport = 0x4D,
    /// Prior-year comparative data (WI-2).
    PriorYear = 0x4E,
    /// Legal documents (audit engagement support).
    LegalDocument = 0x4F,
    /// External expectations (ISA-520 substantive-analytics layer).
    ExternalExpectation = 0x50,
    /// Evidence anchors (ISA-505 external-corroboration layer).
    EvidenceAnchor = 0x51,
    /// Scenario / counterfactual-intervention identifiers (paired-rollout engine).
    Scenario = 0x52,
}
160
161/// A factory for generating deterministic UUIDs that are guaranteed unique
162/// across different generator types within the same seed.
163///
164/// # UUID Structure (16 bytes)
165///
166/// ```text
167/// Bytes 0-5: Seed (lower 48 bits)
168/// Byte 6: Generator type discriminator
169/// Byte 7: Version nibble (0x4_) | Sub-discriminator
170/// Bytes 8-15: Counter (64-bit, with variant bits set)
171/// ```
172///
173/// # Thread Safety
174///
175/// The counter uses `AtomicU64` for thread-safe increments, allowing
176/// concurrent UUID generation from multiple threads.
177#[derive(Debug)]
178pub struct DeterministicUuidFactory {
179 seed: u64,
180 generator_type: GeneratorType,
181 counter: AtomicU64,
182 /// Optional sub-discriminator for further namespace separation
183 sub_discriminator: u8,
184}
185
186impl DeterministicUuidFactory {
187 /// Create a new UUID factory for a specific generator type.
188 ///
189 /// # Arguments
190 ///
191 /// * `seed` - The global seed for deterministic generation
192 /// * `generator_type` - The type of generator using this factory
193 ///
194 /// # Example
195 ///
196 /// ```
197 /// use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
198 ///
199 /// let factory = DeterministicUuidFactory::new(12345, GeneratorType::JournalEntry);
200 /// let uuid = factory.next();
201 /// ```
202 pub fn new(seed: u64, generator_type: GeneratorType) -> Self {
203 Self {
204 seed,
205 generator_type,
206 counter: AtomicU64::new(0),
207 sub_discriminator: 0,
208 }
209 }
210
211 /// Create a factory with a sub-discriminator for additional namespace separation.
212 ///
213 /// Useful when the same generator type needs multiple independent UUID streams.
214 pub fn with_sub_discriminator(
215 seed: u64,
216 generator_type: GeneratorType,
217 sub_discriminator: u8,
218 ) -> Self {
219 Self {
220 seed,
221 generator_type,
222 counter: AtomicU64::new(0),
223 sub_discriminator,
224 }
225 }
226
227 /// Create a factory starting from a specific counter value.
228 ///
229 /// Useful for resuming generation from a checkpoint or for partitioned
230 /// parallel generation where each thread gets a non-overlapping counter range.
231 pub fn with_counter(seed: u64, generator_type: GeneratorType, start_counter: u64) -> Self {
232 Self {
233 seed,
234 generator_type,
235 counter: AtomicU64::new(start_counter),
236 sub_discriminator: 0,
237 }
238 }
239
240 /// Create a factory for a specific partition in parallel generation.
241 ///
242 /// Each partition gets a unique sub-discriminator so that counters starting
243 /// from 0 in each partition still produce globally unique UUIDs. This avoids
244 /// atomic contention between threads since each partition has its own factory.
245 pub fn for_partition(seed: u64, generator_type: GeneratorType, partition_index: u8) -> Self {
246 Self {
247 seed,
248 generator_type,
249 counter: AtomicU64::new(0),
250 sub_discriminator: partition_index,
251 }
252 }
253
254 /// Generate the next UUID in the sequence.
255 ///
256 /// This method is thread-safe and can be called from multiple threads.
257 #[inline]
258 pub fn next(&self) -> Uuid {
259 let counter = self.counter.fetch_add(1, Ordering::Relaxed);
260 self.generate_uuid(counter)
261 }
262
263 /// Generate a UUID for a specific counter value without incrementing.
264 ///
265 /// Useful for deterministic regeneration of specific UUIDs.
266 pub fn generate_at(&self, counter: u64) -> Uuid {
267 self.generate_uuid(counter)
268 }
269
270 /// Get the current counter value.
271 pub fn current_counter(&self) -> u64 {
272 self.counter.load(Ordering::Relaxed)
273 }
274
275 /// Reset the counter to zero.
276 pub fn reset(&self) {
277 self.counter.store(0, Ordering::Relaxed);
278 }
279
280 /// Set the counter to a specific value.
281 pub fn set_counter(&self, value: u64) {
282 self.counter.store(value, Ordering::Relaxed);
283 }
284
285 /// Generate a UUID from the seed, generator type, and counter.
286 ///
287 /// Uses a simple hash-based approach to ensure uniqueness while maintaining
288 /// determinism. The hash function is designed to spread entropy across all
289 /// bytes while preserving the UUID v4 format.
290 #[inline]
291 fn generate_uuid(&self, counter: u64) -> Uuid {
292 // Create a unique input by combining all distinguishing factors
293 // Use FNV-1a style hashing for simplicity and determinism
294 let mut hash: u64 = 14695981039346656037; // FNV offset basis
295
296 // Mix in seed
297 for byte in self.seed.to_le_bytes() {
298 hash ^= byte as u64;
299 hash = hash.wrapping_mul(1099511628211); // FNV prime
300 }
301
302 // Mix in generator type
303 hash ^= self.generator_type as u64;
304 hash = hash.wrapping_mul(1099511628211);
305
306 // Mix in sub-discriminator
307 hash ^= self.sub_discriminator as u64;
308 hash = hash.wrapping_mul(1099511628211);
309
310 // Mix in counter (most important for uniqueness within same factory)
311 for byte in counter.to_le_bytes() {
312 hash ^= byte as u64;
313 hash = hash.wrapping_mul(1099511628211);
314 }
315
316 // Create second hash for remaining bytes
317 let mut hash2: u64 = hash;
318 hash2 ^= self.seed.rotate_left(32);
319 hash2 = hash2.wrapping_mul(1099511628211);
320 hash2 ^= counter.rotate_left(32);
321 hash2 = hash2.wrapping_mul(1099511628211);
322
323 let mut bytes = [0u8; 16];
324
325 // First 8 bytes from hash
326 bytes[0..8].copy_from_slice(&hash.to_le_bytes());
327 // Second 8 bytes from hash2
328 bytes[8..16].copy_from_slice(&hash2.to_le_bytes());
329
330 // Set UUID version 4 (bits 12-15 of time_hi_and_version)
331 // Byte 6: xxxx0100 -> set bits 4-7 to 0100
332 bytes[6] = (bytes[6] & 0x0f) | 0x40;
333
334 // Set variant to RFC 4122 (bits 6-7 of clock_seq_hi_and_reserved)
335 // Byte 8: 10xxxxxx -> set bits 6-7 to 10
336 bytes[8] = (bytes[8] & 0x3f) | 0x80;
337
338 Uuid::from_bytes(bytes)
339 }
340}
341
342impl Clone for DeterministicUuidFactory {
343 fn clone(&self) -> Self {
344 Self {
345 seed: self.seed,
346 generator_type: self.generator_type,
347 counter: AtomicU64::new(self.counter.load(Ordering::Relaxed)),
348 sub_discriminator: self.sub_discriminator,
349 }
350 }
351}
352
353/// A registry that manages multiple UUID factories for different generator types.
354///
355/// This ensures a single source of truth for UUID generation across the system.
356#[derive(Debug)]
357pub struct UuidFactoryRegistry {
358 seed: u64,
359 factories: std::collections::HashMap<GeneratorType, DeterministicUuidFactory>,
360}
361
362impl UuidFactoryRegistry {
363 /// Create a new registry with a global seed.
364 pub fn new(seed: u64) -> Self {
365 Self {
366 seed,
367 factories: std::collections::HashMap::new(),
368 }
369 }
370
371 /// Get or create a factory for a specific generator type.
372 pub fn get_factory(&mut self, generator_type: GeneratorType) -> &DeterministicUuidFactory {
373 self.factories
374 .entry(generator_type)
375 .or_insert_with(|| DeterministicUuidFactory::new(self.seed, generator_type))
376 }
377
378 /// Generate the next UUID for a specific generator type.
379 pub fn next_uuid(&mut self, generator_type: GeneratorType) -> Uuid {
380 self.get_factory(generator_type).next()
381 }
382
383 /// Reset all factories.
384 pub fn reset_all(&self) {
385 for factory in self.factories.values() {
386 factory.reset();
387 }
388 }
389
390 /// Get the current counter for a generator type.
391 pub fn get_counter(&self, generator_type: GeneratorType) -> Option<u64> {
392 self.factories
393 .get(&generator_type)
394 .map(DeterministicUuidFactory::current_counter)
395 }
396}
397
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;
    use std::thread;

    /// A single factory must not repeat a UUID across 10,000 draws.
    #[test]
    fn test_uuid_uniqueness_same_generator() {
        let factory = DeterministicUuidFactory::new(12345, GeneratorType::JournalEntry);

        let mut uuids = HashSet::new();
        for _ in 0..10000 {
            let uuid = factory.next();
            assert!(uuids.insert(uuid), "Duplicate UUID generated");
        }
    }

    /// Two generator types sharing a seed must not produce overlapping UUIDs.
    #[test]
    fn test_uuid_uniqueness_different_generators() {
        let factory1 = DeterministicUuidFactory::new(12345, GeneratorType::JournalEntry);
        let factory2 = DeterministicUuidFactory::new(12345, GeneratorType::DocumentFlow);

        let mut uuids = HashSet::new();

        for _ in 0..5000 {
            let uuid1 = factory1.next();
            let uuid2 = factory2.next();
            assert!(uuids.insert(uuid1), "Duplicate UUID from JE generator");
            assert!(uuids.insert(uuid2), "Duplicate UUID from DocFlow generator");
        }
    }

    /// Identical inputs must yield identical sequences.
    #[test]
    fn test_uuid_determinism() {
        let factory1 = DeterministicUuidFactory::new(12345, GeneratorType::JournalEntry);
        let factory2 = DeterministicUuidFactory::new(12345, GeneratorType::JournalEntry);

        for _ in 0..100 {
            assert_eq!(factory1.next(), factory2.next());
        }
    }

    #[test]
    fn test_uuid_different_seeds() {
        let factory1 = DeterministicUuidFactory::new(12345, GeneratorType::JournalEntry);
        let factory2 = DeterministicUuidFactory::new(67890, GeneratorType::JournalEntry);

        // Different seeds should produce different UUIDs
        assert_ne!(factory1.next(), factory2.next());
    }

    /// Four threads sharing one factory must each receive distinct UUIDs.
    #[test]
    fn test_thread_safety() {
        use std::sync::Arc;

        let factory = Arc::new(DeterministicUuidFactory::new(
            12345,
            GeneratorType::JournalEntry,
        ));
        let mut handles = vec![];

        for _ in 0..4 {
            let factory_clone = Arc::clone(&factory);
            handles.push(thread::spawn(move || {
                let mut uuids = Vec::new();
                for _ in 0..1000 {
                    uuids.push(factory_clone.next());
                }
                uuids
            }));
        }

        let mut all_uuids = HashSet::new();
        for handle in handles {
            let uuids = handle.join().unwrap();
            for uuid in uuids {
                assert!(all_uuids.insert(uuid), "Thread-generated UUID collision");
            }
        }

        assert_eq!(all_uuids.len(), 4000);
    }

    #[test]
    fn test_sub_discriminator() {
        let factory1 =
            DeterministicUuidFactory::with_sub_discriminator(12345, GeneratorType::JournalEntry, 0);
        let factory2 =
            DeterministicUuidFactory::with_sub_discriminator(12345, GeneratorType::JournalEntry, 1);

        // Both factories are at counter 0, so the sub-discriminator is the
        // only differing input.
        let uuid1 = factory1.next();
        let uuid2 = factory2.next();

        assert_ne!(uuid1, uuid2);
    }

    /// `generate_at(n)` must equal the n-th sequential UUID and must not move
    /// the factory's counter.
    #[test]
    fn test_generate_at() {
        let factory = DeterministicUuidFactory::new(12345, GeneratorType::JournalEntry);

        // Generate at specific counter
        let uuid_at_5 = factory.generate_at(5);
        assert_eq!(factory.current_counter(), 0);

        // Consume counters 0..=4 so the next draw uses counter 5
        for _ in 0..5 {
            factory.next();
        }
        let uuid_sequential = factory.next();

        // The sequential draw at counter 5 must match the random-access one
        assert_eq!(uuid_at_5, uuid_sequential);

        // Random access stays stable and leaves the counter where `next` left it
        assert_eq!(uuid_at_5, factory.generate_at(5));
        assert_eq!(factory.current_counter(), 6);
    }

    #[test]
    fn test_registry() {
        let mut registry = UuidFactoryRegistry::new(12345);

        let uuid1 = registry.next_uuid(GeneratorType::JournalEntry);
        let uuid2 = registry.next_uuid(GeneratorType::JournalEntry);
        let uuid3 = registry.next_uuid(GeneratorType::DocumentFlow);

        // All should be unique
        assert_ne!(uuid1, uuid2);
        assert_ne!(uuid1, uuid3);
        assert_ne!(uuid2, uuid3);

        // Counter should be tracked
        assert_eq!(registry.get_counter(GeneratorType::JournalEntry), Some(2));
        assert_eq!(registry.get_counter(GeneratorType::DocumentFlow), Some(1));
    }

    #[test]
    fn test_uuid_is_valid_v4() {
        let factory = DeterministicUuidFactory::new(12345, GeneratorType::JournalEntry);
        let uuid = factory.next();

        // Check version is 4
        assert_eq!(uuid.get_version_num(), 4);

        // Check variant is RFC 4122
        assert_eq!(uuid.get_variant(), uuid::Variant::RFC4122);
    }
}