// Source: datasynth_generators/temporal/temporal_generator.rs

1//! Temporal attribute generator implementation.
2//!
3//! Provides generation of temporal attributes for entities, supporting
4//! bi-temporal data models.
5
6use chrono::{DateTime, Duration, NaiveDate, NaiveDateTime, Utc};
7use datasynth_core::utils::seeded_rng;
8use rand::prelude::*;
9use rand_chacha::ChaCha8Rng;
10use serde::{Deserialize, Serialize};
11use uuid::Uuid;
12
13use datasynth_core::models::{BiTemporal, TemporalChangeType, TemporalVersionChain};
14
/// Configuration for temporal attribute generation.
///
/// Bundles the valid-time and transaction-time settings with the switches
/// that control version-chain generation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TemporalAttributeConfig {
    /// Enable temporal attribute generation.
    ///
    /// NOTE(review): nothing in the visible part of this file reads this
    /// flag; presumably callers check it before invoking the generator — confirm.
    pub enabled: bool,
    /// Valid time configuration.
    pub valid_time: ValidTimeConfig,
    /// Transaction time configuration.
    pub transaction_time: TransactionTimeConfig,
    /// Generate version chains for entities.
    ///
    /// When `false`, `generate_version_chain` produces chains that hold only
    /// the initial version.
    pub generate_version_chains: bool,
    /// Average number of versions per entity.
    ///
    /// Only consulted by `generate_version_chain`, and only when
    /// `generate_version_chains` is `true`.
    pub avg_versions_per_entity: f64,
}
29
30impl Default for TemporalAttributeConfig {
31    fn default() -> Self {
32        Self {
33            enabled: true,
34            valid_time: ValidTimeConfig::default(),
35            transaction_time: TransactionTimeConfig::default(),
36            generate_version_chains: false,
37            avg_versions_per_entity: 1.5,
38        }
39    }
40}
41
/// Configuration for valid time (business time) generation.
///
/// These values are read by `TemporalAttributeGenerator::generate_valid_time`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ValidTimeConfig {
    /// Probability that valid_to is set (entity has ended validity).
    ///
    /// Passed to the RNG's `random_bool`, so it is expected to lie in `[0, 1]`.
    pub closed_probability: f64,
    /// Average validity duration in days.
    pub avg_validity_days: u32,
    /// Standard deviation of validity duration in days.
    pub validity_stddev_days: u32,
}
52
53impl Default for ValidTimeConfig {
54    fn default() -> Self {
55        Self {
56            closed_probability: 0.1,
57            avg_validity_days: 365,
58            validity_stddev_days: 90,
59        }
60    }
61}
62
/// Configuration for transaction time (system time) generation.
///
/// These values are read by
/// `TemporalAttributeGenerator::generate_transaction_time`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TransactionTimeConfig {
    /// Average recording delay in seconds (0 = immediate).
    ///
    /// With a value of 0 the recording time equals the valid-from instant
    /// (before any backdating is applied).
    pub avg_recording_delay_seconds: u32,
    /// Allow backdating (recording time before valid time).
    ///
    /// When set, the generator may subtract a whole number of days from the
    /// (possibly delayed) recording time.
    pub allow_backdating: bool,
    /// Probability of backdating if allowed.
    ///
    /// Passed to the RNG's `random_bool`, so it is expected to lie in `[0, 1]`.
    pub backdating_probability: f64,
    /// Maximum backdate days.
    ///
    /// The number of backdated days is drawn from `1..=max_backdate_days`.
    pub max_backdate_days: u32,
}
75
76impl Default for TransactionTimeConfig {
77    fn default() -> Self {
78        Self {
79            avg_recording_delay_seconds: 0,
80            allow_backdating: false,
81            backdating_probability: 0.01,
82            max_backdate_days: 30,
83        }
84    }
85}
86
/// Generator for temporal attributes.
///
/// Wraps entities in `BiTemporal` records (and optionally version chains)
/// using a seeded random number generator.
pub struct TemporalAttributeGenerator {
    /// Configuration.
    config: TemporalAttributeConfig,
    /// Random number generator.
    ///
    /// Created from the caller-supplied seed via `seeded_rng(seed, 0)`.
    rng: ChaCha8Rng,
    /// Base date for generation.
    ///
    /// Generated `valid_from` dates fall within 365 days either side of it.
    base_date: NaiveDate,
    /// Generation count.
    ///
    /// Incremented once per `generate_temporal` call; cleared by `reset`.
    count: u64,
}
98
99impl TemporalAttributeGenerator {
100    /// Creates a new temporal attribute generator.
101    pub fn new(config: TemporalAttributeConfig, seed: u64, base_date: NaiveDate) -> Self {
102        Self {
103            config,
104            rng: seeded_rng(seed, 0),
105            base_date,
106            count: 0,
107        }
108    }
109
110    /// Creates a generator with default configuration.
111    pub fn with_defaults(seed: u64, base_date: NaiveDate) -> Self {
112        Self::new(TemporalAttributeConfig::default(), seed, base_date)
113    }
114
115    /// Wraps an entity with temporal attributes.
116    pub fn generate_temporal<T: Clone>(&mut self, entity: T) -> BiTemporal<T> {
117        self.count += 1;
118
119        let (valid_from, valid_to) = self.generate_valid_time();
120        let transaction_time = self.generate_transaction_time(valid_from);
121
122        let recorded_by = format!("system_{}", self.rng.random_range(1..=100));
123        let mut temporal = BiTemporal::new(entity)
124            .with_valid_time(valid_from, valid_to)
125            .with_recorded_at(transaction_time)
126            .with_recorded_by(&recorded_by)
127            .with_change_type(TemporalChangeType::Original);
128
129        // Optionally add a change reason
130        if self.rng.random_bool(0.2) {
131            temporal = temporal.with_change_reason("Initial creation");
132        }
133
134        temporal
135    }
136
137    /// Generates a version chain for an entity.
138    pub fn generate_version_chain<T: Clone>(
139        &mut self,
140        entity: T,
141        id: Uuid,
142    ) -> TemporalVersionChain<T> {
143        // Determine number of versions
144        let num_versions = if self.config.generate_version_chains {
145            let base_versions = self.config.avg_versions_per_entity;
146            // Poisson-like distribution
147            let lambda = base_versions;
148            let mut count = 0;
149            let mut p = 1.0;
150            let l = (-lambda).exp();
151            loop {
152                count += 1;
153                p *= self.rng.random::<f64>();
154                if p <= l {
155                    break;
156                }
157            }
158            count.max(1)
159        } else {
160            1
161        };
162
163        // Generate initial version
164        let initial_temporal = self.generate_temporal(entity.clone());
165        let mut chain = TemporalVersionChain::new(id, initial_temporal);
166
167        // Generate subsequent versions
168        let current_entity = entity;
169        for i in 1..num_versions {
170            // Each version is a correction or adjustment
171            let change_type = if i == num_versions - 1 && self.rng.random_bool(0.1) {
172                TemporalChangeType::Reversal
173            } else if self.rng.random_bool(0.3) {
174                TemporalChangeType::Correction
175            } else {
176                TemporalChangeType::Adjustment
177            };
178
179            let version = self.generate_version(current_entity.clone(), change_type);
180            chain.add_version(version);
181        }
182
183        chain
184    }
185
186    /// Generates a new version of an entity.
187    fn generate_version<T: Clone>(
188        &mut self,
189        entity: T,
190        change_type: TemporalChangeType,
191    ) -> BiTemporal<T> {
192        let (valid_from, valid_to) = self.generate_valid_time();
193        let transaction_time = self.generate_transaction_time(valid_from);
194
195        let reason: Option<&str> = match change_type {
196            TemporalChangeType::Correction => Some("Data correction"),
197            TemporalChangeType::Adjustment => Some("Adjustment per policy"),
198            TemporalChangeType::Reversal => Some("Reversed entry"),
199            _ => None,
200        };
201
202        let recorded_by = format!("user_{}", self.rng.random_range(1..=50));
203        let mut temporal = BiTemporal::new(entity)
204            .with_valid_time(valid_from, valid_to)
205            .with_recorded_at(transaction_time)
206            .with_recorded_by(&recorded_by)
207            .with_change_type(change_type);
208
209        if let Some(r) = reason {
210            temporal = temporal.with_change_reason(r);
211        }
212
213        temporal
214    }
215
216    /// Generates valid time (business time) attributes.
217    pub fn generate_valid_time(&mut self) -> (NaiveDateTime, Option<NaiveDateTime>) {
218        // Generate valid_from within a reasonable range from base_date
219        let days_offset = self.rng.random_range(-365..=365);
220        let valid_from_date = self.base_date + Duration::days(days_offset as i64);
221        let valid_from = valid_from_date
222            .and_hms_opt(
223                self.rng.random_range(0..24),
224                self.rng.random_range(0..60),
225                self.rng.random_range(0..60),
226            )
227            .expect("valid h/m/s ranges");
228
229        // Determine if validity is closed
230        let valid_to = if self
231            .rng
232            .random_bool(self.config.valid_time.closed_probability)
233        {
234            // Generate validity duration
235            let avg_days = self.config.valid_time.avg_validity_days as f64;
236            let stddev_days = self.config.valid_time.validity_stddev_days as f64;
237
238            // Normal distribution for duration
239            let duration_days = (avg_days + self.rng.random::<f64>() * stddev_days * 2.0
240                - stddev_days)
241                .max(1.0) as i64;
242
243            Some(valid_from + Duration::days(duration_days))
244        } else {
245            None
246        };
247
248        (valid_from, valid_to)
249    }
250
251    /// Generates transaction time (system time) based on valid time.
252    pub fn generate_transaction_time(&mut self, valid_from: NaiveDateTime) -> DateTime<Utc> {
253        let base_time = DateTime::<Utc>::from_naive_utc_and_offset(valid_from, Utc);
254
255        // Add recording delay
256        let delay_secs = if self.config.transaction_time.avg_recording_delay_seconds > 0 {
257            let avg = self.config.transaction_time.avg_recording_delay_seconds as f64;
258            // Exponential distribution for delay
259            let delay = -avg * self.rng.random::<f64>().ln();
260            delay as i64
261        } else {
262            0
263        };
264
265        let recorded_at = base_time + Duration::seconds(delay_secs);
266
267        // Handle backdating
268        if self.config.transaction_time.allow_backdating
269            && self
270                .rng
271                .random_bool(self.config.transaction_time.backdating_probability)
272        {
273            let backdate_days = self
274                .rng
275                .random_range(1..=self.config.transaction_time.max_backdate_days)
276                as i64;
277            recorded_at - Duration::days(backdate_days)
278        } else {
279            recorded_at
280        }
281    }
282
    /// Returns the number of entities processed.
    ///
    /// The counter goes up by one for every `generate_temporal` call, which
    /// includes the initial version created by `generate_version_chain`;
    /// follow-up versions in a chain are not counted. `reset` sets it back to
    /// zero.
    pub fn count(&self) -> u64 {
        self.count
    }
287
288    /// Resets the generator.
289    pub fn reset(&mut self, seed: u64) {
290        self.rng = seeded_rng(seed, 0);
291        self.count = 0;
292    }
293
    /// Returns the configuration.
    ///
    /// This is a shared reference to the configuration the generator was
    /// constructed with.
    pub fn config(&self) -> &TemporalAttributeConfig {
        &self.config
    }
298}
299
/// Builder for temporal attribute configuration.
///
/// Starts from `TemporalAttributeConfig::default()` and lets callers
/// override individual settings through chained, consuming setters before
/// calling `build`.
pub struct TemporalAttributeConfigBuilder {
    /// The configuration being assembled; returned as-is by `build`.
    config: TemporalAttributeConfig,
}
304
305impl TemporalAttributeConfigBuilder {
306    /// Creates a new builder with default values.
307    pub fn new() -> Self {
308        Self {
309            config: TemporalAttributeConfig::default(),
310        }
311    }
312
313    /// Sets whether temporal attributes are enabled.
314    pub fn enabled(mut self, enabled: bool) -> Self {
315        self.config.enabled = enabled;
316        self
317    }
318
319    /// Sets the probability of closed validity.
320    pub fn closed_probability(mut self, prob: f64) -> Self {
321        self.config.valid_time.closed_probability = prob.clamp(0.0, 1.0);
322        self
323    }
324
325    /// Sets the average validity duration in days.
326    pub fn avg_validity_days(mut self, days: u32) -> Self {
327        self.config.valid_time.avg_validity_days = days;
328        self
329    }
330
331    /// Sets the average recording delay in seconds.
332    pub fn avg_recording_delay(mut self, seconds: u32) -> Self {
333        self.config.transaction_time.avg_recording_delay_seconds = seconds;
334        self
335    }
336
337    /// Enables backdating with the given probability.
338    pub fn allow_backdating(mut self, prob: f64) -> Self {
339        self.config.transaction_time.allow_backdating = true;
340        self.config.transaction_time.backdating_probability = prob.clamp(0.0, 1.0);
341        self
342    }
343
344    /// Enables version chain generation.
345    pub fn with_version_chains(mut self, avg_versions: f64) -> Self {
346        self.config.generate_version_chains = true;
347        self.config.avg_versions_per_entity = avg_versions.max(1.0);
348        self
349    }
350
351    /// Builds the configuration.
352    pub fn build(self) -> TemporalAttributeConfig {
353        self.config
354    }
355}
356
357impl Default for TemporalAttributeConfigBuilder {
358    fn default() -> Self {
359        Self::new()
360    }
361}
362
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    //! Unit tests for the temporal attribute generator and its config builder.

    use super::*;

    /// A generated record keeps its payload and is marked as an original.
    #[test]
    fn test_generate_temporal() {
        let base_date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let mut generator = TemporalAttributeGenerator::with_defaults(42, base_date);

        let entity = "test_entity";
        let temporal = generator.generate_temporal(entity.to_string());

        assert_eq!(temporal.data, "test_entity");
        assert!(temporal.recorded_at > DateTime::<Utc>::MIN_UTC);
        assert_eq!(temporal.change_type, TemporalChangeType::Original);
    }

    /// `valid_from` stays within a year of the base date, and closed
    /// intervals always end after they start.
    #[test]
    fn test_generate_valid_time() {
        let base_date = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap();
        let config = TemporalAttributeConfig {
            valid_time: ValidTimeConfig {
                closed_probability: 0.5, // 50% chance of closed
                avg_validity_days: 30,
                validity_stddev_days: 10,
            },
            ..Default::default()
        };
        let mut generator = TemporalAttributeGenerator::new(config, 42, base_date);

        let earliest = base_date - Duration::days(365);
        let latest = base_date + Duration::days(365);
        let mut has_closed = false;
        let mut has_open = false;

        for _ in 0..100 {
            let (valid_from, valid_to) = generator.generate_valid_time();
            assert!(valid_from.date() >= earliest);
            assert!(valid_from.date() <= latest);

            match valid_to {
                Some(end) => {
                    has_closed = true;
                    assert!(end > valid_from);
                }
                None => has_open = true,
            }
        }

        // With 50% probability, should have both
        assert!(has_closed);
        assert!(has_open);
    }

    /// Without backdating, the recording time never precedes `valid_from`.
    #[test]
    fn test_generate_transaction_time() {
        let base_date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let config = TemporalAttributeConfig {
            transaction_time: TransactionTimeConfig {
                avg_recording_delay_seconds: 3600, // 1 hour average delay
                allow_backdating: false,
                ..Default::default()
            },
            ..Default::default()
        };
        let mut generator = TemporalAttributeGenerator::new(config, 42, base_date);

        let valid_from = DateTime::from_timestamp(1704067200, 0).unwrap().naive_utc();
        let valid_from_utc = DateTime::<Utc>::from_naive_utc_and_offset(valid_from, Utc);

        for _ in 0..100 {
            let transaction_time = generator.generate_transaction_time(valid_from);
            assert!(transaction_time >= valid_from_utc);
        }
    }

    /// A chain always contains at least its initial version.
    #[test]
    fn test_generate_version_chain() {
        let base_date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let config = TemporalAttributeConfig {
            generate_version_chains: true,
            avg_versions_per_entity: 3.0,
            ..Default::default()
        };
        let mut generator = TemporalAttributeGenerator::new(config, 42, base_date);

        let entity = "test_entity";
        let chain = generator.generate_version_chain(entity.to_string(), Uuid::new_v4());

        assert!(!chain.all_versions().is_empty());
    }

    /// With version chains disabled, only the initial version is produced.
    #[test]
    fn test_generate_version_chain_disabled() {
        let base_date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let mut generator = TemporalAttributeGenerator::with_defaults(42, base_date);

        let chain = generator.generate_version_chain("entity".to_string(), Uuid::new_v4());

        assert_eq!(chain.all_versions().len(), 1);
    }

    /// Every builder setter lands in the built configuration.
    #[test]
    fn test_config_builder() {
        let config = TemporalAttributeConfigBuilder::new()
            .enabled(true)
            .closed_probability(0.3)
            .avg_validity_days(180)
            .avg_recording_delay(60)
            .allow_backdating(0.05)
            .with_version_chains(2.5)
            .build();

        assert!(config.enabled);
        assert_eq!(config.valid_time.closed_probability, 0.3);
        assert_eq!(config.valid_time.avg_validity_days, 180);
        assert_eq!(config.transaction_time.avg_recording_delay_seconds, 60);
        assert!(config.transaction_time.allow_backdating);
        assert_eq!(config.transaction_time.backdating_probability, 0.05);
        assert!(config.generate_version_chains);
        assert_eq!(config.avg_versions_per_entity, 2.5);
    }

    /// The counter tracks `generate_temporal` calls and is cleared by `reset`.
    #[test]
    fn test_generator_count() {
        let base_date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let mut generator = TemporalAttributeGenerator::with_defaults(42, base_date);

        assert_eq!(generator.count(), 0);

        for _ in 0..5 {
            generator.generate_temporal("entity".to_string());
        }

        assert_eq!(generator.count(), 5);

        generator.reset(42);
        assert_eq!(generator.count(), 0);
    }

    /// Resetting with the same seed replays the same random sequence.
    #[test]
    fn test_reset_reproduces_sequence() {
        let base_date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let mut generator = TemporalAttributeGenerator::with_defaults(42, base_date);

        let first: Vec<_> = (0..10).map(|_| generator.generate_valid_time()).collect();
        generator.reset(42);
        let second: Vec<_> = (0..10).map(|_| generator.generate_valid_time()).collect();

        assert_eq!(first, second);
    }
}