Skip to main content

datasynth_generators/temporal/
temporal_generator.rs

1//! Temporal attribute generator implementation.
2//!
3//! Provides generation of temporal attributes for entities, supporting
4//! bi-temporal data models.
5
6use chrono::{DateTime, Duration, NaiveDate, NaiveDateTime, Utc};
7use datasynth_core::utils::seeded_rng;
8use rand::prelude::*;
9use rand_chacha::ChaCha8Rng;
10use serde::{Deserialize, Serialize};
11use uuid::Uuid;
12
13use datasynth_core::models::{BiTemporal, TemporalChangeType, TemporalVersionChain};
14
15/// Configuration for temporal attribute generation.
16#[derive(Debug, Clone, Serialize, Deserialize)]
17pub struct TemporalAttributeConfig {
18    /// Enable temporal attribute generation.
19    pub enabled: bool,
20    /// Valid time configuration.
21    pub valid_time: ValidTimeConfig,
22    /// Transaction time configuration.
23    pub transaction_time: TransactionTimeConfig,
24    /// Generate version chains for entities.
25    pub generate_version_chains: bool,
26    /// Average number of versions per entity.
27    pub avg_versions_per_entity: f64,
28}
29
30impl Default for TemporalAttributeConfig {
31    fn default() -> Self {
32        Self {
33            enabled: true,
34            valid_time: ValidTimeConfig::default(),
35            transaction_time: TransactionTimeConfig::default(),
36            generate_version_chains: false,
37            avg_versions_per_entity: 1.5,
38        }
39    }
40}
41
42/// Configuration for valid time (business time) generation.
43#[derive(Debug, Clone, Serialize, Deserialize)]
44pub struct ValidTimeConfig {
45    /// Probability that valid_to is set (entity has ended validity).
46    pub closed_probability: f64,
47    /// Average validity duration in days.
48    pub avg_validity_days: u32,
49    /// Standard deviation of validity duration in days.
50    pub validity_stddev_days: u32,
51}
52
53impl Default for ValidTimeConfig {
54    fn default() -> Self {
55        Self {
56            closed_probability: 0.1,
57            avg_validity_days: 365,
58            validity_stddev_days: 90,
59        }
60    }
61}
62
63/// Configuration for transaction time (system time) generation.
64#[derive(Debug, Clone, Serialize, Deserialize)]
65pub struct TransactionTimeConfig {
66    /// Average recording delay in seconds (0 = immediate).
67    pub avg_recording_delay_seconds: u32,
68    /// Allow backdating (recording time before valid time).
69    pub allow_backdating: bool,
70    /// Probability of backdating if allowed.
71    pub backdating_probability: f64,
72    /// Maximum backdate days.
73    pub max_backdate_days: u32,
74}
75
76impl Default for TransactionTimeConfig {
77    fn default() -> Self {
78        Self {
79            avg_recording_delay_seconds: 0,
80            allow_backdating: false,
81            backdating_probability: 0.01,
82            max_backdate_days: 30,
83        }
84    }
85}
86
87/// Generator for temporal attributes.
88pub struct TemporalAttributeGenerator {
89    /// Configuration.
90    config: TemporalAttributeConfig,
91    /// Random number generator.
92    rng: ChaCha8Rng,
93    /// Base date for generation.
94    base_date: NaiveDate,
95    /// Generation count.
96    count: u64,
97}
98
99impl TemporalAttributeGenerator {
100    /// Creates a new temporal attribute generator.
101    pub fn new(config: TemporalAttributeConfig, seed: u64, base_date: NaiveDate) -> Self {
102        Self {
103            config,
104            rng: seeded_rng(seed, 0),
105            base_date,
106            count: 0,
107        }
108    }
109
110    /// Creates a generator with default configuration.
111    pub fn with_defaults(seed: u64, base_date: NaiveDate) -> Self {
112        Self::new(TemporalAttributeConfig::default(), seed, base_date)
113    }
114
115    /// Wraps an entity with temporal attributes.
116    pub fn generate_temporal<T: Clone>(&mut self, entity: T) -> BiTemporal<T> {
117        self.count += 1;
118
119        let (valid_from, valid_to) = self.generate_valid_time();
120        let transaction_time = self.generate_transaction_time(valid_from);
121
122        let recorded_by = format!("system_{}", self.rng.gen_range(1..=100));
123        let mut temporal = BiTemporal::new(entity)
124            .with_valid_time(valid_from, valid_to)
125            .with_recorded_at(transaction_time)
126            .with_recorded_by(&recorded_by)
127            .with_change_type(TemporalChangeType::Original);
128
129        // Optionally add a change reason
130        if self.rng.gen_bool(0.2) {
131            temporal = temporal.with_change_reason("Initial creation");
132        }
133
134        temporal
135    }
136
137    /// Generates a version chain for an entity.
138    pub fn generate_version_chain<T: Clone>(
139        &mut self,
140        entity: T,
141        id: Uuid,
142    ) -> TemporalVersionChain<T> {
143        // Determine number of versions
144        let num_versions = if self.config.generate_version_chains {
145            let base_versions = self.config.avg_versions_per_entity;
146            // Poisson-like distribution
147            let lambda = base_versions;
148            let mut count = 0;
149            let mut p = 1.0;
150            let l = (-lambda).exp();
151            loop {
152                count += 1;
153                p *= self.rng.gen::<f64>();
154                if p <= l {
155                    break;
156                }
157            }
158            count.max(1)
159        } else {
160            1
161        };
162
163        // Generate initial version
164        let initial_temporal = self.generate_temporal(entity.clone());
165        let mut chain = TemporalVersionChain::new(id, initial_temporal);
166
167        // Generate subsequent versions
168        let current_entity = entity;
169        for i in 1..num_versions {
170            // Each version is a correction or adjustment
171            let change_type = if i == num_versions - 1 && self.rng.gen_bool(0.1) {
172                TemporalChangeType::Reversal
173            } else if self.rng.gen_bool(0.3) {
174                TemporalChangeType::Correction
175            } else {
176                TemporalChangeType::Adjustment
177            };
178
179            let version = self.generate_version(current_entity.clone(), change_type);
180            chain.add_version(version);
181        }
182
183        chain
184    }
185
186    /// Generates a new version of an entity.
187    fn generate_version<T: Clone>(
188        &mut self,
189        entity: T,
190        change_type: TemporalChangeType,
191    ) -> BiTemporal<T> {
192        let (valid_from, valid_to) = self.generate_valid_time();
193        let transaction_time = self.generate_transaction_time(valid_from);
194
195        let reason: Option<&str> = match change_type {
196            TemporalChangeType::Correction => Some("Data correction"),
197            TemporalChangeType::Adjustment => Some("Adjustment per policy"),
198            TemporalChangeType::Reversal => Some("Reversed entry"),
199            _ => None,
200        };
201
202        let recorded_by = format!("user_{}", self.rng.gen_range(1..=50));
203        let mut temporal = BiTemporal::new(entity)
204            .with_valid_time(valid_from, valid_to)
205            .with_recorded_at(transaction_time)
206            .with_recorded_by(&recorded_by)
207            .with_change_type(change_type);
208
209        if let Some(r) = reason {
210            temporal = temporal.with_change_reason(r);
211        }
212
213        temporal
214    }
215
216    /// Generates valid time (business time) attributes.
217    pub fn generate_valid_time(&mut self) -> (NaiveDateTime, Option<NaiveDateTime>) {
218        // Generate valid_from within a reasonable range from base_date
219        let days_offset = self.rng.gen_range(-365..=365);
220        let valid_from_date = self.base_date + Duration::days(days_offset as i64);
221        let valid_from = valid_from_date
222            .and_hms_opt(
223                self.rng.gen_range(0..24),
224                self.rng.gen_range(0..60),
225                self.rng.gen_range(0..60),
226            )
227            .expect("valid h/m/s ranges");
228
229        // Determine if validity is closed
230        let valid_to = if self.rng.gen_bool(self.config.valid_time.closed_probability) {
231            // Generate validity duration
232            let avg_days = self.config.valid_time.avg_validity_days as f64;
233            let stddev_days = self.config.valid_time.validity_stddev_days as f64;
234
235            // Normal distribution for duration
236            let duration_days = (avg_days + self.rng.gen::<f64>() * stddev_days * 2.0 - stddev_days)
237                .max(1.0) as i64;
238
239            Some(valid_from + Duration::days(duration_days))
240        } else {
241            None
242        };
243
244        (valid_from, valid_to)
245    }
246
247    /// Generates transaction time (system time) based on valid time.
248    pub fn generate_transaction_time(&mut self, valid_from: NaiveDateTime) -> DateTime<Utc> {
249        let base_time = DateTime::<Utc>::from_naive_utc_and_offset(valid_from, Utc);
250
251        // Add recording delay
252        let delay_secs = if self.config.transaction_time.avg_recording_delay_seconds > 0 {
253            let avg = self.config.transaction_time.avg_recording_delay_seconds as f64;
254            // Exponential distribution for delay
255            let delay = -avg * self.rng.gen::<f64>().ln();
256            delay as i64
257        } else {
258            0
259        };
260
261        let recorded_at = base_time + Duration::seconds(delay_secs);
262
263        // Handle backdating
264        if self.config.transaction_time.allow_backdating
265            && self
266                .rng
267                .gen_bool(self.config.transaction_time.backdating_probability)
268        {
269            let backdate_days = self
270                .rng
271                .gen_range(1..=self.config.transaction_time.max_backdate_days)
272                as i64;
273            recorded_at - Duration::days(backdate_days)
274        } else {
275            recorded_at
276        }
277    }
278
279    /// Returns the number of entities processed.
280    pub fn count(&self) -> u64 {
281        self.count
282    }
283
284    /// Resets the generator.
285    pub fn reset(&mut self, seed: u64) {
286        self.rng = seeded_rng(seed, 0);
287        self.count = 0;
288    }
289
290    /// Returns the configuration.
291    pub fn config(&self) -> &TemporalAttributeConfig {
292        &self.config
293    }
294}
295
296/// Builder for temporal attribute configuration.
297pub struct TemporalAttributeConfigBuilder {
298    config: TemporalAttributeConfig,
299}
300
301impl TemporalAttributeConfigBuilder {
302    /// Creates a new builder with default values.
303    pub fn new() -> Self {
304        Self {
305            config: TemporalAttributeConfig::default(),
306        }
307    }
308
309    /// Sets whether temporal attributes are enabled.
310    pub fn enabled(mut self, enabled: bool) -> Self {
311        self.config.enabled = enabled;
312        self
313    }
314
315    /// Sets the probability of closed validity.
316    pub fn closed_probability(mut self, prob: f64) -> Self {
317        self.config.valid_time.closed_probability = prob.clamp(0.0, 1.0);
318        self
319    }
320
321    /// Sets the average validity duration in days.
322    pub fn avg_validity_days(mut self, days: u32) -> Self {
323        self.config.valid_time.avg_validity_days = days;
324        self
325    }
326
327    /// Sets the average recording delay in seconds.
328    pub fn avg_recording_delay(mut self, seconds: u32) -> Self {
329        self.config.transaction_time.avg_recording_delay_seconds = seconds;
330        self
331    }
332
333    /// Enables backdating with the given probability.
334    pub fn allow_backdating(mut self, prob: f64) -> Self {
335        self.config.transaction_time.allow_backdating = true;
336        self.config.transaction_time.backdating_probability = prob.clamp(0.0, 1.0);
337        self
338    }
339
340    /// Enables version chain generation.
341    pub fn with_version_chains(mut self, avg_versions: f64) -> Self {
342        self.config.generate_version_chains = true;
343        self.config.avg_versions_per_entity = avg_versions.max(1.0);
344        self
345    }
346
347    /// Builds the configuration.
348    pub fn build(self) -> TemporalAttributeConfig {
349        self.config
350    }
351}
352
353impl Default for TemporalAttributeConfigBuilder {
354    fn default() -> Self {
355        Self::new()
356    }
357}
358
#[cfg(test)]
// Tests unwrap freely: a failed unwrap is simply a failed test.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// A wrapped entity keeps its data and is marked as an original record.
    #[test]
    fn test_generate_temporal() {
        let base_date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let mut generator = TemporalAttributeGenerator::with_defaults(42, base_date);

        let entity = "test_entity";
        let temporal = generator.generate_temporal(entity.to_string());

        assert_eq!(temporal.data, "test_entity");
        assert!(temporal.recorded_at > DateTime::<Utc>::MIN_UTC);
        assert_eq!(temporal.change_type, TemporalChangeType::Original);
    }

    /// `valid_from` stays within a year of the base date and closed
    /// intervals have the configured length.
    #[test]
    fn test_generate_valid_time() {
        let base_date = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap();
        let config = TemporalAttributeConfig {
            valid_time: ValidTimeConfig {
                closed_probability: 0.5, // 50% chance of closed
                avg_validity_days: 30,
                validity_stddev_days: 10,
            },
            ..Default::default()
        };
        let mut generator = TemporalAttributeGenerator::new(config, 42, base_date);

        let mut has_closed = false;
        let mut has_open = false;

        for _ in 0..100 {
            let (valid_from, valid_to) = generator.generate_valid_time();
            assert!(valid_from.date() >= base_date - Duration::days(365));
            assert!(valid_from.date() <= base_date + Duration::days(365));

            if let Some(end) = valid_to {
                has_closed = true;
                assert!(end > valid_from);
                // Duration is drawn from 30 ± 10 days.
                let days = (end - valid_from).num_days();
                assert!((20..=40).contains(&days));
            } else {
                has_open = true;
            }
        }

        // With 50% probability, should have both
        assert!(has_closed);
        assert!(has_open);
    }

    /// Without backdating the recording time never precedes the valid time.
    #[test]
    fn test_generate_transaction_time() {
        let base_date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let config = TemporalAttributeConfig {
            transaction_time: TransactionTimeConfig {
                avg_recording_delay_seconds: 3600, // 1 hour average delay
                allow_backdating: false,
                ..Default::default()
            },
            ..Default::default()
        };
        let mut generator = TemporalAttributeGenerator::new(config, 42, base_date);

        let valid_from = DateTime::from_timestamp(1704067200, 0).unwrap().naive_utc();
        let valid_from_utc = DateTime::<Utc>::from_naive_utc_and_offset(valid_from, Utc);

        for _ in 0..50 {
            let transaction_time = generator.generate_transaction_time(valid_from);
            // Transaction time should be >= valid_from when backdating is disabled
            assert!(transaction_time >= valid_from_utc);
        }
    }

    /// A generated chain always holds at least the original version.
    #[test]
    fn test_generate_version_chain() {
        let base_date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let config = TemporalAttributeConfig {
            generate_version_chains: true,
            avg_versions_per_entity: 3.0,
            ..Default::default()
        };
        let mut generator = TemporalAttributeGenerator::new(config, 42, base_date);

        let entity = "test_entity";
        let chain = generator.generate_version_chain(entity.to_string(), Uuid::new_v4());

        // Should have at least 1 version
        assert!(!chain.all_versions().is_empty());
    }

    /// Builder setters end up in the built configuration.
    #[test]
    fn test_config_builder() {
        let config = TemporalAttributeConfigBuilder::new()
            .enabled(true)
            .closed_probability(0.3)
            .avg_validity_days(180)
            .avg_recording_delay(60)
            .allow_backdating(0.05)
            .with_version_chains(2.5)
            .build();

        assert!(config.enabled);
        assert_eq!(config.valid_time.closed_probability, 0.3);
        assert_eq!(config.valid_time.avg_validity_days, 180);
        assert_eq!(config.transaction_time.avg_recording_delay_seconds, 60);
        assert!(config.transaction_time.allow_backdating);
        assert_eq!(config.transaction_time.backdating_probability, 0.05);
        assert!(config.generate_version_chains);
        assert_eq!(config.avg_versions_per_entity, 2.5);
    }

    /// The counter tracks wrapped entities and is cleared by `reset`.
    #[test]
    fn test_generator_count() {
        let base_date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let mut generator = TemporalAttributeGenerator::with_defaults(42, base_date);

        assert_eq!(generator.count(), 0);

        for _ in 0..5 {
            generator.generate_temporal("entity".to_string());
        }

        assert_eq!(generator.count(), 5);

        generator.reset(42);
        assert_eq!(generator.count(), 0);
    }

    /// Resetting with the original seed replays the same sequence.
    #[test]
    fn test_reset_replays_sequence() {
        let base_date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let mut generator = TemporalAttributeGenerator::with_defaults(42, base_date);

        let first: Vec<_> = (0..10).map(|_| generator.generate_valid_time()).collect();
        generator.reset(42);
        let second: Vec<_> = (0..10).map(|_| generator.generate_valid_time()).collect();

        assert_eq!(first, second);
    }
}