Skip to main content

datasynth_generators/temporal/
temporal_generator.rs

//! Temporal attribute generator implementation.
//!
//! Provides generation of temporal attributes for entities, supporting
//! bi-temporal data models.
5
6use chrono::{DateTime, Duration, NaiveDate, NaiveDateTime, Utc};
7use rand::prelude::*;
8use rand_chacha::ChaCha8Rng;
9use serde::{Deserialize, Serialize};
10use uuid::Uuid;
11
12use datasynth_core::models::{BiTemporal, TemporalChangeType, TemporalVersionChain};
13
14/// Configuration for temporal attribute generation.
15#[derive(Debug, Clone, Serialize, Deserialize)]
16pub struct TemporalAttributeConfig {
17    /// Enable temporal attribute generation.
18    pub enabled: bool,
19    /// Valid time configuration.
20    pub valid_time: ValidTimeConfig,
21    /// Transaction time configuration.
22    pub transaction_time: TransactionTimeConfig,
23    /// Generate version chains for entities.
24    pub generate_version_chains: bool,
25    /// Average number of versions per entity.
26    pub avg_versions_per_entity: f64,
27}
28
29impl Default for TemporalAttributeConfig {
30    fn default() -> Self {
31        Self {
32            enabled: true,
33            valid_time: ValidTimeConfig::default(),
34            transaction_time: TransactionTimeConfig::default(),
35            generate_version_chains: false,
36            avg_versions_per_entity: 1.5,
37        }
38    }
39}
40
41/// Configuration for valid time (business time) generation.
42#[derive(Debug, Clone, Serialize, Deserialize)]
43pub struct ValidTimeConfig {
44    /// Probability that valid_to is set (entity has ended validity).
45    pub closed_probability: f64,
46    /// Average validity duration in days.
47    pub avg_validity_days: u32,
48    /// Standard deviation of validity duration in days.
49    pub validity_stddev_days: u32,
50}
51
52impl Default for ValidTimeConfig {
53    fn default() -> Self {
54        Self {
55            closed_probability: 0.1,
56            avg_validity_days: 365,
57            validity_stddev_days: 90,
58        }
59    }
60}
61
62/// Configuration for transaction time (system time) generation.
63#[derive(Debug, Clone, Serialize, Deserialize)]
64pub struct TransactionTimeConfig {
65    /// Average recording delay in seconds (0 = immediate).
66    pub avg_recording_delay_seconds: u32,
67    /// Allow backdating (recording time before valid time).
68    pub allow_backdating: bool,
69    /// Probability of backdating if allowed.
70    pub backdating_probability: f64,
71    /// Maximum backdate days.
72    pub max_backdate_days: u32,
73}
74
75impl Default for TransactionTimeConfig {
76    fn default() -> Self {
77        Self {
78            avg_recording_delay_seconds: 0,
79            allow_backdating: false,
80            backdating_probability: 0.01,
81            max_backdate_days: 30,
82        }
83    }
84}
85
86/// Generator for temporal attributes.
87pub struct TemporalAttributeGenerator {
88    /// Configuration.
89    config: TemporalAttributeConfig,
90    /// Random number generator.
91    rng: ChaCha8Rng,
92    /// Base date for generation.
93    base_date: NaiveDate,
94    /// Generation count.
95    count: u64,
96}
97
98impl TemporalAttributeGenerator {
99    /// Creates a new temporal attribute generator.
100    pub fn new(config: TemporalAttributeConfig, seed: u64, base_date: NaiveDate) -> Self {
101        Self {
102            config,
103            rng: ChaCha8Rng::seed_from_u64(seed),
104            base_date,
105            count: 0,
106        }
107    }
108
109    /// Creates a generator with default configuration.
110    pub fn with_defaults(seed: u64, base_date: NaiveDate) -> Self {
111        Self::new(TemporalAttributeConfig::default(), seed, base_date)
112    }
113
114    /// Wraps an entity with temporal attributes.
115    pub fn generate_temporal<T: Clone>(&mut self, entity: T) -> BiTemporal<T> {
116        self.count += 1;
117
118        let (valid_from, valid_to) = self.generate_valid_time();
119        let transaction_time = self.generate_transaction_time(valid_from);
120
121        let recorded_by = format!("system_{}", self.rng.gen_range(1..=100));
122        let mut temporal = BiTemporal::new(entity)
123            .with_valid_time(valid_from, valid_to)
124            .with_recorded_at(transaction_time)
125            .with_recorded_by(&recorded_by)
126            .with_change_type(TemporalChangeType::Original);
127
128        // Optionally add a change reason
129        if self.rng.gen_bool(0.2) {
130            temporal = temporal.with_change_reason("Initial creation");
131        }
132
133        temporal
134    }
135
136    /// Generates a version chain for an entity.
137    pub fn generate_version_chain<T: Clone>(
138        &mut self,
139        entity: T,
140        id: Uuid,
141    ) -> TemporalVersionChain<T> {
142        // Determine number of versions
143        let num_versions = if self.config.generate_version_chains {
144            let base_versions = self.config.avg_versions_per_entity;
145            // Poisson-like distribution
146            let lambda = base_versions;
147            let mut count = 0;
148            let mut p = 1.0;
149            let l = (-lambda).exp();
150            loop {
151                count += 1;
152                p *= self.rng.gen::<f64>();
153                if p <= l {
154                    break;
155                }
156            }
157            count.max(1)
158        } else {
159            1
160        };
161
162        // Generate initial version
163        let initial_temporal = self.generate_temporal(entity.clone());
164        let mut chain = TemporalVersionChain::new(id, initial_temporal);
165
166        // Generate subsequent versions
167        let current_entity = entity;
168        for i in 1..num_versions {
169            // Each version is a correction or adjustment
170            let change_type = if i == num_versions - 1 && self.rng.gen_bool(0.1) {
171                TemporalChangeType::Reversal
172            } else if self.rng.gen_bool(0.3) {
173                TemporalChangeType::Correction
174            } else {
175                TemporalChangeType::Adjustment
176            };
177
178            let version = self.generate_version(current_entity.clone(), change_type);
179            chain.add_version(version);
180        }
181
182        chain
183    }
184
185    /// Generates a new version of an entity.
186    fn generate_version<T: Clone>(
187        &mut self,
188        entity: T,
189        change_type: TemporalChangeType,
190    ) -> BiTemporal<T> {
191        let (valid_from, valid_to) = self.generate_valid_time();
192        let transaction_time = self.generate_transaction_time(valid_from);
193
194        let reason: Option<&str> = match change_type {
195            TemporalChangeType::Correction => Some("Data correction"),
196            TemporalChangeType::Adjustment => Some("Adjustment per policy"),
197            TemporalChangeType::Reversal => Some("Reversed entry"),
198            _ => None,
199        };
200
201        let recorded_by = format!("user_{}", self.rng.gen_range(1..=50));
202        let mut temporal = BiTemporal::new(entity)
203            .with_valid_time(valid_from, valid_to)
204            .with_recorded_at(transaction_time)
205            .with_recorded_by(&recorded_by)
206            .with_change_type(change_type);
207
208        if let Some(r) = reason {
209            temporal = temporal.with_change_reason(r);
210        }
211
212        temporal
213    }
214
215    /// Generates valid time (business time) attributes.
216    pub fn generate_valid_time(&mut self) -> (NaiveDateTime, Option<NaiveDateTime>) {
217        // Generate valid_from within a reasonable range from base_date
218        let days_offset = self.rng.gen_range(-365..=365);
219        let valid_from_date = self.base_date + Duration::days(days_offset as i64);
220        let valid_from = valid_from_date
221            .and_hms_opt(
222                self.rng.gen_range(0..24),
223                self.rng.gen_range(0..60),
224                self.rng.gen_range(0..60),
225            )
226            .unwrap();
227
228        // Determine if validity is closed
229        let valid_to = if self.rng.gen_bool(self.config.valid_time.closed_probability) {
230            // Generate validity duration
231            let avg_days = self.config.valid_time.avg_validity_days as f64;
232            let stddev_days = self.config.valid_time.validity_stddev_days as f64;
233
234            // Normal distribution for duration
235            let duration_days = (avg_days + self.rng.gen::<f64>() * stddev_days * 2.0 - stddev_days)
236                .max(1.0) as i64;
237
238            Some(valid_from + Duration::days(duration_days))
239        } else {
240            None
241        };
242
243        (valid_from, valid_to)
244    }
245
246    /// Generates transaction time (system time) based on valid time.
247    pub fn generate_transaction_time(&mut self, valid_from: NaiveDateTime) -> DateTime<Utc> {
248        let base_time = DateTime::<Utc>::from_naive_utc_and_offset(valid_from, Utc);
249
250        // Add recording delay
251        let delay_secs = if self.config.transaction_time.avg_recording_delay_seconds > 0 {
252            let avg = self.config.transaction_time.avg_recording_delay_seconds as f64;
253            // Exponential distribution for delay
254            let delay = -avg * self.rng.gen::<f64>().ln();
255            delay as i64
256        } else {
257            0
258        };
259
260        let recorded_at = base_time + Duration::seconds(delay_secs);
261
262        // Handle backdating
263        if self.config.transaction_time.allow_backdating
264            && self
265                .rng
266                .gen_bool(self.config.transaction_time.backdating_probability)
267        {
268            let backdate_days = self
269                .rng
270                .gen_range(1..=self.config.transaction_time.max_backdate_days)
271                as i64;
272            recorded_at - Duration::days(backdate_days)
273        } else {
274            recorded_at
275        }
276    }
277
278    /// Returns the number of entities processed.
279    pub fn count(&self) -> u64 {
280        self.count
281    }
282
283    /// Resets the generator.
284    pub fn reset(&mut self, seed: u64) {
285        self.rng = ChaCha8Rng::seed_from_u64(seed);
286        self.count = 0;
287    }
288
289    /// Returns the configuration.
290    pub fn config(&self) -> &TemporalAttributeConfig {
291        &self.config
292    }
293}
294
295/// Builder for temporal attribute configuration.
296pub struct TemporalAttributeConfigBuilder {
297    config: TemporalAttributeConfig,
298}
299
300impl TemporalAttributeConfigBuilder {
301    /// Creates a new builder with default values.
302    pub fn new() -> Self {
303        Self {
304            config: TemporalAttributeConfig::default(),
305        }
306    }
307
308    /// Sets whether temporal attributes are enabled.
309    pub fn enabled(mut self, enabled: bool) -> Self {
310        self.config.enabled = enabled;
311        self
312    }
313
314    /// Sets the probability of closed validity.
315    pub fn closed_probability(mut self, prob: f64) -> Self {
316        self.config.valid_time.closed_probability = prob.clamp(0.0, 1.0);
317        self
318    }
319
320    /// Sets the average validity duration in days.
321    pub fn avg_validity_days(mut self, days: u32) -> Self {
322        self.config.valid_time.avg_validity_days = days;
323        self
324    }
325
326    /// Sets the average recording delay in seconds.
327    pub fn avg_recording_delay(mut self, seconds: u32) -> Self {
328        self.config.transaction_time.avg_recording_delay_seconds = seconds;
329        self
330    }
331
332    /// Enables backdating with the given probability.
333    pub fn allow_backdating(mut self, prob: f64) -> Self {
334        self.config.transaction_time.allow_backdating = true;
335        self.config.transaction_time.backdating_probability = prob.clamp(0.0, 1.0);
336        self
337    }
338
339    /// Enables version chain generation.
340    pub fn with_version_chains(mut self, avg_versions: f64) -> Self {
341        self.config.generate_version_chains = true;
342        self.config.avg_versions_per_entity = avg_versions.max(1.0);
343        self
344    }
345
346    /// Builds the configuration.
347    pub fn build(self) -> TemporalAttributeConfig {
348        self.config
349    }
350}
351
352impl Default for TemporalAttributeConfigBuilder {
353    fn default() -> Self {
354        Self::new()
355    }
356}
357
#[cfg(test)]
mod tests {
    use super::*;

    /// A wrapped entity keeps its payload and is marked as an original record.
    #[test]
    fn test_generate_temporal() {
        let base_date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let mut generator = TemporalAttributeGenerator::with_defaults(42, base_date);

        let entity = "test_entity";
        let temporal = generator.generate_temporal(entity.to_string());

        assert_eq!(temporal.data, "test_entity");
        assert!(temporal.recorded_at > DateTime::<Utc>::MIN_UTC);
        assert_eq!(temporal.change_type, TemporalChangeType::Original);
    }

    /// Valid-from stays within a year of the base date and closed periods
    /// last between `avg - spread` and `avg + spread` days.
    #[test]
    fn test_generate_valid_time() {
        let base_date = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap();
        let config = TemporalAttributeConfig {
            valid_time: ValidTimeConfig {
                closed_probability: 0.5, // 50% chance of closed
                avg_validity_days: 30,
                validity_stddev_days: 10,
            },
            ..Default::default()
        };
        let mut generator = TemporalAttributeGenerator::new(config, 42, base_date);

        let mut has_closed = false;
        let mut has_open = false;

        for _ in 0..100 {
            let (valid_from, valid_to) = generator.generate_valid_time();
            assert!(valid_from.date() >= base_date - Duration::days(365));
            assert!(valid_from.date() <= base_date + Duration::days(365));

            if let Some(valid_to) = valid_to {
                has_closed = true;
                assert!(valid_to > valid_from);

                let validity = valid_to - valid_from;
                assert!(validity >= Duration::days(20));
                assert!(validity <= Duration::days(40));
            } else {
                has_open = true;
            }
        }

        // With 50% probability, should have both
        assert!(has_closed);
        assert!(has_open);
    }

    /// Without backdating the record is never written before it became valid.
    #[test]
    fn test_generate_transaction_time() {
        let base_date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let config = TemporalAttributeConfig {
            transaction_time: TransactionTimeConfig {
                avg_recording_delay_seconds: 3600, // 1 hour average delay
                allow_backdating: false,
                ..Default::default()
            },
            ..Default::default()
        };
        let mut generator = TemporalAttributeGenerator::new(config, 42, base_date);

        // Midnight on the base date (Unix timestamp 1704067200); built from
        // the date to avoid the deprecated `NaiveDateTime::from_timestamp_opt`.
        let valid_from = base_date.and_hms_opt(0, 0, 0).unwrap();
        let transaction_time = generator.generate_transaction_time(valid_from);

        // Transaction time should be >= valid_from when backdating is disabled
        let valid_from_utc = DateTime::<Utc>::from_naive_utc_and_offset(valid_from, Utc);
        assert!(transaction_time >= valid_from_utc);
    }

    /// A version chain always contains at least the original version.
    #[test]
    fn test_generate_version_chain() {
        let base_date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let config = TemporalAttributeConfig {
            generate_version_chains: true,
            avg_versions_per_entity: 3.0,
            ..Default::default()
        };
        let mut generator = TemporalAttributeGenerator::new(config, 42, base_date);

        let entity = "test_entity";
        let chain = generator.generate_version_chain(entity.to_string(), Uuid::new_v4());

        assert!(!chain.all_versions().is_empty());
    }

    /// Every builder setter lands in the matching configuration field.
    #[test]
    fn test_config_builder() {
        let config = TemporalAttributeConfigBuilder::new()
            .enabled(true)
            .closed_probability(0.3)
            .avg_validity_days(180)
            .avg_recording_delay(60)
            .allow_backdating(0.05)
            .with_version_chains(2.5)
            .build();

        assert!(config.enabled);
        assert_eq!(config.valid_time.closed_probability, 0.3);
        assert_eq!(config.valid_time.avg_validity_days, 180);
        assert_eq!(config.transaction_time.avg_recording_delay_seconds, 60);
        assert!(config.transaction_time.allow_backdating);
        assert_eq!(config.transaction_time.backdating_probability, 0.05);
        assert!(config.generate_version_chains);
        assert_eq!(config.avg_versions_per_entity, 2.5);
    }

    /// The counter tracks wrapped entities and is cleared by `reset`.
    #[test]
    fn test_generator_count() {
        let base_date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let mut generator = TemporalAttributeGenerator::with_defaults(42, base_date);

        assert_eq!(generator.count(), 0);

        for _ in 0..5 {
            generator.generate_temporal("entity".to_string());
        }

        assert_eq!(generator.count(), 5);

        generator.reset(42);
        assert_eq!(generator.count(), 0);
    }

    /// Equal seeds replay equal sequences, both for fresh generators and
    /// after `reset`.
    #[test]
    fn test_same_seed_is_reproducible() {
        let base_date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let mut first = TemporalAttributeGenerator::with_defaults(7, base_date);
        let mut second = TemporalAttributeGenerator::with_defaults(7, base_date);

        let expected: Vec<_> = (0..20).map(|_| first.generate_valid_time()).collect();
        let replayed: Vec<_> = (0..20).map(|_| second.generate_valid_time()).collect();
        assert_eq!(expected, replayed);

        first.reset(7);
        let after_reset: Vec<_> = (0..20).map(|_| first.generate_valid_time()).collect();
        assert_eq!(expected, after_reset);
    }
}