Skip to main content

datasynth_generators/temporal/
temporal_generator.rs

1//! Temporal attribute generator implementation.
2//!
3//! Provides generation of temporal attributes for entities, supporting
4//! bi-temporal data models.
5
6use chrono::{DateTime, Duration, NaiveDate, NaiveDateTime, Utc};
7use rand::prelude::*;
8use rand_chacha::ChaCha8Rng;
9use serde::{Deserialize, Serialize};
10use uuid::Uuid;
11
12use datasynth_core::models::{BiTemporal, TemporalChangeType, TemporalVersionChain};
13
/// Configuration for temporal attribute generation.
///
/// Bundles the valid-time and transaction-time settings together with the
/// switches that control version-chain generation. The type is serde
/// (de)serializable, so values may come from external configuration and are
/// not guaranteed to have passed through the builder's clamping.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TemporalAttributeConfig {
    /// Enable temporal attribute generation.
    ///
    /// NOTE(review): the generator in this file never reads this flag;
    /// presumably callers check it before invoking the generator — confirm.
    pub enabled: bool,
    /// Valid time configuration.
    pub valid_time: ValidTimeConfig,
    /// Transaction time configuration.
    pub transaction_time: TransactionTimeConfig,
    /// Generate version chains for entities.
    ///
    /// When `false`, `generate_version_chain` produces a chain that holds only
    /// the initial version.
    pub generate_version_chains: bool,
    /// Average number of versions per entity.
    ///
    /// Used as the rate parameter of the Poisson-like draw that decides how
    /// many versions a chain receives.
    pub avg_versions_per_entity: f64,
}
28
29impl Default for TemporalAttributeConfig {
30    fn default() -> Self {
31        Self {
32            enabled: true,
33            valid_time: ValidTimeConfig::default(),
34            transaction_time: TransactionTimeConfig::default(),
35            generate_version_chains: false,
36            avg_versions_per_entity: 1.5,
37        }
38    }
39}
40
/// Configuration for valid time (business time) generation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ValidTimeConfig {
    /// Probability that valid_to is set (entity has ended validity).
    ///
    /// Expected to lie in `[0.0, 1.0]`; it is handed to the RNG as a Bernoulli
    /// probability.
    pub closed_probability: f64,
    /// Average validity duration in days.
    pub avg_validity_days: u32,
    /// Standard deviation of validity duration in days.
    ///
    /// NOTE(review): `generate_valid_time` currently draws the duration
    /// uniformly from `avg_validity_days ± validity_stddev_days` (floored at one
    /// day), so this value acts as a half-width rather than a true standard
    /// deviation.
    pub validity_stddev_days: u32,
}
51
52impl Default for ValidTimeConfig {
53    fn default() -> Self {
54        Self {
55            closed_probability: 0.1,
56            avg_validity_days: 365,
57            validity_stddev_days: 90,
58        }
59    }
60}
61
/// Configuration for transaction time (system time) generation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TransactionTimeConfig {
    /// Average recording delay in seconds (0 = immediate).
    ///
    /// When non-zero, the generator adds an exponentially distributed delay
    /// with this mean to the valid-from instant.
    pub avg_recording_delay_seconds: u32,
    /// Allow backdating (recording time before valid time).
    pub allow_backdating: bool,
    /// Probability of backdating if allowed.
    ///
    /// Expected to lie in `[0.0, 1.0]`; it is handed to the RNG as a Bernoulli
    /// probability.
    pub backdating_probability: f64,
    /// Maximum backdate days.
    ///
    /// A backdated record is moved back by a whole number of days drawn from
    /// `1..=max_backdate_days`.
    pub max_backdate_days: u32,
}
74
75impl Default for TransactionTimeConfig {
76    fn default() -> Self {
77        Self {
78            avg_recording_delay_seconds: 0,
79            allow_backdating: false,
80            backdating_probability: 0.01,
81            max_backdate_days: 30,
82        }
83    }
84}
85
/// Generator for temporal attributes.
///
/// Holds the configuration, a seeded `ChaCha8Rng`, the base date around which
/// valid times are generated, and a counter of entities wrapped so far.
pub struct TemporalAttributeGenerator {
    /// Configuration.
    config: TemporalAttributeConfig,
    /// Random number generator (seeded in `new` and re-seeded by `reset`).
    rng: ChaCha8Rng,
    /// Base date for generation; valid-from dates fall within 365 days of it.
    base_date: NaiveDate,
    /// Generation count; incremented once per `generate_temporal` call.
    count: u64,
}
97
98impl TemporalAttributeGenerator {
99    /// Creates a new temporal attribute generator.
100    pub fn new(config: TemporalAttributeConfig, seed: u64, base_date: NaiveDate) -> Self {
101        Self {
102            config,
103            rng: ChaCha8Rng::seed_from_u64(seed),
104            base_date,
105            count: 0,
106        }
107    }
108
109    /// Creates a generator with default configuration.
110    pub fn with_defaults(seed: u64, base_date: NaiveDate) -> Self {
111        Self::new(TemporalAttributeConfig::default(), seed, base_date)
112    }
113
114    /// Wraps an entity with temporal attributes.
115    pub fn generate_temporal<T: Clone>(&mut self, entity: T) -> BiTemporal<T> {
116        self.count += 1;
117
118        let (valid_from, valid_to) = self.generate_valid_time();
119        let transaction_time = self.generate_transaction_time(valid_from);
120
121        let recorded_by = format!("system_{}", self.rng.gen_range(1..=100));
122        let mut temporal = BiTemporal::new(entity)
123            .with_valid_time(valid_from, valid_to)
124            .with_recorded_at(transaction_time)
125            .with_recorded_by(&recorded_by)
126            .with_change_type(TemporalChangeType::Original);
127
128        // Optionally add a change reason
129        if self.rng.gen_bool(0.2) {
130            temporal = temporal.with_change_reason("Initial creation");
131        }
132
133        temporal
134    }
135
136    /// Generates a version chain for an entity.
137    pub fn generate_version_chain<T: Clone>(
138        &mut self,
139        entity: T,
140        id: Uuid,
141    ) -> TemporalVersionChain<T> {
142        // Determine number of versions
143        let num_versions = if self.config.generate_version_chains {
144            let base_versions = self.config.avg_versions_per_entity;
145            // Poisson-like distribution
146            let lambda = base_versions;
147            let mut count = 0;
148            let mut p = 1.0;
149            let l = (-lambda).exp();
150            loop {
151                count += 1;
152                p *= self.rng.gen::<f64>();
153                if p <= l {
154                    break;
155                }
156            }
157            count.max(1)
158        } else {
159            1
160        };
161
162        // Generate initial version
163        let initial_temporal = self.generate_temporal(entity.clone());
164        let mut chain = TemporalVersionChain::new(id, initial_temporal);
165
166        // Generate subsequent versions
167        let current_entity = entity;
168        for i in 1..num_versions {
169            // Each version is a correction or adjustment
170            let change_type = if i == num_versions - 1 && self.rng.gen_bool(0.1) {
171                TemporalChangeType::Reversal
172            } else if self.rng.gen_bool(0.3) {
173                TemporalChangeType::Correction
174            } else {
175                TemporalChangeType::Adjustment
176            };
177
178            let version = self.generate_version(current_entity.clone(), change_type);
179            chain.add_version(version);
180        }
181
182        chain
183    }
184
185    /// Generates a new version of an entity.
186    fn generate_version<T: Clone>(
187        &mut self,
188        entity: T,
189        change_type: TemporalChangeType,
190    ) -> BiTemporal<T> {
191        let (valid_from, valid_to) = self.generate_valid_time();
192        let transaction_time = self.generate_transaction_time(valid_from);
193
194        let reason: Option<&str> = match change_type {
195            TemporalChangeType::Correction => Some("Data correction"),
196            TemporalChangeType::Adjustment => Some("Adjustment per policy"),
197            TemporalChangeType::Reversal => Some("Reversed entry"),
198            _ => None,
199        };
200
201        let recorded_by = format!("user_{}", self.rng.gen_range(1..=50));
202        let mut temporal = BiTemporal::new(entity)
203            .with_valid_time(valid_from, valid_to)
204            .with_recorded_at(transaction_time)
205            .with_recorded_by(&recorded_by)
206            .with_change_type(change_type);
207
208        if let Some(r) = reason {
209            temporal = temporal.with_change_reason(r);
210        }
211
212        temporal
213    }
214
215    /// Generates valid time (business time) attributes.
216    pub fn generate_valid_time(&mut self) -> (NaiveDateTime, Option<NaiveDateTime>) {
217        // Generate valid_from within a reasonable range from base_date
218        let days_offset = self.rng.gen_range(-365..=365);
219        let valid_from_date = self.base_date + Duration::days(days_offset as i64);
220        let valid_from = valid_from_date
221            .and_hms_opt(
222                self.rng.gen_range(0..24),
223                self.rng.gen_range(0..60),
224                self.rng.gen_range(0..60),
225            )
226            .expect("valid h/m/s ranges");
227
228        // Determine if validity is closed
229        let valid_to = if self.rng.gen_bool(self.config.valid_time.closed_probability) {
230            // Generate validity duration
231            let avg_days = self.config.valid_time.avg_validity_days as f64;
232            let stddev_days = self.config.valid_time.validity_stddev_days as f64;
233
234            // Normal distribution for duration
235            let duration_days = (avg_days + self.rng.gen::<f64>() * stddev_days * 2.0 - stddev_days)
236                .max(1.0) as i64;
237
238            Some(valid_from + Duration::days(duration_days))
239        } else {
240            None
241        };
242
243        (valid_from, valid_to)
244    }
245
246    /// Generates transaction time (system time) based on valid time.
247    pub fn generate_transaction_time(&mut self, valid_from: NaiveDateTime) -> DateTime<Utc> {
248        let base_time = DateTime::<Utc>::from_naive_utc_and_offset(valid_from, Utc);
249
250        // Add recording delay
251        let delay_secs = if self.config.transaction_time.avg_recording_delay_seconds > 0 {
252            let avg = self.config.transaction_time.avg_recording_delay_seconds as f64;
253            // Exponential distribution for delay
254            let delay = -avg * self.rng.gen::<f64>().ln();
255            delay as i64
256        } else {
257            0
258        };
259
260        let recorded_at = base_time + Duration::seconds(delay_secs);
261
262        // Handle backdating
263        if self.config.transaction_time.allow_backdating
264            && self
265                .rng
266                .gen_bool(self.config.transaction_time.backdating_probability)
267        {
268            let backdate_days = self
269                .rng
270                .gen_range(1..=self.config.transaction_time.max_backdate_days)
271                as i64;
272            recorded_at - Duration::days(backdate_days)
273        } else {
274            recorded_at
275        }
276    }
277
278    /// Returns the number of entities processed.
279    pub fn count(&self) -> u64 {
280        self.count
281    }
282
283    /// Resets the generator.
284    pub fn reset(&mut self, seed: u64) {
285        self.rng = ChaCha8Rng::seed_from_u64(seed);
286        self.count = 0;
287    }
288
289    /// Returns the configuration.
290    pub fn config(&self) -> &TemporalAttributeConfig {
291        &self.config
292    }
293}
294
/// Builder for temporal attribute configuration.
///
/// Starts from `TemporalAttributeConfig::default()` and lets callers override
/// individual settings through chained, by-value setter methods before calling
/// `build`.
pub struct TemporalAttributeConfigBuilder {
    /// The configuration being assembled.
    config: TemporalAttributeConfig,
}
299
300impl TemporalAttributeConfigBuilder {
301    /// Creates a new builder with default values.
302    pub fn new() -> Self {
303        Self {
304            config: TemporalAttributeConfig::default(),
305        }
306    }
307
308    /// Sets whether temporal attributes are enabled.
309    pub fn enabled(mut self, enabled: bool) -> Self {
310        self.config.enabled = enabled;
311        self
312    }
313
314    /// Sets the probability of closed validity.
315    pub fn closed_probability(mut self, prob: f64) -> Self {
316        self.config.valid_time.closed_probability = prob.clamp(0.0, 1.0);
317        self
318    }
319
320    /// Sets the average validity duration in days.
321    pub fn avg_validity_days(mut self, days: u32) -> Self {
322        self.config.valid_time.avg_validity_days = days;
323        self
324    }
325
326    /// Sets the average recording delay in seconds.
327    pub fn avg_recording_delay(mut self, seconds: u32) -> Self {
328        self.config.transaction_time.avg_recording_delay_seconds = seconds;
329        self
330    }
331
332    /// Enables backdating with the given probability.
333    pub fn allow_backdating(mut self, prob: f64) -> Self {
334        self.config.transaction_time.allow_backdating = true;
335        self.config.transaction_time.backdating_probability = prob.clamp(0.0, 1.0);
336        self
337    }
338
339    /// Enables version chain generation.
340    pub fn with_version_chains(mut self, avg_versions: f64) -> Self {
341        self.config.generate_version_chains = true;
342        self.config.avg_versions_per_entity = avg_versions.max(1.0);
343        self
344    }
345
346    /// Builds the configuration.
347    pub fn build(self) -> TemporalAttributeConfig {
348        self.config
349    }
350}
351
352impl Default for TemporalAttributeConfigBuilder {
353    fn default() -> Self {
354        Self::new()
355    }
356}
357
#[cfg(test)]
#[allow(clippy::unwrap_used)] // unwrap in tests: a failure here is a test failure
mod tests {
    use super::*;

    /// A wrapped entity keeps its payload and is marked as an original record.
    #[test]
    fn test_generate_temporal() {
        let base_date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let mut generator = TemporalAttributeGenerator::with_defaults(42, base_date);

        let entity = "test_entity";
        let temporal = generator.generate_temporal(entity.to_string());

        assert_eq!(temporal.data, "test_entity");
        assert!(temporal.recorded_at > DateTime::<Utc>::MIN_UTC);
        assert_eq!(temporal.change_type, TemporalChangeType::Original);
    }

    /// Valid-from stays within a year of the base date and closed intervals
    /// end after they start, within the configured spread.
    #[test]
    fn test_generate_valid_time() {
        let base_date = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap();
        let config = TemporalAttributeConfig {
            valid_time: ValidTimeConfig {
                closed_probability: 0.5, // 50% chance of closed
                avg_validity_days: 30,
                validity_stddev_days: 10,
            },
            ..Default::default()
        };
        let mut generator = TemporalAttributeGenerator::new(config, 42, base_date);

        let earliest = base_date - Duration::days(365);
        let latest = base_date + Duration::days(365);

        let mut has_closed = false;
        let mut has_open = false;

        for _ in 0..100 {
            let (valid_from, valid_to) = generator.generate_valid_time();
            assert!(valid_from.date() >= earliest);
            assert!(valid_from.date() <= latest);

            match valid_to {
                Some(end) => {
                    has_closed = true;
                    assert!(end > valid_from);
                    // avg 30 days with a spread of 10 days
                    assert!(end - valid_from >= Duration::days(20));
                    assert!(end - valid_from <= Duration::days(40));
                }
                None => has_open = true,
            }
        }

        // With 50% probability, should have both
        assert!(has_closed);
        assert!(has_open);
    }

    /// Without backdating the record can never precede its valid-from instant.
    #[test]
    fn test_generate_transaction_time() {
        let base_date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let config = TemporalAttributeConfig {
            transaction_time: TransactionTimeConfig {
                avg_recording_delay_seconds: 3600, // 1 hour average delay
                allow_backdating: false,
                ..Default::default()
            },
            ..Default::default()
        };
        let mut generator = TemporalAttributeGenerator::new(config, 42, base_date);

        let valid_from = DateTime::from_timestamp(1704067200, 0).unwrap().naive_utc();
        let transaction_time = generator.generate_transaction_time(valid_from);

        // Transaction time should be >= valid_from when backdating is disabled
        let valid_from_utc = DateTime::<Utc>::from_naive_utc_and_offset(valid_from, Utc);
        assert!(transaction_time >= valid_from_utc);
    }

    /// A generated chain always contains at least the initial version.
    #[test]
    fn test_generate_version_chain() {
        let base_date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let config = TemporalAttributeConfig {
            generate_version_chains: true,
            avg_versions_per_entity: 3.0,
            ..Default::default()
        };
        let mut generator = TemporalAttributeGenerator::new(config, 42, base_date);

        let entity = "test_entity";
        let chain = generator.generate_version_chain(entity.to_string(), Uuid::new_v4());

        // Should have at least 1 version
        assert!(!chain.all_versions().is_empty());
    }

    /// Every builder setter lands in the corresponding configuration field.
    #[test]
    fn test_config_builder() {
        let config = TemporalAttributeConfigBuilder::new()
            .enabled(true)
            .closed_probability(0.3)
            .avg_validity_days(180)
            .avg_recording_delay(60)
            .allow_backdating(0.05)
            .with_version_chains(2.5)
            .build();

        assert!(config.enabled);
        assert_eq!(config.valid_time.closed_probability, 0.3);
        assert_eq!(config.valid_time.avg_validity_days, 180);
        assert_eq!(config.transaction_time.avg_recording_delay_seconds, 60);
        assert!(config.transaction_time.allow_backdating);
        assert_eq!(config.transaction_time.backdating_probability, 0.05);
        assert!(config.generate_version_chains);
        assert_eq!(config.avg_versions_per_entity, 2.5);
    }

    /// The counter tracks `generate_temporal` calls and is cleared by `reset`.
    #[test]
    fn test_generator_count() {
        let base_date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let mut generator = TemporalAttributeGenerator::with_defaults(42, base_date);

        assert_eq!(generator.count(), 0);

        for _ in 0..5 {
            generator.generate_temporal("entity".to_string());
        }

        assert_eq!(generator.count(), 5);

        generator.reset(42);
        assert_eq!(generator.count(), 0);
    }

    /// Re-seeding with the same seed replays the same sequence of valid times.
    #[test]
    fn test_reset_replays_sequence() {
        let base_date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let mut generator = TemporalAttributeGenerator::with_defaults(7, base_date);

        let first: Vec<_> = (0..10).map(|_| generator.generate_valid_time()).collect();
        generator.reset(7);
        let second: Vec<_> = (0..10).map(|_| generator.generate_valid_time()).collect();

        assert_eq!(first, second);
    }
}
485}