datasynth_generators/temporal/
temporal_generator.rs1use chrono::{DateTime, Duration, NaiveDate, NaiveDateTime, Utc};
7use datasynth_core::utils::seeded_rng;
8use rand::prelude::*;
9use rand_chacha::ChaCha8Rng;
10use serde::{Deserialize, Serialize};
11use uuid::Uuid;
12
13use datasynth_core::models::{BiTemporal, TemporalChangeType, TemporalVersionChain};
14
15#[derive(Debug, Clone, Serialize, Deserialize)]
17pub struct TemporalAttributeConfig {
18 pub enabled: bool,
20 pub valid_time: ValidTimeConfig,
22 pub transaction_time: TransactionTimeConfig,
24 pub generate_version_chains: bool,
26 pub avg_versions_per_entity: f64,
28}
29
30impl Default for TemporalAttributeConfig {
31 fn default() -> Self {
32 Self {
33 enabled: true,
34 valid_time: ValidTimeConfig::default(),
35 transaction_time: TransactionTimeConfig::default(),
36 generate_version_chains: false,
37 avg_versions_per_entity: 1.5,
38 }
39 }
40}
41
42#[derive(Debug, Clone, Serialize, Deserialize)]
44pub struct ValidTimeConfig {
45 pub closed_probability: f64,
47 pub avg_validity_days: u32,
49 pub validity_stddev_days: u32,
51}
52
53impl Default for ValidTimeConfig {
54 fn default() -> Self {
55 Self {
56 closed_probability: 0.1,
57 avg_validity_days: 365,
58 validity_stddev_days: 90,
59 }
60 }
61}
62
63#[derive(Debug, Clone, Serialize, Deserialize)]
65pub struct TransactionTimeConfig {
66 pub avg_recording_delay_seconds: u32,
68 pub allow_backdating: bool,
70 pub backdating_probability: f64,
72 pub max_backdate_days: u32,
74}
75
76impl Default for TransactionTimeConfig {
77 fn default() -> Self {
78 Self {
79 avg_recording_delay_seconds: 0,
80 allow_backdating: false,
81 backdating_probability: 0.01,
82 max_backdate_days: 30,
83 }
84 }
85}
86
87pub struct TemporalAttributeGenerator {
89 config: TemporalAttributeConfig,
91 rng: ChaCha8Rng,
93 base_date: NaiveDate,
95 count: u64,
97}
98
99impl TemporalAttributeGenerator {
100 pub fn new(config: TemporalAttributeConfig, seed: u64, base_date: NaiveDate) -> Self {
102 Self {
103 config,
104 rng: seeded_rng(seed, 0),
105 base_date,
106 count: 0,
107 }
108 }
109
110 pub fn with_defaults(seed: u64, base_date: NaiveDate) -> Self {
112 Self::new(TemporalAttributeConfig::default(), seed, base_date)
113 }
114
115 pub fn generate_temporal<T: Clone>(&mut self, entity: T) -> BiTemporal<T> {
117 self.count += 1;
118
119 let (valid_from, valid_to) = self.generate_valid_time();
120 let transaction_time = self.generate_transaction_time(valid_from);
121
122 let recorded_by = format!("system_{}", self.rng.random_range(1..=100));
123 let mut temporal = BiTemporal::new(entity)
124 .with_valid_time(valid_from, valid_to)
125 .with_recorded_at(transaction_time)
126 .with_recorded_by(&recorded_by)
127 .with_change_type(TemporalChangeType::Original);
128
129 if self.rng.random_bool(0.2) {
131 temporal = temporal.with_change_reason("Initial creation");
132 }
133
134 temporal
135 }
136
137 pub fn generate_version_chain<T: Clone>(
139 &mut self,
140 entity: T,
141 id: Uuid,
142 ) -> TemporalVersionChain<T> {
143 let num_versions = if self.config.generate_version_chains {
145 let base_versions = self.config.avg_versions_per_entity;
146 let lambda = base_versions;
148 let mut count = 0;
149 let mut p = 1.0;
150 let l = (-lambda).exp();
151 loop {
152 count += 1;
153 p *= self.rng.random::<f64>();
154 if p <= l {
155 break;
156 }
157 }
158 count.max(1)
159 } else {
160 1
161 };
162
163 let initial_temporal = self.generate_temporal(entity.clone());
165 let mut chain = TemporalVersionChain::new(id, initial_temporal);
166
167 let current_entity = entity;
169 for i in 1..num_versions {
170 let change_type = if i == num_versions - 1 && self.rng.random_bool(0.1) {
172 TemporalChangeType::Reversal
173 } else if self.rng.random_bool(0.3) {
174 TemporalChangeType::Correction
175 } else {
176 TemporalChangeType::Adjustment
177 };
178
179 let version = self.generate_version(current_entity.clone(), change_type);
180 chain.add_version(version);
181 }
182
183 chain
184 }
185
186 fn generate_version<T: Clone>(
188 &mut self,
189 entity: T,
190 change_type: TemporalChangeType,
191 ) -> BiTemporal<T> {
192 let (valid_from, valid_to) = self.generate_valid_time();
193 let transaction_time = self.generate_transaction_time(valid_from);
194
195 let reason: Option<&str> = match change_type {
196 TemporalChangeType::Correction => Some("Data correction"),
197 TemporalChangeType::Adjustment => Some("Adjustment per policy"),
198 TemporalChangeType::Reversal => Some("Reversed entry"),
199 _ => None,
200 };
201
202 let recorded_by = format!("user_{}", self.rng.random_range(1..=50));
203 let mut temporal = BiTemporal::new(entity)
204 .with_valid_time(valid_from, valid_to)
205 .with_recorded_at(transaction_time)
206 .with_recorded_by(&recorded_by)
207 .with_change_type(change_type);
208
209 if let Some(r) = reason {
210 temporal = temporal.with_change_reason(r);
211 }
212
213 temporal
214 }
215
216 pub fn generate_valid_time(&mut self) -> (NaiveDateTime, Option<NaiveDateTime>) {
218 let days_offset = self.rng.random_range(-365..=365);
220 let valid_from_date = self.base_date + Duration::days(days_offset as i64);
221 let valid_from = valid_from_date
222 .and_hms_opt(
223 self.rng.random_range(0..24),
224 self.rng.random_range(0..60),
225 self.rng.random_range(0..60),
226 )
227 .expect("valid h/m/s ranges");
228
229 let valid_to = if self
231 .rng
232 .random_bool(self.config.valid_time.closed_probability)
233 {
234 let avg_days = self.config.valid_time.avg_validity_days as f64;
236 let stddev_days = self.config.valid_time.validity_stddev_days as f64;
237
238 let duration_days = (avg_days + self.rng.random::<f64>() * stddev_days * 2.0
240 - stddev_days)
241 .max(1.0) as i64;
242
243 Some(valid_from + Duration::days(duration_days))
244 } else {
245 None
246 };
247
248 (valid_from, valid_to)
249 }
250
251 pub fn generate_transaction_time(&mut self, valid_from: NaiveDateTime) -> DateTime<Utc> {
253 let base_time = DateTime::<Utc>::from_naive_utc_and_offset(valid_from, Utc);
254
255 let delay_secs = if self.config.transaction_time.avg_recording_delay_seconds > 0 {
257 let avg = self.config.transaction_time.avg_recording_delay_seconds as f64;
258 let delay = -avg * self.rng.random::<f64>().ln();
260 delay as i64
261 } else {
262 0
263 };
264
265 let recorded_at = base_time + Duration::seconds(delay_secs);
266
267 if self.config.transaction_time.allow_backdating
269 && self
270 .rng
271 .random_bool(self.config.transaction_time.backdating_probability)
272 {
273 let backdate_days = self
274 .rng
275 .random_range(1..=self.config.transaction_time.max_backdate_days)
276 as i64;
277 recorded_at - Duration::days(backdate_days)
278 } else {
279 recorded_at
280 }
281 }
282
283 pub fn count(&self) -> u64 {
285 self.count
286 }
287
288 pub fn reset(&mut self, seed: u64) {
290 self.rng = seeded_rng(seed, 0);
291 self.count = 0;
292 }
293
294 pub fn config(&self) -> &TemporalAttributeConfig {
296 &self.config
297 }
298}
299
300pub struct TemporalAttributeConfigBuilder {
302 config: TemporalAttributeConfig,
303}
304
305impl TemporalAttributeConfigBuilder {
306 pub fn new() -> Self {
308 Self {
309 config: TemporalAttributeConfig::default(),
310 }
311 }
312
313 pub fn enabled(mut self, enabled: bool) -> Self {
315 self.config.enabled = enabled;
316 self
317 }
318
319 pub fn closed_probability(mut self, prob: f64) -> Self {
321 self.config.valid_time.closed_probability = prob.clamp(0.0, 1.0);
322 self
323 }
324
325 pub fn avg_validity_days(mut self, days: u32) -> Self {
327 self.config.valid_time.avg_validity_days = days;
328 self
329 }
330
331 pub fn avg_recording_delay(mut self, seconds: u32) -> Self {
333 self.config.transaction_time.avg_recording_delay_seconds = seconds;
334 self
335 }
336
337 pub fn allow_backdating(mut self, prob: f64) -> Self {
339 self.config.transaction_time.allow_backdating = true;
340 self.config.transaction_time.backdating_probability = prob.clamp(0.0, 1.0);
341 self
342 }
343
344 pub fn with_version_chains(mut self, avg_versions: f64) -> Self {
346 self.config.generate_version_chains = true;
347 self.config.avg_versions_per_entity = avg_versions.max(1.0);
348 self
349 }
350
351 pub fn build(self) -> TemporalAttributeConfig {
353 self.config
354 }
355}
356
357impl Default for TemporalAttributeConfigBuilder {
358 fn default() -> Self {
359 Self::new()
360 }
361}
362
#[cfg(test)]
// `unwrap` is acceptable in tests: a failure should abort the test immediately.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// A generated record keeps its payload and is marked as the original version.
    #[test]
    fn test_generate_temporal() {
        let base_date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let mut generator = TemporalAttributeGenerator::with_defaults(42, base_date);

        let entity = "test_entity";
        let temporal = generator.generate_temporal(entity.to_string());

        assert_eq!(temporal.data, "test_entity");
        assert!(temporal.recorded_at > DateTime::<Utc>::MIN_UTC);
        assert_eq!(temporal.change_type, TemporalChangeType::Original);
    }

    /// Valid-from stays within ±365 days of the base date, closed intervals end
    /// after they start, and both open and closed intervals occur at p = 0.5.
    #[test]
    fn test_generate_valid_time() {
        let base_date = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap();
        let config = TemporalAttributeConfig {
            valid_time: ValidTimeConfig {
                closed_probability: 0.5,
                avg_validity_days: 30,
                validity_stddev_days: 10,
            },
            ..Default::default()
        };
        let mut generator = TemporalAttributeGenerator::new(config, 42, base_date);

        let mut has_closed = false;
        let mut has_open = false;

        for _ in 0..100 {
            let (valid_from, valid_to) = generator.generate_valid_time();
            assert!(valid_from.date() >= base_date - Duration::days(365));
            assert!(valid_from.date() <= base_date + Duration::days(365));

            if let Some(end) = valid_to {
                has_closed = true;
                assert!(end > valid_from);
            } else {
                has_open = true;
            }
        }

        assert!(has_closed);
        assert!(has_open);
    }

    /// Without backdating, the recording time is never earlier than `valid_from`.
    #[test]
    fn test_generate_transaction_time() {
        let base_date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let config = TemporalAttributeConfig {
            transaction_time: TransactionTimeConfig {
                avg_recording_delay_seconds: 3600,
                allow_backdating: false,
                ..Default::default()
            },
            ..Default::default()
        };
        let mut generator = TemporalAttributeGenerator::new(config, 42, base_date);

        let valid_from = DateTime::from_timestamp(1704067200, 0).unwrap().naive_utc();
        let transaction_time = generator.generate_transaction_time(valid_from);

        let valid_from_utc = DateTime::<Utc>::from_naive_utc_and_offset(valid_from, Utc);
        assert!(transaction_time >= valid_from_utc);
    }

    /// A chain always contains at least its initial version.
    #[test]
    fn test_generate_version_chain() {
        let base_date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let config = TemporalAttributeConfig {
            generate_version_chains: true,
            avg_versions_per_entity: 3.0,
            ..Default::default()
        };
        let mut generator = TemporalAttributeGenerator::new(config, 42, base_date);

        let entity = "test_entity";
        let chain = generator.generate_version_chain(entity.to_string(), Uuid::new_v4());

        assert!(!chain.all_versions().is_empty());
    }

    /// Every builder setter lands in the corresponding configuration field.
    #[test]
    fn test_config_builder() {
        let config = TemporalAttributeConfigBuilder::new()
            .enabled(true)
            .closed_probability(0.3)
            .avg_validity_days(180)
            .avg_recording_delay(60)
            .allow_backdating(0.05)
            .with_version_chains(2.5)
            .build();

        assert!(config.enabled);
        assert_eq!(config.valid_time.closed_probability, 0.3);
        assert_eq!(config.valid_time.avg_validity_days, 180);
        assert_eq!(config.transaction_time.avg_recording_delay_seconds, 60);
        assert!(config.transaction_time.allow_backdating);
        assert_eq!(config.transaction_time.backdating_probability, 0.05);
        assert!(config.generate_version_chains);
        assert_eq!(config.avg_versions_per_entity, 2.5);
    }

    /// The counter tracks `generate_temporal` calls and is cleared by `reset`.
    #[test]
    fn test_generator_count() {
        let base_date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let mut generator = TemporalAttributeGenerator::with_defaults(42, base_date);

        assert_eq!(generator.count(), 0);

        for _ in 0..5 {
            generator.generate_temporal("entity".to_string());
        }

        assert_eq!(generator.count(), 5);

        generator.reset(42);
        assert_eq!(generator.count(), 0);
    }
}