datasynth_generators/temporal/
temporal_generator.rs1use chrono::{DateTime, Duration, NaiveDate, NaiveDateTime, Utc};
7use datasynth_core::utils::seeded_rng;
8use rand::prelude::*;
9use rand_chacha::ChaCha8Rng;
10use serde::{Deserialize, Serialize};
11use uuid::Uuid;
12
13use datasynth_core::models::{BiTemporal, TemporalChangeType, TemporalVersionChain};
14
15#[derive(Debug, Clone, Serialize, Deserialize)]
17pub struct TemporalAttributeConfig {
18 pub enabled: bool,
20 pub valid_time: ValidTimeConfig,
22 pub transaction_time: TransactionTimeConfig,
24 pub generate_version_chains: bool,
26 pub avg_versions_per_entity: f64,
28}
29
30impl Default for TemporalAttributeConfig {
31 fn default() -> Self {
32 Self {
33 enabled: true,
34 valid_time: ValidTimeConfig::default(),
35 transaction_time: TransactionTimeConfig::default(),
36 generate_version_chains: false,
37 avg_versions_per_entity: 1.5,
38 }
39 }
40}
41
42#[derive(Debug, Clone, Serialize, Deserialize)]
44pub struct ValidTimeConfig {
45 pub closed_probability: f64,
47 pub avg_validity_days: u32,
49 pub validity_stddev_days: u32,
51}
52
53impl Default for ValidTimeConfig {
54 fn default() -> Self {
55 Self {
56 closed_probability: 0.1,
57 avg_validity_days: 365,
58 validity_stddev_days: 90,
59 }
60 }
61}
62
63#[derive(Debug, Clone, Serialize, Deserialize)]
65pub struct TransactionTimeConfig {
66 pub avg_recording_delay_seconds: u32,
68 pub allow_backdating: bool,
70 pub backdating_probability: f64,
72 pub max_backdate_days: u32,
74}
75
76impl Default for TransactionTimeConfig {
77 fn default() -> Self {
78 Self {
79 avg_recording_delay_seconds: 0,
80 allow_backdating: false,
81 backdating_probability: 0.01,
82 max_backdate_days: 30,
83 }
84 }
85}
86
87pub struct TemporalAttributeGenerator {
89 config: TemporalAttributeConfig,
91 rng: ChaCha8Rng,
93 base_date: NaiveDate,
95 count: u64,
97}
98
99impl TemporalAttributeGenerator {
100 pub fn new(config: TemporalAttributeConfig, seed: u64, base_date: NaiveDate) -> Self {
102 Self {
103 config,
104 rng: seeded_rng(seed, 0),
105 base_date,
106 count: 0,
107 }
108 }
109
110 pub fn with_defaults(seed: u64, base_date: NaiveDate) -> Self {
112 Self::new(TemporalAttributeConfig::default(), seed, base_date)
113 }
114
115 pub fn generate_temporal<T: Clone>(&mut self, entity: T) -> BiTemporal<T> {
117 self.count += 1;
118
119 let (valid_from, valid_to) = self.generate_valid_time();
120 let transaction_time = self.generate_transaction_time(valid_from);
121
122 let recorded_by = format!("system_{}", self.rng.random_range(1..=100));
123 let mut temporal = BiTemporal::new(entity)
124 .with_valid_time(valid_from, valid_to)
125 .with_recorded_at(transaction_time)
126 .with_recorded_by(&recorded_by)
127 .with_change_type(TemporalChangeType::Original);
128
129 if self.rng.random_bool(0.2) {
131 temporal = temporal.with_change_reason("Initial creation");
132 }
133
134 temporal
135 }
136
137 pub fn generate_version_chain<T: Clone>(
139 &mut self,
140 entity: T,
141 id: Uuid,
142 ) -> TemporalVersionChain<T> {
143 let num_versions = if self.config.generate_version_chains {
145 let base_versions = self.config.avg_versions_per_entity;
146 let lambda = base_versions;
148 let mut count = 0;
149 let mut p = 1.0;
150 let l = (-lambda).exp();
151 loop {
152 count += 1;
153 p *= self.rng.random::<f64>();
154 if p <= l {
155 break;
156 }
157 }
158 count.max(1)
159 } else {
160 1
161 };
162
163 let initial_temporal = self.generate_temporal(entity.clone());
165 let mut chain = TemporalVersionChain::new(id, initial_temporal);
166
167 let current_entity = entity;
169 for i in 1..num_versions {
170 let change_type = if i == num_versions - 1 && self.rng.random_bool(0.1) {
172 TemporalChangeType::Reversal
173 } else if self.rng.random_bool(0.3) {
174 TemporalChangeType::Correction
175 } else {
176 TemporalChangeType::Adjustment
177 };
178
179 let version = self.generate_version(current_entity.clone(), change_type);
180 chain.add_version(version);
181 }
182
183 chain
184 }
185
186 fn generate_version<T: Clone>(
188 &mut self,
189 entity: T,
190 change_type: TemporalChangeType,
191 ) -> BiTemporal<T> {
192 let (valid_from, valid_to) = self.generate_valid_time();
193 let transaction_time = self.generate_transaction_time(valid_from);
194
195 let reason: Option<&str> = match change_type {
196 TemporalChangeType::Correction => Some("Data correction"),
197 TemporalChangeType::Adjustment => Some("Adjustment per policy"),
198 TemporalChangeType::Reversal => Some("Reversed entry"),
199 _ => None,
200 };
201
202 let recorded_by = format!("user_{}", self.rng.random_range(1..=50));
203 let mut temporal = BiTemporal::new(entity)
204 .with_valid_time(valid_from, valid_to)
205 .with_recorded_at(transaction_time)
206 .with_recorded_by(&recorded_by)
207 .with_change_type(change_type);
208
209 if let Some(r) = reason {
210 temporal = temporal.with_change_reason(r);
211 }
212
213 temporal
214 }
215
216 pub fn generate_valid_time(&mut self) -> (NaiveDateTime, Option<NaiveDateTime>) {
218 let days_offset = self.rng.random_range(-365..=365);
220 let valid_from_date = self.base_date + Duration::days(days_offset as i64);
221 let valid_from = valid_from_date
222 .and_hms_opt(
223 self.rng.random_range(0..24),
224 self.rng.random_range(0..60),
225 self.rng.random_range(0..60),
226 )
227 .expect("valid h/m/s ranges");
228
229 let valid_to = if self
231 .rng
232 .random_bool(self.config.valid_time.closed_probability)
233 {
234 let avg_days = self.config.valid_time.avg_validity_days as f64;
236 let stddev_days = self.config.valid_time.validity_stddev_days as f64;
237
238 let duration_days = (avg_days + self.rng.random::<f64>() * stddev_days * 2.0
240 - stddev_days)
241 .max(1.0) as i64;
242
243 Some(valid_from + Duration::days(duration_days))
244 } else {
245 None
246 };
247
248 (valid_from, valid_to)
249 }
250
251 pub fn generate_transaction_time(&mut self, valid_from: NaiveDateTime) -> DateTime<Utc> {
253 let base_time = DateTime::<Utc>::from_naive_utc_and_offset(valid_from, Utc);
254
255 let delay_secs = if self.config.transaction_time.avg_recording_delay_seconds > 0 {
257 let avg = self.config.transaction_time.avg_recording_delay_seconds as f64;
258 let delay = -avg * self.rng.random::<f64>().ln();
260 delay as i64
261 } else {
262 0
263 };
264
265 let recorded_at = base_time + Duration::seconds(delay_secs);
266
267 if self.config.transaction_time.allow_backdating
269 && self
270 .rng
271 .random_bool(self.config.transaction_time.backdating_probability)
272 {
273 let backdate_days = self
274 .rng
275 .random_range(1..=self.config.transaction_time.max_backdate_days)
276 as i64;
277 recorded_at - Duration::days(backdate_days)
278 } else {
279 recorded_at
280 }
281 }
282
283 pub fn count(&self) -> u64 {
285 self.count
286 }
287
288 pub fn reset(&mut self, seed: u64) {
290 self.rng = seeded_rng(seed, 0);
291 self.count = 0;
292 }
293
294 pub fn config(&self) -> &TemporalAttributeConfig {
296 &self.config
297 }
298}
299
300pub struct TemporalAttributeConfigBuilder {
302 config: TemporalAttributeConfig,
303}
304
305impl TemporalAttributeConfigBuilder {
306 pub fn new() -> Self {
308 Self {
309 config: TemporalAttributeConfig::default(),
310 }
311 }
312
313 pub fn enabled(mut self, enabled: bool) -> Self {
315 self.config.enabled = enabled;
316 self
317 }
318
319 pub fn closed_probability(mut self, prob: f64) -> Self {
321 self.config.valid_time.closed_probability = prob.clamp(0.0, 1.0);
322 self
323 }
324
325 pub fn avg_validity_days(mut self, days: u32) -> Self {
327 self.config.valid_time.avg_validity_days = days;
328 self
329 }
330
331 pub fn avg_recording_delay(mut self, seconds: u32) -> Self {
333 self.config.transaction_time.avg_recording_delay_seconds = seconds;
334 self
335 }
336
337 pub fn allow_backdating(mut self, prob: f64) -> Self {
339 self.config.transaction_time.allow_backdating = true;
340 self.config.transaction_time.backdating_probability = prob.clamp(0.0, 1.0);
341 self
342 }
343
344 pub fn with_version_chains(mut self, avg_versions: f64) -> Self {
346 self.config.generate_version_chains = true;
347 self.config.avg_versions_per_entity = avg_versions.max(1.0);
348 self
349 }
350
351 pub fn build(self) -> TemporalAttributeConfig {
353 self.config
354 }
355}
356
357impl Default for TemporalAttributeConfigBuilder {
358 fn default() -> Self {
359 Self::new()
360 }
361}
362
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly generated record keeps its payload and is marked `Original`.
    #[test]
    fn test_generate_temporal() {
        let base_date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let mut generator = TemporalAttributeGenerator::with_defaults(42, base_date);

        let entity = "test_entity";
        let temporal = generator.generate_temporal(entity.to_string());

        assert_eq!(temporal.data, "test_entity");
        assert!(temporal.recorded_at > DateTime::<Utc>::MIN_UTC);
        assert_eq!(temporal.change_type, TemporalChangeType::Original);
    }

    /// `valid_from` stays within a year of the base date, closed periods end
    /// after they start, and both open and closed periods occur at p = 0.5.
    #[test]
    fn test_generate_valid_time() {
        let base_date = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap();
        let config = TemporalAttributeConfig {
            valid_time: ValidTimeConfig {
                closed_probability: 0.5,
                avg_validity_days: 30,
                validity_stddev_days: 10,
            },
            ..Default::default()
        };
        let mut generator = TemporalAttributeGenerator::new(config, 42, base_date);

        let mut has_closed = false;
        let mut has_open = false;

        for _ in 0..100 {
            let (valid_from, valid_to) = generator.generate_valid_time();
            assert!(valid_from.date() >= base_date - Duration::days(365));
            assert!(valid_from.date() <= base_date + Duration::days(365));

            if let Some(valid_to) = valid_to {
                has_closed = true;
                assert!(valid_to > valid_from);
            } else {
                has_open = true;
            }
        }

        assert!(has_closed);
        assert!(has_open);
    }

    /// Without backdating the recording time never precedes `valid_from`.
    #[test]
    fn test_generate_transaction_time() {
        let base_date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let config = TemporalAttributeConfig {
            transaction_time: TransactionTimeConfig {
                avg_recording_delay_seconds: 3600,
                allow_backdating: false,
                ..Default::default()
            },
            ..Default::default()
        };
        let mut generator = TemporalAttributeGenerator::new(config, 42, base_date);

        let valid_from = DateTime::from_timestamp(1704067200, 0).unwrap().naive_utc();
        let transaction_time = generator.generate_transaction_time(valid_from);

        let valid_from_utc = DateTime::<Utc>::from_naive_utc_and_offset(valid_from, Utc);
        assert!(transaction_time >= valid_from_utc);
    }

    /// Every generated chain contains at least its initial version.
    #[test]
    fn test_generate_version_chain() {
        let base_date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let config = TemporalAttributeConfig {
            generate_version_chains: true,
            avg_versions_per_entity: 3.0,
            ..Default::default()
        };
        let mut generator = TemporalAttributeGenerator::new(config, 42, base_date);

        let entity = "test_entity";
        for _ in 0..20 {
            let chain = generator.generate_version_chain(entity.to_string(), Uuid::new_v4());
            assert!(!chain.all_versions().is_empty());
        }
    }

    /// The builder applies every setter to the resulting configuration.
    #[test]
    fn test_config_builder() {
        let config = TemporalAttributeConfigBuilder::new()
            .enabled(true)
            .closed_probability(0.3)
            .avg_validity_days(180)
            .avg_recording_delay(60)
            .allow_backdating(0.05)
            .with_version_chains(2.5)
            .build();

        assert!(config.enabled);
        assert_eq!(config.valid_time.closed_probability, 0.3);
        assert_eq!(config.valid_time.avg_validity_days, 180);
        assert_eq!(config.transaction_time.avg_recording_delay_seconds, 60);
        assert!(config.transaction_time.allow_backdating);
        assert_eq!(config.transaction_time.backdating_probability, 0.05);
        assert!(config.generate_version_chains);
        assert_eq!(config.avg_versions_per_entity, 2.5);
    }

    /// The counter tracks `generate_temporal` calls and is cleared by `reset`.
    #[test]
    fn test_generator_count() {
        let base_date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let mut generator = TemporalAttributeGenerator::with_defaults(42, base_date);

        assert_eq!(generator.count(), 0);

        for _ in 0..5 {
            generator.generate_temporal("entity".to_string());
        }

        assert_eq!(generator.count(), 5);

        generator.reset(42);
        assert_eq!(generator.count(), 0);
    }

    /// Resetting with the original seed replays the same random sequence.
    #[test]
    fn test_reset_restores_sequence() {
        let base_date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let mut generator = TemporalAttributeGenerator::with_defaults(42, base_date);

        let first = generator.generate_valid_time();
        generator.reset(42);

        assert_eq!(generator.generate_valid_time(), first);
    }
}