datasynth_generators/temporal/
temporal_generator.rs1use chrono::{DateTime, Duration, NaiveDate, NaiveDateTime, Utc};
7use datasynth_core::utils::seeded_rng;
8use rand::prelude::*;
9use rand_chacha::ChaCha8Rng;
10use serde::{Deserialize, Serialize};
11use uuid::Uuid;
12
13use datasynth_core::models::{BiTemporal, TemporalChangeType, TemporalVersionChain};
14
15#[derive(Debug, Clone, Serialize, Deserialize)]
17pub struct TemporalAttributeConfig {
18 pub enabled: bool,
20 pub valid_time: ValidTimeConfig,
22 pub transaction_time: TransactionTimeConfig,
24 pub generate_version_chains: bool,
26 pub avg_versions_per_entity: f64,
28}
29
30impl Default for TemporalAttributeConfig {
31 fn default() -> Self {
32 Self {
33 enabled: true,
34 valid_time: ValidTimeConfig::default(),
35 transaction_time: TransactionTimeConfig::default(),
36 generate_version_chains: false,
37 avg_versions_per_entity: 1.5,
38 }
39 }
40}
41
42#[derive(Debug, Clone, Serialize, Deserialize)]
44pub struct ValidTimeConfig {
45 pub closed_probability: f64,
47 pub avg_validity_days: u32,
49 pub validity_stddev_days: u32,
51}
52
53impl Default for ValidTimeConfig {
54 fn default() -> Self {
55 Self {
56 closed_probability: 0.1,
57 avg_validity_days: 365,
58 validity_stddev_days: 90,
59 }
60 }
61}
62
63#[derive(Debug, Clone, Serialize, Deserialize)]
65pub struct TransactionTimeConfig {
66 pub avg_recording_delay_seconds: u32,
68 pub allow_backdating: bool,
70 pub backdating_probability: f64,
72 pub max_backdate_days: u32,
74}
75
76impl Default for TransactionTimeConfig {
77 fn default() -> Self {
78 Self {
79 avg_recording_delay_seconds: 0,
80 allow_backdating: false,
81 backdating_probability: 0.01,
82 max_backdate_days: 30,
83 }
84 }
85}
86
87pub struct TemporalAttributeGenerator {
89 config: TemporalAttributeConfig,
91 rng: ChaCha8Rng,
93 base_date: NaiveDate,
95 count: u64,
97}
98
99impl TemporalAttributeGenerator {
100 pub fn new(config: TemporalAttributeConfig, seed: u64, base_date: NaiveDate) -> Self {
102 Self {
103 config,
104 rng: seeded_rng(seed, 0),
105 base_date,
106 count: 0,
107 }
108 }
109
110 pub fn with_defaults(seed: u64, base_date: NaiveDate) -> Self {
112 Self::new(TemporalAttributeConfig::default(), seed, base_date)
113 }
114
115 pub fn generate_temporal<T: Clone>(&mut self, entity: T) -> BiTemporal<T> {
117 self.count += 1;
118
119 let (valid_from, valid_to) = self.generate_valid_time();
120 let transaction_time = self.generate_transaction_time(valid_from);
121
122 let recorded_by = format!("system_{}", self.rng.gen_range(1..=100));
123 let mut temporal = BiTemporal::new(entity)
124 .with_valid_time(valid_from, valid_to)
125 .with_recorded_at(transaction_time)
126 .with_recorded_by(&recorded_by)
127 .with_change_type(TemporalChangeType::Original);
128
129 if self.rng.gen_bool(0.2) {
131 temporal = temporal.with_change_reason("Initial creation");
132 }
133
134 temporal
135 }
136
137 pub fn generate_version_chain<T: Clone>(
139 &mut self,
140 entity: T,
141 id: Uuid,
142 ) -> TemporalVersionChain<T> {
143 let num_versions = if self.config.generate_version_chains {
145 let base_versions = self.config.avg_versions_per_entity;
146 let lambda = base_versions;
148 let mut count = 0;
149 let mut p = 1.0;
150 let l = (-lambda).exp();
151 loop {
152 count += 1;
153 p *= self.rng.gen::<f64>();
154 if p <= l {
155 break;
156 }
157 }
158 count.max(1)
159 } else {
160 1
161 };
162
163 let initial_temporal = self.generate_temporal(entity.clone());
165 let mut chain = TemporalVersionChain::new(id, initial_temporal);
166
167 let current_entity = entity;
169 for i in 1..num_versions {
170 let change_type = if i == num_versions - 1 && self.rng.gen_bool(0.1) {
172 TemporalChangeType::Reversal
173 } else if self.rng.gen_bool(0.3) {
174 TemporalChangeType::Correction
175 } else {
176 TemporalChangeType::Adjustment
177 };
178
179 let version = self.generate_version(current_entity.clone(), change_type);
180 chain.add_version(version);
181 }
182
183 chain
184 }
185
186 fn generate_version<T: Clone>(
188 &mut self,
189 entity: T,
190 change_type: TemporalChangeType,
191 ) -> BiTemporal<T> {
192 let (valid_from, valid_to) = self.generate_valid_time();
193 let transaction_time = self.generate_transaction_time(valid_from);
194
195 let reason: Option<&str> = match change_type {
196 TemporalChangeType::Correction => Some("Data correction"),
197 TemporalChangeType::Adjustment => Some("Adjustment per policy"),
198 TemporalChangeType::Reversal => Some("Reversed entry"),
199 _ => None,
200 };
201
202 let recorded_by = format!("user_{}", self.rng.gen_range(1..=50));
203 let mut temporal = BiTemporal::new(entity)
204 .with_valid_time(valid_from, valid_to)
205 .with_recorded_at(transaction_time)
206 .with_recorded_by(&recorded_by)
207 .with_change_type(change_type);
208
209 if let Some(r) = reason {
210 temporal = temporal.with_change_reason(r);
211 }
212
213 temporal
214 }
215
216 pub fn generate_valid_time(&mut self) -> (NaiveDateTime, Option<NaiveDateTime>) {
218 let days_offset = self.rng.gen_range(-365..=365);
220 let valid_from_date = self.base_date + Duration::days(days_offset as i64);
221 let valid_from = valid_from_date
222 .and_hms_opt(
223 self.rng.gen_range(0..24),
224 self.rng.gen_range(0..60),
225 self.rng.gen_range(0..60),
226 )
227 .expect("valid h/m/s ranges");
228
229 let valid_to = if self.rng.gen_bool(self.config.valid_time.closed_probability) {
231 let avg_days = self.config.valid_time.avg_validity_days as f64;
233 let stddev_days = self.config.valid_time.validity_stddev_days as f64;
234
235 let duration_days = (avg_days + self.rng.gen::<f64>() * stddev_days * 2.0 - stddev_days)
237 .max(1.0) as i64;
238
239 Some(valid_from + Duration::days(duration_days))
240 } else {
241 None
242 };
243
244 (valid_from, valid_to)
245 }
246
247 pub fn generate_transaction_time(&mut self, valid_from: NaiveDateTime) -> DateTime<Utc> {
249 let base_time = DateTime::<Utc>::from_naive_utc_and_offset(valid_from, Utc);
250
251 let delay_secs = if self.config.transaction_time.avg_recording_delay_seconds > 0 {
253 let avg = self.config.transaction_time.avg_recording_delay_seconds as f64;
254 let delay = -avg * self.rng.gen::<f64>().ln();
256 delay as i64
257 } else {
258 0
259 };
260
261 let recorded_at = base_time + Duration::seconds(delay_secs);
262
263 if self.config.transaction_time.allow_backdating
265 && self
266 .rng
267 .gen_bool(self.config.transaction_time.backdating_probability)
268 {
269 let backdate_days = self
270 .rng
271 .gen_range(1..=self.config.transaction_time.max_backdate_days)
272 as i64;
273 recorded_at - Duration::days(backdate_days)
274 } else {
275 recorded_at
276 }
277 }
278
279 pub fn count(&self) -> u64 {
281 self.count
282 }
283
284 pub fn reset(&mut self, seed: u64) {
286 self.rng = seeded_rng(seed, 0);
287 self.count = 0;
288 }
289
290 pub fn config(&self) -> &TemporalAttributeConfig {
292 &self.config
293 }
294}
295
296pub struct TemporalAttributeConfigBuilder {
298 config: TemporalAttributeConfig,
299}
300
301impl TemporalAttributeConfigBuilder {
302 pub fn new() -> Self {
304 Self {
305 config: TemporalAttributeConfig::default(),
306 }
307 }
308
309 pub fn enabled(mut self, enabled: bool) -> Self {
311 self.config.enabled = enabled;
312 self
313 }
314
315 pub fn closed_probability(mut self, prob: f64) -> Self {
317 self.config.valid_time.closed_probability = prob.clamp(0.0, 1.0);
318 self
319 }
320
321 pub fn avg_validity_days(mut self, days: u32) -> Self {
323 self.config.valid_time.avg_validity_days = days;
324 self
325 }
326
327 pub fn avg_recording_delay(mut self, seconds: u32) -> Self {
329 self.config.transaction_time.avg_recording_delay_seconds = seconds;
330 self
331 }
332
333 pub fn allow_backdating(mut self, prob: f64) -> Self {
335 self.config.transaction_time.allow_backdating = true;
336 self.config.transaction_time.backdating_probability = prob.clamp(0.0, 1.0);
337 self
338 }
339
340 pub fn with_version_chains(mut self, avg_versions: f64) -> Self {
342 self.config.generate_version_chains = true;
343 self.config.avg_versions_per_entity = avg_versions.max(1.0);
344 self
345 }
346
347 pub fn build(self) -> TemporalAttributeConfig {
349 self.config
350 }
351}
352
353impl Default for TemporalAttributeConfigBuilder {
354 fn default() -> Self {
355 Self::new()
356 }
357}
358
#[cfg(test)]
// Unwrapping is fine in tests: a failed unwrap is simply a failed test.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// An original record keeps its payload and is tagged `Original`.
    #[test]
    fn test_generate_temporal() {
        let base_date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let mut generator = TemporalAttributeGenerator::with_defaults(42, base_date);

        let entity = "test_entity";
        let temporal = generator.generate_temporal(entity.to_string());

        assert_eq!(temporal.data, "test_entity");
        assert!(temporal.recorded_at > DateTime::<Utc>::MIN_UTC);
        assert_eq!(temporal.change_type, TemporalChangeType::Original);
    }

    /// Valid-from stays inside the ±365-day window and closed intervals end
    /// after they start; both open and closed intervals occur.
    #[test]
    fn test_generate_valid_time() {
        let base_date = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap();
        let config = TemporalAttributeConfig {
            valid_time: ValidTimeConfig {
                closed_probability: 0.5,
                avg_validity_days: 30,
                validity_stddev_days: 10,
            },
            ..Default::default()
        };
        let mut generator = TemporalAttributeGenerator::new(config, 42, base_date);

        let earliest = base_date - Duration::days(365);
        let latest = base_date + Duration::days(365);
        let mut has_closed = false;
        let mut has_open = false;

        for _ in 0..100 {
            let (valid_from, valid_to) = generator.generate_valid_time();
            assert!(valid_from.date() >= earliest);
            assert!(valid_from.date() <= latest);

            match valid_to {
                Some(end) => {
                    has_closed = true;
                    assert!(end > valid_from);
                }
                None => has_open = true,
            }
        }

        assert!(has_closed);
        assert!(has_open);
    }

    /// Without backdating, the recording time never precedes `valid_from`.
    #[test]
    fn test_generate_transaction_time() {
        let base_date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let config = TemporalAttributeConfig {
            transaction_time: TransactionTimeConfig {
                avg_recording_delay_seconds: 3600,
                allow_backdating: false,
                ..Default::default()
            },
            ..Default::default()
        };
        let mut generator = TemporalAttributeGenerator::new(config, 42, base_date);

        let valid_from = DateTime::from_timestamp(1704067200, 0).unwrap().naive_utc();
        let transaction_time = generator.generate_transaction_time(valid_from);

        let valid_from_utc = DateTime::<Utc>::from_naive_utc_and_offset(valid_from, Utc);
        assert!(transaction_time >= valid_from_utc);
    }

    /// A chain always contains at least its original version.
    #[test]
    fn test_generate_version_chain() {
        let base_date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let config = TemporalAttributeConfig {
            generate_version_chains: true,
            avg_versions_per_entity: 3.0,
            ..Default::default()
        };
        let mut generator = TemporalAttributeGenerator::new(config, 42, base_date);

        let entity = "test_entity";
        let chain = generator.generate_version_chain(entity.to_string(), Uuid::new_v4());

        assert!(!chain.all_versions().is_empty());
    }

    /// Every builder setter lands in the corresponding configuration field.
    #[test]
    fn test_config_builder() {
        let config = TemporalAttributeConfigBuilder::new()
            .enabled(true)
            .closed_probability(0.3)
            .avg_validity_days(180)
            .avg_recording_delay(60)
            .allow_backdating(0.05)
            .with_version_chains(2.5)
            .build();

        assert!(config.enabled);
        assert_eq!(config.valid_time.closed_probability, 0.3);
        assert_eq!(config.valid_time.avg_validity_days, 180);
        assert_eq!(config.transaction_time.avg_recording_delay_seconds, 60);
        assert!(config.transaction_time.allow_backdating);
        assert_eq!(config.transaction_time.backdating_probability, 0.05);
        assert!(config.generate_version_chains);
        assert_eq!(config.avg_versions_per_entity, 2.5);
    }

    /// The counter tracks `generate_temporal` calls and `reset` zeroes it.
    #[test]
    fn test_generator_count() {
        let base_date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let mut generator = TemporalAttributeGenerator::with_defaults(42, base_date);

        assert_eq!(generator.count(), 0);

        for _ in 0..5 {
            generator.generate_temporal("entity".to_string());
        }

        assert_eq!(generator.count(), 5);

        generator.reset(42);
        assert_eq!(generator.count(), 0);
    }
}