// Source: datasynth_generators/temporal/temporal_generator.rs
use chrono::{DateTime, Duration, NaiveDate, NaiveDateTime, Utc};
7use rand::prelude::*;
8use rand_chacha::ChaCha8Rng;
9use serde::{Deserialize, Serialize};
10use uuid::Uuid;
11
12use datasynth_core::models::{BiTemporal, TemporalChangeType, TemporalVersionChain};
13
14#[derive(Debug, Clone, Serialize, Deserialize)]
16pub struct TemporalAttributeConfig {
17 pub enabled: bool,
19 pub valid_time: ValidTimeConfig,
21 pub transaction_time: TransactionTimeConfig,
23 pub generate_version_chains: bool,
25 pub avg_versions_per_entity: f64,
27}
28
29impl Default for TemporalAttributeConfig {
30 fn default() -> Self {
31 Self {
32 enabled: true,
33 valid_time: ValidTimeConfig::default(),
34 transaction_time: TransactionTimeConfig::default(),
35 generate_version_chains: false,
36 avg_versions_per_entity: 1.5,
37 }
38 }
39}
40
41#[derive(Debug, Clone, Serialize, Deserialize)]
43pub struct ValidTimeConfig {
44 pub closed_probability: f64,
46 pub avg_validity_days: u32,
48 pub validity_stddev_days: u32,
50}
51
52impl Default for ValidTimeConfig {
53 fn default() -> Self {
54 Self {
55 closed_probability: 0.1,
56 avg_validity_days: 365,
57 validity_stddev_days: 90,
58 }
59 }
60}
61
62#[derive(Debug, Clone, Serialize, Deserialize)]
64pub struct TransactionTimeConfig {
65 pub avg_recording_delay_seconds: u32,
67 pub allow_backdating: bool,
69 pub backdating_probability: f64,
71 pub max_backdate_days: u32,
73}
74
75impl Default for TransactionTimeConfig {
76 fn default() -> Self {
77 Self {
78 avg_recording_delay_seconds: 0,
79 allow_backdating: false,
80 backdating_probability: 0.01,
81 max_backdate_days: 30,
82 }
83 }
84}
85
86pub struct TemporalAttributeGenerator {
88 config: TemporalAttributeConfig,
90 rng: ChaCha8Rng,
92 base_date: NaiveDate,
94 count: u64,
96}
97
98impl TemporalAttributeGenerator {
99 pub fn new(config: TemporalAttributeConfig, seed: u64, base_date: NaiveDate) -> Self {
101 Self {
102 config,
103 rng: ChaCha8Rng::seed_from_u64(seed),
104 base_date,
105 count: 0,
106 }
107 }
108
109 pub fn with_defaults(seed: u64, base_date: NaiveDate) -> Self {
111 Self::new(TemporalAttributeConfig::default(), seed, base_date)
112 }
113
114 pub fn generate_temporal<T: Clone>(&mut self, entity: T) -> BiTemporal<T> {
116 self.count += 1;
117
118 let (valid_from, valid_to) = self.generate_valid_time();
119 let transaction_time = self.generate_transaction_time(valid_from);
120
121 let recorded_by = format!("system_{}", self.rng.gen_range(1..=100));
122 let mut temporal = BiTemporal::new(entity)
123 .with_valid_time(valid_from, valid_to)
124 .with_recorded_at(transaction_time)
125 .with_recorded_by(&recorded_by)
126 .with_change_type(TemporalChangeType::Original);
127
128 if self.rng.gen_bool(0.2) {
130 temporal = temporal.with_change_reason("Initial creation");
131 }
132
133 temporal
134 }
135
136 pub fn generate_version_chain<T: Clone>(
138 &mut self,
139 entity: T,
140 id: Uuid,
141 ) -> TemporalVersionChain<T> {
142 let num_versions = if self.config.generate_version_chains {
144 let base_versions = self.config.avg_versions_per_entity;
145 let lambda = base_versions;
147 let mut count = 0;
148 let mut p = 1.0;
149 let l = (-lambda).exp();
150 loop {
151 count += 1;
152 p *= self.rng.gen::<f64>();
153 if p <= l {
154 break;
155 }
156 }
157 count.max(1)
158 } else {
159 1
160 };
161
162 let initial_temporal = self.generate_temporal(entity.clone());
164 let mut chain = TemporalVersionChain::new(id, initial_temporal);
165
166 let current_entity = entity;
168 for i in 1..num_versions {
169 let change_type = if i == num_versions - 1 && self.rng.gen_bool(0.1) {
171 TemporalChangeType::Reversal
172 } else if self.rng.gen_bool(0.3) {
173 TemporalChangeType::Correction
174 } else {
175 TemporalChangeType::Adjustment
176 };
177
178 let version = self.generate_version(current_entity.clone(), change_type);
179 chain.add_version(version);
180 }
181
182 chain
183 }
184
185 fn generate_version<T: Clone>(
187 &mut self,
188 entity: T,
189 change_type: TemporalChangeType,
190 ) -> BiTemporal<T> {
191 let (valid_from, valid_to) = self.generate_valid_time();
192 let transaction_time = self.generate_transaction_time(valid_from);
193
194 let reason: Option<&str> = match change_type {
195 TemporalChangeType::Correction => Some("Data correction"),
196 TemporalChangeType::Adjustment => Some("Adjustment per policy"),
197 TemporalChangeType::Reversal => Some("Reversed entry"),
198 _ => None,
199 };
200
201 let recorded_by = format!("user_{}", self.rng.gen_range(1..=50));
202 let mut temporal = BiTemporal::new(entity)
203 .with_valid_time(valid_from, valid_to)
204 .with_recorded_at(transaction_time)
205 .with_recorded_by(&recorded_by)
206 .with_change_type(change_type);
207
208 if let Some(r) = reason {
209 temporal = temporal.with_change_reason(r);
210 }
211
212 temporal
213 }
214
215 pub fn generate_valid_time(&mut self) -> (NaiveDateTime, Option<NaiveDateTime>) {
217 let days_offset = self.rng.gen_range(-365..=365);
219 let valid_from_date = self.base_date + Duration::days(days_offset as i64);
220 let valid_from = valid_from_date
221 .and_hms_opt(
222 self.rng.gen_range(0..24),
223 self.rng.gen_range(0..60),
224 self.rng.gen_range(0..60),
225 )
226 .expect("valid h/m/s ranges");
227
228 let valid_to = if self.rng.gen_bool(self.config.valid_time.closed_probability) {
230 let avg_days = self.config.valid_time.avg_validity_days as f64;
232 let stddev_days = self.config.valid_time.validity_stddev_days as f64;
233
234 let duration_days = (avg_days + self.rng.gen::<f64>() * stddev_days * 2.0 - stddev_days)
236 .max(1.0) as i64;
237
238 Some(valid_from + Duration::days(duration_days))
239 } else {
240 None
241 };
242
243 (valid_from, valid_to)
244 }
245
246 pub fn generate_transaction_time(&mut self, valid_from: NaiveDateTime) -> DateTime<Utc> {
248 let base_time = DateTime::<Utc>::from_naive_utc_and_offset(valid_from, Utc);
249
250 let delay_secs = if self.config.transaction_time.avg_recording_delay_seconds > 0 {
252 let avg = self.config.transaction_time.avg_recording_delay_seconds as f64;
253 let delay = -avg * self.rng.gen::<f64>().ln();
255 delay as i64
256 } else {
257 0
258 };
259
260 let recorded_at = base_time + Duration::seconds(delay_secs);
261
262 if self.config.transaction_time.allow_backdating
264 && self
265 .rng
266 .gen_bool(self.config.transaction_time.backdating_probability)
267 {
268 let backdate_days = self
269 .rng
270 .gen_range(1..=self.config.transaction_time.max_backdate_days)
271 as i64;
272 recorded_at - Duration::days(backdate_days)
273 } else {
274 recorded_at
275 }
276 }
277
278 pub fn count(&self) -> u64 {
280 self.count
281 }
282
283 pub fn reset(&mut self, seed: u64) {
285 self.rng = ChaCha8Rng::seed_from_u64(seed);
286 self.count = 0;
287 }
288
289 pub fn config(&self) -> &TemporalAttributeConfig {
291 &self.config
292 }
293}
294
295pub struct TemporalAttributeConfigBuilder {
297 config: TemporalAttributeConfig,
298}
299
300impl TemporalAttributeConfigBuilder {
301 pub fn new() -> Self {
303 Self {
304 config: TemporalAttributeConfig::default(),
305 }
306 }
307
308 pub fn enabled(mut self, enabled: bool) -> Self {
310 self.config.enabled = enabled;
311 self
312 }
313
314 pub fn closed_probability(mut self, prob: f64) -> Self {
316 self.config.valid_time.closed_probability = prob.clamp(0.0, 1.0);
317 self
318 }
319
320 pub fn avg_validity_days(mut self, days: u32) -> Self {
322 self.config.valid_time.avg_validity_days = days;
323 self
324 }
325
326 pub fn avg_recording_delay(mut self, seconds: u32) -> Self {
328 self.config.transaction_time.avg_recording_delay_seconds = seconds;
329 self
330 }
331
332 pub fn allow_backdating(mut self, prob: f64) -> Self {
334 self.config.transaction_time.allow_backdating = true;
335 self.config.transaction_time.backdating_probability = prob.clamp(0.0, 1.0);
336 self
337 }
338
339 pub fn with_version_chains(mut self, avg_versions: f64) -> Self {
341 self.config.generate_version_chains = true;
342 self.config.avg_versions_per_entity = avg_versions.max(1.0);
343 self
344 }
345
346 pub fn build(self) -> TemporalAttributeConfig {
348 self.config
349 }
350}
351
352impl Default for TemporalAttributeConfigBuilder {
353 fn default() -> Self {
354 Self::new()
355 }
356}
357
#[cfg(test)]
// `unwrap` is acceptable here: a failed unwrap should simply fail the test.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// A generated record keeps its payload and is marked as the original.
    #[test]
    fn test_generate_temporal() {
        let base_date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let mut generator = TemporalAttributeGenerator::with_defaults(42, base_date);

        let entity = "test_entity";
        let temporal = generator.generate_temporal(entity.to_string());

        assert_eq!(temporal.data, "test_entity");
        assert!(temporal.recorded_at > DateTime::<Utc>::MIN_UTC);
        assert_eq!(temporal.change_type, TemporalChangeType::Original);
    }

    /// `valid_from` stays within a year of the base date, and closed
    /// intervals end strictly after they start.
    #[test]
    fn test_generate_valid_time() {
        let base_date = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap();
        let config = TemporalAttributeConfig {
            valid_time: ValidTimeConfig {
                closed_probability: 0.5,
                avg_validity_days: 30,
                validity_stddev_days: 10,
            },
            ..Default::default()
        };
        let mut generator = TemporalAttributeGenerator::new(config, 42, base_date);

        let mut has_closed = false;
        let mut has_open = false;

        for _ in 0..100 {
            let (valid_from, valid_to) = generator.generate_valid_time();
            assert!(valid_from.date() >= base_date - Duration::days(365));
            assert!(valid_from.date() <= base_date + Duration::days(365));

            if let Some(end) = valid_to {
                has_closed = true;
                assert!(end > valid_from);
            } else {
                has_open = true;
            }
        }

        // With a 50% closing probability both outcomes must show up.
        assert!(has_closed);
        assert!(has_open);
    }

    /// Without backdating the recording time never precedes `valid_from`.
    #[test]
    fn test_generate_transaction_time() {
        let base_date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let config = TemporalAttributeConfig {
            transaction_time: TransactionTimeConfig {
                avg_recording_delay_seconds: 3600,
                allow_backdating: false,
                ..Default::default()
            },
            ..Default::default()
        };
        let mut generator = TemporalAttributeGenerator::new(config, 42, base_date);

        let valid_from = DateTime::from_timestamp(1704067200, 0).unwrap().naive_utc();
        let valid_from_utc = DateTime::<Utc>::from_naive_utc_and_offset(valid_from, Utc);

        for _ in 0..50 {
            let transaction_time = generator.generate_transaction_time(valid_from);
            assert!(transaction_time >= valid_from_utc);
        }
    }

    /// A version chain always contains at least the original version.
    #[test]
    fn test_generate_version_chain() {
        let base_date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let config = TemporalAttributeConfig {
            generate_version_chains: true,
            avg_versions_per_entity: 3.0,
            ..Default::default()
        };
        let mut generator = TemporalAttributeGenerator::new(config, 42, base_date);

        let entity = "test_entity";
        let chain = generator.generate_version_chain(entity.to_string(), Uuid::new_v4());

        assert!(!chain.all_versions().is_empty());
    }

    /// Every builder setter lands in the expected configuration field.
    #[test]
    fn test_config_builder() {
        let config = TemporalAttributeConfigBuilder::new()
            .enabled(true)
            .closed_probability(0.3)
            .avg_validity_days(180)
            .avg_recording_delay(60)
            .allow_backdating(0.05)
            .with_version_chains(2.5)
            .build();

        assert!(config.enabled);
        assert_eq!(config.valid_time.closed_probability, 0.3);
        assert_eq!(config.valid_time.avg_validity_days, 180);
        assert_eq!(config.transaction_time.avg_recording_delay_seconds, 60);
        assert!(config.transaction_time.allow_backdating);
        assert_eq!(config.transaction_time.backdating_probability, 0.05);
        assert!(config.generate_version_chains);
        assert_eq!(config.avg_versions_per_entity, 2.5);
    }

    /// The counter tracks `generate_temporal` calls and is cleared by `reset`.
    #[test]
    fn test_generator_count() {
        let base_date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let mut generator = TemporalAttributeGenerator::with_defaults(42, base_date);

        assert_eq!(generator.count(), 0);

        for _ in 0..5 {
            generator.generate_temporal("entity".to_string());
        }

        assert_eq!(generator.count(), 5);

        generator.reset(42);
        assert_eq!(generator.count(), 0);
    }

    /// Two generators built from the same seed produce the same sequence.
    #[test]
    fn test_same_seed_is_deterministic() {
        let base_date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let mut first = TemporalAttributeGenerator::with_defaults(7, base_date);
        let mut second = TemporalAttributeGenerator::with_defaults(7, base_date);

        for _ in 0..20 {
            assert_eq!(first.generate_valid_time(), second.generate_valid_time());
        }
    }

    /// Resetting with the original seed replays the original sequence.
    #[test]
    fn test_reset_replays_sequence() {
        let base_date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let mut generator = TemporalAttributeGenerator::with_defaults(42, base_date);

        let before = generator.generate_valid_time();
        generator.reset(42);
        let after = generator.generate_valid_time();

        assert_eq!(before, after);
    }
}