datasynth_generators/temporal/
temporal_generator.rs1use chrono::{DateTime, Duration, NaiveDate, NaiveDateTime, Utc};
7use rand::prelude::*;
8use rand_chacha::ChaCha8Rng;
9use serde::{Deserialize, Serialize};
10use uuid::Uuid;
11
12use datasynth_core::models::{BiTemporal, TemporalChangeType, TemporalVersionChain};
13
14#[derive(Debug, Clone, Serialize, Deserialize)]
16pub struct TemporalAttributeConfig {
17 pub enabled: bool,
19 pub valid_time: ValidTimeConfig,
21 pub transaction_time: TransactionTimeConfig,
23 pub generate_version_chains: bool,
25 pub avg_versions_per_entity: f64,
27}
28
29impl Default for TemporalAttributeConfig {
30 fn default() -> Self {
31 Self {
32 enabled: true,
33 valid_time: ValidTimeConfig::default(),
34 transaction_time: TransactionTimeConfig::default(),
35 generate_version_chains: false,
36 avg_versions_per_entity: 1.5,
37 }
38 }
39}
40
41#[derive(Debug, Clone, Serialize, Deserialize)]
43pub struct ValidTimeConfig {
44 pub closed_probability: f64,
46 pub avg_validity_days: u32,
48 pub validity_stddev_days: u32,
50}
51
52impl Default for ValidTimeConfig {
53 fn default() -> Self {
54 Self {
55 closed_probability: 0.1,
56 avg_validity_days: 365,
57 validity_stddev_days: 90,
58 }
59 }
60}
61
62#[derive(Debug, Clone, Serialize, Deserialize)]
64pub struct TransactionTimeConfig {
65 pub avg_recording_delay_seconds: u32,
67 pub allow_backdating: bool,
69 pub backdating_probability: f64,
71 pub max_backdate_days: u32,
73}
74
75impl Default for TransactionTimeConfig {
76 fn default() -> Self {
77 Self {
78 avg_recording_delay_seconds: 0,
79 allow_backdating: false,
80 backdating_probability: 0.01,
81 max_backdate_days: 30,
82 }
83 }
84}
85
86pub struct TemporalAttributeGenerator {
88 config: TemporalAttributeConfig,
90 rng: ChaCha8Rng,
92 base_date: NaiveDate,
94 count: u64,
96}
97
98impl TemporalAttributeGenerator {
99 pub fn new(config: TemporalAttributeConfig, seed: u64, base_date: NaiveDate) -> Self {
101 Self {
102 config,
103 rng: ChaCha8Rng::seed_from_u64(seed),
104 base_date,
105 count: 0,
106 }
107 }
108
109 pub fn with_defaults(seed: u64, base_date: NaiveDate) -> Self {
111 Self::new(TemporalAttributeConfig::default(), seed, base_date)
112 }
113
114 pub fn generate_temporal<T: Clone>(&mut self, entity: T) -> BiTemporal<T> {
116 self.count += 1;
117
118 let (valid_from, valid_to) = self.generate_valid_time();
119 let transaction_time = self.generate_transaction_time(valid_from);
120
121 let recorded_by = format!("system_{}", self.rng.gen_range(1..=100));
122 let mut temporal = BiTemporal::new(entity)
123 .with_valid_time(valid_from, valid_to)
124 .with_recorded_at(transaction_time)
125 .with_recorded_by(&recorded_by)
126 .with_change_type(TemporalChangeType::Original);
127
128 if self.rng.gen_bool(0.2) {
130 temporal = temporal.with_change_reason("Initial creation");
131 }
132
133 temporal
134 }
135
136 pub fn generate_version_chain<T: Clone>(
138 &mut self,
139 entity: T,
140 id: Uuid,
141 ) -> TemporalVersionChain<T> {
142 let num_versions = if self.config.generate_version_chains {
144 let base_versions = self.config.avg_versions_per_entity;
145 let lambda = base_versions;
147 let mut count = 0;
148 let mut p = 1.0;
149 let l = (-lambda).exp();
150 loop {
151 count += 1;
152 p *= self.rng.gen::<f64>();
153 if p <= l {
154 break;
155 }
156 }
157 count.max(1)
158 } else {
159 1
160 };
161
162 let initial_temporal = self.generate_temporal(entity.clone());
164 let mut chain = TemporalVersionChain::new(id, initial_temporal);
165
166 let current_entity = entity;
168 for i in 1..num_versions {
169 let change_type = if i == num_versions - 1 && self.rng.gen_bool(0.1) {
171 TemporalChangeType::Reversal
172 } else if self.rng.gen_bool(0.3) {
173 TemporalChangeType::Correction
174 } else {
175 TemporalChangeType::Adjustment
176 };
177
178 let version = self.generate_version(current_entity.clone(), change_type);
179 chain.add_version(version);
180 }
181
182 chain
183 }
184
185 fn generate_version<T: Clone>(
187 &mut self,
188 entity: T,
189 change_type: TemporalChangeType,
190 ) -> BiTemporal<T> {
191 let (valid_from, valid_to) = self.generate_valid_time();
192 let transaction_time = self.generate_transaction_time(valid_from);
193
194 let reason: Option<&str> = match change_type {
195 TemporalChangeType::Correction => Some("Data correction"),
196 TemporalChangeType::Adjustment => Some("Adjustment per policy"),
197 TemporalChangeType::Reversal => Some("Reversed entry"),
198 _ => None,
199 };
200
201 let recorded_by = format!("user_{}", self.rng.gen_range(1..=50));
202 let mut temporal = BiTemporal::new(entity)
203 .with_valid_time(valid_from, valid_to)
204 .with_recorded_at(transaction_time)
205 .with_recorded_by(&recorded_by)
206 .with_change_type(change_type);
207
208 if let Some(r) = reason {
209 temporal = temporal.with_change_reason(r);
210 }
211
212 temporal
213 }
214
215 pub fn generate_valid_time(&mut self) -> (NaiveDateTime, Option<NaiveDateTime>) {
217 let days_offset = self.rng.gen_range(-365..=365);
219 let valid_from_date = self.base_date + Duration::days(days_offset as i64);
220 let valid_from = valid_from_date
221 .and_hms_opt(
222 self.rng.gen_range(0..24),
223 self.rng.gen_range(0..60),
224 self.rng.gen_range(0..60),
225 )
226 .unwrap();
227
228 let valid_to = if self.rng.gen_bool(self.config.valid_time.closed_probability) {
230 let avg_days = self.config.valid_time.avg_validity_days as f64;
232 let stddev_days = self.config.valid_time.validity_stddev_days as f64;
233
234 let duration_days = (avg_days + self.rng.gen::<f64>() * stddev_days * 2.0 - stddev_days)
236 .max(1.0) as i64;
237
238 Some(valid_from + Duration::days(duration_days))
239 } else {
240 None
241 };
242
243 (valid_from, valid_to)
244 }
245
246 pub fn generate_transaction_time(&mut self, valid_from: NaiveDateTime) -> DateTime<Utc> {
248 let base_time = DateTime::<Utc>::from_naive_utc_and_offset(valid_from, Utc);
249
250 let delay_secs = if self.config.transaction_time.avg_recording_delay_seconds > 0 {
252 let avg = self.config.transaction_time.avg_recording_delay_seconds as f64;
253 let delay = -avg * self.rng.gen::<f64>().ln();
255 delay as i64
256 } else {
257 0
258 };
259
260 let recorded_at = base_time + Duration::seconds(delay_secs);
261
262 if self.config.transaction_time.allow_backdating
264 && self
265 .rng
266 .gen_bool(self.config.transaction_time.backdating_probability)
267 {
268 let backdate_days = self
269 .rng
270 .gen_range(1..=self.config.transaction_time.max_backdate_days)
271 as i64;
272 recorded_at - Duration::days(backdate_days)
273 } else {
274 recorded_at
275 }
276 }
277
278 pub fn count(&self) -> u64 {
280 self.count
281 }
282
283 pub fn reset(&mut self, seed: u64) {
285 self.rng = ChaCha8Rng::seed_from_u64(seed);
286 self.count = 0;
287 }
288
289 pub fn config(&self) -> &TemporalAttributeConfig {
291 &self.config
292 }
293}
294
295pub struct TemporalAttributeConfigBuilder {
297 config: TemporalAttributeConfig,
298}
299
300impl TemporalAttributeConfigBuilder {
301 pub fn new() -> Self {
303 Self {
304 config: TemporalAttributeConfig::default(),
305 }
306 }
307
308 pub fn enabled(mut self, enabled: bool) -> Self {
310 self.config.enabled = enabled;
311 self
312 }
313
314 pub fn closed_probability(mut self, prob: f64) -> Self {
316 self.config.valid_time.closed_probability = prob.clamp(0.0, 1.0);
317 self
318 }
319
320 pub fn avg_validity_days(mut self, days: u32) -> Self {
322 self.config.valid_time.avg_validity_days = days;
323 self
324 }
325
326 pub fn avg_recording_delay(mut self, seconds: u32) -> Self {
328 self.config.transaction_time.avg_recording_delay_seconds = seconds;
329 self
330 }
331
332 pub fn allow_backdating(mut self, prob: f64) -> Self {
334 self.config.transaction_time.allow_backdating = true;
335 self.config.transaction_time.backdating_probability = prob.clamp(0.0, 1.0);
336 self
337 }
338
339 pub fn with_version_chains(mut self, avg_versions: f64) -> Self {
341 self.config.generate_version_chains = true;
342 self.config.avg_versions_per_entity = avg_versions.max(1.0);
343 self
344 }
345
346 pub fn build(self) -> TemporalAttributeConfig {
348 self.config
349 }
350}
351
352impl Default for TemporalAttributeConfigBuilder {
353 fn default() -> Self {
354 Self::new()
355 }
356}
357
#[cfg(test)]
mod tests {
    use super::*;

    /// Seed shared by all tests so failures are reproducible.
    const SEED: u64 = 42;

    /// A freshly generated record keeps its payload and is marked `Original`.
    #[test]
    fn test_generate_temporal() {
        let base_date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let mut generator = TemporalAttributeGenerator::with_defaults(SEED, base_date);

        let entity = "test_entity";
        let temporal = generator.generate_temporal(entity.to_string());

        assert_eq!(temporal.data, "test_entity");
        assert!(temporal.recorded_at > DateTime::<Utc>::MIN_UTC);
        assert_eq!(temporal.change_type, TemporalChangeType::Original);
    }

    /// `valid_from` stays within a year of the base date, closed intervals
    /// end after they start, and a 50 % closed probability yields both open
    /// and closed intervals over 100 draws.
    #[test]
    fn test_generate_valid_time() {
        let base_date = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap();
        let config = TemporalAttributeConfig {
            valid_time: ValidTimeConfig {
                closed_probability: 0.5,
                avg_validity_days: 30,
                validity_stddev_days: 10,
            },
            ..Default::default()
        };
        let mut generator = TemporalAttributeGenerator::new(config, SEED, base_date);

        let mut has_closed = false;
        let mut has_open = false;

        for _ in 0..100 {
            let (valid_from, valid_to) = generator.generate_valid_time();
            assert!(valid_from.date() >= base_date - Duration::days(365));
            assert!(valid_from.date() <= base_date + Duration::days(365));

            match valid_to {
                Some(end) => {
                    has_closed = true;
                    assert!(end > valid_from);
                }
                None => has_open = true,
            }
        }

        assert!(has_closed);
        assert!(has_open);
    }

    /// Without backdating the recording time never precedes `valid_from`.
    #[test]
    fn test_generate_transaction_time() {
        let base_date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let config = TemporalAttributeConfig {
            transaction_time: TransactionTimeConfig {
                avg_recording_delay_seconds: 3600,
                allow_backdating: false,
                ..Default::default()
            },
            ..Default::default()
        };
        let mut generator = TemporalAttributeGenerator::new(config, SEED, base_date);

        // 2024-01-01T00:00:00 (Unix timestamp 1704067200), built without the
        // deprecated `NaiveDateTime::from_timestamp_opt`.
        let valid_from = base_date.and_hms_opt(0, 0, 0).unwrap();
        let valid_from_utc = DateTime::<Utc>::from_naive_utc_and_offset(valid_from, Utc);

        for _ in 0..100 {
            let transaction_time = generator.generate_transaction_time(valid_from);
            assert!(transaction_time >= valid_from_utc);
        }
    }

    /// A generated chain always contains at least the original version.
    #[test]
    fn test_generate_version_chain() {
        let base_date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let config = TemporalAttributeConfig {
            generate_version_chains: true,
            avg_versions_per_entity: 3.0,
            ..Default::default()
        };
        let mut generator = TemporalAttributeGenerator::new(config, SEED, base_date);

        let entity = "test_entity";
        let chain = generator.generate_version_chain(entity.to_string(), Uuid::new_v4());

        assert!(!chain.all_versions().is_empty());
    }

    /// Every builder setter lands in the corresponding config field.
    #[test]
    fn test_config_builder() {
        let config = TemporalAttributeConfigBuilder::new()
            .enabled(true)
            .closed_probability(0.3)
            .avg_validity_days(180)
            .avg_recording_delay(60)
            .allow_backdating(0.05)
            .with_version_chains(2.5)
            .build();

        assert!(config.enabled);
        assert_eq!(config.valid_time.closed_probability, 0.3);
        assert_eq!(config.valid_time.avg_validity_days, 180);
        assert_eq!(config.transaction_time.avg_recording_delay_seconds, 60);
        assert!(config.transaction_time.allow_backdating);
        assert_eq!(config.transaction_time.backdating_probability, 0.05);
        assert!(config.generate_version_chains);
        assert_eq!(config.avg_versions_per_entity, 2.5);
    }

    /// The counter tracks `generate_temporal` calls and is zeroed by `reset`.
    #[test]
    fn test_generator_count() {
        let base_date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let mut generator = TemporalAttributeGenerator::with_defaults(SEED, base_date);

        assert_eq!(generator.count(), 0);

        for _ in 0..5 {
            generator.generate_temporal("entity".to_string());
        }

        assert_eq!(generator.count(), 5);

        generator.reset(SEED);
        assert_eq!(generator.count(), 0);
    }

    /// Resetting with the original seed replays the same random sequence.
    #[test]
    fn test_reset_replays_sequence() {
        let base_date = NaiveDate::from_ymd_opt(2024, 1, 1).unwrap();
        let mut generator = TemporalAttributeGenerator::with_defaults(SEED, base_date);

        let first: Vec<_> = (0..10).map(|_| generator.generate_valid_time()).collect();
        generator.reset(SEED);
        let second: Vec<_> = (0..10).map(|_| generator.generate_valid_time()).collect();

        assert_eq!(first, second);
    }
}