Skip to main content

datasynth_core/distributions/
drift.rs

1//! Temporal drift simulation for realistic data distribution evolution.
2//!
3//! Implements gradual, sudden, and seasonal drift patterns commonly observed
4//! in real-world enterprise data, useful for training drift detection models.
5
6use rand::prelude::*;
7use rand_chacha::ChaCha8Rng;
8use serde::{Deserialize, Serialize};
9
/// Types of temporal drift patterns.
///
/// Selects which adjustment model `DriftController::compute_adjustments`
/// applies for a period. Serialized in `snake_case` (e.g. `"gradual"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum DriftType {
    /// Gradual, continuous drift over time (like inflation).
    ///
    /// This is the default variant.
    #[default]
    Gradual,
    /// Sudden, point-in-time shifts (like policy changes).
    ///
    /// The affected periods are drawn once when the controller is built.
    Sudden,
    /// Recurring patterns that cycle (like seasonal variations).
    ///
    /// The controller models this as a sinusoid with a 12-period cycle.
    Recurring,
    /// Combination of gradual background drift with occasional sudden shifts.
    ///
    /// Gradual drift is applied first, then sudden events are multiplied in.
    Mixed,
}
24
/// Configuration for temporal drift simulation.
///
/// All rates are expressed per period. The seasonal table and the recurring
/// cycle in `DriftController` both use a 12-period cycle, i.e. they treat a
/// period as one month.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DriftConfig {
    /// Enable temporal drift simulation. When `false`, every period yields
    /// identity adjustments.
    pub enabled: bool,
    /// Amount mean drift per period (e.g., 0.02 = 2% shift per month).
    /// Compounded as `(1 + rate)^period` by gradual drift.
    pub amount_mean_drift: f64,
    /// Amount variance drift per period, compounded like the mean drift.
    pub amount_variance_drift: f64,
    /// Anomaly rate drift per period, accumulated linearly by gradual drift.
    pub anomaly_rate_drift: f64,
    /// Concept drift rate (0.0-1.0).
    ///
    /// Gradual drift accumulates it linearly and caps the result at 1.0;
    /// recurring drift uses it as the amplitude of the sinusoidal cycle.
    pub concept_drift_rate: f64,
    /// Probability of sudden drift in any period (at or after the start period).
    pub sudden_drift_probability: f64,
    /// Magnitude of sudden drift events: each event multiplies the amount
    /// mean by this value and the amount variance by its square root.
    pub sudden_drift_magnitude: f64,
    /// Enable seasonal drift patterns (fills in the seasonal factor from a
    /// fixed month-of-year table).
    pub seasonal_drift: bool,
    /// Period to start drift (0 = from beginning). Earlier periods yield
    /// identity adjustments.
    pub drift_start_period: u32,
    /// Type of drift pattern.
    pub drift_type: DriftType,
}
49
50impl Default for DriftConfig {
51    fn default() -> Self {
52        Self {
53            enabled: false,
54            amount_mean_drift: 0.02,
55            amount_variance_drift: 0.0,
56            anomaly_rate_drift: 0.0,
57            concept_drift_rate: 0.01,
58            sudden_drift_probability: 0.0,
59            sudden_drift_magnitude: 2.0,
60            seasonal_drift: false,
61            drift_start_period: 0,
62            drift_type: DriftType::Gradual,
63        }
64    }
65}
66
/// Drift adjustments computed for a specific period.
///
/// The neutral value is the identity adjustment: multipliers and the seasonal
/// factor are `1.0`, additive terms are `0.0`. Both [`DriftAdjustments::none`]
/// and [`Default::default`] return that neutral value.
#[derive(Debug, Clone)]
pub struct DriftAdjustments {
    /// Multiplier for amount mean (1.0 = no change).
    pub amount_mean_multiplier: f64,
    /// Multiplier for amount variance (1.0 = no change).
    pub amount_variance_multiplier: f64,
    /// Additive adjustment to anomaly rate.
    pub anomaly_rate_adjustment: f64,
    /// Overall concept drift factor (0.0-1.0).
    pub concept_drift_factor: f64,
    /// Whether a sudden drift event occurred.
    pub sudden_drift_occurred: bool,
    /// Seasonal factor (1.0 = baseline, varies by month).
    pub seasonal_factor: f64,
}

impl DriftAdjustments {
    /// No drift (identity adjustments).
    pub fn none() -> Self {
        Self {
            amount_mean_multiplier: 1.0,
            amount_variance_multiplier: 1.0,
            anomaly_rate_adjustment: 0.0,
            concept_drift_factor: 0.0,
            sudden_drift_occurred: false,
            seasonal_factor: 1.0,
        }
    }
}

impl Default for DriftAdjustments {
    /// Returns the identity adjustments, same as [`DriftAdjustments::none`].
    ///
    /// A derived `Default` would zero every field, giving multipliers of
    /// `0.0` that wipe out amounts instead of leaving them unchanged.
    fn default() -> Self {
        Self::none()
    }
}
97
98/// Controller for computing and applying temporal drift.
99pub struct DriftController {
100    config: DriftConfig,
101    rng: ChaCha8Rng,
102    /// Track which periods had sudden drift events for reproducibility.
103    sudden_drift_periods: Vec<u32>,
104    /// Total periods in the simulation.
105    total_periods: u32,
106}
107
108impl DriftController {
109    /// Create a new drift controller with the given configuration.
110    pub fn new(config: DriftConfig, seed: u64, total_periods: u32) -> Self {
111        let mut controller = Self {
112            config,
113            rng: ChaCha8Rng::seed_from_u64(seed),
114            sudden_drift_periods: Vec::new(),
115            total_periods,
116        };
117
118        // Pre-compute sudden drift events for reproducibility
119        if controller.config.enabled
120            && (controller.config.drift_type == DriftType::Sudden
121                || controller.config.drift_type == DriftType::Mixed)
122        {
123            controller.precompute_sudden_drifts();
124        }
125
126        controller
127    }
128
129    /// Pre-compute which periods will have sudden drift events.
130    fn precompute_sudden_drifts(&mut self) {
131        for period in 0..self.total_periods {
132            if period >= self.config.drift_start_period
133                && self.rng.gen::<f64>() < self.config.sudden_drift_probability
134            {
135                self.sudden_drift_periods.push(period);
136            }
137        }
138    }
139
140    /// Check if drift is enabled.
141    pub fn is_enabled(&self) -> bool {
142        self.config.enabled
143    }
144
145    /// Compute drift adjustments for a specific period (0-indexed).
146    pub fn compute_adjustments(&self, period: u32) -> DriftAdjustments {
147        if !self.config.enabled {
148            return DriftAdjustments::none();
149        }
150
151        // No drift before start period
152        if period < self.config.drift_start_period {
153            return DriftAdjustments::none();
154        }
155
156        let effective_period = period - self.config.drift_start_period;
157        let mut adjustments = DriftAdjustments::none();
158
159        match self.config.drift_type {
160            DriftType::Gradual => {
161                self.apply_gradual_drift(&mut adjustments, effective_period);
162            }
163            DriftType::Sudden => {
164                self.apply_sudden_drift(&mut adjustments, period);
165            }
166            DriftType::Recurring => {
167                self.apply_recurring_drift(&mut adjustments, effective_period);
168            }
169            DriftType::Mixed => {
170                // Combine gradual background drift with sudden events
171                self.apply_gradual_drift(&mut adjustments, effective_period);
172                self.apply_sudden_drift(&mut adjustments, period);
173            }
174        }
175
176        // Apply seasonal drift if enabled (additive to other drift)
177        if self.config.seasonal_drift {
178            adjustments.seasonal_factor = self.compute_seasonal_factor(period);
179        }
180
181        adjustments
182    }
183
184    /// Apply gradual drift (compound growth model).
185    fn apply_gradual_drift(&self, adjustments: &mut DriftAdjustments, effective_period: u32) {
186        let p = effective_period as f64;
187
188        // Compound growth: (1 + rate)^period
189        adjustments.amount_mean_multiplier = (1.0 + self.config.amount_mean_drift).powf(p);
190
191        adjustments.amount_variance_multiplier = (1.0 + self.config.amount_variance_drift).powf(p);
192
193        // Linear accumulation for anomaly rate
194        adjustments.anomaly_rate_adjustment = self.config.anomaly_rate_drift * p;
195
196        // Concept drift accumulates but is bounded 0-1
197        adjustments.concept_drift_factor = (self.config.concept_drift_rate * p).min(1.0);
198    }
199
200    /// Apply sudden drift based on pre-computed events.
201    fn apply_sudden_drift(&self, adjustments: &mut DriftAdjustments, period: u32) {
202        // Count how many sudden events have occurred up to this period
203        let events_occurred: usize = self
204            .sudden_drift_periods
205            .iter()
206            .filter(|&&p| p <= period)
207            .count();
208
209        if events_occurred > 0 {
210            adjustments.sudden_drift_occurred = self.sudden_drift_periods.contains(&period);
211
212            // Each sudden event multiplies by the magnitude
213            let cumulative_magnitude = self
214                .config
215                .sudden_drift_magnitude
216                .powi(events_occurred as i32);
217
218            adjustments.amount_mean_multiplier *= cumulative_magnitude;
219            adjustments.amount_variance_multiplier *= cumulative_magnitude.sqrt();
220            // Variance grows slower
221        }
222    }
223
224    /// Apply recurring (seasonal) drift patterns.
225    fn apply_recurring_drift(&self, adjustments: &mut DriftAdjustments, effective_period: u32) {
226        // 12-month cycle for seasonality
227        let cycle_position = (effective_period % 12) as f64;
228        let cycle_radians = (cycle_position / 12.0) * 2.0 * std::f64::consts::PI;
229
230        // Sinusoidal pattern with configurable amplitude
231        let seasonal_amplitude = self.config.concept_drift_rate;
232        adjustments.amount_mean_multiplier = 1.0 + seasonal_amplitude * cycle_radians.sin();
233
234        // Phase-shifted variance pattern
235        adjustments.amount_variance_multiplier =
236            1.0 + (seasonal_amplitude * 0.5) * (cycle_radians + std::f64::consts::FRAC_PI_2).sin();
237    }
238
239    /// Compute seasonal factor based on period (month).
240    fn compute_seasonal_factor(&self, period: u32) -> f64 {
241        // Map period to month (0-11)
242        let month = period % 12;
243
244        // Q4 spike (Oct-Dec), Q1 dip (Jan-Feb)
245        match month {
246            0 | 1 => 0.85, // Jan-Feb: post-holiday slowdown
247            2 => 0.90,     // Mar: recovering
248            3 | 4 => 0.95, // Apr-May: Q2 start
249            5 => 1.0,      // Jun: mid-year
250            6 | 7 => 0.95, // Jul-Aug: summer slowdown
251            8 => 1.0,      // Sep: back to business
252            9 => 1.10,     // Oct: Q4 ramp-up
253            10 => 1.20,    // Nov: pre-holiday surge
254            11 => 1.30,    // Dec: year-end close
255            _ => 1.0,
256        }
257    }
258
259    /// Get the list of periods with sudden drift events.
260    pub fn sudden_drift_periods(&self) -> &[u32] {
261        &self.sudden_drift_periods
262    }
263
264    /// Get the configuration.
265    pub fn config(&self) -> &DriftConfig {
266        &self.config
267    }
268}
269
#[cfg(test)]
mod tests {
    use super::*;

    /// Seed shared by every test controller.
    const SEED: u64 = 42;
    /// Simulation length shared by every test controller.
    const PERIODS: u32 = 12;

    /// Builds a controller with the shared seed and simulation length.
    fn controller_for(config: DriftConfig) -> DriftController {
        DriftController::new(config, SEED, PERIODS)
    }

    /// True when `value` is within 0.001 of `target`.
    fn near(value: f64, target: f64) -> bool {
        (value - target).abs() < 0.001
    }

    #[test]
    fn test_no_drift_when_disabled() {
        let controller = controller_for(DriftConfig::default());

        let mid_year = controller.compute_adjustments(6);
        assert!(!controller.is_enabled());
        assert!(near(mid_year.amount_mean_multiplier, 1.0));
        assert!(near(mid_year.anomaly_rate_adjustment, 0.0));
    }

    #[test]
    fn test_gradual_drift() {
        let controller = controller_for(DriftConfig {
            enabled: true,
            amount_mean_drift: 0.02,
            anomaly_rate_drift: 0.001,
            drift_type: DriftType::Gradual,
            ..Default::default()
        });

        // Period 0: nothing has compounded yet.
        let at_start = controller.compute_adjustments(0).amount_mean_multiplier;
        assert!(near(at_start, 1.0));

        // Period 6: 1.02^6 ≈ 1.126.
        let at_six = controller.compute_adjustments(6).amount_mean_multiplier;
        assert!(at_six > 1.10);
        assert!(at_six < 1.15);

        // Period 12: 1.02^12 ≈ 1.268.
        let at_twelve = controller.compute_adjustments(12).amount_mean_multiplier;
        assert!(at_twelve > 1.20);
        assert!(at_twelve < 1.30);
    }

    #[test]
    fn test_drift_start_period() {
        let controller = controller_for(DriftConfig {
            enabled: true,
            amount_mean_drift: 0.02,
            drift_start_period: 3,
            drift_type: DriftType::Gradual,
            ..Default::default()
        });

        // Before the start period: identity.
        let before = controller.compute_adjustments(2).amount_mean_multiplier;
        assert!(near(before, 1.0));

        // At the start period: zero elapsed periods, still identity.
        let at_start = controller.compute_adjustments(3).amount_mean_multiplier;
        assert!(near(at_start, 1.0));

        // After the start period: drift has begun.
        let after = controller.compute_adjustments(6).amount_mean_multiplier;
        assert!(after > 1.0);
    }

    #[test]
    fn test_seasonal_factor() {
        let controller = controller_for(DriftConfig {
            enabled: true,
            seasonal_drift: true,
            drift_type: DriftType::Gradual,
            ..Default::default()
        });

        // December (month 11) carries the year-end spike.
        let december = controller.compute_adjustments(11).seasonal_factor;
        assert!(december > 1.2);

        // January (month 0) carries the post-holiday dip.
        let january = controller.compute_adjustments(0).seasonal_factor;
        assert!(january < 0.9);
    }

    #[test]
    fn test_sudden_drift_reproducibility() {
        let config = DriftConfig {
            enabled: true,
            sudden_drift_probability: 0.5,
            sudden_drift_magnitude: 1.5,
            drift_type: DriftType::Sudden,
            ..Default::default()
        };

        // Two controllers built from the same seed must agree on the schedule.
        let first = controller_for(config.clone());
        let second = controller_for(config);

        assert_eq!(first.sudden_drift_periods(), second.sudden_drift_periods());
    }
}