irithyll_core/drift/
adwin.rs

1//! ADWIN (ADaptive WINdowing) drift detector.
2//!
3//! ADWIN maintains a variable-length window of recent values using an exponential
4//! histogram for compression. It detects concept drift by comparing the means of
5//! sub-windows at every bucket boundary. When a statistically significant difference
6//! is found, the window is shrunk by removing the older portion.
7//!
8//! # Algorithm
9//!
10//! Each new value is inserted as a bucket of order 0. When a row accumulates more
11//! than `max_buckets` entries, the two oldest are merged into a single bucket at the
12//! next order (capacity doubles). This gives O(log W) memory for a window of width W.
13//!
14//! Drift is tested by iterating bucket boundaries from newest to oldest, splitting
15//! the window into a right (newer) and left (older) sub-window. The Hoeffding-based
16//! cut uses `epsilon = sqrt((1 / 2m) * ln(4 / delta'))` where `m` is the harmonic
17//! count and `delta' = delta / ln(width)`.
18//!
19//! # Reference
20//!
21//! Bifet, A. & Gavalda, R. (2007). "Learning from Time-Changing Data with Adaptive
22//! Windowing." In *Proceedings of the 2007 SIAM International Conference on Data Mining*.
23
24use alloc::boxed::Box;
25use alloc::vec;
26use alloc::vec::Vec;
27
28use super::{DriftDetector, DriftSignal};
29
30// ---------------------------------------------------------------------------
31// Bucket
32// ---------------------------------------------------------------------------
33
34/// A single bucket in the exponential histogram.
35///
36/// Buckets at row `i` summarise `2^i` original observations.
37#[derive(Debug, Clone)]
38#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
39struct Bucket {
40    /// Sum of values stored in this bucket.
41    total: f64,
42    /// Variance accumulator: sum of squared deviations (variance * count).
43    variance: f64,
44    /// Number of original values represented by this bucket.
45    count: u64,
46}
47
48impl Bucket {
49    /// Create a leaf bucket for a single observed value.
50    #[inline]
51    fn singleton(value: f64) -> Self {
52        Self {
53            total: value,
54            variance: 0.0,
55            count: 1,
56        }
57    }
58
59    /// Merge two buckets into one, combining their sufficient statistics.
60    ///
61    /// The variance update follows the parallel variance formula:
62    /// `Var(A+B) = Var(A) + Var(B) + (mean_A - mean_B)^2 * n_A * n_B / (n_A + n_B)`
63    fn merge(a: &Bucket, b: &Bucket) -> Self {
64        let count = a.count + b.count;
65        let total = a.total + b.total;
66
67        let mean_a = a.total / a.count as f64;
68        let mean_b = b.total / b.count as f64;
69        let diff = mean_a - mean_b;
70        let variance = a.variance
71            + b.variance
72            + diff * diff * (a.count as f64) * (b.count as f64) / (count as f64);
73
74        Self {
75            total,
76            variance,
77            count,
78        }
79    }
80}
81
82// ---------------------------------------------------------------------------
83// Adwin
84// ---------------------------------------------------------------------------
85
86/// ADWIN (ADaptive WINdowing) drift detector.
87///
88/// Maintains an adaptive-size window over a stream of real-valued observations
89/// using an exponential histogram. Detects distribution shift by comparing
90/// sub-window means with a Hoeffding-style bound.
91///
92/// # Examples
93///
94/// ```
95/// use irithyll_core::drift::{DriftDetector, DriftSignal};
96/// use irithyll_core::drift::adwin::Adwin;
97///
98/// let mut det = Adwin::new();
99/// // Feed stable values
100/// for _ in 0..200 {
101///     let sig = det.update(0.0);
102///     assert_ne!(sig, DriftSignal::Drift);
103/// }
104/// ```
105#[derive(Debug, Clone)]
106#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
107pub struct Adwin {
108    /// Confidence parameter. Smaller values require stronger evidence to declare
109    /// drift. Default: 0.002.
110    delta: f64,
111    /// Maximum number of buckets allowed per row before compression triggers.
112    /// Default: 5.
113    max_buckets: usize,
114    /// Bucket rows. `rows[i]` holds buckets whose capacity is `2^i`.
115    /// Within each row the *oldest* bucket is at the front (index 0) and the
116    /// *newest* is at the back.
117    rows: Vec<Vec<Bucket>>,
118    /// Running sum of all values currently in the window.
119    total: f64,
120    /// Running variance accumulator for the full window.
121    variance: f64,
122    /// Total number of original observations in the window.
123    count: u64,
124    /// Width of the window (mirrors `count`; kept for semantic clarity).
125    width: u64,
126    /// Minimum number of observations before drift checking begins.
127    min_window: u64,
128}
129
130impl Adwin {
131    /// Create a new ADWIN detector with the default confidence `delta = 0.002`.
132    pub fn new() -> Self {
133        Self::with_delta(0.002)
134    }
135
136    /// Create a new ADWIN detector with the given confidence parameter.
137    ///
138    /// `delta` controls sensitivity: smaller values mean fewer false positives but
139    /// slower reaction to real drift. Typical range: 0.0001 to 0.1.
140    ///
141    /// # Panics
142    ///
143    /// Panics if `delta` is not in `(0, 1)`.
144    pub fn with_delta(delta: f64) -> Self {
145        assert!(
146            delta > 0.0 && delta < 1.0,
147            "delta must be in (0, 1), got {delta}"
148        );
149        Self {
150            delta,
151            max_buckets: 5,
152            rows: Vec::new(),
153            total: 0.0,
154            variance: 0.0,
155            count: 0,
156            width: 0,
157            min_window: 32,
158        }
159    }
160
161    /// Set the maximum number of buckets per row (default 5).
162    ///
163    /// Higher values use more memory but give finer granularity for split-point
164    /// testing. Must be at least 2.
165    pub fn with_max_buckets(mut self, m: usize) -> Self {
166        assert!(m >= 2, "max_buckets must be >= 2, got {m}");
167        self.max_buckets = m;
168        self
169    }
170
171    /// Set the minimum window size before drift checking begins (default 32).
172    pub fn with_min_window(mut self, min: u64) -> Self {
173        self.min_window = min;
174        self
175    }
176
177    /// Current window width (number of original observations).
178    #[inline]
179    pub fn width(&self) -> u64 {
180        self.width
181    }
182
183    // -----------------------------------------------------------------------
184    // Internal helpers
185    // -----------------------------------------------------------------------
186
187    /// Insert a new singleton bucket at row 0.
188    fn insert_bucket(&mut self, value: f64) {
189        if self.rows.is_empty() {
190            self.rows.push(Vec::new());
191        }
192        self.rows[0].push(Bucket::singleton(value));
193
194        // Update running stats using Welford's online method for variance.
195        self.count += 1;
196        self.width += 1;
197        let old_mean = if self.count > 1 {
198            (self.total) / (self.count - 1) as f64
199        } else {
200            0.0
201        };
202        self.total += value;
203        let new_mean = self.total / self.count as f64;
204        self.variance += (value - old_mean) * (value - new_mean);
205    }
206
207    /// Compress bucket rows: whenever a row has more than `max_buckets` entries,
208    /// merge the two oldest (front of the vec) into one bucket at the next row.
209    fn compress(&mut self) {
210        let max = self.max_buckets;
211        let mut row_idx = 0;
212        while row_idx < self.rows.len() {
213            if self.rows[row_idx].len() <= max {
214                break; // rows beyond this one can't have overflowed from this insertion
215            }
216            // Merge the two oldest buckets (indices 0 and 1).
217            let b1 = self.rows[row_idx].remove(0);
218            let b2 = self.rows[row_idx].remove(0);
219            let merged = Bucket::merge(&b1, &b2);
220
221            let next_row = row_idx + 1;
222            if next_row >= self.rows.len() {
223                self.rows.push(Vec::new());
224            }
225            self.rows[next_row].push(merged);
226
227            row_idx += 1;
228        }
229    }
230
231    /// Check for drift by scanning split points from newest to oldest.
232    ///
233    /// Returns `(drift_detected, warning_detected)`.
234    fn check_drift(&self) -> (bool, bool) {
235        if self.width <= self.min_window {
236            return (false, false);
237        }
238        // We need at least 2 observations on each side of a split.
239        if self.width < 4 {
240            return (false, false);
241        }
242
243        let ln_width = crate::math::ln(self.width as f64);
244        // Guard: if width is so small that ln(width) <= 0 we skip.
245        if ln_width <= 0.0 {
246            return (false, false);
247        }
248
249        let delta_prime = self.delta / ln_width;
250        let delta_warn = (2.0 * self.delta) / ln_width;
251
252        // Traverse buckets from newest to oldest, accumulating the "right"
253        // (newer) sub-window. The "left" (older) sub-window is the remainder.
254        let mut right_count: u64 = 0;
255        let mut right_total: f64 = 0.0;
256
257        let mut warning_found = false;
258
259        // Iterate rows from 0 (smallest buckets, most recent) upward, and
260        // within each row from the back (newest) to the front (oldest).
261        for row in &self.rows {
262            for bucket in row.iter().rev() {
263                right_count += bucket.count;
264                right_total += bucket.total;
265
266                let left_count = self.count - right_count;
267                if left_count < 1 || right_count < 1 {
268                    continue;
269                }
270
271                let left_total = self.total - right_total;
272
273                let mean_left = left_total / left_count as f64;
274                let mean_right = right_total / right_count as f64;
275                let abs_diff = (mean_left - mean_right).abs();
276
277                // Harmonic mean of the two sub-window sizes.
278                let n0 = left_count as f64;
279                let n1 = right_count as f64;
280                let m = 1.0 / (1.0 / n0 + 1.0 / n1);
281
282                // Drift threshold.
283                let epsilon_drift =
284                    crate::math::sqrt((1.0 / (2.0 * m)) * crate::math::ln(4.0 / delta_prime));
285
286                if abs_diff >= epsilon_drift {
287                    // No need to keep scanning -- we found the outermost drift point.
288                    return (true, true);
289                }
290
291                // Warning threshold (looser).
292                let epsilon_warn =
293                    crate::math::sqrt((1.0 / (2.0 * m)) * crate::math::ln(4.0 / delta_warn));
294                if abs_diff >= epsilon_warn {
295                    warning_found = true;
296                }
297            }
298        }
299
300        (false, warning_found)
301    }
302
303    /// Remove the oldest portion of the window up to (and including) the split
304    /// point where drift was detected. We find the split point again and remove
305    /// everything on the left side.
306    fn shrink_window(&mut self) {
307        let ln_width = crate::math::ln(self.width as f64);
308        if ln_width <= 0.0 {
309            return;
310        }
311        let delta_prime = self.delta / ln_width;
312
313        // Traverse newest-to-oldest just like check_drift, find the first
314        // split where drift is confirmed, then remove the left (older) side.
315        let mut right_count: u64 = 0;
316        let mut right_total: f64 = 0.0;
317        let mut right_variance: f64 = 0.0;
318
319        // We need to record which buckets comprise the right sub-window.
320        // Strategy: find how many buckets (from the newest end) form the right
321        // sub-window, then rebuild rows keeping only those.
322
323        // Collect all buckets in order from newest to oldest.
324        let mut all_buckets: Vec<(usize, usize)> = Vec::new(); // (row_idx, bucket_idx_in_row)
325        for (row_idx, row) in self.rows.iter().enumerate() {
326            for (bucket_idx, _) in row.iter().enumerate().rev() {
327                all_buckets.push((row_idx, bucket_idx));
328            }
329        }
330
331        let mut split_pos = all_buckets.len(); // keep everything by default
332        for (pos, &(row_idx, bucket_idx)) in all_buckets.iter().enumerate() {
333            let bucket = &self.rows[row_idx][bucket_idx];
334            // Update right sub-window variance using parallel formula.
335            if right_count > 0 {
336                let mean_right_old = right_total / right_count as f64;
337                let mean_bucket = bucket.total / bucket.count as f64;
338                let diff = mean_right_old - mean_bucket;
339                right_variance = right_variance
340                    + bucket.variance
341                    + diff * diff * (right_count as f64) * (bucket.count as f64)
342                        / (right_count + bucket.count) as f64;
343            } else {
344                right_variance = bucket.variance;
345            }
346            right_count += bucket.count;
347            right_total += bucket.total;
348
349            let left_count = self.count - right_count;
350            if left_count < 1 || right_count < 1 {
351                continue;
352            }
353            let left_total = self.total - right_total;
354            let mean_left = left_total / left_count as f64;
355            let mean_right = right_total / right_count as f64;
356            let abs_diff = (mean_left - mean_right).abs();
357
358            let n0 = left_count as f64;
359            let n1 = right_count as f64;
360            let m = 1.0 / (1.0 / n0 + 1.0 / n1);
361            let epsilon = crate::math::sqrt((1.0 / (2.0 * m)) * crate::math::ln(4.0 / delta_prime));
362
363            if abs_diff >= epsilon {
364                // The right sub-window (positions 0..=pos) is the part we keep.
365                split_pos = pos + 1;
366                break;
367            }
368        }
369
370        if split_pos >= all_buckets.len() {
371            // No split found (shouldn't happen if called after check_drift),
372            // but just in case, don't remove anything.
373            return;
374        }
375
376        // Rebuild: keep only the buckets in all_buckets[0..split_pos].
377        // These are the *right* (newer) sub-window.
378        let keep_set: Vec<(usize, usize)> = all_buckets[..split_pos].to_vec();
379
380        // Build a set for fast lookup: which (row, idx) to keep.
381        let mut new_rows: Vec<Vec<Bucket>> = Vec::new();
382        // We need to reconstruct in the proper order: within each row,
383        // oldest first (low index) to newest (high index).
384
385        // First, determine max row.
386        let max_row = self.rows.len();
387        new_rows.resize_with(max_row, Vec::new);
388
389        // Mark which buckets to keep. Since keep_set lists them newest-first,
390        // we process them in reverse to restore oldest-first order within rows.
391        let mut keep_flags: Vec<Vec<bool>> =
392            self.rows.iter().map(|row| vec![false; row.len()]).collect();
393        for &(r, b) in &keep_set {
394            keep_flags[r][b] = true;
395        }
396
397        let mut new_total: f64 = 0.0;
398        let mut new_count: u64 = 0;
399        for (row_idx, row) in self.rows.iter().enumerate() {
400            for (bucket_idx, bucket) in row.iter().enumerate() {
401                if keep_flags[row_idx][bucket_idx] {
402                    new_total += bucket.total;
403                    new_count += bucket.count;
404                    new_rows[row_idx].push(bucket.clone());
405                }
406            }
407        }
408
409        // Trim empty trailing rows.
410        while new_rows.last().is_some_and(|r| r.is_empty()) {
411            new_rows.pop();
412        }
413
414        self.rows = new_rows;
415        self.total = new_total;
416        self.count = new_count;
417        self.width = new_count;
418        // Recompute variance from the remaining buckets.
419        self.recompute_variance();
420    }
421
422    /// Recompute the aggregate variance from the bucket tree.
423    ///
424    /// This is called after shrinking the window because we cannot cheaply
425    /// subtract out the removed portion's contribution to variance.
426    fn recompute_variance(&mut self) {
427        if self.count == 0 {
428            self.variance = 0.0;
429            return;
430        }
431        // Merge all buckets' variances using the parallel formula, processing
432        // from oldest to newest.
433        let mut running_total: f64 = 0.0;
434        let mut running_count: u64 = 0;
435        let mut running_var: f64 = 0.0;
436
437        // Process in order: highest row first (oldest, largest buckets),
438        // then within each row from index 0 (oldest) upward.
439        for row in self.rows.iter().rev() {
440            for bucket in row.iter() {
441                if running_count == 0 {
442                    running_total = bucket.total;
443                    running_count = bucket.count;
444                    running_var = bucket.variance;
445                } else {
446                    let combined_count = running_count + bucket.count;
447                    let mean_running = running_total / running_count as f64;
448                    let mean_bucket = bucket.total / bucket.count as f64;
449                    let diff = mean_running - mean_bucket;
450                    running_var = running_var
451                        + bucket.variance
452                        + diff * diff * (running_count as f64) * (bucket.count as f64)
453                            / combined_count as f64;
454                    running_total += bucket.total;
455                    running_count = combined_count;
456                }
457            }
458        }
459        self.variance = running_var;
460    }
461}
462
463impl Default for Adwin {
464    fn default() -> Self {
465        Self::new()
466    }
467}
468
469// ---------------------------------------------------------------------------
470// Display
471// ---------------------------------------------------------------------------
472
473impl core::fmt::Display for Adwin {
474    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
475        write!(
476            f,
477            "Adwin(delta={}, width={}, mean={:.6})",
478            self.delta,
479            self.width,
480            self.estimated_mean()
481        )
482    }
483}
484
485// ---------------------------------------------------------------------------
486// DriftDetector trait
487// ---------------------------------------------------------------------------
488
489impl DriftDetector for Adwin {
490    fn update(&mut self, value: f64) -> DriftSignal {
491        // 1. Insert new observation.
492        self.insert_bucket(value);
493
494        // 2. Compress overflowing rows.
495        self.compress();
496
497        // 3. Check for drift.
498        let (drift, warning) = self.check_drift();
499
500        if drift {
501            // Shrink window: remove the older sub-window.
502            self.shrink_window();
503            DriftSignal::Drift
504        } else if warning {
505            DriftSignal::Warning
506        } else {
507            DriftSignal::Stable
508        }
509    }
510
511    fn reset(&mut self) {
512        self.rows.clear();
513        self.total = 0.0;
514        self.variance = 0.0;
515        self.count = 0;
516        self.width = 0;
517    }
518
519    fn clone_fresh(&self) -> Box<dyn DriftDetector> {
520        Box::new(Self::with_delta(self.delta).with_max_buckets(self.max_buckets))
521    }
522
523    fn clone_boxed(&self) -> Box<dyn DriftDetector> {
524        Box::new(self.clone())
525    }
526
527    fn estimated_mean(&self) -> f64 {
528        if self.count == 0 {
529            0.0
530        } else {
531            self.total / self.count as f64
532        }
533    }
534
535    fn serialize_state(&self) -> Option<super::DriftDetectorState> {
536        use super::{AdwinBucketState, DriftDetectorState};
537        let rows = self
538            .rows
539            .iter()
540            .map(|row| {
541                row.iter()
542                    .map(|b| AdwinBucketState {
543                        total: b.total,
544                        variance: b.variance,
545                        count: b.count,
546                    })
547                    .collect()
548            })
549            .collect();
550        Some(DriftDetectorState::Adwin {
551            rows,
552            total: self.total,
553            variance: self.variance,
554            count: self.count,
555            width: self.width,
556        })
557    }
558
559    fn restore_state(&mut self, state: &super::DriftDetectorState) -> bool {
560        if let super::DriftDetectorState::Adwin {
561            rows,
562            total,
563            variance,
564            count,
565            width,
566        } = state
567        {
568            self.rows = rows
569                .iter()
570                .map(|row| {
571                    row.iter()
572                        .map(|b| Bucket {
573                            total: b.total,
574                            variance: b.variance,
575                            count: b.count,
576                        })
577                        .collect()
578                })
579                .collect();
580            self.total = *total;
581            self.variance = *variance;
582            self.count = *count;
583            self.width = *width;
584            true
585        } else {
586            false
587        }
588    }
589}
590
591// ---------------------------------------------------------------------------
592// Tests
593// ---------------------------------------------------------------------------
594
595#[cfg(test)]
596mod tests {
597    use super::super::{DriftDetector, DriftSignal};
598    use super::*;
599    use alloc::vec::Vec;
600
601    /// Helper: simple deterministic PRNG for reproducible tests without pulling
602    /// in `rand` in the test module. Uses xorshift64.
603    struct Xorshift64(u64);
604
605    impl Xorshift64 {
606        fn new(seed: u64) -> Self {
607            Self(if seed == 0 { 1 } else { seed })
608        }
609
610        fn next_u64(&mut self) -> u64 {
611            let mut x = self.0;
612            x ^= x << 13;
613            x ^= x >> 7;
614            x ^= x << 17;
615            self.0 = x;
616            x
617        }
618
619        /// Uniform f64 in [0, 1).
620        fn next_f64(&mut self) -> f64 {
621            (self.next_u64() >> 11) as f64 / (1u64 << 53) as f64
622        }
623
624        /// Approximate normal via Box-Muller transform.
625        fn next_normal(&mut self, mean: f64, std: f64) -> f64 {
626            let u1 = self.next_f64().max(1e-15); // avoid ln(0)
627            let u2 = self.next_f64();
628            let z = crate::math::sqrt(-2.0 * crate::math::ln(u1))
629                * crate::math::cos(2.0 * core::f64::consts::PI * u2);
630            mean + std * z
631        }
632    }
633
634    // -----------------------------------------------------------------------
635    // 1. No false alarm on constant distribution
636    // -----------------------------------------------------------------------
637    #[test]
638    fn no_false_alarm_constant() {
639        let mut det = Adwin::with_delta(0.002);
640        let mut rng = Xorshift64::new(42);
641
642        let mut drift_count = 0;
643        for _ in 0..10_000 {
644            let value = 0.5 + rng.next_normal(0.0, 0.01);
645            if det.update(value) == DriftSignal::Drift {
646                drift_count += 1;
647            }
648        }
649        // With delta=0.002 over 10k samples we might get a rare false positive,
650        // but certainly not more than a handful.
651        assert!(
652            drift_count <= 2,
653            "Too many false alarms on constant distribution: {drift_count}"
654        );
655    }
656
657    // -----------------------------------------------------------------------
658    // 2. Detect abrupt shift
659    // -----------------------------------------------------------------------
660    #[test]
661    fn detect_abrupt_shift() {
662        let mut det = Adwin::with_delta(0.002);
663        let mut rng = Xorshift64::new(123);
664
665        // Phase 1: N(0, 0.1)
666        for _ in 0..2000 {
667            det.update(rng.next_normal(0.0, 0.1));
668        }
669
670        // Phase 2: N(5, 0.1) -- huge shift, should detect quickly.
671        let mut detected = false;
672        for i in 0..2000 {
673            let sig = det.update(rng.next_normal(5.0, 0.1));
674            if sig == DriftSignal::Drift {
675                detected = true;
676                // Should detect within the first ~100 samples of the new regime.
677                assert!(
678                    i < 500,
679                    "Drift detected too late after abrupt shift: sample {i}"
680                );
681                break;
682            }
683        }
684        assert!(detected, "Failed to detect abrupt mean shift from 0 to 5");
685    }
686
687    // -----------------------------------------------------------------------
688    // 3. Detect gradual shift
689    // -----------------------------------------------------------------------
690    #[test]
691    fn detect_gradual_shift() {
692        let mut det = Adwin::with_delta(0.01); // slightly more sensitive for gradual
693        let mut rng = Xorshift64::new(456);
694
695        let mut detected = false;
696        for i in 0..5000 {
697            // Mean ramps linearly from 0 to 5.
698            let mean = 5.0 * (i as f64) / 5000.0;
699            let value = rng.next_normal(mean, 0.1);
700            if det.update(value) == DriftSignal::Drift {
701                detected = true;
702                break;
703            }
704        }
705        assert!(detected, "Failed to detect gradual mean shift from 0 to 5");
706    }
707
708    // -----------------------------------------------------------------------
709    // 4. estimated_mean tracks recent values
710    // -----------------------------------------------------------------------
711    #[test]
712    fn estimated_mean_tracks() {
713        let mut det = Adwin::new();
714        // Feed exactly 100 values of 3.0.
715        for _ in 0..100 {
716            det.update(3.0);
717        }
718        let mean = det.estimated_mean();
719        assert!((mean - 3.0).abs() < 1e-9, "Expected mean ~3.0, got {mean}");
720
721        // Feed 100 values of 7.0.
722        for _ in 0..100 {
723            det.update(7.0);
724        }
725        // Mean should be somewhere between 3 and 7 depending on window.
726        let mean = det.estimated_mean();
727        assert!(
728            mean > 2.5 && mean < 7.5,
729            "Mean out of expected range: {mean}"
730        );
731    }
732
733    // -----------------------------------------------------------------------
734    // 5. reset clears all state
735    // -----------------------------------------------------------------------
736    #[test]
737    fn reset_clears_state() {
738        let mut det = Adwin::new();
739        for _ in 0..500 {
740            det.update(1.0);
741        }
742        assert!(det.width() > 0);
743        assert!(det.estimated_mean() > 0.0);
744
745        det.reset();
746
747        assert_eq!(det.width(), 0);
748        assert_eq!(det.estimated_mean(), 0.0);
749        assert!(det.rows.is_empty());
750        assert_eq!(det.count, 0);
751        assert_eq!(det.total, 0.0);
752        assert_eq!(det.variance, 0.0);
753    }
754
755    // -----------------------------------------------------------------------
756    // 6. clone_fresh returns clean instance with same delta
757    // -----------------------------------------------------------------------
758    #[test]
759    fn clone_fresh_preserves_config() {
760        let mut det = Adwin::with_delta(0.05).with_max_buckets(7);
761        for _ in 0..200 {
762            det.update(42.0);
763        }
764
765        let fresh = det.clone_fresh();
766        // The fresh detector should have zero state.
767        assert_eq!(fresh.estimated_mean(), 0.0);
768
769        // We can't directly inspect the boxed detector's fields, so we verify
770        // behaviour: feeding the same constant shouldn't drift.
771        // (This implicitly tests that delta was preserved -- if it were near 1.0,
772        // we'd get spurious drifts.)
773        let mut fresh = fresh;
774        let mut drifts = 0;
775        for _ in 0..1000 {
776            if fresh.update(1.0) == DriftSignal::Drift {
777                drifts += 1;
778            }
779        }
780        assert!(
781            drifts <= 1,
782            "clone_fresh produced detector with too many false alarms: {drifts}"
783        );
784    }
785
786    // -----------------------------------------------------------------------
787    // 7. Small window warmup: no drift check before min_window
788    // -----------------------------------------------------------------------
789    #[test]
790    fn warmup_suppresses_early_detection() {
791        let mut det = Adwin::with_delta(0.002).with_min_window(100);
792
793        // Feed an extreme shift within the first 100 samples: 50 zeros then 50
794        // very large values. Without warmup this would certainly trigger.
795        let mut any_drift = false;
796        for _ in 0..50 {
797            if det.update(0.0) == DriftSignal::Drift {
798                any_drift = true;
799            }
800        }
801        for _ in 0..50 {
802            if det.update(100.0) == DriftSignal::Drift {
803                any_drift = true;
804            }
805        }
806        assert!(
807            !any_drift,
808            "Drift should not fire before min_window=100 samples"
809        );
810    }
811
812    // -----------------------------------------------------------------------
813    // 8. Bucket compression keeps memory bounded
814    // -----------------------------------------------------------------------
815    #[test]
816    fn compression_bounds_memory() {
817        let mut det = Adwin::with_delta(0.002).with_max_buckets(5);
818
819        for i in 0..10_000 {
820            det.update(i as f64);
821        }
822
823        // After compression, no row should have more than max_buckets entries.
824        for (row_idx, row) in det.rows.iter().enumerate() {
825            assert!(
826                row.len() <= det.max_buckets + 1, // +1 tolerance for the just-inserted bucket before next compress
827                "Row {row_idx} has {} buckets, exceeding max {}",
828                row.len(),
829                det.max_buckets
830            );
831        }
832
833        // Total bucket count should be O(log W * max_buckets).
834        let total_buckets: usize = det.rows.iter().map(|r| r.len()).sum();
835        let expected_max = det.rows.len() * (det.max_buckets + 1);
836        assert!(
837            total_buckets <= expected_max,
838            "Total buckets {total_buckets} exceeds expected max {expected_max}"
839        );
840    }
841
842    // -----------------------------------------------------------------------
843    // 9. Window shrinks after drift
844    // -----------------------------------------------------------------------
845    #[test]
846    fn window_shrinks_on_drift() {
847        let mut det = Adwin::with_delta(0.002);
848
849        // Build up a big window.
850        for _ in 0..2000 {
851            det.update(0.0);
852        }
853        let width_before = det.width();
854        assert!(
855            width_before >= 1900,
856            "Expected large window, got {width_before}"
857        );
858
859        // Inject abrupt shift to force drift.
860        let mut drifted = false;
861        for _ in 0..500 {
862            if det.update(100.0) == DriftSignal::Drift {
863                drifted = true;
864                break;
865            }
866        }
867        assert!(drifted, "Expected drift on extreme shift");
868
869        // After drift, window should be significantly smaller.
870        let width_after = det.width();
871        assert!(
872            width_after < width_before,
873            "Window should shrink after drift: before={width_before}, after={width_after}"
874        );
875    }
876
877    // -----------------------------------------------------------------------
878    // 10. Warning signal fires before drift
879    // -----------------------------------------------------------------------
880    #[test]
881    fn warning_precedes_drift() {
882        let mut det = Adwin::with_delta(0.002);
883        let mut rng = Xorshift64::new(789);
884
885        // Stable phase.
886        for _ in 0..1000 {
887            det.update(rng.next_normal(0.0, 0.1));
888        }
889
890        // Moderate shift that might trigger warning before drift.
891        let mut _saw_warning = false;
892        let mut saw_drift = false;
893        for _ in 0..2000 {
894            let sig = det.update(rng.next_normal(2.0, 0.1));
895            match sig {
896                DriftSignal::Warning => {
897                    if !saw_drift {
898                        _saw_warning = true;
899                    }
900                }
901                DriftSignal::Drift => {
902                    saw_drift = true;
903                    break;
904                }
905                _ => {}
906            }
907        }
908
909        // We should have detected drift on a shift this large.
910        assert!(saw_drift, "Should have detected drift on shift from 0 to 2");
911        // Warning may or may not precede drift depending on how fast it triggers,
912        // but we at least verify the mechanism didn't panic.
913    }
914
915    // -----------------------------------------------------------------------
916    // 11. Deterministic reproducibility
917    // -----------------------------------------------------------------------
918    #[test]
919    fn deterministic_for_same_input() {
920        let values: Vec<f64> = (0..500)
921            .map(|i| crate::math::sin(i as f64 * 0.01))
922            .collect();
923
924        let mut det1 = Adwin::with_delta(0.01);
925        let mut det2 = Adwin::with_delta(0.01);
926
927        let signals1: Vec<DriftSignal> = values.iter().map(|&v| det1.update(v)).collect();
928        let signals2: Vec<DriftSignal> = values.iter().map(|&v| det2.update(v)).collect();
929
930        assert_eq!(
931            signals1, signals2,
932            "Same input must produce identical signals"
933        );
934    }
935}
irithyll_core/drift/adwin.rs

irithyll_core/drift/
adwin.rs