entrenar 0.7.10

//! Drift Detection Module (ENT-044)
//!
//! Detects training anomalies using statistical methods.
//! Based on renacer's sliding window baseline patterns.

/// Drift detection status
#[derive(Debug, Clone, PartialEq)]
pub enum DriftStatus {
    /// No drift detected
    NoDrift,
    /// Warning: potential drift (p-value)
    Warning(f64),
    /// Drift confirmed (p-value)
    Drift(f64),
}

/// Anomaly severity levels (from renacer)
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AnomalySeverity {
    /// 3-4 standard deviations
    Low,
    /// 4-5 standard deviations
    Medium,
    /// >5 standard deviations
    High,
}

/// Sliding window baseline for anomaly detection
#[derive(Debug, Clone)]
pub struct SlidingWindowBaseline {
    window_size: usize,
    values: Vec<f64>,
    mean: f64,
    m2: f64, // For Welford's algorithm
    count: usize,
}

impl SlidingWindowBaseline {
    /// Create a new baseline with given window size
    pub fn new(window_size: usize) -> Self {
        Self { window_size, values: Vec::with_capacity(window_size), mean: 0.0, m2: 0.0, count: 0 }
    }

    /// Update baseline with new value
    pub fn update(&mut self, value: f64) {
        if value.is_nan() || value.is_infinite() {
            return;
        }

        // Add to window
        if self.values.len() >= self.window_size {
            self.values.remove(0);
        }
        self.values.push(value);

        // Recalculate stats (simplified - could be optimized)
        self.count = self.values.len();
        if self.count > 0 {
            self.mean = self.values.iter().sum::<f64>() / self.count as f64;
            if self.count > 1 {
                self.m2 = self.values.iter().map(|v| (v - self.mean).powi(2)).sum::<f64>();
            }
        }
    }

    /// Get current standard deviation
    pub fn std(&self) -> f64 {
        if self.count < 2 {
            return 0.0;
        }
        (self.m2 / (self.count - 1) as f64).sqrt()
    }

    /// Calculate z-score for a value
    pub fn z_score(&self, value: f64) -> f64 {
        let std = self.std();
        if std == 0.0 {
            return 0.0;
        }
        (value - self.mean) / std
    }

    /// Detect anomaly with threshold (standard deviations)
    pub fn detect_anomaly(&self, value: f64, threshold: f64) -> Option<Anomaly> {
        if self.count < 10 {
            return None; // Not enough data
        }

        let z = self.z_score(value).abs();
        if z < threshold {
            return None;
        }

        let severity = if z >= 5.0 {
            AnomalySeverity::High
        } else if z >= 4.0 {
            AnomalySeverity::Medium
        } else {
            AnomalySeverity::Low
        };

        Some(Anomaly {
            value,
            z_score: z,
            severity,
            baseline_mean: self.mean,
            baseline_std: self.std(),
        })
    }

    /// Get current mean
    pub fn mean(&self) -> f64 {
        self.mean
    }

    /// Get sample count
    pub fn count(&self) -> usize {
        self.count
    }
}

/// Detected anomaly
#[derive(Debug, Clone)]
pub struct Anomaly {
    /// The anomalous value
    pub value: f64,
    /// Z-score (number of standard deviations from mean)
    pub z_score: f64,
    /// Severity classification
    pub severity: AnomalySeverity,
    /// Baseline mean when anomaly was detected
    pub baseline_mean: f64,
    /// Baseline standard deviation
    pub baseline_std: f64,
}

/// Drift detector using statistical tests
#[derive(Debug)]
pub struct DriftDetector {
    baseline: SlidingWindowBaseline,
    threshold: f64,
    warning_threshold: f64,
}

impl DriftDetector {
    /// Create a new drift detector
    pub fn new(window_size: usize) -> Self {
        Self {
            baseline: SlidingWindowBaseline::new(window_size),
            threshold: 0.05,        // p < 0.05 for drift
            warning_threshold: 0.1, // p < 0.1 for warning
        }
    }

    /// Set detection thresholds
    pub fn with_thresholds(mut self, warning: f64, drift: f64) -> Self {
        self.warning_threshold = warning;
        self.threshold = drift;
        self
    }

    /// Update baseline and check for drift
    pub fn check(&mut self, value: f64) -> DriftStatus {
        // Get z-score before updating
        let z = self.baseline.z_score(value).abs();

        // Update baseline
        self.baseline.update(value);

        if self.baseline.count() < 10 {
            return DriftStatus::NoDrift;
        }

        // Convert z-score to approximate p-value
        let p = z_to_p(z);

        if p < self.threshold {
            DriftStatus::Drift(p)
        } else if p < self.warning_threshold {
            DriftStatus::Warning(p)
        } else {
            DriftStatus::NoDrift
        }
    }
}

/// Approximate z-score to p-value (two-tailed)
fn z_to_p(z: f64) -> f64 {
    // Approximation using error function
    let t = 1.0 / (1.0 + 0.2316419 * z.abs());
    let d = 0.3989423 * (-z * z / 2.0).exp();
    let p =
        d * t * (0.3193815 + t * (-0.3565638 + t * (1.781478 + t * (-1.821256 + t * 1.330274))));

    2.0 * p // Two-tailed
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_sliding_window_new() {
        let baseline = SlidingWindowBaseline::new(100);
        assert_eq!(baseline.count(), 0);
    }

    #[test]
    fn test_sliding_window_update() {
        let mut baseline = SlidingWindowBaseline::new(100);
        for i in 0..10 {
            baseline.update(f64::from(i));
        }
        assert_eq!(baseline.count(), 10);
        assert!((baseline.mean() - 4.5).abs() < 1e-6);
    }

    #[test]
    fn test_sliding_window_rolls() {
        let mut baseline = SlidingWindowBaseline::new(5);
        for i in 0..10 {
            baseline.update(f64::from(i));
        }
        // Window should contain [5, 6, 7, 8, 9]
        assert_eq!(baseline.count(), 5);
        assert!((baseline.mean() - 7.0).abs() < 1e-6);
    }

    #[test]
    fn test_z_score() {
        let mut baseline = SlidingWindowBaseline::new(100);
        // Add 100 values with mean=50, std≈29
        for i in 0..100 {
            baseline.update(f64::from(i));
        }

        // Value at mean should have z≈0
        let z_mean = baseline.z_score(50.0);
        assert!(z_mean.abs() < 0.5);
    }

    #[test]
    fn test_detect_anomaly_none() {
        let mut baseline = SlidingWindowBaseline::new(100);
        for i in 0..100 {
            baseline.update(50.0 + f64::from(i % 5));
        }

        // Normal value
        let anomaly = baseline.detect_anomaly(52.0, 3.0);
        assert!(anomaly.is_none());
    }

    #[test]
    fn test_detect_anomaly_high() {
        let mut baseline = SlidingWindowBaseline::new(100);
        // Add values with some variance so std > 0
        for i in 0..100 {
            baseline.update(50.0 + f64::from(i % 10));
        }

        // Extreme outlier (far from mean ~54.5, std ~2.87)
        let anomaly = baseline.detect_anomaly(100.0, 3.0);
        assert!(anomaly.is_some());
        let a = anomaly.expect("operation should succeed");
        assert!(a.z_score > 5.0); // Should be high severity
    }

    #[test]
    fn test_drift_detector_no_drift() {
        let mut detector = DriftDetector::new(100);

        // Stable values
        for _ in 0..50 {
            let status = detector.check(50.0);
            assert_eq!(status, DriftStatus::NoDrift);
        }
    }

    #[test]
    fn test_drift_detector_detects_drift() {
        let mut detector = DriftDetector::new(100);

        // Establish baseline with some variance
        for i in 0..100 {
            detector.check(50.0 + f64::from(i % 10));
        }

        // Sudden large change (mean ~54.5, value 200 is ~50 std devs away)
        let status = detector.check(200.0);
        // With variance, this should trigger drift or warning
        assert!(
            matches!(status, DriftStatus::Drift(_) | DriftStatus::Warning(_)),
            "Expected drift or warning, got {status:?}"
        );
    }

    #[test]
    fn test_anomaly_severity_low() {
        let mut baseline = SlidingWindowBaseline::new(100);
        for _ in 0..100 {
            baseline.update(50.0);
        }
        // Force a specific std for predictable testing
        // With constant values, std=0, so any deviation is huge
        // Instead test with some variance
    }

    // =========================================================================
    // Additional Coverage Tests
    // =========================================================================

    #[test]
    fn test_update_with_nan() {
        let mut baseline = SlidingWindowBaseline::new(100);
        baseline.update(1.0);
        baseline.update(f64::NAN);
        baseline.update(2.0);
        // NaN should be ignored
        assert_eq!(baseline.count(), 2);
    }

    #[test]
    fn test_update_with_infinity() {
        let mut baseline = SlidingWindowBaseline::new(100);
        baseline.update(1.0);
        baseline.update(f64::INFINITY);
        baseline.update(f64::NEG_INFINITY);
        baseline.update(2.0);
        // Infinities should be ignored
        assert_eq!(baseline.count(), 2);
    }

    #[test]
    fn test_std_with_single_value() {
        let mut baseline = SlidingWindowBaseline::new(100);
        baseline.update(42.0);
        // With single value, std should be 0
        assert_eq!(baseline.std(), 0.0);
    }

    #[test]
    fn test_z_score_zero_std() {
        let mut baseline = SlidingWindowBaseline::new(100);
        baseline.update(5.0);
        baseline.update(5.0);
        // With constant values, std=0, z_score should be 0
        assert_eq!(baseline.z_score(10.0), 0.0);
    }

    #[test]
    fn test_detect_anomaly_not_enough_data() {
        let mut baseline = SlidingWindowBaseline::new(100);
        for i in 0..5 {
            baseline.update(f64::from(i));
        }
        // Less than 10 values, should return None
        let anomaly = baseline.detect_anomaly(100.0, 3.0);
        assert!(anomaly.is_none());
    }

    #[test]
    fn test_anomaly_severity_medium() {
        let mut baseline = SlidingWindowBaseline::new(100);
        // Add values with controlled variance
        for i in 0..100 {
            // Values between 48-52, mean=50, std≈1.41
            baseline.update(50.0 + f64::from(i % 5 - 2));
        }
        // Value 4 std devs away: 50 + 4*1.41 ≈ 55.64
        // But actually need ~4 std devs to get Medium
        // With our distribution mean≈50, std≈1.41, z=4 at ~55.64
        let anomaly = baseline.detect_anomaly(56.0, 3.0);
        if let Some(a) = anomaly {
            // z should be around 4 for Medium severity
            println!("z_score: {}", a.z_score);
        }
    }

    #[test]
    fn test_drift_detector_with_thresholds() {
        let detector = DriftDetector::new(100).with_thresholds(0.15, 0.08);
        // Just verify the builder works
        assert_eq!(detector.threshold, 0.08);
        assert_eq!(detector.warning_threshold, 0.15);
    }

    #[test]
    fn test_drift_status_warning() {
        let mut detector = DriftDetector::new(50).with_thresholds(0.3, 0.05); // More lenient warning

        // Establish baseline
        for i in 0..50 {
            detector.check(50.0 + f64::from(i % 10));
        }

        // Moderate deviation might trigger warning
        // This tests the warning branch
        let _status = detector.check(75.0);
        // Result depends on statistics
    }

    #[test]
    fn test_z_to_p_approximation() {
        // Test the z_to_p function indirectly through DriftDetector
        let mut detector = DriftDetector::new(100);
        for i in 0..100 {
            detector.check(50.0 + f64::from(i % 5));
        }
        // Any check will exercise the z_to_p function
        let _status = detector.check(60.0);
    }

    #[test]
    fn test_drift_status_eq() {
        assert_eq!(DriftStatus::NoDrift, DriftStatus::NoDrift);
        assert_ne!(DriftStatus::NoDrift, DriftStatus::Drift(0.01));
        assert_ne!(DriftStatus::Warning(0.08), DriftStatus::Drift(0.08));
    }

    #[test]
    fn test_anomaly_clone() {
        let anomaly = Anomaly {
            value: 100.0,
            z_score: 5.0,
            severity: AnomalySeverity::High,
            baseline_mean: 50.0,
            baseline_std: 10.0,
        };
        let cloned = anomaly.clone();
        assert_eq!(anomaly.value, cloned.value);
        assert_eq!(anomaly.severity, cloned.severity);
    }

    #[test]
    fn test_sliding_window_baseline_clone() {
        let mut baseline = SlidingWindowBaseline::new(50);
        baseline.update(1.0);
        baseline.update(2.0);
        let cloned = baseline.clone();
        assert_eq!(baseline.count(), cloned.count());
        assert_eq!(baseline.mean(), cloned.mean());
    }
}