ragdrift-core 0.1.4

Pure-Rust core for ragdrift: 5-dimensional drift detection for RAG systems.
Documentation
//! Data drift detector: feature-wise KS + PSI on tabular features.

use ndarray::{ArrayView2, Axis};

use crate::error::Result;
use crate::stats::{ks_two_sample, psi, PsiBinning};
use crate::types::{check_min_samples, check_same_cols, DriftDimension, DriftScore};

/// Configuration for [`DataDriftDetector`].
///
/// The struct is `Copy`, so it can be handed around by value. Its `Default`
/// implementation uses a threshold of `0.25` and `PsiBinning::Quantile(10)`.
#[derive(Debug, Clone, Copy)]
pub struct DataDriftConfig {
    /// Threshold passed to `DriftScore::new` alongside the combined score.
    ///
    /// The combined score is the maximum, over all feature columns, of
    /// `max(KS_D, PSI / 0.25)`.
    ///
    /// NOTE(review): because PSI is divided by 0.25 before it is compared, a
    /// threshold of 0.25 corresponds to a raw PSI of 0.0625 — confirm this is
    /// the intended sensitivity.
    pub threshold: f64,
    /// Binning strategy passed to PSI for each feature.
    pub psi_binning: PsiBinning,
}

impl Default for DataDriftConfig {
    fn default() -> Self {
        Self {
            threshold: 0.25,
            psi_binning: PsiBinning::Quantile(10),
        }
    }
}

/// Detects drift on tabular feature matrices.
///
/// For every feature column the detector computes the two-sample KS
/// statistic and the PSI, combines them as `max(KS_D, PSI / 0.25)`, and
/// reports the maximum of that value over all features. Dividing PSI by 0.25
/// maps a raw PSI of 0.25 (the top cut-off of the standard PSI table) to a
/// combined score of 1.0.
///
/// NOTE(review): the original intent was for a single threshold of 0.25 to
/// line up with the standard PSI table. With the `PSI / 0.25` scaling, a
/// threshold of 0.25 is reached at a raw PSI of 0.0625 instead — confirm
/// whether the scale or the default threshold is the intended one.
#[derive(Debug, Clone, Copy, Default)]
pub struct DataDriftDetector {
    /// Threshold and PSI binning applied by `detect`.
    config: DataDriftConfig,
}

impl DataDriftDetector {
    /// Construct a detector from a custom config.
    pub fn new(config: DataDriftConfig) -> Self {
        Self { config }
    }

    /// Compute drift between two `(n_samples, n_features)` matrices.
    pub fn detect(
        &self,
        baseline: &ArrayView2<'_, f64>,
        current: &ArrayView2<'_, f64>,
    ) -> Result<DriftScore> {
        check_same_cols(baseline, current)?;
        check_min_samples(baseline.nrows(), 2)?;
        check_min_samples(current.nrows(), 2)?;

        let mut max_score = 0.0_f64;
        for col in 0..baseline.ncols() {
            let b_col = baseline.index_axis(Axis(1), col);
            let c_col = current.index_axis(Axis(1), col);
            let ks = ks_two_sample(&b_col, &c_col)?.statistic;
            // PSI may fail if a single feature has too few unique values.
            // Treat that feature as zero-drift rather than failing the whole
            // detection.
            let p = psi(&b_col, &c_col, self.config.psi_binning).unwrap_or(0.0);
            let combined = ks.max(p / 0.25);
            if combined > max_score {
                max_score = combined;
            }
        }
        Ok(DriftScore::new(
            DriftDimension::Data,
            max_score,
            self.config.threshold,
            "ks+psi",
        ))
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use ndarray::Array2;

    /// A matrix compared against itself must score exactly zero and must not
    /// be flagged.
    #[test]
    fn identical_matrices_score_zero() {
        let matrix = Array2::from_shape_fn((100, 4), |(row, col)| (row + col) as f64);
        let view = matrix.view();

        let result = DataDriftDetector::default()
            .detect(&view, &view)
            .unwrap();

        assert_eq!(result.score, 0.0);
        assert!(!result.exceeded);
    }

    /// Shifting a single column by a large offset is enough to flag the whole
    /// matrix as drifted.
    #[test]
    fn one_drifted_feature_flags_overall() {
        let shape = (200, 3);
        let reference = Array2::from_shape_fn(shape, |(row, _)| row as f64);
        // Same values as the reference, except column 1 is shifted by 100.
        let shifted = Array2::from_shape_fn(shape, |(row, col)| {
            let value = row as f64;
            if col == 1 {
                value + 100.0
            } else {
                value
            }
        });

        let s = DataDriftDetector::default()
            .detect(&reference.view(), &shifted.view())
            .unwrap();

        assert!(s.exceeded, "expected drift, score={}", s.score);
    }

    /// Matrices with different column counts are rejected with an error.
    #[test]
    fn rejects_dim_mismatch() {
        let three_cols = Array2::<f64>::zeros((10, 3));
        let four_cols = Array2::<f64>::zeros((10, 4));

        let outcome = DataDriftDetector::default().detect(&three_cols.view(), &four_cols.view());

        assert!(outcome.is_err());
    }
}