frlearn_descriptors 0.1.0

Descriptor and novelty-scoring models for fuzzy-rough workflows.
Documentation
use frlearn_core::{FrError, FrResult, Matrix, Predictor};
use frlearn_math::safe_divide;
use frlearn_neighbor::{BruteForceIndex, Metric, NeighborIndex};
use ndarray::Array2;

#[derive(Debug, Clone, Copy)]
#[allow(clippy::upper_case_acronyms)]
pub struct NND {
    pub k: usize,
    pub metric: Metric,
}

impl Default for NND {
    fn default() -> Self {
        Self {
            k: 5,
            metric: Metric::Euclidean,
        }
    }
}

impl NND {
    pub fn fit(&self, x_inlier: &Matrix) -> FrResult<NNDModel> {
        if x_inlier.nrows() == 0 || x_inlier.ncols() == 0 {
            return Err(FrError::EmptyInput);
        }
        if self.k == 0 {
            return Err(FrError::InvalidInput("k must be at least 1".to_string()));
        }

        Ok(NNDModel {
            x_inlier: x_inlier.clone(),
            index: BruteForceIndex::new(x_inlier.clone(), self.metric),
            k: self.k,
            metric: self.metric,
        })
    }
}

#[derive(Debug, Clone)]
#[allow(clippy::upper_case_acronyms)]
pub struct NNDModel {
    pub x_inlier: Matrix,
    pub index: BruteForceIndex,
    pub k: usize,
    pub metric: Metric,
}

impl NNDModel {
    pub fn predict_anomaly_scores(&self, xq: &Matrix) -> FrResult<Vec<f64>> {
        let (_indices, distances) = self.index.query(xq, self.k)?;
        let k_eff = distances.ncols();

        if k_eff == 0 {
            return Ok(vec![0.0; xq.nrows()]);
        }

        let mut raw_scores = Vec::with_capacity(distances.nrows());
        for row in distances.rows() {
            let distance_sum = row.iter().copied().sum::<f64>();
            let avg_distance = safe_divide(distance_sum, k_eff as f64, 0.0);
            raw_scores.push(avg_distance.max(0.0));
        }

        Ok(normalize_scores(raw_scores))
    }
}

impl Predictor for NNDModel {
    fn predict_scores(&self, xq: &Matrix) -> FrResult<Matrix> {
        let anomaly_scores = self.predict_anomaly_scores(xq)?;
        let mut output = Array2::<f64>::zeros((anomaly_scores.len(), 1));
        for (row_idx, score) in anomaly_scores.into_iter().enumerate() {
            output[[row_idx, 0]] = score;
        }
        Ok(output)
    }
}

fn normalize_scores(raw_scores: Vec<f64>) -> Vec<f64> {
    if raw_scores.is_empty() {
        return raw_scores;
    }

    let min_score = raw_scores.iter().copied().fold(f64::INFINITY, f64::min);
    let max_score = raw_scores.iter().copied().fold(f64::NEG_INFINITY, f64::max);
    let range = max_score - min_score;

    if !range.is_finite() || range <= f64::EPSILON {
        return vec![0.0; raw_scores.len()];
    }

    raw_scores
        .into_iter()
        .map(|score| safe_divide(score - min_score, range, 0.0).clamp(0.0, 1.0))
        .collect()
}