rcf3 0.5.1 - Docs.rs

use crate::math;

/// How anomaly scores are computed and normalised.
///
/// This stays crate-internal so low-level scoring choices do not become part
/// of the public API, but the constructors below document the scoring modes
/// the tree traversal can support.
#[derive(Clone, Debug)]
pub(in crate::rcf) struct ScoreMode {
    score_seen: fn(usize, usize) -> f64,
    score_unseen: fn(usize, usize) -> f64,
    damp: fn(usize, usize) -> f64,
    normalizer: fn(f64, usize) -> f64,
}

// ---------------------------------------------------------------------------
// Score functions (matching the AWS reference implementation)
// ---------------------------------------------------------------------------

/// Standard isolation score for a point already in the tree.
///
/// `x` = depth, `y` = leaf mass.
fn score_seen(x: usize, y: usize) -> f64 {
    1.0 / (x as f64 + math::log2(1.0 + y as f64))
}

/// Standard isolation score for a point *not* already in the tree.
fn score_unseen(x: usize, _y: usize) -> f64 {
    1.0 / (x as f64 + 1.0)
}

/// Standard normalizer: multiplies by log₂(1 + tree_mass).
fn normalizer(x: f64, y: usize) -> f64 {
    x * math::log2(1.0 + y as f64)
}

/// Damping applied when the query is a duplicate of a leaf point.
fn damp(x: usize, y: usize) -> f64 {
    if y == 0 {
        return 1.0;
    }
    1.0 - (x as f64) / (2.0 * y as f64)
}

/// Displacement score for *seen* points (ignores depth, uses mass).
fn score_seen_displacement(_x: usize, y: usize) -> f64 {
    1.0 / (1.0 + y as f64)
}

/// Displacement score for *unseen* points.
fn score_unseen_displacement(_x: usize, y: usize) -> f64 {
    y as f64
}

/// Displacement normalizer: `score / (1 + tree_mass)`.
fn displacement_normalizer(x: f64, y: usize) -> f64 {
    x / (1.0 + y as f64)
}

/// Identity normalizer (used for density mode).
fn identity(x: f64, _y: usize) -> f64 {
    x
}

// ---------------------------------------------------------------------------
// ScoreMode constructors
// ---------------------------------------------------------------------------

impl ScoreMode {
    /// Standard anomaly-score mode (matches the AWS reference default).
    pub(in crate::rcf) fn standard() -> Self {
        ScoreMode {
            score_seen,
            score_unseen,
            damp,
            normalizer,
        }
    }

    /// Displacement-based score (density-sensitive).
    pub(in crate::rcf) fn displacement() -> Self {
        ScoreMode {
            score_seen: score_seen_displacement,
            score_unseen: score_unseen_displacement,
            damp,
            normalizer: displacement_normalizer,
        }
    }

    /// Density estimation mode.
    ///
    /// `Forest::density` currently has a dedicated traversal, but keeping this
    /// mode documents the equivalent scoring shape if density is later routed
    /// through the generic scoring visitor again.
    #[allow(dead_code)]
    pub(in crate::rcf) fn density() -> Self {
        ScoreMode {
            score_seen: score_unseen_displacement,
            score_unseen: score_unseen_displacement,
            damp,
            normalizer: identity,
        }
    }

    /// Build a fully custom mode for internal experiments.
    ///
    /// This is intentionally not exposed from the crate facade; it is a compact
    /// reference for how custom scoring can be represented without reopening
    /// the public API.
    #[allow(dead_code)]
    pub(in crate::rcf) fn custom(
        score_seen: fn(usize, usize) -> f64,
        score_unseen: fn(usize, usize) -> f64,
        damp: fn(usize, usize) -> f64,
        normalizer: fn(f64, usize) -> f64,
    ) -> Self {
        ScoreMode {
            score_seen,
            score_unseen,
            damp,
            normalizer,
        }
    }

    pub(in crate::rcf) fn score_seen(&self, depth: usize, mass: usize) -> f64 {
        (self.score_seen)(depth, mass)
    }

    pub(in crate::rcf) fn score_unseen(&self, depth: usize, mass: usize) -> f64 {
        (self.score_unseen)(depth, mass)
    }

    pub(in crate::rcf) fn damp(&self, mass: usize, tree_mass: usize) -> f64 {
        (self.damp)(mass, tree_mass)
    }

    pub(in crate::rcf) fn normalize(&self, raw: f64, tree_mass: usize) -> f64 {
        (self.normalizer)(raw, tree_mass)
    }
}

// ---------------------------------------------------------------------------
// Attribution result
// ---------------------------------------------------------------------------

/// Per-dimension anomaly attribution.
///
/// For each dimension `i`:
/// - `above` = contribution from cuts whose threshold is *above* the query
///   value (i.e., the query was isolated because it is too *small* in dim `i`).
/// - `below` = contribution from cuts whose threshold is *below* the query
///   value (isolated because it is too *large* in dim `i`).
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct Attribution {
    pub below: f64,
    pub above: f64,
}

impl Attribution {
    /// Sum of both components: `below + above`.
    pub fn total(self) -> f64 {
        self.below + self.above
    }

    /// Scale both components by `factor`, returning a new `Attribution`.
    pub fn scale(self, factor: f64) -> Self {
        Attribution {
            below: self.below * factor,
            above: self.above * factor,
        }
    }
}

impl Default for Attribution {
    fn default() -> Self {
        Attribution {
            below: 0.0,
            above: 0.0,
        }
    }
}

impl core::ops::Add for Attribution {
    type Output = Self;
    fn add(self, rhs: Self) -> Self {
        Attribution {
            below: self.below + rhs.below,
            above: self.above + rhs.above,
        }
    }
}

impl core::ops::AddAssign for Attribution {
    fn add_assign(&mut self, rhs: Self) {
        self.below += rhs.below;
        self.above += rhs.above;
    }
}

/// Sum of all attribution components equals `score` (up to floating-point error).
#[cfg(test)]
pub(in crate::rcf) fn attribution_total(attr: &[Attribution]) -> f64 {
    attr.iter().map(|a| a.total()).sum()
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn score_seen_decreases_with_depth_and_mass() {
        let s00 = score_seen(0, 0);
        let s01 = score_seen(0, 1);
        let s10 = score_seen(1, 0);
        let s11 = score_seen(1, 1);
        assert!(s00 > s01 && s00 > s10 && s01 > s11 && s10 > s11);
    }

    #[test]
    fn standard_score_unseen_decreases_with_depth() {
        let s0 = score_unseen(0, 1);
        let s1 = score_unseen(1, 1);
        let s5 = score_unseen(5, 1);
        assert!(s0 > s1 && s1 > s5);
    }

    #[test]
    fn normalizer_scales_with_tree_mass() {
        let s = normalizer(1.0, 256);
        assert!(s > 1.0); // log2(257) ≈ 8
    }

    #[cfg(feature = "std")]
    mod proptest_tests {
        use super::*;
        use approx::abs_diff_eq;
        use proptest::prelude::*;

        proptest! {
            #[test]
            fn damp_in_unit_interval(
                tree_mass in 1usize..=1000,
                mass in 0usize..=1000,
            ) {
                // damp is in [0,1] when mass <= tree_mass (the only sensible usage)
                let capped = mass.min(tree_mass);
                let d = damp(capped, tree_mass);
                prop_assert!((0.0..=1.0).contains(&d), "damp({capped},{tree_mass})={d}");
            }

            #[test]
            fn attribution_total_equals_sum(below in -1e6f64..1e6f64, above in -1e6f64..1e6f64) {
                let a = Attribution { below, above };
                prop_assert!(abs_diff_eq!(a.total(), below + above, epsilon = 1e-9));
            }

            #[test]
            fn attribution_scale_distributes(
                below in -1e3f64..1e3f64,
                above in -1e3f64..1e3f64,
                factor in -1e3f64..1e3f64,
            ) {
                let a = Attribution { below, above };
                let s = a.scale(factor);
                prop_assert!(abs_diff_eq!(s.below, below * factor, epsilon = 1e-9));
                prop_assert!(abs_diff_eq!(s.above, above * factor, epsilon = 1e-9));
            }

            #[test]
            fn attribution_add_by_component(
                b1 in -1e6f64..1e6f64,
                a1 in -1e6f64..1e6f64,
                b2 in -1e6f64..1e6f64,
                a2 in -1e6f64..1e6f64,
            ) {
                let x = Attribution { below: b1, above: a1 };
                let y = Attribution { below: b2, above: a2 };
                let sum = x + y;
                prop_assert!(abs_diff_eq!(sum.below, b1 + b2, epsilon = 1e-9));
                prop_assert!(abs_diff_eq!(sum.above, a1 + a2, epsilon = 1e-9));
            }
        }
    }
}