parlov-analysis 0.7.0

Analysis engine trait and signal detection for parlov.
Documentation
//! Signal family definitions and family-adjusted confidence scoring.
//!
//! Correlated signals from the same RFC mechanism are grouped into families. Within a family,
//! confidence contributions have diminishing returns to prevent double-counting. Impact points
//! are NOT family-capped and always count fully.

/// Groups of signals that originate from the same RFC mechanism.
///
/// Signals within the same family are correlated evidence, so their confidence
/// contributions are subject to diminishing returns to keep them from inflating scores.
/// Impact points from any signal always count in full, regardless of family.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SignalFamily {
    /// Range requests: 206, 416, `Content-Range`, `Accept-Ranges`.
    Range,
    /// Cache validation: 304, `ETag`, `Last-Modified`.
    CacheValidator,
    /// Authentication/authorization: 401, 403, `WWW-Authenticate`.
    Auth,
    /// Preconditions: 412 and `If-Match`/`If-Unmodified-Since` consequences.
    Precondition,
    /// Content negotiation: 406 and `Accept` consequences.
    Negotiation,
    /// Response body content differentials.
    ErrorBody,
    /// Redirection: 300-303, 307, 308, `Location`.
    Redirect,
    /// Catch-all for anything not covered by a more specific family.
    General,
}

impl SignalFamily {
    /// Total number of variants. Used for fixed-size array indexing.
    pub(crate) const COUNT: usize = 8;

    /// Maps each variant to a unique index in `[0, COUNT)`.
    pub(crate) const fn index(self) -> usize {
        match self {
            Self::Range => 0,
            Self::CacheValidator => 1,
            Self::Auth => 2,
            Self::Precondition => 3,
            Self::Negotiation => 4,
            Self::ErrorBody => 5,
            Self::Redirect => 6,
            Self::General => 7,
        }
    }
}

/// Maximum confidence contribution per signal within a family.
///
/// A signal's raw confidence is clamped to this value before any positional diminishing
/// is applied. High enough for body-diff signals (raw 70) to carry findings independently;
/// existing header/metadata signals (raw 3-15) are unaffected.
const FAMILY_CAP: u8 = 75;

/// A single signal's raw contribution, before any family adjustment is applied.
///
/// The `'a` lifetime ties `description` to the originating `Signal.evidence` string,
/// avoiding a clone during the scoring pipeline.
pub(crate) struct SignalContribution<'a> {
    /// Family this signal belongs to; determines which diminishing-returns bucket it uses.
    pub family: SignalFamily,
    /// Raw confidence points, before capping and family diminishing.
    pub confidence: f32,
    /// Impact points; these are never family-capped.
    pub impact: u8,
    /// Human-readable description, borrowed from `Signal.evidence`.
    pub description: &'a str,
}

/// Aggregate result of family-adjusted scoring across all signals.
pub(crate) struct FamilyAdjustedScores {
    /// Sum of confidence points after per-family capping and diminishing returns.
    pub(crate) confidence_total: f32,
    /// Sum of impact points across all signals (not family-capped).
    pub(crate) impact_total: u8,
    /// Number of distinct families that contributed at least one signal.
    pub(crate) family_count: usize,
}

/// Applies family-based diminishing returns to signal confidence contributions.
///
/// Contributions are processed in slice order. Within each family the first signal gets
/// its full (capped) confidence, the second gets half, and the third and later get zero.
/// Impact points always count fully regardless of family position; their sum saturates
/// at `u8::MAX`.
///
/// Returns the adjusted confidence total, the impact total, and the number of distinct
/// families that contributed at least one signal. An empty slice yields all-zero scores.
pub(crate) fn apply_family_adjustment(
    contributions: &[SignalContribution<'_>],
) -> FamilyAdjustedScores {
    // Per-family count of signals seen so far, indexed by `SignalFamily::index`.
    let mut family_counts = [0u8; SignalFamily::COUNT];
    let mut confidence_total: f32 = 0.0;
    let mut impact_total: u8 = 0;

    for contrib in contributions {
        let seen = &mut family_counts[contrib.family.index()];
        confidence_total += family_adjusted_confidence(contrib.confidence, *seen);
        impact_total = impact_total.saturating_add(contrib.impact);
        // Saturate instead of `+= 1`: with more than 255 signals in one family a plain
        // increment panics in debug builds and wraps to 0 in release builds, which would
        // hand a late signal full credit again and could drop the family from the count.
        *seen = seen.saturating_add(1);
    }

    let family_count = family_counts.iter().filter(|&&c| c > 0).count();
    FamilyAdjustedScores {
        confidence_total,
        impact_total,
        family_count,
    }
}

/// Scales a signal's confidence according to its zero-based position within its family.
///
/// The raw value is first limited to `FAMILY_CAP`. Position 0 keeps the limited value,
/// position 1 keeps half of it, and every later position contributes nothing.
fn family_adjusted_confidence(raw: f32, position: u8) -> f32 {
    // Third and later signals in a family carry no additional confidence.
    if position >= 2 {
        return 0.0;
    }

    let bounded = f32::min(raw, f32::from(FAMILY_CAP));
    if position == 0 {
        bounded
    } else {
        bounded * 0.5
    }
}

/// Maps an HTTP header name to its signal family.
///
/// HTTP field names are case-insensitive, so the comparison ignores ASCII case:
/// `"ETag"`, `"etag"` and `"ETAG"` all map to [`SignalFamily::CacheValidator`].
/// Names that belong to no specific family map to [`SignalFamily::General`].
pub(crate) fn header_family(name: &str) -> SignalFamily {
    // Allocation-free case-insensitive comparison against a known header name.
    let is = |candidate: &str| name.eq_ignore_ascii_case(candidate);

    if is("content-range") || is("accept-ranges") {
        SignalFamily::Range
    } else if is("etag") || is("last-modified") {
        SignalFamily::CacheValidator
    } else if is("www-authenticate") {
        SignalFamily::Auth
    } else if is("location") {
        SignalFamily::Redirect
    } else {
        SignalFamily::General
    }
}

/// Maps a baseline HTTP status code to the default family for status-code-derived signals.
///
/// Status codes without a dedicated family fall back to [`SignalFamily::General`].
pub(crate) fn status_code_family(baseline_status: u16) -> SignalFamily {
    match baseline_status {
        // 304 is excluded from the redirect range: it is a cache-validation outcome.
        300..=303 | 307 | 308 => SignalFamily::Redirect,
        304 => SignalFamily::CacheValidator,
        206 | 416 => SignalFamily::Range,
        401 | 403 => SignalFamily::Auth,
        406 => SignalFamily::Negotiation,
        412 => SignalFamily::Precondition,
        _ => SignalFamily::General,
    }
}

/// Returns the bonus awarded when several independent signal families agree.
///
/// Zero or one family earns nothing, two families earn 3, three earn 6, and four or
/// more earn the maximum of 8.
pub(crate) fn corroboration_bonus(family_count: usize) -> u8 {
    // Bonus for 0..=3 families, indexed by family count.
    const TIERS: [u8; 4] = [0, 0, 3, 6];
    // Bonus once the count runs past the end of the table.
    const MAX_BONUS: u8 = 8;

    TIERS.get(family_count).copied().unwrap_or(MAX_BONUS)
}

#[cfg(test)]
#[path = "families_tests.rs"]
mod tests;