parlov-analysis 0.6.0

Analysis engine trait and signal detection for parlov.
Documentation
//! Signal family definitions and family-adjusted confidence scoring.
//!
//! Correlated signals from the same RFC mechanism are grouped into families. Within a family,
//! confidence contributions have diminishing returns to prevent double-counting. Impact points
//! are NOT family-capped and always count fully.

use parlov_core::SignalKind;

/// Groups of signals that stem from the same RFC mechanism.
///
/// Signals in the same family are treated as correlated evidence: when scores are aggregated,
/// later signals of a family receive reduced (and eventually zero) confidence credit so that
/// one underlying behaviour is not counted several times. Impact points from any signal always
/// count regardless of family.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub(crate) enum SignalFamily {
    /// Range requests: 206, 416, `Content-Range`, `Accept-Ranges`.
    Range,
    /// Cache validation: 304, `ETag`, `Last-Modified`.
    CacheValidator,
    /// Authentication and authorization: 401, 403, `WWW-Authenticate`.
    Auth,
    /// Precondition failures: 412, If-Match/If-Unmodified-Since consequences.
    Precondition,
    /// Content negotiation: 406, Accept consequences.
    Negotiation,
    /// Response body content differentials.
    ErrorBody,
    /// Redirection: 300-303, 307, 308, `Location`.
    Redirect,
    /// Fallback for everything that belongs to no other family.
    General,
}

/// Maximum confidence contribution per signal within a family.
///
/// Each signal's raw confidence is clamped to this value before the positional reduction
/// within its family is applied. High enough for body-diff signals (raw 70) to carry findings
/// independently; existing header/metadata signals (raw 3-15) are unaffected.
const FAMILY_CAP: u8 = 75;

/// A single signal's raw contribution, before any family adjustment is applied.
pub(crate) struct SignalContribution {
    /// Which family this signal belongs to; decides which other signals it is diminished with.
    pub family: SignalFamily,
    /// Raw confidence points before the per-signal cap and family diminishing.
    pub confidence: f32,
    /// Impact points (not family-capped; always summed in full).
    pub impact: u8,
    /// Human-readable description for scoring reason.
    pub description: String,
}

/// Aggregated result of family-adjusted scoring across all signals.
pub(crate) struct FamilyAdjustedScores {
    /// Sum of the confidence points that remain after family adjustment.
    pub confidence_total: f32,
    /// Sum of impact points. Not reduced by family position, but limited to 255 by the
    /// `u8` representation.
    pub impact_total: u8,
    /// Number of distinct families with at least one signal.
    pub family_count: usize,
}

/// Applies family-based diminishing returns to signal confidence contributions.
///
/// Contributions are processed in slice order. Within each family the first signal contributes
/// its (capped) confidence in full, the second contributes half, and every later signal
/// contributes nothing. Impact points are summed for every signal regardless of its position
/// in the family; the sum saturates at `u8::MAX`.
///
/// Returns the adjusted totals together with the number of distinct families seen. An empty
/// slice yields zero totals and a family count of zero.
pub(crate) fn apply_family_adjustment(
    contributions: &[SignalContribution],
) -> FamilyAdjustedScores {
    let mut family_counts = std::collections::HashMap::new();
    let mut confidence_total: f32 = 0.0;
    let mut impact_total: u8 = 0;

    for contrib in contributions {
        let count = family_counts.entry(contrib.family).or_insert(0u8);

        confidence_total += family_adjusted_confidence(contrib.confidence, *count);
        impact_total = impact_total.saturating_add(contrib.impact);
        // Saturate instead of `+= 1`: every position past the second scores zero anyway, while
        // an overflowing counter would panic (or wrap to 0 and hand out full credit again).
        *count = (*count).saturating_add(1);
    }

    FamilyAdjustedScores {
        confidence_total,
        impact_total,
        family_count: family_counts.len(),
    }
}

/// Scales a signal's raw confidence according to its zero-based position within its family.
///
/// The raw value is first limited to `FAMILY_CAP`. Position 0 keeps the limited value,
/// position 1 keeps half of it, and every later position contributes nothing.
fn family_adjusted_confidence(raw: f32, position: u8) -> f32 {
    // Only the first two signals of a family earn any credit.
    if position >= 2 {
        return 0.0;
    }

    let ceiling = f32::from(FAMILY_CAP);
    let bounded = raw.min(ceiling);
    if position == 0 {
        bounded
    } else {
        bounded * 0.5
    }
}

/// Maps a header name to its signal family.
///
/// HTTP field names are case-insensitive, so the lookup ignores ASCII case: `"ETag"` and
/// `"etag"` both resolve to [`SignalFamily::CacheValidator`]. Names that belong to no known
/// family (including the empty string) map to [`SignalFamily::General`].
pub(crate) fn header_family(name: &str) -> SignalFamily {
    // Known header names, written in lowercase, paired with the family they indicate.
    const KNOWN_HEADERS: [(&str, SignalFamily); 6] = [
        ("content-range", SignalFamily::Range),
        ("accept-ranges", SignalFamily::Range),
        ("etag", SignalFamily::CacheValidator),
        ("last-modified", SignalFamily::CacheValidator),
        ("www-authenticate", SignalFamily::Auth),
        ("location", SignalFamily::Redirect),
    ];

    KNOWN_HEADERS
        .iter()
        .find(|(known, _)| name.eq_ignore_ascii_case(known))
        .map_or(SignalFamily::General, |&(_, family)| family)
}

/// Maps an HTTP status code to the signal family it belongs to.
///
/// Status codes without a dedicated family fall back to [`SignalFamily::General`].
pub(crate) fn status_code_family(baseline_status: u16) -> SignalFamily {
    match baseline_status {
        206 | 416 => SignalFamily::Range,
        // 304 sits in the 3xx block but signals cache validation, not redirection.
        300..=303 | 307 | 308 => SignalFamily::Redirect,
        304 => SignalFamily::CacheValidator,
        401 | 403 => SignalFamily::Auth,
        406 => SignalFamily::Negotiation,
        412 => SignalFamily::Precondition,
        _ => SignalFamily::General,
    }
}

/// Returns the bonus awarded when several independent signal families corroborate each other.
///
/// Zero or one family earns nothing, two families earn 3, three earn 6, and four or more
/// earn the maximum of 8.
pub(crate) fn corroboration_bonus(family_count: usize) -> u8 {
    // Bonus indexed by family count; counts beyond the table receive the maximum.
    const BONUS_BY_COUNT: [u8; 4] = [0, 0, 3, 6];
    const MAX_BONUS: u8 = 8;

    BONUS_BY_COUNT
        .get(family_count)
        .copied()
        .unwrap_or(MAX_BONUS)
}

/// Maps a `SignalKind` to a default family when no header context is available.
///
/// The kind is currently ignored: every kind resolves to [`SignalFamily::General`].
pub(crate) fn signal_kind_family(_kind: SignalKind) -> SignalFamily {
    // Without header name context, all signal kinds default to General.
    // Header-specific families are resolved by header_family() or evidence text parsing.
    SignalFamily::General
}

#[cfg(test)]
#[path = "families_tests.rs"]
mod tests;