parlov-analysis 0.4.0

Analysis engine trait and signal detection for parlov.
Documentation
//! Signal family definitions and family-adjusted confidence scoring.
//!
//! Correlated signals from the same RFC mechanism are grouped into families. Within a family,
//! confidence contributions have diminishing returns to prevent double-counting. Impact points
//! are NOT family-capped and always count fully.

use parlov_core::SignalKind;

/// Groups of signals that originate from the same RFC mechanism.
///
/// Signals that land in the same family are treated as correlated evidence: their confidence
/// contributions receive diminishing credit based on their position within the family (see
/// `apply_family_adjustment`). Impact points from any signal always count regardless of family.
///
/// `Hash` and `Eq` are derived because the enum is used as a `HashMap` key when counting
/// signals per family.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub(crate) enum SignalFamily {
    /// Range requests: 206, 416, `Content-Range`, `Accept-Ranges`.
    Range,
    /// Cache validation: 304, `ETag`, `Last-Modified`.
    CacheValidator,
    /// Authentication/authorization: 401, 403, `WWW-Authenticate`.
    Auth,
    /// Preconditions: 412, i.e. consequences of `If-Match` / `If-Unmodified-Since`.
    Precondition,
    /// Content negotiation: 406, i.e. consequences of `Accept`.
    Negotiation,
    /// Everything else; the fallback for unrecognised headers and status codes.
    General,
}

/// Ceiling applied to a single signal's raw confidence before positional diminishing.
///
/// `family_adjusted_confidence` clamps each signal to this value and then weights it by its
/// position in the family (first signal full, second half, third+ zero), so a family's total
/// can reach at most 1.5x this value.
// NOTE(review): the previous wording described this as a per-family maximum; the code applies
// it per signal. Confirm which of the two is intended.
const FAMILY_CAP: u8 = 25;

/// A single signal's raw contribution, before any family adjustment is applied.
///
/// Instances are consumed by `apply_family_adjustment`, which reads `family`, `confidence`
/// and `impact`.
pub(crate) struct SignalContribution {
    /// Which family this signal belongs to; determines which signals diminish each other.
    pub family: SignalFamily,
    /// Raw confidence points before family diminishing (clamped to `FAMILY_CAP` during
    /// adjustment).
    pub confidence: f32,
    /// Impact points (not family-capped).
    pub impact: u8,
    /// Human-readable description for scoring reason.
    // Not read by the adjustment logic in this module; presumably consumed by callers that
    // report scoring reasons.
    pub description: String,
}

/// Aggregate result of family-adjusted scoring across all signals.
///
/// Produced by `apply_family_adjustment`.
pub(crate) struct FamilyAdjustedScores {
    /// Sum of confidence points after per-family diminishing has been applied.
    pub confidence_total: f32,
    /// Sum of impact points. Not diminished by family position, but saturated at 255 so it
    /// fits in a `u8`.
    pub impact_total: u8,
    /// Number of distinct families with at least one signal.
    pub family_count: usize,
}

/// Applies family-based diminishing returns to signal confidence contributions.
///
/// Within each family: first signal gets full points, second gets half, third+ get zero.
/// Impact points always count fully regardless of family position.
pub(crate) fn apply_family_adjustment(
    contributions: &[SignalContribution],
) -> FamilyAdjustedScores {
    let mut family_counts = std::collections::HashMap::new();
    let mut confidence_total: f32 = 0.0;
    let mut impact_total: u16 = 0;

    for contrib in contributions {
        let count = family_counts
            .entry(contrib.family)
            .or_insert(0u8);

        let capped = family_adjusted_confidence(contrib.confidence, *count);
        confidence_total += capped;
        impact_total += u16::from(contrib.impact);
        *count += 1;
    }

    FamilyAdjustedScores {
        confidence_total,
        impact_total: impact_total.min(255) as u8,
        family_count: family_counts.len(),
    }
}

/// Computes the confidence credit for a signal given how many same-family signals preceded it.
///
/// `raw` is first limited to at most `FAMILY_CAP`; the result is then weighted by the
/// zero-based `position`: full weight at 0, half weight at 1, nothing afterwards.
fn family_adjusted_confidence(raw: f32, position: u8) -> f32 {
    let weight: f32 = match position {
        0 => 1.0,
        1 => 0.5,
        // Third and later signals in a family contribute no confidence at all.
        _ => return 0.0,
    };
    raw.min(f32::from(FAMILY_CAP)) * weight
}

/// Maps a header name to its signal family.
///
/// The comparison ignores ASCII case because HTTP field names are case-insensitive, so
/// `"ETag"`, `"etag"` and `"ETAG"` all resolve to the same family. Names that are not
/// recognised map to [`SignalFamily::General`].
pub(crate) fn header_family(name: &str) -> SignalFamily {
    // Allocation-free case-insensitive comparison against a lowercase candidate.
    let is = |candidate: &str| name.eq_ignore_ascii_case(candidate);

    if is("content-range") || is("accept-ranges") {
        SignalFamily::Range
    } else if is("etag") || is("last-modified") {
        SignalFamily::CacheValidator
    } else if is("www-authenticate") {
        SignalFamily::Auth
    } else {
        SignalFamily::General
    }
}

/// Maps an HTTP status code to the signal family used for status-code-derived signals.
///
/// Status codes without a dedicated family fall back to [`SignalFamily::General`].
pub(crate) fn status_code_family(baseline_status: u16) -> SignalFamily {
    // Arms are listed in ascending status-code order.
    match baseline_status {
        206 => SignalFamily::Range,
        304 => SignalFamily::CacheValidator,
        401 => SignalFamily::Auth,
        403 => SignalFamily::Auth,
        406 => SignalFamily::Negotiation,
        412 => SignalFamily::Precondition,
        416 => SignalFamily::Range,
        _ => SignalFamily::General,
    }
}

/// Returns the corroboration bonus awarded when several distinct signal families agree.
///
/// Zero or one family earns nothing, two families earn 3, three earn 6, and four or more
/// earn the maximum of 8.
pub(crate) fn corroboration_bonus(family_count: usize) -> u8 {
    /// Bonus indexed by family count, for counts below the saturation point.
    const BONUS_BY_COUNT: [u8; 4] = [0, 0, 3, 6];
    /// Bonus for every count beyond the end of the table.
    const MAX_BONUS: u8 = 8;

    BONUS_BY_COUNT
        .get(family_count)
        .copied()
        .unwrap_or(MAX_BONUS)
}

/// Maps a `SignalKind` to a default family when no header context is available.
///
/// The argument is currently ignored: every kind resolves to [`SignalFamily::General`].
pub(crate) fn signal_kind_family(_kind: SignalKind) -> SignalFamily {
    // Without header name context, all signal kinds default to General.
    // Header-specific families are resolved by header_family() or evidence text parsing.
    SignalFamily::General
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Tolerance used when comparing floating-point confidence totals.
    const TOLERANCE: f32 = 0.01;

    /// Builds a contribution fixture without repeating the struct literal in every test.
    fn contribution(
        family: SignalFamily,
        confidence: f32,
        impact: u8,
        description: &str,
    ) -> SignalContribution {
        SignalContribution {
            family,
            confidence,
            impact,
            description: description.into(),
        }
    }

    /// Asserts that `actual` is within `TOLERANCE` of `expected`.
    fn assert_close(actual: f32, expected: f32) {
        assert!((actual - expected).abs() < TOLERANCE);
    }

    #[test]
    fn first_signal_gets_full_points() {
        let scores = apply_family_adjustment(&[contribution(
            SignalFamily::Range,
            12.0,
            8,
            "test",
        )]);

        assert_close(scores.confidence_total, 12.0);
        assert_eq!(scores.impact_total, 8);
    }

    #[test]
    fn second_signal_in_family_gets_half() {
        let scores = apply_family_adjustment(&[
            contribution(SignalFamily::Range, 12.0, 8, "first"),
            contribution(SignalFamily::Range, 10.0, 5, "second"),
        ]);

        // 12 (full) + 10 * 0.5 (half) = 17
        assert_close(scores.confidence_total, 17.0);
        assert_eq!(scores.impact_total, 13);
    }

    #[test]
    fn third_signal_in_family_gets_zero_confidence() {
        let scores = apply_family_adjustment(&[
            contribution(SignalFamily::Range, 12.0, 8, "first"),
            contribution(SignalFamily::Range, 10.0, 5, "second"),
            contribution(SignalFamily::Range, 8.0, 15, "third"),
        ]);

        // The third signal adds no confidence, so the total matches the two-signal case.
        assert_close(scores.confidence_total, 17.0);
        // Impact still counts: 8 + 5 + 15 = 28
        assert_eq!(scores.impact_total, 28);
    }

    #[test]
    fn different_families_count_independently() {
        let scores = apply_family_adjustment(&[
            contribution(SignalFamily::Range, 12.0, 8, "range"),
            contribution(SignalFamily::Auth, 8.0, 8, "auth"),
        ]);

        assert_close(scores.confidence_total, 20.0);
        assert_eq!(scores.family_count, 2);
    }

    #[test]
    fn corroboration_bonus_values() {
        let expected: [(usize, u8); 5] = [(0, 0), (1, 0), (2, 3), (3, 6), (5, 8)];
        for (family_count, bonus) in expected {
            assert_eq!(corroboration_bonus(family_count), bonus);
        }
    }

    #[test]
    fn header_family_mappings() {
        let expected = [
            ("content-range", SignalFamily::Range),
            ("etag", SignalFamily::CacheValidator),
            ("www-authenticate", SignalFamily::Auth),
            ("x-custom", SignalFamily::General),
        ];
        for (name, family) in expected {
            assert_eq!(header_family(name), family);
        }
    }
}