parlov-analysis 0.5.0

Analysis engine trait and signal detection for parlov.
Documentation
//! Per-signal confidence and impact contribution weights.
//!
//! Defines the raw points each signal type contributes before family adjustment and
//! normative/reproducibility modifiers are applied.

use parlov_core::{NormativeStrength, Signal, SignalKind};

use super::families::{header_family, signal_kind_family, SignalContribution, SignalFamily};

/// Raw weight for a single signal before modifiers.
///
/// Produced by the per-kind weighting functions in this module and consumed by
/// `weight_signal`, which turns it into a `SignalContribution`.
struct RawWeight {
    /// Confidence points before `weight_signal` multiplies in the normative and
    /// reproducibility factors.
    confidence: f32,
    /// Impact points; `weight_signal` copies this value through without scaling.
    impact: u8,
    /// Family tag carried over unchanged into the resulting contribution.
    family: SignalFamily,
}

/// Converts a signal into a family-tagged contribution with normative and reproducibility
/// modifiers applied.
pub(crate) fn weight_signal(
    signal: &Signal,
    strength: NormativeStrength,
    reproducibility: f32,
) -> SignalContribution {
    let raw = raw_weight(signal);
    let normative = normative_multiplier(strength);
    let adjusted_confidence = raw.confidence * normative * reproducibility;

    SignalContribution {
        family: raw.family,
        confidence: adjusted_confidence,
        impact: raw.impact,
        description: signal.evidence.clone(),
    }
}

/// Selects the raw confidence and impact points for a signal by dispatching on its kind.
///
/// Kinds without a dedicated weighting function contribute zero confidence and zero
/// impact, tagged with whatever family `signal_kind_family` reports for that kind.
fn raw_weight(signal: &Signal) -> RawWeight {
    let kind = signal.kind;
    match kind {
        SignalKind::BodyDiff => body_diff_weight(signal),
        SignalKind::HeaderValue => header_value_weight(signal),
        SignalKind::HeaderPresence => header_presence_weight(signal),
        SignalKind::MetadataLeak => metadata_leak_weight(signal),
        // Every other kind is neutral: it adds nothing to confidence or impact.
        _ => RawWeight {
            family: signal_kind_family(kind),
            confidence: 0.0,
            impact: 0,
        },
    }
}

/// Raw weight for a header-presence signal, chosen by which header name the evidence mentions.
///
/// The evidence text is lowercased and tested for each known header name as a substring,
/// in the order listed below; the first name found decides the weight. Evidence that
/// mentions none of them gets a small General-family confidence with no impact.
///
/// NOTE(review): matching is by substring, so `"allow"` would also match evidence that
/// mentions e.g. `access-control-allow-origin` — confirm this is intended.
fn header_presence_weight(signal: &Signal) -> RawWeight {
    let text = signal.evidence.to_lowercase();
    let mentions = |header: &str| text.contains(header);

    let (confidence, impact, family) = if mentions("content-range") {
        (12.0, 8, SignalFamily::Range)
    } else if mentions("etag") {
        (10.0, 5, SignalFamily::CacheValidator)
    } else if mentions("last-modified") {
        (8.0, 5, SignalFamily::CacheValidator)
    } else if mentions("accept-ranges") {
        (5.0, 0, SignalFamily::Range)
    } else if mentions("www-authenticate") {
        (8.0, 8, SignalFamily::Auth)
    } else if mentions("allow") {
        (6.0, 5, SignalFamily::General)
    } else {
        // No recognised header name in the evidence.
        (3.0, 0, SignalFamily::General)
    };

    RawWeight { confidence, impact, family }
}

/// Raw weight for a metadata-leak signal.
///
/// Evidence mentioning `content-range` is always tagged with the Range family; otherwise
/// the family is inferred from the evidence text by `header_family_from_evidence`. A
/// `content-range` leak that also mentions `size` gets the high-impact weight; every
/// other leak gets the default weight.
fn metadata_leak_weight(signal: &Signal) -> RawWeight {
    let text = signal.evidence.to_lowercase();
    let has_content_range = text.contains("content-range");

    let family = if has_content_range {
        SignalFamily::Range
    } else {
        header_family_from_evidence(&text)
    };

    let (confidence, impact) = if has_content_range && text.contains("size") {
        (5.0, 15)
    } else {
        // Covers `etag` leaks as well: they carry the same points as the default.
        (3.0, 5)
    };

    RawWeight { confidence, impact, family }
}

/// Raw weight for a body-differential signal; both variants belong to the ErrorBody family.
///
/// A difference in body content is treated as a primary signal, comparable to a status
/// code differential: when status codes match (`base_confidence` = 0) it carries the full
/// finding, so it gets the larger weight. Evidence that mentions `content-type` is a
/// supporting signal (it points at different response pipelines) and gets the smaller one.
fn body_diff_weight(signal: &Signal) -> RawWeight {
    let is_content_type_mismatch = signal.evidence.to_lowercase().contains("content-type");

    let (confidence, impact) = if is_content_type_mismatch {
        (25.0, 10)
    } else {
        (70.0, 15)
    };

    RawWeight { confidence, impact, family: SignalFamily::ErrorBody }
}

/// Raw weight for a header-value differential signal.
///
/// The points are fixed; only the family varies, inferred from the lowercased evidence
/// text by `header_family_from_evidence`.
fn header_value_weight(signal: &Signal) -> RawWeight {
    let lowered = signal.evidence.to_lowercase();
    RawWeight {
        family: header_family_from_evidence(&lowered),
        confidence: 4.0,
        impact: 3,
    }
}

/// Maps evidence text to a signal family via the first known header name it contains.
///
/// Names are tried in the order listed in `KNOWN_HEADERS` using a case-sensitive substring
/// test, so callers are expected to pass already-lowercased text (both callers in this
/// module do). The first hit is handed to `header_family`; with no hit the family is
/// General.
fn header_family_from_evidence(evidence: &str) -> SignalFamily {
    const KNOWN_HEADERS: &[&str] = &[
        "content-range",
        "accept-ranges",
        "etag",
        "last-modified",
        "www-authenticate",
        "allow",
    ];

    let first_match = KNOWN_HEADERS.iter().find(|name| evidence.contains(**name));
    match first_match {
        Some(name) => header_family(name),
        None => SignalFamily::General,
    }
}

/// Returns the normative strength multiplier for per-signal confidence adjustment.
fn normative_multiplier(strength: NormativeStrength) -> f32 {
    match strength {
        NormativeStrength::Must | NormativeStrength::MustNot => 1.0,
        NormativeStrength::Should => 0.9,
        NormativeStrength::May => 0.75,
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use parlov_core::Signal;

    /// Absolute tolerance used when comparing floating-point confidence values.
    const TOLERANCE: f32 = 0.01;

    /// Evidence text shared by the ETag header-presence tests.
    const ETAG_EVIDENCE: &str = "etag present in baseline";

    /// Builds a signal of the given kind with the given evidence and no RFC basis.
    fn signal(kind: SignalKind, evidence: &str) -> Signal {
        Signal { kind, evidence: evidence.into(), rfc_basis: None }
    }

    /// Builds a signal and runs it through `weight_signal` in one step.
    fn weigh(
        kind: SignalKind,
        evidence: &str,
        strength: NormativeStrength,
        reproducibility: f32,
    ) -> SignalContribution {
        weight_signal(&signal(kind, evidence), strength, reproducibility)
    }

    /// Asserts that a contribution's confidence is within `TOLERANCE` of `expected`.
    #[track_caller]
    fn assert_confidence(contribution: &SignalContribution, expected: f32) {
        assert!((contribution.confidence - expected).abs() < TOLERANCE);
    }

    #[test]
    fn etag_presence_weight() {
        let c = weigh(SignalKind::HeaderPresence, ETAG_EVIDENCE, NormativeStrength::Must, 1.0);
        assert_confidence(&c, 10.0);
        assert_eq!(c.impact, 5);
        assert_eq!(c.family, SignalFamily::CacheValidator);
    }

    #[test]
    fn content_range_size_leak_weight() {
        let c = weigh(
            SignalKind::MetadataLeak,
            "Content-Range leaks total resource size: 1024 bytes",
            NormativeStrength::Must,
            1.0,
        );
        assert_confidence(&c, 5.0);
        assert_eq!(c.impact, 15);
        assert_eq!(c.family, SignalFamily::Range);
    }

    #[test]
    fn normative_should_reduces_confidence() {
        let c = weigh(SignalKind::HeaderPresence, ETAG_EVIDENCE, NormativeStrength::Should, 1.0);
        // 10.0 * 0.9 = 9.0
        assert_confidence(&c, 9.0);
    }

    #[test]
    fn normative_may_reduces_confidence() {
        let c = weigh(SignalKind::HeaderPresence, ETAG_EVIDENCE, NormativeStrength::May, 1.0);
        // 10.0 * 0.75 = 7.5
        assert_confidence(&c, 7.5);
    }

    #[test]
    fn reproducibility_reduces_confidence() {
        let c = weigh(SignalKind::HeaderPresence, ETAG_EVIDENCE, NormativeStrength::Must, 0.7);
        // 10.0 * 0.7 = 7.0
        assert_confidence(&c, 7.0);
        // Impact must stay the same regardless of reproducibility.
        assert_eq!(c.impact, 5);
    }

    #[test]
    fn www_authenticate_weight() {
        let c = weigh(
            SignalKind::HeaderPresence,
            "www-authenticate present in baseline, absent in probe",
            NormativeStrength::Must,
            1.0,
        );
        assert_confidence(&c, 8.0);
        assert_eq!(c.impact, 8);
        assert_eq!(c.family, SignalFamily::Auth);
    }

    #[test]
    fn body_diff_content_weight() {
        let c = weigh(
            SignalKind::BodyDiff,
            "body length: 27 (baseline) vs 45 (probe)",
            NormativeStrength::Must,
            1.0,
        );
        assert_confidence(&c, 70.0);
        assert_eq!(c.impact, 15);
        assert_eq!(c.family, SignalFamily::ErrorBody);
    }

    #[test]
    fn body_diff_content_type_weight() {
        let c = weigh(
            SignalKind::BodyDiff,
            "content-type: application/json (baseline) vs text/html (probe)",
            NormativeStrength::Must,
            1.0,
        );
        assert_confidence(&c, 25.0);
        assert_eq!(c.impact, 10);
        assert_eq!(c.family, SignalFamily::ErrorBody);
    }

    #[test]
    fn body_diff_normative_may_reduces_confidence() {
        let c = weigh(
            SignalKind::BodyDiff,
            "body content differs (same length: 100 bytes)",
            NormativeStrength::May,
            1.0,
        );
        // 70.0 * 0.75 = 52.5
        assert_confidence(&c, 52.5);
    }
}