parlov-analysis 0.7.0

Analysis engine trait and signal detection for parlov.
Documentation
//! Surface-relevance gate for evidence modifiers.
//!
//! Detects when a `SameStatus` Contradictory is mis-surfaced — i.e., the technique declares
//! `Status` as its primary surface, the `SameStatus` arm fired (statuses are equal), but the
//! body or headers show a significant differential. The actual oracle signal is on a non-status
//! surface, so the status-equality Contradictory conclusion is wrong and the outcome should
//! downgrade to Inapplicable.

use bytes::Bytes;
use http::HeaderMap;
use parlov_core::{DifferentialSet, ProbeExchange, SignalSurface, Technique};

/// Body content divergence ratio above which the surface is considered mismatched. Counts
/// differing byte positions plus length difference, normalized by the maximum body length.
///
/// Calibrated at 10%: catches meaningful content divergence (different resource payloads,
/// different error structures) while tolerating minor noise such as a single dynamic field
/// (nonce, timestamp, request ID) embedded in an otherwise identical body. Values below
/// this threshold are consistent with cosmetic/templating variation on the same code path.
///
/// The gate compares with a strict `>`, so a ratio of exactly `0.10` does not trip it.
///
/// NOTE(review): [`body_diff_ratio`] compares bytes position by position, so the
/// "single dynamic field" tolerance holds only when that field has the same length in both
/// bodies; a length change shifts every later byte out of alignment. Confirm this is
/// acceptable for the responses being probed.
const BODY_SURFACE_MISMATCH_THRESHOLD: f64 = 0.10;

/// Header divergence ratio above which the surface is considered mismatched. Counts diverged
/// entries (key absent on one side, or shared key whose values differ) over the union size.
///
/// Calibrated at 50%: a majority of header keys diverging indicates structurally different
/// response paths (e.g. authenticated vs unauthenticated), not incidental per-request variation.
/// Headers have far fewer entries than body bytes, so a lower threshold would false-positive on
/// single-header differences (e.g. `X-Request-Id` absent on one side).
///
/// The gate compares with a strict `>`, so a ratio of exactly `0.50` (for example one diverged
/// key out of two) is still treated as matching.
const HEADER_SURFACE_MISMATCH_THRESHOLD: f64 = 0.50;

/// Outcome of the surface-relevance gate, as produced by [`surface_relevance`].
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum SurfaceDecision {
    /// The technique's surface is relevant. The payload is the confidence, expected to lie in
    /// `[0.0, 1.0]` (the range is a convention, not enforced by the type).
    Reached(f64),
    /// Hard block: the differential sits on a surface other than the one the technique tests.
    Blocked,
}

impl SurfaceDecision {
    /// Collapses the decision into a plain number: `0.0` for `Blocked`, and the wrapped
    /// confidence for `Reached`.
    #[must_use]
    pub fn confidence(self) -> f64 {
        // Kept as an exhaustive `match` so a new variant forces an explicit choice here.
        match self {
            Self::Blocked => 0.0,
            Self::Reached(confidence) => confidence,
        }
    }
}

/// Decides whether a `SameStatus` Contradictory can be trusted for `technique`.
///
/// Only techniques whose `contradiction_surface` is [`SignalSurface::Status`] are gated. The
/// caller is expected to be the `SameStatus` arm, so status equality is assumed here rather
/// than re-checked. For such a technique the first baseline exchange is compared with the first
/// probe exchange: when the body divergence exceeds [`BODY_SURFACE_MISMATCH_THRESHOLD`] or the
/// header divergence exceeds [`HEADER_SURFACE_MISMATCH_THRESHOLD`], the real oracle signal is on
/// a non-status surface and the result is `Blocked`.
///
/// Returns `Reached(1.0)` in every other case: the differences stay within both thresholds,
/// either side of the differential has no exchange to compare, or the technique's surface is
/// not `Status` (those techniques have `normalization_weight: None` and shouldn't reach the
/// `SameStatus` arm anyway, but the gate is defensive).
#[must_use]
pub fn surface_relevance(technique: &Technique, differential: &DifferentialSet) -> SurfaceDecision {
    // Exhaustive on purpose: adding a surface variant must force a decision at this gate.
    match technique.contradiction_surface {
        SignalSurface::Status => {}
        SignalSurface::Body
        | SignalSurface::Headers
        | SignalSurface::Timing
        | SignalSurface::Composite => return SurfaceDecision::Reached(1.0),
    }

    let Some((baseline, probe)) = first_pair(differential) else {
        // Nothing to compare, so there is no evidence of a mis-surfaced signal.
        return SurfaceDecision::Reached(1.0);
    };

    let body_diverges = body_diff_ratio(&baseline.response.body, &probe.response.body)
        > BODY_SURFACE_MISMATCH_THRESHOLD;
    // `||` short-circuits: headers are only inspected when the bodies are close enough.
    let mis_surfaced = body_diverges
        || header_diff_ratio(&baseline.response.headers, &probe.response.headers)
            > HEADER_SURFACE_MISMATCH_THRESHOLD;

    if mis_surfaced {
        SurfaceDecision::Blocked
    } else {
        SurfaceDecision::Reached(1.0)
    }
}

/// Pairs the first baseline exchange with the first probe exchange.
///
/// Returns `None` when either side of the differential has no exchanges.
fn first_pair(differential: &DifferentialSet) -> Option<(&ProbeExchange, &ProbeExchange)> {
    differential
        .baseline
        .first()
        .zip(differential.probe.first())
}

/// Fraction of the longer body that differs from the other body, in `[0.0, 1.0]`.
///
/// Bytes are compared position by position over the shared prefix length; every byte of the
/// longer body beyond that prefix also counts as a difference. The total is divided by the
/// longer body's length. Identical bodies give `0.0`, same-length bodies that differ at every
/// position give `1.0`, and two empty bodies give `0.0`.
#[must_use]
#[allow(clippy::cast_precision_loss)] // body lengths are bounded well below 2^52 in practice
pub fn body_diff_ratio(a: &Bytes, b: &Bytes) -> f64 {
    let (shorter, longer) = if a.len() <= b.len() {
        (a.len(), b.len())
    } else {
        (b.len(), a.len())
    };
    if longer == 0 {
        // Both bodies are empty; avoid dividing by zero.
        return 0.0;
    }
    // `zip` stops at the shorter body, so this only covers the overlapping range.
    let mismatched: usize = a
        .iter()
        .zip(b.iter())
        .map(|(left, right)| usize::from(left != right))
        .sum();
    let unmatched_tail = longer - shorter;
    (mismatched + unmatched_tail) as f64 / longer as f64
}

/// Header divergence ratio in `[0.0, 1.0]`.
///
/// A header name counts as diverged when it is present in only one map, or present in both
/// with a different value sequence (multi-value headers are compared in iteration order, byte
/// for byte). The diverged count is divided by the number of distinct names across both maps.
/// Two empty header maps return `0.0`.
#[must_use]
#[allow(clippy::cast_precision_loss)] // key counts are bounded well below 2^52 in practice
pub fn header_diff_ratio(a: &HeaderMap, b: &HeaderMap) -> f64 {
    let all_keys: std::collections::HashSet<&http::HeaderName> =
        a.keys().chain(b.keys()).collect();
    if all_keys.is_empty() {
        return 0.0;
    }
    let diverged = all_keys
        .iter()
        .filter(|&&name| {
            // Compare the two value sequences lazily rather than collecting each side into a
            // `Vec` per key. A name missing from one map yields an empty sequence there, which
            // compares unequal to the non-empty sequence on the other side.
            a.get_all(name)
                .iter()
                .map(http::HeaderValue::as_bytes)
                .ne(b.get_all(name).iter().map(http::HeaderValue::as_bytes))
        })
        .count();
    diverged as f64 / all_keys.len() as f64
}

// Unit tests for this module are loaded from `surface_tests.rs` (via `#[path]`) and are only
// compiled for test builds.
#[cfg(test)]
#[path = "surface_tests.rs"]
mod tests;