Skip to main content

parlov_analysis/aggregation/
surface.rs

1//! Surface-relevance gate for evidence modifiers.
2//!
3//! Detects when a `SameStatus` Contradictory is mis-surfaced — i.e., the technique declares
4//! `Status` as its primary surface, the `SameStatus` arm fired (statuses are equal), but the
5//! body or headers show a significant differential. The actual oracle signal is on a non-status
6//! surface, so the status-equality Contradictory conclusion is wrong and the outcome should
7//! downgrade to Inapplicable.
8
9use bytes::Bytes;
10use http::HeaderMap;
11use parlov_core::{DifferentialSet, ProbeExchange, SignalSurface, Technique};
12
13/// Body content divergence ratio above which the surface is considered mismatched. Counts
14/// differing byte positions plus length difference, normalized by the maximum body length.
15///
16/// Calibrated at 10%: catches meaningful content divergence (different resource payloads,
17/// different error structures) while tolerating minor noise such as a single dynamic field
18/// (nonce, timestamp, request ID) embedded in an otherwise identical body. Values below
19/// this threshold are consistent with cosmetic/templating variation on the same code path.
///
/// The gate in [`surface_relevance`] compares with a strict `>`, so a ratio of exactly `0.10`
/// is still treated as within tolerance.
20const BODY_SURFACE_MISMATCH_THRESHOLD: f64 = 0.10;
21
22/// Header divergence ratio above which the surface is considered mismatched. Counts diverged
23/// entries (key absent on one side, or shared key whose values differ) over the union size.
24///
25/// Calibrated at 50%: a majority of header keys diverging indicates structurally different
26/// response paths (e.g. authenticated vs unauthenticated), not incidental per-request variation.
27/// Headers have far fewer entries than body bytes, so a lower threshold would false-positive on
28/// single-header differences (e.g. `X-Request-Id` absent on one side).
///
/// The gate in [`surface_relevance`] compares with a strict `>`, so exactly half of the keys
/// diverging is still treated as within tolerance.
29const HEADER_SURFACE_MISMATCH_THRESHOLD: f64 = 0.50;
30
/// Outcome of the surface-relevance gate computed by [`surface_relevance`].
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum SurfaceDecision {
    /// The technique's surface is the relevant one. The payload is the confidence, intended
    /// to lie in `[0.0, 1.0]` (the type itself does not enforce the range).
    Reached(f64),
    /// Hard block: the observed differential sits on a surface the technique does not test.
    Blocked,
}
39
40impl SurfaceDecision {
41    /// Numeric confidence: `Reached(c)` → `c`; `Blocked` → `0.0`.
42    #[must_use]
43    pub fn confidence(self) -> f64 {
44        match self {
45            Self::Reached(c) => c,
46            Self::Blocked => 0.0,
47        }
48    }
49}
50
51/// Computes the surface relevance for a technique against a probe pair.
52///
53/// When the technique's `contradiction_surface` is `Status` and the `SameStatus` path has fired
54/// (statuses are equal — implicit since this is called from the `SameStatus` arm), inspect the
55/// body and headers. A meaningful body or header differential indicates the actual oracle signal
56/// is on a non-status surface — the status-equality Contradictory is mis-surfaced.
57///
58/// Returns `Reached(1.0)` for technique surfaces other than `Status` (those techniques have
59/// `normalization_weight: None` and shouldn't reach the `SameStatus` arm anyway, but the gate
60/// is defensive).
61#[must_use]
62pub fn surface_relevance(technique: &Technique, differential: &DifferentialSet) -> SurfaceDecision {
63    let Some((b, p)) = first_pair(differential) else {
64        return SurfaceDecision::Reached(1.0);
65    };
66    match technique.contradiction_surface {
67        SignalSurface::Status => {
68            if body_diff_ratio(&b.response.body, &p.response.body) > BODY_SURFACE_MISMATCH_THRESHOLD
69                || header_diff_ratio(&b.response.headers, &p.response.headers)
70                    > HEADER_SURFACE_MISMATCH_THRESHOLD
71            {
72                SurfaceDecision::Blocked
73            } else {
74                SurfaceDecision::Reached(1.0)
75            }
76        }
77        SignalSurface::Body
78        | SignalSurface::Headers
79        | SignalSurface::Timing
80        | SignalSurface::Composite => SurfaceDecision::Reached(1.0),
81    }
82}
83
84fn first_pair(differential: &DifferentialSet) -> Option<(&ProbeExchange, &ProbeExchange)> {
85    let b = differential.baseline.first()?;
86    let p = differential.probe.first()?;
87    Some((b, p))
88}
89
90/// Body content divergence ratio in `[0.0, 1.0]`.
91///
92/// Counts differing byte positions across the overlapping range plus the length difference,
93/// normalized by the maximum length. Equal bodies return `0.0`; completely disjoint same-length
94/// bodies return `1.0`; partially overlapping bodies return a fractional ratio. Two empty
95/// bodies return `0.0`.
96#[must_use]
97#[allow(clippy::cast_precision_loss)] // body lengths are bounded well below 2^52 in practice
98pub fn body_diff_ratio(a: &Bytes, b: &Bytes) -> f64 {
99    let max_len = a.len().max(b.len());
100    if max_len == 0 {
101        return 0.0;
102    }
103    let min_len = a.len().min(b.len());
104    let differing_positions = a.iter().zip(b.iter()).filter(|(x, y)| x != y).count();
105    let length_diff = max_len - min_len;
106    (differing_positions + length_diff) as f64 / max_len as f64
107}
108
109/// Header divergence ratio in `[0.0, 1.0]`.
110///
111/// Counts diverged entries: keys present on only one side, plus shared keys whose full value
112/// sequence differs (multi-value headers compared in order). Normalized by the union of unique
113/// keys. Two empty header maps return `0.0`.
114#[must_use]
115#[allow(clippy::cast_precision_loss)] // key counts are bounded well below 2^52 in practice
116pub fn header_diff_ratio(a: &HeaderMap, b: &HeaderMap) -> f64 {
117    let mut all_keys: std::collections::HashSet<&http::HeaderName> =
118        std::collections::HashSet::new();
119    all_keys.extend(a.keys());
120    all_keys.extend(b.keys());
121    if all_keys.is_empty() {
122        return 0.0;
123    }
124    let diverged = all_keys
125        .iter()
126        .filter(|k| {
127            let av: Vec<&[u8]> = a
128                .get_all(**k)
129                .iter()
130                .map(http::HeaderValue::as_bytes)
131                .collect();
132            let bv: Vec<&[u8]> = b
133                .get_all(**k)
134                .iter()
135                .map(http::HeaderValue::as_bytes)
136                .collect();
137            av != bv
138        })
139        .count();
140    diverged as f64 / all_keys.len() as f64
141}
142
// Unit tests live in the sibling file `surface_tests.rs` and are compiled only for test builds.
143#[cfg(test)]
144#[path = "surface_tests.rs"]
145mod tests;