parlov_analysis/aggregation/surface.rs
1//! Surface-relevance gate for evidence modifiers.
2//!
3//! Detects when a `SameStatus` Contradictory is mis-surfaced — i.e., the technique declares
4//! `Status` as its primary surface, the `SameStatus` arm fired (statuses are equal), but the
5//! body or headers show a significant differential. The actual oracle signal is on a non-status
6//! surface, so the status-equality Contradictory conclusion is wrong and the outcome should
7//! downgrade to Inapplicable.
8
9use bytes::Bytes;
10use http::HeaderMap;
11use parlov_core::{DifferentialSet, ProbeExchange, SignalSurface, Technique};
12
/// Cut-off for the body divergence ratio: a ratio strictly greater than this value marks the
/// surface as mismatched. The ratio is the one produced by [`body_diff_ratio`] — differing byte
/// positions plus the length difference, divided by the longer body's length.
///
/// Calibrated at 10%. That is high enough to absorb minor noise, such as one dynamic field
/// (nonce, timestamp, request ID) inside an otherwise identical body, and low enough to catch
/// meaningful divergence (different resource payloads, different error structures). Ratios at
/// or below the cut-off are treated as cosmetic/templating variation on the same code path.
///
/// The comparison is positional, so the noise tolerance only holds while the dynamic field
/// keeps its length; a field that grows or shrinks shifts every following byte.
const BODY_SURFACE_MISMATCH_THRESHOLD: f64 = 0.10;
21
/// Cut-off for the header divergence ratio: a ratio strictly greater than this value marks the
/// surface as mismatched. The ratio is the one produced by [`header_diff_ratio`] — diverged
/// entries (key missing on one side, or a shared key with different values) over the number of
/// distinct keys across both sides.
///
/// Calibrated at 50%. When most header keys diverge the two responses come from structurally
/// different paths (e.g. authenticated vs unauthenticated) rather than from incidental
/// per-request variation. A header set has far fewer entries than a body has bytes, so a lower
/// cut-off would false-positive on a single differing header (e.g. `X-Request-Id` present on
/// only one side).
const HEADER_SURFACE_MISMATCH_THRESHOLD: f64 = 0.50;
30
/// Outcome of the surface-relevance gate, as produced by [`surface_relevance`].
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum SurfaceDecision {
    /// The technique's surface is relevant; the payload is a confidence in `[0.0, 1.0]`.
    Reached(f64),
    /// Hard block: the differential sits on a different surface than the one the technique
    /// tests.
    Blocked,
}

impl SurfaceDecision {
    /// Collapses the decision into a plain number.
    ///
    /// `Blocked` maps to `0.0`; `Reached(c)` yields the carried confidence `c` unchanged.
    #[must_use]
    pub fn confidence(self) -> f64 {
        // Both variants are listed explicitly so that adding a variant is a compile error here.
        match self {
            Self::Blocked => 0.0,
            Self::Reached(value) => value,
        }
    }
}
50
51/// Computes the surface relevance for a technique against a probe pair.
52///
53/// When the technique's `contradiction_surface` is `Status` and the `SameStatus` path has fired
54/// (statuses are equal — implicit since this is called from the `SameStatus` arm), inspect the
55/// body and headers. A meaningful body or header differential indicates the actual oracle signal
56/// is on a non-status surface — the status-equality Contradictory is mis-surfaced.
57///
58/// Returns `Reached(1.0)` for technique surfaces other than `Status` (those techniques have
59/// `normalization_weight: None` and shouldn't reach the `SameStatus` arm anyway, but the gate
60/// is defensive).
61#[must_use]
62pub fn surface_relevance(technique: &Technique, differential: &DifferentialSet) -> SurfaceDecision {
63 let Some((b, p)) = first_pair(differential) else {
64 return SurfaceDecision::Reached(1.0);
65 };
66 match technique.contradiction_surface {
67 SignalSurface::Status => {
68 if body_diff_ratio(&b.response.body, &p.response.body) > BODY_SURFACE_MISMATCH_THRESHOLD
69 || header_diff_ratio(&b.response.headers, &p.response.headers)
70 > HEADER_SURFACE_MISMATCH_THRESHOLD
71 {
72 SurfaceDecision::Blocked
73 } else {
74 SurfaceDecision::Reached(1.0)
75 }
76 }
77 SignalSurface::Body
78 | SignalSurface::Headers
79 | SignalSurface::Timing
80 | SignalSurface::Composite => SurfaceDecision::Reached(1.0),
81 }
82}
83
84fn first_pair(differential: &DifferentialSet) -> Option<(&ProbeExchange, &ProbeExchange)> {
85 let b = differential.baseline.first()?;
86 let p = differential.probe.first()?;
87 Some((b, p))
88}
89
90/// Body content divergence ratio in `[0.0, 1.0]`.
91///
92/// Counts differing byte positions across the overlapping range plus the length difference,
93/// normalized by the maximum length. Equal bodies return `0.0`; completely disjoint same-length
94/// bodies return `1.0`; partially overlapping bodies return a fractional ratio. Two empty
95/// bodies return `0.0`.
96#[must_use]
97#[allow(clippy::cast_precision_loss)] // body lengths are bounded well below 2^52 in practice
98pub fn body_diff_ratio(a: &Bytes, b: &Bytes) -> f64 {
99 let max_len = a.len().max(b.len());
100 if max_len == 0 {
101 return 0.0;
102 }
103 let min_len = a.len().min(b.len());
104 let differing_positions = a.iter().zip(b.iter()).filter(|(x, y)| x != y).count();
105 let length_diff = max_len - min_len;
106 (differing_positions + length_diff) as f64 / max_len as f64
107}
108
109/// Header divergence ratio in `[0.0, 1.0]`.
110///
111/// Counts diverged entries: keys present on only one side, plus shared keys whose full value
112/// sequence differs (multi-value headers compared in order). Normalized by the union of unique
113/// keys. Two empty header maps return `0.0`.
114#[must_use]
115#[allow(clippy::cast_precision_loss)] // key counts are bounded well below 2^52 in practice
116pub fn header_diff_ratio(a: &HeaderMap, b: &HeaderMap) -> f64 {
117 let mut all_keys: std::collections::HashSet<&http::HeaderName> =
118 std::collections::HashSet::new();
119 all_keys.extend(a.keys());
120 all_keys.extend(b.keys());
121 if all_keys.is_empty() {
122 return 0.0;
123 }
124 let diverged = all_keys
125 .iter()
126 .filter(|k| {
127 let av: Vec<&[u8]> = a
128 .get_all(**k)
129 .iter()
130 .map(http::HeaderValue::as_bytes)
131 .collect();
132 let bv: Vec<&[u8]> = b
133 .get_all(**k)
134 .iter()
135 .map(http::HeaderValue::as_bytes)
136 .collect();
137 av != bv
138 })
139 .count();
140 diverged as f64 / all_keys.len() as f64
141}
142
143#[cfg(test)]
144#[path = "surface_tests.rs"]
145mod tests;