// parlov-analysis 0.7.0
//
// Analysis engine trait and signal detection for parlov.
// Documentation
use std::collections::BTreeSet;

use parlov_core::{DifferentialSet, Signal, SignalKind};

/// Headers worth flagging for existence oracle detection.
///
/// `extract_into` keeps a response header only when its name, as returned by
/// `http::HeaderName::as_str`, is contained in this list. The comparison is an exact string
/// match, and all entries are lowercase.
const NOTABLE_HEADERS: &[&str] = &[
    "etag",
    "last-modified",
    "content-range",
    "accept-ranges",
    "www-authenticate",
    "allow",
    "location",
];

/// Pushes header differential signals into `out`.
///
/// Compares the response headers of the last baseline exchange with those of the last probe
/// exchange. Nothing is pushed when either side has no exchanges. Only names listed in
/// `NOTABLE_HEADERS` are considered, and they are visited in sorted order:
///
/// - a header present on exactly one side yields a `HeaderPresence` signal;
/// - a header present on both sides with different raw bytes yields a `HeaderValue` signal.
///
/// Presence is decided by the header map alone and values are compared as bytes, so a value
/// that is not visible ASCII is never mistaken for a missing header. Such bytes are rendered
/// lossily as UTF-8 in the evidence text; visible-ASCII values are rendered verbatim.
///
/// Only the value returned by `get` is inspected for each name.
pub fn extract_into(data: &DifferentialSet, out: &mut Vec<Signal>) {
    let (Some(baseline), Some(probe)) = (data.baseline.last(), data.probe.last()) else {
        return;
    };

    let b_headers = &baseline.response.headers;
    let p_headers = &probe.response.headers;

    // A set deduplicates names that occur on both sides and fixes the output order.
    let names: BTreeSet<&str> = b_headers
        .keys()
        .chain(p_headers.keys())
        .map(http::HeaderName::as_str)
        .filter(|n| NOTABLE_HEADERS.contains(n))
        .collect();

    for name in names {
        let (kind, evidence) = match (b_headers.get(name), p_headers.get(name)) {
            (Some(_), None) => (
                SignalKind::HeaderPresence,
                format!("{name} present in baseline, absent in probe"),
            ),
            (None, Some(_)) => (
                SignalKind::HeaderPresence,
                format!("{name} absent in baseline, present in probe"),
            ),
            // Byte-wise comparison: going through `to_str()` would drop values that are not
            // visible ASCII and misreport the header as absent.
            (Some(b), Some(p)) if b != p => {
                let b = String::from_utf8_lossy(b.as_bytes());
                let p = String::from_utf8_lossy(p.as_bytes());
                (
                    SignalKind::HeaderValue,
                    format!("{name}: \"{b}\" (baseline) vs \"{p}\" (probe)"),
                )
            }
            // Identical on both sides: nothing to report.
            _ => continue,
        };

        out.push(Signal {
            kind,
            evidence,
            rfc_basis: None,
        });
    }
}

/// Convenience wrapper around [`extract_into`] that collects the signals into a new vector.
///
/// Returns every signal `extract_into` pushes for `data`, in the order they were pushed. The
/// vector is empty when nothing was pushed.
#[must_use]
pub fn extract(data: &DifferentialSet) -> Vec<Signal> {
    let mut signals: Vec<Signal> = Vec::new();
    extract_into(data, &mut signals);
    signals
}

/// Lists the rate-limit headers that `baseline` carries and `probe` lacks.
///
/// Walks `baseline.keys()` in iteration order and collects, as owned strings, each name that
/// [`is_rate_limit_header`] accepts and that `probe` has no entry for. Headers found only in
/// `probe` are never reported.
#[must_use]
pub fn rate_limit_diff(baseline: &http::HeaderMap, probe: &http::HeaderMap) -> Vec<String> {
    let mut missing = Vec::new();
    for header in baseline.keys() {
        let name = header.as_str();
        if is_rate_limit_header(name) && !probe.contains_key(header) {
            missing.push(name.to_owned());
        }
    }
    missing
}

/// Returns `true` for headers belonging to the rate-limit family.
///
/// A name belongs to the family when, after an optional leading `x-`, it is exactly `ratelimit`
/// or starts with `ratelimit-`. HTTP field names are case-insensitive, so the match ignores
/// ASCII case: `X-RateLimit-Limit` is accepted just like `x-ratelimit-limit`. Look-alikes such
/// as `x-rate-limit-limit` or `ratelimiter` are rejected.
#[must_use]
pub fn is_rate_limit_header(name: &str) -> bool {
    /// Strips `prefix` from the front of `bytes`, ignoring ASCII case.
    fn strip_prefix_ignore_case<'a>(bytes: &'a [u8], prefix: &[u8]) -> Option<&'a [u8]> {
        if bytes.len() < prefix.len() {
            return None;
        }
        let (head, rest) = bytes.split_at(prefix.len());
        head.eq_ignore_ascii_case(prefix).then_some(rest)
    }

    // Work on bytes so slicing can never land inside a multi-byte character.
    let bytes = name.as_bytes();
    let unprefixed = strip_prefix_ignore_case(bytes, b"x-").unwrap_or(bytes);

    match strip_prefix_ignore_case(unprefixed, b"ratelimit") {
        // Either the bare family name, or the family name followed by a `-suffix`.
        Some(rest) => rest.first().map_or(true, |&b| b == b'-'),
        None => false,
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::signals::tests::{diff_set_with_statuses, single_diff_set_with_headers};
    use http::{HeaderMap, HeaderName, HeaderValue};

    /// Builds a `HeaderMap` from `(name, value)` pairs.
    ///
    /// Panics on an invalid header name or value, which in a test means the fixture itself is
    /// wrong. Shared by every test in this module.
    fn headers_with(pairs: &[(&str, &str)]) -> HeaderMap {
        let mut map = HeaderMap::new();
        for &(name, value) in pairs {
            map.insert(
                HeaderName::from_bytes(name.as_bytes()).expect("valid header name"),
                HeaderValue::from_str(value).expect("valid header value"),
            );
        }
        map
    }

    // --- extract ---

    #[test]
    fn etag_present_in_baseline_only() {
        let b = headers_with(&[("etag", "\"abc\"")]);
        let p = HeaderMap::new();
        let ds = single_diff_set_with_headers(200, 404, b, p);
        let signals = extract(&ds);
        assert_eq!(signals.len(), 1);
        assert_eq!(signals[0].kind, SignalKind::HeaderPresence);
        assert!(signals[0].evidence.contains("etag"));
        assert!(signals[0].evidence.contains("baseline"));
    }

    #[test]
    fn www_authenticate_present_in_probe_only() {
        let b = HeaderMap::new();
        let p = headers_with(&[("www-authenticate", "Bearer")]);
        let ds = single_diff_set_with_headers(200, 401, b, p);
        let signals = extract(&ds);
        assert_eq!(signals.len(), 1);
        assert_eq!(signals[0].kind, SignalKind::HeaderPresence);
        assert!(signals[0].evidence.contains("www-authenticate"));
        assert!(signals[0].evidence.contains("probe"));
    }

    #[test]
    fn allow_header_different_values() {
        let b = headers_with(&[("allow", "GET, HEAD")]);
        let p = headers_with(&[("allow", "GET, HEAD, POST")]);
        let ds = single_diff_set_with_headers(405, 405, b, p);
        let signals = extract(&ds);
        assert_eq!(signals.len(), 1);
        assert_eq!(signals[0].kind, SignalKind::HeaderValue);
        assert!(signals[0].evidence.contains("allow"));
    }

    #[test]
    fn identical_notable_headers_produce_no_signal() {
        let b = headers_with(&[("etag", "\"abc\"")]);
        let p = headers_with(&[("etag", "\"abc\"")]);
        let ds = single_diff_set_with_headers(200, 200, b, p);
        assert!(extract(&ds).is_empty());
    }

    #[test]
    fn non_notable_headers_ignored() {
        let b = headers_with(&[("x-custom", "foo")]);
        let p = headers_with(&[("x-custom", "bar")]);
        let ds = single_diff_set_with_headers(200, 200, b, p);
        assert!(extract(&ds).is_empty());
    }

    #[test]
    fn location_present_in_baseline_only() {
        let b = headers_with(&[("location", "https://example.com/new")]);
        let p = HeaderMap::new();
        let ds = single_diff_set_with_headers(302, 404, b, p);
        let signals = extract(&ds);
        assert_eq!(signals.len(), 1);
        assert_eq!(signals[0].kind, SignalKind::HeaderPresence);
        assert!(signals[0].evidence.contains("location"));
        assert!(signals[0].evidence.contains("baseline"));
    }

    #[test]
    fn location_different_values() {
        let b = headers_with(&[("location", "https://a.com/old")]);
        let p = headers_with(&[("location", "https://b.com/new")]);
        let ds = single_diff_set_with_headers(301, 301, b, p);
        let signals = extract(&ds);
        assert_eq!(signals.len(), 1);
        assert_eq!(signals[0].kind, SignalKind::HeaderValue);
        assert!(signals[0].evidence.contains("location"));
    }

    #[test]
    fn empty_exchanges_produce_no_signals() {
        let ds = diff_set_with_statuses(&[], &[]);
        assert!(extract(&ds).is_empty());
    }

    // --- is_rate_limit_header ---

    #[test]
    fn ratelimit_remaining_is_rate_limit() {
        assert!(is_rate_limit_header("ratelimit-remaining"));
    }

    #[test]
    fn x_ratelimit_limit_is_rate_limit() {
        assert!(is_rate_limit_header("x-ratelimit-limit"));
    }

    #[test]
    fn ratelimit_bare_is_rate_limit() {
        assert!(is_rate_limit_header("ratelimit"));
    }

    #[test]
    fn x_ratelimit_bare_is_rate_limit() {
        assert!(is_rate_limit_header("x-ratelimit"));
    }

    #[test]
    fn etag_is_not_rate_limit() {
        assert!(!is_rate_limit_header("etag"));
    }

    #[test]
    fn x_rate_limit_wrong_prefix_is_not_rate_limit() {
        assert!(!is_rate_limit_header("x-rate-limit-limit"));
    }

    // --- rate_limit_diff ---

    #[test]
    fn rate_limit_header_in_baseline_only_returned() {
        let baseline = headers_with(&[("ratelimit-remaining", "99")]);
        let probe = HeaderMap::new();
        let diff = rate_limit_diff(&baseline, &probe);
        assert_eq!(diff, vec!["ratelimit-remaining"]);
    }

    #[test]
    fn rate_limit_header_in_both_not_returned() {
        let baseline = headers_with(&[("ratelimit-remaining", "99")]);
        let probe = headers_with(&[("ratelimit-remaining", "99")]);
        let diff = rate_limit_diff(&baseline, &probe);
        assert!(diff.is_empty());
    }

    #[test]
    fn rate_limit_header_in_probe_only_not_returned() {
        let baseline = HeaderMap::new();
        let probe = headers_with(&[("ratelimit-remaining", "99")]);
        let diff = rate_limit_diff(&baseline, &probe);
        assert!(diff.is_empty());
    }

    #[test]
    fn no_rate_limit_headers_on_either_side_is_empty() {
        let baseline = headers_with(&[("etag", "\"abc\"")]);
        let probe = headers_with(&[("content-type", "application/json")]);
        let diff = rate_limit_diff(&baseline, &probe);
        assert!(diff.is_empty());
    }

    #[test]
    fn multiple_rate_limit_headers_in_baseline_none_in_probe_all_returned() {
        let baseline =
            headers_with(&[("ratelimit-remaining", "99"), ("x-ratelimit-limit", "100")]);
        let probe = HeaderMap::new();
        // Sort before comparing so the assertion does not depend on map iteration order.
        let mut diff = rate_limit_diff(&baseline, &probe);
        diff.sort();
        assert_eq!(diff, vec!["ratelimit-remaining", "x-ratelimit-limit"]);
    }

    #[test]
    fn mixed_shared_and_unshared_rate_limit_headers_only_unshared_returned() {
        let baseline =
            headers_with(&[("ratelimit-remaining", "99"), ("x-ratelimit-limit", "100")]);
        let probe = headers_with(&[("x-ratelimit-limit", "100")]);
        let diff = rate_limit_diff(&baseline, &probe);
        assert_eq!(diff, vec!["ratelimit-remaining"]);
    }
}