// parlov-analysis 0.7.0
//
// Analysis engine trait and signal detection for parlov.
// Documentation
use parlov_core::{DifferentialSet, Signal, SignalKind};

/// Pushes metadata leak signals into `out` from baseline response headers.
///
/// Looks for `Content-Range` headers that leak total resource size (e.g. `bytes */N` or
/// `bytes 0-M/N`) and `ETag` values that leak version identifiers. Only inspects the baseline
/// (existing resource) side since metadata leaks require the resource to exist.
pub fn extract_into(data: &DifferentialSet, out: &mut Vec<Signal>) {
    let Some(baseline) = data.baseline.last() else {
        return;
    };

    let headers = &baseline.response.headers;

    if let Some(cr) = headers.get("content-range").and_then(|v| v.to_str().ok()) {
        if let Some(signal) = parse_content_range_leak(cr) {
            out.push(signal);
        }
    }

    if let Some(etag) = headers.get("etag").and_then(|v| v.to_str().ok()) {
        out.push(Signal {
            kind: SignalKind::MetadataLeak,
            evidence: format!("ETag value \"{etag}\" leaks resource version identifier"),
            rfc_basis: Some("RFC 9110 §8.8.3".into()),
        });
    }
}

/// Collects metadata leak signals from the baseline response headers into a new vector.
///
/// Convenience form of `extract_into` for callers that want an owned `Vec` rather than
/// appending to an existing one. The returned vector is empty when nothing is found.
#[must_use]
pub fn extract(data: &DifferentialSet) -> Vec<Signal> {
    let mut signals = Vec::new();
    extract_into(data, &mut signals);
    signals
}

/// Parses a `Content-Range` header value for a leaked total resource size.
///
/// The total is the component after the final `/`, as in `bytes */N` or `bytes 0-M/N`.
///
/// Returns a `MetadataLeak` signal when that component, with surrounding whitespace trimmed,
/// is a non-empty run of ASCII digits. Returns `None` when the value contains no `/`, when
/// the total is `*` (size unknown), or when the total is empty or not purely numeric.
fn parse_content_range_leak(value: &str) -> Option<Signal> {
    // `rsplit_once` yields `None` when there is no `/` at all, so a malformed value that is
    // just a number (e.g. `500`) is not mistaken for a total size.
    let (_, after_slash) = value.rsplit_once('/')?;
    let total = after_slash.trim();

    // Rejects `*`, the empty string, and anything containing a non-digit.
    if total.is_empty() || !total.bytes().all(|b| b.is_ascii_digit()) {
        return None;
    }

    Some(Signal {
        kind: SignalKind::MetadataLeak,
        evidence: format!("Content-Range leaks total resource size: {total} bytes"),
        rfc_basis: Some("RFC 9110 §14.4".into()),
    })
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::signals::tests::single_diff_set_with_baseline_headers;
    use http::{HeaderMap, HeaderName, HeaderValue};

    /// Builds a `HeaderMap` from `(name, value)` string pairs.
    ///
    /// Panics if a name or value is rejected by the `http` crate.
    fn headers_with(pairs: &[(&str, &str)]) -> HeaderMap {
        let mut headers = HeaderMap::new();
        for (name, value) in pairs {
            let name = HeaderName::from_bytes(name.as_bytes()).expect("valid header name");
            let value = HeaderValue::from_str(value).expect("valid header value");
            headers.insert(name, value);
        }
        headers
    }

    /// A partial-content range with a numeric total yields one leak naming that total.
    #[test]
    fn content_range_with_total_size_produces_leak() {
        let ds = single_diff_set_with_baseline_headers(
            206,
            404,
            headers_with(&[("content-range", "bytes 0-99/500")]),
        );
        let found = extract(&ds);
        assert_eq!(found.len(), 1);
        let leak = &found[0];
        assert_eq!(leak.kind, SignalKind::MetadataLeak);
        assert!(leak.evidence.contains("500"));
    }

    /// An unsatisfied-range form (`bytes */N`) still exposes the total.
    #[test]
    fn content_range_unsatisfied_with_total_produces_leak() {
        let ds = single_diff_set_with_baseline_headers(
            416,
            404,
            headers_with(&[("content-range", "bytes */1024")]),
        );
        let found = extract(&ds);
        assert_eq!(found.len(), 1);
        assert!(found[0].evidence.contains("1024"));
    }

    /// A `*` total means the size is unknown, so nothing is reported.
    #[test]
    fn content_range_with_star_total_produces_no_leak() {
        let ds = single_diff_set_with_baseline_headers(
            206,
            404,
            headers_with(&[("content-range", "bytes 0-99/*")]),
        );
        let found = extract(&ds);
        assert!(found.is_empty());
    }

    /// Any `ETag` header is reported, with its value included in the evidence.
    #[test]
    fn etag_produces_metadata_leak() {
        let ds = single_diff_set_with_baseline_headers(
            200,
            404,
            headers_with(&[("etag", "\"v2-abc123\"")]),
        );
        let found = extract(&ds);
        assert_eq!(found.len(), 1);
        let leak = &found[0];
        assert_eq!(leak.kind, SignalKind::MetadataLeak);
        assert!(leak.evidence.contains("v2-abc123"));
    }

    /// A baseline response without the inspected headers produces nothing.
    #[test]
    fn no_metadata_headers_produces_no_signals() {
        let ds = single_diff_set_with_baseline_headers(200, 404, HeaderMap::new());
        let found = extract(&ds);
        assert!(found.is_empty());
    }

    /// With no baseline exchanges at all, extraction returns an empty vector.
    #[test]
    fn empty_baseline_produces_no_signals() {
        let ds = crate::signals::tests::diff_set_with_statuses(&[], &[404]);
        let found = extract(&ds);
        assert!(found.is_empty());
    }
}