use parlov_core::{DifferentialSet, Signal, SignalKind};
pub fn extract_into(data: &DifferentialSet, out: &mut Vec<Signal>) {
let Some(baseline) = data.baseline.last() else {
return;
};
let headers = &baseline.response.headers;
if let Some(cr) = headers.get("content-range").and_then(|v| v.to_str().ok()) {
if let Some(signal) = parse_content_range_leak(cr) {
out.push(signal);
}
}
if let Some(etag) = headers.get("etag").and_then(|v| v.to_str().ok()) {
out.push(Signal {
kind: SignalKind::MetadataLeak,
evidence: format!("ETag value \"{etag}\" leaks resource version identifier"),
rfc_basis: Some("RFC 9110 §8.8.3".into()),
});
}
}
/// Collects the metadata-leak signals for `data` into a freshly allocated vector.
///
/// Convenience wrapper around [`extract_into`]; the returned vector is empty
/// when that function appends nothing.
#[must_use]
pub fn extract(data: &DifferentialSet) -> Vec<Signal> {
    let mut signals = Vec::new();
    extract_into(data, &mut signals);
    signals
}
/// Checks a `Content-Range` header value for a disclosed total resource size.
///
/// The text after the last `/` is trimmed and treated as the total. A
/// [`SignalKind::MetadataLeak`] signal is returned when that text is a non-empty
/// run of ASCII digits (e.g. `bytes 0-99/500` or `bytes */1024`).
///
/// Returns `None` when:
/// * the value contains no `/` at all (not a range/total pair),
/// * the total is `*` or any other non-numeric text,
/// * the total is empty.
fn parse_content_range_leak(value: &str) -> Option<Signal> {
    // `rsplit_once` yields `None` when there is no `/`, so a bare value such as
    // "500" is rejected instead of being mistaken for the total size.
    let (_, total) = value.rsplit_once('/')?;
    let total = total.trim();

    // "*" (and anything else non-numeric) fails the digit test, so it needs no
    // separate branch.
    let is_decimal = !total.is_empty() && total.bytes().all(|b| b.is_ascii_digit());

    is_decimal.then(|| Signal {
        kind: SignalKind::MetadataLeak,
        evidence: format!("Content-Range leaks total resource size: {total} bytes"),
        rfc_basis: Some("RFC 9110 §14.4".into()),
    })
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::signals::tests::single_diff_set_with_baseline_headers;
    use http::{HeaderMap, HeaderName, HeaderValue};

    /// Builds a `HeaderMap` from `(name, value)` string pairs.
    ///
    /// Panics when a name or value is not valid for an HTTP header; a later
    /// pair with the same name replaces the earlier one.
    fn headers_with(pairs: &[(&str, &str)]) -> HeaderMap {
        let mut headers = HeaderMap::new();
        for (name, value) in pairs.iter().copied() {
            let key = HeaderName::from_bytes(name.as_bytes()).expect("valid header name");
            let val = HeaderValue::from_str(value).expect("valid header value");
            headers.insert(key, val);
        }
        headers
    }

    // A satisfied range that states the complete length discloses it.
    #[test]
    fn content_range_with_total_size_produces_leak() {
        let signals = extract(&single_diff_set_with_baseline_headers(
            206,
            404,
            headers_with(&[("content-range", "bytes 0-99/500")]),
        ));
        assert_eq!(signals.len(), 1);
        assert_eq!(signals[0].kind, SignalKind::MetadataLeak);
        assert!(signals[0].evidence.contains("500"));
    }

    // An unsatisfied range (`*` before the slash) still carries the total.
    #[test]
    fn content_range_unsatisfied_with_total_produces_leak() {
        let signals = extract(&single_diff_set_with_baseline_headers(
            416,
            404,
            headers_with(&[("content-range", "bytes */1024")]),
        ));
        assert_eq!(signals.len(), 1);
        assert!(signals[0].evidence.contains("1024"));
    }

    // A `*` total carries no size information.
    #[test]
    fn content_range_with_star_total_produces_no_leak() {
        let set = single_diff_set_with_baseline_headers(
            206,
            404,
            headers_with(&[("content-range", "bytes 0-99/*")]),
        );
        assert!(extract(&set).is_empty());
    }

    // Any ETag on the baseline response is reported, with its value in the evidence.
    #[test]
    fn etag_produces_metadata_leak() {
        let signals = extract(&single_diff_set_with_baseline_headers(
            200,
            404,
            headers_with(&[("etag", "\"v2-abc123\"")]),
        ));
        assert_eq!(signals.len(), 1);
        assert_eq!(signals[0].kind, SignalKind::MetadataLeak);
        assert!(signals[0].evidence.contains("v2-abc123"));
    }

    // Without either header there is nothing to report.
    #[test]
    fn no_metadata_headers_produces_no_signals() {
        let set = single_diff_set_with_baseline_headers(200, 404, HeaderMap::new());
        assert!(extract(&set).is_empty());
    }

    // No baseline entries means no headers to inspect.
    #[test]
    fn empty_baseline_produces_no_signals() {
        let set = crate::signals::tests::diff_set_with_statuses(&[], &[404]);
        assert!(extract(&set).is_empty());
    }
}