use parlov_core::ResponseClass;
use http::{HeaderMap, StatusCode};
/// Strength of an entity tag, as classified by `extract_validators`.
///
/// The type is a fieldless tag, so it is `Copy` and `Hash` in addition to the
/// comparison traits; callers can pass it by value and use it as a map/set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum EtagStrength {
    /// The raw `ETag` value did not start with the weak prefix `W/"`.
    Strong,
    /// The raw `ETag` value started with the weak prefix `W/"`.
    Weak,
}
#[derive(Debug, Default)]
pub struct ConditionalValidators {
pub etag: Option<(String, EtagStrength)>,
pub last_modified: Option<String>,
}
/// Redirect-related signals harvested from a response.
///
/// The default value has both lists empty.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct RedirectSignals {
    /// `Location` header values observed on the response.
    pub location: Vec<String>,
    /// Redirect targets recorded for the response.
    ///
    /// NOTE(review): within this file the list is filled with exactly the same
    /// values as `location` and no validation is performed here — confirm the
    /// intended meaning of "valid" against the consumers of this field.
    pub redirect_target_valid: Vec<String>,
}
/// Range-request signals harvested from a response.
///
/// The default value has both fields set to `None`.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct RangeSignals {
    /// Raw `Accept-Ranges` header value (for example `bytes`), copied verbatim.
    pub accept_ranges: Option<String>,
    /// Numeric value found after the `/` in the `Content-Range` header, e.g.
    /// `3000` for `bytes 0-511/3000`; `None` when it is absent or not a number.
    pub content_range_size: Option<u64>,
}
/// Everything harvested from a single baseline response, grouped by kind.
///
/// `HarvestedObservations::from_baseline` fills at most one of the three
/// groups, chosen by the response's class; the remaining groups keep their
/// default (empty) values.
#[derive(Debug, Default)]
pub struct HarvestedObservations {
    /// `ETag` / `Last-Modified` validators.
    pub validators: ConditionalValidators,
    /// `Location`-derived redirect signals.
    pub redirect: RedirectSignals,
    /// `Accept-Ranges` / `Content-Range` signals.
    pub range: RangeSignals,
}
impl HarvestedObservations {
#[must_use]
pub fn from_baseline(status: StatusCode, headers: &HeaderMap) -> Self {
match ResponseClass::classify(status, headers) {
ResponseClass::Success => Self {
validators: extract_validators(headers),
..Default::default()
},
ResponseClass::Redirect => Self {
redirect: extract_redirect(headers),
..Default::default()
},
ResponseClass::PartialContent | ResponseClass::RangeNotSatisfiable => Self {
range: extract_range(headers),
..Default::default()
},
_ => Self::default(),
}
}
#[must_use]
pub fn empty() -> Self {
Self::default()
}
}
/// Reads the `ETag` and `Last-Modified` headers into a [`ConditionalValidators`].
///
/// Header text is stored verbatim. A header that is missing, or whose value
/// cannot be converted with `to_str()`, is recorded as `None`. An entity tag
/// is classified as weak only when its text starts with `W/"`; every other
/// value — including malformed ones — is classified as strong.
fn extract_validators(headers: &HeaderMap) -> ConditionalValidators {
    let last_modified = match headers.get(http::header::LAST_MODIFIED).map(|v| v.to_str()) {
        Some(Ok(text)) => Some(text.to_owned()),
        _ => None,
    };

    let etag = match headers.get(http::header::ETAG).map(|v| v.to_str()) {
        Some(Ok(tag)) => {
            // Leading bytes `W`, `/`, `"` form the weak-validator prefix.
            let strength = match tag.as_bytes() {
                [b'W', b'/', b'"', ..] => EtagStrength::Weak,
                _ => EtagStrength::Strong,
            };
            Some((tag.to_owned(), strength))
        }
        _ => None,
    };

    ConditionalValidators {
        etag,
        last_modified,
    }
}
/// Reads the `Location` header into a [`RedirectSignals`].
///
/// Only the first `Location` value is consulted (`HeaderMap::get`). When it is
/// present and convertible with `to_str()`, both `location` and
/// `redirect_target_valid` receive that single value; otherwise both lists
/// are empty.
fn extract_redirect(headers: &HeaderMap) -> RedirectSignals {
    let location: Vec<String> = match headers.get(http::header::LOCATION).map(|v| v.to_str()) {
        Some(Ok(target)) => vec![target.to_owned()],
        _ => Vec::new(),
    };

    RedirectSignals {
        // Both lists are populated identically; no validation happens here.
        redirect_target_valid: location.clone(),
        location,
    }
}
/// Reads the `Accept-Ranges` and `Content-Range` headers into a [`RangeSignals`].
///
/// * `accept_ranges` is the raw `Accept-Ranges` value, copied verbatim.
/// * `content_range_size` is the complete length that follows the single `/`
///   in `Content-Range`, so `bytes 0-511/3000` and `bytes */8192` both yield a
///   size. It is `None` when the header is missing, has no `/`, reports an
///   unknown length (`*`), or carries anything other than plain decimal digits
///   after the `/` (for example `+3000` or `3000/extra`).
///
/// A header whose value cannot be converted with `to_str()` is treated as absent.
fn extract_range(headers: &HeaderMap) -> RangeSignals {
    let accept_ranges = headers
        .get(http::header::ACCEPT_RANGES)
        .and_then(|v| v.to_str().ok())
        .map(str::to_owned);

    let content_range_size = headers
        .get(http::header::CONTENT_RANGE)
        .and_then(|v| v.to_str().ok())
        // Split at the first `/` only: any further `/` stays in the length
        // part and makes it fail the digit check below instead of being
        // silently ignored.
        .and_then(|s| s.split_once('/'))
        .map(|(_, length)| length.trim())
        // `u64::from_str` accepts a leading `+`, which is not a valid
        // complete-length, so require digits explicitly before parsing.
        .filter(|length| length.bytes().all(|b| b.is_ascii_digit()))
        // Still fallible: an empty string or a value above `u64::MAX` is `None`.
        .and_then(|length| length.parse::<u64>().ok());

    RangeSignals {
        accept_ranges,
        content_range_size,
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use http::{HeaderMap, HeaderValue, StatusCode};

    /// Builds a header map from `(name, static value)` pairs.
    fn headers_of(pairs: &[(http::header::HeaderName, &'static str)]) -> HeaderMap {
        let mut map = HeaderMap::new();
        for (name, value) in pairs.iter().cloned() {
            map.insert(name, HeaderValue::from_static(value));
        }
        map
    }

    /// Asserts that no conditional validator was harvested.
    fn assert_no_validators(obs: &HarvestedObservations) {
        assert!(obs.validators.etag.is_none());
        assert!(obs.validators.last_modified.is_none());
    }

    /// Asserts that no redirect signal was harvested.
    fn assert_no_redirect(obs: &HarvestedObservations) {
        assert!(obs.redirect.location.is_empty());
        assert!(obs.redirect.redirect_target_valid.is_empty());
    }

    /// Asserts that no range signal was harvested.
    fn assert_no_range(obs: &HarvestedObservations) {
        assert!(obs.range.accept_ranges.is_none());
        assert!(obs.range.content_range_size.is_none());
    }

    // --- range signals (206 / 416) ---

    #[test]
    fn from_baseline_206_extracts_range_signals() {
        let headers = headers_of(&[
            (http::header::ACCEPT_RANGES, "bytes"),
            (http::header::CONTENT_RANGE, "bytes 0-511/3000"),
        ]);
        let obs = HarvestedObservations::from_baseline(StatusCode::PARTIAL_CONTENT, &headers);
        assert_eq!(obs.range.accept_ranges.as_deref(), Some("bytes"));
        assert_eq!(obs.range.content_range_size, Some(3000));
        assert!(obs.validators.etag.is_none());
        assert!(obs.redirect.location.is_empty());
    }

    #[test]
    fn from_baseline_206_with_unknown_complete_length_has_no_size() {
        // `*` after the slash means the total size is unknown.
        let headers = headers_of(&[(http::header::CONTENT_RANGE, "bytes 0-511/*")]);
        let obs = HarvestedObservations::from_baseline(StatusCode::PARTIAL_CONTENT, &headers);
        assert!(obs.range.content_range_size.is_none());
        assert!(obs.range.accept_ranges.is_none());
    }

    #[test]
    fn from_baseline_416_extracts_size_from_wildcard_content_range() {
        let headers = headers_of(&[(http::header::CONTENT_RANGE, "bytes */8192")]);
        let obs =
            HarvestedObservations::from_baseline(StatusCode::RANGE_NOT_SATISFIABLE, &headers);
        assert_eq!(obs.range.content_range_size, Some(8192));
        assert!(obs.range.accept_ranges.is_none());
    }

    // --- conditional validators (200) ---

    #[test]
    fn from_baseline_200_with_etag() {
        let headers = headers_of(&[(http::header::ETAG, "\"abc123\"")]);
        let obs = HarvestedObservations::from_baseline(StatusCode::OK, &headers);
        assert_eq!(
            obs.validators.etag,
            Some(("\"abc123\"".to_owned(), EtagStrength::Strong))
        );
        assert!(obs.validators.last_modified.is_none());
        assert_no_redirect(&obs);
    }

    #[test]
    fn from_baseline_200_with_last_modified() {
        let headers = headers_of(&[(
            http::header::LAST_MODIFIED,
            "Wed, 01 Jan 2025 00:00:00 GMT",
        )]);
        let obs = HarvestedObservations::from_baseline(StatusCode::OK, &headers);
        assert_eq!(
            obs.validators.last_modified.as_deref(),
            Some("Wed, 01 Jan 2025 00:00:00 GMT")
        );
        assert!(obs.validators.etag.is_none());
        assert_no_redirect(&obs);
    }

    #[test]
    fn from_baseline_200_with_both_etag_and_last_modified() {
        let headers = headers_of(&[
            (http::header::ETAG, "\"v1\""),
            (http::header::LAST_MODIFIED, "Fri, 10 Jan 2025 12:00:00 GMT"),
        ]);
        let obs = HarvestedObservations::from_baseline(StatusCode::OK, &headers);
        assert_eq!(
            obs.validators.etag,
            Some(("\"v1\"".to_owned(), EtagStrength::Strong))
        );
        assert_eq!(
            obs.validators.last_modified.as_deref(),
            Some("Fri, 10 Jan 2025 12:00:00 GMT")
        );
        assert_no_redirect(&obs);
    }

    #[test]
    fn from_baseline_200_with_no_relevant_headers() {
        let obs = HarvestedObservations::from_baseline(StatusCode::OK, &HeaderMap::new());
        assert_no_validators(&obs);
        assert_no_redirect(&obs);
        assert_no_range(&obs);
    }

    #[test]
    fn from_baseline_200_with_strong_etag_classified_correctly() {
        let headers = headers_of(&[(http::header::ETAG, "\"abc123\"")]);
        let obs = HarvestedObservations::from_baseline(StatusCode::OK, &headers);
        assert_eq!(
            obs.validators.etag,
            Some(("\"abc123\"".to_owned(), EtagStrength::Strong))
        );
    }

    #[test]
    fn from_baseline_200_with_weak_etag_classified_correctly() {
        let headers = headers_of(&[(http::header::ETAG, "W/\"abc123\"")]);
        let obs = HarvestedObservations::from_baseline(StatusCode::OK, &headers);
        assert_eq!(
            obs.validators.etag,
            Some(("W/\"abc123\"".to_owned(), EtagStrength::Weak))
        );
    }

    // --- redirect signals (3xx) ---

    #[test]
    fn from_baseline_301_with_location() {
        let headers = headers_of(&[(http::header::LOCATION, "https://example.com/new")]);
        let obs = HarvestedObservations::from_baseline(StatusCode::MOVED_PERMANENTLY, &headers);
        assert_eq!(obs.redirect.location, vec!["https://example.com/new"]);
        assert_eq!(
            obs.redirect.redirect_target_valid,
            vec!["https://example.com/new"]
        );
        assert_no_validators(&obs);
    }

    #[test]
    fn from_baseline_302_without_location() {
        let obs = HarvestedObservations::from_baseline(StatusCode::FOUND, &HeaderMap::new());
        assert_no_redirect(&obs);
        assert_no_validators(&obs);
    }

    #[test]
    fn from_baseline_3xx_location_and_redirect_target_are_in_sync() {
        let headers = headers_of(&[(http::header::LOCATION, "https://example.com/resource")]);
        let obs = HarvestedObservations::from_baseline(StatusCode::SEE_OTHER, &headers);
        assert_eq!(obs.redirect.location, obs.redirect.redirect_target_valid);
        assert_eq!(obs.redirect.location.len(), 1);
    }

    // --- empty value ---

    #[test]
    fn empty_has_all_fields_empty() {
        let obs = HarvestedObservations::empty();
        assert_no_validators(&obs);
        assert_no_redirect(&obs);
        // The range group is part of "all fields" as well.
        assert_no_range(&obs);
    }
}