parlov-elicit 0.5.0

Elicitation engine: strategy selection and probe plan generation for parlov.
Documentation
//! Harvested observations from phase 1 responses, used to seed phase 2 chained probes.
//!
//! After phase 1 completes, the scan engine extracts response header values from
//! baseline exchanges and stores them here. Phase 2 strategies consume these real
//! values instead of the placeholder wildcards used in phase 1.
//!
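//! Observations are organised by H-family: each nested struct groups the fields
//! harvested from one family of `ResponseClass` variants (H3, for example, is fed by
//! both `PartialContent` and `RangeNotSatisfiable`). H4–H8 families are added when
//! their B-phase chain consumers need them.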

use parlov_core::ResponseClass;

use http::{HeaderMap, StatusCode};

/// Discriminates RFC 9110 §8.8.3 strong vs weak `ETags`.
///
/// `If-Range` MUST NOT use a weak `ETag` (RFC 9110 §13.1.5). Strategies that
/// enforce this constraint match on `Strong` and skip `Weak` variants.
///
/// When harvested by this module, a raw header value that starts with `W/"` is
/// tagged `Weak`; every other value is tagged `Strong`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EtagStrength {
    /// Strong validator — suitable for all precondition headers including `If-Range`.
    Strong,
    /// Weak validator — not suitable for `If-Range`; valid in `If-Match`/`If-None-Match`.
    Weak,
}

/// H1 — Conditional-request validators (RFC 9110 §8.8, §13.1.5; RFC 7232).
///
/// Both fields hold the header text exactly as it was received; nothing is parsed
/// or normalised. A field is `None` when its header is absent or its value cannot
/// be read as a string.
#[derive(Debug, Default)]
pub struct ConditionalValidators {
    /// Strong or weak `ETag` from the `ETag` response header.
    ///
    /// The `String` is the full raw header value (quotes and any `W/` prefix
    /// included); the `EtagStrength` records which kind of validator it is.
    pub etag: Option<(String, EtagStrength)>,
    /// `Last-Modified` date string from the response header, kept unparsed.
    pub last_modified: Option<String>,
}

/// H2 — Redirect location signals (RFC 9110 §15.4).
///
/// Both vectors are empty when no usable `Location` header was harvested.
#[derive(Debug, Default)]
pub struct RedirectSignals {
    /// `Location` header values from 3xx responses.
    pub location: Vec<String>,
    /// Redirect targets confirmed as valid (same value as `location` for known-valid baselines).
    ///
    /// NOTE(review): the extraction in this module fills this with a copy of
    /// `location` and performs no separate validation — confirm that callers only
    /// pass known-valid baselines.
    pub redirect_target_valid: Vec<String>,
}

/// H3 — Range / partial-content signals (RFC 7233; RFC 9110 §14).
#[derive(Debug, Default)]
pub struct RangeSignals {
    /// Raw `Accept-Ranges` header value (e.g. `"bytes"`, `"none"`).
    pub accept_ranges: Option<String>,
    /// Total resource size in bytes, parsed from `Content-Range` on 206 or 416 responses.
    ///
    /// Parsed from `Content-Range: bytes start-end/size` or `bytes */size`.
    pub content_range_size: Option<u64>,
}

/// Response header values harvested from phase 1 for use in phase 2 chained probes.
///
/// Organised by H-family: each nested struct groups the fields harvested from one
/// family of response classes. H4–H8 families are added when their B-phase chain
/// consumers need them.
///
/// `Default` yields a value in which every family is empty.
#[derive(Debug, Default)]
pub struct HarvestedObservations {
    /// H1 — conditional-request validators; filled when the baseline classifies as
    /// `ResponseClass::Success`.
    pub validators: ConditionalValidators,
    /// H2 — redirect location signals; filled when the baseline classifies as
    /// `ResponseClass::Redirect`.
    pub redirect: RedirectSignals,
    /// H3 — range / partial-content signals; filled when the baseline classifies as
    /// `ResponseClass::PartialContent` or `ResponseClass::RangeNotSatisfiable`.
    pub range: RangeSignals,
}

impl HarvestedObservations {
    /// Builds observations from a single baseline response.
    ///
    /// The response is classified with `ResponseClass::classify`, and only the
    /// matching H-family is populated; the remaining families keep their defaults:
    ///
    /// - `Success` fills `validators` (H1)
    /// - `Redirect` fills `redirect` (H2)
    /// - `PartialContent` and `RangeNotSatisfiable` fill `range` (H3)
    ///
    /// Every other class yields the same value as `HarvestedObservations::default()`.
    #[must_use]
    pub fn from_baseline(status: StatusCode, headers: &HeaderMap) -> Self {
        // Start with every family empty, then fill in at most one of them.
        let mut harvested = Self::default();
        match ResponseClass::classify(status, headers) {
            ResponseClass::Success => {
                harvested.validators = extract_validators(headers);
            }
            ResponseClass::Redirect => {
                harvested.redirect = extract_redirect(headers);
            }
            ResponseClass::PartialContent | ResponseClass::RangeNotSatisfiable => {
                harvested.range = extract_range(headers);
            }
            // Classes without a harvestable family contribute nothing.
            _ => {}
        }
        harvested
    }

    /// Returns observations in which no family holds any harvested value.
    ///
    /// Equivalent to `HarvestedObservations::default()`.
    #[must_use]
    pub fn empty() -> Self {
        Self::default()
    }
}

/// Extracts H1 conditional validators from a 2xx response's headers.
fn extract_validators(headers: &HeaderMap) -> ConditionalValidators {
    let etag = headers
        .get(http::header::ETAG)
        .and_then(|v| v.to_str().ok())
        .map(|raw| {
            let strength = if raw.starts_with("W/\"") {
                EtagStrength::Weak
            } else {
                EtagStrength::Strong
            };
            (raw.to_owned(), strength)
        });
    let last_modified = headers
        .get(http::header::LAST_MODIFIED)
        .and_then(|v| v.to_str().ok())
        .map(str::to_owned);
    ConditionalValidators {
        etag,
        last_modified,
    }
}

/// Extracts H2 redirect signals from a 3xx response's headers.
fn extract_redirect(headers: &HeaderMap) -> RedirectSignals {
    let loc = headers
        .get(http::header::LOCATION)
        .and_then(|v| v.to_str().ok())
        .map(str::to_owned);
    let redirect_target_valid: Vec<String> = loc.iter().cloned().collect();
    let location: Vec<String> = loc.into_iter().collect();
    RedirectSignals {
        location,
        redirect_target_valid,
    }
}

/// Extracts H3 range signals from a 206 or 416 response's headers.
///
/// `Accept-Ranges` is copied verbatim. `Content-Range` is parsed for the total
/// resource size, accepting both `bytes start-end/size` and `bytes */size`.
/// The size is `None` when the header is absent, is not readable as a string,
/// uses a range unit other than `bytes`, or does not carry a decimal size
/// (for example the unknown-length form `bytes 0-511/*`).
fn extract_range(headers: &HeaderMap) -> RangeSignals {
    let accept_ranges = headers
        .get(http::header::ACCEPT_RANGES)
        .and_then(|v| v.to_str().ok())
        .map(str::to_owned);

    let content_range_size = headers
        .get(http::header::CONTENT_RANGE)
        .and_then(|v| v.to_str().ok())
        .and_then(parse_content_range_size);

    RangeSignals {
        accept_ranges,
        content_range_size,
    }
}

/// Parses the complete length out of a `Content-Range` field value.
///
/// Expects `<unit> <range>/<size>` where `<unit>` is `bytes` (compared
/// case-insensitively, as range unit names are case-insensitive). Returns `None`
/// for any other unit, because the result is documented as a size in bytes, and
/// for values that do not contain exactly one `/` followed by a decimal integer.
fn parse_content_range_size(value: &str) -> Option<u64> {
    let (unit, rest) = value.trim().split_once(char::is_whitespace)?;
    if !unit.eq_ignore_ascii_case("bytes") {
        return None;
    }
    // `split_once` keeps any further `/` inside `size`, so malformed values such
    // as `0-1/100/200` fail the integer parse instead of silently yielding 100.
    let (_range, size) = rest.split_once('/')?;
    size.trim().parse::<u64>().ok()
}

#[cfg(test)]
mod tests {
    use super::*;
    use http::{HeaderMap, HeaderValue, StatusCode};

    /// Builds a `HeaderMap` from `(name, value)` pairs so each test states only its inputs.
    fn headers_of(pairs: &[(http::header::HeaderName, &'static str)]) -> HeaderMap {
        let mut map = HeaderMap::new();
        for (name, value) in pairs {
            map.insert(name.clone(), HeaderValue::from_static(*value));
        }
        map
    }

    // --- H3 characterization tests (added before rewrite to pin behavior) ---

    #[test]
    fn from_baseline_206_extracts_range_signals() {
        let headers = headers_of(&[
            (http::header::ACCEPT_RANGES, "bytes"),
            (http::header::CONTENT_RANGE, "bytes 0-511/3000"),
        ]);
        let obs = HarvestedObservations::from_baseline(StatusCode::PARTIAL_CONTENT, &headers);
        assert_eq!(obs.range.accept_ranges.as_deref(), Some("bytes"));
        assert_eq!(obs.range.content_range_size, Some(3000));
        assert!(obs.validators.etag.is_none());
        assert!(obs.redirect.location.is_empty());
    }

    #[test]
    fn from_baseline_416_extracts_size_from_wildcard_content_range() {
        let headers = headers_of(&[(http::header::CONTENT_RANGE, "bytes */8192")]);
        let obs = HarvestedObservations::from_baseline(StatusCode::RANGE_NOT_SATISFIABLE, &headers);
        assert_eq!(obs.range.content_range_size, Some(8192));
        assert!(obs.range.accept_ranges.is_none());
    }

    /// An unknown complete length (`/*`) carries no size to harvest.
    #[test]
    fn from_baseline_206_with_unknown_complete_length_yields_no_size() {
        let headers = headers_of(&[(http::header::CONTENT_RANGE, "bytes 0-511/*")]);
        let obs = HarvestedObservations::from_baseline(StatusCode::PARTIAL_CONTENT, &headers);
        assert!(obs.range.content_range_size.is_none());
    }

    // --- HarvestedObservations::from_baseline ---

    #[test]
    fn from_baseline_200_with_etag() {
        let headers = headers_of(&[(http::header::ETAG, "\"abc123\"")]);
        let obs = HarvestedObservations::from_baseline(StatusCode::OK, &headers);
        assert_eq!(
            obs.validators.etag,
            Some(("\"abc123\"".to_owned(), EtagStrength::Strong))
        );
        assert!(obs.validators.last_modified.is_none());
        assert!(obs.redirect.location.is_empty());
        assert!(obs.redirect.redirect_target_valid.is_empty());
    }

    #[test]
    fn from_baseline_200_with_last_modified() {
        let headers = headers_of(&[(
            http::header::LAST_MODIFIED,
            "Wed, 01 Jan 2025 00:00:00 GMT",
        )]);
        let obs = HarvestedObservations::from_baseline(StatusCode::OK, &headers);
        assert_eq!(
            obs.validators.last_modified.as_deref(),
            Some("Wed, 01 Jan 2025 00:00:00 GMT")
        );
        assert!(obs.validators.etag.is_none());
        assert!(obs.redirect.location.is_empty());
        assert!(obs.redirect.redirect_target_valid.is_empty());
    }

    #[test]
    fn from_baseline_200_with_both_etag_and_last_modified() {
        let headers = headers_of(&[
            (http::header::ETAG, "\"v1\""),
            (
                http::header::LAST_MODIFIED,
                "Fri, 10 Jan 2025 12:00:00 GMT",
            ),
        ]);
        let obs = HarvestedObservations::from_baseline(StatusCode::OK, &headers);
        assert_eq!(
            obs.validators.etag,
            Some(("\"v1\"".to_owned(), EtagStrength::Strong))
        );
        assert_eq!(
            obs.validators.last_modified.as_deref(),
            Some("Fri, 10 Jan 2025 12:00:00 GMT")
        );
        assert!(obs.redirect.location.is_empty());
        assert!(obs.redirect.redirect_target_valid.is_empty());
    }

    #[test]
    fn from_baseline_200_with_no_relevant_headers() {
        let obs = HarvestedObservations::from_baseline(StatusCode::OK, &HeaderMap::new());
        assert!(obs.validators.etag.is_none());
        assert!(obs.validators.last_modified.is_none());
        assert!(obs.redirect.location.is_empty());
        assert!(obs.redirect.redirect_target_valid.is_empty());
        // The H3 family must stay untouched for a non-range response as well.
        assert!(obs.range.accept_ranges.is_none());
        assert!(obs.range.content_range_size.is_none());
    }

    #[test]
    fn from_baseline_301_with_location() {
        let headers = headers_of(&[(http::header::LOCATION, "https://example.com/new")]);
        let obs = HarvestedObservations::from_baseline(StatusCode::MOVED_PERMANENTLY, &headers);
        assert_eq!(obs.redirect.location, vec!["https://example.com/new"]);
        assert_eq!(
            obs.redirect.redirect_target_valid,
            vec!["https://example.com/new"]
        );
        assert!(obs.validators.etag.is_none());
        assert!(obs.validators.last_modified.is_none());
    }

    #[test]
    fn from_baseline_302_without_location() {
        let obs = HarvestedObservations::from_baseline(StatusCode::FOUND, &HeaderMap::new());
        assert!(obs.redirect.location.is_empty());
        assert!(obs.redirect.redirect_target_valid.is_empty());
        assert!(obs.validators.etag.is_none());
        assert!(obs.validators.last_modified.is_none());
    }

    #[test]
    fn from_baseline_3xx_location_and_redirect_target_are_in_sync() {
        let headers = headers_of(&[(http::header::LOCATION, "https://example.com/resource")]);
        let obs = HarvestedObservations::from_baseline(StatusCode::SEE_OTHER, &headers);
        assert_eq!(obs.redirect.location, obs.redirect.redirect_target_valid);
        assert_eq!(obs.redirect.location.len(), 1);
    }

    // --- HarvestedObservations::empty ---

    #[test]
    fn empty_has_all_fields_empty() {
        let obs = HarvestedObservations::empty();
        assert!(obs.validators.etag.is_none());
        assert!(obs.validators.last_modified.is_none());
        assert!(obs.redirect.location.is_empty());
        assert!(obs.redirect.redirect_target_valid.is_empty());
        // "All fields" includes the H3 family.
        assert!(obs.range.accept_ranges.is_none());
        assert!(obs.range.content_range_size.is_none());
    }

    // --- EtagStrength classification ---

    #[test]
    fn from_baseline_200_with_strong_etag_classified_correctly() {
        let headers = headers_of(&[(http::header::ETAG, "\"abc123\"")]);
        let obs = HarvestedObservations::from_baseline(StatusCode::OK, &headers);
        assert_eq!(
            obs.validators.etag,
            Some(("\"abc123\"".to_owned(), EtagStrength::Strong))
        );
    }

    #[test]
    fn from_baseline_200_with_weak_etag_classified_correctly() {
        let headers = headers_of(&[(http::header::ETAG, "W/\"abc123\"")]);
        let obs = HarvestedObservations::from_baseline(StatusCode::OK, &headers);
        assert_eq!(
            obs.validators.etag,
            Some(("W/\"abc123\"".to_owned(), EtagStrength::Weak))
        );
    }
}