parlov-elicit 0.5.0

Elicitation engine: strategy selection and probe plan generation for parlov.
Documentation
//! Internal utility helpers.
//!
//! Pure, stateless functions with no business logic. Grows as strategies are
//! implemented.

use bytes::Bytes;
use http::{HeaderMap, HeaderName, HeaderValue, Method};
use parlov_core::{ProbeDefinition, Technique};

use crate::context::ScanContext;
use crate::types::{ProbePair, StrategyMetadata};

/// Fills the `{id}` placeholder of a URL template with `id`.
///
/// Only the first `{id}` is filled; any later occurrences are left verbatim.
/// A template without a placeholder is returned unchanged.
#[must_use]
pub fn substitute_url(template: &str, id: &str) -> String {
    match template.split_once("{id}") {
        Some((head, tail)) => format!("{head}{id}{tail}"),
        None => template.to_owned(),
    }
}

/// Fills every `{id}` placeholder of an optional body template with `id`.
///
/// Returns `None` when no template is supplied; otherwise the substituted text
/// as `Bytes`. Unlike [`substitute_url`], all occurrences are replaced.
#[must_use]
pub fn substitute_body(template: Option<&str>, id: &str) -> Option<Bytes> {
    let filled = template?.replace("{id}", id);
    Some(Bytes::from(filled))
}

/// Assembles a baseline/probe `ProbePair` for `method` from a `ScanContext`.
///
/// The two URLs are `ctx.target` with `{id}` filled by `ctx.baseline_id` and
/// `ctx.probe_id` respectively.
///
/// `baseline_headers` and `probe_headers` are used exactly as given — nothing is
/// merged in from `ctx.headers`.
///
/// Body selection:
/// - an explicit `body` is sent unchanged on both sides;
/// - otherwise, if `ctx.body_template` is set, it is substituted separately for
///   each side with that side's ID;
/// - otherwise neither request carries a body.
///
/// `metadata` and `technique` describe the calling strategy and are stored as
/// supplied. `canonical_baseline` and `chain_provenance` are left as `None`.
#[must_use]
pub fn build_pair(
    ctx: &ScanContext,
    method: Method,
    baseline_headers: HeaderMap,
    probe_headers: HeaderMap,
    body: Option<Bytes>,
    metadata: StrategyMetadata,
    technique: Technique,
) -> ProbePair {
    // Resolve both bodies in one place: explicit body wins, template is the fallback.
    let (baseline_body, probe_body) = match body {
        Some(explicit) => (Some(explicit.clone()), Some(explicit)),
        None => {
            let template = ctx.body_template.as_deref();
            (
                substitute_body(template, &ctx.baseline_id),
                substitute_body(template, &ctx.probe_id),
            )
        }
    };

    let baseline = ProbeDefinition {
        url: substitute_url(&ctx.target, &ctx.baseline_id),
        method: method.clone(),
        headers: baseline_headers,
        body: baseline_body,
    };
    let probe = ProbeDefinition {
        url: substitute_url(&ctx.target, &ctx.probe_id),
        method,
        headers: probe_headers,
        body: probe_body,
    };

    ProbePair {
        baseline,
        probe,
        canonical_baseline: None,
        metadata,
        technique,
        chain_provenance: None,
    }
}

/// Generates GET and HEAD `ProbeSpec::Pair` variants that each carry a canonical
/// (unmutated) baseline for control-integrity verification.
///
/// Used by route-mutating strategies (`case_normalize`, `trailing_slash`) to register the
/// unmutated baseline alongside the mutated `baseline`/`probe` pair. The runner dispatches the
/// canonical request as a third concurrent request when `canonical_baseline.is_some()`. The
/// resulting `ProbeExchange` is wired into `DifferentialSet.canonical` and consumed by
/// `control_integrity` to detect mutation-induced route destruction.
///
/// `baseline_url` and `probe_url` are the mutated URLs; `canonical_url` is the original
/// unmutated one. All three requests of a pair share the same method and a clone of
/// `headers`, and none of them carries a body. The returned vector holds the GET pair
/// first, then the HEAD pair.
#[must_use]
pub(crate) fn url_pair_specs_with_canonical(
    baseline_url: &str,
    probe_url: &str,
    canonical_url: &str,
    headers: &http::HeaderMap,
    metadata: &crate::types::StrategyMetadata,
    technique: &parlov_core::Technique,
) -> Vec<crate::types::ProbeSpec> {
    // Body-less request definition sharing the common header set.
    let definition = |url: &str, method: http::Method| parlov_core::ProbeDefinition {
        url: url.to_owned(),
        method,
        headers: headers.clone(),
        body: None,
    };

    // One spec per HTTP method: mutated baseline, mutated probe, canonical control.
    let spec_for = |method: http::Method| {
        crate::types::ProbeSpec::Pair(crate::types::ProbePair {
            baseline: definition(baseline_url, method.clone()),
            probe: definition(probe_url, method.clone()),
            canonical_baseline: Some(definition(canonical_url, method)),
            metadata: metadata.clone(),
            technique: *technique,
            chain_provenance: None,
        })
    };

    vec![spec_for(http::Method::GET), spec_for(http::Method::HEAD)]
}

/// Copies `base` and sets header `key` to `value`, replacing any value already present.
///
/// Both `key` and `value` must be `'static` strings. For values only known at runtime —
/// such as harvested `ETag` or `Last-Modified` headers — use [`try_clone_headers_with`].
///
/// # Panics
///
/// Panics if `key` or `value` is rejected by the `http` crate's `from_static`
/// constructors (for example, a header name that is not lowercase).
#[must_use]
pub(crate) fn clone_headers_static(
    base: &HeaderMap,
    key: &'static str,
    value: &'static str,
) -> HeaderMap {
    let name = HeaderName::from_static(key);
    let static_value = HeaderValue::from_static(value);

    let mut headers = base.clone();
    headers.insert(name, static_value);
    headers
}

/// Copies `base` and sets header `key` to the runtime-supplied `value`.
///
/// Returns `None` — without cloning `base` — when `value` is not accepted as a header
/// value by `HeaderValue::from_str` (e.g. it contains CR, LF, or NUL).
///
/// Use for runtime-supplied values such as harvested `ETag` or `Last-Modified` headers.
/// When both key and value are `'static`, prefer [`clone_headers_static`].
#[must_use]
pub(crate) fn try_clone_headers_with(
    base: &HeaderMap,
    key: &'static str,
    value: &str,
) -> Option<HeaderMap> {
    match HeaderValue::from_str(value) {
        Ok(parsed) => {
            let mut headers = base.clone();
            headers.insert(HeaderName::from_static(key), parsed);
            Some(headers)
        }
        Err(_) => None,
    }
}

/// Produces the GET and HEAD `ProbeSpec::Pair` variants for a URL-mutation strategy.
///
/// `baseline_url` and `probe_url` must already be fully substituted and transformed by
/// the caller. Every request gets a clone of `headers` and no body: this helper serves
/// strategies that mutate the URL, so any `body_template` is deliberately not consulted.
///
/// The returned vector holds the GET pair first, then the HEAD pair; neither carries a
/// canonical baseline.
#[must_use]
pub(crate) fn url_pair_specs(
    baseline_url: &str,
    probe_url: &str,
    headers: &http::HeaderMap,
    metadata: &crate::types::StrategyMetadata,
    technique: &parlov_core::Technique,
) -> Vec<crate::types::ProbeSpec> {
    // Body-less request definition sharing the common header set.
    let definition = |url: &str, method: http::Method| parlov_core::ProbeDefinition {
        url: url.to_owned(),
        method,
        headers: headers.clone(),
        body: None,
    };

    let spec_for = |method: http::Method| {
        crate::types::ProbeSpec::Pair(crate::types::ProbePair {
            baseline: definition(baseline_url, method.clone()),
            probe: definition(probe_url, method),
            canonical_baseline: None,
            metadata: metadata.clone(),
            technique: *technique,
            chain_provenance: None,
        })
    };

    vec![spec_for(http::Method::GET), spec_for(http::Method::HEAD)]
}

/// Rewrites `target` so that the route holding `{id}` becomes a nonsense path, intended
/// to miss application routing and elicit the server's default 404 response.
///
/// - Everything from the first `?` onward is treated as the query string; it is left
///   untouched and reattached to the garbled URL.
/// - When `target` contains `://`, the path starts at the first `/` after the authority
///   (or at the end of the URL if there is none). Without `://` the whole string is
///   treated as the path.
/// - If the `{id}` segment has a parent segment, the parent and everything after it are
///   replaced by `_parlov_no_route/0`.
/// - If it has no parent (single-level path), `{id}` and everything after it are
///   replaced by `_parlov_no_route`.
/// - If the path has no `/{id}` at all, the text after the last `/` is replaced instead.
#[must_use]
pub(crate) fn garble_path_segment(target: &str) -> String {
    // Split off the query (including the leading '?') so it survives untouched.
    let query_at = target.find('?').unwrap_or(target.len());
    let (url_part, query) = target.split_at(query_at);

    // Skip "scheme://authority" when present; the path begins at the next '/'.
    let path_start = match url_part.find("://") {
        None => 0,
        Some(scheme_end) => {
            let authority_start = scheme_end + 3;
            let after_scheme = &url_part[authority_start..];
            authority_start + after_scheme.find('/').unwrap_or(after_scheme.len())
        }
    };

    let mut garbled = garble_path(url_part, path_start);
    garbled.push_str(query);
    garbled
}

/// Garbles the path of `url` (which must not include a query string), where the path
/// begins at byte offset `path_start`.
///
/// Cases, in order:
/// 1. Path contains `/{id}` with a parent segment — the URL is cut at the parent's
///    leading `/` and `/_parlov_no_route/0` is appended.
/// 2. Path contains `/{id}` with no parent — the URL is cut just before `/{id}` and
///    `/_parlov_no_route` is appended.
/// 3. Path has a `/` but no `/{id}` — the URL is cut at the last `/` and
///    `/_parlov_no_route` is appended.
/// 4. Path has no `/` — the first `{id}` anywhere in `url` is replaced by
///    `_parlov_no_route` (the URL is returned unchanged if there is none).
fn garble_path(url: &str, path_start: usize) -> String {
    let path = &url[path_start..];

    match path.find("/{id}") {
        Some(id_slash) => {
            // Look for one more '/' before the `{id}` segment: the parent's start.
            match path[..id_slash].rfind('/') {
                Some(parent_slash) => {
                    format!("{}/_parlov_no_route/0", &url[..path_start + parent_slash])
                }
                None => format!("{}/_parlov_no_route", &url[..path_start + id_slash]),
            }
        }
        None => match path.rfind('/') {
            // No `/{id}`: drop whatever follows the final slash.
            Some(last_slash) => format!("{}/_parlov_no_route", &url[..path_start + last_slash]),
            // No slash either: substitute a bare placeholder if one exists.
            None => url.replacen("{id}", "_parlov_no_route", 1),
        },
    }
}

/// Rewrites a baseline `Location` URL into its probe-side counterpart by swapping
/// `baseline_id` for `probe_id` at the position dictated by the `target` template.
///
/// Returns `None` when:
/// - `target` has no `{id}` placeholder;
/// - `location` does not begin with the template text that precedes `{id}`;
/// - `location` does not have `baseline_id` immediately after that prefix.
///
/// Only the one occurrence anchored by the template is swapped, so an identical
/// substring in a version segment, port, or other URL component is never touched.
/// Whatever follows the ID in `location` is carried over unchanged.
#[must_use]
pub(crate) fn derive_probe_location(
    target: &str,
    baseline_id: &str,
    probe_id: &str,
    location: &str,
) -> Option<String> {
    // Text before `{id}` in the template is also the text before the ID in any URL
    // produced from it, so it anchors where the ID must sit in `location`.
    let placeholder_at = target.find("{id}")?;
    let shared_prefix = &target[..placeholder_at];

    let after_prefix = location.strip_prefix(shared_prefix)?;
    let after_id = after_prefix.strip_prefix(baseline_id)?;

    Some([shared_prefix, probe_id, after_id].concat())
}

/// Serializes `(field, value)` string pairs into a JSON object body.
///
/// Every value is emitted as a JSON string. If a field name appears more than once,
/// the last value wins. The caller is responsible for setting the
/// `Content-Type: application/json` header separately.
///
/// Example: `json_body(&[("email", "alice@example.com")])` → `{"email":"alice@example.com"}`
///
/// # Panics
///
/// Not expected to panic: serializing a map with string keys and string values does
/// not fail. The `expect` only exists to surface a violated assumption loudly should
/// that ever change.
#[must_use]
pub fn json_body(fields: &[(&str, &str)]) -> Bytes {
    // serde_json is already a dependency and these payloads are small and fixed, so
    // building a Map is fine. Hand-write the JSON only if this ever shows up as hot.
    let mut object = serde_json::Map::new();
    for &(field, value) in fields {
        object.insert(
            field.to_owned(),
            serde_json::Value::String(value.to_owned()),
        );
    }

    let encoded = serde_json::to_vec(&object).expect("serializing string-keyed map is infallible");
    Bytes::from(encoded)
}

// Unit tests for these helpers are kept in a separate file: `#[path]` points the
// `tests` module at `util_tests.rs`, and `#[cfg(test)]` compiles it only for test builds.
#[cfg(test)]
#[path = "util_tests.rs"]
mod tests;