parlov-elicit 0.5.0

Elicitation engine: strategy selection and probe plan generation for parlov.
Documentation
//! Producer/Consumer DAG model for phase 2 chained probe generation.
//!
//! Producers extract typed signal from phase 1 exchanges. Consumers convert
//! that signal into phase 2 probe specs. The `ChainRegistry` holds directed
//! edges between them; `generate_dag_chained_plan` walks those edges against
//! a slice of classified exchanges and returns the resulting `ProbeSpec` list.

use std::sync::Arc;

use bytes::Bytes;
use http::HeaderMap;
use parlov_core::ResponseClass;

use crate::context::ScanContext;
use crate::harvest::EtagStrength;
use crate::types::{ChainProvenance, ProbeSpec};

/// Payload-free discriminant for [`ProducerOutput`].
///
/// A `Consumer` returns one of these from `Consumer::needs` to declare which
/// signal variant it accepts. The type is a fieldless `Copy` enum and derives
/// `Eq` and `Hash`, so it can be compared cheaply and used as a key in
/// `HashMap`/`HashSet` collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ProducerOutputKind {
    /// H1 — `ETag` with strength discriminator.
    Etag,
    /// H1 — `Last-Modified` date string.
    LastModified,
    /// H2 — `Location` header value from a redirect.
    Location,
    /// H3 — Total resource size in bytes from `Content-Range`.
    ContentRangeSize,
    /// H3 — `Accept-Ranges` header value.
    AcceptRanges,
    /// B4 — Resource ID extracted from a 201 `Location` path or JSON body `id` field.
    ResourceId,
    /// B5 — Structured error signal from an RFC 9457 or plain JSON error body.
    ProblemDetails,
    /// C7 — MIME type extracted from `Content-Type` on 2xx responses (parameters stripped).
    ContentType,
    /// C8 — Auth challenge signal parsed from `WWW-Authenticate` on 401/407.
    AuthChallenge,
}

/// Typed signal value extracted from a phase 1 exchange by a `Producer`.
///
/// Every variant has a same-named, payload-free counterpart in
/// `ProducerOutputKind`; `ProducerOutput::kind` performs that mapping.
#[derive(Debug, Clone, PartialEq)]
pub enum ProducerOutput {
    /// H1 — `ETag` value and weak/strong classification.
    Etag(String, EtagStrength),
    /// H1 — `Last-Modified` date string.
    LastModified(String),
    /// H2 — `Location` header value.
    Location(String),
    /// H3 — Total resource size parsed from `Content-Range`.
    ContentRangeSize(u64),
    /// H3 — `Accept-Ranges` header value.
    AcceptRanges(String),
    /// B4 — Resource ID extracted from a 201 `Location` path segment.
    ResourceId(String),
    /// B5 — Structured error signal harvested from RFC 9457 or JSON error body.
    ProblemDetails {
        /// Field names from `errors[].field` entries.
        required_fields: Vec<String>,
        /// The `type` URI from the problem details object, if present.
        error_type: Option<String>,
    },
    /// C7 — MIME type extracted from `Content-Type` on 2xx responses (parameters stripped).
    ///
    /// Example: `"application/json; charset=utf-8"` is stored as `"application/json"`.
    ContentType(String),
    /// C8 — Auth challenge signal parsed from `WWW-Authenticate` on 401/407.
    AuthChallenge {
        /// Scheme token, e.g. `"Bearer"` or `"Basic"`.
        scheme: String,
        /// `realm` parameter value, e.g. `"api"`; `None` when not present.
        realm: Option<String>,
        /// `scope` parameter value, e.g. `"read:items"`; `None` when not present.
        scope: Option<String>,
    },
}

impl ProducerOutput {
    /// Returns the kind discriminant for this output value.
    #[must_use]
    pub fn kind(&self) -> ProducerOutputKind {
        match self {
            Self::Etag(..) => ProducerOutputKind::Etag,
            Self::LastModified(..) => ProducerOutputKind::LastModified,
            Self::Location(..) => ProducerOutputKind::Location,
            Self::ContentRangeSize(..) => ProducerOutputKind::ContentRangeSize,
            Self::AcceptRanges(..) => ProducerOutputKind::AcceptRanges,
            Self::ResourceId(..) => ProducerOutputKind::ResourceId,
            Self::ProblemDetails { .. } => ProducerOutputKind::ProblemDetails,
            Self::ContentType(..) => ProducerOutputKind::ContentType,
            Self::AuthChallenge { .. } => ProducerOutputKind::AuthChallenge,
        }
    }

    /// Stable string name of the variant — used as `chain_provenance.producer_kind`.
    #[must_use]
    pub fn kind_string(&self) -> String {
        match self.kind() {
            ProducerOutputKind::Etag => "Etag",
            ProducerOutputKind::LastModified => "LastModified",
            ProducerOutputKind::Location => "Location",
            ProducerOutputKind::ContentRangeSize => "ContentRangeSize",
            ProducerOutputKind::AcceptRanges => "AcceptRanges",
            ProducerOutputKind::ResourceId => "ResourceId",
            ProducerOutputKind::ProblemDetails => "ProblemDetails",
            ProducerOutputKind::ContentType => "ContentType",
            ProducerOutputKind::AuthChallenge => "AuthChallenge",
        }
        .to_owned()
    }

    /// Stable serialized representation of the inner value — used as
    /// `chain_provenance.producer_value`. Format is per-variant but always
    /// deterministic so consumers can correlate findings.
    #[must_use]
    pub fn value_string(&self) -> String {
        match self {
            Self::Etag(s, _)
            | Self::LastModified(s)
            | Self::Location(s)
            | Self::AcceptRanges(s)
            | Self::ResourceId(s)
            | Self::ContentType(s) => s.clone(),
            Self::ContentRangeSize(n) => n.to_string(),
            Self::ProblemDetails {
                required_fields,
                error_type,
            } => format!(
                "required_fields={required_fields:?} type={}",
                error_type.as_deref().unwrap_or("-")
            ),
            Self::AuthChallenge {
                scheme,
                realm,
                scope,
            } => format!(
                "scheme={scheme} realm={} scope={}",
                realm.as_deref().unwrap_or("-"),
                scope.as_deref().unwrap_or("-"),
            ),
        }
    }
}

/// Extracts a typed signal from a qualifying phase 1 exchange.
///
/// Implementations must be `Send + Sync`; `ChainRegistry` stores them as
/// `Arc<dyn Producer>`.
pub trait Producer: Send + Sync {
    /// Returns `true` when this producer can extract signal from responses of `class`.
    ///
    /// `generate_dag_chained_plan` uses this as a gate: exchanges whose class is
    /// not admitted are skipped without attempting extraction.
    fn admits(&self, class: ResponseClass) -> bool;

    /// Extracts signal from a qualifying exchange's headers.
    ///
    /// Only called when `admits(class)` returns `true`. Returns `None` when
    /// the required header is absent or malformed.
    fn extract(&self, class: ResponseClass, headers: &HeaderMap) -> Option<ProducerOutput>;

    /// Extracts signal from headers and body.
    ///
    /// This is the method `generate_dag_chained_plan` invokes. The default
    /// implementation ignores `_body` and delegates to `extract`, preserving
    /// backward compatibility for header-only producers. Override when body
    /// content is required (e.g. B5).
    fn extract_with_body(
        &self,
        class: ResponseClass,
        headers: &HeaderMap,
        _body: &Bytes,
    ) -> Option<ProducerOutput> {
        self.extract(class, headers)
    }
}

/// Converts `ProducerOutput` into phase 2 chained probe specs.
///
/// Implementations must be `Send + Sync`; `ChainRegistry` stores them as
/// `Arc<dyn Consumer>`.
pub trait Consumer: Send + Sync {
    /// The `ProducerOutputKind` this consumer requires as input.
    ///
    /// `generate_dag_chained_plan` compares this against the kind of each
    /// extracted output and discards outputs that do not match.
    fn needs(&self) -> ProducerOutputKind;

    /// Generates chained probe specs for `ctx` using the extracted `output`.
    ///
    /// Only called when `output.kind() == self.needs()`. May return an empty
    /// `Vec` when no probes apply.
    fn generate(&self, ctx: &ScanContext, output: &ProducerOutput) -> Vec<ProbeSpec>;
}

/// Registry of producer→consumer DAG edges.
///
/// Each registered pair is one directed edge: the producer extracts signal from
/// a phase 1 exchange; the consumer converts that signal into phase 2 probe specs.
/// Pairs are evaluated in registration order.
pub struct ChainRegistry {
    // One `(producer, consumer)` tuple per edge, kept in insertion order.
    edges: Vec<(Arc<dyn Producer>, Arc<dyn Consumer>)>,
}

impl ChainRegistry {
    /// Builds a registry that holds no edges yet.
    #[must_use]
    pub fn new() -> Self {
        let edges = Vec::new();
        Self { edges }
    }

    /// Appends one producer→consumer edge after all previously registered edges.
    pub fn register(&mut self, producer: Arc<dyn Producer>, consumer: Arc<dyn Consumer>) {
        let edge = (producer, consumer);
        self.edges.push(edge);
    }

    /// Number of edges currently held by the registry.
    #[must_use]
    pub fn len(&self) -> usize {
        let Self { edges } = self;
        edges.len()
    }

    /// Whether the registry holds zero edges.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        let Self { edges } = self;
        edges.is_empty()
    }

    /// Absorbs every edge of `other` into this registry.
    ///
    /// `other` is taken by value; its edges are appended, in their existing
    /// order, after the edges already present in `self`.
    pub fn extend(&mut self, other: Self) {
        let Self {
            edges: mut incoming,
        } = other;
        self.edges.append(&mut incoming);
    }
}

impl Default for ChainRegistry {
    /// Produces a registry with an empty edge list.
    fn default() -> Self {
        Self { edges: Vec::new() }
    }
}

/// Builds the phase 2 chained probe plan by walking every edge of `registry`
/// over the phase 1 `exchanges`.
///
/// Edges are visited in registration order. For each (producer, consumer) edge
/// the exchanges are scanned in slice order for the first one that satisfies
/// all of:
///
/// 1. `producer.admits(class)` is `true`,
/// 2. `producer.extract_with_body(class, headers, body)` yields `Some(output)`,
/// 3. `output.kind()` equals `consumer.needs()`.
///
/// When such an exchange exists, `consumer.generate(ctx, &output)` is invoked
/// exactly once and every spec it returns is tagged with a `ChainProvenance`
/// built from `output.kind_string()` and `output.value_string()`. Edges without
/// a matching exchange contribute nothing.
///
/// Returns an empty `Vec` when the registry is empty or no exchange matches.
#[must_use]
pub fn generate_dag_chained_plan(
    ctx: &ScanContext,
    exchanges: &[(ResponseClass, HeaderMap, Bytes)],
    registry: &ChainRegistry,
) -> Vec<ProbeSpec> {
    let mut plan = Vec::new();

    for (producer, consumer) in &registry.edges {
        // First-match semantics: stop at the first exchange that yields a
        // signal of the kind this edge's consumer asks for.
        let harvested = exchanges
            .iter()
            .filter(|(class, _, _)| producer.admits(*class))
            .find_map(|(class, headers, body)| {
                producer
                    .extract_with_body(*class, headers, body)
                    .filter(|signal| signal.kind() == consumer.needs())
            });

        if let Some(signal) = harvested {
            let provenance = ChainProvenance {
                producer_kind: signal.kind_string(),
                producer_value: signal.value_string(),
            };
            // Every spec generated from this signal carries the same provenance.
            plan.extend(
                consumer
                    .generate(ctx, &signal)
                    .into_iter()
                    .map(|spec| spec.with_chain_provenance(provenance.clone())),
            );
        }
    }

    plan
}

#[cfg(test)]
#[path = "chain_tests.rs"]
mod tests;