// parlov-analysis 0.7.0

//! Bayesian evidence accumulator for endpoint-level existence aggregation.
//!
//! Backed by the offline [`reducer`](super::reducer): the accumulator collects per-strategy
//! events and computes the posterior by reducing the full event set with `reduce_all`. Bayesian
//! log-odds updates are order-invariant by construction — the same evidence set produces the
//! same posterior regardless of ingest order.

use parlov_core::{StrategyMetaForStop, StrategyOutcome, Vector};

use crate::existence::families::SignalFamily;

use super::reducer::{reduce_all, reduce_with_attribution, EvidenceEvent, ReductionResult};

/// Returns the [`SignalFamily`] a detection [`Vector`] belongs to.
///
/// Vectors mapped to the same family are treated as correlated evidence: the reducer applies
/// diminishing returns within a family so related strategies cannot inflate the posterior.
///
/// The match is deliberately exhaustive (no wildcard arm) so that adding a `Vector` variant
/// forces a decision about its family at compile time.
#[must_use]
pub fn vector_to_family(v: Vector) -> SignalFamily {
    match v {
        Vector::StatusCodeDiff => SignalFamily::General,
        Vector::ErrorMessageGranularity => SignalFamily::ErrorBody,
        Vector::RedirectDiff => SignalFamily::Redirect,
        Vector::CacheProbing => SignalFamily::CacheValidator,
    }
}

/// Legacy diminishing-returns multiplier, indexed by how many same-family signals came before.
///
/// Returns `1.0` for the first signal (`count == 0`), `0.5` for the second (`count == 1`) and
/// `0.0` for every later one. Retained for backward compatibility with `verdict_builder` and
/// historical tests; under the offline reducer the per-polarity schedule supersedes it.
#[must_use]
pub fn family_multiplier(count: u8) -> f64 {
    // Multipliers for the first two signals; anything past the table contributes nothing.
    const SCHEDULE: [f64; 2] = [1.0, 0.5];

    SCHEDULE
        .get(usize::from(count))
        .copied()
        .unwrap_or(0.0)
}

/// Log-odds of a probability: `ln(p / (1 - p))`.
///
/// No range checking is performed on `p`; the result for `0.0` and `1.0` is `-inf` and `+inf`
/// respectively, following IEEE-754 arithmetic.
#[must_use]
pub fn logit(p: f64) -> f64 {
    let odds = p / (1.0 - p);
    odds.ln()
}

/// Inverse of [`logit`]: maps log-odds `l` back to a probability via `1 / (1 + exp(-l))`.
fn logistic(l: f64) -> f64 {
    let denominator = 1.0 + (-l).exp();
    denominator.recip()
}

/// Turns a confidence percentage into a probability.
///
/// The byte is divided by 100 and the result is clamped to `[0.01, 0.99]`, so the extremes
/// (`0`, and anything at or above `99`) never produce an infinite value when passed to
/// [`logit`].
#[must_use]
pub fn confidence_to_prob(confidence: u8) -> f64 {
    let fraction = f64::from(confidence) / 100.0;
    fraction.clamp(0.01, 0.99)
}

/// Collects evidence events and computes the posterior via offline reduction.
///
/// Posterior queries run the full event set through [`reduce_all`], which sorts by weight,
/// applies the polarity-specific schedule, and caps each `(family, polarity)` group at `±0.75`.
///
/// The accumulator keeps no running total: every query hands the stored events to the reducer
/// again.
pub struct EvidenceAccumulator {
    /// Events recorded so far, in ingest order. Appended to by `ingest`; the
    /// `max_*_remaining` helpers operate on a clone and leave this vector untouched.
    events: Vec<EvidenceEvent>,
}

impl Default for EvidenceAccumulator {
    fn default() -> Self {
        Self::new()
    }
}

impl EvidenceAccumulator {
    /// Creates an accumulator with no recorded events.
    #[must_use]
    pub fn new() -> Self {
        let events = Vec::new();
        Self { events }
    }

    /// Records the evidence carried by a single strategy outcome.
    ///
    /// * `Positive` — stored as a positive event whose weight is the logit of the clamped
    ///   confidence. Results with `confidence < 60` are reported through `tracing::warn!` and
    ///   discarded instead of being stored.
    /// * `Contradictory` — stored as a contradictory event using the supplied weight.
    /// * `NoSignal` / `Inapplicable` — ignored.
    ///
    /// The event's family is derived from `vector`; a missing technique id is recorded as
    /// `"unknown"`.
    pub fn ingest(&mut self, outcome: &StrategyOutcome, vector: Vector) {
        let family = vector_to_family(vector);

        let event = match outcome {
            // Nothing to record for outcomes that carry no evidence.
            StrategyOutcome::NoSignal(_) | StrategyOutcome::Inapplicable(_) => return,
            StrategyOutcome::Positive(result) if result.confidence < 60 => {
                tracing::warn!(
                    confidence = result.confidence,
                    "Positive outcome below Likely threshold — skipped to avoid negative posterior contribution"
                );
                return;
            }
            StrategyOutcome::Positive(result) => {
                let log_odds = logit(confidence_to_prob(result.confidence));
                let id = String::from(result.technique_id.as_deref().unwrap_or("unknown"));
                EvidenceEvent::positive(family, id, log_odds, log_odds)
            }
            StrategyOutcome::Contradictory(result, weight) => {
                let magnitude = f64::from(*weight);
                let id = String::from(result.technique_id.as_deref().unwrap_or("unknown"));
                EvidenceEvent::contradictory(family, id, magnitude, magnitude)
            }
        };

        self.events.push(event);
    }

    /// Posterior probability of existence: the logistic of
    /// [`log_odds_current`](Self::log_odds_current), clamped to `[0, 1]`.
    #[must_use]
    pub fn posterior_probability(&self) -> f64 {
        let log_odds = self.log_odds_current();
        logistic(log_odds).clamp(0.0, 1.0)
    }

    /// Total log-odds obtained by reducing every recorded event with [`reduce_all`].
    #[must_use]
    pub fn log_odds_current(&self) -> f64 {
        let Self { events } = self;
        reduce_all(events)
    }

    /// Reduces the recorded events and reports the per-event log-odds attribution.
    #[must_use]
    pub fn reduce_with_attribution(&self) -> ReductionResult {
        let Self { events } = self;
        reduce_with_attribution(events)
    }

    /// How many events have been recorded, i.e. how many `Positive`/`Contradictory` outcomes
    /// were accepted by [`ingest`](Self::ingest). `verdict_builder` uses this to debug-assert
    /// event/finding alignment.
    #[must_use]
    pub fn event_count(&self) -> usize {
        self.events().len()
    }

    /// Read-only view of the recorded events, in ingest order, for coverage gates and other
    /// post-aggregation checks.
    #[must_use]
    pub fn events(&self) -> &[EvidenceEvent] {
        self.events.as_slice()
    }

    /// Upper bound on the positive log-odds that `remaining` strategies could still add.
    ///
    /// A copy of the event set is extended with one hypothetical `Positive` event per
    /// remaining spec, each at `logit(0.99)`, and reduced. The return value is the increase
    /// over the current total, floored at zero. The accumulator itself is not modified.
    #[must_use]
    pub fn max_positive_remaining(&self, remaining: &[StrategyMetaForStop]) -> f64 {
        let baseline = reduce_all(&self.events);
        let best_case = logit(0.99);

        let mut augmented = self.events.clone();
        augmented.extend(remaining.iter().map(|spec| {
            EvidenceEvent::positive(
                vector_to_family(spec.vector),
                "max-hypothetical",
                best_case,
                best_case,
            )
        }));

        let gain = reduce_all(&augmented) - baseline;
        gain.max(0.0)
    }

    /// Upper bound on the negative log-odds that `remaining` strategies could still add,
    /// expressed as a non-negative magnitude.
    ///
    /// Counterpart of [`max_positive_remaining`](Self::max_positive_remaining): the copy is
    /// extended with hypothetical `Contradictory` events weighted by the per-vector maximum
    /// normalization weight. Specs whose vector has a weight of zero (currently every vector
    /// except `StatusCodeDiff`) contribute no event.
    #[must_use]
    pub fn max_negative_remaining(&self, remaining: &[StrategyMetaForStop]) -> f64 {
        let baseline = reduce_all(&self.events);

        let mut augmented = self.events.clone();
        augmented.extend(remaining.iter().filter_map(|spec| {
            let weight = max_normalization_weight(spec.vector);
            (weight > 0.0).then(|| {
                EvidenceEvent::contradictory(
                    vector_to_family(spec.vector),
                    "max-hypothetical",
                    weight,
                    weight,
                )
            })
        }));

        let loss = baseline - reduce_all(&augmented);
        loss.max(0.0)
    }
}

/// Largest contradictory (normalization) weight a strategy on vector `v` can contribute.
///
/// Only `StatusCodeDiff` strategies can produce contradictory evidence in the current model,
/// so every other vector yields `0.0`, which callers treat as "no contradictory capacity".
///
/// The match lists every variant instead of using a wildcard: a newly added `Vector` variant
/// then fails to compile here rather than silently being assigned zero capacity.
fn max_normalization_weight(v: Vector) -> f64 {
    match v {
        Vector::StatusCodeDiff => 0.25,
        Vector::CacheProbing | Vector::RedirectDiff | Vector::ErrorMessageGranularity => 0.0,
    }
}

#[cfg(test)]
#[path = "evidence_tests.rs"]
mod tests;

#[cfg(test)]
#[path = "evidence_contradictory_tests.rs"]
mod contradictory_tests;