// parlov-analysis 0.7.0
//
// Analysis engine trait and signal detection for parlov.
// Documentation
//! Offline log-odds reducer for evidence events.
//!
//! Replaces the online order-dependent accumulator with a pure function over the full event set:
//! group by `(family, polarity)`, sort each group by weight descending, apply a polarity-specific
//! diminishing-returns schedule, cap the contribution per group, and sum the discounted signed
//! log-odds. Order-invariant by construction.

use std::cmp::Ordering;

use indexmap::IndexMap;

use crate::existence::families::SignalFamily;

/// Polarity of an evidence event.
///
/// Together with the signal family, the polarity forms the key by which the reducer groups
/// events, and it selects which diminishing-returns schedule is applied to a group.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum EvidencePolarity {
    /// Evidence supports the hypothesis (positive log-odds contribution).
    Positive,
    /// Evidence opposes the hypothesis (negative log-odds contribution).
    Contradictory,
}

/// A single evidence event collected for offline aggregation.
///
/// Fields are crate-private; construct events via [`EvidenceEvent::positive`] or
/// [`EvidenceEvent::contradictory`] so `signed_log_odds`'s sign always agrees with `polarity`.
#[derive(Debug, Clone)]
pub struct EvidenceEvent {
    /// Signal family; first half of the `(family, polarity)` grouping key.
    pub(crate) family: SignalFamily,
    /// Polarity; second half of the grouping key and selector of the discount schedule.
    pub(crate) polarity: EvidencePolarity,
    /// Technique identifier; used as the ascending tiebreaker when weights compare equal.
    pub(crate) technique_id: String,
    /// Ranking weight; within a group, higher weights are assigned earlier schedule slots.
    pub(crate) weight: f64,
    /// Log-odds value that is multiplied by the schedule slot; the constructors make it
    /// non-negative for `Positive` events and non-positive for `Contradictory` ones.
    pub(crate) signed_log_odds: f64,
}

impl EvidenceEvent {
    /// Positive event; `signed_log_odds` is forced to `log_odds_magnitude.abs()`.
    #[must_use]
    pub fn positive(
        family: SignalFamily,
        technique_id: impl Into<String>,
        weight: f64,
        log_odds_magnitude: f64,
    ) -> Self {
        Self {
            family,
            polarity: EvidencePolarity::Positive,
            technique_id: technique_id.into(),
            weight,
            signed_log_odds: log_odds_magnitude.abs(),
        }
    }

    /// Contradictory event; `signed_log_odds` is forced to `-log_odds_magnitude.abs()`.
    #[must_use]
    pub fn contradictory(
        family: SignalFamily,
        technique_id: impl Into<String>,
        weight: f64,
        log_odds_magnitude: f64,
    ) -> Self {
        Self {
            family,
            polarity: EvidencePolarity::Contradictory,
            technique_id: technique_id.into(),
            weight,
            signed_log_odds: -log_odds_magnitude.abs(),
        }
    }
}

/// Diminishing-returns schedule for `Positive` evidence.
///
/// Slot `n` is the multiplier applied to the `n`-th ranked event of a group; events ranked past
/// the last slot contribute nothing.
pub const POSITIVE_SCHEDULE: &[f64] = &[1.0, 0.5, 0.25, 0.1];

/// Diminishing-returns schedule for `Contradictory` evidence (softer than positive).
///
/// It has one more slot than [`POSITIVE_SCHEDULE`] and its multipliers fall off more slowly.
pub const CONTRADICTORY_SCHEDULE: &[f64] = &[1.0, 0.7, 0.5, 0.3, 0.1];

/// Per-(family, polarity) cap on contribution magnitude.
///
/// The discounted sum of each group is clamped to `[-PER_GROUP_CAP, PER_GROUP_CAP]` before it
/// is added to the total.
pub const PER_GROUP_CAP: f64 = 0.75;

/// Collapses one `(family, polarity)` group into a single capped log-odds contribution.
///
/// The events are ranked by `weight` from highest to lowest, with ascending `technique_id`
/// breaking ties. The n-th ranked event is multiplied by the n-th entry of `schedule`; events
/// ranked past the end of the schedule add nothing. The resulting sum is clamped so that its
/// magnitude never exceeds `cap`.
///
/// Returns `0.0` when either `events` or `schedule` is empty. The function does not check that
/// the events really share a family and polarity; that is left to the caller.
#[must_use]
pub fn reduce_family_polarity(events: &[EvidenceEvent], schedule: &[f64], cap: f64) -> f64 {
    if events.is_empty() || schedule.is_empty() {
        return 0.0;
    }

    // Rank references so the events themselves are never cloned.
    let mut ranked: Vec<&EvidenceEvent> = Vec::with_capacity(events.len());
    ranked.extend(events);
    ranked.sort_by(|lhs, rhs| cmp_event_desc(lhs, rhs));

    // `zip` stops at the shorter side, which is what drops events beyond the schedule.
    let discounted: f64 = schedule
        .iter()
        .zip(ranked)
        .map(|(factor, event)| event.signed_log_odds * factor)
        .sum();

    clamp_magnitude(discounted, cap)
}

/// Sums the capped log-odds contribution of every `(family, polarity)` group in `events`.
///
/// Produces the same number as `reduce_with_attribution(events).total_log_odds` without
/// allocating the per-event contribution vector. An empty slice yields `0.0`.
#[must_use]
pub fn reduce_all(events: &[EvidenceEvent]) -> f64 {
    group_indices_by_family_polarity(events)
        .into_iter()
        .fold(0.0, |running, (members, polarity)| {
            // Pair each schedule slot with the event ranked at that position; events ranked
            // beyond the schedule are dropped by `zip`.
            let raw: f64 = schedule_for(polarity)
                .iter()
                .zip(sorted_indices(events, &members))
                .map(|(factor, idx)| events[idx].signed_log_odds * factor)
                .sum();
            running + clamp_magnitude(raw, PER_GROUP_CAP)
        })
}

/// Returns a copy of `indices` ranked by the events they point at: highest `weight` first,
/// ascending `technique_id` among equal weights.
///
/// Every entry of `indices` must be a valid position in `events`; an out-of-range index panics.
fn sorted_indices(events: &[EvidenceEvent], indices: &[usize]) -> Vec<usize> {
    let mut ranked: Vec<usize> = indices.iter().copied().collect();
    // Stable sort: entries the comparator treats as equal keep their input order.
    ranked.sort_by(|&lhs, &rhs| {
        let (left, right) = (&events[lhs], &events[rhs]);
        cmp_event_desc(left, right)
    });
    ranked
}

/// Result of reducing a full event set, with per-event log-odds attribution.
#[derive(Debug, Clone)]
pub struct ReductionResult {
    /// Total log-odds across all groups (the sum of every group's capped contribution).
    pub total_log_odds: f64,
    /// Per-event contribution, parallel to the input slice. Indexed by the event's position in
    /// the input vector. Sums to `total_log_odds` modulo floating-point noise.
    pub contributions: Vec<f64>,
}

/// Reduces `events` exactly as [`reduce_all`] does, and additionally reports how much each
/// individual event contributed.
///
/// `contributions[i]` in the result belongs to `events[i]`. If a group's discounted sum exceeds
/// [`PER_GROUP_CAP`] in magnitude, all of that group's contributions are rescaled by
/// `cap / |unclamped_sum|` so that they add up to the signed cap. An empty input yields a total
/// of `0.0` and an empty contribution vector.
#[must_use]
pub fn reduce_with_attribution(events: &[EvidenceEvent]) -> ReductionResult {
    let mut contributions = vec![0.0_f64; events.len()];

    let total_log_odds = group_indices_by_family_polarity(events).into_iter().fold(
        0.0_f64,
        |running, (members, polarity)| {
            let schedule = schedule_for(polarity);
            // Writes this group's shares into `contributions` and hands back its capped sum.
            running + attribute_group(events, &members, schedule, &mut contributions)
        },
    );

    ReductionResult {
        total_log_odds,
        contributions,
    }
}

/// Attributes one `(family, polarity)` group's capped contribution to its member events.
///
/// `group_indices` lists the positions in `events` that belong to the group. Each member's share
/// is written to `contributions` at that same position; members ranked beyond the end of
/// `schedule` get a zero multiplier. The return value is the group's sum after clamping to
/// [`PER_GROUP_CAP`], or `0.0` (with nothing written) if the group or the schedule is empty.
fn attribute_group(
    events: &[EvidenceEvent],
    group_indices: &[usize],
    schedule: &[f64],
    contributions: &mut [f64],
) -> f64 {
    if group_indices.is_empty() || schedule.is_empty() {
        return 0.0;
    }

    let ranked = sorted_indices(events, group_indices);
    let raw_total: f64 = ranked
        .iter()
        .zip(schedule.iter())
        .map(|(&idx, factor)| events[idx].signed_log_odds * factor)
        .sum();
    let capped_total = clamp_magnitude(raw_total, PER_GROUP_CAP);

    // Rescale only when the cap actually changed the sum; the second test also keeps the
    // division away from a (near-)zero denominator.
    let cap_left_sum_alone = (capped_total - raw_total).abs() <= f64::EPSILON;
    let sum_is_negligible = raw_total.abs() <= f64::EPSILON;
    let rescale = if cap_left_sum_alone || sum_is_negligible {
        1.0
    } else {
        capped_total / raw_total
    };

    // Pad the schedule with zeros so every member gets an entry, including those ranked
    // past the last slot.
    let padded_schedule = schedule.iter().copied().chain(std::iter::repeat(0.0));
    for (&idx, factor) in ranked.iter().zip(padded_schedule) {
        contributions[idx] = events[idx].signed_log_odds * factor * rescale;
    }

    capped_total
}

/// Buckets the positions of `events` by their `(family, polarity)` key.
///
/// Each returned pair holds the member positions (in input order) and the polarity of that
/// bucket. Buckets appear in the order in which their key was first seen in `events`.
fn group_indices_by_family_polarity(
    events: &[EvidenceEvent],
) -> Vec<(Vec<usize>, EvidencePolarity)> {
    // `IndexMap` iterates in insertion order, which is what yields first-occurrence ordering.
    let mut buckets: IndexMap<(SignalFamily, EvidencePolarity), Vec<usize>> = IndexMap::new();
    events.iter().enumerate().for_each(|(position, event)| {
        let key = (event.family, event.polarity);
        buckets.entry(key).or_default().push(position);
    });

    let mut grouped = Vec::with_capacity(buckets.len());
    for ((_family, polarity), members) in buckets {
        grouped.push((members, polarity));
    }
    grouped
}

/// Returns the schedule corresponding to a polarity.
fn schedule_for(polarity: EvidencePolarity) -> &'static [f64] {
    match polarity {
        EvidencePolarity::Positive => POSITIVE_SCHEDULE,
        EvidencePolarity::Contradictory => CONTRADICTORY_SCHEDULE,
    }
}

/// Sort comparator: `weight` descending, then `technique_id` ascending, then log-odds magnitude
/// descending.
///
/// The ordering is consistent for every input, including NaN:
///
/// * A NaN `weight` ranks after every numeric weight. Falling back to `technique_id` whenever
///   `partial_cmp` returns `None` is not transitive once a NaN is mixed with ordinary weights,
///   which makes the sort result depend on input order and can make `sort_by` panic.
/// * Events that tie on both `weight` and `technique_id` are further ordered by
///   `|signed_log_odds|` (largest first, NaN last), so the schedule slot each one receives does
///   not depend on where it sat in the input.
///
/// `Ordering::Equal` is returned only when all three keys tie.
fn cmp_event_desc(a: &EvidenceEvent, b: &EvidenceEvent) -> Ordering {
    // Descending float comparison that places NaN after every number. `0.0` and `-0.0`
    // compare equal, exactly as with `partial_cmp`.
    fn desc_nan_last(x: f64, y: f64) -> Ordering {
        match (x.is_nan(), y.is_nan()) {
            (true, true) => Ordering::Equal,
            (true, false) => Ordering::Greater,
            (false, true) => Ordering::Less,
            // Neither operand is NaN here, so `partial_cmp` always yields `Some`.
            (false, false) => y.partial_cmp(&x).unwrap_or(Ordering::Equal),
        }
    }

    desc_nan_last(a.weight, b.weight)
        .then_with(|| a.technique_id.cmp(&b.technique_id))
        .then_with(|| desc_nan_last(a.signed_log_odds.abs(), b.signed_log_odds.abs()))
}

/// Limits `value` to the symmetric range `[-|cap|, |cap|]`.
///
/// The sign of `cap` is ignored. A NaN `value` is returned unchanged, and a NaN `cap` disables
/// clamping, because every comparison against NaN is false.
fn clamp_magnitude(value: f64, cap: f64) -> f64 {
    let bound = cap.abs();
    if value < -bound {
        return -bound;
    }
    if value > bound {
        return bound;
    }
    value
}

// Unit tests are loaded from `reducer_tests.rs` via `#[path]` and compiled only under `cfg(test)`.
#[cfg(test)]
#[path = "reducer_tests.rs"]
mod tests;