atelier_quant 0.0.12

//! Closed-form Maximum Likelihood Estimation for the homogeneous Poisson process.
//!
//! # Model
//!
//! Homogeneous Poisson process with constant intensity:
//!
//! $$
//!   \lambda(t) = \lambda
//! $$
//!
//! # Log-Likelihood
//!
//! For n events on the observation window $[t_1, t_n]$:
//!
//! $$
//!   \ell(\lambda) = (n - 1) \ln \lambda - \lambda \, T
//! $$
//!
//! where $T = t_n - t_1$.  The MLE is $\hat\lambda = (n-1) / T$.
//!
//! We use $n-1$ interarrivals (not $n$) because the observation window
//! is $[t_1, t_n]$, consistent with how the Hawkes compensator integral
//! is defined.  This ensures AIC/BIC are directly comparable.

use serde::{Deserialize, Serialize};

use super::errors::PoissonError;

// ── Configuration ────────────────────────────────────────────────────

/// Configuration for the Poisson MLE.
///
/// This is a unit struct for API symmetry with `HawkesEstimationConfig`.
/// The Poisson MLE is closed-form, so no optimizer settings are needed.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct PoissonEstimationConfig;

// ── Result ───────────────────────────────────────────────────────────

/// Output of the Poisson MLE estimation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PoissonEstimationResult {
    /// Estimated constant intensity λ̂ = (n−1) / T.
    pub lambda: f64,

    /// Log-likelihood at the MLE: (n−1)·ln(λ̂) − λ̂·T.
    pub log_likelihood: f64,

    /// Number of iterations (always 1 — closed-form).
    pub iterations: usize,

    /// Whether estimation converged (always true — closed-form).
    pub converged: bool,

    /// Akaike Information Criterion: 2k − 2ℓ  (k = 1).
    pub aic: f64,

    /// Bayesian Information Criterion: k·ln(n) − 2ℓ.
    pub bic: f64,
}

// ── Core functions ───────────────────────────────────────────────────

/// Validate event times for MLE input.
///
/// Checks: at least 2 events, all finite, strictly increasing.
pub fn validate_events(events: &[f64]) -> Result<(), PoissonError> {
    if events.len() < 2 {
        return Err(PoissonError::InvalidEventTimes(
            "Need at least 2 events for estimation".into(),
        ));
    }

    for (i, &t) in events.iter().enumerate() {
        if !t.is_finite() {
            return Err(PoissonError::InvalidEventTimes(format!(
                "Event at index {} is not finite: {}",
                i, t
            )));
        }
    }

    for i in 1..events.len() {
        if events[i] <= events[i - 1] {
            return Err(PoissonError::InvalidEventTimes(format!(
                "Events not strictly increasing at index {}: {} <= {}",
                i, events[i], events[i - 1]
            )));
        }
    }

    Ok(())
}

/// Compute the log-likelihood of a homogeneous Poisson process.
///
/// $$
///   \ell(\lambda) = (n - 1) \ln \lambda - \lambda \, T
/// $$
///
/// where $T = t_n - t_1$ and $n$ = `events.len()`.
///
/// Returns `f64::NEG_INFINITY` if λ ≤ 0 or n < 2.
pub fn log_likelihood(lambda: f64, events: &[f64]) -> f64 {
    let n = events.len();
    if n < 2 || lambda <= 0.0 {
        return f64::NEG_INFINITY;
    }

    let t_span = events[n - 1] - events[0];
    let n_gaps = (n - 1) as f64;

    n_gaps * lambda.ln() - lambda * t_span
}

/// Estimate the Poisson rate parameter via closed-form MLE.
///
/// # Arguments
///
/// * `events` - Sorted event times (at least 2, strictly increasing).
/// * `_config` - Unused; present for API symmetry with Hawkes.
///
/// # Returns
///
/// `PoissonEstimationResult` with λ̂, log-likelihood, AIC, BIC.
///
/// # Errors
///
/// - `PoissonError::InvalidEventTimes` if events are invalid.
/// - `PoissonError::EstimationFailed` if the observation window has zero length.
pub fn estimate_poisson_mle(
    events: &[f64],
    _config: &PoissonEstimationConfig,
) -> Result<PoissonEstimationResult, PoissonError> {
    validate_events(events)?;

    let n = events.len();
    let t_span = events[n - 1] - events[0];

    if t_span <= 0.0 {
        return Err(PoissonError::EstimationFailed(
            "All events have the same timestamp".into(),
        ));
    }

    let n_gaps = (n - 1) as f64;
    let lambda = n_gaps / t_span;
    let ll = log_likelihood(lambda, events);

    let k = 1.0_f64; // number of parameters
    let n_f = n as f64;
    let aic = 2.0 * k - 2.0 * ll;
    let bic = k * n_f.ln() - 2.0 * ll;

    Ok(PoissonEstimationResult {
        lambda,
        log_likelihood: ll,
        iterations: 1,
        converged: true,
        aic,
        bic,
    })
}

/// Compute the compensator (integrated intensity) over $[t_1, t]$.
///
/// For a homogeneous Poisson process:
///
/// $$
///   \Lambda(t) = \lambda \cdot (t - t_1)
/// $$
///
/// where $t_1$ = `events[0]`.
///
/// # Arguments
///
/// * `lambda` - Constant intensity.
/// * `events` - Event times (used only for the origin $t_1$).
/// * `t` - Evaluation point.
pub fn compensator(lambda: f64, events: &[f64], t: f64) -> f64 {
    if events.is_empty() {
        return 0.0;
    }
    let t0 = events[0];
    lambda * (t - t0).max(0.0)
}

/// Compute time-rescaling residuals for goodness-of-fit assessment.
///
/// Returns the transformed inter-event intervals:
///
/// $$
///   r_i = \lambda \cdot (t_i - t_{i-1})
/// $$
///
/// Under correct specification these should be i.i.d. Exp(1)
/// (time-rescaling theorem).
///
/// # Arguments
///
/// * `lambda` - Fitted (or true) Poisson rate.
/// * `events` - The event times.
///
/// # Returns
///
/// A vector of rescaled intervals (length `n-1`).
pub fn time_rescaling_residuals(lambda: f64, events: &[f64]) -> Vec<f64> {
    let n = events.len();
    if n < 2 {
        return vec![];
    }

    let mut residuals = Vec::with_capacity(n - 1);
    for i in 1..n {
        residuals.push(lambda * (events[i] - events[i - 1]));
    }

    residuals
}