atelier_quant 0.0.12

//! Interarrival time computation and descriptive statistics.
//!
//! Given a sequence of arrival timestamps (in nanoseconds), this module
//! computes the interarrival deltas $\Delta t_i = t_{i+1} - t_i$ and
//! provides summary statistics useful for distribution fitting.

use crate::errors::ArrivalsError;
use atelier_data::temporal::{TimeResolution, from_nanos};
use serde::{Deserialize, Serialize};

/// Result of an interarrival computation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InterarrivalResult {
    /// Raw interarrival deltas in nanoseconds.
    pub deltas_ns: Vec<u64>,

    /// Interarrival deltas converted to the requested output resolution.
    pub deltas_f64: Vec<f64>,

    /// The resolution used for `deltas_f64`.
    pub resolution: TimeResolution,

    /// Number of original events (timestamps).
    pub n_events: usize,

    /// Total observation window in nanoseconds: `t_last - t_first`.
    pub total_duration_ns: u64,
}

/// Descriptive statistics for a set of interarrival times.
///
/// All moments are computed as population statistics (dividing by *n*,
/// not *n − 1*).  Kurtosis is reported as **excess** kurtosis
/// (normal = 0).  The coefficient of variation is the key diagnostic
/// for distinguishing Poisson (CV ≈ 1) from Hawkes (CV > 1) arrivals.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ArrivalStats {
    /// Number of interarrival observations (*n − 1* for *n* timestamps).
    pub count: usize,
    /// Arithmetic mean of the interarrival times: $\bar{\Delta t}$.
    pub mean: f64,
    /// Population variance: $\frac{1}{n}\sum(\Delta t_i - \bar{\Delta t})^2$.
    pub variance: f64,
    /// Population standard deviation: $\sqrt{\text{variance}}$.
    pub std_dev: f64,
    /// Shortest interarrival time observed.
    pub min: f64,
    /// Longest interarrival time observed.
    pub max: f64,
    /// Skewness (third standardised moment).  Exponential interarrivals
    /// have skewness = 2; larger values indicate heavier right tails.
    pub skewness: f64,
    /// Excess kurtosis (fourth standardised moment − 3).  Exponential
    /// interarrivals have excess kurtosis = 6; larger values indicate
    /// more extreme outliers.
    pub kurtosis: f64,
    /// Coefficient of variation: $\sigma / \mu$.
    ///
    /// For a Poisson process (exponential interarrivals), CV ≈ 1.
    /// CV > 1 suggests clustering (consistent with Hawkes).
    pub cv: f64,
}

/// Compute interarrival times from a sorted sequence of nanosecond timestamps.
///
/// # Arguments
///
/// * `timestamps_ns` - Strictly increasing timestamps in nanoseconds.
/// * `output_resolution` - The time unit for the `deltas_f64` field.
///
/// # Errors
///
/// Returns `ArrivalsError::InsufficientData` if fewer than 2 timestamps are provided.
///
/// # Examples
///
/// ```
/// use atelier_quant::arrivals::compute_interarrivals;
/// use atelier_data::temporal::TimeResolution;
///
/// let ts = vec![0, 1_000_000, 3_000_000, 6_000_000]; // nanoseconds
/// let result = compute_interarrivals(&ts, TimeResolution::Milliseconds).unwrap();
///
/// assert_eq!(result.deltas_ns, vec![1_000_000, 2_000_000, 3_000_000]);
/// assert_eq!(result.n_events, 4);
/// ```
pub fn compute_interarrivals(
    timestamps_ns: &[u64],
    output_resolution: TimeResolution,
) -> Result<InterarrivalResult, ArrivalsError> {
    if timestamps_ns.len() < 2 {
        return Err(ArrivalsError::InsufficientData(
            "Need at least 2 timestamps to compute interarrivals".into(),
        ));
    }

    let n = timestamps_ns.len();
    let mut deltas_ns = Vec::with_capacity(n - 1);

    for i in 1..n {
        // Saturating sub handles the (unlikely) case of
        // non-monotonic data without panicking.
        deltas_ns.push(timestamps_ns[i].saturating_sub(timestamps_ns[i - 1]));
    }

    let deltas_f64: Vec<f64> = deltas_ns
        .iter()
        .map(|&d| from_nanos(d, output_resolution))
        .collect();

    let total_duration_ns = timestamps_ns[n - 1].saturating_sub(timestamps_ns[0]);

    Ok(InterarrivalResult {
        deltas_ns,
        deltas_f64,
        resolution: output_resolution,
        n_events: n,
        total_duration_ns,
    })
}

/// Compute descriptive statistics for a slice of interarrival times.
///
/// The input values should be in a consistent unit (e.g. the `deltas_f64`
/// field from `InterarrivalResult`).
///
/// # Arguments
///
/// * `deltas` - Interarrival times as `f64`.
///
/// # Returns
///
/// `ArrivalStats` with mean, variance, skewness, kurtosis, and CV.
/// Returns `None` if `deltas` is empty.
pub fn descriptive_stats(deltas: &[f64]) -> Option<ArrivalStats> {
    let n = deltas.len();
    if n == 0 {
        return None;
    }

    let n_f = n as f64;

    // Mean
    let sum: f64 = deltas.iter().sum();
    let mean = sum / n_f;

    // Min / Max
    let mut min = f64::INFINITY;
    let mut max = f64::NEG_INFINITY;
    for &d in deltas {
        if d < min {
            min = d;
        }
        if d > max {
            max = d;
        }
    }

    // Variance (population), skewness, kurtosis
    let mut m2 = 0.0_f64;
    let mut m3 = 0.0_f64;
    let mut m4 = 0.0_f64;

    for &d in deltas {
        let diff = d - mean;
        let diff2 = diff * diff;
        m2 += diff2;
        m3 += diff2 * diff;
        m4 += diff2 * diff2;
    }

    let variance = m2 / n_f;
    let std_dev = variance.sqrt();

    let skewness = if std_dev > 0.0 {
        (m3 / n_f) / (std_dev * std_dev * std_dev)
    } else {
        0.0
    };

    // Excess kurtosis (normal = 0)
    let kurtosis = if std_dev > 0.0 {
        (m4 / n_f) / (variance * variance) - 3.0
    } else {
        0.0
    };

    let cv = if mean.abs() > f64::EPSILON {
        std_dev / mean
    } else {
        0.0
    };

    Some(ArrivalStats {
        count: n,
        mean,
        variance,
        std_dev,
        min,
        max,
        skewness,
        kurtosis,
        cv,
    })
}