Skip to main content

rs_stats/distributions/
traits.rs

1//! # Distribution trait
2//!
3//! Single unified [`Distribution`] trait covering both continuous and
4//! discrete probability distributions, parameterised by an associated
5//! support type [`Distribution::X`] — `f64` for continuous, `u64` for
6//! discrete. The previous `Distribution` / `DiscreteDistribution` split
7//! is gone in v3.0.
8//!
9//! ## Usage
10//!
11//! ```
12//! use rs_stats::distributions::traits::Distribution;
13//! use rs_stats::distributions::normal_distribution::Normal;
14//!
15//! let n = Normal::new(0.0, 1.0).unwrap();
16//! let pdf = n.pdf(0.0).unwrap();
17//! assert!((pdf - 0.398_942_280_4).abs() < 1e-8);
18//! ```
19//!
20//! For polymorphism, parameterise on the support type:
21//!
22//! ```
23//! use rs_stats::Distribution;
24//! use rs_stats::distributions::normal_distribution::Normal;
25//! use rs_stats::distributions::lognormal::LogNormal;
26//!
27//! fn pick(skewed: bool) -> Box<dyn Distribution<X = f64>> {
28//!     if skewed {
29//!         Box::new(LogNormal::new(1.0, 0.5).unwrap())
30//!     } else {
31//!         Box::new(Normal::new(80.0, 10.0).unwrap())
32//!     }
33//! }
34//! ```
35
36use crate::error::{StatsError, StatsResult};
37
38/// Unified probability-distribution interface.
39///
40/// All methods return [`StatsResult`] to propagate domain errors (e.g.
41/// `p ∉ [0, 1]`). The trait is **object-safe** with the support type
42/// fixed: `Box<dyn Distribution<X = f64>>` and
43/// `Box<dyn Distribution<X = u64>>` both work.
44///
45/// `fit` is intentionally not part of the trait (it would break object
46/// safety); each concrete type exposes `Dist::fit(data)` directly.
47pub trait Distribution {
48    /// The support type: `f64` for continuous distributions, `u64` for
49    /// discrete distributions.
50    type X: Copy;
51
52    /// Human-readable distribution name, e.g. `"Normal"`.
53    fn name(&self) -> &str;
54
55    /// Number of free parameters — used for AIC / BIC.
56    fn num_params(&self) -> usize;
57
58    /// Probability density (continuous) or probability mass (discrete) at `x`.
59    fn pdf(&self, x: Self::X) -> StatsResult<f64>;
60
61    /// `ln pdf(x)`. Default: `pdf(x).ln()`. Override for stability when
62    /// `pdf` underflows (e.g. extreme tails of LogNormal).
63    fn logpdf(&self, x: Self::X) -> StatsResult<f64> {
64        self.pdf(x).map(|p| p.ln())
65    }
66
67    /// Probability mass — alias for [`pdf`](Self::pdf), kept for natural
68    /// reading on discrete distributions and v2.x source-compatibility.
69    fn pmf(&self, x: Self::X) -> StatsResult<f64> {
70        self.pdf(x)
71    }
72
73    /// Log-PMF — alias for [`logpdf`](Self::logpdf).
74    fn logpmf(&self, x: Self::X) -> StatsResult<f64> {
75        self.logpdf(x)
76    }
77
78    /// Cumulative distribution function `F(x) = P(X ≤ x)`.
79    fn cdf(&self, x: Self::X) -> StatsResult<f64>;
80
81    /// Quantile / inverse CDF: smallest `x` such that `F(x) ≥ p`.
82    fn inverse_cdf(&self, p: f64) -> StatsResult<Self::X>;
83
84    /// Mean (expected value) `μ`.
85    fn mean(&self) -> f64;
86
87    /// Variance `σ²`.
88    fn variance(&self) -> f64;
89
90    /// Standard deviation `σ = √(variance)`.
91    fn std_dev(&self) -> f64 {
92        self.variance().sqrt()
93    }
94
95    /// Sum of log-likelihoods `Σ ln pdf(xᵢ)`. Falls back to
96    /// [`StatsError`] if any point is out of support.
97    fn log_likelihood(&self, data: &[Self::X]) -> StatsResult<f64> {
98        let mut ll = 0.0_f64;
99        for &x in data {
100            ll += self.logpdf(x)?;
101        }
102        Ok(ll)
103    }
104
105    /// Infallible bulk log-likelihood. Out-of-support points contribute
106    /// `f64::NEG_INFINITY` (as in scipy). Implementations should override
107    /// this when they can express the inner loop without per-point
108    /// branching, so LLVM can autovectorise it.
109    fn log_likelihood_fast(&self, data: &[Self::X]) -> f64 {
110        let mut ll = 0.0_f64;
111        for &x in data {
112            ll += self.logpdf(x).unwrap_or(f64::NEG_INFINITY);
113        }
114        ll
115    }
116
117    /// Akaike Information Criterion: `2k − 2·ln(L̂)`.
118    fn aic(&self, data: &[Self::X]) -> StatsResult<f64> {
119        let ll = self.log_likelihood(data)?;
120        Ok(2.0 * self.num_params() as f64 - 2.0 * ll)
121    }
122
123    /// Bayesian Information Criterion: `k·ln(n) − 2·ln(L̂)`.
124    fn bic(&self, data: &[Self::X]) -> StatsResult<f64> {
125        let ll = self.log_likelihood(data)?;
126        let n = data.len() as f64;
127        Ok(self.num_params() as f64 * n.ln() - 2.0 * ll)
128    }
129}
130
131// ── Helpers shared across discrete distributions ──────────────────────────────
132
133/// Generic discrete inverse-CDF: smallest `k ≥ 0` such that `cdf(k) ≥ p`.
134///
135/// Works on any monotone non-decreasing CDF over `u64`. Phase 1
136/// exponential-doubling brackets the answer, Phase 2 binary-searches.
137/// Discrete distributions whose `inverse_cdf` has no closed form delegate
138/// to this helper.
139pub(crate) fn discrete_inverse_cdf_search(
140    p: f64,
141    cdf: impl Fn(u64) -> StatsResult<f64>,
142) -> StatsResult<u64> {
143    if !(0.0..=1.0).contains(&p) {
144        return Err(StatsError::InvalidInput {
145            message: format!("inverse_cdf: p must be in [0, 1], got {p}"),
146        });
147    }
148    if p == 0.0 {
149        return Ok(0);
150    }
151    let mut hi: u64 = 1;
152    while cdf(hi)? < p {
153        hi = hi.saturating_mul(2);
154        if hi == u64::MAX {
155            return Err(StatsError::NumericalError {
156                message: "inverse_cdf: quantile exceeds u64::MAX".to_string(),
157            });
158        }
159    }
160    let mut lo: u64 = 0;
161    while lo < hi {
162        let mid = lo + (hi - lo) / 2;
163        if cdf(mid)? < p {
164            lo = mid + 1;
165        } else {
166            hi = mid;
167        }
168    }
169    Ok(lo)
170}
171
172/// Backwards-compatibility alias. `DiscreteDistribution` was a separate
173/// trait in v2.x; in v3.0 every distribution implements the unified
174/// [`Distribution`] trait. This alias matches discrete distributions
175/// (those whose support is `u64`) and keeps existing imports working.
176pub trait DiscreteDistribution: Distribution<X = u64> {}
177impl<T: Distribution<X = u64>> DiscreteDistribution for T {}