rs_stats/distributions/traits.rs
1//! # Distribution trait
2//!
3//! Single unified [`Distribution`] trait covering both continuous and
4//! discrete probability distributions, parameterised by an associated
5//! support type [`Distribution::X`] — `f64` for continuous, `u64` for
6//! discrete. The previous `Distribution` / `DiscreteDistribution` split
7//! is gone in v3.0.
8//!
9//! ## Usage
10//!
11//! ```
12//! use rs_stats::distributions::traits::Distribution;
13//! use rs_stats::distributions::normal_distribution::Normal;
14//!
15//! let n = Normal::new(0.0, 1.0).unwrap();
16//! let pdf = n.pdf(0.0).unwrap();
17//! assert!((pdf - 0.398_942_280_4).abs() < 1e-8);
18//! ```
19//!
20//! For polymorphism, parameterise on the support type:
21//!
22//! ```
23//! use rs_stats::Distribution;
24//! use rs_stats::distributions::normal_distribution::Normal;
25//! use rs_stats::distributions::lognormal::LogNormal;
26//!
27//! fn pick(skewed: bool) -> Box<dyn Distribution<X = f64>> {
28//! if skewed {
29//! Box::new(LogNormal::new(1.0, 0.5).unwrap())
30//! } else {
31//! Box::new(Normal::new(80.0, 10.0).unwrap())
32//! }
33//! }
34//! ```
35
36use crate::error::{StatsError, StatsResult};
37
38/// Unified probability-distribution interface.
39///
40/// All methods return [`StatsResult`] to propagate domain errors (e.g.
41/// `p ∉ [0, 1]`). The trait is **object-safe** with the support type
42/// fixed: `Box<dyn Distribution<X = f64>>` and
43/// `Box<dyn Distribution<X = u64>>` both work.
44///
45/// `fit` is intentionally not part of the trait (it would break object
46/// safety); each concrete type exposes `Dist::fit(data)` directly.
47pub trait Distribution {
48 /// The support type: `f64` for continuous distributions, `u64` for
49 /// discrete distributions.
50 type X: Copy;
51
52 /// Human-readable distribution name, e.g. `"Normal"`.
53 fn name(&self) -> &str;
54
55 /// Number of free parameters — used for AIC / BIC.
56 fn num_params(&self) -> usize;
57
58 /// Probability density (continuous) or probability mass (discrete) at `x`.
59 fn pdf(&self, x: Self::X) -> StatsResult<f64>;
60
61 /// `ln pdf(x)`. Default: `pdf(x).ln()`. Override for stability when
62 /// `pdf` underflows (e.g. extreme tails of LogNormal).
63 fn logpdf(&self, x: Self::X) -> StatsResult<f64> {
64 self.pdf(x).map(|p| p.ln())
65 }
66
67 /// Probability mass — alias for [`pdf`](Self::pdf), kept for natural
68 /// reading on discrete distributions and v2.x source-compatibility.
69 fn pmf(&self, x: Self::X) -> StatsResult<f64> {
70 self.pdf(x)
71 }
72
73 /// Log-PMF — alias for [`logpdf`](Self::logpdf).
74 fn logpmf(&self, x: Self::X) -> StatsResult<f64> {
75 self.logpdf(x)
76 }
77
78 /// Cumulative distribution function `F(x) = P(X ≤ x)`.
79 fn cdf(&self, x: Self::X) -> StatsResult<f64>;
80
81 /// Quantile / inverse CDF: smallest `x` such that `F(x) ≥ p`.
82 fn inverse_cdf(&self, p: f64) -> StatsResult<Self::X>;
83
84 /// Mean (expected value) `μ`.
85 fn mean(&self) -> f64;
86
87 /// Variance `σ²`.
88 fn variance(&self) -> f64;
89
90 /// Standard deviation `σ = √(variance)`.
91 fn std_dev(&self) -> f64 {
92 self.variance().sqrt()
93 }
94
95 /// Sum of log-likelihoods `Σ ln pdf(xᵢ)`. Falls back to
96 /// [`StatsError`] if any point is out of support.
97 fn log_likelihood(&self, data: &[Self::X]) -> StatsResult<f64> {
98 let mut ll = 0.0_f64;
99 for &x in data {
100 ll += self.logpdf(x)?;
101 }
102 Ok(ll)
103 }
104
105 /// Infallible bulk log-likelihood. Out-of-support points contribute
106 /// `f64::NEG_INFINITY` (as in scipy). Implementations should override
107 /// this when they can express the inner loop without per-point
108 /// branching, so LLVM can autovectorise it.
109 fn log_likelihood_fast(&self, data: &[Self::X]) -> f64 {
110 let mut ll = 0.0_f64;
111 for &x in data {
112 ll += self.logpdf(x).unwrap_or(f64::NEG_INFINITY);
113 }
114 ll
115 }
116
117 /// Akaike Information Criterion: `2k − 2·ln(L̂)`.
118 fn aic(&self, data: &[Self::X]) -> StatsResult<f64> {
119 let ll = self.log_likelihood(data)?;
120 Ok(2.0 * self.num_params() as f64 - 2.0 * ll)
121 }
122
123 /// Bayesian Information Criterion: `k·ln(n) − 2·ln(L̂)`.
124 fn bic(&self, data: &[Self::X]) -> StatsResult<f64> {
125 let ll = self.log_likelihood(data)?;
126 let n = data.len() as f64;
127 Ok(self.num_params() as f64 * n.ln() - 2.0 * ll)
128 }
129}
130
131// ── Helpers shared across discrete distributions ──────────────────────────────
132
133/// Generic discrete inverse-CDF: smallest `k ≥ 0` such that `cdf(k) ≥ p`.
134///
135/// Works on any monotone non-decreasing CDF over `u64`. Phase 1
136/// exponential-doubling brackets the answer, Phase 2 binary-searches.
137/// Discrete distributions whose `inverse_cdf` has no closed form delegate
138/// to this helper.
139pub(crate) fn discrete_inverse_cdf_search(
140 p: f64,
141 cdf: impl Fn(u64) -> StatsResult<f64>,
142) -> StatsResult<u64> {
143 if !(0.0..=1.0).contains(&p) {
144 return Err(StatsError::InvalidInput {
145 message: format!("inverse_cdf: p must be in [0, 1], got {p}"),
146 });
147 }
148 if p == 0.0 {
149 return Ok(0);
150 }
151 let mut hi: u64 = 1;
152 while cdf(hi)? < p {
153 hi = hi.saturating_mul(2);
154 if hi == u64::MAX {
155 return Err(StatsError::NumericalError {
156 message: "inverse_cdf: quantile exceeds u64::MAX".to_string(),
157 });
158 }
159 }
160 let mut lo: u64 = 0;
161 while lo < hi {
162 let mid = lo + (hi - lo) / 2;
163 if cdf(mid)? < p {
164 lo = mid + 1;
165 } else {
166 hi = mid;
167 }
168 }
169 Ok(lo)
170}
171
172/// Backwards-compatibility alias. `DiscreteDistribution` was a separate
173/// trait in v2.x; in v3.0 every distribution implements the unified
174/// [`Distribution`] trait. This alias matches discrete distributions
175/// (those whose support is `u64`) and keeps existing imports working.
176pub trait DiscreteDistribution: Distribution<X = u64> {}
177impl<T: Distribution<X = u64>> DiscreteDistribution for T {}