rs_stats/distributions/traits.rs
1//! # Distribution Traits
2//!
3//! Defines the [`Distribution`] and [`DiscreteDistribution`] traits that provide
4//! a unified interface for all statistical distributions in this crate.
5//!
6//! ## Usage
7//!
8//! ```
9//! use rs_stats::distributions::traits::Distribution;
10//! use rs_stats::distributions::normal_distribution::Normal;
11//!
12//! let n = Normal::new(0.0, 1.0).unwrap();
13//! let pdf = n.pdf(0.0).unwrap();
14//! assert!((pdf - 0.398_942_280_4).abs() < 1e-8);
15//! ```
16
17use crate::error::{StatsError, StatsResult};
18
19// ── Continuous distributions ───────────────────────────────────────────────────
20
21/// Unified interface for continuous probability distributions.
22///
23/// All methods return `StatsResult` to propagate domain errors (e.g. `p ∉ [0,1]`).
24///
25/// The trait is **object-safe**: `Box<dyn Distribution>` works at runtime.
26/// The `fit` associated function is intentionally *not* part of the trait to preserve
27/// object safety; each concrete type exposes `Dist::fit(data)` directly.
28pub trait Distribution {
29 /// Human-readable distribution name, e.g. `"Normal"`.
30 fn name(&self) -> &str;
31
32 /// Number of free parameters (used when computing AIC / BIC).
33 fn num_params(&self) -> usize;
34
35 /// Probability density function f(x).
36 fn pdf(&self, x: f64) -> StatsResult<f64>;
37
38 /// Natural logarithm of the PDF: ln f(x).
39 ///
40 /// Default implementation delegates to `pdf`; override for numerical stability.
41 fn logpdf(&self, x: f64) -> StatsResult<f64> {
42 self.pdf(x).map(|p| p.ln())
43 }
44
45 /// Cumulative distribution function F(x) = P(X ≤ x).
46 fn cdf(&self, x: f64) -> StatsResult<f64>;
47
48 /// Quantile (inverse CDF): find x such that F(x) = p.
49 fn inverse_cdf(&self, p: f64) -> StatsResult<f64>;
50
51 /// Mean (expected value) μ.
52 fn mean(&self) -> f64;
53
54 /// Variance σ².
55 fn variance(&self) -> f64;
56
57 /// Standard deviation σ = √(variance).
58 fn std_dev(&self) -> f64 {
59 self.variance().sqrt()
60 }
61
62 /// Sum of log-likelihoods: Σ ln f(xᵢ).
63 fn log_likelihood(&self, data: &[f64]) -> StatsResult<f64> {
64 let mut ll = 0.0_f64;
65 for &x in data {
66 ll += self.logpdf(x)?;
67 }
68 Ok(ll)
69 }
70
71 /// Akaike Information Criterion: AIC = 2k − 2·ln(L̂).
72 fn aic(&self, data: &[f64]) -> StatsResult<f64> {
73 let ll = self.log_likelihood(data)?;
74 Ok(2.0 * self.num_params() as f64 - 2.0 * ll)
75 }
76
77 /// Bayesian Information Criterion: BIC = k·ln(n) − 2·ln(L̂).
78 fn bic(&self, data: &[f64]) -> StatsResult<f64> {
79 let ll = self.log_likelihood(data)?;
80 let n = data.len() as f64;
81 Ok(self.num_params() as f64 * n.ln() - 2.0 * ll)
82 }
83}
84
85// ── Discrete distributions ─────────────────────────────────────────────────────
86
87/// Unified interface for discrete probability distributions.
88///
89/// Works with non-negative integer observations represented as `u64`.
90///
91/// Object-safe: `Box<dyn DiscreteDistribution>` is valid.
92pub trait DiscreteDistribution {
93 /// Human-readable distribution name.
94 fn name(&self) -> &str;
95
96 /// Number of free parameters (used for AIC / BIC).
97 fn num_params(&self) -> usize;
98
99 /// Probability mass function P(X = k).
100 fn pmf(&self, k: u64) -> StatsResult<f64>;
101
102 /// Natural logarithm of the PMF: ln P(X = k).
103 ///
104 /// Default: delegates to `pmf`; override for stability when p is tiny.
105 fn logpmf(&self, k: u64) -> StatsResult<f64> {
106 self.pmf(k).map(|p| p.ln())
107 }
108
109 /// Cumulative distribution function P(X ≤ k).
110 fn cdf(&self, k: u64) -> StatsResult<f64>;
111
112 /// Quantile function: smallest k ≥ 0 such that CDF(k) ≥ p.
113 ///
114 /// Returns an error if `p ∉ [0, 1]`.
115 ///
116 /// The default implementation performs an **exponential search** followed by
117 /// **binary search** on the CDF, which is correct for any monotone CDF but may
118 /// be slow for distributions with very large quantiles.
119 /// Override with a closed-form formula when available.
120 ///
121 /// # Examples
122 /// ```
123 /// use rs_stats::distributions::poisson_distribution::Poisson;
124 /// use rs_stats::DiscreteDistribution;
125 ///
126 /// let p = Poisson::new(3.0).unwrap();
127 /// // Median of Poisson(3) should be 3
128 /// let median = p.inverse_cdf(0.5).unwrap();
129 /// assert!(median == 2 || median == 3);
130 /// ```
131 fn inverse_cdf(&self, p: f64) -> StatsResult<u64> {
132 if !(0.0..=1.0).contains(&p) {
133 return Err(StatsError::InvalidInput {
134 message: format!("inverse_cdf: p must be in [0, 1], got {p}"),
135 });
136 }
137 if p == 0.0 {
138 return Ok(0);
139 }
140 // Phase 1 — exponential search to bracket the answer.
141 let mut hi: u64 = 1;
142 while self.cdf(hi)? < p {
143 hi = hi.saturating_mul(2);
144 if hi == u64::MAX {
145 return Err(StatsError::NumericalError {
146 message: "inverse_cdf: quantile exceeds u64::MAX".to_string(),
147 });
148 }
149 }
150 // Phase 2 — binary search in [0, hi].
151 let mut lo: u64 = 0;
152 while lo < hi {
153 let mid = lo + (hi - lo) / 2;
154 if self.cdf(mid)? < p {
155 lo = mid + 1;
156 } else {
157 hi = mid;
158 }
159 }
160 Ok(lo)
161 }
162
163 /// Mean (expected value) μ.
164 fn mean(&self) -> f64;
165
166 /// Variance σ².
167 fn variance(&self) -> f64;
168
169 /// Standard deviation σ = √(variance).
170 fn std_dev(&self) -> f64 {
171 self.variance().sqrt()
172 }
173
174 /// Sum of log-PMFs: Σ ln P(X = kᵢ).
175 fn log_likelihood(&self, data: &[u64]) -> StatsResult<f64> {
176 let mut ll = 0.0_f64;
177 for &k in data {
178 ll += self.logpmf(k)?;
179 }
180 Ok(ll)
181 }
182
183 /// AIC = 2k − 2·ln(L̂).
184 fn aic(&self, data: &[u64]) -> StatsResult<f64> {
185 let ll = self.log_likelihood(data)?;
186 Ok(2.0 * self.num_params() as f64 - 2.0 * ll)
187 }
188
189 /// BIC = k·ln(n) − 2·ln(L̂).
190 fn bic(&self, data: &[u64]) -> StatsResult<f64> {
191 let ll = self.log_likelihood(data)?;
192 let n = data.len() as f64;
193 Ok(self.num_params() as f64 * n.ln() - 2.0 * ll)
194 }
195}