Skip to main content

rs_stats/distributions/
lognormal.rs

1//! # Log-Normal Distribution
2//!
3//! If X ~ LogNormal(μ, σ), then ln(X) ~ Normal(μ, σ²).
4//! The distribution is defined for x > 0 and is right-skewed.
5//!
6//! **PDF**: f(x) = 1 / (x σ √(2π)) · exp(−(ln x − μ)² / (2σ²))
7//!
8//! **MLE**: μ̂ = mean(ln data),  σ̂ = pop-std(ln data)  (exact MLE)
9//!
10//! **Median** = exp(μ)  (more informative than mean for skewed data)
11//!
12//! ## When to use
13//!
14//! Log-Normal arises naturally when a positive quantity is the *product* of many
15//! independent factors, or whenever the natural logarithm of the data is approximately
16//! Normal.  It always produces right-skewed, positive-valued data — a hallmark of
17//! many biological measurements.
18//!
19//! ## Medical applications
20//!
21//! | Biomarker / quantity | Why log-normal |
22//! |----------------------|---------------|
23//! | **CRP** (C-reactive protein, mg/L) | Spans < 1 to > 100 in the same cohort |
24//! | **Serum creatinine** (µmol/L) | Positive, right-skewed in kidney disease |
25//! | **Drug plasma concentration** (AUC) | Product of absorption / distribution factors |
26//! | **Tumour volume** | Multiplicative growth process |
27//! | **Hospital length-of-stay** | Most stays short, rare very long admissions |
28//!
29//! ## Example — CRP inflammation marker
30//!
31//! ```rust
32//! use rs_stats::distributions::lognormal::LogNormal;
33//! use rs_stats::distributions::traits::Distribution;
34//!
35//! // CRP levels (mg/L) — healthy < 5, elevated 5–100, critical > 100
36//! let crp = vec![
37//!     0.8, 1.2, 1.5, 2.1, 2.4, 3.2, 3.9, 5.6, 9.7, 12.4,
38//!     22.3, 45.0, 88.0, 0.9, 1.3, 1.8, 0.7, 0.6, 0.5, 1.1,
39//! ];
40//! let dist = LogNormal::fit(&crp).unwrap();
41//! println!("LogNormal(μ={:.2}, σ={:.2})", dist.mu, dist.sigma);
42//!
43//! // Median is more appropriate than mean for skewed biomarkers
44//! let median = dist.inverse_cdf(0.5).unwrap();
45//! let p_high = 1.0 - dist.cdf(10.0).unwrap();
46//! println!("Median CRP       = {:.2} mg/L", median);
47//! println!("P(CRP > 10 mg/L) = {:.1}%", p_high * 100.0);
48//! ```
49
50use std::f64::consts::PI;
51
52use crate::distributions::normal_distribution::{normal_cdf, normal_inverse_cdf};
53use crate::distributions::traits::Distribution;
54use crate::error::{StatsError, StatsResult};
55
56/// Log-Normal distribution LogNormal(μ, σ).
57///
58/// # Examples
59/// ```
60/// use rs_stats::distributions::lognormal::LogNormal;
61/// use rs_stats::distributions::traits::Distribution;
62///
63/// let ln = LogNormal::new(0.0, 1.0).unwrap();
64/// // Mean of LogNormal(0, 1) = exp(0.5)
65/// assert!((ln.mean() - 0.5_f64.exp()).abs() < 1e-10);
66/// ```
67#[derive(Debug, Clone, Copy)]
68pub struct LogNormal {
69    /// Location (mean of ln X) μ
70    pub mu: f64,
71    /// Scale (std-dev of ln X) σ > 0
72    pub sigma: f64,
73}
74
75impl LogNormal {
76    /// Creates a `LogNormal(μ, σ)` distribution.
77    pub fn new(mu: f64, sigma: f64) -> StatsResult<Self> {
78        if sigma <= 0.0 {
79            return Err(StatsError::InvalidInput {
80                message: "LogNormal::new: sigma must be positive".to_string(),
81            });
82        }
83        Ok(Self { mu, sigma })
84    }
85
86    /// Exact MLE: μ̂ = mean(ln data), σ̂ = population std-dev of ln data.
87    ///
88    /// Requires all data > 0.
89    pub fn fit(data: &[f64]) -> StatsResult<Self> {
90        if data.is_empty() {
91            return Err(StatsError::InvalidInput {
92                message: "LogNormal::fit: data must not be empty".to_string(),
93            });
94        }
95        if data.iter().any(|&x| x <= 0.0) {
96            return Err(StatsError::InvalidInput {
97                message: "LogNormal::fit: all data values must be positive".to_string(),
98            });
99        }
100        let n = data.len() as f64;
101        let log_data: Vec<f64> = data.iter().map(|&x| x.ln()).collect();
102        let mu = log_data.iter().sum::<f64>() / n;
103        let variance = log_data.iter().map(|&y| (y - mu).powi(2)).sum::<f64>() / n;
104        Self::new(mu, variance.sqrt())
105    }
106}
107
108impl Distribution for LogNormal {
109    fn name(&self) -> &str {
110        "LogNormal"
111    }
112    fn num_params(&self) -> usize {
113        2
114    }
115
116    fn pdf(&self, x: f64) -> StatsResult<f64> {
117        if x <= 0.0 {
118            return Ok(0.0);
119        }
120        Ok(self.logpdf(x)?.exp())
121    }
122
123    fn logpdf(&self, x: f64) -> StatsResult<f64> {
124        if x <= 0.0 {
125            return Ok(f64::NEG_INFINITY);
126        }
127        let z = (x.ln() - self.mu) / self.sigma;
128        Ok(-x.ln() - self.sigma.ln() - 0.5 * (2.0 * PI).ln() - 0.5 * z * z)
129    }
130
131    fn cdf(&self, x: f64) -> StatsResult<f64> {
132        if x <= 0.0 {
133            return Ok(0.0);
134        }
135        normal_cdf(x.ln(), self.mu, self.sigma)
136    }
137
138    fn inverse_cdf(&self, p: f64) -> StatsResult<f64> {
139        if !(0.0..=1.0).contains(&p) {
140            return Err(StatsError::InvalidInput {
141                message: "LogNormal::inverse_cdf: p must be in [0, 1]".to_string(),
142            });
143        }
144        if p == 0.0 {
145            return Ok(0.0);
146        }
147        if p == 1.0 {
148            return Ok(f64::INFINITY);
149        }
150        Ok(normal_inverse_cdf(p, self.mu, self.sigma)?.exp())
151    }
152
153    fn mean(&self) -> f64 {
154        (self.mu + 0.5 * self.sigma * self.sigma).exp()
155    }
156
157    fn variance(&self) -> f64 {
158        let s2 = self.sigma * self.sigma;
159        (s2.exp() - 1.0) * (2.0 * self.mu + s2).exp()
160    }
161}
162
163#[cfg(test)]
164mod tests {
165    use super::*;
166
167    #[test]
168    fn test_lognormal_mean() {
169        let ln = LogNormal::new(0.0, 1.0).unwrap();
170        assert!((ln.mean() - 0.5_f64.exp()).abs() < 1e-10);
171    }
172
173    #[test]
174    fn test_lognormal_pdf_positive() {
175        let ln = LogNormal::new(0.0, 1.0).unwrap();
176        // pdf(1) = 1/(sqrt(2π)) ≈ 0.3989
177        // INV_SQRT_2PI = 1/sqrt(2π) ≈ 0.3989422804014327
178        assert!((ln.pdf(1.0).unwrap() - 0.398_942_280_4).abs() < 1e-8);
179    }
180
181    #[test]
182    fn test_lognormal_cdf_bounds() {
183        let ln = LogNormal::new(0.0, 1.0).unwrap();
184        assert_eq!(ln.cdf(0.0).unwrap(), 0.0);
185        assert!((ln.cdf(f64::MAX).unwrap() - 1.0).abs() < 1e-6);
186    }
187
188    #[test]
189    fn test_lognormal_inverse_cdf_roundtrip() {
190        let ln = LogNormal::new(1.0, 0.5).unwrap();
191        for p in [0.1, 0.25, 0.5, 0.75, 0.9] {
192            let x = ln.inverse_cdf(p).unwrap();
193            let p_back = ln.cdf(x).unwrap();
194            // Tolerance: Acklam approximation + erf roundtrip accumulates ~1e-6 error
195            assert!((p - p_back).abs() < 1e-6, "p={p}: roundtrip failed");
196        }
197    }
198
199    #[test]
200    fn test_lognormal_fit() {
201        let data = vec![1.0, 2.0, 0.5, 3.0, 1.5, 0.8, 2.5, 1.2];
202        let ln = LogNormal::fit(&data).unwrap();
203        let log_mean = data.iter().map(|&x| x.ln()).sum::<f64>() / data.len() as f64;
204        assert!((ln.mu - log_mean).abs() < 1e-10);
205    }
206}