Skip to main content

rs_stats/distributions/
f_distribution.rs

1//! # F-Distribution
2//!
3//! The F-distribution F(d1, d2) with d1 numerator and d2 denominator degrees
4//! of freedom models ratios of chi-squared random variables.
5//!
6//! **PDF**: f(x; d1, d2) = √((d1·x)^d1 · d2^d2 / (d1·x + d2)^(d1+d2)) / (x · B(d1/2, d2/2))
7//!
8//! **Mean**: d2 / (d2 − 2)  for d2 > 2
9
10use crate::distributions::traits::Distribution;
11use crate::error::{StatsError, StatsResult};
12use crate::utils::special_functions::{bisect_inverse_cdf, ln_beta, regularized_incomplete_beta};
13
14/// F-distribution F(d1, d2).
15///
16/// # Examples
17/// ```
18/// use rs_stats::distributions::f_distribution::FDistribution;
19/// use rs_stats::distributions::traits::Distribution;
20///
21/// let f = FDistribution::new(5.0, 10.0).unwrap();
22/// assert!((f.mean() - 10.0 / 8.0).abs() < 1e-10);
23/// ```
24#[derive(Debug, Clone, Copy)]
25pub struct FDistribution {
26    /// Numerator degrees of freedom d1 > 0
27    pub d1: f64,
28    /// Denominator degrees of freedom d2 > 0
29    pub d2: f64,
30}
31
32impl FDistribution {
33    /// Creates an `F(d1, d2)` distribution.
34    pub fn new(d1: f64, d2: f64) -> StatsResult<Self> {
35        if d1 <= 0.0 || d2 <= 0.0 {
36            return Err(StatsError::InvalidInput {
37                message: "FDistribution::new: d1 and d2 must be positive".to_string(),
38            });
39        }
40        Ok(Self { d1, d2 })
41    }
42
43    /// Method-of-moments fitting from data > 0.
44    ///
45    /// Estimates d2 from the mean (requires mean > 1) and d1 from the variance.
46    pub fn fit(data: &[f64]) -> StatsResult<Self> {
47        if data.is_empty() {
48            return Err(StatsError::InvalidInput {
49                message: "FDistribution::fit: data must not be empty".to_string(),
50            });
51        }
52        if data.iter().any(|&x| x <= 0.0) {
53            return Err(StatsError::InvalidInput {
54                message: "FDistribution::fit: all data values must be positive".to_string(),
55            });
56        }
57        let n = data.len() as f64;
58        let mean = data.iter().sum::<f64>() / n;
59        let variance = data.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / n;
60
61        // E[X] = d2/(d2-2) → d2 = 2*mean/(mean-1)
62        if mean <= 1.0 {
63            // Can't estimate; fall back to typical defaults
64            return Self::new(2.0, 10.0);
65        }
66        let d2 = (2.0 * mean / (mean - 1.0)).max(2.01);
67
68        // Var[X] = 2d2²(d1+d2-2) / (d1(d2-2)²(d2-4))
69        // Solve for d1:
70        //   d1 = 2*d2²*(d2-2) / (variance*(d2-2)²*(d2-4)/d2² + 2*(d2-2)*(d2-4)*d2 / d2²)
71        // Simplified from the quadratic:
72        let d2m2 = d2 - 2.0;
73        let d2m4 = (d2 - 4.0).max(0.01);
74        // Var ≈ 2d2²(d1+d2-2) / (d1 * d2m2² * d2m4)
75        // d1 * variance * d2m2² * d2m4 = 2d2²(d1 + d2 - 2)
76        // d1 * (variance * d2m2² * d2m4 - 2d2²) = 2d2²(d2 - 2)
77        let numerator = 2.0 * d2 * d2 * d2m2;
78        let denominator = variance * d2m2 * d2m2 * d2m4 - 2.0 * d2 * d2;
79        let d1 = if denominator > 0.0 {
80            (numerator / denominator).max(0.01)
81        } else {
82            2.0
83        };
84
85        Self::new(d1, d2)
86    }
87}
88
89impl Distribution for FDistribution {
90    fn name(&self) -> &str {
91        "F"
92    }
93    fn num_params(&self) -> usize {
94        2
95    }
96
97    fn pdf(&self, x: f64) -> StatsResult<f64> {
98        if x <= 0.0 {
99            return Ok(0.0);
100        }
101        Ok(self.logpdf(x)?.exp())
102    }
103
104    fn logpdf(&self, x: f64) -> StatsResult<f64> {
105        if x <= 0.0 {
106            return Ok(f64::NEG_INFINITY);
107        }
108        let d1 = self.d1;
109        let d2 = self.d2;
110        let log_pdf = 0.5 * (d1 * (d1 * x).ln() + d2 * d2.ln() - (d1 + d2) * (d1 * x + d2).ln())
111            - x.ln()
112            - ln_beta(d1 / 2.0, d2 / 2.0);
113        Ok(log_pdf)
114    }
115
116    fn cdf(&self, x: f64) -> StatsResult<f64> {
117        if x <= 0.0 {
118            return Ok(0.0);
119        }
120        // I_{d1*x/(d1*x + d2)}(d1/2, d2/2)
121        let t = self.d1 * x / (self.d1 * x + self.d2);
122        Ok(regularized_incomplete_beta(self.d1 / 2.0, self.d2 / 2.0, t))
123    }
124
125    fn inverse_cdf(&self, p: f64) -> StatsResult<f64> {
126        if !(0.0..=1.0).contains(&p) {
127            return Err(StatsError::InvalidInput {
128                message: "FDistribution::inverse_cdf: p must be in [0, 1]".to_string(),
129            });
130        }
131        if p == 0.0 {
132            return Ok(0.0);
133        }
134        if p == 1.0 {
135            return Ok(f64::INFINITY);
136        }
137        let d1 = self.d1;
138        let d2 = self.d2;
139        let mean = if d2 > 2.0 { d2 / (d2 - 2.0) } else { 5.0 };
140        let hi = mean * 20.0;
141        Ok(bisect_inverse_cdf(
142            |x| {
143                if x <= 0.0 {
144                    return 0.0;
145                }
146                let t = d1 * x / (d1 * x + d2);
147                regularized_incomplete_beta(d1 / 2.0, d2 / 2.0, t)
148            },
149            p,
150            0.0,
151            hi,
152        ))
153    }
154
155    fn mean(&self) -> f64 {
156        if self.d2 > 2.0 {
157            self.d2 / (self.d2 - 2.0)
158        } else {
159            f64::INFINITY
160        }
161    }
162
163    fn variance(&self) -> f64 {
164        let d1 = self.d1;
165        let d2 = self.d2;
166        if d2 > 4.0 {
167            2.0 * d2 * d2 * (d1 + d2 - 2.0) / (d1 * (d2 - 2.0).powi(2) * (d2 - 4.0))
168        } else {
169            f64::INFINITY
170        }
171    }
172}
173
174#[cfg(test)]
175mod tests {
176    use super::*;
177
178    #[test]
179    fn test_f_mean() {
180        let f = FDistribution::new(5.0, 10.0).unwrap();
181        assert!((f.mean() - 10.0 / 8.0).abs() < 1e-10);
182    }
183
184    #[test]
185    fn test_f_cdf_zero() {
186        let f = FDistribution::new(3.0, 6.0).unwrap();
187        assert_eq!(f.cdf(0.0).unwrap(), 0.0);
188    }
189
190    #[test]
191    fn test_f_pdf_positive() {
192        let f = FDistribution::new(2.0, 2.0).unwrap();
193        let p = f.pdf(1.0).unwrap();
194        assert!(p > 0.0 && p.is_finite());
195    }
196
197    #[test]
198    fn test_f_inverse_cdf_roundtrip() {
199        let f = FDistribution::new(5.0, 10.0).unwrap();
200        for p in [0.1, 0.25, 0.5, 0.75, 0.9] {
201            let x = f.inverse_cdf(p).unwrap();
202            let p_back = f.cdf(x).unwrap();
203            assert!((p - p_back).abs() < 1e-5, "p={p}");
204        }
205    }
206
207    #[test]
208    fn test_f_invalid() {
209        assert!(FDistribution::new(0.0, 5.0).is_err());
210        assert!(FDistribution::new(5.0, 0.0).is_err());
211    }
212}