rs_stats/distributions/
f_distribution.rs1use crate::distributions::traits::Distribution;
11use crate::error::{StatsError, StatsResult};
12use crate::utils::special_functions::{bisect_inverse_cdf, ln_beta, regularized_incomplete_beta};
13
14#[derive(Debug, Clone, Copy)]
25pub struct FDistribution {
26 pub d1: f64,
28 pub d2: f64,
30}
31
32impl FDistribution {
33 pub fn new(d1: f64, d2: f64) -> StatsResult<Self> {
35 if d1 <= 0.0 || d2 <= 0.0 {
36 return Err(StatsError::InvalidInput {
37 message: "FDistribution::new: d1 and d2 must be positive".to_string(),
38 });
39 }
40 Ok(Self { d1, d2 })
41 }
42
43 pub fn fit(data: &[f64]) -> StatsResult<Self> {
47 if data.is_empty() {
48 return Err(StatsError::InvalidInput {
49 message: "FDistribution::fit: data must not be empty".to_string(),
50 });
51 }
52 if data.iter().any(|&x| x <= 0.0) {
53 return Err(StatsError::InvalidInput {
54 message: "FDistribution::fit: all data values must be positive".to_string(),
55 });
56 }
57 let n = data.len() as f64;
58 let mean = data.iter().sum::<f64>() / n;
59 let variance = data.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / n;
60
61 if mean <= 1.0 {
63 return Self::new(2.0, 10.0);
65 }
66 let d2 = (2.0 * mean / (mean - 1.0)).max(2.01);
67
68 let d2m2 = d2 - 2.0;
73 let d2m4 = (d2 - 4.0).max(0.01);
74 let numerator = 2.0 * d2 * d2 * d2m2;
78 let denominator = variance * d2m2 * d2m2 * d2m4 - 2.0 * d2 * d2;
79 let d1 = if denominator > 0.0 {
80 (numerator / denominator).max(0.01)
81 } else {
82 2.0
83 };
84
85 Self::new(d1, d2)
86 }
87}
88
89impl Distribution for FDistribution {
90 fn name(&self) -> &str {
91 "F"
92 }
93 fn num_params(&self) -> usize {
94 2
95 }
96
97 fn pdf(&self, x: f64) -> StatsResult<f64> {
98 if x <= 0.0 {
99 return Ok(0.0);
100 }
101 Ok(self.logpdf(x)?.exp())
102 }
103
104 fn logpdf(&self, x: f64) -> StatsResult<f64> {
105 if x <= 0.0 {
106 return Ok(f64::NEG_INFINITY);
107 }
108 let d1 = self.d1;
109 let d2 = self.d2;
110 let log_pdf = 0.5 * (d1 * (d1 * x).ln() + d2 * d2.ln() - (d1 + d2) * (d1 * x + d2).ln())
111 - x.ln()
112 - ln_beta(d1 / 2.0, d2 / 2.0);
113 Ok(log_pdf)
114 }
115
116 fn cdf(&self, x: f64) -> StatsResult<f64> {
117 if x <= 0.0 {
118 return Ok(0.0);
119 }
120 let t = self.d1 * x / (self.d1 * x + self.d2);
122 Ok(regularized_incomplete_beta(self.d1 / 2.0, self.d2 / 2.0, t))
123 }
124
125 fn inverse_cdf(&self, p: f64) -> StatsResult<f64> {
126 if !(0.0..=1.0).contains(&p) {
127 return Err(StatsError::InvalidInput {
128 message: "FDistribution::inverse_cdf: p must be in [0, 1]".to_string(),
129 });
130 }
131 if p == 0.0 {
132 return Ok(0.0);
133 }
134 if p == 1.0 {
135 return Ok(f64::INFINITY);
136 }
137 let d1 = self.d1;
138 let d2 = self.d2;
139 let mean = if d2 > 2.0 { d2 / (d2 - 2.0) } else { 5.0 };
140 let hi = mean * 20.0;
141 Ok(bisect_inverse_cdf(
142 |x| {
143 if x <= 0.0 {
144 return 0.0;
145 }
146 let t = d1 * x / (d1 * x + d2);
147 regularized_incomplete_beta(d1 / 2.0, d2 / 2.0, t)
148 },
149 p,
150 0.0,
151 hi,
152 ))
153 }
154
155 fn mean(&self) -> f64 {
156 if self.d2 > 2.0 {
157 self.d2 / (self.d2 - 2.0)
158 } else {
159 f64::INFINITY
160 }
161 }
162
163 fn variance(&self) -> f64 {
164 let d1 = self.d1;
165 let d2 = self.d2;
166 if d2 > 4.0 {
167 2.0 * d2 * d2 * (d1 + d2 - 2.0) / (d1 * (d2 - 2.0).powi(2) * (d2 - 4.0))
168 } else {
169 f64::INFINITY
170 }
171 }
172}
173
174#[cfg(test)]
175mod tests {
176 use super::*;
177
178 #[test]
179 fn test_f_mean() {
180 let f = FDistribution::new(5.0, 10.0).unwrap();
181 assert!((f.mean() - 10.0 / 8.0).abs() < 1e-10);
182 }
183
184 #[test]
185 fn test_f_cdf_zero() {
186 let f = FDistribution::new(3.0, 6.0).unwrap();
187 assert_eq!(f.cdf(0.0).unwrap(), 0.0);
188 }
189
190 #[test]
191 fn test_f_pdf_positive() {
192 let f = FDistribution::new(2.0, 2.0).unwrap();
193 let p = f.pdf(1.0).unwrap();
194 assert!(p > 0.0 && p.is_finite());
195 }
196
197 #[test]
198 fn test_f_inverse_cdf_roundtrip() {
199 let f = FDistribution::new(5.0, 10.0).unwrap();
200 for p in [0.1, 0.25, 0.5, 0.75, 0.9] {
201 let x = f.inverse_cdf(p).unwrap();
202 let p_back = f.cdf(x).unwrap();
203 assert!((p - p_back).abs() < 1e-5, "p={p}");
204 }
205 }
206
207 #[test]
208 fn test_f_invalid() {
209 assert!(FDistribution::new(0.0, 5.0).is_err());
210 assert!(FDistribution::new(5.0, 0.0).is_err());
211 }
212}