use crate::distributions::traits::Distribution;
use crate::error::{StatsError, StatsResult};
use crate::utils::special_functions::{bisect_inverse_cdf, ln_beta, regularized_incomplete_beta};
#[derive(Debug, Clone, Copy)]
pub struct FDistribution {
pub d1: f64,
pub d2: f64,
}
impl FDistribution {
pub fn new(d1: f64, d2: f64) -> StatsResult<Self> {
if d1 <= 0.0 || d2 <= 0.0 {
return Err(StatsError::InvalidInput {
message: "FDistribution::new: d1 and d2 must be positive".to_string(),
});
}
Ok(Self { d1, d2 })
}
pub fn fit(data: &[f64]) -> StatsResult<Self> {
if data.is_empty() {
return Err(StatsError::InvalidInput {
message: "FDistribution::fit: data must not be empty".to_string(),
});
}
if data.iter().any(|&x| x <= 0.0) {
return Err(StatsError::InvalidInput {
message: "FDistribution::fit: all data values must be positive".to_string(),
});
}
let n = data.len() as f64;
let mean = data.iter().sum::<f64>() / n;
let variance = data.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / n;
if mean <= 1.0 {
return Self::new(2.0, 10.0);
}
let d2 = (2.0 * mean / (mean - 1.0)).max(2.01);
let d2m2 = d2 - 2.0;
let d2m4 = (d2 - 4.0).max(0.01);
let numerator = 2.0 * d2 * d2 * d2m2;
let denominator = variance * d2m2 * d2m2 * d2m4 - 2.0 * d2 * d2;
let d1 = if denominator > 0.0 {
(numerator / denominator).max(0.01)
} else {
2.0
};
Self::new(d1, d2)
}
}
impl Distribution for FDistribution {
fn name(&self) -> &str {
"F"
}
fn num_params(&self) -> usize {
2
}
fn pdf(&self, x: f64) -> StatsResult<f64> {
if x <= 0.0 {
return Ok(0.0);
}
Ok(self.logpdf(x)?.exp())
}
fn logpdf(&self, x: f64) -> StatsResult<f64> {
if x <= 0.0 {
return Ok(f64::NEG_INFINITY);
}
let d1 = self.d1;
let d2 = self.d2;
let log_pdf = 0.5 * (d1 * (d1 * x).ln() + d2 * d2.ln() - (d1 + d2) * (d1 * x + d2).ln())
- x.ln()
- ln_beta(d1 / 2.0, d2 / 2.0);
Ok(log_pdf)
}
fn cdf(&self, x: f64) -> StatsResult<f64> {
if x <= 0.0 {
return Ok(0.0);
}
let t = self.d1 * x / (self.d1 * x + self.d2);
Ok(regularized_incomplete_beta(self.d1 / 2.0, self.d2 / 2.0, t))
}
fn inverse_cdf(&self, p: f64) -> StatsResult<f64> {
if !(0.0..=1.0).contains(&p) {
return Err(StatsError::InvalidInput {
message: "FDistribution::inverse_cdf: p must be in [0, 1]".to_string(),
});
}
if p == 0.0 {
return Ok(0.0);
}
if p == 1.0 {
return Ok(f64::INFINITY);
}
let d1 = self.d1;
let d2 = self.d2;
let mean = if d2 > 2.0 { d2 / (d2 - 2.0) } else { 5.0 };
let hi = mean * 20.0;
Ok(bisect_inverse_cdf(
|x| {
if x <= 0.0 {
return 0.0;
}
let t = d1 * x / (d1 * x + d2);
regularized_incomplete_beta(d1 / 2.0, d2 / 2.0, t)
},
p,
0.0,
hi,
))
}
fn mean(&self) -> f64 {
if self.d2 > 2.0 {
self.d2 / (self.d2 - 2.0)
} else {
f64::INFINITY
}
}
fn variance(&self) -> f64 {
let d1 = self.d1;
let d2 = self.d2;
if d2 > 4.0 {
2.0 * d2 * d2 * (d1 + d2 - 2.0) / (d1 * (d2 - 2.0).powi(2) * (d2 - 4.0))
} else {
f64::INFINITY
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_f_mean() {
let f = FDistribution::new(5.0, 10.0).unwrap();
assert!((f.mean() - 10.0 / 8.0).abs() < 1e-10);
}
#[test]
fn test_f_cdf_zero() {
let f = FDistribution::new(3.0, 6.0).unwrap();
assert_eq!(f.cdf(0.0).unwrap(), 0.0);
}
#[test]
fn test_f_pdf_positive() {
let f = FDistribution::new(2.0, 2.0).unwrap();
let p = f.pdf(1.0).unwrap();
assert!(p > 0.0 && p.is_finite());
}
#[test]
fn test_f_inverse_cdf_roundtrip() {
let f = FDistribution::new(5.0, 10.0).unwrap();
for p in [0.1, 0.25, 0.5, 0.75, 0.9] {
let x = f.inverse_cdf(p).unwrap();
let p_back = f.cdf(x).unwrap();
assert!((p - p_back).abs() < 1e-5, "p={p}");
}
}
#[test]
fn test_f_invalid() {
assert!(FDistribution::new(0.0, 5.0).is_err());
assert!(FDistribution::new(5.0, 0.0).is_err());
}
}