Skip to main content

scirs2_stats/distributions/
f.rs

1//! F distribution functions
2//!
3//! This module provides functionality for the F distribution.
4
5use crate::error::{StatsError, StatsResult};
6use crate::sampling::SampleableDistribution;
7use scirs2_core::numeric::{Float, NumCast};
8use scirs2_core::random::prelude::*;
9use scirs2_core::random::{Distribution, FisherF as RandFisherF};
10use std::f64::consts::PI;
11
12/// Helper to convert f64 constants to generic Float type
13#[inline(always)]
14fn const_f64<T: Float + NumCast>(value: f64) -> T {
15    T::from(value).expect("Failed to convert constant to target float type")
16}
17
18/// F distribution structure
19pub struct F<T: Float> {
20    /// Degrees of freedom for numerator
21    pub dfn: T,
22    /// Degrees of freedom for denominator
23    pub dfd: T,
24    /// Location parameter
25    pub loc: T,
26    /// Scale parameter
27    pub scale: T,
28    /// Random number generator for this distribution
29    rand_distr: RandFisherF<f64>,
30}
31
32impl<T: Float + NumCast> F<T> {
33    /// Create a new F distribution with given degrees of freedom, location, and scale
34    ///
35    /// # Arguments
36    ///
37    /// * `dfn` - Numerator degrees of freedom (> 0)
38    /// * `dfd` - Denominator degrees of freedom (> 0)
39    /// * `loc` - Location parameter (default: 0)
40    /// * `scale` - Scale parameter (default: 1, must be > 0)
41    ///
42    /// # Returns
43    ///
44    /// * A new F distribution instance
45    ///
46    /// # Examples
47    ///
48    /// ```
49    /// use scirs2_stats::distributions::f::F;
50    ///
51    /// // F distribution with 2 and 10 degrees of freedom
52    /// let f_dist = F::new(2.0f64, 10.0, 0.0, 1.0).expect("Test/example failed");
53    /// ```
54    pub fn new(dfn: T, dfd: T, loc: T, scale: T) -> StatsResult<Self> {
55        if dfn <= T::zero() {
56            return Err(StatsError::DomainError(
57                "Numerator degrees of freedom must be positive".to_string(),
58            ));
59        }
60
61        if dfd <= T::zero() {
62            return Err(StatsError::DomainError(
63                "Denominator degrees of freedom must be positive".to_string(),
64            ));
65        }
66
67        if scale <= T::zero() {
68            return Err(StatsError::DomainError(
69                "Scale parameter must be positive".to_string(),
70            ));
71        }
72
73        // Convert to f64 for rand_distr
74        let dfn_f64 = <f64 as NumCast>::from(dfn).expect("Test/example failed");
75        let dfd_f64 = <f64 as NumCast>::from(dfd).expect("Test/example failed");
76
77        match RandFisherF::new(dfn_f64, dfd_f64) {
78            Ok(rand_distr) => Ok(F {
79                dfn,
80                dfd,
81                loc,
82                scale,
83                rand_distr,
84            }),
85            Err(_) => Err(StatsError::ComputationError(
86                "Failed to create F distribution".to_string(),
87            )),
88        }
89    }
90
91    /// Calculate the probability density function (PDF) at a given point
92    ///
93    /// # Arguments
94    ///
95    /// * `x` - The point at which to evaluate the PDF
96    ///
97    /// # Returns
98    ///
99    /// * The value of the PDF at the given point
100    ///
101    /// # Examples
102    ///
103    /// ```
104    /// use scirs2_stats::distributions::f::F;
105    ///
106    /// let f_dist = F::new(2.0f64, 10.0, 0.0, 1.0).expect("Test/example failed");
107    /// let pdf_at_one = f_dist.pdf(1.0);
108    /// assert!((pdf_at_one - 0.335).abs() < 1e-3);
109    /// ```
110    pub fn pdf(&self, x: T) -> T {
111        // Standardize the variable
112        let x_std = (x - self.loc) / self.scale;
113
114        // If x is not positive, PDF is zero (F is only defined for x > 0)
115        if x_std <= T::zero() {
116            return T::zero();
117        }
118
119        // Calculate PDF using log-space computation for numerical stability
120        // PDF(x) = sqrt( (d1*x)^d1 * d2^d2 / (d1*x + d2)^(d1+d2) ) / (x * B(d1/2, d2/2))
121        // Equivalently in log-space:
122        // ln(pdf) = (d1/2)*ln(d1) + (d2/2)*ln(d2) + (d1/2-1)*ln(x)
123        //         - ((d1+d2)/2)*ln(d1*x + d2) - ln(B(d1/2, d2/2))
124        let two = const_f64::<T>(2.0);
125        let d1 = self.dfn;
126        let d2 = self.dfd;
127        let d1h = d1 / two;
128        let d2h = d2 / two;
129
130        let ln_pdf = d1h * d1.ln() + d2h * d2.ln() + (d1h - T::one()) * x_std.ln()
131            - (d1h + d2h) * (d1 * x_std + d2).ln()
132            - ln_beta(d1h, d2h);
133
134        ln_pdf.exp() / self.scale
135    }
136
137    /// Calculate the cumulative distribution function (CDF) at a given point
138    ///
139    /// # Arguments
140    ///
141    /// * `x` - The point at which to evaluate the CDF
142    ///
143    /// # Returns
144    ///
145    /// * The value of the CDF at the given point
146    ///
147    /// # Examples
148    ///
149    /// ```
150    /// use scirs2_stats::distributions::f::F;
151    ///
152    /// let f_dist = F::new(2.0f64, 10.0, 0.0, 1.0).expect("Test/example failed");
153    /// let cdf_at_one = f_dist.cdf(1.0);
154    /// assert!((cdf_at_one - 0.5984).abs() < 0.01);
155    /// ```
156    pub fn cdf(&self, x: T) -> T {
157        // Standardize the variable
158        let x_std = (x - self.loc) / self.scale;
159
160        // If x is not positive, CDF is zero (F is only defined for x > 0)
161        if x_std <= T::zero() {
162            return T::zero();
163        }
164
165        // The CDF of the F distribution is related to the incomplete beta function
166        // CDF(x) = I_(dfn*x/(dfn*x + dfd))(dfn/2, dfd/2)
167        // where I_x(a,b) is the regularized incomplete beta function
168
169        let two = const_f64::<T>(2.0);
170
171        let dfn_half = self.dfn / two;
172        let dfd_half = self.dfd / two;
173
174        // Calculate the argument for the incomplete beta function
175        let z = self.dfn * x_std / (self.dfn * x_std + self.dfd);
176
177        // Calculate the incomplete beta function
178        regularized_beta(z, dfn_half, dfd_half)
179    }
180
181    /// Generate random samples from the distribution
182    ///
183    /// # Arguments
184    ///
185    /// * `size` - Number of samples to generate
186    ///
187    /// # Returns
188    ///
189    /// * Vector of random samples
190    ///
191    /// # Examples
192    ///
193    /// ```
194    /// use scirs2_stats::distributions::f::F;
195    ///
196    /// let f_dist = F::new(2.0f64, 10.0, 0.0, 1.0).expect("Test/example failed");
197    /// let samples = f_dist.rvs(1000).expect("Test/example failed");
198    /// assert_eq!(samples.len(), 1000);
199    /// ```
200    pub fn rvs(&self, size: usize) -> StatsResult<Vec<T>> {
201        let mut rng = thread_rng();
202        let mut samples = Vec::with_capacity(size);
203
204        for _ in 0..size {
205            // Generate a standard F random variable
206            let std_sample = self.rand_distr.sample(&mut rng);
207
208            // Scale and shift according to loc and scale parameters
209            let sample =
210                T::from(std_sample).expect("Failed to convert to float") * self.scale + self.loc;
211            samples.push(sample);
212        }
213
214        Ok(samples)
215    }
216}
217
218/// Beta function B(a,b) = Γ(a)Γ(b)/Γ(a+b)
219#[allow(dead_code)]
220fn beta_function<T: Float>(a: T, b: T) -> T {
221    gamma_function(a) * gamma_function(b) / gamma_function(a + b)
222}
223
224/// Regularized incomplete beta function I_x(a,b) using Lentz's continued fraction
225/// from Numerical Recipes / DLMF 8.17.22.
226#[allow(dead_code)]
227fn regularized_beta<T: Float>(x: T, a: T, b: T) -> T {
228    if x == T::zero() {
229        return T::zero();
230    }
231    if x == T::one() {
232        return T::one();
233    }
234
235    let one = T::one();
236    let two = const_f64::<T>(2.0);
237    let epsilon = const_f64::<T>(1e-14);
238    let tiny = const_f64::<T>(1e-30);
239    let max_iterations = 300;
240
241    // Use the symmetry relation I_x(a,b) = 1 - I_{1-x}(b,a)
242    // when x > (a+1)/(a+b+2) for better convergence.
243    let threshold = (a + one) / (a + b + two);
244    let use_symmetry = x > threshold;
245
246    let (x_cf, a_cf, b_cf) = if use_symmetry {
247        (one - x, b, a)
248    } else {
249        (x, a, b)
250    };
251
252    // Compute the prefactor: x^a * (1-x)^b / (a * B(a,b))
253    // Use log to avoid overflow
254    let ln_prefactor =
255        a_cf * x_cf.ln() + b_cf * (one - x_cf).ln() - a_cf.ln() - ln_beta(a_cf, b_cf);
256    let prefactor = ln_prefactor.exp();
257
258    // Lentz's algorithm for the continued fraction
259    // CF = 1/(1 + d1/(1 + d2/(1 + ...)))
260    // where d_{2m+1} = -(a+m)(a+b+m) x / ((a+2m)(a+2m+1))
261    //       d_{2m}   =  m(b-m) x       / ((a+2m-1)(a+2m))
262    let mut f = one;
263    let mut c = one;
264    let mut d = one - (a_cf + b_cf) * x_cf / (a_cf + one);
265    if d.abs() < tiny {
266        d = tiny;
267    }
268    d = one / d;
269    f = d;
270
271    for m in 1..=max_iterations {
272        let m_f = T::from(m as f64).expect("Failed to convert to float");
273
274        // Even step: d_{2m} = m(b-m)x / ((a+2m-1)(a+2m))
275        let two_m = two * m_f;
276        let num_even = m_f * (b_cf - m_f) * x_cf / ((a_cf + two_m - one) * (a_cf + two_m));
277
278        d = one + num_even * d;
279        if d.abs() < tiny {
280            d = tiny;
281        }
282        c = one + num_even / c;
283        if c.abs() < tiny {
284            c = tiny;
285        }
286        d = one / d;
287        let delta = c * d;
288        f = f * delta;
289
290        // Odd step: d_{2m+1} = -(a+m)(a+b+m)x / ((a+2m)(a+2m+1))
291        let num_odd =
292            -(a_cf + m_f) * (a_cf + b_cf + m_f) * x_cf / ((a_cf + two_m) * (a_cf + two_m + one));
293
294        d = one + num_odd * d;
295        if d.abs() < tiny {
296            d = tiny;
297        }
298        c = one + num_odd / c;
299        if c.abs() < tiny {
300            c = tiny;
301        }
302        d = one / d;
303        let delta = c * d;
304        f = f * delta;
305
306        if (delta - one).abs() < epsilon {
307            break;
308        }
309    }
310
311    let result = prefactor * f;
312
313    if use_symmetry {
314        one - result
315    } else {
316        result
317    }
318}
319
320/// Natural logarithm of the Beta function: ln B(a,b) = ln Γ(a) + ln Γ(b) - ln Γ(a+b)
321#[allow(dead_code)]
322fn ln_beta<T: Float>(a: T, b: T) -> T {
323    ln_gamma(a) + ln_gamma(b) - ln_gamma(a + b)
324}
325
326/// Lanczos approximation for the log-gamma function
327#[allow(dead_code)]
328fn ln_gamma<T: Float>(x: T) -> T {
329    let one = T::one();
330    let half = const_f64::<T>(0.5);
331    let pi = const_f64::<T>(std::f64::consts::PI);
332
333    // For x < 0.5, use the reflection formula
334    if x < half {
335        let sin_val = (pi * x).sin();
336        if sin_val == T::zero() {
337            return T::infinity();
338        }
339        return pi.ln() - sin_val.abs().ln() - ln_gamma(one - x);
340    }
341
342    let g = const_f64::<T>(7.0); // g parameter for Lanczos
343    let coefficients: [f64; 9] = [
344        0.99999999999980993,
345        676.5203681218851,
346        -1259.1392167224028,
347        771.32342877765313,
348        -176.61502916214059,
349        12.507343278686905,
350        -0.13857109526572012,
351        9.9843695780195716e-6,
352        1.5056327351493116e-7,
353    ];
354
355    let xx = x - one;
356    let mut sum = const_f64::<T>(coefficients[0]);
357    for (i, &c) in coefficients.iter().enumerate().skip(1) {
358        sum = sum + const_f64::<T>(c) / (xx + T::from(i as f64).expect("conv"));
359    }
360
361    let t = xx + g + half;
362    half * (const_f64::<T>(2.0) * pi).ln() + (xx + half) * t.ln() - t + sum.ln()
363}
364
365/// Approximation of the gamma function for floating point types
366#[allow(dead_code)]
367fn gamma_function<T: Float>(x: T) -> T {
368    if x == T::one() {
369        return T::one();
370    }
371
372    if x == const_f64::<T>(0.5) {
373        return T::from(PI).expect("Failed to convert to float").sqrt();
374    }
375
376    // For integers and half-integers, use recurrence relation
377    if x > T::one() {
378        return (x - T::one()) * gamma_function(x - T::one());
379    }
380
381    // Use Lanczos approximation for other values
382    let p = [
383        const_f64::<T>(676.5203681218851),
384        const_f64::<T>(-1259.1392167224028),
385        T::from(771.323_428_777_653_1).expect("Failed to convert to float"),
386        T::from(-176.615_029_162_140_6).expect("Failed to convert to float"),
387        const_f64::<T>(12.507343278686905),
388        const_f64::<T>(-0.13857109526572012),
389        T::from(9.984_369_578_019_572e-6).expect("Failed to convert to float"),
390        const_f64::<T>(1.5056327351493116e-7),
391    ];
392
393    let x_adj = x - T::one();
394    let t = x_adj + const_f64::<T>(7.5);
395
396    let mut sum = T::zero();
397    for (i, &coef) in p.iter().enumerate() {
398        sum = sum + coef / (x_adj + T::from(i + 1).expect("Failed to convert to float"));
399    }
400
401    let pi = T::from(PI).expect("Failed to convert to float");
402    let sqrt_2pi = (const_f64::<T>(2.0) * pi).sqrt();
403
404    sqrt_2pi * sum * t.powf(x_adj + const_f64::<T>(0.5)) * (-t).exp()
405}
406
407/// Implementation of SampleableDistribution for F
408impl<T: Float + NumCast> SampleableDistribution<T> for F<T> {
409    fn rvs(&self, size: usize) -> StatsResult<Vec<T>> {
410        self.rvs(size)
411    }
412}
413
414#[cfg(test)]
415mod tests {
416    use super::*;
417    use approx::assert_relative_eq;
418
419    #[test]
420    fn test_f_creation() {
421        // F with 2,10 degrees of freedom
422        let f_dist = F::new(2.0, 10.0, 0.0, 1.0).expect("Test/example failed");
423        assert_eq!(f_dist.dfn, 2.0);
424        assert_eq!(f_dist.dfd, 10.0);
425        assert_eq!(f_dist.loc, 0.0);
426        assert_eq!(f_dist.scale, 1.0);
427
428        // Custom F
429        let custom = F::new(5.0, 20.0, 1.0, 2.0).expect("Test/example failed");
430        assert_eq!(custom.dfn, 5.0);
431        assert_eq!(custom.dfd, 20.0);
432        assert_eq!(custom.loc, 1.0);
433        assert_eq!(custom.scale, 2.0);
434
435        // Error cases
436        assert!(F::<f64>::new(0.0, 10.0, 0.0, 1.0).is_err());
437        assert!(F::<f64>::new(-1.0, 10.0, 0.0, 1.0).is_err());
438        assert!(F::<f64>::new(2.0, 0.0, 0.0, 1.0).is_err());
439        assert!(F::<f64>::new(2.0, -1.0, 0.0, 1.0).is_err());
440        assert!(F::<f64>::new(2.0, 10.0, 0.0, 0.0).is_err());
441        assert!(F::<f64>::new(2.0, 10.0, 0.0, -1.0).is_err());
442    }
443
444    #[test]
445    fn test_f_pdf() {
446        // F with 2,10 degrees of freedom
447        let f_dist = F::new(2.0, 10.0, 0.0, 1.0).expect("Test/example failed");
448
449        // PDF at x = 0
450        let pdf_at_zero = f_dist.pdf(0.0);
451        assert_eq!(pdf_at_zero, 0.0);
452
453        // scipy: f.pdf(1, dfn=2, dfd=10) ≈ 0.334898
454        let pdf_at_one = f_dist.pdf(1.0);
455        assert_relative_eq!(pdf_at_one, 0.334898, epsilon = 1e-3);
456
457        // scipy: f.pdf(2, dfn=2, dfd=10) ≈ 0.132686
458        let pdf_at_two = f_dist.pdf(2.0);
459        assert_relative_eq!(pdf_at_two, 0.132686, epsilon = 1e-3);
460
461        // F with 5,20 degrees of freedom
462        let f5_20 = F::new(5.0, 20.0, 0.0, 1.0).expect("Test/example failed");
463
464        // scipy: f.pdf(1, dfn=5, dfd=20) ≈ 0.5449
465        let pdf_at_one = f5_20.pdf(1.0);
466        assert_relative_eq!(pdf_at_one, 0.5449, epsilon = 1e-2);
467    }
468
469    #[test]
470    fn test_f_cdf() {
471        // F with 1,10 degrees of freedom
472        let f1_10 = F::new(1.0, 10.0, 0.0, 1.0).expect("Test/example failed");
473
474        // CDF at x = 0
475        let cdf_at_zero = f1_10.cdf(0.0);
476        assert_eq!(cdf_at_zero, 0.0);
477
478        // scipy: f.cdf(1, dfn=1, dfd=10) ≈ 0.6591
479        let cdf_at_one = f1_10.cdf(1.0);
480        assert_relative_eq!(cdf_at_one, 0.6591, epsilon = 1e-3);
481
482        // F with 2,10 degrees of freedom
483        let f2_10 = F::new(2.0, 10.0, 0.0, 1.0).expect("Test/example failed");
484
485        // scipy: f.cdf(1, dfn=2, dfd=10) ≈ 0.59812
486        let cdf_at_one = f2_10.cdf(1.0);
487        assert_relative_eq!(cdf_at_one, 0.59812, epsilon = 1e-3);
488
489        // F with 5,20 degrees of freedom
490        let f5_20 = F::new(5.0, 20.0, 0.0, 1.0).expect("Test/example failed");
491
492        // scipy: f.cdf(1, dfn=5, dfd=20) ≈ 0.5560
493        let cdf_at_one = f5_20.cdf(1.0);
494        assert_relative_eq!(cdf_at_one, 0.5560, epsilon = 1e-2);
495    }
496
497    #[test]
498    fn test_f_rvs() {
499        let f_dist = F::new(2.0, 10.0, 0.0, 1.0).expect("Test/example failed");
500
501        // Generate samples
502        let samples = f_dist.rvs(1000).expect("Test/example failed");
503
504        // Check the number of samples
505        assert_eq!(samples.len(), 1000);
506
507        // Basic statistical checks
508        let sum: f64 = samples.iter().sum();
509        let mean = sum / 1000.0;
510
511        // Mean of F(2,10) should be close to 10/(10-2) = 1.25, within reasonable bounds for random samples
512        assert!(mean > 0.9 && mean < 1.6);
513    }
514
515    #[test]
516    fn test_beta_function() {
517        // Check known values
518        assert_relative_eq!(beta_function(1.0, 1.0), 1.0, epsilon = 1e-10);
519        assert_relative_eq!(beta_function(2.0, 3.0), 1.0 / 12.0, epsilon = 1e-10);
520        assert_relative_eq!(beta_function(0.5, 0.5), PI, epsilon = 1e-10);
521    }
522}