scirs2_stats/
traits.rs

1//! Statistical distribution traits
2//!
3//! This module defines the core traits for statistical distributions,
4//! including standard distributions and specialized circular distributions.
5
6use crate::error::StatsResult;
7use scirs2_core::ndarray::{Array, Array1, ArrayD, ArrayView1, IxDyn};
8use scirs2_core::numeric::Float;
9
10/// Base trait for all statistical distributions
11pub trait Distribution<F: Float> {
12    /// Mean (expected value) of the distribution
13    fn mean(&self) -> F;
14
15    /// Variance of the distribution
16    fn var(&self) -> F;
17
18    /// Standard deviation of the distribution
19    fn std(&self) -> F;
20
21    /// Generate random samples from the distribution
22    ///
23    /// # Arguments
24    ///
25    /// * `size` - Number of samples to generate
26    ///
27    /// # Returns
28    ///
29    /// An array of random samples from the distribution
30    fn rvs(&self, size: usize) -> StatsResult<Array1<F>>;
31
32    /// Generate random samples with a user-specified output shape.
33    ///
34    /// This is the SciPy-style ergonomic where `dist.rvs_array(&[8, 6])` returns
35    /// an `8 × 6` array of samples drawn IID from this distribution. The default
36    /// implementation draws `prod(shape)` scalar samples through `rvs(...)` and
37    /// reshapes them into a dynamically-dimensioned array.
38    ///
39    /// # Arguments
40    ///
41    /// * `shape` - Desired output shape. An empty shape produces a 0-D scalar array.
42    ///
43    /// # Returns
44    ///
45    /// A dynamic-rank array of IID samples shaped exactly as requested.
46    ///
47    /// # Errors
48    ///
49    /// Returns an error if the underlying `rvs` call fails or if the requested
50    /// shape's element count overflows `usize`.
51    fn rvs_array(&self, shape: &[usize]) -> StatsResult<ArrayD<F>> {
52        // Compute total element count, guarding against multiplication overflow.
53        let mut total: usize = 1;
54        for &dim in shape {
55            total = total.checked_mul(dim).ok_or_else(|| {
56                crate::error::StatsError::InvalidArgument(
57                    "Requested rvs_array shape overflows usize".to_string(),
58                )
59            })?;
60        }
61
62        // Empty shape ⇒ 0-D array containing a single sample.
63        // Zero total ⇒ build an empty array with the requested shape (e.g. shape=[3, 0]).
64        if total == 0 {
65            return Array::from_shape_vec(IxDyn(shape), Vec::<F>::new()).map_err(|e| {
66                crate::error::StatsError::DimensionMismatch(format!(
67                    "Failed to construct empty rvs_array with the requested shape: {e}"
68                ))
69            });
70        }
71
72        let flat = self.rvs(total)?;
73        Array::from_shape_vec(IxDyn(shape), flat.to_vec()).map_err(|e| {
74            crate::error::StatsError::DimensionMismatch(format!(
75                "Failed to reshape rvs samples into requested shape: {e}"
76            ))
77        })
78    }
79
80    /// Entropy of the distribution
81    fn entropy(&self) -> F;
82}
83
84/// Trait for continuous distributions
85pub trait ContinuousDistribution<F: Float>: Distribution<F> {
86    /// Probability density function (PDF)
87    ///
88    /// # Arguments
89    ///
90    /// * `x` - Point at which to evaluate the PDF
91    ///
92    /// # Returns
93    ///
94    /// The probability density at x
95    fn pdf(&self, x: F) -> F;
96
97    /// Cumulative distribution function (CDF)
98    ///
99    /// # Arguments
100    ///
101    /// * `x` - Point at which to evaluate the CDF
102    ///
103    /// # Returns
104    ///
105    /// The cumulative probability up to x
106    fn cdf(&self, x: F) -> F;
107
108    /// Percent point function (inverse CDF)
109    ///
110    /// # Arguments
111    ///
112    /// * `q` - Quantile (probability) in [0, 1]
113    ///
114    /// # Returns
115    ///
116    /// The value x such that CDF(x) = q
117    fn ppf(&self, q: F) -> StatsResult<F> {
118        // Default implementation using binary search
119        // This can be overridden for distributions with analytical ppf
120        if q < F::zero() || q > F::one() {
121            return Err(crate::error::StatsError::InvalidArgument(
122                "Quantile must be in [0, 1]".to_string(),
123            ));
124        }
125
126        // Use binary search to find the inverse
127        let mut low = F::from(-10.0).expect("Failed to convert constant to float");
128        let mut high = F::from(10.0).expect("Failed to convert constant to float");
129        let eps = F::from(1e-12).expect("Failed to convert constant to float");
130
131        // Find a reasonable search range
132        while self.cdf(low) > q {
133            low = low * F::from(2.0).expect("Failed to convert constant to float");
134        }
135        while self.cdf(high) < q {
136            high = high * F::from(2.0).expect("Failed to convert constant to float");
137        }
138
139        // Binary search
140        for _ in 0..100 {
141            let mid = (low + high) / F::from(2.0).expect("Failed to convert constant to float");
142            let cdf_mid = self.cdf(mid);
143
144            if (cdf_mid - q).abs() < eps {
145                return Ok(mid);
146            }
147
148            if cdf_mid < q {
149                low = mid;
150            } else {
151                high = mid;
152            }
153        }
154
155        Ok((low + high) / F::from(2.0).expect("Failed to convert constant to float"))
156    }
157
158    /// Vectorised PDF: evaluate `pdf` at every element of `x`.
159    ///
160    /// SciPy-style ergonomic — accepts a 1-D `ArrayView1` and returns an owned
161    /// `Array1` with the same length. The default implementation broadcasts
162    /// the scalar `pdf` via `mapv`. Distributions that can compute the PDF
163    /// faster on a batch are free to override this method.
164    ///
165    /// # Examples
166    ///
167    /// ```
168    /// use scirs2_core::ndarray::array;
169    /// use scirs2_stats::distributions::Normal;
170    /// use scirs2_stats::traits::ContinuousDistribution;
171    ///
172    /// let n = Normal::new(0.0_f64, 1.0).expect("normal");
173    /// let xs = array![-1.0, 0.0, 1.0];
174    /// let pdfs = n.pdf_array(&xs.view());
175    /// assert_eq!(pdfs.len(), 3);
176    /// ```
177    fn pdf_array(&self, x: &ArrayView1<F>) -> Array1<F> {
178        x.mapv(|xi| self.pdf(xi))
179    }
180
181    /// Vectorised CDF: evaluate `cdf` at every element of `x`.
182    ///
183    /// SciPy-style ergonomic. The default implementation broadcasts the scalar
184    /// `cdf` via `mapv`; distributions with a fast batch CDF should override.
185    fn cdf_array(&self, x: &ArrayView1<F>) -> Array1<F> {
186        x.mapv(|xi| self.cdf(xi))
187    }
188
189    /// Vectorised PPF: evaluate `ppf` at every element of `q`.
190    ///
191    /// Returns an error on the first element whose `ppf` call fails (typically
192    /// out-of-range quantiles). On success the output array has the same length
193    /// as the input view.
194    fn ppf_array(&self, q: &ArrayView1<F>) -> StatsResult<Array1<F>> {
195        let mut out = Array1::<F>::zeros(q.len());
196        for (i, &qi) in q.iter().enumerate() {
197            out[i] = self.ppf(qi)?;
198        }
199        Ok(out)
200    }
201}
202
203/// Trait for discrete distributions
204pub trait DiscreteDistribution<F: Float>: Distribution<F> {
205    /// Probability mass function (PMF)
206    ///
207    /// # Arguments
208    ///
209    /// * `k` - Point at which to evaluate the PMF
210    ///
211    /// # Returns
212    ///
213    /// The probability mass at k
214    fn pmf(&self, k: F) -> F;
215
216    /// Cumulative distribution function (CDF)
217    ///
218    /// # Arguments
219    ///
220    /// * `k` - Point at which to evaluate the CDF
221    ///
222    /// # Returns
223    ///
224    /// The cumulative probability up to k
225    fn cdf(&self, k: F) -> F;
226
227    /// Support of the distribution (range of possible values)
228    fn support(&self) -> (Option<F>, Option<F>) {
229        (None, None) // Default: unbounded support
230    }
231
232    /// Percent point function (inverse CDF)
233    ///
234    /// Returns the smallest integer `k` in the support such that `CDF(k) >= p`.
235    /// This default implementation uses exponential expansion + binary search on
236    /// the CDF, so it works for any discrete distribution that provides `cdf()`,
237    /// `mean()`, `std()`, and `support()`.
238    ///
239    /// Distributions with a closed-form quantile function should override this.
240    fn ppf(&self, p: F) -> StatsResult<F> {
241        // --- Validate input ---
242        if p < F::zero() || p > F::one() {
243            return Err(crate::error::StatsError::DomainError(
244                "Probability must be in [0, 1]".to_string(),
245            ));
246        }
247
248        // Determine support bounds (integer-valued).
249        let (sup_lo, sup_hi) = self.support();
250        let lo_bound: i64 = match sup_lo {
251            Some(v) => {
252                let raw = v.to_f64().unwrap_or(0.0);
253                raw.ceil() as i64
254            }
255            None => i64::MIN / 2,
256        };
257        let hi_bound: i64 = match sup_hi {
258            Some(v) => {
259                let raw = v.to_f64().unwrap_or(f64::MAX);
260                raw.floor() as i64
261            }
262            None => i64::MAX / 2,
263        };
264
265        // p == 0 → return support minimum.
266        if p <= F::zero() {
267            let lo_f = F::from(lo_bound.max(0)).ok_or_else(|| {
268                crate::error::StatsError::ComputationError(
269                    "Cannot convert support lower bound to F".to_string(),
270                )
271            })?;
272            return Ok(lo_f);
273        }
274
275        // p == 1 → return support maximum (or +∞ for infinite support).
276        if p >= F::one() {
277            return match sup_hi {
278                Some(v) => Ok(v),
279                None => Ok(F::infinity()),
280            };
281        }
282
283        // --- Derive initial search range from mean ± 10·std ---
284        let mean_f64 = self.mean().to_f64().unwrap_or(0.0);
285        let std_f64 = self.std().to_f64().unwrap_or(1.0).max(1.0);
286
287        let mut lo: i64 = ((mean_f64 - 10.0 * std_f64).floor() as i64).max(lo_bound);
288        let mut hi: i64 = ((mean_f64 + 10.0 * std_f64).ceil() as i64).min(hi_bound);
289
290        // Clamp lo so it is at least support minimum.
291        lo = lo.max(lo_bound);
292        // Ensure hi > lo.
293        if hi <= lo {
294            hi = lo + 1;
295        }
296
297        // --- Expand lo leftward until CDF(lo) can be < p ---
298        // (For distributions starting at 0 this loop usually doesn't run.)
299        {
300            let mut step: i64 = (std_f64 * 10.0).ceil() as i64 + 1;
301            loop {
302                let lo_f = F::from(lo).ok_or_else(|| {
303                    crate::error::StatsError::ComputationError(
304                        "Overflow expanding lower bound in ppf".to_string(),
305                    )
306                })?;
307                if self.cdf(lo_f) < p || lo <= lo_bound {
308                    break;
309                }
310                lo = (lo - step).max(lo_bound);
311                step = step.saturating_mul(2);
312            }
313            lo = lo.max(lo_bound);
314        }
315
316        // --- Expand hi rightward until CDF(hi) >= p ---
317        {
318            let cap: i64 = 1_000_000_000i64.min(hi_bound);
319            let mut step: i64 = (std_f64 * 10.0).ceil() as i64 + 1;
320            let mut iters = 0usize;
321            loop {
322                let hi_f = F::from(hi).ok_or_else(|| {
323                    crate::error::StatsError::ComputationError(
324                        "Overflow expanding upper bound in ppf".to_string(),
325                    )
326                })?;
327                if self.cdf(hi_f) >= p || hi >= cap {
328                    break;
329                }
330                hi = (hi + step).min(cap);
331                step = step.saturating_mul(2);
332                iters += 1;
333                if iters > 100 {
334                    break;
335                }
336            }
337        }
338
339        // --- Binary search: smallest integer k in [lo, hi] with CDF(k) >= p ---
340        // Invariant: CDF(hi) >= p (if not, hi is the best we have).
341        let mut left: i64 = lo;
342        let mut right: i64 = hi;
343
344        // Check if the right bound actually satisfies the condition.
345        let right_f = F::from(right).ok_or_else(|| {
346            crate::error::StatsError::ComputationError(
347                "Overflow converting hi bound in ppf".to_string(),
348            )
349        })?;
350        if self.cdf(right_f) < p {
351            // Support is exhausted; return hi.
352            return Ok(right_f);
353        }
354
355        while right - left > 1 {
356            let mid: i64 = left + (right - left) / 2;
357            let mid_f = F::from(mid).ok_or_else(|| {
358                crate::error::StatsError::ComputationError(
359                    "Overflow converting mid in ppf binary search".to_string(),
360                )
361            })?;
362            if self.cdf(mid_f) >= p {
363                right = mid;
364            } else {
365                left = mid;
366            }
367        }
368
369        // After the loop, `right` satisfies cdf(right) >= p.
370        // Check if `left` also satisfies cdf(left) >= p — if so it is the
371        // *smaller* integer and is the correct answer.
372        let left_f = F::from(left).ok_or_else(|| {
373            crate::error::StatsError::ComputationError(
374                "Overflow converting left in ppf".to_string(),
375            )
376        })?;
377        if self.cdf(left_f) >= p {
378            return Ok(left_f);
379        }
380
381        F::from(right).ok_or_else(|| {
382            crate::error::StatsError::ComputationError(
383                "Overflow converting result in ppf".to_string(),
384            )
385        })
386    }
387
388    /// Log probability mass function
389    fn logpmf(&self, x: F) -> F {
390        self.pmf(x).ln()
391    }
392}
393
394/// Trait for circular distributions (distributions on the unit circle)
395pub trait CircularDistribution<F: Float>: Distribution<F> {
396    /// Probability density function for circular distributions
397    ///
398    /// # Arguments
399    ///
400    /// * `x` - Angle in radians
401    ///
402    /// # Returns
403    ///
404    /// The probability density at angle x
405    fn pdf(&self, x: F) -> F;
406
407    /// Cumulative distribution function for circular distributions
408    ///
409    /// # Arguments
410    ///
411    /// * `x` - Angle in radians
412    ///
413    /// # Returns
414    ///
415    /// The cumulative probability up to angle x
416    fn cdf(&self, x: F) -> F;
417
418    /// Generate a single random sample
419    ///
420    /// # Returns
421    ///
422    /// A single random sample from the distribution
423    fn rvs_single(&self) -> StatsResult<F>;
424
425    /// Circular mean (mean direction)
426    ///
427    /// # Returns
428    ///
429    /// The mean direction in radians
430    fn circular_mean(&self) -> F;
431
432    /// Circular variance
433    ///
434    /// # Returns
435    ///
436    /// The circular variance (1 - mean resultant length)
437    fn circular_variance(&self) -> F;
438
439    /// Circular standard deviation
440    ///
441    /// # Returns
442    ///
443    /// The circular standard deviation in radians
444    fn circular_std(&self) -> F;
445
446    /// Mean resultant length
447    ///
448    /// # Returns
449    ///
450    /// The mean resultant length (measure of concentration)
451    fn mean_resultant_length(&self) -> F;
452
453    /// Concentration parameter
454    ///
455    /// # Returns
456    ///
457    /// The concentration parameter of the distribution
458    fn concentration(&self) -> F;
459}
460
461/// Trait for multivariate distributions
462pub trait MultivariateDistribution<F: Float> {
463    /// Probability density function for multivariate distributions
464    ///
465    /// # Arguments
466    ///
467    /// * `x` - Point at which to evaluate the PDF
468    ///
469    /// # Returns
470    ///
471    /// The probability density at x
472    fn pdf(&self, x: &Array1<F>) -> F;
473
474    /// Generate random samples from the multivariate distribution
475    ///
476    /// # Arguments
477    ///
478    /// * `size` - Number of samples to generate
479    ///
480    /// # Returns
481    ///
482    /// A matrix where each row is a sample
483    fn rvs(&self, size: usize) -> StatsResult<scirs2_core::ndarray::Array2<F>>;
484
485    /// Mean vector of the distribution
486    fn mean(&self) -> Array1<F>;
487
488    /// Covariance matrix of the distribution
489    fn cov(&self) -> scirs2_core::ndarray::Array2<F>;
490
491    /// Dimensionality of the distribution
492    fn dim(&self) -> usize;
493
494    /// Log probability density function for multivariate distributions
495    fn logpdf(&self, x: &Array1<F>) -> F {
496        self.pdf(x).ln()
497    }
498
499    /// Generate a single random sample from the multivariate distribution
500    fn rvs_single(&self) -> StatsResult<Vec<F>> {
501        let samples = self.rvs(1)?;
502        Ok(samples.row(0).to_vec())
503    }
504}
505
506/// Trait for distributions that support fitting to data
507pub trait Fittable<F: Float> {
508    /// Fit the distribution to observed data
509    ///
510    /// # Arguments
511    ///
512    /// * `data` - Observed data points
513    ///
514    /// # Returns
515    ///
516    /// A fitted distribution instance
517    fn fit(data: &Array1<F>) -> StatsResult<Self>
518    where
519        Self: Sized;
520
521    /// Maximum likelihood estimation of parameters
522    ///
523    /// # Arguments
524    ///
525    /// * `data` - Observed data points
526    ///
527    /// # Returns
528    ///
529    /// A tuple of estimated parameters
530    fn mle(data: &Array1<F>) -> StatsResult<Vec<F>>;
531}
532
533/// Trait for distributions that can be truncated
534pub trait Truncatable<F: Float>: Distribution<F> {
535    /// Create a truncated version of the distribution
536    ///
537    /// # Arguments
538    ///
539    /// * `lower` - Lower bound (None for no lower bound)
540    /// * `upper` - Upper bound (None for no upper bound)
541    ///
542    /// # Returns
543    ///
544    /// A truncated version of the distribution
545    fn truncate(&self, lower: Option<F>, upper: Option<F>)
546        -> StatsResult<Box<dyn Distribution<F>>>;
547}
548
549/// Trait for continuous distributions that support CDF-related functions
550pub trait ContinuousCDF<F: Float>: ContinuousDistribution<F> {
551    /// Survival function (1 - CDF)
552    ///
553    /// # Arguments
554    ///
555    /// * `x` - Point at which to evaluate the survival function
556    ///
557    /// # Returns
558    ///
559    /// The survival probability at x (1 - CDF(x))
560    fn sf(&self, x: F) -> F {
561        F::one() - self.cdf(x)
562    }
563
564    /// Hazard function (PDF / (1 - CDF))
565    ///
566    /// # Arguments
567    ///
568    /// * `x` - Point at which to evaluate the hazard function
569    ///
570    /// # Returns
571    ///
572    /// The hazard rate at x
573    fn hazard(&self, x: F) -> F {
574        let sf_val = self.sf(x);
575        if sf_val == F::zero() {
576            F::infinity()
577        } else {
578            self.pdf(x) / sf_val
579        }
580    }
581
582    /// Cumulative hazard function (-ln(survival function))
583    ///
584    /// # Arguments
585    ///
586    /// * `x` - Point at which to evaluate the cumulative hazard function
587    ///
588    /// # Returns
589    ///
590    /// The cumulative hazard at x (-ln(1 - CDF(x)))
591    fn cumhazard(&self, x: F) -> F {
592        let sf_val = self.sf(x);
593        if sf_val <= F::zero() {
594            F::infinity()
595        } else {
596            -sf_val.ln()
597        }
598    }
599
600    /// Inverse survival function (PPF(1 - q))
601    ///
602    /// # Arguments
603    ///
604    /// * `q` - Probability in [0, 1]
605    ///
606    /// # Returns
607    ///
608    /// The value x such that SF(x) = q (equivalent to PPF(1 - q))
609    fn isf(&self, q: F) -> StatsResult<F> {
610        if q < F::zero() || q > F::one() {
611            return Err(crate::error::StatsError::InvalidArgument(
612                "Probability must be in [0, 1]".to_string(),
613            ));
614        }
615        self.ppf(F::one() - q)
616    }
617}
618
619/// Trait for discrete distributions that support CDF-related functions
620pub trait DiscreteCDF<F: Float>: DiscreteDistribution<F> {
621    /// Survival function (1 - CDF)
622    ///
623    /// # Arguments
624    ///
625    /// * `k` - Point at which to evaluate the survival function
626    ///
627    /// # Returns
628    ///
629    /// The survival probability at k (1 - CDF(k))
630    fn sf(&self, k: F) -> F {
631        F::one() - self.cdf(k)
632    }
633
634    /// Hazard function (PMF / (1 - CDF))
635    ///
636    /// # Arguments
637    ///
638    /// * `k` - Point at which to evaluate the hazard function
639    ///
640    /// # Returns
641    ///
642    /// The hazard rate at k
643    fn hazard(&self, k: F) -> F {
644        let sf_val = self.sf(k);
645        if sf_val == F::zero() {
646            F::infinity()
647        } else {
648            self.pmf(k) / sf_val
649        }
650    }
651
652    /// Cumulative hazard function (-ln(survival function))
653    ///
654    /// # Arguments
655    ///
656    /// * `k` - Point at which to evaluate the cumulative hazard function
657    ///
658    /// # Returns
659    ///
660    /// The cumulative hazard at k (-ln(1 - CDF(k)))
661    fn cumhazard(&self, k: F) -> F {
662        let sf_val = self.sf(k);
663        if sf_val <= F::zero() {
664            F::infinity()
665        } else {
666            -sf_val.ln()
667        }
668    }
669
670    /// Inverse survival function (PPF(1 - q))
671    ///
672    /// # Arguments
673    ///
674    /// * `q` - Probability in [0, 1]
675    ///
676    /// # Returns
677    ///
678    /// The value k such that SF(k) = q (equivalent to PPF(1 - q))
679    fn isf(&self, q: F) -> StatsResult<F> {
680        if q < F::zero() || q > F::one() {
681            return Err(crate::error::StatsError::InvalidArgument(
682                "Probability must be in [0, 1]".to_string(),
683            ));
684        }
685        self.ppf(F::one() - q)
686    }
687}
688
689#[cfg(test)]
690mod tests {
691    use super::*;
692    use crate::error::StatsResult;
693    use scirs2_core::ndarray::Array1;
694    use scirs2_core::numeric::Float;
695
696    // -----------------------------------------------------------------------
697    // Test fixture: Bernoulli(p) implemented via the default ppf
698    // -----------------------------------------------------------------------
699    struct TestBernoulli {
700        p: f64,
701    }
702
703    impl Distribution<f64> for TestBernoulli {
704        fn mean(&self) -> f64 {
705            self.p
706        }
707        fn var(&self) -> f64 {
708            self.p * (1.0 - self.p)
709        }
710        fn std(&self) -> f64 {
711            self.var().sqrt()
712        }
713        fn rvs(&self, size: usize) -> StatsResult<Array1<f64>> {
714            Ok(Array1::zeros(size))
715        }
716        fn entropy(&self) -> f64 {
717            0.0
718        }
719    }
720
721    impl DiscreteDistribution<f64> for TestBernoulli {
722        fn pmf(&self, k: f64) -> f64 {
723            if (k - 0.0).abs() < 1e-9 {
724                1.0 - self.p
725            } else if (k - 1.0).abs() < 1e-9 {
726                self.p
727            } else {
728                0.0
729            }
730        }
731        fn cdf(&self, k: f64) -> f64 {
732            if k < 0.0 {
733                0.0
734            } else if k < 1.0 {
735                1.0 - self.p
736            } else {
737                1.0
738            }
739        }
740        fn support(&self) -> (Option<f64>, Option<f64>) {
741            (Some(0.0), Some(1.0))
742        }
743        // ppf deliberately NOT overridden → exercises the default
744    }
745
746    // -----------------------------------------------------------------------
747    // Test fixture: Poisson-like (mu) via default ppf
748    // -----------------------------------------------------------------------
749    struct TestPoisson {
750        mu: f64,
751    }
752
753    impl Distribution<f64> for TestPoisson {
754        fn mean(&self) -> f64 {
755            self.mu
756        }
757        fn var(&self) -> f64 {
758            self.mu
759        }
760        fn std(&self) -> f64 {
761            self.mu.sqrt()
762        }
763        fn rvs(&self, size: usize) -> StatsResult<Array1<f64>> {
764            Ok(Array1::zeros(size))
765        }
766        fn entropy(&self) -> f64 {
767            0.0
768        }
769    }
770
771    impl DiscreteDistribution<f64> for TestPoisson {
772        fn pmf(&self, k: f64) -> f64 {
773            if k < 0.0 || (k - k.floor()).abs() > 1e-9 {
774                return 0.0;
775            }
776            let ki = k.round() as u64;
777            let mut log_pmf = -(self.mu) + (ki as f64) * self.mu.ln();
778            for i in 1..=ki {
779                log_pmf -= (i as f64).ln();
780            }
781            log_pmf.exp()
782        }
783        fn cdf(&self, k: f64) -> f64 {
784            if k < 0.0 {
785                return 0.0;
786            }
787            let ki = k.floor() as u64;
788            (0..=ki).map(|i| self.pmf(i as f64)).sum::<f64>()
789        }
790        fn support(&self) -> (Option<f64>, Option<f64>) {
791            (Some(0.0), None)
792        }
793        // ppf deliberately NOT overridden
794    }
795
796    // -----------------------------------------------------------------------
797    // Test fixture: Geometric(p) (# failures before first success, starting at 0)
798    // CDF(k) = 1 - (1-p)^(k+1)
799    // -----------------------------------------------------------------------
800    struct TestGeometric {
801        p: f64,
802    }
803
804    impl Distribution<f64> for TestGeometric {
805        fn mean(&self) -> f64 {
806            (1.0 - self.p) / self.p
807        }
808        fn var(&self) -> f64 {
809            (1.0 - self.p) / (self.p * self.p)
810        }
811        fn std(&self) -> f64 {
812            self.var().sqrt()
813        }
814        fn rvs(&self, size: usize) -> StatsResult<Array1<f64>> {
815            Ok(Array1::zeros(size))
816        }
817        fn entropy(&self) -> f64 {
818            0.0
819        }
820    }
821
822    impl DiscreteDistribution<f64> for TestGeometric {
823        fn pmf(&self, k: f64) -> f64 {
824            if k < 0.0 || (k - k.floor()).abs() > 1e-9 {
825                return 0.0;
826            }
827            let ki = k.round() as u64;
828            self.p * (1.0 - self.p).powi(ki as i32)
829        }
830        fn cdf(&self, k: f64) -> f64 {
831            if k < 0.0 {
832                return 0.0;
833            }
834            let ki = k.floor() as u64;
835            1.0 - (1.0 - self.p).powi(ki as i32 + 1)
836        }
837        fn support(&self) -> (Option<f64>, Option<f64>) {
838            (Some(0.0), None)
839        }
840        // ppf deliberately NOT overridden
841    }
842
843    // -----------------------------------------------------------------------
844    // Helper: verify cdf(ppf(p)) >= p  (fundamental discrete PPF invariant)
845    // Also verifies ppf(p) is the SMALLEST such k.
846    // -----------------------------------------------------------------------
847    fn check_ppf_invariant<D: DiscreteDistribution<f64>>(dist: &D, p: f64) {
848        let k = dist
849            .ppf(p)
850            .unwrap_or_else(|e| panic!("ppf({p}) failed: {e}"));
851        let cdf_k = dist.cdf(k);
852        assert!(
853            cdf_k >= p - 1e-12,
854            "CDF({k}) = {cdf_k} < p = {p}: invariant cdf(ppf(p)) >= p violated"
855        );
856        // Verify that k-1 does NOT satisfy the condition (k is smallest).
857        if k >= 1.0 {
858            let cdf_km1 = dist.cdf(k - 1.0);
859            assert!(
860                cdf_km1 < p + 1e-12,
861                "CDF({}) = {} >= p = {p}: k={k} is not the SMALLEST such integer",
862                k - 1.0,
863                cdf_km1
864            );
865        }
866    }
867
868    // -----------------------------------------------------------------------
869    // Bernoulli(p=0.3) tests
870    // -----------------------------------------------------------------------
871    #[test]
872    fn test_default_ppf_bernoulli_invariants() {
873        let b = TestBernoulli { p: 0.3 };
874        for &p in &[0.1f64, 0.25, 0.5, 0.7, 0.9] {
875            check_ppf_invariant(&b, p);
876        }
877    }
878
879    #[test]
880    fn test_default_ppf_bernoulli_known_values() {
881        let b = TestBernoulli { p: 0.3 };
882        // CDF(0) = 0.7, so ppf(p) = 0 for p <= 0.7 and 1 for p > 0.7
883        assert_eq!(b.ppf(0.5).expect("ppf(0.5)"), 0.0);
884        assert_eq!(b.ppf(0.7).expect("ppf(0.7)"), 0.0);
885        assert_eq!(b.ppf(0.8).expect("ppf(0.8)"), 1.0);
886        assert_eq!(b.ppf(1.0).expect("ppf(1.0)"), 1.0);
887    }
888
889    #[test]
890    fn test_default_ppf_bernoulli_p0_returns_support_lo() {
891        let b = TestBernoulli { p: 0.7 };
892        // p == 0 → support minimum = 0
893        assert_eq!(b.ppf(0.0).expect("ppf(0.0)"), 0.0);
894    }
895
896    #[test]
897    fn test_default_ppf_bernoulli_p1_returns_support_hi() {
898        let b = TestBernoulli { p: 0.7 };
899        // p == 1 → support maximum = 1
900        assert_eq!(b.ppf(1.0).expect("ppf(1.0)"), 1.0);
901    }
902
903    #[test]
904    fn test_default_ppf_bernoulli_out_of_range() {
905        let b = TestBernoulli { p: 0.5 };
906        assert!(b.ppf(-0.1).is_err());
907        assert!(b.ppf(1.1).is_err());
908    }
909
910    // -----------------------------------------------------------------------
911    // Poisson(mu=3) tests
912    // -----------------------------------------------------------------------
913    #[test]
914    fn test_default_ppf_poisson_invariants() {
915        let po = TestPoisson { mu: 3.0 };
916        for &p in &[0.1f64, 0.25, 0.5, 0.75, 0.9] {
917            check_ppf_invariant(&po, p);
918        }
919    }
920
921    #[test]
922    fn test_default_ppf_poisson_median() {
923        let po = TestPoisson { mu: 3.0 };
924        // Median of Poisson(3) is 3
925        let med = po.ppf(0.5).expect("ppf(0.5)");
926        assert!(
927            (med - 3.0).abs() <= 1.0,
928            "Expected median near 3, got {med}"
929        );
930    }
931
932    #[test]
933    fn test_default_ppf_poisson_p0_returns_zero() {
934        let po = TestPoisson { mu: 5.0 };
935        assert_eq!(po.ppf(0.0).expect("ppf(0.0)"), 0.0);
936    }
937
938    #[test]
939    fn test_default_ppf_poisson_p1_returns_infinity() {
940        let po = TestPoisson { mu: 5.0 };
941        // Poisson has no finite upper bound → +∞
942        let v = po.ppf(1.0).expect("ppf(1.0)");
943        assert!(v.is_infinite() && v > 0.0, "Expected +∞, got {v}");
944    }
945
946    #[test]
947    fn test_default_ppf_poisson_large_mu_invariants() {
948        let po = TestPoisson { mu: 50.0 };
949        for &p in &[0.1f64, 0.5, 0.9] {
950            check_ppf_invariant(&po, p);
951        }
952    }
953
954    // -----------------------------------------------------------------------
955    // Geometric(p=0.4) tests
956    // -----------------------------------------------------------------------
957    #[test]
958    fn test_default_ppf_geometric_invariants() {
959        let g = TestGeometric { p: 0.4 };
960        for &p in &[0.1f64, 0.25, 0.5, 0.75, 0.9] {
961            check_ppf_invariant(&g, p);
962        }
963    }
964
965    #[test]
966    fn test_default_ppf_geometric_p0_returns_zero() {
967        let g = TestGeometric { p: 0.3 };
968        assert_eq!(g.ppf(0.0).expect("ppf(0.0)"), 0.0);
969    }
970
971    #[test]
972    fn test_default_ppf_geometric_p1_returns_infinity() {
973        let g = TestGeometric { p: 0.3 };
974        let v = g.ppf(1.0).expect("ppf(1.0)");
975        assert!(v.is_infinite() && v > 0.0, "Expected +∞, got {v}");
976    }
977
978    #[test]
979    fn test_default_ppf_geometric_heavy_tail() {
980        // Small p → heavy tail, exercises exponential expansion of hi
981        let g = TestGeometric { p: 0.05 };
982        for &p in &[0.5f64, 0.9, 0.99] {
983            check_ppf_invariant(&g, p);
984        }
985    }
986}
scirs2_stats/traits.rs

scirs2_stats/
traits.rs