r2rs_nmath/distribution/hyper/
mod.rs

1mod d;
2mod p;
3mod q;
4mod r;
5
6use strafe_type::{FloatConstraint, LogProbability64, PositiveInteger64, Probability64, Real64};
7
8pub(crate) use self::{d::*, p::*, q::*, r::*};
9use crate::traits::{Distribution, RNG};
10
11/// # The Hypergeometric Distribution
12///
13/// ## Description
14/// Density, distribution function, quantile function and random generation for the hypergeometric
15/// distribution.
16///
17/// ## Arguments
18///
19/// * Quantiles representing the number of white balls drawn without replacement
20/// from an urn which contains both black and white balls.
21/// * m: the number of white balls in the urn.
22/// * n: the number of black balls in the urn.
23/// * k: the number of balls drawn from the urn.
24/// * p: probability, it must be between 0 and 1.
25///
26/// ## Details
27///
28/// The hypergeometric distribution is used for sampling without replacement. The density of this
29/// distribution with parameters m, n and k (named Np, N-Np, and n, respectively in the reference below) is given by
30///
31/// $ p(x) = {m \choose x} {n \choose k-x} / {m+n \choose k} $
32///
33/// for x = 0, …, k.
34///
35/// Note that p(x) is non-zero only for max(0, k-n) <= x <= min(k, m).
36///
37/// With $p := \frac{m}{m+n}$ (hence $Np = N \times p$ in the reference's notation), the first two moments
38/// are mean
39///
40/// $ E\[X\] = μ = k p $
41///
42/// and variance
43///
44/// $ Var(X) = k p (1 - p) * \frac{m+n-k}{m+n-1} $,
45///
46/// which shows the closeness to the Binomial(k,p) (where the hypergeometric has smaller variance
47/// unless k = 1).
48///
49/// The quantile is defined as the smallest value x such that F(x) ≥ p, where F is the distribution
50/// function.
51///
52/// If one of m, n, k, exceeds .Machine$integer.max, currently the equivalent of
53/// qhyper(runif(nn), m,n,k) is used, when a binomial approximation may be considerably
54/// more efficient.
55///
56/// ## Density Plot
57///
58/// ```rust
59/// # use r2rs_base::traits::StatisticalSlice;
60/// # use r2rs_nmath::{distribution::HyperGeometricBuilder, traits::Distribution};
61/// # use strafe_plot::prelude::{IntoDrawingArea, Line, Plot, PlotOptions, SVGBackend, BLACK};
62/// # use strafe_type::FloatConstraint;
63/// let hyper_geom = HyperGeometricBuilder::new().build().unwrap();
64/// let x = <[f64]>::sequence_by(-0.5, 1.5, 0.001);
65/// let y = x
66///     .iter()
67///     .map(|x| hyper_geom.density(x).unwrap())
68///     .collect::<Vec<_>>();
69///
70/// let root = SVGBackend::new("density.svg", (1024, 768)).into_drawing_area();
71/// Plot::new()
72///     .with_options(PlotOptions {
73///         x_axis_label: "x".to_string(),
74///         y_axis_label: "density".to_string(),
75///         ..Default::default()
76///     })
77///     .with_plottable(Line {
78///         x,
79///         y,
80///         color: BLACK,
81///         ..Default::default()
82///     })
83///     .plot(&root)
84///     .unwrap();
85/// # use std::fs::rename;
86/// #     drop(root);
87/// #     rename(
88/// #             format!("density.svg"),
89/// #             format!("src/distribution/hyper/doctest_out/density.svg"),
90/// #     )
91/// #     .unwrap();
92/// ```
93#[cfg_attr(feature = "doc_outputs", cfg_attr(all(), doc = embed_doc_image::embed_image!("density", "src/distribution/hyper/doctest_out/density.svg")))]
94#[cfg_attr(feature = "doc_outputs", cfg_attr(all(), doc = "![Density][density]"))]
95///
96/// ## Source
97///
98/// dhyper computes via binomial probabilities, using code contributed by Catherine Loader
99/// (see dbinom).
100///
101/// phyper is based on calculating dhyper and phyper(...)/dhyper(...) (as a summation), based on
102/// ideas of Ian Smith and Morten Welinder.
103///
104/// qhyper is based on inversion.
105///
106/// rhyper is based on a corrected version of
107///
108/// Kachitvichyanukul, V. and Schmeiser, B. (1985). Computer generation of hypergeometric random
109/// variates. Journal of Statistical Computation and Simulation, 22, 127–145.
110///
111/// ## References
112///
113/// Johnson, N. L., Kotz, S., and Kemp, A. W. (1992) Univariate Discrete Distributions, Second
114/// Edition. New York: Wiley.
115///
116/// ## See Also
117///
118/// Distributions for other standard distributions.
119///
120/// ## Examples
121///
122/// These are not equal, but the error is very small
123/// ```rust
124/// # use r2rs_nmath::{distribution::HyperGeometricBuilder, traits::Distribution};
125/// # use r2rs_stats::traits::StatArray;
126/// # use strafe_type::FloatConstraint;
127/// let m = 10;
128/// let n = 7;
129/// let k = 8;
130///
131/// let x = (0..=k + 1).collect::<Vec<_>>();
132///
133/// let hyper = HyperGeometricBuilder::new()
134///     .with_group_1(m)
135///     .with_group_2(n)
136///     .with_number_drawn(k)
137///     .build()
138///     .unwrap();
139/// let p = x
140///     .iter()
141///     .map(|x| hyper.probability(x, true).unwrap())
142///     .collect::<Vec<_>>();
143/// let d = x
144///     .iter()
145///     .map(|x| hyper.density(x).unwrap())
146///     .collect::<Vec<_>>()
147///     .cumsum();
148/// let diff = p
149///     .iter()
150///     .zip(d.iter())
151///     .map(|(p, d)| p - d)
152///     .collect::<Vec<_>>();
153///
154/// println!("{p:?}");
155/// println!("{d:?}");
156/// println!("{diff:?}");
157/// # use std::{fs::File, io::Write};
158/// # let mut f = File::create("src/distribution/hyper/doctest_out/difference.md").unwrap();
159/// # writeln!(f, "```output").unwrap();
160/// # writeln!(f, "{p:?}").unwrap();
161/// # writeln!(f, "{d:?}").unwrap();
162/// # writeln!(f, "{diff:?}").unwrap();
163/// # writeln!(f, "```").unwrap();
164/// ```
165#[cfg_attr(feature = "doc_outputs", cfg_attr(all(), doc = include_str!("doctest_out/difference.md")))]
166pub struct HyperGeometric {
167    group_1: PositiveInteger64,
168    group_2: PositiveInteger64,
169    number_drawn: PositiveInteger64,
170}
171
172impl Distribution for HyperGeometric {
173    fn density<R: Into<Real64>>(&self, x: R) -> Real64 {
174        dhyper(x, self.group_1, self.group_2, self.number_drawn, false)
175    }
176
177    fn log_density<R: Into<Real64>>(&self, x: R) -> Real64 {
178        dhyper(x, self.group_1, self.group_2, self.number_drawn, true)
179    }
180
181    fn probability<R: Into<Real64>>(&self, q: R, lower_tail: bool) -> Probability64 {
182        phyper(q, self.group_1, self.group_2, self.number_drawn, lower_tail)
183    }
184
185    fn log_probability<R: Into<Real64>>(&self, q: R, lower_tail: bool) -> LogProbability64 {
186        log_phyper(q, self.group_1, self.group_2, self.number_drawn, lower_tail)
187    }
188
189    fn quantile<P: Into<Probability64>>(&self, p: P, lower_tail: bool) -> Real64 {
190        qhyper(p, self.group_1, self.group_2, self.number_drawn, lower_tail)
191    }
192
193    fn log_quantile<LP: Into<LogProbability64>>(&self, p: LP, lower_tail: bool) -> Real64 {
194        log_qhyper(p, self.group_1, self.group_2, self.number_drawn, lower_tail)
195    }
196
197    fn random_sample<R: RNG>(&self, rng: &mut R) -> Real64 {
198        rhyper(self.group_1, self.group_2, self.number_drawn, rng)
199    }
200}
201
202pub struct HyperGeometricBuilder {
203    group_1: Option<PositiveInteger64>,
204    group_2: Option<PositiveInteger64>,
205    number_drawn: Option<PositiveInteger64>,
206}
207
208impl HyperGeometricBuilder {
209    pub fn new() -> Self {
210        Self {
211            group_1: None,
212            group_2: None,
213            number_drawn: None,
214        }
215    }
216
217    pub fn with_group_1<P: Into<PositiveInteger64>>(&mut self, group_1: P) -> &mut Self {
218        self.group_1 = Some(group_1.into());
219        self
220    }
221
222    pub fn with_group_2<P: Into<PositiveInteger64>>(&mut self, group_2: P) -> &mut Self {
223        self.group_2 = Some(group_2.into());
224        self
225    }
226
227    pub fn with_number_drawn<P: Into<PositiveInteger64>>(&mut self, number_drawn: P) -> &mut Self {
228        self.number_drawn = Some(number_drawn.into());
229        self
230    }
231
232    pub fn build(&self) -> Result<HyperGeometric, String> {
233        let group_1 = self.group_1.unwrap_or(1.0.into());
234        let group_2 = self.group_2.unwrap_or(1.0.into());
235        let number_drawn = self.number_drawn.unwrap_or(1.0.into());
236
237        if number_drawn.unwrap() > group_1.unwrap() + group_2.unwrap() {
238            Err(format!(
239                "Number drawn must be less than the two group sizes combined: {} > {}",
240                number_drawn.unwrap(),
241                group_1.unwrap() + group_2.unwrap()
242            ))
243        } else {
244            Ok(HyperGeometric {
245                group_1,
246                group_2,
247                number_drawn,
248            })
249        }
250    }
251}
252
253#[cfg(test)]
254mod tests;
255
256#[cfg(all(test, feature = "enable_proptest"))]
257mod proptests;
258
259#[cfg(all(test, feature = "enable_covtest"))]
260mod covtests;