1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
mod c;
mod d;
mod p;
mod q;
mod r;

use strafe_type::{LogProbability64, Natural64, Probability64, Real64};

pub use self::{c::*, d::*, p::*, q::*, r::*};
use crate::traits::{Distribution, RNG};

/// # Distribution of the Wilcoxon Rank Sum Statistic
///
/// ## Description
///
/// Density, distribution function, quantile function and random generation for the distribution of
/// the Wilcoxon rank sum statistic obtained from samples with size m and n, respectively.
///
/// ## Arguments
///
/// * m, n: numbers of observations in the first and second sample, respectively. Can be vectors of positive integers.
///
/// ## Details
///
/// This distribution is obtained as follows. Let x and y be two random, independent samples of
/// size m and n. Then the Wilcoxon rank sum statistic is the number of all pairs (x\[i\], y\[j\])
/// for which y\[j\] is not greater than x\[i\]. This statistic takes values between 0 and m * n,
/// and its mean and variance are $ m * n / 2 $ and $ m * n * (m + n + 1) / 12 $, respectively.
///
/// If any of the first three arguments are vectors, the recycling rule is used to do the
/// calculations for all combinations of the three up to the length of the longest vector.
///
/// ## Density Plot
///
/// ```rust
/// # use r2rs_base::traits::StatisticalSlice;
/// # use r2rs_nmath::{distribution::WilcoxBuilder, traits::Distribution};
/// # use strafe_plot::prelude::{IntoDrawingArea, Line, Plot, PlotOptions, SVGBackend, BLACK};
/// # use strafe_type::FloatConstraint;
/// let wilcox = WilcoxBuilder::new().build();
/// let x = <[f64]>::sequence_by(-1.0, 2.0, 0.001);
/// let y = x
///     .iter()
///     .map(|x| wilcox.density(x).unwrap())
///     .collect::<Vec<_>>();
///
/// let root = SVGBackend::new("density.svg", (1024, 768)).into_drawing_area();
/// Plot::new()
///     .with_options(PlotOptions {
///         x_axis_label: "x".to_string(),
///         y_axis_label: "density".to_string(),
///         ..Default::default()
///     })
///     .with_plottable(Line {
///         x,
///         y,
///         color: BLACK,
///         ..Default::default()
///     })
///     .plot(&root)
///     .unwrap();
/// # use std::fs::rename;
/// #     drop(root);
/// #     rename(
/// #             format!("density.svg"),
/// #             format!("src/distribution/wilcox/doctest_out/density.svg"),
/// #     )
/// #     .unwrap();
/// ```
#[cfg_attr(feature = "doc_outputs", cfg_attr(all(), doc = embed_doc_image::embed_image!("density", "src/distribution/wilcox/doctest_out/density.svg")))]
#[cfg_attr(feature = "doc_outputs", cfg_attr(all(), doc = "![Density][density]"))]
///
/// ## Warning
///
/// These functions can use large amounts of memory and stack (and even crash R if the stack limit
/// is exceeded and stack-checking is not in place) if one sample is large (several thousands or more).
///
/// ## Note
///
/// S-PLUS uses a different (but equivalent) definition of the Wilcoxon statistic: see wilcox.test
/// for details.
///
/// ## Author(s)
///
/// Kurt Hornik
///
/// ## Source
///
/// These ("d","p","q") are calculated via recursion, based on cwilcox(k, m, n), the number of
/// choices with statistic k from samples of size m and n, which is itself calculated recursively
/// and the results cached. Then dwilcox and pwilcox sum appropriate values of cwilcox, and qwilcox
/// is based on inversion.
///
/// rwilcox generates a random permutation of ranks and evaluates the statistic. Note that it is
/// based on the same C code as sample(), and hence is determined by .Random.seed, notably from
/// RNGkind(sample.kind = ..) which changed with R version 3.6.0.
///
/// ## See Also
///
/// wilcox.test to calculate the statistic from data, find p values and so on.
///
/// Distributions for standard distributions, including dsignrank for the distribution of the
/// one-sample Wilcoxon signed rank statistic.
///
/// ## Examples
///
/// ```r
/// require(graphics)
///
/// x <- -1:(4*6 + 1)
/// fx <- dwilcox(x, 4, 6)
/// Fx <- pwilcox(x, 4, 6)
///
/// layout(rbind(1,2), widths = 1, heights = c(3,2))
/// plot(x, fx, type = "h", col = "violet",
///      main =  "Probabilities (density) of Wilcoxon-Statist.(n=6, m=4)")
/// plot(x, Fx, type = "s", col = "blue",
///      main =  "Distribution of Wilcoxon-Statist.(n=6, m=4)")
/// abline(h = 0:1, col = "gray20", lty = 2)
/// layout(1) # set back
///
/// N <- 200
/// hist(U <- rwilcox(N, m = 4,n = 6), breaks = 0:25 - 1/2,
///      border = "red", col = "pink", sub = paste("N =",N))
/// mtext("N * f(x),  f() = true \"density\"", side = 3, col = "blue")
///  lines(x, N*fx, type = "h", col = "blue", lwd = 2)
/// points(x, N*fx, cex = 2)
///
/// ## Better is a Quantile-Quantile Plot
/// qqplot(U, qw <- qwilcox((1:N - 1/2)/N, m = 4, n = 6),
///        main = paste("Q-Q-Plot of empirical and theoretical quantiles",
///                      "Wilcoxon Statistic,  (m=4, n=6)", sep = "\n"))
/// n <- as.numeric(names(print(tU <- table(U))))
/// text(n+.2, n+.5, labels = tU, col = "red")
/// ```
pub struct Wilcox {
    m: Natural64,
    n: Natural64,
}

impl Distribution for Wilcox {
    fn density<R: Into<Real64>>(&self, x: R) -> Real64 {
        dwilcox(x, self.m, self.n, false)
    }

    fn log_density<R: Into<Real64>>(&self, x: R) -> Real64 {
        dwilcox(x, self.m, self.n, true)
    }

    fn probability<R: Into<Real64>>(&self, q: R, lower_tail: bool) -> Probability64 {
        pwilcox(q, self.m, self.n, lower_tail)
    }

    fn log_probability<R: Into<Real64>>(&self, q: R, lower_tail: bool) -> LogProbability64 {
        log_pwilcox(q, self.m, self.n, lower_tail)
    }

    fn quantile<P: Into<Probability64>>(&self, p: P, lower_tail: bool) -> Real64 {
        qwilcox(p, self.m, self.n, lower_tail)
    }

    fn log_quantile<LP: Into<LogProbability64>>(&self, p: LP, lower_tail: bool) -> Real64 {
        log_qwilcox(p, self.m, self.n, lower_tail)
    }

    fn random_sample<R: RNG>(&self, rng: &mut R) -> Real64 {
        rwilcox(self.m, self.n, rng)
    }
}

pub struct WilcoxBuilder {
    m: Option<Natural64>,
    n: Option<Natural64>,
}

impl WilcoxBuilder {
    pub fn new() -> Self {
        Self { m: None, n: None }
    }

    pub fn with_m<N: Into<Natural64>>(&mut self, m: N) -> &mut Self {
        self.m = Some(m.into());
        self
    }

    pub fn with_n<N: Into<Natural64>>(&mut self, n: N) -> &mut Self {
        self.n = Some(n.into());
        self
    }

    pub fn build(&self) -> Wilcox {
        let m = self.m.unwrap_or(1.0.into());
        let n = self.n.unwrap_or(1.0.into());

        Wilcox { m, n }
    }
}

#[cfg(test)]
mod tests;

#[cfg(all(test, feature = "enable_proptest"))]
mod proptests;

#[cfg(all(test, feature = "enable_covtest"))]
mod covtests;