r2rs_base/traits/stat_slice/
mod.rs

1pub trait StatisticalSlice {
2    type Item;
3
4    /// Arithmetic Mean
5    ///
6    /// ## Description:
7    ///
8    /// Generic function for the (trimmed) arithmetic mean.
9    ///
10    /// ## Usage:
11    ///
12    /// mean(x, ...)
13    ///
14    /// ## Default S3 method:
15    /// mean(x, trim = 0, na.rm = FALSE, ...)
16    ///
17    /// ## Arguments:
18    ///
19    /// * x: An R object.  Currently there are methods for numeric/logical
20    ///vectors and date, date-time and time interval objects.
21    ///Complex vectors are allowed for ‘trim = 0’, only.
22    /// * trim: the fraction (0 to 0.5) of observations to be trimmed from
23    ///each end of ‘x’ before the mean is computed.  Values of trim
24    ///outside that range are taken as the nearest endpoint.
25    /// * na.rm: a logical evaluating to ‘TRUE’ or ‘FALSE’ indicating whether
26    ///‘NA’ values should be stripped before the computation
27    ///proceeds.
28    /// * ...: further arguments passed to or from other methods.
29    ///
30    /// ## Value:
31    ///
32    /// If ‘trim’ is zero (the default), the arithmetic mean of the values
33    /// in ‘x’ is computed, as a numeric or complex vector of length one.
34    /// If ‘x’ is not logical (coerced to numeric), numeric (including
35    /// integer) or complex, ‘NA_real_’ is returned, with a warning.
36    ///
37    /// If ‘trim’ is non-zero, a symmetrically trimmed mean is computed
38    /// with a fraction of ‘trim’ observations deleted from each end
39    /// before the mean is computed.
40    ///
41    /// ## References:
42    ///
43    /// Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) _The New S
44    /// Language_.  Wadsworth & Brooks/Cole.
45    ///
46    /// ## See Also:
47    ///
48    /// ‘weighted.mean’, ‘mean.POSIXct’, ‘colMeans’ for row and column
49    /// means.
50    ///
51    /// ## Examples:
52    ///
53    /// ```r
54    /// x <- c(0:10, 50)
55    /// xm <- mean(x)
56    /// c(xm, mean(x, trim = 0.10))
57    /// ```
58    fn mean(&self) -> Self::Item;
59
60    /// Median Value
61    ///
62    /// ## Description:
63    ///
64    /// Compute the sample median.
65    ///
66    /// ## Usage:
67    ///
68    /// median(x, na.rm = FALSE, ...)
69    ///
70    /// ## Arguments:
71    ///
72    /// * x: an object for which a method has been defined, or a numeric
73    ///vector containing the values whose median is to be computed.
74    /// * na.rm: a logical value indicating whether ‘NA’ values should be
75    ///stripped before the computation proceeds.
76    /// * ...: potentially further arguments for methods; not used in the
77    ///default method.
78    ///
79    /// ## Details:
80    ///
81    /// This is a generic function for which methods can be written.
82    /// However, the default method makes use of ‘is.na’, ‘sort’ and
83    /// ‘mean’ from package ‘base’ all of which are generic, and so the
84    /// default method will work for most classes (e.g., ‘"Date"’) for
85    /// which a median is a reasonable concept.
86    ///
87    /// ## Value:
88    ///
89    /// The default method returns a length-one object of the same type as
90    /// ‘x’, except when ‘x’ is logical or integer of even length, when
91    /// the result will be double.
92    ///
93    /// If there are no values or if ‘na.rm = FALSE’ and there are ‘NA’
94    /// values the result is ‘NA’ of the same type as ‘x’ (or more
95    /// generally the result of ‘x\[NA_integer_\]’).
96    ///
97    /// ## References:
98    ///
99    /// Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) _The New S
100    /// Language_.  Wadsworth & Brooks/Cole.
101    ///
102    /// ## See Also:
103    ///
104    /// ‘quantile’ for general quantiles.
105    ///
106    /// ## Examples:
107    ///
108    /// ```r
109    /// median(1:4)  # = 2.5 [even number]
110    /// median(c(1:3, 100, 1000))  # = 3 [odd, robust]
111    /// ```
112    fn median(&self) -> Self::Item;
113    fn mode(&self) -> Self::Item;
114
115    /// Range of Values
116    ///
117    /// ## Description:
118    ///
119    /// ‘range’ returns a vector containing the minimum and maximum of all
120    /// the given arguments.
121    ///
122    /// ## Usage:
123    ///
124    /// range(..., na.rm = FALSE)
125    ///
126    /// ## Default S3 method:
127    /// range(..., na.rm = FALSE, finite = FALSE)
128    ///
129    /// ## Arguments:
130    ///
131    /// * ...: any ‘numeric’ or character objects.
132    /// * na.rm: logical, indicating if ‘NA’'s should be omitted.
133    /// * finite: logical, indicating if all non-finite elements should be
134    ///omitted.
135    ///
136    /// ## Details:
137    ///
138    /// ‘range’ is a generic function: methods can be defined for it
139    /// directly or via the ‘Summary’ group generic.  For this to work
140    /// properly, the arguments ‘...’ should be unnamed, and dispatch is
141    /// on the first argument.
142    ///
143    /// If ‘na.rm’ is ‘FALSE’, ‘NA’ and ‘NaN’ values in any of the
144    /// arguments will cause ‘NA’ values to be returned, otherwise ‘NA’
145    /// values are ignored.
146    ///
147    /// If ‘finite’ is ‘TRUE’, the minimum and maximum of all finite
148    /// values is computed, i.e., ‘finite = TRUE’ _includes_ ‘na.rm =
149    /// TRUE’.
150    ///
151    /// A special situation occurs when there is no (after omission of
152    /// ‘NA’s) nonempty argument left, see ‘min’.
153    ///
154    /// ## S4 methods:
155    ///
156    /// This is part of the S4 ‘Summary’ group generic.  Methods for it
157    /// must use the signature ‘x, ..., na.rm’.
158    ///
159    /// ## References:
160    ///
161    /// Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) _The New S
162    /// Language_.  Wadsworth & Brooks/Cole.
163    ///
164    /// ## See Also:
165    ///
166    /// ‘min’, ‘max’.
167    ///
168    /// The ‘extendrange()’ utility in package ‘grDevices’.
169    ///
170    /// ## Examples:
171    ///
172    /// ```r
173    /// (r.x <- range(stats::rnorm(100)))
174    /// diff(r.x) # the SAMPLE range
175    ///
176    /// x <- c(NA, 1:3, -1:1/0); x
177    /// range(x)
178    /// range(x, na.rm = TRUE)
179    /// range(x, finite = TRUE)
180    /// ```
181    fn range(&self) -> (Self::Item, Self::Item);
182
183    /// Sample Ranks
184    ///
185    /// ## Description:
186    ///
187    /// Returns the sample ranks of the values in a vector.  Ties (i.e.,
188    /// equal values) and missing values can be handled in several ways.
189    ///
190    /// ## Usage:
191    ///
192    /// rank(x, na.last = TRUE,
193    /// ties.method = c("average", "first", "last", "random", "max", "min"))
194    ///
195    /// ## Arguments:
196    ///
197    ///x: a numeric, complex, character or logical vector.
198    ///
199    /// * na.last: a logical or character string controlling the treatment of
200    ///‘NA’s. If ‘TRUE’, missing values in the data are put last; if
201    ///‘FALSE’, they are put first; if ‘NA’, they are removed; if
202    ///‘"keep"’ they are kept with rank ‘NA’.
203    /// * ties.method: a character string specifying how ties are treated, see
204    ///‘Details’; can be abbreviated.
205    ///
206    /// ## Details:
207    ///
208    /// If all components are different (and no ‘NA’s), the ranks are well
209    /// defined, with values in ‘seq_along(x)’.  With some values equal
210    /// (called ‘ties’), the argument ‘ties.method’ determines the result
211    /// at the corresponding indices.  The ‘"first"’ method results in a
212    /// permutation with increasing values at each index set of ties, and
213    /// analogously ‘"last"’ with decreasing values.  The ‘"random"’
214    /// method puts these in random order whereas the default,
215    /// ‘"average"’, replaces them by their mean, and ‘"max"’ and ‘"min"’
216    /// replaces them by their maximum and minimum respectively, the
217    /// latter being the typical sports ranking.
218    ///
219    /// ‘NA’ values are never considered to be equal: for ‘na.last = TRUE’
220    /// and ‘na.last = FALSE’ they are given distinct ranks in the order
221    /// in which they occur in ‘x’.
222    ///
223    /// *NB*: ‘rank’ is not itself generic but ‘xtfrm’ is, and
224    /// ‘rank(xtfrm(x), ....)’ will have the desired result if there is a
225    /// ‘xtfrm’ method.  Otherwise, ‘rank’ will make use of ‘==’, ‘>’,
226    /// ‘is.na’ and extraction methods for classed objects, possibly
227    /// rather slowly.
228    ///
229    /// ## Value:
230    ///
231    /// A numeric vector of the same length as ‘x’ with names copied from
232    /// ‘x’ (unless ‘na.last = NA’, when missing values are removed).  The
233    /// vector is of integer type unless ‘x’ is a long vector or
234    /// ‘ties.method = "average"’ when it is of double type (whether or
235    /// not there are any ties).
236    ///
237    /// ## References:
238    ///
239    /// Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) _The New S
240    /// Language_.  Wadsworth & Brooks/Cole.
241    ///
242    /// ## See Also:
243    ///
244    /// ‘order’ and ‘sort’; ‘xtfrm’, see above.
245    ///
246    /// ## Examples:
247    ///
248    /// ```r
249    /// (r1 <- rank(x1 <- c(3, 1, 4, 15, 92)))
250    /// x2 <- c(3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5)
251    /// names(x2) <- letters[1:11]
252    /// (r2 <- rank(x2)) # ties are averaged
253    /// ```
254    ///
255    /// #### rank() is "idempotent": rank(rank(x)) == rank(x) :
256    /// ```r
257    /// stopifnot(rank(r1) == r1, rank(r2) == r2)
258    /// ```
259    ///
260    /// #### ranks without averaging
261    /// ```r
262    /// rank(x2, ties.method= "first")  # first occurrence wins
263    /// rank(x2, ties.method= "last")#  last occurrence wins
264    /// rank(x2, ties.method= "random") # ties broken at random
265    /// rank(x2, ties.method= "random") # and again
266    /// ```
267    ///
268    /// #### keep ties ties, no average
269    /// ```r
270    /// (rma <- rank(x2, ties.method= "max"))  # as used classically
271    /// (rmi <- rank(x2, ties.method= "min"))  # as in Sports
272    /// stopifnot(rma + rmi == round(r2 + r2))
273    /// ```
274    ///
275    /// #### Comparing all tie.methods:
276    /// ```r
277    /// tMeth <- eval(formals(rank)$ties.method)
278    /// rx2 <- sapply(tMeth, function(M) rank(x2, ties.method=M))
279    /// cbind(x2, rx2)
280    /// ```
281    ///
282    /// #### ties.method's does not matter w/o ties:
283    /// ```r
284    /// x <- sample(47)
285    /// rx <- sapply(tMeth, function(MM) rank(x, ties.method=MM))
286    /// stopifnot(all(rx[,1] == rx))
287    /// ```
288    fn rank(&self) -> Vec<f64>;
289
290    /// Scaling and Centering of Matrix-like Objects
291    ///
292    /// Description:
293    ///
294    /// ‘scale’ is generic function whose default method centers and/or
295    /// scales the columns of a numeric matrix.
296    ///
297    /// Usage:
298    ///
299    /// scale(x, center = TRUE, scale = TRUE)
300    ///
301    /// Arguments:
302    ///
303    /// * x: a numeric matrix(like object).
304    /// * center: either a logical value or numeric-alike vector of length
305    ///equal to the number of columns of ‘x’, where ‘numeric-alike’
306    ///means that ‘as.numeric(.)’ will be applied successfully if
307    ///‘is.numeric(.)’ is not true.
308    /// * scale: either a logical value or a numeric-alike vector of length
309    ///equal to the number of columns of ‘x’.
310    ///
311    /// Details:
312    ///
313    /// The value of ‘center’ determines how column centering is
314    /// performed.  If ‘center’ is a numeric-alike vector with length
315    /// equal to the number of columns of ‘x’, then each column of ‘x’ has
316    /// the corresponding value from ‘center’ subtracted from it.  If
317    /// ‘center’ is ‘TRUE’ then centering is done by subtracting the
318    /// column means (omitting ‘NA’s) of ‘x’ from their corresponding
319    /// columns, and if ‘center’ is ‘FALSE’, no centering is done.
320    ///
321    /// The value of ‘scale’ determines how column scaling is performed
322    /// (after centering).  If ‘scale’ is a numeric-alike vector with
323    /// length equal to the number of columns of ‘x’, then each column of
324    /// ‘x’ is divided by the corresponding value from ‘scale’.  If
325    /// ‘scale’ is ‘TRUE’ then scaling is done by dividing the (centered)
326    /// columns of ‘x’ by their standard deviations if ‘center’ is ‘TRUE’,
327    /// and the root mean square otherwise.  If ‘scale’ is ‘FALSE’, no
328    /// scaling is done.
329    ///
330    /// The root-mean-square for a (possibly centered) column is defined
331    /// as $\sqrt{\frac{\sum{x^2}}{n-1}}$, where x is a vector of the non-missing
332    /// values and n is the number of non-missing values.  In the case
333    /// ‘center = TRUE’, this is the same as the standard deviation, but
334    /// in general it is not.  (To scale by the standard deviations
335    /// without centering, use ‘scale(x, center = FALSE, scale = apply(x,
336    /// 2, sd, na.rm = TRUE))’.)
337    ///
338    /// Value:
339    ///
340    /// For ‘scale.default’, the centered, scaled matrix.  The numeric
341    /// centering and scalings used (if any) are returned as attributes
342    /// ‘"scaled:center"’ and ‘"scaled:scale"’
343    ///
344    /// References:
345    ///
346    /// Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) _The New S
347    /// Language_.  Wadsworth & Brooks/Cole.
348    ///
349    /// See Also:
350    ///
351    /// ‘sweep’ which allows centering (and scaling) with arbitrary
352    /// statistics.
353    ///
354    /// For working with the scale of a plot, see ‘par’.
355    ///
356    /// Examples:
357    ///
358    /// ```r
359    /// require(stats)
360    /// x <- matrix(1:10, ncol = 2)
361    /// (centered.x <- scale(x, scale = FALSE))
362    /// cov(centered.scaled.x <- scale(x)) # all 1
363    /// ```
364    fn scale(&mut self);
365    fn scale_by(&mut self, scale: Self::Item);
366    fn scale_robust(&mut self);
367    fn center(&mut self);
368    fn center_by(&mut self, center: Self::Item);
369    fn center_robust(&mut self);
370
371    /// Sample Quantiles
372    ///
373    /// ## Description:
374    ///
375    /// The generic function ‘quantile’ produces sample quantiles
376    /// corresponding to the given probabilities.  The smallest
377    /// observation corresponds to a probability of 0 and the largest to a
378    /// probability of 1.
379    ///
380    /// ## Usage:
381    ///
382    /// quantile(x, ...)
383    ///
384    /// ## Default S3 method:
385    /// quantile(x, probs = seq(0, 1, 0.25), na.rm = FALSE,
386    ///  names = TRUE, type = 7, digits = 7, ...)
387    ///
388    /// ## Arguments:
389    ///
390    /// * x: numeric vector whose sample quantiles are wanted, or an
391    /// object of a class for which a method has been defined (see
392    /// also ‘details’). ‘NA’ and ‘NaN’ values are not allowed in
393    /// numeric vectors unless ‘na.rm’ is ‘TRUE’.
394    ///
395    /// * probs: numeric vector of probabilities with values in \[0,1\].
396    /// (Values up to ‘2e-14’ outside that range are accepted and
397    /// moved to the nearby endpoint.)
398    ///
399    /// * na.rm: logical; if true, any ‘NA’ and ‘NaN’'s are removed from ‘x’
400    /// before the quantiles are computed.
401    ///
402    /// * names: logical; if true, the result has a ‘names’ attribute.  Set to
403    /// ‘FALSE’ for speedup with many ‘probs’.
404    ///
405    /// * type: an integer between 1 and 9 selecting one of the nine quantile
406    /// algorithms detailed below to be used.
407    ///
408    /// * digits: used only when ‘names’ is true: the precision to use when
409    /// formatting the percentages.  In R versions up to 4.0.x, this
410    /// had been set to ‘max(2, getOption("digits"))’, internally.
411    ///
412    /// * ...: further arguments passed to or from other methods.
413    ///
414    /// ## Details:
415    ///
416    /// A vector of length ‘length(probs)’ is returned; if ‘names = TRUE’,
417    /// it has a ‘names’ attribute.
418    ///
419    /// ‘NA’ and ‘NaN’ values in ‘probs’ are propagated to the result.
420    ///
421    /// The default method works with classed objects sufficiently like
422    /// numeric vectors that ‘sort’ and (not needed by types 1 and 3)
423    /// addition of elements and multiplication by a number work
424    /// correctly.  Note that as this is in a namespace, the copy of
425    /// ‘sort’ in ‘base’ will be used, not some S4 generic of that name.
426    /// Also note that that is no check on the ‘correctly’, and so e.g.
427    /// ‘quantile’ can be applied to complex vectors which (apart from
428    /// ties) will be ordered on their real parts.
429    ///
430    /// There is a method for the date-time classes (see ‘"POSIXt"’).
431    /// Types 1 and 3 can be used for class ‘"Date"’ and for ordered
432    /// factors.
433    ///
434    /// ## Types:
435    ///
436    /// ‘quantile’ returns estimates of underlying distribution quantiles
437    /// based on one or two order statistics from the supplied elements in
438    /// ‘x’ at probabilities in ‘probs’.  One of the nine quantile
439    /// algorithms discussed in Hyndman and Fan (1996), selected by
440    /// ‘type’, is employed.
441    ///
442    /// All sample quantiles are defined as weighted averages of
443    /// consecutive order statistics. Sample quantiles of type i are
444    /// defined by:
445    ///
446    ///$Q\[i\](p) = (1 - \gamma) x\[j\] + \gamma x\[j+1\]$,
447    ///
448    /// where $1 <= i <= 9$, $(j-m)/n <= p < (j-m+1)/n$, $x\[j\]$ is the $j$th order
449    /// statistic, $n$ is the sample size, the value of gamma is a function
450    /// of $j = \text{floor}(np + m)$ and $g = np + m - j$, and $m$ is a constant
451    /// determined by the sample quantile type.
452    ///
453    /// *Discontinuous sample quantile types 1, 2, and 3*
454    ///
455    /// For types 1, 2 and 3, $Q\[i\](p)$ is a discontinuous function of p,
456    /// with $m = 0$ when $i = 1$ and $i = 2$, and $m = -1/2$ when $i = 3$.
457    ///
458    /// * Type 1 Inverse of empirical distribution function.  $\gamma = 0$ if $g = 0$,
459    ///and 1 otherwise.
460    ///
461    /// * Type 2 Similar to type 1 but with averaging at discontinuities.
462    ///$\gamma = 0.5$ if $g = 0$, and 1 otherwise (SAS default, see
463    ///Wicklin(2017)).
464    ///
465    /// * Type 3 Nearest even order statistic (SAS default till ca. 2010).
466    ///$\gamma = 0$ if $g = 0$ and $j$ is even, and 1 otherwise.
467    ///
468    /// *Continuous sample quantile types 4 through 9*
469    ///
470    /// For types 4 through 9, $Q\[i\](p)$ is a continuous function of $p$, with
471    /// $\gamma = g$ and $m$ given below. The sample quantiles can be obtained
472    /// equivalently by linear interpolation between the points
473    /// $(p\[k\],x\[k\])$ where $x\[k\]$ is the $k$th order statistic.  Specific
474    /// expressions for $p\[k\]$ are given below.
475    ///
476    /// * Type 4 $m = 0$. $p\[k\] = k / n$.  That is, linear interpolation of the
477    ///empirical cdf.
478    ///
479    /// * Type 5 $m = 1/2$.  $p\[k\] = (k - 0.5) / n$.  That is a piecewise linear
480    ///function where the knots are the values midway through the
481    ///steps of the empirical cdf.  This is popular amongst
482    ///hydrologists.
483    ///
484    /// * Type 6 $m = p$. $p\[k\] = k / (n + 1)$.  Thus $p\[k\] = E\[F(x\[k\])\]$.  This
485    ///is used by Minitab and by SPSS.
486    ///
487    /// * Type 7 $m = 1-p$.  $p\[k\] = (k - 1) / (n - 1)$.  In this case,
488    ///$p\[k\] = \text{mode}\[F(x\[k\])\]$.  This is used by S.
489    ///
490    /// * Type 8 $m = (p+1)/3$.  $p\[k\] = (k - 1/3) / (n + 1/3)$.  Then
491    ///$p\[k\] =~ \text{median}\[F(x\[k\])\]$. The resulting quantile estimates are
492    ///approximately median-unbiased regardless of the distribution
493    ///of ‘x’.
494    ///
495    /// * Type 9 $m = p/4 + 3/8$.  $p\[k\] = (k - 3/8) / (n + 1/4)$.  The
496    ///resulting quantile estimates are approximately unbiased for
497    ///the expected order statistics if ‘x’ is normally distributed.
498    ///
499    /// Further details are provided in Hyndman and Fan (1996) who
500    /// recommended type 8.  The default method is type 7, as used by S
501    /// and by R < 2.0.0.  Makkonen argues for type 6, also as already
502    /// proposed by Weibull in 1939.  The Wikipedia page contains further
503    /// information about availability of these 9 types in software.
504    ///
505    /// ## Author(s):
506    ///
507    /// of the version used in R >= 2.0.0, Ivan Frohne and Rob J Hyndman.
508    ///
509    /// ## References:
510    ///
511    /// Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) _The New S
512    /// Language_.  Wadsworth & Brooks/Cole.
513    ///
514    /// Hyndman, R. J. and Fan, Y. (1996) Sample quantiles in statistical
515    /// packages, _American Statistician_ *50*, 361-365.
516    /// doi:10.2307/2684934 <https://doi.org/10.2307/2684934>.
517    ///
518    /// Wicklin, R. (2017) Sample quantiles: A comparison of 9
519    /// definitions; SAS Blog.
520    /// <https://blogs.sas.com/content/iml/2017/05/24/definitions-sample-quantiles.html>
521    ///
522    /// Wikipedia:
523    /// <https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample>
524    ///
525    /// ## See Also:
526    ///
527    /// ‘ecdf’ for empirical distributions of which ‘quantile’ is an
528    /// inverse; ‘boxplot.stats’ and ‘fivenum’ for computing other
529    /// versions of quartiles, etc.
530    ///
531    /// ## Examples:
532    ///
533    /// ```r
534    /// quantile(x <- rnorm(1001)) # Extremes & Quartiles by default
535    /// quantile(x,  probs = c(0.1, 0.5, 1, 2, 5, 10, 50, NA)/100)
536    ///
537    /// ### Compare different types
538    /// quantAll <- function(x, prob, ...)
539    /// t(vapply(1:9, function(typ) quantile(x, probs = prob, type = typ, ...),
540    ///  quantile(x, prob, type=1, ...)))
541    /// p <- c(0.1, 0.5, 1, 2, 5, 10, 50)/100
542    /// signif(quantAll(x, p), 4)
543    ///
544    /// ## 0% and 100% are equal to min(), max() for all types:
545    /// stopifnot(t(quantAll(x, prob=0:1)) == range(x))
546    ///
547    /// ## for complex numbers:
548    /// z <- complex(real = x, imaginary = -10*x)
549    /// signif(quantAll(z, p), 4)
550    /// ```
551    fn quantile(&self, percentages: &[f64], rtype: QuantileType) -> Vec<f64>;
552
553    /// # The Interquartile Range
554    ///
555    /// ## Description:
556    ///
557    /// computes interquartile range of the ‘x’ values.
558    ///
559    /// ## Usage:
560    ///
561    /// IQR(x, na.rm = FALSE, type = 7)
562    ///
563    /// ## Arguments:
564    ///
565    /// * x: a numeric vector.
566    /// * na.rm: logical. Should missing values be removed?
567    /// * type: an integer selecting one of the many quantile algorithms, see
568    /// ‘quantile’.
569    ///
570    /// ## Details:
571    ///
572    /// Note that this function computes the quartiles using the
573    /// ‘quantile’ function rather than following Tukey's recommendations,
574    /// i.e., ‘IQR(x) = quantile(x, 3/4) - quantile(x, 1/4)’.
575    ///
576    /// For normally N(m,1) distributed X, the expected value of ‘IQR(X)’
577    /// is ‘2*qnorm(3/4) = 1.3490’, i.e., for a normal-consistent estimate
578    /// of the standard deviation, use ‘IQR(x) / 1.349’.
579    ///
580    /// ## References:
581    ///
582    /// Tukey, J. W. (1977).  _Exploratory Data Analysis._ Reading:
583    /// Addison-Wesley.
584    ///
585    /// ## See Also:
586    ///
587    /// ‘fivenum’, ‘mad’ which is more robust, ‘range’, ‘quantile’.
588    ///
589    /// ## Examples:
590    ///
591    /// ```r
592    /// IQR(rivers)
593    /// ```
594    fn iqr(&self, qtype: QuantileType) -> f64;
595
596    /// 'Jitter' (Add Noise) to Numbers
597    ///
598    /// ## Description:
599    ///
600    /// Add a small amount of noise to a numeric vector.
601    ///
602    /// ## Usage:
603    ///
604    /// jitter(x, factor = 1, amount = NULL)
605    ///
606    /// ## Arguments:
607    ///
608    /// * x: numeric vector to which _jitter_ should be added.
609    /// *factor: numeric.
610    /// *amount: numeric; if positive, used as _amount_ (see below),
611    ///otherwise, if ‘= 0’ the default is ‘factor * z/50’.
612    ///
613    /// Default (‘NULL’): ‘factor * d/5’ where ‘d’ is about the
614    /// smallest difference between ‘x’ values.
615    ///
616    /// ## Details:
617    ///
618    /// The result, say ‘r’, is ‘r <- x + runif(n, -a, a)’ where ‘n <-
619    /// length(x)’ and ‘a’ is the ‘amount’ argument (if specified).
620    ///
621    /// Let ‘z <- max(x) - min(x)’ (assuming the usual case).  The amount
622    /// ‘a’ to be added is either provided as _positive_ argument ‘amount’
623    /// or otherwise computed from ‘z’, as follows:
624    ///
625    /// If ‘amount == 0’, we set ‘a <- factor * z/50’ (same as S).
626    ///
627    /// If ‘amount’ is ‘NULL’ (_default_), we set ‘a <- factor * d/5’
628    /// where _d_ is the smallest difference between adjacent unique
629    /// (apart from fuzz) ‘x’ values.
630    ///
631    /// ## Value:
632    ///
633    /// ‘jitter(x, ...)’ returns a numeric of the same length as ‘x’, but
634    /// with an ‘amount’ of noise added in order to break ties.
635    ///
636    /// ## Author(s):
637    ///
638    /// Werner Stahel and Martin Maechler, ETH Zurich
639    ///
640    /// ## References:
641    ///
642    /// Chambers, J. M., Cleveland, W. S., Kleiner, B. and Tukey, P.A.
643    /// (1983) _Graphical Methods for Data Analysis._ Wadsworth; figures
644    /// 2.8, 4.22, 5.4.
645    ///
646    /// Chambers, J. M. and Hastie, T. J. (1992) _Statistical Models in
647    /// S._ Wadsworth & Brooks/Cole.
648    ///
649    /// ## See Also:
650    ///
651    /// ‘rug’ which you may want to combine with ‘jitter’.
652    ///
653    /// ## Examples:
654    ///
655    /// ```r
656    /// round(jitter(c(rep(1, 3), rep(1.2, 4), rep(3, 3))), 3)
657    /// ## These two 'fail' with S-plus 3.x:
658    /// jitter(rep(0, 7))
659    /// jitter(rep(10000, 5))
660    /// ```
661    fn jitter<R: NMathRNG + RNG>(
662        &mut self,
663        factor: Option<f64>,
664        amount: Option<Positive64>,
665        rng: &mut R,
666    );
667    fn sequence(start: Self::Item, end: Self::Item, length: usize) -> Vec<Self::Item>;
668    fn sequence_by(start: Self::Item, end: Self::Item, by: Self::Item) -> Vec<Self::Item>;
669}
670
671use std::{cmp::PartialOrd, mem::swap};
672
673use itertools::Itertools;
674use num_traits::{Float, FromPrimitive, Num, NumAssign, ToPrimitive};
675use r2rs_nmath::{
676    distribution::UniformBuilder,
677    rng::NMathRNG,
678    traits::{DigitPrec, Distribution, RNG},
679};
680use strafe_type::{FloatConstraint, Positive64};
681
682use crate::func::diff;
683
684pub enum QuantileType {
685    InverseEmpiricalDistribution,
686    InverseEmpiricalDistributionWithAveraging,
687    NearestEvenOrder,
688    LinearInterpolation,
689    PiecewiseLinear,
690    Minitab,
691    S,
692    MedianUnbiased,
693    UnbiasedNormal,
694}
695
696impl<I> StatisticalSlice for [I]
697where
698    I: Num
699        + NumAssign
700        + Float
701        + FromPrimitive
702        + ToPrimitive
703        + PartialOrd
704        + Clone
705        + std::fmt::Debug
706        + std::iter::Sum,
707{
708    type Item = I;
709
710    fn mean(&self) -> Self::Item {
711        self.iter().cloned().sum::<Self::Item>() / Self::Item::from_usize(self.len()).unwrap()
712    }
713
714    fn median(&self) -> Self::Item {
715        let mut s = (0..self.len()).collect::<Vec<_>>();
716        s.sort_by(|&i1, &i2| self[i1].partial_cmp(&self[i2]).unwrap());
717
718        let l = s.len();
719        if l == 1 {
720            self[0]
721        } else {
722            let left = self[s[((l as f64 - 1.0) / 2.0).floor() as usize]];
723            let right = self[s[((l as f64 - 1.0) / 2.0).ceil() as usize]];
724            (left + right) / Self::Item::from_u8(2).unwrap()
725        }
726    }
727
728    fn mode(&self) -> Self::Item {
729        let mut s = (0..self.len()).collect::<Vec<_>>();
730        s.sort_by(|&i1, &i2| self[i1].partial_cmp(&self[i2]).unwrap());
731
732        let mut stored_num = &self[s[0]];
733        let mut current_num = &self[s[0]];
734        let mut stored_count = 0;
735        let mut current_count = 0;
736        s.iter().for_each(|&i| {
737            if &self[i] == current_num {
738                current_count += 1
739            } else {
740                current_count = 1;
741                current_num = &self[i];
742            }
743            if current_count > stored_count {
744                stored_count = current_count;
745                stored_num = current_num;
746            }
747        });
748
749        *stored_num
750    }
751
752    fn range(&self) -> (Self::Item, Self::Item) {
753        let min = *self
754            .iter()
755            .min_by(|i1, i2| i1.partial_cmp(i2).unwrap())
756            .unwrap();
757        let max = *self
758            .iter()
759            .max_by(|i1, i2| i1.partial_cmp(i2).unwrap())
760            .unwrap();
761        (min, max)
762    }
763
764    fn rank(&self) -> Vec<f64> {
765        let mut ret = vec![0.0; self.len()];
766
767        for i in 0..self.len() {
768            let mut r = 1.0;
769            let mut s = 1.0;
770
771            for j in 0..self.len() {
772                if j != i && self[j] < self[i] {
773                    r += 1.0;
774                }
775
776                if j != i && self[j] == self[i] {
777                    s += 1.0;
778                }
779            }
780
781            ret[i] = r + (s - 1.0) / 2.0;
782        }
783
784        ret
785    }
786
787    fn scale(&mut self) {
788        let numerator = self.iter().cloned().map(|i| i.powi(2)).sum::<Self::Item>();
789        let denominator = Self::Item::from_usize((self.len() - 1).max(1)).unwrap();
790        let scale = (numerator / denominator).sqrt();
791
792        self.scale_by(scale);
793    }
794
795    fn scale_by(&mut self, scale: Self::Item) {
796        self.iter_mut().for_each(|i| *i /= scale);
797    }
798
799    fn scale_robust(&mut self) {
800        let quantile = self.quantile(&[0.25, 0.75], QuantileType::S);
801        let iqr = quantile[1] - quantile[0];
802
803        self.scale_by(I::from(iqr).unwrap());
804    }
805
806    fn center(&mut self) {
807        let center = self.mean();
808
809        self.center_by(center);
810    }
811
812    fn center_by(&mut self, center: Self::Item) {
813        self.iter_mut().for_each(|i| *i -= center);
814    }
815
816    fn center_robust(&mut self) {
817        let center = self.median();
818
819        self.center_by(center);
820    }
821
822    fn quantile(&self, percentages: &[f64], qtype: QuantileType) -> Vec<f64> {
823        let ordered_self = self
824            .iter()
825            .map(|x| x.to_f64().unwrap())
826            .sorted_by(|x1, x2| x1.partial_cmp(x2).unwrap())
827            .collect::<Vec<_>>();
828        let n = self.len();
829
830        let (h, j) = if let QuantileType::InverseEmpiricalDistribution
831        | QuantileType::InverseEmpiricalDistributionWithAveraging
832        | QuantileType::NearestEvenOrder = qtype
833        {
834            let nppm = percentages
835                .iter()
836                .map(|p| {
837                    if let QuantileType::NearestEvenOrder = qtype {
838                        n as f64 * p - 0.5
839                    } else {
840                        n as f64 * p
841                    }
842                })
843                .collect::<Vec<_>>();
844            let j = nppm.iter().map(|nppm| nppm.floor()).collect::<Vec<_>>();
845
846            let h = match qtype {
847                QuantileType::InverseEmpiricalDistribution => nppm
848                    .iter()
849                    .zip(j.iter())
850                    .map(|(nppm, j)| if nppm > j { 1.0 } else { 0.0 })
851                    .collect::<Vec<_>>(),
852                QuantileType::InverseEmpiricalDistributionWithAveraging => nppm
853                    .iter()
854                    .zip(j.iter())
855                    .map(|(nppm, j)| if nppm > j { 1.0 } else { 0.5 })
856                    .collect::<Vec<_>>(),
857                QuantileType::NearestEvenOrder => nppm
858                    .iter()
859                    .zip(j.iter())
860                    .map(|(nppm, j)| {
861                        if (nppm != j) || ((j % 2.0) == 1.0) {
862                            1.0
863                        } else {
864                            0.0
865                        }
866                    })
867                    .collect::<Vec<_>>(),
868                _ => unreachable!(),
869            };
870
871            (h, j)
872        } else {
873            let (a, b) = match qtype {
874                QuantileType::LinearInterpolation => (0.0, 1.0),
875                QuantileType::PiecewiseLinear => (0.5, 0.5),
876                QuantileType::Minitab => (0.0, 0.0),
877                QuantileType::S => (1.0, 1.0),
878                QuantileType::MedianUnbiased => (1.0 / 3.0, 1.0 / 3.0),
879                QuantileType::UnbiasedNormal => (3.0 / 8.0, 3.0 / 8.0),
880                _ => unreachable!(),
881            };
882            let eps = 2.220446e-16;
883            let fuzz = 4.0 * eps;
884
885            let nppm = percentages
886                .iter()
887                .map(|p| a + p * (n as f64 + 1.0 - a - b))
888                .collect::<Vec<_>>();
889            let j = nppm
890                .iter()
891                .map(|nppm| (nppm + fuzz).floor())
892                .collect::<Vec<_>>();
893            let h = nppm
894                .iter()
895                .zip(j.iter())
896                .map(|(nppm, j)| {
897                    let h = nppm - j;
898                    if h.abs() < fuzz {
899                        0.0
900                    } else {
901                        h
902                    }
903                })
904                .collect::<Vec<_>>();
905
906            (h, j)
907        };
908
909        let mut temp_x = ordered_self;
910        temp_x.insert(0, temp_x[0]);
911        temp_x.insert(0, temp_x[0]);
912        temp_x.push(*temp_x.last().unwrap());
913        temp_x.push(*temp_x.last().unwrap());
914
915        j.iter()
916            .zip(h.iter())
917            .map(|(j, h)| {
918                if *h == 0.0 {
919                    temp_x[(j + 1.0) as usize]
920                } else if *h == 1.0 {
921                    temp_x[(j + 2.0) as usize]
922                } else {
923                    (1.0 - h) * temp_x[(j + 1.0) as usize] + h * temp_x[(j + 2.0) as usize]
924                }
925            })
926            .collect()
927    }
928
929    fn iqr(&self, qtype: QuantileType) -> f64 {
930        let quantiles = self.quantile(&[0.25, 0.75], qtype);
931        quantiles[1] - quantiles[0]
932    }
933
934    fn jitter<R: NMathRNG + RNG>(
935        &mut self,
936        factor: Option<f64>,
937        amount: Option<Positive64>,
938        rng: &mut R,
939    ) {
940        let factor = factor.unwrap_or(1.0);
941
942        let (r1, r2) = self
943            .iter()
944            .filter(|x| x.is_finite())
945            .cloned()
946            .dedup_by(|f1, f2| f1 == f2)
947            .collect::<Vec<_>>()
948            .range();
949        let mut z = r2 - r1;
950
951        if z == Self::Item::zero() {
952            z = self[0];
953            if z == Self::Item::zero() {
954                z = Self::Item::one();
955            }
956        }
957
958        let amount = if let Some(am) = amount {
959            if am.unwrap() == 0.0 {
960                factor * (z.to_f64().unwrap() / 50.0)
961            } else {
962                am.unwrap()
963            }
964        } else {
965            let round_to = (3.0 - z.to_f64().unwrap().log10().floor()) as isize;
966            let xx = self
967                .iter()
968                .map(|xi| xi.to_f64().unwrap().round_to(round_to))
969                .sorted_by(|f1, f2| f1.partial_cmp(f2).unwrap())
970                .dedup_by(|f1, f2| f1 == f2)
971                .collect::<Vec<_>>();
972            let d = diff(&xx);
973
974            if !d.is_empty() {
975                *d.iter()
976                    .min_by(|f1, f2| f1.partial_cmp(f2).unwrap())
977                    .unwrap()
978            } else if xx[0] != 0.0 {
979                xx[0] / 10.0
980            } else {
981                z.to_f64().unwrap() / 10.0
982            }
983        };
984
985        let mut unif_builder = UniformBuilder::new();
986        unif_builder.with_bounds(-amount, amount);
987        let uniform = unif_builder.build();
988
989        self.iter_mut().for_each(|xi| {
990            *xi = Self::Item::from(
991                xi.to_f64().unwrap() + uniform.random_sample(rng).to_f64().unwrap(),
992            )
993            .unwrap()
994        })
995    }
996
997    fn sequence(mut start: Self::Item, mut end: Self::Item, length: usize) -> Vec<Self::Item> {
998        let mut reverse = false;
999        if start < end {
1000            swap(&mut start, &mut end);
1001            reverse = true;
1002        }
1003        let mut ret = (0..=length)
1004            .map(|i| {
1005                (Self::Item::from_usize(i).unwrap() / Self::Item::from_usize(length).unwrap()
1006                    * (end - start))
1007                    + start
1008            })
1009            .collect::<Vec<_>>();
1010
1011        if reverse {
1012            ret = ret.into_iter().rev().collect();
1013        }
1014
1015        ret
1016    }
1017
1018    fn sequence_by(start: Self::Item, end: Self::Item, by: Self::Item) -> Vec<Self::Item> {
1019        Self::sequence(start, end, ((end - start).abs() / by).to_usize().unwrap())
1020    }
1021}
1022
1023#[cfg(test)]
1024mod tests;
1025
1026#[cfg(all(test, feature = "enable_proptest"))]
1027mod proptests;
1028
1029#[cfg(all(test, feature = "enable_covtest"))]
1030mod covtests;
r2rs_base/traits/stat_slice/mod.rs

r2rs_base/traits/stat_slice/
mod.rs