rstats/
stats.rs

1use crate::*;
2use core::cmp::Ordering::*;
3use indxvec::Vecops;
4use medians::{Median, Medianf64};
5
6impl<T> Stats for &[T]
7where
8    T: Clone + PartialOrd + Into<f64>,
9{
10    /// Vector magnitude
11    fn vmag(self) -> f64 {
12        match self.len() {
13            0 => 0_f64,
14            1 => self[0].clone().into(),
15            _ => self
16                .iter()
17                .map(|x| x.clone().into().powi(2))
18                .sum::<f64>()
19                .sqrt(),
20        }
21    }
22
23    /// Vector magnitude squared (sum of squares)
24    fn vmagsq(self) -> f64 {
25        match self.len() {
26            0 => 0_f64,
27            1 => self[0].clone().into().powi(2),
28            _ => self.iter().map(|x| x.clone().into().powi(2)).sum::<f64>(),
29        }
30    }
31
32    /// Vector with reciprocal components
33    fn vreciprocal(self) -> Result<Vec<f64>, RE> {
34        if self.is_empty() {
35            return nodata_error("vreciprocal: empty self vec");
36        };
37        self.iter()
38            .map(|component| -> Result<f64, RE> {
39                let c: f64 = component.clone().into();
40                if c.is_normal() {
41                    Ok(1.0 / c)
42                } else {
43                    arith_error("vreciprocal: bad component {c}")
44                }
45            })
46            .collect::<Result<Vec<f64>, RE>>()
47    }
48
49    /// Vector with inverse magnitude
50    fn vinverse(self) -> Result<Vec<f64>, RE> {
51        if self.is_empty() {
52            return nodata_error("vinverse: empty self vec");
53        };
54        let vmagsq = self.vmagsq();
55        if vmagsq > 0.0 {
56            Ok(self.iter().map(|x| x.clone().into() / vmagsq).collect())
57        } else {
58            data_error("vinverse: can not invert zero vector")
59        }
60    }
61
62    // Negated vector (all components swap sign)
63    fn negv(self) -> Result<Vec<f64>, RE> {
64        if self.is_empty() {
65            nodata_error("negv: empty self vec")
66        } else {
67            Ok(self.iter().map(|x| (-x.clone().into())).collect())
68        }
69    }
70
71    /// Unit vector
72    fn vunit(self) -> Result<Vec<f64>, RE> {
73        if self.is_empty() {
74            return nodata_error("vunit: empty self vec");
75        };
76        let mag = self.vmag();
77        if mag > 0.0 {
78            Ok(self.iter().map(|x| x.clone().into() / mag).collect())
79        } else {
80            data_error("vunit: can not make zero vector into a unit vector")
81        }
82    }
83
84    /// Harmonic spread from median
85    fn hmad(self) -> Result<f64, RE> {
86        let n = self.len();
87        if n == 0 {
88            return nodata_error("hmad: empty self");
89        };
90        let fself = self.iter().map(|x| x.clone().into()).collect::<Vec<f64>>();
91        let recmedian = 1.0 / fself.medf_checked()?;
92        let recmad = self
93            .iter()
94            .map(|x| -> Result<f64, RE> {
95                let fx: f64 = x.clone().into();
96                if !fx.is_normal() {
97                    return arith_error("hmad: attempt to divide by zero");
98                };
99                Ok((recmedian - 1.0 / fx).abs())
100            })
101            .collect::<Result<Vec<f64>, RE>>()?
102            .medf_unchecked();
103        Ok(recmedian / recmad)
104    }
105
106    /// Arithmetic mean
107    /// # Example
108    /// ```
109    /// use rstats::Stats;
110    /// let v1 = vec![1_f64,2.,3.,4.,5.,6.,7.,8.,9.,10.,11.,12.,13.,14.];
111    /// assert_eq!(v1.amean().unwrap(),7.5_f64);
112    /// ```
113    fn amean(self) -> Result<f64, RE> {
114        let n = self.len();
115        if n > 0 {
116            Ok(self.iter().map(|x| x.clone().into()).sum::<f64>() / (n as f64))
117        } else {
118            nodata_error("amean: empty self vec")
119        }
120    }
121
122    /// Median and Mad packed into in Params struct
123    fn medmad(self) -> Result<Params, RE> {
124        let median = self.qmedian_by(&mut |a, b| a.partial_cmp(b).unwrap_or(Equal), fromop)?;
125        Ok(Params {
126            centre: median,
127            spread: self.mad(median, fromop),
128        })
129    }
130
131    /// Arithmetic mean and (population) standard deviation
132    /// # Example
133    /// ```
134    /// use rstats::Stats;
135    /// let v1 = vec![1_f64,2.,3.,4.,5.,6.,7.,8.,9.,10.,11.,12.,13.,14.];
136    /// let res = v1.ameanstd().unwrap();
137    /// assert_eq!(res.centre,7.5_f64);
138    /// assert_eq!(res.spread,4.031128874149275_f64);
139    /// ```
140    fn ameanstd(self) -> Result<Params, RE> {
141        let n = self.len();
142        if n == 0 {
143            return nodata_error("ameanstd: empty self vec");
144        };
145        let nf = n as f64;
146        let mut sx2 = 0_f64;
147        let mean = self
148            .iter()
149            .map(|x| {
150                let fx: f64 = x.clone().into();
151                sx2 += fx * fx;
152                fx
153            })
154            .sum::<f64>()
155            / nf;
156        Ok(Params {
157            centre: mean,
158            spread: (sx2 / nf - mean.powi(2)).sqrt(),
159        })
160    }
161
162    /// Linearly weighted arithmetic mean of an f64 slice.     
163    /// Linearly ascending weights from 1 to n.    
164    /// Time dependent data should be in the order of time increasing.
165    /// Then the most recent gets the most weight.
166    /// # Example
167    /// ```
168    /// use rstats::Stats;
169    /// let v1 = vec![1_f64,2.,3.,4.,5.,6.,7.,8.,9.,10.,11.,12.,13.,14.];
170    /// assert_eq!(v1.awmean().unwrap(),9.666666666666666_f64);
171    /// ```
172    fn awmean(self) -> Result<f64, RE> {
173        let n = self.len();
174        if n == 0 {
175            return nodata_error("awmean: empty self vec");
176        };
177        let mut iw = 0_f64; // descending linear weights
178        Ok(self
179            .iter()
180            .map(|x| {
181                iw += 1_f64;
182                iw * x.clone().into()
183            })
184            .sum::<f64>()
185            / (sumn(n) as f64))
186    }
187
188    /// Linearly weighted arithmetic mean and standard deviation of an f64 slice.    
189    /// Linearly ascending weights from 1 to n.    
190    /// Time dependent data should be in the order of time increasing.
191    /// # Example
192    /// ```
193    /// use rstats::Stats;
194    /// let v1 = vec![1_f64,2.,3.,4.,5.,6.,7.,8.,9.,10.,11.,12.,13.,14.];
195    /// let res = v1.awmeanstd().unwrap();
196    /// assert_eq!(res.centre,9.666666666666666_f64);
197    /// assert_eq!(res.spread,3.399346342395192_f64);
198    /// ```
199    fn awmeanstd(self) -> Result<Params, RE> {
200        let n = self.len();
201        if n == 0 {
202            return nodata_error("awmeanstd: empty self vec");
203        };
204        let mut sx2 = 0_f64;
205        let mut w = 0_f64; // descending linear weights
206        let nf = sumn(n) as f64;
207        let centre = self
208            .iter()
209            .map(|x| {
210                let fx: f64 = x.clone().into();
211                w += 1_f64;
212                let wx = w * fx;
213                sx2 += wx * fx;
214                wx
215            })
216            .sum::<f64>()
217            / nf;
218        Ok(Params {
219            centre,
220            spread: (sx2 / nf - centre.powi(2)).sqrt(),
221        })
222    }
223
224    /// Harmonic mean of an f64 slice.
225    /// # Example
226    /// ```
227    /// use rstats::Stats;
228    /// let v1 = vec![1_f64,2.,3.,4.,5.,6.,7.,8.,9.,10.,11.,12.,13.,14.];
229    /// assert_eq!(v1.hmean().unwrap(),4.305622526633627_f64);
230    /// ```
231    fn hmean(self) -> Result<f64, RE> {
232        let n = self.len();
233        if n == 0 {
234            return nodata_error("hmean: empty self vec");
235        };
236        let mut sum = 0_f64;
237        for x in self {
238            let fx: f64 = x.clone().into();
239            if !fx.is_normal() {
240                return arith_error("hmean: attempt to divide by zero");
241            };
242            sum += 1.0 / fx
243        }
244        Ok(n as f64 / sum)
245    }
246
247    /// Harmonic mean and standard deviation
248    /// std is based on reciprocal moments
249    /// # Example
250    /// ```
251    /// use rstats::Stats;
252    /// let v1 = vec![1_f64,2.,3.,4.,5.,6.,7.,8.,9.,10.,11.,12.,13.,14.];
253    /// let res = v1.hmeanstd().unwrap();
254    /// assert_eq!(res.centre,4.305622526633627_f64);
255    /// assert_eq!(res.spread,1.1996764516690959_f64);
256    /// ```
257    fn hmeanstd(self) -> Result<Params, RE> {
258        let n = self.len();
259        if n == 0 {
260            return nodata_error("hmeanstd: empty self vec");
261        };
262        let nf = n as f64;
263        let mut sx2 = 0_f64;
264        let mut sx = 0_f64;
265        for x in self {
266            let fx: f64 = x.clone().into();
267            if !fx.is_normal() {
268                return arith_error("hmeanstd: attempt to divide by zero");
269            };
270            let rx = 1_f64 / fx; // work with reciprocals
271            sx2 += rx * rx;
272            sx += rx;
273        }
274        let recipmean = sx / nf;
275        Ok(Params {
276            centre: 1.0 / recipmean,
277            spread: ((sx2 / nf - recipmean.powi(2)) / (recipmean.powi(4)) / nf).sqrt(),
278        })
279    }
280    /// Linearly weighted harmonic mean of an f64 slice.    
281    /// Linearly ascending weights from 1 to n.    
282    /// Time dependent data should be ordered by increasing time.
283    /// # Example
284    /// ```
285    /// use rstats::Stats;
286    /// let v1 = vec![1_f64,2.,3.,4.,5.,6.,7.,8.,9.,10.,11.,12.,13.,14.];
287    /// assert_eq!(v1.hwmean().unwrap(),7.5_f64);
288    /// ```
289    fn hwmean(self) -> Result<f64, RE> {
290        let n = self.len();
291        if n == 0 {
292            return nodata_error("hwmean: empty self vec");
293        };
294        let mut sum = 0_f64;
295        let mut w = 0_f64;
296        for x in self {
297            let fx: f64 = x.clone().into();
298            if !fx.is_normal() {
299                return arith_error("hwmean: attempt to divide by zero");
300            };
301            w += 1_f64;
302            sum += w / fx;
303        }
304        Ok(sumn(n) as f64 / sum) // reciprocal of the mean of reciprocals
305    }
306
307    /// Weighted harmonic mean and standard deviation
308    /// std is based on reciprocal moments
309    /// # Example
310    /// ```
311    /// use rstats::Stats;
312    /// let v1 = vec![1_f64,2.,3.,4.,5.,6.,7.,8.,9.,10.,11.,12.,13.,14.];
313    /// let res = v1.hmeanstd().unwrap();
314    /// assert_eq!(res.centre,4.305622526633627_f64);
315    /// assert_eq!(res.spread,1.1996764516690959_f64);
316    /// ```
317    fn hwmeanstd(self) -> Result<Params, RE> {
318        let n = self.len();
319        if n == 0 {
320            return nodata_error("hwmeanstd: empty self vec");
321        };
322        let nf = sumn(n) as f64;
323        let mut sx2 = 0_f64;
324        let mut sx = 0_f64;
325        let mut w = 0_f64;
326        for x in self {
327            w += 1_f64;
328            let fx: f64 = x.clone().into();
329            if !fx.is_normal() {
330                return arith_error("hwmeanstd: attempt to divide by zero");
331            };
332            sx += w / fx; // work with reciprocals
333            sx2 += w / (fx * fx);
334        }
335        let recipmean = sx / nf;
336        Ok(Params {
337            centre: 1.0 / recipmean,
338            spread: ((sx2 / nf - recipmean.powi(2)) / (recipmean.powi(4)) / nf).sqrt(),
339        })
340    }
341
342    /// Geometric mean of an i64 slice.  
343    /// The geometric mean is just an exponential of an arithmetic mean
344    /// of log data (natural logarithms of the data items).  
345    /// The geometric mean is less sensitive to outliers near maximal value.  
346    /// Zero valued data is not allowed!
347    /// # Example
348    /// ```
349    /// use rstats::Stats;
350    /// let v1 = vec![1_f64,2.,3.,4.,5.,6.,7.,8.,9.,10.,11.,12.,13.,14.];
351    /// assert_eq!(v1.gmean().unwrap(),6.045855171418503_f64);
352    /// ```
353    fn gmean(self) -> Result<f64, RE> {
354        let n = self.len();
355        if n == 0 {
356            return nodata_error("gmean: empty self vec");
357        };
358        let mut sum = 0_f64;
359        for x in self {
360            let fx: f64 = x.clone().into();
361            if !fx.is_normal() {
362                return arith_error("gmean: attempt to take ln of zero");
363            };
364            sum += fx.ln()
365        }
366        Ok((sum / (n as f64)).exp())
367    }
368
369    /// Geometric mean and std ratio of an f64 slice.  
370    /// Zero valued data is not allowed.  
371    /// Std of ln data becomes a ratio after conversion back.
372    /// # Example
373    /// ```
374    /// use rstats::Stats;
375    /// let v1 = vec![1_f64,2.,3.,4.,5.,6.,7.,8.,9.,10.,11.,12.,13.,14.];
376    /// let res = v1.gmeanstd().unwrap();
377    /// assert_eq!(res.centre,6.045855171418503_f64);
378    /// assert_eq!(res.spread,2.1084348239406303_f64);
379    /// ```
380    fn gmeanstd(self) -> Result<Params, RE> {
381        let n = self.len();
382        if n == 0 {
383            return nodata_error("gmeanstd: empty self vec");
384        };
385        let mut sum = 0_f64;
386        let mut sx2 = 0_f64;
387        for x in self {
388            let fx: f64 = x.clone().into();
389            if !fx.is_normal() {
390                return arith_error("gmeanstd: attempt to take ln of zero");
391            };
392            let lx = fx.ln();
393            sum += lx;
394            sx2 += lx * lx
395        }
396        sum /= n as f64;
397        Ok(Params {
398            centre: sum.exp(),
399            spread: (sx2 / (n as f64) - sum.powi(2)).sqrt().exp(),
400        })
401    }
402
403    /// Linearly weighted geometric mean of an i64 slice.  
404    /// Ascending weights from 1 down to n.    
405    /// Time dependent data should be in time increasing order.  
406    /// The geometric mean is an exponential of an arithmetic mean
407    /// of log data (natural logarithms of the data items).  
408    /// The geometric mean is less sensitive to outliers near maximal value.  
409    /// Zero valued data is not allowed!
410    /// # Example
411    /// ```
412    /// use rstats::Stats;
413    /// let v1 = vec![1_f64,2.,3.,4.,5.,6.,7.,8.,9.,10.,11.,12.,13.,14.];
414    /// assert_eq!(v1.gwmean().unwrap(),8.8185222496341_f64);
415    /// ```
416    fn gwmean(self) -> Result<f64, RE> {
417        let n = self.len();
418        if n == 0 {
419            return nodata_error("gwmean: empty self vec");
420        };
421        let mut w = 0_f64; // ascending weights
422        let mut sum = 0_f64;
423        for x in self {
424            let fx: f64 = x.clone().into();
425            if !fx.is_normal() {
426                return arith_error("gwmean: attempt to take ln of zero");
427            };
428            w += 1_f64;
429            sum += w * fx.ln();
430        }
431        Ok((sum / sumn(n) as f64).exp())
432    }
433
434    /// Linearly weighted version of gmeanstd.
435    /// # Example
436    /// ```
437    /// use rstats::Stats;
438    /// let v1 = vec![1_f64,2.,3.,4.,5.,6.,7.,8.,9.,10.,11.,12.,13.,14.];
439    /// let res = v1.gwmeanstd().unwrap();
440    /// assert_eq!(res.centre,8.8185222496341_f64);
441    /// assert_eq!(res.spread,1.626825493266009_f64);
442    /// ```
443    fn gwmeanstd(self) -> Result<Params, RE> {
444        let n = self.len();
445        if n == 0 {
446            return nodata_error("gwmeanstd: empty self vec");
447        };
448        let mut w = 0_f64; // ascending weights
449        let mut sum = 0_f64;
450        let mut sx2 = 0_f64;
451        for x in self {
452            let fx: f64 = x.clone().into();
453            if !fx.is_normal() {
454                return arith_error("gwmeanstd: attempt to take ln of zero");
455            };
456            let lnx = fx.ln();
457            w += 1_f64;
458            sum += w * lnx;
459            sx2 += w * lnx * lnx;
460        }
461        let nf = sumn(n) as f64;
462        sum /= nf;
463        Ok(Params {
464            centre: sum.exp(),
465            spread: (sx2 / nf - sum.powi(2)).sqrt().exp(),
466        })
467    }
468
469    /// Probability density function of a sorted slice with repeats.
470    /// Repeats are counted and removed
471    fn pdf(self) -> Vec<f64> {
472        let nf = self.len() as f64;
473        let mut res: Vec<f64> = Vec::new();
474        let mut count = 1_usize; // running count
475        let mut lastval = &self[0];
476        self.iter().skip(1).for_each(|s| {
477            if *s > *lastval {
478                // new value encountered
479                res.push((count as f64) / nf); // save previous probability
480                lastval = s; // new value
481                count = 1_usize; // reset counter
482            } else {
483                count += 1;
484            }
485        });
486        res.push((count as f64) / nf); // flush the rest!
487        res
488    }
489
490    /// Information (entropy) (in nats)
491    fn entropy(self) -> f64 {
492        let pdfv = self.sortm(true).pdf();
493        pdfv.iter().map(|&x| -x * (x.ln())).sum()
494    }
495
496    /// (Auto)correlation coefficient of pairs of successive values of (time series) f64 variable.
497    /// # Example
498    /// ```
499    /// use rstats::Stats;
500    /// let v1 = vec![1_f64,2.,3.,4.,5.,6.,7.,8.,9.,10.,11.,12.,13.,14.];
501    /// assert_eq!(v1.autocorr().unwrap(),0.9984603532054123_f64);
502    /// ```
503    fn autocorr(self) -> Result<f64, RE> {
504        let (mut sx, mut sy, mut sxy, mut sx2, mut sy2) = (0_f64, 0_f64, 0_f64, 0_f64, 0_f64);
505        let n = self.len();
506        if n < 2 {
507            return nodata_error("autocorr needs a Vec of at least two items");
508        };
509        let mut x: f64 = self[0].clone().into();
510        self.iter().skip(1).for_each(|si| {
511            let y: f64 = si.clone().into();
512            sx += x;
513            sy += y;
514            sxy += x * y;
515            sx2 += x * x;
516            sy2 += y * y;
517            x = y
518        });
519        let nf = n as f64;
520        Ok((sxy - sx / nf * sy) / ((sx2 - sx / nf * sx) * (sy2 - sy / nf * sy)).sqrt())
521    }
522
523    /// Linear transform to interval [0,1]
524    fn lintrans(self) -> Result<Vec<f64>, RE> {
525        let mm = self.minmax();
526        let min = mm.min.into();
527        let range = mm.max.into() - min;
528        if range == 0_f64 {
529            return arith_error("lintrans: self has zero range");
530        };
531        Ok(self
532            .iter()
533            .map(|x| (x.clone().into() - min) / range)
534            .collect())
535    }
536
537    /// Linearly weighted approximate time series derivative at the last point (present time).
538    /// Weighted sum (backwards half filter), minus the median.
539    /// Rising values return positive result and vice versa.
540    fn dfdt(self, centre: f64) -> Result<f64, RE> {
541        let len = self.len();
542        if len < 2 {
543            return data_error("dfdt: time series too short: {len}");
544        };
545        let mut weight = 0_f64;
546        let mut sumwx = 0_f64;
547        for x in self.iter() {
548            weight += 1_f64;
549            sumwx += weight * x.clone().into();
550        }
551        Ok(sumwx / (sumn(len) as f64) - centre)
552    }
553
554    /// Householder reflector
555    fn house_reflector(self) -> Vec<f64> {
556        let norm = self.vmag();
557        if norm.is_normal() {
558            let mut u = self.smult(1. / norm);
559            if u[0] < 0. {
560                u[0] -= 1.;
561            } else {
562                u[0] += 1.;
563            };
564            let uzero = 1.0 / (u[0].abs().sqrt());
565            u.mutsmult(uzero);
566            return u;
567        };
568        let mut u = vec![0.; self.len()];
569        u[0] = std::f64::consts::SQRT_2;
570        u
571    }
572}