// rstats/vecg.rs

use crate::*;
use core::{cmp::Ordering::*, convert::identity};
use indxvec::{Indices, Vecops};
use medians::Median;
6impl<T> Vecg for &[T]
7where
8    T: Clone + PartialOrd + Into<f64>,
9{
10    /// nd tm_statistic of self against centre and spread.     
11    /// Unlike in 1d, is always positive.
12    fn tm_statistic(self, centre: &[f64], spread: f64) -> Result<f64, RE> {
13        Ok(self.vdist(centre) / spread)
14    }
15
16    /// Dot product of vector self with column c of matrix v
17    fn columnp<U: Clone + Into<f64>>(self, c: usize, v: &[Vec<U>]) -> f64 {
18        self.iter()
19            .enumerate()
20            .map(|(i, x)| x.clone().into() * v[i][c].clone().into())
21            .sum::<f64>()
22    }
23
24    /// Scalar addition to a vector, creates new vec
25    fn sadd<U: Into<f64>>(self, s: U) -> Vec<f64> {
26        let sf: f64 = s.into();
27        self.iter().map(|x| sf + x.clone().into()).collect()
28    }
29
30    /// Scalar multiplication with a vector, creates new vec
31    fn smult<U: Into<f64>>(self, s: U) -> Vec<f64> {
32        let sf: f64 = s.into();
33        self.iter().map(|x| sf * x.clone().into()).collect()
34    }
35
36    /// Scalar product (asymmetric)
37    /// Efficiently multiplies rows and columns
38    /// of triangular matrices, without having to 
39    /// fill them with zeroes and then multiply by them. 
40    /// Self, when shorter, acts as if zero filled at the end,
41    /// v, when shorter, acts as if zero filled at the beginning.
42    fn dotp<U: Clone + Into<f64>>(self, v: &[U]) -> f64 {
43        if self.len() <= v.len() {
44            self.iter()
45                .zip(v)
46                .map(|(xi, vi)| { xi.clone().into() * vi.clone().into() })
47                .sum::<f64>()
48        } else {
49            self.iter().skip(self.len()-v.len())
50                .zip(v)  
51                .map(|(xi, vi)| { xi.clone().into() * vi.clone().into() })
52                .sum::<f64>()
53        }
54    }
55
56    /// Measure d of cloud density in (zero median) self direction:`0 <= d <= |self|`
57    /// Product with signature vec of axial projections of some cloud.  
58    /// Similar result can be obtained by projecting all points onto self but that is usually too slow.
59    fn dotsig(self, sig: &[f64]) -> Result<f64, RE> {
60        let dims = self.len();
61        if 2 * dims != sig.len() {
62            return data_error("dotsig: sig vec must have double the dimensions of self");
63        }
64        let mut ressum = 0_f64;
65        for (i, c) in self.iter().enumerate() {
66            let component = c.clone().into();
67            if component > 0_f64 {
68                ressum += component * sig[i];
69                continue;
70            };
71            if component < 0_f64 {
72                ressum -= component * sig[dims + i];
73            };
74        }
75        Ok(ressum)
76    }
77
78    /// Cosine of angle between the two slices.
79    /// Done in one iteration for efficiency.
80    fn cosine<U: Clone + Into<f64>>(self, v: &[U]) -> f64 {
81        let (mut sxy, mut sy2) = (0_f64, 0_f64);
82        let sx2: f64 = self
83            .iter()
84            .zip(v)
85            .map(|(tx, uy)| {
86                let x = tx.clone().into();
87                let y = uy.clone().into();
88                sxy += x * y;
89                sy2 += y * y;
90                x * x
91            })
92            .sum();
93        sxy / (sx2 * sy2).sqrt()
94    }
95
96    /// Generalized cross product:  
97    /// Sine of an angle between **self** and **v** with correct sign in any number of dimensions
98    fn sine<U: Clone + Into<f64>>(self, v: &[U]) -> f64 {
99        let blade = self.wedge(v);
100        blade.sum().signum()
101            * (blade.magsq()
102                / (self.vmagsq() * v.iter().map(|x| x.clone().into().powi(2)).sum::<f64>()))
103            .sqrt()
104    }
105
106    /// Vector subtraction
107    fn vsub<U: Clone + Into<f64>>(self, v: &[U]) -> Vec<f64> {
108        self.iter()
109            .zip(v)
110            .map(|(xi, vi)| xi.clone().into() - vi.clone().into())
111            .collect()
112    }
113
114    /// Vectors difference unitised (done together for efficiency)
115    fn vsubunit<U: Clone + Into<f64>>(self, v: &[U]) -> Vec<f64> {
116        let mut sumsq = 0_f64;
117        let dif = self
118            .iter()
119            .zip(v)
120            .map(|(xi, vi)| {
121                let d = xi.clone().into() - vi.clone().into();
122                sumsq += d * d;
123                d
124            })
125            .collect::<Vec<f64>>();
126        dif.smult(1_f64 / sumsq.sqrt())
127    }
128
129    /// Vector addition
130    fn vadd<U: Clone + Into<f64>>(self, v: &[U]) -> Vec<f64> {
131        self.iter()
132            .zip(v)
133            .map(|(xi, vi)| xi.clone().into() + vi.clone().into())
134            .collect()
135    }
136
137    /// Euclidian distance   
138    fn vdist<U: Clone + Into<f64>>(self, v: &[U]) -> f64 {
139        self.iter()
140            .zip(v)
141            .map(|(xi, vi)| (xi.clone().into() - vi.clone().into()).powi(2))
142            .sum::<f64>()
143            .sqrt()
144    }
145
146    /// Weighted arithmetic mean of `self:&[T]`, scaled by `ws:&[U]`
147    fn wvmean<U: Clone + Into<f64>>(self, ws: &[U]) -> f64 {
148        let mut wsum: f64 = 0.;
149        let mut sum: f64 = 0.;
150        for (s, w) in self.iter().zip(ws) {
151            let fw = w.clone().into();
152            sum += fw * s.clone().into();
153            wsum += fw;
154        }
155        sum / wsum
156    }
157
158    /// Weighted distance of `self:&[T]` to `v:&[V]`, scaled by `ws:&[U]`
159    /// allows all three to be of different types
160    fn wvdist<U, V>(self, ws: &[U], v: &[V]) -> f64
161    where
162        U: Clone + Into<f64>,
163        V: Clone + Into<f64>,
164    {
165        self.iter()
166            .enumerate()
167            .map(|(i, xi)| (ws[i].clone().into() * xi.clone().into() - v[i].clone().into()).powi(2))
168            .sum::<f64>()
169            .sqrt()
170    }
171
172    /// Euclidian distance squared  
173    fn vdistsq<U: Clone + Into<f64>>(self, v: &[U]) -> f64 {
174        self.iter()
175            .zip(v)
176            .map(|(xi, vi)| (xi.clone().into() - vi.clone().into()).powi(2))
177            .sum::<f64>()
178    }
179
180    /// cityblock distance
181    fn cityblockd<U: Clone + Into<f64>>(self, v: &[U]) -> f64 {
182        self.iter()
183            .zip(v)
184            .map(|(xi, vi)| (xi.clone().into() - vi.clone().into()).abs())
185            .sum::<f64>()
186    }
187
188    /// Area spanned by two vectors over their concave angle (always >= 0)
189    /// |a||b||sin(theta)| == (1-cos2(theta)).sqrt()
190    /// Attains maximum `|a|.|b|` when the vectors are orthogonal.
191    fn varea<U: Clone + PartialOrd + Into<f64>>(self, v: &[U]) -> f64 {
192        (self.vmagsq() * v.vmagsq() - self.dotp(v).powi(2)).sqrt()
193    }
194
195    /// Area of swept arc
196    /// = |a||b|(1-cos(theta)) = 2|a||b|D
197    fn varc<U: Clone + PartialOrd + Into<f64>>(self, v: &[U]) -> f64 {
198        (v.vmagsq() * self.vmagsq()).sqrt() - self.dotp(v)
199    }
200
201    /// Positive dotp in the interval: `[0,2|a||b|]`
202    /// = |a||b|(1+cos(theta)) = 2|a||b|S
203    fn pdotp<U: Clone + PartialOrd + Into<f64>>(self, v: &[U]) -> f64 {
204        (v.vmagsq() * self.vmagsq()).sqrt() + self.dotp(v)
205    }
206
207    /// We define vector similarity S in the interval [0,1] as
208    /// S = (1+cos(theta))/2
209    fn vsim<U: Clone + Into<f64>>(self, v: &[U]) -> f64 {
210        (1.0 + self.cosine(v)) / 2.0
211    }
212
213    /// We define vector dissimilarity D in the interval [0,1] as
214    /// D = 1-S = (1-cos(theta))/2
215    fn vdisim<U: Clone + Into<f64>>(self, v: &[U]) -> f64 {
216        (1.0 - self.cosine(v)) / 2.0
217    }
218
219    /// Vectors median correlation similarity in [0,1]
220    fn vcorrsim(self, v: Self) -> Result<f64, RE> {
221        Ok((1.0
222            + self.med_correlation(v, &mut |a, b| a.partial_cmp(b).unwrap_or(Equal), |a| {
223                identity(a.clone().into())
224            })?)
225            / 2.0)
226    }
227
228    /// Lower triangular covariance matrix for a single vector.
229    /// Where m is either mean or median vector (to be subtracted).
230    /// Covariance matrix is symmetric (kind:2) (positive semi definite).
231    fn covone<U: Clone + Into<f64>>(self, m: &[U]) -> TriangMat {
232        let mut cov: Vec<f64> = Vec::new(); // flat lower triangular result array
233        let vm = self.vsub(m); // zero mean/median vector
234        vm.iter().enumerate().for_each(|(i,&thisc)|
235            // generate its products up to and including the diagonal (itself)
236            vm.iter().take(i+1).for_each(|&component| cov.push(thisc*component)));
237        TriangMat { kind: 2, data: cov }
238    }
239
240    /// Kronecker product of two vectors.   
241    /// The indexing is always assumed to be in this order: row,column.
242    /// Flat version of outer(wedge) product
243    fn kron<U: Clone + Into<f64>>(self, v: &[U]) -> Vec<f64> {
244        let vf = v.iter().map(|vi| vi.clone().into()).collect::<Vec<f64>>();
245        self.iter()
246            .flat_map(|s| {
247                let sf: f64 = s.clone().into();
248                vf.iter().map(move |&vfi| sf * vfi)
249            })
250            .collect::<Vec<f64>>()
251    }
252
253    /// Outer product: matrix multiplication of column self with row v.
254    fn outer<U: Clone + Into<f64>>(self, v: &[U]) -> Vec<Vec<f64>> {
255        let vf = v.iter().map(|vi| vi.clone().into()).collect::<Vec<f64>>();
256        self.iter()
257            .map(|s| {
258                let sf: f64 = s.clone().into();
259                vf.iter().map(|&vfi| sf * vfi).collect::<Vec<f64>>()
260            })
261            .collect::<Vec<Vec<f64>>>()
262    }
263
264    /// Exterior (Grassman) algebra product: produces components of 2-blade **a∧b**
265    fn wedge<U: Clone + Into<f64>>(self, b: &[U]) -> TriangMat {
266        let n = self.len();
267        assert_eq!(n, b.len());
268        let mut result: Vec<f64> = Vec::new();
269        for i in 0..n {
270            let ai: f64 = self[i].clone().into();
271            let bi: f64 = b[i].clone().into();
272            for j in 0..i + 1 {
273                result.push(ai * b[j].clone().into() - bi * self[j].clone().into());
274            }
275        }
276        TriangMat {
277            kind: 1,
278            data: result,
279        }
280    }
281
282    /// Geometric (Clifford) algebra product: **a*b** + **a∧b** in matrix form
283    /// here the elements of the dot product a*b are placed in their
284    /// natural positions on the diagonal (can be easily added)
285    fn geometric<U: Clone + Into<f64>>(self, b: &[U]) -> TriangMat {
286        let n = self.len();
287        assert_eq!(n, b.len());
288        let mut result: Vec<f64> = Vec::new();
289        for i in 0..n {
290            let ai: f64 = self[i].clone().into();
291            let bi: f64 = b[i].clone().into();
292            for j in 0..i {
293                result.push(ai * b[j].clone().into() - bi * self[j].clone().into());
294            }
295            result.push(ai * bi); // the diagonal dot product element
296        }
297        TriangMat {
298            kind: 1,
299            data: result,
300        }
301    }
302
303    /// Joint probability density function of two pairwise matched slices
304    fn jointpdf<U: Clone + Into<f64>>(self, v: &[U]) -> Result<Vec<f64>, RE> {
305        let n = self.len();
306        if v.len() != n {
307            return data_error("jointpdf argument vectors must be of equal length!");
308        };
309        let nf = n as f64;
310        let mut res: Vec<f64> = Vec::new();
311        // collect successive pairs, upgrading all end types to common f64
312        let mut spairs: Vec<(f64, f64)> = self
313            .iter()
314            .zip(v)
315            .map(|(si, vi)| (si.clone().into(), vi.clone().into()))
316            .collect();
317        // sort them to group all same pairs together for counting
318        spairs.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap());
319        let mut count = 1_usize; // running count
320        let mut lastindex = 0;
321        spairs.iter().enumerate().skip(1).for_each(|(i, si)| {
322            if si > &spairs[lastindex] {
323                // new pair encountered
324                res.push((count as f64) / nf); // save previous probability
325                lastindex = i; // current index becomes the new one
326                count = 1_usize; // reset counter
327            } else {
328                count += 1;
329            }
330        });
331        res.push((count as f64) / nf); // flush the rest!
332        Ok(res)
333    }
334
335    /// Joint entropy of two sets of the same length
336    fn jointentropy<U: Clone + Into<f64>>(self, v: &[U]) -> Result<f64, RE> {
337        let jpdf = self.jointpdf(v)?;
338        Ok(jpdf.iter().map(|&x| -x * (x.ln())).sum())
339    }
340
341    /// Dependence of &[T] &[U] variables in the range [0,1]
342    /// returns 0 iff they are statistically component wise independent
343    /// returns 1 when they are identical or all their values are unique
344    fn dependence<U: Clone + PartialOrd + Into<f64>>(self, v: &[U]) -> Result<f64, RE> {
345        Ok((self.entropy() + v.entropy()) / self.jointentropy(v)? - 1.0)
346    }
347
348    /// Independence of &[T] &[U] variables in the range [0,1]
349    /// returns 1 iff they are statistically component wise independent
350    fn independence<U: Clone + PartialOrd + Into<f64>>(self, v: &[U]) -> Result<f64, RE> {
351        Ok(2.0 * self.jointentropy(v)? / (self.entropy() + v.entropy()) - 1.0)
352    }
353
354    /// Pearson's (most common) correlation.
355    /// # Example
356    /// ```
357    /// use rstats::Vecg;
358    /// let v1 = vec![1_f64,2.,3.,4.,5.,6.,7.,8.,9.,10.,11.,12.,13.,14.];
359    /// let v2 = vec![14_f64,1.,13.,2.,12.,3.,11.,4.,10.,5.,9.,6.,8.,7.];
360    /// assert_eq!(v1.correlation(&v2),-0.1076923076923077);
361    /// ```
362    fn correlation<U: Clone + Into<f64>>(self, v: &[U]) -> f64 {
363        let (mut sy, mut sxy, mut sx2, mut sy2) = (0_f64, 0_f64, 0_f64, 0_f64);
364        let sx: f64 = self
365            .iter()
366            .zip(v)
367            .map(|(xt, yu)| {
368                let x = xt.clone().into();
369                let y = yu.clone().into();
370                sy += y;
371                sxy += x * y;
372                sx2 += x * x;
373                sy2 += y * y;
374                x
375            })
376            .sum();
377        let nf = self.len() as f64;
378        (sxy - sy * sx / nf) / ((sx2 - sx * sx / nf) * (sy2 - sy * sy / nf)).sqrt()
379    }
380    /// Kendall Tau-B correlation.
381    /// Defined by: tau = (conc - disc) / sqrt((conc + disc + tiesx) * (conc + disc + tiesy))
382    /// This is the simplest implementation with no sorting.
383    /// # Example
384    /// ```
385    /// use rstats::Vecg;
386    /// let v1 = vec![1_f64,2.,3.,4.,5.,6.,7.,8.,9.,10.,11.,12.,13.,14.];
387    /// let v2 = vec![14_f64,1.,13.,2.,12.,3.,11.,4.,10.,5.,9.,6.,8.,7.];
388    /// assert_eq!(v1.kendalcorr(&v2),-0.07692307692307693);
389    /// ```
390    fn kendalcorr<U: Clone + Into<f64>>(self, v: &[U]) -> f64 {
391        let (mut conc, mut disc, mut tiesx, mut tiesy) = (0_i64, 0_i64, 0_i64, 0_i64);
392        for i in 1..self.len() {
393            let x = self[i].clone().into();
394            let y = v[i].clone().into();
395            for j in 0..i {
396                let xd = x - self[j].clone().into();
397                let yd = y - v[j].clone().into();
398                if !xd.is_normal() {
399                    if !yd.is_normal() {
400                        continue;
401                    } else {
402                        tiesx += 1;
403                        continue;
404                    }
405                };
406                if !yd.is_normal() {
407                    tiesy += 1;
408                    continue;
409                };
410                if (xd * yd).signum() > 0_f64 {
411                    conc += 1
412                } else {
413                    disc += 1
414                }
415            }
416        }
417        (conc - disc) as f64 / (((conc + disc + tiesx) * (conc + disc + tiesy)) as f64).sqrt()
418    }
419    /// Spearman rho correlation.
420    /// This is the simplest implementation with no sorting.
421    /// # Example
422    /// ```
423    /// use rstats::Vecg;
424    /// let v1 = vec![1_f64,2.,3.,4.,5.,6.,7.,8.,9.,10.,11.,12.,13.,14.];
425    /// let v2 = vec![14_f64,1.,13.,2.,12.,3.,11.,4.,10.,5.,9.,6.,8.,7.];
426    /// assert_eq!(v1.spearmancorr(&v2),-0.1076923076923077);
427    /// ```
428    fn spearmancorr<U: PartialOrd + Clone + Into<f64>>(self, v: &[U]) -> f64 {
429        let xvec = self.rank(true);
430        let yvec = v.rank(true); // rank from crate idxvec::merge
431                                 // It is just Pearson's correlation of usize ranks
432        xvec.ucorrelation(&yvec) // using Indices trait from idxvec
433    }
434
435    /// Delta gm that adding a point will cause
436    fn contribvec_newpt(self, gm: &[f64], recips: f64) -> Result<Vec<f64>, RE> {
437        let dv = self.vsub(gm);
438        let mag = dv.vmag();
439        if !mag.is_normal() {
440            return arith_error("contribvec_newpt: point being added is coincident with gm");
441        };
442        // adding new unit vector (to approximate zero vector) and rescaling
443        let recip = 1f64 / mag;
444        Ok(dv.vunit()?.smult(recip / (recips + recip)))
445    }
446
447    /// Normalized magnitude of change to gm that adding a point will cause
448    fn contrib_newpt(self, gm: &[f64], recips: f64, nf: f64) -> Result<f64, RE> {
449        let mag = self.vdist(gm);
450        if !mag.is_normal() {
451            return arith_error("contrib_newpt: point being added is coincident with gm");
452        };
453        let recip = 1f64 / mag; // first had to test for division by zero
454        Ok((nf + 1.0) / (recips + recip))
455    }
456
457    /// Delta gm caused by removing an existing set point self
458    fn contribvec_oldpt(self, gm: &[f64], recips: f64) -> Result<Vec<f64>, RE> {
459        let dv = self.vsub(gm);
460        let mag = dv.vmag();
461        if !mag.is_normal() {
462            return arith_error("contribvec_oldpt: point being removed is coincident with gm");
463        };
464        let recip = 1f64 / mag; // first had to test for division by zero
465        Ok(dv.vunit()?.smult(recip / (recip - recips))) // scaling
466    }
467
468    /// Normalized Contribution that removing an existing set point p will make
469    /// Is a negative number
470    fn contrib_oldpt(self, gm: &[f64], recips: f64, nf: f64) -> Result<f64, RE> {
471        let mag = self.vdist(gm);
472        if !mag.is_normal() {
473            return arith_error("contrib_oldpt: point being removed is coincident with gm");
474        };
475        let recip = 1f64 / mag; // first had to test for division by zero
476        Ok((nf - 1.0) / (recip - recips))
477        // self.contribvec_oldpt(gm,recips,p).vmag()
478    }
479
480    /// Householder reflect
481    fn house_reflect<U: Clone + PartialOrd + Into<f64>>(self, x: &[U]) -> Vec<f64> {
482        x.vsub(&self.smult(x.dotp(self)))
483    }
484}