rstats/
vecvec.rs

1use crate::*;
2use indxvec::Vecops;
3use medians::{MedError, Medianf64};
4use rayon::prelude::*;
5
6impl<T> VecVec<T> for &[Vec<T>]
7where
8    T: Clone + PartialOrd + Sync + Into<f64>,
9    Vec<Vec<T>>: IntoParallelIterator,
10    Vec<T>: IntoParallelIterator{
11
12    /// Linearly weighted approximate time series derivative at the last point (present time).
13    /// Weighted average (backwards half filter), minus the median. 
14    fn dvdt(self, centre: &[f64]) -> Result<Vec<f64>, RE> {
15        let len = self.len();
16        if len < 2 {
17            return nodata_error(format!("dvdt time series is too short: {len}"));
18        };
19        let mut weight = 1_f64; 
20        let mut sumwv:Vec<f64> = self[0].iter().map(|x| x.clone().into()).collect();
21        for v in self.iter().skip(1) { 
22            weight += 1_f64;
23            sumwv.mutvadd(&v.smult(weight));
24        };
25        Ok(sumwv.smult(1.0/(sumn(len) as f64)).vsub(centre))
26    }
27
28    /// Maps scalar valued closure onto all vectors in self and collects
29    fn scalar_fn(self,f: impl Fn(&[T]) -> Result<f64,RE>) -> Result<Vec<f64>,RE> {
30        self.iter().map(|s|-> Result<f64,RE> {
31            f(s) }).collect::<Result<Vec<f64>,RE>>()
32    }
33
34    /// Maps vector valued closure onto all vectors in self and collects
35    fn vector_fn(self,f: impl Fn(&[T]) -> Result<Vec<f64>,RE>) -> Result<Vec<Vec<f64>>,RE> {
36        self.iter().map(|s|-> Result<Vec<f64>,RE> {
37            f(s) }).collect::<Result<Vec<Vec<f64>>,RE>>()
38    } 
39
40    /// Exact radii magnitudes to all member points from the Geometric Median.
41    /// More accurate and usually faster as well than the approximate `eccentricities` above,
42    /// especially when there are many points.
43    fn radii(self, gm: &[f64]) -> Result<Vec<f64>, RE> {
44        self.scalar_fn(|s| Ok(gm.vdist(s)))  
45    }   
46
47    /// Selects a column by number
48    fn column(self, cnum: usize) -> Vec<f64> {
49        self.iter().map(|row| row[cnum].clone().into()).collect()
50    }
51
52    /// Multithreaded transpose of vec of vecs matrix
53    fn transpose(self) -> Vec<Vec<f64>> {
54        (0..self[0].len())
55            .into_par_iter()
56            .map(|cnum| self.column(cnum))
57            .collect()
58    }
59
60    /// Normalize columns of a matrix, so that they become unit row vectors
61    fn normalize(self) -> Result<Vec<Vec<f64>>, RE> {
62        (0..self[0].len())
63            .into_par_iter()
64            .map(|cnum| -> Result<Vec<f64>, RE> { self.column(cnum).vunit() })
65            .collect()
66    }
67
68    /// Householder's method returning triangular matrices (U',R), where
69    /// U are the reflector generators for use by house_uapply(m).
70    /// R is the upper triangular decomposition factor.
71    /// Here both U and R are returned for convenience in their transposed lower triangular forms.
72    /// Transposed input self for convenience, so that original columns get accessed easily as rows.
73    fn house_ur(self) -> Result<(TriangMat, TriangMat),RE> {
74        let n = self.len();
75        let d = self[0].len();
76        let min = if d <= n { d } else { n }; // minimal dimension
77        let mut r = self.transpose(); // self.iter().map(|s| s.tof64()).collect::<Vec<Vec<f64>>>(); //  //
78        let mut ures = vec![0.; sumn(min)];
79        let mut rres = Vec::with_capacity(sumn(min));
80        for j in 0..min {
81            let Some(slc) = r[j].get(j..d) 
82            else { return data_error("house_ur: failed to extract uvec slice"); };
83            let uvec = slc.house_reflector();
84            for rlast in r.iter_mut().take(d).skip(j) {
85                let rvec = uvec.house_reflect::<f64>(&rlast.drain(j..d).collect::<Vec<f64>>());
86                rlast.extend(rvec);
87                // drained, reflected with this uvec, and rebuilt, all remaining rows of r
88            }
89            // these uvecs are columns, so they must be saved column-wise
90            for (row, &usave) in uvec.iter().enumerate() {
91                ures[sumn(row + j) + j] = usave; // using triangular index
92            }
93            // save completed `r[j]` components only up to and including the diagonal
94            // we are not even storing the rest, so no need to set those to zero
95            for &rsave in r[j].iter().take(j + 1) {
96                rres.push(rsave)
97            }
98        }
99        Ok((
100            TriangMat {
101                kind: 3,
102                data: ures,
103            }, // transposed, non symmetric kind
104            TriangMat {
105                kind: 3,
106                data: rres,
107            }, // transposed, non symmetric kind
108        ))
109    }
110
111    /// Joint probability density function of n matched slices of the same length
112    fn jointpdfn(self) -> Result<Vec<f64>, RE> {
113        let d = self[0].len(); // their common dimensionality (length)
114        for v in self.iter().skip(1) {
115            if v.len() != d {
116                return data_error("jointpdfn: all vectors must be of equal length!");
117            };
118        }
119        let mut res: Vec<f64> = Vec::with_capacity(d);
120        let mut tuples = self.transpose();
121        let df = tuples.len() as f64; // for turning counts to probabilities
122        // lexical sort to group together occurrences of identical tuples
123        tuples.sort_unstable_by(
124            |a, b| a.partial_cmp(b)
125            .expect("jointpdfn: tuples comparison failed"));
126        let mut count = 1_usize; // running count
127        let mut lastindex = 0; // initial index of the last unique tuple
128        tuples.iter().enumerate().skip(1).for_each(|(i, ti)| {
129            if ti > &tuples[lastindex] {
130                // new tuple ti (Vec<T>) encountered
131                res.push((count as f64) / df); // save frequency count as probability
132                lastindex = i; // current index becomes the new one
133                count = 1_usize; // reset counter
134            } else {
135                count += 1;
136            }
137        });
138        res.push((count as f64) / df); // flush the rest!
139        Ok(res)
140    }
141
142    /// Joint entropy of vectors of the same length
143    fn jointentropyn(self) -> Result<f64, RE> {
144        let jpdf = self.jointpdfn()?;
145        Ok(jpdf.iter().map(|&x| -x * (x.ln())).sum())
146    }
147
148    /// Dependence (component wise) of a set of vectors.
149    /// i.e. `dependencen` returns 0 iff they are statistically independent
150    /// bigger values when they are dependentent
151    fn dependencen(self) -> Result<f64, RE> {
152        Ok((0..self.len())
153            .into_par_iter()
154            .map(|i| self[i].entropy())
155            .sum::<f64>()
156            / self.jointentropyn()?
157            - 1.0)
158    }
159
160    /// Flattened lower triangular part of a symmetric matrix for vectors in self.
161    /// The upper triangular part can be trivially generated for all j>i by: c(j,i) = c(i,j).
162    /// Applies closure f to compute a scalar binary relation between all pairs of vector
163    /// components of self.   
164    /// The closure typically invokes one of the methods from Vecg trait (in vecg.rs),
165    /// such as dependencies.  
166    /// Example call: `pts.transpose().crossfeatures(|v1,v2| v1.mediancorrf64(v2)?)?`
167    /// computes median correlations between all column vectors (features) in pts.
168    fn crossfeatures(self, f: fn(&[T], &[T]) -> f64) -> Result<TriangMat, RE> {
169        Ok(TriangMat {
170            kind: 2, // symmetric, non transposed
171            data: (0..self.len())
172                .into_par_iter()
173                .flat_map(|i| {
174                    (0..i+1)
175                        .map(|j| f(&self[i], &self[j]))
176                        .collect::<Vec<f64>>()
177                })
178                .collect::<Vec<f64>>(),
179        })
180    }
181
182    /// Sum of nd points (or vectors)
183    fn sumv(self) -> Vec<f64> {
184        let mut resvec = vec![0_f64; self[0].len()];
185        for v in self {
186            resvec.mutvadd(v)
187        }
188        resvec
189    }
190
191    /// acentroid = multidimensional arithmetic mean
192    fn acentroid(self) -> Vec<f64> {
193        self.sumv().smult(1. / (self.len() as f64))
194    }
195
196    /// multithreaded acentroid = multidimensional arithmetic mean
197    fn par_acentroid(self) -> Vec<f64> {
198        let sumvec = self
199            .par_iter()
200            .fold(
201                || vec![0_f64; self[0].len()],
202                |mut vecsum: Vec<f64>, p| {
203                    vecsum.mutvadd(p);
204                    vecsum
205                },
206            )
207            .reduce(
208                || vec![0_f64; self[0].len()],
209                |mut finalsum: Vec<f64>, partsum: Vec<f64>| {
210                    finalsum.mutvadd(&partsum);
211                    finalsum
212                },
213            );
214        sumvec.smult(1. / (self.len() as f64))
215    }
216
217    /// gcentroid = multidimensional geometric mean
218    fn gcentroid(self) -> Result<Vec<f64>,RE> { 
219        let logvs = self.iter().map(|v|-> Result<Vec<f64>,RE> {
220            Ok(v.vunit()?.smult(v.vmag().ln())) })
221            .collect::<Result<Vec<Vec<f64>>,RE>>()?;
222        let logcentroid = logvs.acentroid();
223        Ok(logcentroid.vunit()?.smult(logcentroid.vmag().exp()))
224    }
225
226    /// hcentroid =  multidimensional harmonic mean
227    fn hcentroid(self) -> Result<Vec<f64>,RE> {
228        let mut centre = vec![0_f64; self[0].len()];
229        for v in self {
230            centre.mutvadd(&v.vinverse()?)
231        }
232        Ok(centre  
233            .vinverse()?.smult(self.len() as f64)) 
234    }
235
236    /// For each member point, gives its sum of distances to all other points and their MinMax
237    fn distsums(self) -> Vec<f64> {
238        let n = self.len();
239        let mut dists = vec![0_f64; n]; // distances accumulator for all points
240                                        // examine all unique pairings (lower triangular part of symmetric flat matrix)
241        self.iter().enumerate().for_each(|(i, thisp)| {
242            self.iter().take(i).enumerate().for_each(|(j, thatp)| {
243                let d = thisp.vdist(thatp); // calculate each distance relation just once
244                dists[i] += d;
245                dists[j] += d; // but add it to both points' sums
246            })
247        });
248        dists
249    }
250
251    /// Points nearest and furthest from the geometric median.
252    /// Returns struct MinMax{min,minindex,max,maxindex}
253    fn medout(self, gm: &[f64]) -> Result<MinMax<f64>,RE> {
254        Ok(self.scalar_fn(|s| Ok(gm.vdist(s)))?.minmax())
255    }
256
257    /// Radius of a point specified by its subscript.    
258    fn radius(self, i: usize, gm: &[f64]) -> Result<f64, RE> {
259        if i > self.len() {
260            return data_error("radius: invalid subscript");
261        }
262        Ok(self[i].vdist(gm))
263    }
264
265    /// Arith mean and std (in Params struct), Median and MAD (in another Params struct), Medoid and Outlier (in MinMax struct)
266    /// of scalar radii of points in self.
267    /// These are new robust measures of a cloud of multidimensional points (or multivariate sample).  
268    fn eccinfo(self, gm: &[f64]) -> Result<(Params, Params, MinMax<f64>), RE>
269    where
270        Vec<f64>: FromIterator<f64>,
271    {
272        let rads: Vec<f64> = self.radii(gm)?;
273        let radsmed = rads.medf_unchecked();
274        let radsmad = rads.madf(radsmed);
275        Ok((rads.ameanstd()?, Params{centre:radsmed,spread:radsmad}, rads.minmax()))
276    }
277
278    /// Quasi median, recommended only for comparison purposes
279    fn quasimedian(self) -> Result<Vec<f64>, RE> {
280        Ok((0..self[0].len()) 
281            .map(|colnum| self.column(colnum).medf_checked())
282            .collect::<Result<Vec<f64>, MedError<String>>>()?)
283    }
284
285    /// Proportional projections on each +/- axis (by hemispheres).
286    /// Adds only points that are specified in idx.
287    /// Self should be zero median vectors, previously obtained by `self.translate(&gm)`.
288    /// The result is normalized to unit vector.
289    fn sigvec(self, idx: &[usize]) -> Result<Vec<f64>, RE> { 
290        let dims = self[0].len();
291        if self.is_empty() {
292            return nodata_error("sigvec given empty data");
293        };
294        let mut hemis = vec![0_f64; 2 * dims];
295        for &i in idx {
296            for (j, component) in self[i].iter().enumerate() {
297                let cf = component.clone().into();
298                if cf < 0. {
299                    hemis[dims + j] -= cf;
300                    continue;
301                };
302                hemis[j] += cf;
303                };
304            }; 
305        hemis.vunit()
306    }
307
308    /// madgm median of distances from gm: stable nd data spread measure
309    fn madgm(self, gm: &[f64]) -> Result<f64, RE> {
310        if self.is_empty() { 
311            return nodata_error("madgm given empty vec!"); };     
312        Ok(self.radii(gm)?.medf_unchecked())
313     }
314
315    /// stdgm mean of distances from gm: nd data spread measure, aka nd standard deviation
316    fn stdgm(self, gm: &[f64]) -> Result<f64,RE> { 
317        if self.is_empty() { 
318            return nodata_error("stdgm given empty vec!")?; };     
319        Ok( self.iter()
320            .map(|s| s.vdist(gm)).sum::<f64>()/self.len() as f64 ) 
321    }
322
323    /// Outer hull subscripts from their square radii and their sort index
324    fn outer_hull(self, sqrads: &[f64], sindex: &[usize]) -> Vec<usize> {
325        let mut hullindex: Vec<usize> = Vec::new();
326        let len = sindex.len();
327        // test ipoints in descending sqrads order
328        'ploop: for i in (0..len).rev() {
329            let ipoint = &self[sindex[i]];
330            // against jpoints with greater radius
331            for j in (i+1..len).rev() {  
332                // this jpoint lies outside the normal plane to ipoint => reject ipoint  
333                if self[sindex[j]].dotp(ipoint) > sqrads[sindex[i]] { continue 'ploop; };
334            };
335            hullindex.push(sindex[i]);  // passed
336        };      
337        hullindex
338    }
339
340    /// Inner hull points from their square radii and their sort index
341    fn inner_hull(self, sqrads: &[f64], sindex: &[usize]) -> Vec<usize> {
342        let mut hullindex: Vec<usize> = Vec::new();
343        let len = sindex.len();
344        // test ipoints in ascending sqrads order
345        'ploop: for i in 0..len {  
346            let ipoint = &self[sindex[i]];
347            // against jpoints with smaller radius
348            for j in 0..i { 
349                // this jpoint lies inside ipoint => reject ipoint  
350                if self[sindex[j]].dotp(ipoint) > sqrads[sindex[j]] { continue 'ploop; };
351            };
352            hullindex.push(sindex[i]);  // ipoint passed
353        };      
354        hullindex
355    }
356
357    /// Likelihood of zero median point **p** belonging to zero median data cloud `self`,
358    /// based on the points outside of the normal plane through **p**. 
359    /// Returns the sum of unit vectors of its outside points, projected onto unit **p**. 
360    /// Index should be in the descending order of magnitudes of self points (for efficiency).
361    fn depth(self, descending_index: &[usize], p: &[f64]) -> Result<f64,RE> {
362        let psq = p.vmagsq();
363        let mut sumvec = vec![0_f64;p.len()]; 
364        for &i in descending_index {
365            let s = &self[i];
366            let ssq = s.vmagsq();
367            if ssq <= psq { break; }; // no more outside points
368            if s.dotp(p) > psq { sumvec.mutvadd(&s.smult(1.0/(ssq.sqrt()))) };
369        };
370        Ok(sumvec.dotp(&p.vunit()?))
371    }
372
373    /// Likelihood of zero median point **p** belonging to zero median data cloud `self`,
374    /// based on the proportion of points outside of the normal plane through **p**. 
375    /// Index should be in the descending order of magnitudes of self points (for efficiency).
376    fn depth_ratio(self, descending_index: &[usize], p: &[f64]) -> f64 {
377        let psq = p.vmagsq();
378        let mut num = 0_f64; 
379        for &i in descending_index {
380            let s = &self[i];
381            let ssq = s.vmagsq();
382            if ssq <= psq { break; }; // no more outside points
383            if s.dotp(p) > psq { num += 1.0; };
384        };
385        num/(self.len() as f64)
386    }
387 
388    /// Collects indices of inner hull and outer hull, from zero median points in self.
389    /// We put a plane trough data point A, normal to its zero median vector **a**.     
390    /// B is an inner hull point, when it lies inside all other points' normal planes.  
391    /// C is an outer hull point, when there is no other point outside its own normal plane.
392    /// B can belong to both hulls, as when all the points lie on a hyper-sphere around **gm**.   
393    /// The testing is done in increasing (decreasing) radius order.  
394    /// B lies outside the normal plane of **a**, when its projection onto unit **a** exceeds
395    /// `|a|: |b|cos(θ) > |a| => a*b > |a|^2`,  
396    /// such B immediately fails as a candidate for the inner hull.
397    /// Using square magnitudes, `|a|^2` saves taking square roots and dividing the dot product by |a|.  
398    /// Similarly for the outer hull, where A and B simply swap roles.
399    fn hulls(self) -> (Vec<usize>, Vec<usize>) {
400        let sqradii = self.iter().map(|s| s.vmagsq()).collect::<Vec<f64>>();
401        let sindex = sqradii.mergesort_indexed();  
402        let innerindex = self.inner_hull(&sqradii,&sindex); 
403        let outerindex = self.outer_hull(&sqradii,&sindex); 
404        (innerindex, outerindex)
405    }
406
407    /// Geometric median's residual error
408    fn gmerror(self, g: &[f64]) -> Result<f64, RE> {
409        let mut unitvecssum = vec![0_f64; self[0].len()];
410        for v in self { unitvecssum.mutvadd(&v.vsub(g).vunit()?); };
411        Ok(unitvecssum.vmag())
412    }
413
414    /// Geometric Median (gm) is the point that minimises the sum of distances to a given set of points.
415    /// It has (provably) only vector iterative solutions.
416    /// Search methods are slow and difficult in highly dimensional space.
417    /// Weiszfeld's fixed point iteration formula has known problems with sometimes failing to converge.
418    /// Especially, when the points are dense in the close proximity of the gm, or gm coincides with one of them.  
419    /// However, these problems are fixed in my new algorithm here.
420    /// The sum of reciprocals is strictly increasing and so is used here as
421    /// easy to evaluate termination condition.
422    fn gmedian(self, eps: f64) -> Vec<f64> {
423        let mut g = self.acentroid(); // start iterating from the mean  or vec![0_f64; self[0].len()];
424        let mut recsum = 0f64;
425        loop {
426            // vector iteration till accuracy eps is exceeded
427            let mut nextg = vec![0_f64; self[0].len()];
428            let mut nextrecsum = 0_f64;
429            for p in self {
430                // |p-g| done in-place for speed. Could have simply called p.vdist(g)
431                let mag: f64 = p
432                    .iter()
433                    .zip(&g)
434                    .map(|(vi, gi)| (vi.clone().into() - gi).powi(2))
435                    .sum();
436                if mag > eps {
437                    // reciprocal of distance (scalar)
438                    let rec = 1.0_f64 / (mag.sqrt()); 
439                    // vsum increment by components
440                    for (vi, gi) in p.iter().zip(&mut nextg) {
441                        *gi += vi.clone().into() * rec
442                    }
443                    // add the scaling reciprocal
444                    nextrecsum += rec 
445                } // ignore point p when |p-g| <= eps
446            }
447            nextg.iter_mut().for_each(|gi| *gi /= nextrecsum); 
448            if nextrecsum - recsum < eps {
449                return nextg;
450            }; // termination
451            g = nextg;
452            recsum = nextrecsum;
453        }
454    }
455
456    /// Parallel (multithreaded) implementation of Geometric Median. Possibly the fastest you will find.  
457    /// Geometric Median (gm) is the point that minimises the sum of distances to a given set of points.  
458    /// It has (provably) only vector iterative solutions.    
459    /// Search methods are slow and difficult in hyper space.    
460    /// Weiszfeld's fixed point iteration formula has known problems and sometimes fails to converge.  
461    /// Specifically, when the points are dense in the close proximity of the gm, or gm coincides with one of them.    
462    /// However, these problems are solved in my new algorithm here.     
463    /// The sum of reciprocals is strictly increasing and so is used to easily evaluate the termination condition.  
464    fn par_gmedian(self, eps: f64) -> Vec<f64> {
465        let mut g = self.par_acentroid(); // start iterating from the mean  or vec![0_f64; self[0].len()];
466        let mut recsum = 0_f64;
467        loop {
468            // vector iteration till accuracy eps is exceeded
469            let (mut nextg, nextrecsum) = self
470                .par_iter()
471                .fold(
472                    || (vec![0_f64; self[0].len()], 0_f64),
473                    |mut pair: (Vec<f64>, f64), p: &Vec<T>| {
474                        // |p-g| done in-place for speed. Could have simply called p.vdist(g)
475                        let mag: f64 = p
476                            .iter()
477                            .zip(&g)
478                            .map(|(vi, gi)| (vi.clone().into() - gi).powi(2))
479                            .sum();
480                        if mag > eps {
481                            let rec = 1.0_f64 / (mag.sqrt()); // reciprocal of distance (scalar)
482                            for (vi, gi) in p.iter().zip(&mut pair.0) {
483                                *gi += vi.clone().into() * rec
484                            }
485                            pair.1 += rec; // add separately the reciprocals for the final scaling
486                        } // else simply ignore this point should its distance from g be zero
487                        pair
488                    },
489                )
490                // must run reduce on the partial sums produced by fold
491                .reduce(
492                    || (vec![0_f64; self[0].len()], 0_f64),
493                    |mut pairsum: (Vec<f64>, f64), pairin: (Vec<f64>, f64)| {
494                        pairsum.0.mutvadd(&pairin.0);
495                        pairsum.1 += pairin.1;
496                        pairsum
497                    },
498                );
499            nextg.iter_mut().for_each(|gi| *gi /= nextrecsum);
500            if nextrecsum - recsum < eps {
501                return nextg;
502            }; // termination test
503            g = nextg;
504            recsum = nextrecsum;
505        }
506    }
507
508    /// Like `gmedian` but returns also the sum of reciprocals.
509    fn gmparts(self, eps: f64) -> (Vec<f64>, f64) {
510        let mut g = self.acentroid(); // start iterating from the Centre
511        let mut recsum = 0f64;
512        loop {
513            // vector iteration till accuracy eps is exceeded
514            let mut nextg = vec![0_f64; self[0].len()];
515            let mut nextrecsum = 0f64;
516            for x in self { 
517                let mag = g
518                    .iter()
519                    .zip(x)
520                    .map(|(&gi, xi)| (xi.clone().into() - gi).powi(2))
521                    .sum::<f64>();
522                if mag.is_normal() {
523                    let rec = 1.0_f64 / (mag.sqrt()); // reciprocal of distance (scalar)
524                                                      // vsum increments by components
525                    nextg
526                        .iter_mut()
527                        .zip(x)
528                        .for_each(|(vi, xi)| *vi += xi.clone().into() * rec);
529                    nextrecsum += rec // add separately the reciprocals for final scaling
530                } // else simply ignore this point should its distance from g be zero
531            }
532            if nextrecsum - recsum < eps {
533                return (
534                    nextg
535                        .iter()
536                        .map(|&gi| gi / nextrecsum)
537                        .collect::<Vec<f64>>(), 
538                    nextrecsum,
539                );
540            }; // termination
541            nextg.iter_mut().for_each(|gi| *gi /= nextrecsum);
542            g = nextg;
543            recsum = nextrecsum;
544        }
545    }
546
547    /// Symmetric covariance matrix. Becomes comediance when argument `mid`  
548    /// is the geometric median instead of the centroid.
549    /// The indexing is always in this order: (row,column) (left to right, top to bottom).
550    /// The items are flattened into a single vector in this order.
551    fn covar(self, mid:&[f64]) -> Result<TriangMat,RE> {
552        let d = self[0].len(); // dimension of the vector(s)
553        if d != mid.len() { 
554            return data_error("covar self and mid dimensions mismatch"); }; 
555        let mut covsum = self
556            .par_iter()
557            .fold(
558                || vec![0_f64; (d+1)*d/2],
559                | mut cov: Vec<f64>, selfp | {
560                let mut covsub = 0_usize; // subscript into the flattened array cov
561                let vm = selfp.vsub(mid);  // zero mean vector
562                vm.iter().enumerate().for_each(|(i,diag)| 
563                    // its products up to and including the diagonal (itself)
564                    // the number of elements to take is always the subscript + 1!
565                    vm.iter().take(i+1).for_each(|vmi| { 
566                        cov[covsub] += diag*vmi;
567                        covsub += 1;
568                        })); 
569                cov 
570                }
571            )
572            .reduce(
573                || vec![0_f64; (d+1)*d/2],
574                | mut covout: Vec<f64>, covin: Vec<f64> | {
575                covout.mutvadd(&covin);
576                covout
577                }
578            ); 
579        // now compute the means and return
580        let lf = self.len() as f64;
581        covsum.iter_mut().for_each(|c| *c /= lf); 
582        Ok(TriangMat{ kind:2,data:covsum }) // symmetric, non transposed
583    }
584
585    /// Symmetric covariance matrix. Becomes comediance when supplied argument `mid`  
586    /// is the geometric median instead of the centroid.
587    /// Indexing is always in this order: (row,column) (left to right, top to bottom).
588    fn serial_covar(self, mid:&[f64]) -> Result<TriangMat,RE> {
589        let d = self[0].len(); // dimension of the vector(s)
590        if d != mid.len() { 
591            return data_error("serial_covar self and mid dimensions mismatch")?; }; 
592		let mut covsums = vec![0_f64; (d+1)*d/2];
593 		for p in self { 
594            let mut covsub = 0_usize; // subscript into the flattened array cov
595            let zp = p.vsub(mid);     // zero mean/median vector
596            zp.iter().enumerate().for_each(|(i,thisc)| 
597                  // its products up to and including the diagonal 
598                  zp.iter().take(i+1).for_each(|otherc| { 
599                      covsums[covsub] += thisc*otherc;
600                      covsub += 1;
601                  }) )
602        };
603        // now compute the means and return
604        let lf = self.len() as f64;
605        for c in covsums.iter_mut() { *c /= lf }; 
606        Ok(TriangMat{ kind:2,data:covsums }) // kind 2 = symmetric, non transposed
607    }
608
609    /// Projects self onto a given basis, e.g. dimensional reduction  
610    /// The returned vectors will have lengths equal to the number of supplied basis vectors.
611    fn projection(self, basis: &[Vec<f64>]) -> Result<Vec<Vec<f64>>, RE>
612    {
613        if self.is_empty() {
614            return nodata_error("projection: empty data");
615        };
616        let olddims = self[0].len();
617        if basis.len() > olddims { 
618            return data_error("projection: given too many basis vectors");
619        };
620        if olddims != basis[0].len() {
621            return data_error("projection: lengths of data vectors and basis vectors differ!");
622        };
623        let mut res = Vec::with_capacity(self.len()); 
624        for dvec in self {
625            res.push(
626                basis
627                    .iter()
628                    .map(|ev| dvec.dotp(ev))
629                    .collect::<Vec<f64>>(),
630            )
631        };
632        Ok(res)
633    }
634}
rstats/vecvec.rs

rstats/
vecvec.rs