1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
use crate::{error::RError,RE,Stats,Vecg,MutVecg,VecVecg,VecVec};
use indxvec::{F64,Vecops,Indices};
use medians::{Median};

impl<T,U> VecVecg<T,U> for &[Vec<T>] 
    where T: Copy+PartialOrd+std::fmt::Display,f64:From<T>, 
    U: Copy+PartialOrd+std::fmt::Display,f64:From<U> {

    /// Leftmultiply (column) vector v by matrix self
    fn leftmultv(self,v: &[U]) -> Result<Vec<f64>,RE> {
        if self[0].len() != v.len() { return Err(RError::DataError("leftmultv dimensions mismatch")); };
        Ok(self.iter().map(|s| s.dotp(v)).collect())
    }

    /// Rightmultiply (row) vector v by matrix self
    fn rightmultv(self,v: &[U]) -> Result<Vec<f64>,RE> {
        if v.len() != self.len() { return Err(RError::DataError("rightmultv dimensions mismatch")); };
        Ok(self.transpose().iter().map(|s| s.dotp(v)).collect())
    }

    /// Rectangular Matrices multiplication: self * m.
    /// Returns DataError if lengths of rows of self: `self[0].len()` 
    /// and columns of m: `m.len()` do not match.
    /// Result dimensions are self.len() x m[0].len() 
    fn matmult(self,m: &[Vec<U>]) -> Result<Vec<Vec<f64>>,RE> {
        if self[0].len() != m.len() { return Err(RError::DataError("matmult dimensions mismatch")); };
        let mut res:Vec<Vec<f64>> = vec![Vec::new();self.len()];
        let mtr  = m.transpose();
        for (i,srow) in self.iter().enumerate() { 
            mtr.iter().for_each(|mcolumn| { res[i].push(srow.dotp(mcolumn));}) };
        Ok(res)
    }

    /// Weighted sum of nd points (or vectors). 
    /// Weights are associated with points, not coordinates
    fn wsumv(self,ws: &[U]) -> Vec<f64> {
        let mut resvec = vec![0_f64;self[0].len()]; 
        for (v,&w) in self.iter().zip(ws) { 
            let weight = f64::from(w);
            for (res,component) in resvec.iter_mut().zip(v) {
                *res += weight*f64::from(*component) }
        };
        resvec
    }

    /// Weighted Centre
    fn wacentroid(self,ws: &[U]) -> Vec<f64> { 
        let mut wsum = 0_f64;
        let mut result = vec![0_f64;self[0].len()]; 
        for (v,&w) in self.iter().zip(ws) { 
            let weight = f64::from(w); // saves converting twice
            wsum += weight;
            result.mutvadd(&v.smult(weight)); };
        result.mutsmult::<f64>(1.0/wsum); // divide by weighted sum to get the mean
        result
    }

    /// Trend computes the vector connecting the geometric medians of two sets of multidimensional points.
    /// This is a robust relationship between two unordered multidimensional sets.
    /// The two sets have to be in the same space but can have different numbers of points.
    fn trend(self, eps: f64, v: Vec<Vec<U>>) -> Vec<f64> {
        let mut m1 = self.gmedian(eps);       
        m1.mutvsub::<f64>(&v.gmedian(eps));
        m1
    }

    /// Translates the whole set by subtracting vector m. Returns Vec of Vecs.
    /// When m is set to the geometric median, this produces the zero median form.
    /// The geometric median is invariant with respect to rotation,
    /// unlike the often used mean (`acentroid` here), or the quasi median,
    /// both of which depend on the choice of axis.
     fn translate(self, m: &[U]) -> Vec<Vec<f64>> {  
        self.iter().map(|s| s.vsub(m)).collect()   
    }

    /// Transforms nd data to zeromedian form
    /// essentially the same as translate but specialised to f64 gms
    fn zerogm(self, gm: &[f64]) -> Vec<Vec<f64>> {  
        self.iter().map(|s| s.vsub::<f64>(gm)).collect()
    }

    /// Dependencies of m on each vector in self
    /// m is typically a vector of outcomes.
    /// Factors out the entropy of m to save repetition of work
    fn dependencies(self, m: &[U]) -> Vec<f64> {  
        let entropym = m.entropy();
        self.iter().map(|s| (entropym + s.entropy())/s.jointentropy(m)-1.0).collect::<Vec<f64>>()
    }

    /// (Median) correlations of m with each vector in self
    /// Factors out the unit vector of m to save repetition of work
    fn correlations(self, m: &[U]) -> Vec<f64> {
        let mm = m.median(); // ignore quartile fields 
        let unitzerom =  m.sadd(-mm).vunit();
        self.iter().map(|s|{ 
            let ms = s.as_slice().median();   
            s.sadd(-ms).vunit().dotp(&unitzerom)
        }).collect::<Vec<f64>>()
    }

    /// Individual distances from any point v, typically not a member, to all the members of self.    
    fn dists(self, v: &[U]) -> Vec<f64> {
        self.iter().map(|p| p.vdist(v)).collect()
    }

    /// Sum of distances from any single point v, typically not a member, 
    /// to all members of self.    
    /// Geometric Median (gm) is defined as the point which minimises this function.
    /// This is relatively expensive measure to compute.
    /// The radius (distance) from gm is far more efficient, once gm has been found.
    fn distsum(self, v: &[U]) -> f64 {
        self.iter().map(|p| p.vdist(v)).sum::<f64>()
    }

    /// Sorted eccentricities magnitudes (radii), w.r.t. weighted geometric median.
    /// associated cummulative probability density function in [0,1] of the weights.
    fn wsortedeccs(self, ws: &[U], gm: &[f64]) -> ( Vec<f64>,Vec<f64> ) where F64:From<T> { 
        let mut eccs = Vec::with_capacity(self.len()); 
        // collect true eccentricities magnitudes
        for v in self { eccs.push(v.vdist::<f64>(gm)) }
        // create sort index of the eccs
        let index = eccs.hashsort_indexed();
        // pick the associated points weights in the order of the sorted eccs
        let weights = index.unindex(ws,true);
        let sumw = weights.iter().map(|&w|f64::from(w)).sum::<f64>();
        let mut accum = 0_f64;
        // accummulate the probabilities in [0,1]
        let probs = weights.iter().map(|&w| {
            accum += f64::from(w);
            accum / sumw }).collect::<Vec<f64>>();     
        ( index.unindex(&eccs, true), probs )
    }

    /// Like wgmparts, except only does one iteration from any non-member point g
    fn wnxnonmember(self, ws:&[U], g:&[f64]) -> (Vec<f64>,Vec<f64>,f64) {
        // vsum is the sum vector of unit vectors towards the points
        let mut vsum = vec![0_f64; self[0].len()];
        let mut recip = 0_f64;
        for (x,&w) in self.iter().zip(ws) { 
            // |x-p| done in-place for speed. Could have simply called x.vdist(p)
            let mag:f64 = x.iter().zip(g).map(|(&xi,&gi)|(f64::from(xi)-gi).powi(2)).sum::<f64>(); 
            if mag.is_normal() { // ignore this point should distance be zero
                let rec = f64::from(w)/(mag.sqrt()); // reciprocal of distance (scalar)
                // vsum increments by components
                vsum.iter_mut().zip(x).for_each(|(vi,xi)| *vi += f64::from(*xi)*rec); 
                 recip += rec // add separately the reciprocals for final scaling   
            }
        }
        ( vsum.iter().map(|vi| vi / recip).collect::<Vec<f64>>(),        
            vsum,
            recip )
    }    

    /// Weighted Geometric Median (gm) is the point that minimises the sum of distances to a given set of points.
    fn wgmedian(self, ws:&[U], eps: f64) -> Vec<f64> { 
        let mut g = self.acentroid(); // start iterating from the Centre 
        let mut recsum = 0f64;
        loop { // vector iteration till accuracy eps is exceeded  
            let mut nextg = vec![0_f64; self[0].len()];   
            let mut nextrecsum = 0_f64;
            for (x,&w) in self.iter().zip(ws) {   
                // |x-g| done in-place for speed. Could have simply called x.vdist(g)
                //let mag:f64 = g.vdist::<f64>(&x); 
                let mag = g.iter().zip(x).map(|(&gi,&xi)|(f64::from(xi)-gi).powi(2)).sum::<f64>(); 
                if mag.is_normal() { 
                    let rec = f64::from(w)/(mag.sqrt()); // reciprocal of distance (scalar)
                    // vsum increments by components
                    nextg.iter_mut().zip(x).for_each(|(vi,&xi)| *vi += f64::from(xi)*rec); 
                    nextrecsum += rec // add separately the reciprocals for final scaling   
                } // else simply ignore this point should its distance from g be zero
            }
            nextg.iter_mut().for_each(|gi| *gi /= nextrecsum);       
            // eprintln!("recsum {}, nextrecsum {} diff {}",recsum,nextrecsum,nextrecsum-recsum);
            if nextrecsum-recsum < eps { return nextg };  // termination test
            g = nextg;
            recsum = nextrecsum;            
        }
    }
    
    /// Like `gmedian` but returns also the sum of unit vecs and the sum of reciprocals. 
    fn wgmparts(self, ws:&[U], eps: f64) -> (Vec<f64>,Vec<f64>,f64) { 
        let mut g = self.acentroid(); // start iterating from the Centre
        let mut recsum = 0f64; 
        loop { // vector iteration till accuracy eps is exceeded  
            let mut nextg = vec![0_f64; self[0].len()];   
            let mut nextrecsum = 0f64;
            for (x,&w) in self.iter().zip(ws) { // for all points
                // |x-g| done in-place for speed. Could have simply called x.vdist(g)
                //let mag:f64 = g.vdist::<f64>(&x); 
                let mag = g.iter().zip(x).map(|(&gi,&xi)|(f64::from(xi)-gi).powi(2)).sum::<f64>(); 
                if mag.is_normal() { 
                    let rec = f64::from(w)/(mag.sqrt()); // reciprocal of distance (scalar)
                    // vsum increments by components
                    nextg.iter_mut().zip(x).for_each(|(vi,&xi)| *vi += f64::from(xi)*rec); 
                    nextrecsum += rec // add separately the reciprocals for final scaling   
                } // else simply ignore this point should its distance from g be zero
            }
            if nextrecsum-recsum < eps { 
                return (
                    nextg.iter().map(|&gi| gi/nextrecsum).collect::<Vec<f64>>(),
                    nextg,
                    nextrecsum
                ); }; // termination        
            nextg.iter_mut().for_each(|gi| *gi /= nextrecsum);
            g = nextg;
            recsum = nextrecsum;            
        }
    }

    /// wmadgm median of weighted absolute deviations from weighted gm: stable nd data spread estimator
    fn wmadgm(self, ws: &[U], wgm: &[f64]) -> f64 {     
        let devs:Vec<f64> = self.iter().map(|v| v.wvdist(ws,wgm)).collect();
        devs.as_slice().median()
    }

    /// Covariance matrix for f64 vectors in self. Becomes comediance when 
    /// argument m is the geometric median instead of the centroid.
    /// Since the matrix is symmetric, the missing upper triangular part can be trivially
    /// regenerated for all j>i by: c(j,i) = c(i,j).
    /// The indexing is always in this order: (row,column) (left to right, top to bottom).
    /// The items are flattened into a single vector in this order.
    /// The full 2D matrix can be reconstructed by `symmatrix` in the trait `Stats`.
    fn covar(self, m:&[U]) -> Vec<f64> {
        let d = self[0].len(); // dimension of the vector(s)
        let mut cov:Vec<f64> = vec![0_f64; (d+1)*d/2]; // flat lower triangular results array  
        for thisp in self { // adding up covars for all the points
            let mut covsub = 0_usize; // subscript into the flattened array cov
            let vm = thisp.vsub(m);  // zero mean vector
            vm.iter().enumerate().for_each(|(i,thisc)| 
                // its products up to and including the diagonal (itself)
                vm.iter().take(i+1).for_each(|vmi| { 
                    cov[covsub] += thisc*vmi;
                    covsub += 1;
                }));
            } 
        // now compute the means and return
        let lf = self.len() as f64;
        cov.iter_mut().for_each(|c| *c /= lf); 
        cov
    }
 
    /// Weighted covariance matrix for f64 vectors in self. Becomes comediance when 
    /// argument m is the geometric median instead of the centroid.
    /// Since the matrix is symmetric, the missing upper triangular part can be trivially
    /// regenerated for all j>i by: c(j,i) = c(i,j).
    /// The indexing is always in this order: (row,column) (left to right, top to bottom).
    /// The items are flattened into a single vector in this order.
    /// The full 2D matrix can be reconstructed by `symmatrix` in the trait `Stats`.
    fn wcovar(self, ws:&[U], m:&[f64]) -> Vec<f64> {
        let n = self[0].len(); // dimension of the vector(s)
        // let mut covs:Vec<Vec<f64>> = Vec::new();
        let mut cov:Vec<f64> = vec![0_f64; (n+1)*n/2]; // flat lower triangular results array
        let mut wsum = 0_f64;
        self.iter().zip(ws).for_each(|(selfh,&wsh)| { // adding up covars for all the points
            let w = f64::from(wsh); wsum += w;
            let mut covsub = 0_usize; // subscript into the flattened array cov 
            let vm = selfh.vsub(m);   // subtract zero mean/median vector  
            vm.iter().enumerate().for_each(|(i,&thisc)|            
                vm.iter().take(i+1).for_each(|&vmi| {  // its weighted products up to and including the diagonal 
                    cov[covsub] += w*thisc*vmi;
                    covsub += 1;
                }));
        });
        // now compute the means and return
        cov.mutsmult::<f64>(1_f64/wsum); 
        cov
    }

    /// Covariance matrix for f64 vectors in self. Becomes comediance when 
    /// argument m is the geometric median instead of the centroid.
    /// Since the matrix is symmetric, the missing upper triangular part can be trivially
    /// regenerated for all j>i by: c(j,i) = c(i,j).
    /// The indexing is always in this order: (row,column) (left to right, top to bottom).
    /// The items are flattened into a single vector in this order.
    /// The full 2D matrix can be reconstructed by `symmatrix` in the trait `Stats`.
    /// Similar to `covar` above but instead of averaging the covariances over n points, 
    /// their medians are returned.
    fn comed(self, m:&[U]) -> Vec<f64> { // m should be the median here 
        let d = self[0].len(); // dimension of the vector(s)
        let mut com:Vec<f64> = Vec::with_capacity((d+1)*d/2); // result vec flat lower triangular array 
        let zs:Vec<Vec<f64>> = self.iter().map(|s| s.vsub(m)).collect(); // zero median vectors
        for i in 0..d { // cross multiplaying the components
            for j in 0..i+1 { // in this order so as to save memory
                let thisproduct:Vec<f64> = zs.iter().map(|v| v[i]*v[j]).collect(); 
                com.push(thisproduct.as_slice().median());
            }
        }
        com
    }

    /// Covariance matrix for weighted f64 vectors in self. Becomes comediance when 
    /// argument m is the geometric median instead of the centroid.
    /// Since the matrix is symmetric, the missing upper triangular part can be trivially
    /// regenerated for all j>i by: c(j,i) = c(i,j).
    /// The indexing is always in this order: (row,column) (left to right, top to bottom).
    /// The items are flattened into a single vector in this order.
    /// The full 2D matrix can be reconstructed by `symmatrix` in the trait `Stats`.
    /// Similar to `wcovar` above but instead of averaging the covariances over n points, 
    /// their median is returned.
    fn wcomed(self, ws:&[U], m:&[f64]) -> Vec<f64> { // m should be the median here 
        let d = self[0].len(); // dimension of the vector(s)
        let zs:Vec<Vec<f64>> = self.iter().map(|s| s.vsub(m)).collect(); // zero median vectors
        let mut com:Vec<f64> = Vec::with_capacity((d+1)*d/2); // result vec flat lower triangular array 
        let wmean = ws.iter().map(|&w| f64::from(w)).sum::<f64>()/(self.len() as f64); 
        for i in 0..d { // cross multiplaying the components
            for j in 0..i+1 { // in this order so as to save memory
                let thisproduct:Vec<f64> = zs.iter().zip(ws).map(|(v,&w)| f64::from(w)*v[i]*v[j]).collect();  
                com.push(thisproduct.as_slice().median()/wmean);
            }
        };
        com
    }
}