1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
use crate::{Stats,Vecg,MutVecg,VecVecg,VecVec};
use indxvec::{Vecops,Indices};
use medians::{Median};

impl<T,U> VecVecg<T,U> for &[Vec<T>] 
    where T: Copy+PartialOrd+std::fmt::Display,f64:From<T>, 
    U: Copy+PartialOrd+std::fmt::Display,f64:From<U> {

    /// Weighted Centre
    fn wacentroid(self,ws: &[U]) -> Vec<f64> where {
        let mut centre = vec![0_f64; self[0].len()];
        let mut wsum = 0_f64;
        self.iter().zip(ws).for_each(|(s,&w)|
        { 
            wsum += f64::from(w);
            centre.mutvadd::<f64>(&s.smult(w))
        });
        centre.mutsmult::<f64>(1.0 / wsum);
        centre
    }

    /// Trend computes the vector connecting the geometric medians of two sets of multidimensional points.
    /// This is a robust relationship between two unordered multidimensional sets.
    /// The two sets have to be in the same space but can have different numbers of points.
    fn trend(self, eps: f64, v: Vec<Vec<U>>) -> Vec<f64> {
        let mut m1 = self.gmedian(eps);       
        m1.mutvsub::<f64>(&v.gmedian(eps));
        m1
    }

    /// Translates the whole set by subtracting vector m. Returns Vec of Vecs.
    /// When m is set to the geometric median, this produces the zero median form.
    /// The geometric median is invariant with respect to rotation,
    /// unlike the often used mean (`acentroid` here), or the quasi median,
    /// both of which depend on the choice of axis.
     fn translate(self, m: &[U]) -> Vec<Vec<f64>> {  
        self.iter().map(|s| s.vsub(m)).collect()   
    }

    /// Transforms nd data to zeromedian form
    /// essentially the same as translate but specialised to f64 gms
    fn zerogm(self, gm: &[f64]) -> Vec<Vec<f64>> {  
        self.iter().map(|s| s.vsub::<f64>(gm)).collect()
    }

    /// Dependencies of m on each vector in self
    /// m is typically a vector of outcomes.
    /// Factors out the entropy of m to save repetition of work
    fn dependencies(self, m: &[U]) -> Vec<f64> {  
        let entropym = m.entropy();
        self.iter().map(|s| (entropym + s.entropy())/s.jointentropy(m)-1.0).collect::<Vec<f64>>()
    }

    /// (Median) correlations of m with each vector in self
    /// Factors out the unit vector of m to save repetition of work
    fn correlations(self, m: &[U]) -> Vec<f64> {
        let mm = m.median(); // ignore quartile fields 
        let unitzerom =  m.sadd(-mm).vunit();
        self.iter().map(|s|{ 
            let ms = s.as_slice().median();   
            s.sadd(-ms).vunit().dotp(&unitzerom)
        }).collect::<Vec<f64>>()
    }

    /// Individual distances from any point v, typically not a member, to all the members of self.    
    fn dists(self, v: &[U]) -> Vec<f64> {
        self.iter().map(|p| p.vdist(v)).collect()
    }

    /// Sum of distances from any single point v, typically not a member, 
    /// to all members of self.    
    /// Geometric Median (gm) is defined as the point which minimises this function.
    /// This is relatively expensive measure to compute.
    /// The radius (distance) from gm is far more efficient, once gm has been found.
    fn distsum(self, v: &[U]) -> f64 {
        self.iter().map(|p| p.vdist(v)).sum::<f64>()
    }

    /// Sorted eccentricities magnitudes (radii), w.r.t. weighted geometric median.
    /// associated cummulative probability density function in [0,1] of the weights.
    fn wsortedeccs(self, ws: &[U], gm: &[f64]) -> ( Vec<f64>,Vec<f64> ) { 
        let mut eccs = Vec::with_capacity(self.len()); 
        // collect true eccentricities magnitudes
        for v in self { eccs.push(v.vdist::<f64>(gm)) }
        // create sort index of the eccs
        let index = eccs.sortidx();
        // pick the associated points weights in the order of the sorted eccs
        let mut weights = index.unindexf64(ws,true);
        let mut sumw = 0_f64;
        // accummulate the weights
        weights.iter_mut().for_each(|w|{ sumw += *w; *w = sumw });     
        // divide by the final sum to get cummulative probabilities in [0,1]
        weights.iter_mut().for_each(|w| *w /= sumw );     
        ( index.unindex(&eccs, true), weights )
    }

    /// Sorted cosines magnitudes,
    /// associated cummulative probability density function in [0,1] of the weights.
    /// Needs central median
    fn wsortedcos(self, medmed: &[U], unitzmed: &[U], ws: &[U]) -> ( Vec<f64>,Vec<f64> ) { 
        let mut coses = Vec::with_capacity(self.len());  
        for p in self { // collect coses      
            coses.push(p.vsubunit(medmed).dotp(unitzmed)); 
        } 
        // create sort index of the coses
        let index = coses.sortidx();
        // pick the associated points weights in the same order as the sorted coses
        let mut weights = index.unindexf64(ws,true);
        let mut sumw = 0_f64;
        // accummulate the weights to form cpdf
        weights.iter_mut().for_each(|w|{ sumw += *w; *w = sumw });
        // divide by the sum to get cum. probabilities in [0,1]
        weights.iter_mut().for_each(|w| *w /= sumw );     
        ( index.unindex(&coses,true), weights )
    }

     /// Next approximate weighted median, from a non member point. 
    fn wnxnonmember(self, ws:&[U], p:&[f64]) -> Vec<f64> {
        let mut vsum = vec![0_f64; self[0].len()];
        let mut recip = 0_f64;
        for (x,&w) in self.iter().zip(ws) {  
            let mag:f64 = x.iter().zip(p).map(|(&xi,&pi)|(f64::from(xi)-pi).powi(2)).sum::<f64>().sqrt(); 
            if mag.is_normal() {  // skip point x when its distance is zero
                let rec = f64::from(w)/mag;
                vsum.iter_mut().zip(x).for_each(|(vi,xi)| *vi += rec*f64::from(*xi)); 
                recip += rec // add separately the reciprocals    
            }
        }
        vsum.iter_mut().for_each(|vi| *vi /= recip);
        vsum
    }
    
    /// Estimated (computed) eccentricity vector for a non member point
    /// The true geometric median is as yet unknown.
    /// Returns the weighted eccentricity vector.
    /// The true geometric median would return zero vector.
    /// This function is suitable for a single non-member point. 
    fn weccnonmember(self, ws:&[U], p:&[f64]) -> Vec<f64> {
       self.wnxnonmember(ws,p).vsub::<f64>(p)
    }

    /// Secant method with recovery from divergence
    /// for finding the weighted geometric median
    fn wgmedian(self, ws: &[U], eps: f64) -> Vec<f64> {  
        let eps2 = eps.powi(2);
        let mut p = self.wacentroid(ws); // start iterating from the Centre
        loop { // vector iteration till accuracy eps is reached
            let nextp = self.wnxnonmember(ws,&p);          
            if nextp.iter().zip(p).map(|(&xi,pi)|(xi-pi).powi(2)).sum::<f64>() < eps2 { return nextp }; // termination 
            p = nextp
        }
    }

    /// wmadgm median of weighted absolute deviations from weighted gm: stable nd data spread estimator
    fn wmadgm(self, ws: &[U], wgm: &[f64]) -> f64 {     
        let devs:Vec<f64> = self.iter().map(|v| v.wvdistf64(ws,wgm)).collect();
        devs.as_slice().median()
    }

    /// Covariance matrix for f64 vectors in self. Becomes comediance when 
    /// argument m is the geometric median instead of the centroid.
    /// Since the matrix is symmetric, the missing upper triangular part can be trivially
    /// regenerated for all j>i by: c(j,i) = c(i,j).
    /// The indexing is always in this order: (row,column) (left to right, top to bottom).
    /// The items are flattened into a single vector in this order.
    /// The full 2D matrix can be reconstructed by `symmatrix` in the trait `Stats`.
    fn covar(self, m:&[U]) -> Vec<f64> {
        let d = self[0].len(); // dimension of the vector(s)
        let mut cov:Vec<f64> = vec![0_f64; (d+1)*d/2]; // flat lower triangular results array  
        for thisp in self { // adding up covars for all the points
            let mut covsub = 0_usize; // subscript into the flattened array cov
            let vm = thisp.vsub(m);  // zero mean vector
            vm.iter().enumerate().for_each(|(i,thisc)| 
                // its products up to and including the diagonal (itself)
                vm.iter().take(i+1).for_each(|vmj| { 
                    cov[covsub] += thisc*vmj;
                    covsub += 1;
                }));
            } 
        // now compute the means and return
        let lf = self.len() as f64;
        cov.iter_mut().for_each(|c| *c /= lf); 
        cov
    }
 
    /// Weighted covariance matrix for f64 vectors in self. Becomes comediance when 
    /// argument m is the geometric median instead of the centroid.
    /// Since the matrix is symmetric, the missing upper triangular part can be trivially
    /// regenerated for all j>i by: c(j,i) = c(i,j).
    /// The indexing is always in this order: (row,column) (left to right, top to bottom).
    /// The items are flattened into a single vector in this order.
    /// The full 2D matrix can be reconstructed by `symmatrix` in the trait `Stats`.
    fn wcovar(self, ws:&[U], m:&[f64]) -> Vec<f64> {
        let n = self[0].len(); // dimension of the vector(s)
        // let mut covs:Vec<Vec<f64>> = Vec::new();
        let mut cov:Vec<f64> = vec![0_f64; (n+1)*n/2]; // flat lower triangular results array
        let mut wsum = 0_f64;
        self.iter().zip(ws).for_each(|(selfh,&wsh)| { // adding up covars for all the points
            let w = f64::from(wsh); wsum += w;
            let mut covsub = 0_usize; // subscript into the flattened array cov 
            let vm = selfh.vsub(m);  // subtract zero mean/median vector  
            vm.iter().enumerate().for_each(|(i,&thisc)|            
                vm.iter().take(i+1).for_each(|&vmj| {  // its weighted products up to and including the diagonal 
                    cov[covsub] += w*thisc*vmj;
                    covsub += 1;
                }));
        });
        // now compute the means and return
        cov.mutsmult::<f64>(1_f64/wsum); 
        cov
    }

    /// Covariance matrix for f64 vectors in self. Becomes comediance when 
    /// argument m is the geometric median instead of the centroid.
    /// Since the matrix is symmetric, the missing upper triangular part can be trivially
    /// regenerated for all j>i by: c(j,i) = c(i,j).
    /// The indexing is always in this order: (row,column) (left to right, top to bottom).
    /// The items are flattened into a single vector in this order.
    /// The full 2D matrix can be reconstructed by `symmatrix` in the trait `Stats`.
    /// Similar to `covar` above but instead of averaging the covariances over n points, 
    /// their medians are returned.
    fn comed(self, m:&[U]) -> Vec<f64> { // m should be the median here 
        let d = self[0].len(); // dimension of the vector(s)
        let mut com:Vec<f64> = Vec::with_capacity((d+1)*d/2); // result vec flat lower triangular array 
        let zs:Vec<Vec<f64>> = self.iter().map(|s| s.vsub(m)).collect(); // zero median vectors
        for i in 0..d { // cross multiplaying the components
            for j in 0..i+1 { // in this order so as to save memory
                let thisproduct:Vec<f64> = zs.iter().map(|v| v[i]*v[j]).collect(); 
                com.push(thisproduct.as_slice().median());
            }
        }
        com
    }

    /// Covariance matrix for weighted f64 vectors in self. Becomes comediance when 
    /// argument m is the geometric median instead of the centroid.
    /// Since the matrix is symmetric, the missing upper triangular part can be trivially
    /// regenerated for all j>i by: c(j,i) = c(i,j).
    /// The indexing is always in this order: (row,column) (left to right, top to bottom).
    /// The items are flattened into a single vector in this order.
    /// The full 2D matrix can be reconstructed by `symmatrix` in the trait `Stats`.
    /// Similar to `wcovar` above but instead of averaging the covariances over n points, 
    /// their median is returned.
    fn wcomed(self, ws:&[U], m:&[f64]) -> Vec<f64> { // m should be the median here 
        let d = self[0].len(); // dimension of the vector(s)
        let zs:Vec<Vec<f64>> = self.iter().map(|s| s.vsub(m)).collect(); // zero median vectors
        let mut com:Vec<f64> = Vec::with_capacity((d+1)*d/2); // result vec flat lower triangular array 
        let wmean = ws.iter().map(|&w| f64::from(w)).sum::<f64>()/(self.len() as f64); 
        for i in 0..d { // cross multiplaying the components
            for j in 0..i+1 { // in this order so as to save memory
                let thisproduct:Vec<f64> = zs.iter().zip(ws).map(|(v,&w)| f64::from(w)*v[i]*v[j]).collect();  
                com.push(thisproduct.as_slice().median()/wmean);
            }
        };
        com
    }
}