1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257
use crate::{Stats,Vecg,MutVecg,VecVecg,VecVec};
use indxvec::{Vecops,Indices};
use medians::{Median};
impl<T,U> VecVecg<T,U> for &[Vec<T>]
where T: Copy+PartialOrd+std::fmt::Display,f64:From<T>,
U: Copy+PartialOrd+std::fmt::Display,f64:From<U> {
/// Weighted Centre
fn wacentroid(self,ws: &[U]) -> Vec<f64> where {
let mut centre = vec![0_f64; self[0].len()];
let mut wsum = 0_f64;
self.iter().zip(ws).for_each(|(s,&w)|
{
wsum += f64::from(w);
centre.mutvadd::<f64>(&s.smult(w))
});
centre.mutsmult::<f64>(1.0 / wsum);
centre
}
/// Trend computes the vector connecting the geometric medians of two sets of multidimensional points.
/// This is a robust relationship between two unordered multidimensional sets.
/// The two sets have to be in the same space but can have different numbers of points.
fn trend(self, eps: f64, v: Vec<Vec<U>>) -> Vec<f64> {
let mut m1 = self.gmedian(eps);
m1.mutvsub::<f64>(&v.gmedian(eps));
m1
}
/// Translates the whole set by subtracting vector m. Returns Vec of Vecs.
/// When m is set to the geometric median, this produces the zero median form.
/// The geometric median is invariant with respect to rotation,
/// unlike the often used mean (`acentroid` here), or the quasi median,
/// both of which depend on the choice of axis.
fn translate(self, m: &[U]) -> Vec<Vec<f64>> {
self.iter().map(|s| s.vsub(m)).collect()
}
/// Transforms nd data to zeromedian form
/// essentially the same as translate but specialised to f64 gms
fn zerogm(self, gm: &[f64]) -> Vec<Vec<f64>> {
self.iter().map(|s| s.vsub::<f64>(gm)).collect()
}
/// Dependencies of m on each vector in self
/// m is typically a vector of outcomes.
/// Factors out the entropy of m to save repetition of work
fn dependencies(self, m: &[U]) -> Vec<f64> {
let entropym = m.entropy();
self.iter().map(|s| (entropym + s.entropy())/s.jointentropy(m)-1.0).collect::<Vec<f64>>()
}
/// (Median) correlations of m with each vector in self
/// Factors out the unit vector of m to save repetition of work
fn correlations(self, m: &[U]) -> Vec<f64> {
let mm = m.median(); // ignore quartile fields
let unitzerom = m.sadd(-mm).vunit();
self.iter().map(|s|{
let ms = s.as_slice().median();
s.sadd(-ms).vunit().dotp(&unitzerom)
}).collect::<Vec<f64>>()
}
/// Individual distances from any point v, typically not a member, to all the members of self.
fn dists(self, v: &[U]) -> Vec<f64> {
self.iter().map(|p| p.vdist(v)).collect()
}
/// Sum of distances from any single point v, typically not a member,
/// to all members of self.
/// Geometric Median (gm) is defined as the point which minimises this function.
/// This is relatively expensive measure to compute.
/// The radius (distance) from gm is far more efficient, once gm has been found.
fn distsum(self, v: &[U]) -> f64 {
self.iter().map(|p| p.vdist(v)).sum::<f64>()
}
/// Sorted eccentricities magnitudes (radii), w.r.t. weighted geometric median.
/// associated cummulative probability density function in [0,1] of the weights.
fn wsortedeccs(self, ws: &[U], gm: &[f64]) -> ( Vec<f64>,Vec<f64> ) {
let mut eccs = Vec::with_capacity(self.len());
// collect true eccentricities magnitudes
for v in self { eccs.push(v.vdist::<f64>(gm)) }
// create sort index of the eccs
let index = eccs.sortidx();
// pick the associated points weights in the order of the sorted eccs
let mut weights = index.unindexf64(ws,true);
let mut sumw = 0_f64;
// accummulate the weights
weights.iter_mut().for_each(|w|{ sumw += *w; *w = sumw });
// divide by the final sum to get cummulative probabilities in [0,1]
weights.iter_mut().for_each(|w| *w /= sumw );
( index.unindex(&eccs, true), weights )
}
/// Sorted cosines magnitudes,
/// associated cummulative probability density function in [0,1] of the weights.
/// Needs central median
fn wsortedcos(self, medmed: &[U], unitzmed: &[U], ws: &[U]) -> ( Vec<f64>,Vec<f64> ) {
let mut coses = Vec::with_capacity(self.len());
for p in self { // collect coses
coses.push(p.vsubunit(medmed).dotp(unitzmed));
}
// create sort index of the coses
let index = coses.sortidx();
// pick the associated points weights in the same order as the sorted coses
let mut weights = index.unindexf64(ws,true);
let mut sumw = 0_f64;
// accummulate the weights to form cpdf
weights.iter_mut().for_each(|w|{ sumw += *w; *w = sumw });
// divide by the sum to get cum. probabilities in [0,1]
weights.iter_mut().for_each(|w| *w /= sumw );
( index.unindex(&coses,true), weights )
}
/// Next approximate weighted median, from a non member point.
fn wnxnonmember(self, ws:&[U], p:&[f64]) -> Vec<f64> {
let mut vsum = vec![0_f64; self[0].len()];
let mut recip = 0_f64;
for (x,&w) in self.iter().zip(ws) {
let mag:f64 = x.iter().zip(p).map(|(&xi,&pi)|(f64::from(xi)-pi).powi(2)).sum::<f64>().sqrt();
if mag.is_normal() { // skip point x when its distance is zero
let rec = f64::from(w)/mag;
vsum.iter_mut().zip(x).for_each(|(vi,xi)| *vi += rec*f64::from(*xi));
recip += rec // add separately the reciprocals
}
}
vsum.iter_mut().for_each(|vi| *vi /= recip);
vsum
}
/// Estimated (computed) eccentricity vector for a non member point
/// The true geometric median is as yet unknown.
/// Returns the weighted eccentricity vector.
/// The true geometric median would return zero vector.
/// This function is suitable for a single non-member point.
fn weccnonmember(self, ws:&[U], p:&[f64]) -> Vec<f64> {
self.wnxnonmember(ws,p).vsub::<f64>(p)
}
/// Secant method with recovery from divergence
/// for finding the weighted geometric median
fn wgmedian(self, ws: &[U], eps: f64) -> Vec<f64> {
let eps2 = eps.powi(2);
let mut p = self.wacentroid(ws); // start iterating from the Centre
loop { // vector iteration till accuracy eps is reached
let nextp = self.wnxnonmember(ws,&p);
if nextp.iter().zip(p).map(|(&xi,pi)|(xi-pi).powi(2)).sum::<f64>() < eps2 { return nextp }; // termination
p = nextp
}
}
/// wmadgm median of weighted absolute deviations from weighted gm: stable nd data spread estimator
fn wmadgm(self, ws: &[U], wgm: &[f64]) -> f64 {
let devs:Vec<f64> = self.iter().map(|v| v.wvdistf64(ws,wgm)).collect();
devs.as_slice().median()
}
/// Covariance matrix for f64 vectors in self. Becomes comediance when
/// argument m is the geometric median instead of the centroid.
/// Since the matrix is symmetric, the missing upper triangular part can be trivially
/// regenerated for all j>i by: c(j,i) = c(i,j).
/// The indexing is always in this order: (row,column) (left to right, top to bottom).
/// The items are flattened into a single vector in this order.
/// The full 2D matrix can be reconstructed by `symmatrix` in the trait `Stats`.
fn covar(self, m:&[U]) -> Vec<f64> {
let d = self[0].len(); // dimension of the vector(s)
let mut cov:Vec<f64> = vec![0_f64; (d+1)*d/2]; // flat lower triangular results array
for thisp in self { // adding up covars for all the points
let mut covsub = 0_usize; // subscript into the flattened array cov
let vm = thisp.vsub(m); // zero mean vector
vm.iter().enumerate().for_each(|(i,thisc)|
// its products up to and including the diagonal (itself)
vm.iter().take(i+1).for_each(|vmj| {
cov[covsub] += thisc*vmj;
covsub += 1;
}));
}
// now compute the means and return
let lf = self.len() as f64;
cov.iter_mut().for_each(|c| *c /= lf);
cov
}
/// Weighted covariance matrix for f64 vectors in self. Becomes comediance when
/// argument m is the geometric median instead of the centroid.
/// Since the matrix is symmetric, the missing upper triangular part can be trivially
/// regenerated for all j>i by: c(j,i) = c(i,j).
/// The indexing is always in this order: (row,column) (left to right, top to bottom).
/// The items are flattened into a single vector in this order.
/// The full 2D matrix can be reconstructed by `symmatrix` in the trait `Stats`.
fn wcovar(self, ws:&[U], m:&[f64]) -> Vec<f64> {
let n = self[0].len(); // dimension of the vector(s)
// let mut covs:Vec<Vec<f64>> = Vec::new();
let mut cov:Vec<f64> = vec![0_f64; (n+1)*n/2]; // flat lower triangular results array
let mut wsum = 0_f64;
self.iter().zip(ws).for_each(|(selfh,&wsh)| { // adding up covars for all the points
let w = f64::from(wsh); wsum += w;
let mut covsub = 0_usize; // subscript into the flattened array cov
let vm = selfh.vsub(m); // subtract zero mean/median vector
vm.iter().enumerate().for_each(|(i,&thisc)|
vm.iter().take(i+1).for_each(|&vmj| { // its weighted products up to and including the diagonal
cov[covsub] += w*thisc*vmj;
covsub += 1;
}));
});
// now compute the means and return
cov.mutsmult::<f64>(1_f64/wsum);
cov
}
/// Covariance matrix for f64 vectors in self. Becomes comediance when
/// argument m is the geometric median instead of the centroid.
/// Since the matrix is symmetric, the missing upper triangular part can be trivially
/// regenerated for all j>i by: c(j,i) = c(i,j).
/// The indexing is always in this order: (row,column) (left to right, top to bottom).
/// The items are flattened into a single vector in this order.
/// The full 2D matrix can be reconstructed by `symmatrix` in the trait `Stats`.
/// Similar to `covar` above but instead of averaging the covariances over n points,
/// their medians are returned.
fn comed(self, m:&[U]) -> Vec<f64> { // m should be the median here
let d = self[0].len(); // dimension of the vector(s)
let mut com:Vec<f64> = Vec::with_capacity((d+1)*d/2); // result vec flat lower triangular array
let zs:Vec<Vec<f64>> = self.iter().map(|s| s.vsub(m)).collect(); // zero median vectors
for i in 0..d { // cross multiplaying the components
for j in 0..i+1 { // in this order so as to save memory
let thisproduct:Vec<f64> = zs.iter().map(|v| v[i]*v[j]).collect();
com.push(thisproduct.as_slice().median());
}
}
com
}
/// Covariance matrix for weighted f64 vectors in self. Becomes comediance when
/// argument m is the geometric median instead of the centroid.
/// Since the matrix is symmetric, the missing upper triangular part can be trivially
/// regenerated for all j>i by: c(j,i) = c(i,j).
/// The indexing is always in this order: (row,column) (left to right, top to bottom).
/// The items are flattened into a single vector in this order.
/// The full 2D matrix can be reconstructed by `symmatrix` in the trait `Stats`.
/// Similar to `wcovar` above but instead of averaging the covariances over n points,
/// their median is returned.
fn wcomed(self, ws:&[U], m:&[f64]) -> Vec<f64> { // m should be the median here
let d = self[0].len(); // dimension of the vector(s)
let zs:Vec<Vec<f64>> = self.iter().map(|s| s.vsub(m)).collect(); // zero median vectors
let mut com:Vec<f64> = Vec::with_capacity((d+1)*d/2); // result vec flat lower triangular array
let wmean = ws.iter().map(|&w| f64::from(w)).sum::<f64>()/(self.len() as f64);
for i in 0..d { // cross multiplaying the components
for j in 0..i+1 { // in this order so as to save memory
let thisproduct:Vec<f64> = zs.iter().zip(ws).map(|(v,&w)| f64::from(w)*v[i]*v[j]).collect();
com.push(thisproduct.as_slice().median()/wmean);
}
};
com
}
}