1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445
use crate::{Med, MStats, MutVectors, Stats, VecVecf64, Vecf64, Indices}; impl VecVecf64 for &[Vec<f64>] { /// acentroid = simple multidimensional arithmetic mean /// # Example /// ``` /// use rstats::{Vecf64,VecVecf64,functions::genvec}; /// let pts = genvec(15,15,255,30); /// let centre = pts.acentroid(); /// let dist = pts.distsum(¢re); /// assert_eq!(dist, 4.14556218326653_f64); /// ``` fn acentroid(self) -> Vec<f64> { let mut centre = vec![0_f64; self[0].len()]; for v in self { centre.mutvadd(&v) } centre.mutsmult(1.0 / self.len() as f64); centre } /// hcentroid = multidimensional harmonic mean /// # Example /// ``` /// use rstats::{Vecf64,VecVecf64,functions::genvec}; /// let pts = genvec(15,15,255,30); /// let centre = pts.hcentroid(); /// let dist = pts.distsum(¢re); /// assert_eq!(dist, 5.623778191797538_f64); /// ``` fn hcentroid(self) -> Vec<f64> { let mut centre = vec![0_f64; self[0].len()]; // let t = self.translate(&self.acentroid()); for v in self { centre.mutvadd(&v.vinverse()) } centre.vinverse() } /// Trend computes the vector connecting the geometric medians of two sets of multidimensional points. /// This is a robust relationship between two unordered multidimensional sets. /// The two sets have to be in the same space but can have different numbers of points. fn trend(self, eps: f64, v: Vec<Vec<f64>>) -> Vec<f64> { let m1 = self.gmedian(eps); let m2 = v.gmedian(eps); m2.vsub(&m1) } /// Translates the whole set by vector -m. Returns Vec of Vecs. /// When m is set to the geometric median, this produces the zero median form. /// The geometric median is invariant with respect to rotation, /// unlike the often misguidedly used mean (`acentroid` here), or the quasi median, /// both of which depend on the choice of axis. fn translate(self, m: &[f64]) -> Vec<Vec<f64>> { let mut result = Vec::new(); for point in self { result.push(point.vsub(m)) } result } /// For each member point, gives its sum of distances to all other points. fn distsums(self) -> Vec<f64> { let n = self.len(); let mut dists = vec![0_f64; n]; // distances accumulator for all points // examine all unique pairings (lower triangular part of symmetric flat matrix) for i in 1..n { let thisp = &self[i]; for j in 0..i { let thatp = &self[j]; let d = thisp.vdist(&thatp); // calculate each distance relation just once dists[i] += d; dists[j] += d; // but add it to both points } } dists } /// The sum of distances from one member point, given by its `indx`, to all the other points in self. /// For all the points, use more efficient `distsums`. fn distsuminset(self, indx: usize) -> f64 { let n = self.len(); let mut sum = 0_f64; let thisp = &self[indx]; for i in 0..n { if i == indx { continue; }; sum += self[i].vdist(&thisp) } sum } /// Individual distances from any point v, typically not a member, to all the members of self. fn dists(self, v: &[f64]) -> Vec<f64> { self.iter().map(|p| p.vdist(v)).collect() } /// The sum of distances from any single point v, typically not a member, to all the members of self. /// Geometric Median is defined as the point which minimises this function. fn distsum(self, v: &[f64]) -> f64 { self.iter().map(|p| p.vdist(v)).sum::<f64>() } /// Medoid is the member point (point belonging to the set of points `self`), /// which has the least sum of distances to all other points. /// Outlier is the point with the greatest sum of distances. /// In other words, they are the members nearest and furthest from the median. /// This function returns a four-tuple: /// (medoid_distance, medoid_index, outlier_distance, outlier_index). /// # Example /// ``` /// use rstats::{Vecf64,VecVecf64,functions::genvec}; /// let pts = genvec(15,15,255,30); /// let (dm,_,_,_) = pts.medoid(); /// assert_eq!(dm,4.812334638782327_f64); /// ``` fn medoid(self) -> (f64, usize, f64, usize) { self.distsums().minmax() } /// Finds approximate vectors from each member point towards the geometric median. /// Twice as fast as doing them individually, using symmetry. fn eccentricities(self) -> Vec<Vec<f64>> { let n = self.len(); // allocate vectors for the results let mut eccs = vec![vec![0_f64; self[0].len()]; n]; let mut recips = vec![0_f64; n]; // ecentricities vectors accumulator for all points // examine all unique pairings (lower triangular part of symmetric flat matrix) for i in 1..n { let thisp = &self[i]; for j in 0..i { // calculate each unit vector between any pair of points just once let dvmag = self[j].vdist(&thisp); if !dvmag.is_normal() { continue } let rec = 1.0/dvmag; eccs[i].mutvadd(&self[j].smult(rec)); recips[i] += rec; // mind the vector's opposite orientations w.r.t. to the two points! eccs[j].mutvsub(&self[j].smult(rec)); recips[j] += rec; } } for i in 0..n { eccs[i].mutsmult(1.0/recips[i]); eccs[i].mutvsub(&self[i]) } eccs } /// Exact eccentricity vectors from all member points by first finding the Geometric Median. /// Usually faster than the approximate `eccentricities` above, especially when there are many points. fn exacteccs(self, eps:f64) -> Vec<Vec<f64>> { let mut eccs = Vec::with_capacity(self.len()); // Vectors for the results let gm = self.gmedian(eps); for v in self { eccs.push(gm.vsub(&v)) } eccs } /// GM and sorted eccentricities magnitudes. /// Describing a set of points `self` in n dimensions fn sortedeccs(self, ascending:bool, eps:f64) -> ( Vec<f64>,Vec<f64> ) { let mut eccs = Vec::with_capacity(self.len()); let gm = self.gmedian(eps); for v in self { // collect raw ecentricities magnitudes eccs.push(gm.vdist(&v)) } ( gm, eccs.sortm(ascending) ) } /// Weighted geometric median, sorted eccentricities magnitudes, /// associated cummulative probability density function in [0,1] of the weights. fn wsortedeccs(self, ws: &[f64], eps:f64) -> ( Vec<f64>,Vec<f64>,Vec<f64> ) { let mut eccs = Vec::with_capacity(self.len()); let gm = self.wgmedian(ws,eps); for v in self { // collect true ecentricities magnitudes eccs.push(gm.vdist(&v)) } // Apply linear transform // eccs = eccs.lintrans(); // create sort index of the eccs let index = eccs.mergesort(0,self.len()); // pick the associated points weights in the reverse order of the sorted eccs let mut weights = index.unindex(true,&ws); let mut sumw = 0_f64; // accummulate the weights for i in 0..weights.len() { sumw += weights[i]; weights[i] = sumw } // divide by the sum to get cum. probabilities in [0,1] for i in 0..weights.len() { weights[i] /= sumw }; ( gm, index.unindex(true,&eccs), weights ) } /// Sorted cosines magnitudes, /// associated cummulative probability density function in [0,1] of the weights. /// Needs central median fn wsortedcos(self, medmed: &[f64], zeromed: &[f64], ws: &[f64]) -> ( Vec<f64>,Vec<f64> ) { let mut coses = Vec::with_capacity(self.len()); for p in self { // collect coses coses.push(p.vsub(&medmed).vsim(&zeromed)); } // Apply linear transform // coses = coses.lintrans(); // create sort index of the coses let index = coses.mergesort(0,self.len()); // pick the associated points weights in the same order as the sorted coses let mut weights = index.unindex(true,&ws); let mut sumw = 0_f64; // accummulate the weights to from cpdf for i in 0..weights.len() { sumw += weights[i]; weights[i] = sumw } // divide by the sum to get cum. probabilities in [0,1] for i in 0..weights.len() { weights[i] /= sumw }; ( index.unindex(true,&coses), weights ) } /// Vector `eccentricity` or measure of /// `not being the geometric median`, added to a given member point. /// The true geometric median is as yet unknown. /// The member point in question is specified by its index `indx`. /// This function is suitable for a single member point. fn nxmember(self, indx: usize) -> Vec<f64> { let n = self.len(); let mut vsum = vec![0_f64; self[0].len()]; let thisp = &self[indx]; let mut recip = 0_f64; for i in 0..n { if i == indx { continue }; // exclude this point let dvmag = self[i].vdist(&thisp); if !dvmag.is_normal() { continue } // too close to this one let rec = 1.0/dvmag; vsum.mutvadd(&self[i].smult(rec)); // add vector recip += rec // add separately the reciprocals } vsum.mutsmult(1.0/recip); vsum } /// Vector `eccentricity` or measure of /// `not being the geometric median` for a member point. /// The true geometric median is as yet unknown. /// The true geometric median would return zero. /// The member point in question is specified by its index `indx`. /// This function is suitable for a single member point. /// When eccentricities of all the points are needed, use `exacteccs` above. fn eccmember(self, indx: usize) -> Vec<f64> { self.nxmember(indx).vsub(&self[indx]) } /// Eccentricity vector added to a non member point, /// while the true geometric median is as yet unknown. /// This function is suitable for a single non-member point. fn nxnonmember(self, p:&[f64]) -> Vec<f64> { let mut vsum = vec![0_f64; self[0].len()]; let mut recip = 0_f64; for x in self { let dvmag = x.vdist(&p); if !dvmag.is_normal() { continue } // zero distance, safe to ignore let rec = 1.0/dvmag; vsum.mutvadd(&x.smult(rec)); // add vector recip += rec // add separately the reciprocals } vsum.mutsmult(1.0/recip); vsum } /// Eccentricity vector for a non member point, /// while the true geometric median is as yet unknown. /// Returns the eccentricity vector. /// The true geometric median would return zero vector. /// This function is suitable for a single non-member point. fn eccnonmember(self, p:&[f64]) -> Vec<f64> { self.nxnonmember(p).vsub(&p) } /// Next approximate weighted median, from a non member point. fn wnxnonmember(self, ws:&[f64], p:&[f64]) -> Vec<f64> { let mut vsum = vec![0_f64; self[0].len()]; let mut recip = 0_f64; for i in 0..self.len() { let dvmag = self[i].vdist(&p); if !dvmag.is_normal() { continue } // zero distance, safe to ignore let rec = ws[i]/dvmag; // ws[i] is weigth for this point self[i] vsum.mutvadd(&self[i].smult(rec)); // add weighted vector recip += rec // add separately the reciprocals } vsum.mutsmult(1.0/recip); vsum } /// Magnitudes of all the vectors in self fn mags(self) -> Vec<f64> { let mut magsv = Vec::new(); for v in self { magsv.push(v.vmag()) } magsv } /// Mean and Std (in MStats struct), Median and quartiles (in Med struct) /// of scalar eccentricities of points in self. /// These are new robust measures of a cloud of multidimensional points (or multivariate sample). fn moe(self, eps: f64) -> (MStats, Med) { let eccs = self.exacteccs(eps).mags(); (eccs.ameanstd().unwrap(),eccs.median().unwrap()) } /// Eccentricity defined Medoid and Outlier. /// This can give different results to `medoid` above, defined by sums of distances, /// especially for the outliers. See tests.rs. /// Consider some point c and some other points, bunched up at a distance r from c. /// The sum of their distances will be nr. Now, spread those points around a circle of radius r from c. /// The sum of their distances from c will remain the same but the eccentricity of c will be much reduced. /// # Example /// ``` /// use rstats::{Vecf64,VecVecf64,functions::genvec}; /// pub const EPS:f64 = 1e-7; /// let d = 6_usize; /// let pt = genvec(d,24,7,13); // random test data 5x20 /// let (_medoideccentricity,medei,_outlierecccentricity,outei) = pt.emedoid(EPS); /// assert_eq!(medei,10); // index of e-medoid /// assert_eq!(outei,20); // index of e-outlier /// ``` fn emedoid(self, eps: f64) -> (f64, usize, f64, usize) { self.exacteccs(eps).mags().minmax() } /// Geometric Median (gm) is the point that minimises the sum of distances to a given set of points. /// It has (provably) only vector iterative solutions. /// Search methods are slow and difficult in highly dimensional space. /// Weiszfeld's fixed point iteration formula had known problems with sometimes failing to converge. /// Especially, when the points are dense in the close proximity of the gm, or gm coincides with one of them. /// However, these problems are fixed in my new algorithm here. /// There will eventually be a multithreaded version. fn nmedian(self, eps: f64) -> Vec<f64> { let mut point = self.acentroid(); // start iterating loop { let nextp = self.nxnonmember(&point); if nextp.vdist(&point) < eps { return nextp }; point = nextp } } /// Possible first iteration point for geometric medians. /// Same as eccnonmember(origin), just saving the zero subtractions /// when moving from the origin fn firstpoint(self) -> Vec<f64> { let mut rsum = 0_f64; let mut vsum = vec![0_f64; self[0].len()]; for thisp in self { let mag = thisp.vmag(); if mag.is_normal() { let invmag = 1.0_f64/mag; rsum += invmag; vsum.mutvadd(&thisp.smult(invmag)) // accumulate unit vectors } } vsum.smult(1.0/rsum) // scale by the sum of reciprocals } /// Secant method with recovery from divergence /// for finding the geometric median fn gmedian(self, eps: f64) -> Vec<f64> { let mut p1 = self.acentroid(); let mut p2 = self.nxnonmember(&p1); let mut e1mag = p2.vdist(&p1); loop { // will not use this np as does nmedian, using secant instead let mut np = self.nxnonmember(&p2); let e2 = np.vsub(&p2); // new vetor error, or eccentricity let e2mag = e2.vmag(); if e2mag < eps { return np }; if e1mag > e2mag { // eccentricity magnitude decreased, good, employ secant np = p2.vadd(&e2.smult(p1.vsub(&p2).vmag()/(e1mag-e2mag))); } else { // recovery: probably overshot the minimum, shorten the jump // e2 will already be pointing moreless back np = p2.vadd(&e2.smult(p1.vsub(&p2).vmag()/(e1mag+e2mag))); } p1 = p2; p2 = np; e1mag = e2mag } } /// Secant method with recovery /// for finding the weighted geometric median fn wgmedian(self, ws: &[f64], eps: f64) -> Vec<f64> { let mut p1 = self.acentroid(); let mut p2 = self.wnxnonmember(ws,&p1); let mut e1mag = p2.vdist(&p1); loop { // will not use this np as does nmedian, using secant instead let mut np = self.wnxnonmember(ws,&p2); let e2 = np.vsub(&p2); // new vector error, or eccentricity let e2mag = e2.vmag(); if e2mag < eps { return np }; if e1mag > e2mag { // eccentricity magnitude decreased, good, employ secant np = p2.vadd(&e2.smult(p1.vsub(&p2).vmag()/(e1mag-e2mag))); } else { // recovery: probably overshot the minimum, shorten the jump // e2 will already be pointing moreless back np = p2.vadd(&e2.smult(p1.vsub(&p2).vmag()/(e1mag+e2mag))); } p1 = p2; p2 = np; e1mag = e2mag } } /// Flattened lower triangular part of a covariance matrix for f64 vectors in self. /// Since covariance matrix is symmetric (positive semi definite), /// the upper triangular part can be trivially generated for all j>i by: c(j,i) = c(i,j). /// N.b. the indexing is always assumed to be in this order: row,column. /// The items of the resulting lower triangular array c[i][j] are here flattened /// into a single vector in this double loop order: left to right, top to bottom fn covar(self, m:&[f64]) -> Vec<f64> { let n = self[0].len(); // dimension of the vector(s) let mut cov:Vec<f64> = vec![0_f64; (n+1)*n/2]; // flat lower triangular results array for thisp in self { // adding up covars for all the points let mut covsub = 0_usize; // subscript into the flattened array cov let vm = thisp.vsub(&m); // zero mean vector for i in 0..n { let thisc = vm[i]; // ith component // its products up to and including the diagonal (itself) for j in 0..i+1 { cov[covsub] += thisc*vm[j]; covsub += 1 } } } // now compute the means and return cov.mutsmult(1.0_f64/self.len()as f64); cov } }