1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431
pub mod statsg;
pub mod mutstats;
pub mod vecg;
pub mod vecu8;
pub mod mutvec;
pub mod vecvec;
pub mod vecvecg;
// reexporting to avoid duplication and for backwards compatibility
pub use indxvec::{MinMax,here,wi,wv,printvv};
/// simple error handling
use anyhow::{Result,bail};
use core::iter::FromIterator;
// Structs //
/// Median and quartiles
#[derive(Default)]
pub struct Med {
pub lquartile: f64,
pub median: f64,
pub uquartile: f64,
}
impl std::fmt::Display for Med {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(
f,
"median:\n\tLower Q: {}\n\tMedian: {}\n\tUpper Q: {}",
wi(&self.lquartile),
wi(&self.median),
wi(&self.uquartile)
)
}
}
/// Mean and standard deviation (or std ratio for geometric mean)
#[derive(Default)]
pub struct MStats {
pub mean: f64,
pub std: f64,
}
impl std::fmt::Display for MStats {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "mean±std: {}±{}", wi(&self.mean), wi(&self.std))
}
}
// Auxiliary Functions //
/// Potentially useful recast of a whole slice
pub fn tof64<T>(s: &[T]) -> Vec<f64> where T: Copy, f64: From<T> {
s.iter().map(| &x | f64::from(x)).collect()
}
/// Necessary recast of a whole i64 slice to f64
pub fn i64tof64(s: &[i64]) -> Vec<f64> {
s.iter().map(| &x | x as f64).collect()
}
/// Sum of linear weights 1..n.
/// Also the size of an upper or lower triangle
/// of a square array (including the diagonal)
pub fn wsum(n: usize) -> f64 {
(n * (n + 1)) as f64 / 2.
}
/// Generates a vector of n vectors, each of length d, all filled with random numbers for testing.
/// It needs two seeds s1 and s2. Same seeds will produce the same random sequence.
/// Uses local closure `rand` to generate random numbers (avoids dependencies).
/// Random numbers are in the open interval 0..1 with uniform distribution.
pub fn genvec(d: usize, n: usize, mut s1: u32, mut s2: u32) -> Vec<Vec<f64>> {
if n * d < 1 { panic!("{}\n\tnon positive dimensions",here!()) }
// random numbers generating closure with captured seeds s1,s2
let mut rand = || {
s1 = 36969 * (s1 & 65535) + (s1 >> 16);
s2 = 18000 * (s2 & 65535) + (s2 >> 16);
(((s1 << 16) & s2) as f64 + 1.0) * 2.328306435454494e-10
};
let mut v: Vec<Vec<f64>> = Vec::with_capacity(n);
for _i in 0..n {
let mut pt = Vec::with_capacity(d);
for _j in 0..d {
pt.push(rand())
}
v.push(pt)
} // fills the lot with random numbers
v
}
/// Generates a vector of n vectors, each of length d, all filled with random numbers for testing.
/// It needs two seeds s1 and s2. Same seeds will produce the same random sequence.
/// Uses local closure `rand` to generate random numbers (avoids dependencies).
/// Random numbers are in the closed interval 0..255 with uniform distribution.
pub fn genvecu8(d: usize, n: usize, mut s1: u32, mut s2: u32) -> Vec<Vec<u8>> {
if n * d < 1 { panic!("{}\n\tgenvecu8: non positive dimensions",here!()) }
// random numbers generating closure with captured seeds s1,s2
let mut rand = || {
s1 = 36969 * (s1 & 65535) + (s1 >> 16);
s2 = 18000 * (s2 & 65535) + (s2 >> 16);
(256.0*(((s1 << 16) & s2) as f64 + 1.0)*2.328306435454494e-10).floor() as u8
};
let mut v: Vec<Vec<u8>> = Vec::with_capacity(n);
for _i in 0..n {
let mut pt = Vec::with_capacity(d);
for _j in 0..d {
pt.push(rand())
}
v.push(pt)
} // fills the lot with random numbers
v
}
// Traits
/// Statistical measures of a single variable (one generic vector of data) and
/// vector algebra applicable to a single (generic) vector.
/// Thus these methods take no arguments.
/// There is just one limitation: data of end type `i64` has to be explicitly converted to `f64`.
/// That is to raise awareness that, in this particular case, some precision may be lost.
/// Function `statsg::i64tof64(&s)` will convert the whole slice.
pub trait Stats {
/// Vector magnitude
fn vmag(self) -> f64;
/// Vector magnitude squared (sum of squares)
fn vmagsq(self) -> f64;
/// vector with reciprocal components
fn vreciprocal(self) -> Result<Vec<f64>>;
/// vector with inverse magnitude
fn vinverse(self) -> Result<Vec<f64>>;
// negated vector (all components swap sign)
fn negv(self) -> Vec<f64>;
/// Unit vector
fn vunit(self) -> Vec<f64>;
/// Arithmetic mean
fn amean(self) -> Result<f64>
where Self: std::marker::Sized { bail!("amean not implemented for this type")}
/// Arithmetic mean and standard deviation
fn ameanstd(self) -> Result<MStats>
where Self: std::marker::Sized { bail!("ameanstd not implemented for this type")}
/// Weighted arithmetic mean
fn awmean(self) -> Result<f64>
where Self: std::marker::Sized { bail!("awmean not implemented for this type")}
/// Weighted arithmetic men and standard deviation
fn awmeanstd(self) -> Result<MStats>
where Self: std::marker::Sized { bail!("awmeanstd not implemented for this type")}
/// Harmonic mean
fn hmean(self) -> Result<f64>
where Self: std::marker::Sized { bail!("hmean not implemented for this type")}
/// Harmonic mean and experimental standard deviation
fn hmeanstd(self) -> Result<MStats>
where Self: std::marker::Sized { bail!("hmeanstd not implemented for this type")}
/// Weighted harmonic mean
fn hwmean(self) -> Result<f64>
where Self: std::marker::Sized { bail!("hwmean not implemented for this type")}
/// Weighted harmonic mean and standard deviation
fn hwmeanstd(self) -> Result<MStats>
where Self: std::marker::Sized { bail!("hwgmeanstd not implemented for this type")}
/// Geometric mean
fn gmean(self) -> Result<f64>
where Self: std::marker::Sized { bail!("gmean not implemented for this type")}
/// Geometric mean and standard deviation ratio
fn gmeanstd(self) -> Result<MStats>
where Self: std::marker::Sized { bail!("gmeanstd not implemented for this type")}
/// Weighed geometric mean
fn gwmean(self) -> Result<f64>
where Self: std::marker::Sized { bail!("gwmean not implemented for this type")}
/// Weighted geometric mean and standard deviation ratio
fn gwmeanstd(self) -> Result<MStats>
where Self: std::marker::Sized { bail!("gwmeanstd not implemented for this type")}
/// Median and quartiles
fn median(self) -> Result<Med>
where Self: std::marker::Sized { bail!("median not implemented for this type")}
/// Probability density function of a sorted slice
fn pdf(self) -> Vec<f64>;
/// Information (entropy) in nats
fn entropy(self) -> f64;
/// (Auto)correlation coefficient of pairs of successive values of (time series) variable.
fn autocorr(self) -> f64;
/// Linear transform to interval [0,1]
fn lintrans(self) -> Vec<f64>;
/// Reconstructs the full symmetric square matrix from its lower diagonal compact form,
/// produced by covar, covone, wcovar
fn symmatrix(self) -> Vec<Vec<f64>>;
}
/// A few of the `Stats` methods are reimplemented here
/// (only for f64), so that they mutate `self` in-place.
/// This is more efficient and convenient in some circumstances.
pub trait MutStats {
/// Invert the magnitude
fn minvert(self);
// negate vector (all components swap sign)
fn mneg(self);
/// Make into a unit vector
fn munit(self);
/// Linearly transform to interval [0,1]
fn mlintrans(self);
/// Sort in place
fn msortf(self);
}
/// Vector Algebra on two vectors (represented here as generic slices).
/// Also included are scalar operations on the `self` vector.
pub trait Vecg<T,U> {
/// Scalar addition to vector
fn sadd(self, s:U) -> Vec<f64>;
/// Scalar multiplication of vector, creates new vec
fn smult(self, s:U) -> Vec<f64>;
/// Scalar product
fn dotp(self, v:&[U]) -> f64;
/// Cosine of angle between two slices
fn cosine(self, v:&[U]) -> f64;
/// Vectors' subtraction (difference)
fn vsub(self, v:&[U]) -> Vec<f64>;
/// Vectors difference as unit vector (done together for efficiency)
fn vsubunit(self, v: &[U]) -> Vec<f64>;
/// Vector addition
fn vadd(self, v:&[U]) -> Vec<f64>;
/// Euclidian distance
fn vdist(self, v:&[U]) -> f64;
/// Euclidian distance squared
fn vdistsq(self, v:&[U]) -> f64;
/// cityblock distance
fn cityblockd(self, v:&[U]) -> f64;
/// Magnitude of the cross product |a x b| = |a||b|sin(theta).
/// Attains maximum `|a|.|b|` when the vectors are othogonal.
fn varea(self, v:&[U]) -> f64;
/// Area proportional to the swept arc
fn varc(self, v:&[U]) -> f64;
/// Vector similarity S in the interval [0,1]: S = (1+cos(theta))/2
fn vsim(self, v:&[U]) -> f64;
/// We define vector dissimilarity D in the interval [0,1]: D = 1-S = (1-cos(theta))/2
fn vdisim(self, v:&[U]) -> f64;
/// Lower triangular part of a covariance matrix for a single f64 vector.
fn covone(self, m:&[U]) -> Vec<f64>;
/// Kronecker product of two vectors
fn kron(self, m:&[U]) -> Vec<f64>;
/// Outer product of two vectors
fn outer(self, m:&[U]) -> Vec<Vec<f64>>;
/// Joint probability density function
fn jointpdf(self,v:&[U]) -> Vec<f64>;
/// Joint entropy of &[T],&[U] in nats (units of e)
fn jointentropy(self, v:&[U]) -> f64;
/// Statistical dependence based on joint entropy
fn dependence(self, v:&[U]) -> f64;
/// Pearson's correlation.
fn correlation(self, v:&[U]) -> f64;
/// Kendall Tau-B correlation.
fn kendalcorr(self, v:&[U]) -> f64;
/// Spearman rho correlation.
fn spearmancorr(self, v:&[U]) -> f64;
}
/// Vector algebra: methods operaing on one vector of generic end type
/// and the second argument, which is a scalar or vector of f64 end type.
pub trait Vecf64<_T> {
/// Scalar multiplication by f64
fn smultf64(self, s:f64) -> Vec<f64>;
/// Vector subtraction of `&[f64]`
fn vsubf64(self, v:&[f64]) -> Vec<f64>;
/// Vectors difference unitised (done together for efficiency)
fn vsubunitf64(self, v: &[f64]) -> Vec<f64>;
/// Euclidian distance to `&[f64]`
fn vdistf64(self, v:&[f64]) -> f64;
/// Adding `&[f64]`
fn vaddf64(self, v:&[f64]) -> Vec<f64>;
/// Euclidian distance to `&[f64]` squared
fn vdistsqf64(self, v:&[f64]) -> f64;
}
/// Mutable vector operations that take one generic argument.
/// A few of the essential `Vecg` methods are reimplemented here
/// to mutate `self` in-place (only for f64).
/// This is for efficiency and convenience, for example, in
/// vector iterative methods.
pub trait MutVecg<U> {
/// mutable multiplication by a scalar
fn mutsmult(self, _s:U);
/// mutable vector subtraction
fn mutvsub(self, _v: &[U]);
/// mutable vector addition
fn mutvadd(self, _v: &[U]);
}
/// Vector mutating operations that take one argument of f64 end type.
pub trait MutVecf64 {
/// mutable multiplication by a scalar
fn mutsmultf64(self, _s:f64);
/// mutable vector subtraction
fn mutvsubf64(self, _v:&[f64]);
/// mutable vector addition
fn mutvaddf64(self, _v:&[f64]);
}
/// Methods specialised to, or more efficient for `&[u8]`
pub trait Vecu8 {
/// Scalar product of two (positive) u8 slices.
/// Must be of the same length - no error checking (for speed)
fn dotpu8(self, v: &[u8]) -> u64;
/// Cosine between two (positive) u8 slices.
fn cosineu8(self, v: &[u8]) -> f64;
/// Vector subtraction (converts results to f64 as they can be negative)
fn vsubu8(self, v: &[u8]) -> Vec<f64>;
/// Vector addition ( converts results to f64, as they can exceed 255 )
fn vaddu8(self, v: &[u8]) -> Vec<f64>;
/// Euclidian distance between self &[u8] and v:&[u8].
/// Faster than vsub followed by vmag, as both are done in one loop
fn vdistu8(self, v: &[u8]) -> f64;
/// cityblock distance
fn cityblockdu8(self, v:&[u8]) -> f64;
///Euclidian distance squared, the arguments are both of &[u8] type
fn vdistsqu8(self, v: &[u8]) -> u64;
/// Probability density function of bytes data
fn pdfu8(self) -> Vec<f64>;
/// Information (entropy) of &[u8] (in nats)
fn entropyu8(self) -> f64;
/// Joint probability density function (here just co-occurence counts)
/// of paired values in two vectors of bytes of the same length.
/// Needs n^2 x 32bits of memory.
fn jointpdfu8(self, v:&[u8]) -> Vec<Vec<u32>>;
/// Joint entropy of &[u8],&[u8] (in nats)
fn jointentropyu8(self, v:&[u8]) -> f64;
/// Dependence of two &[u8] variables, the range is [0,1],
/// i.e. it returns 0 iff they are statistically independent
/// and 1 when they are identical
fn dependenceu8(self, v:&[u8]) -> f64;
}
/// A few specialised methods applicable to `Vec<Vec<u8>>` (vector of vectors of bytes).
pub trait VecVecu8 {
/// Centroid = euclidian mean of a set of points
fn acentroid(self) -> Vec<f64>;
/// Weighted Centre
fn wacentroid(self,ws: &[u8]) -> Vec<f64>;
/// Eccentricity vector added to a non member point,
fn nxnonmember(self, p:&[f64]) -> Vec<f64>;
/// Weighted eccentricity vector for a non member point
fn wnxnonmember(self, ws:&[u8], p:&[f64]) -> Vec<f64>;
/// Weighted geometric median, sorted eccentricities magnitudes, cpdf of the weights
fn gmedian(self, eps:f64) -> Vec<f64>;
/// The weighted geometric median
fn wgmedian(self, ws:&[u8], eps: f64) -> Vec<f64>;
/// Lower triangular part of a covariance matrix of a Vec of u8 vectors.
fn covar(self, med:&[f64]) -> Vec<f64>;
fn wcovar(self, ws:&[u8], m:&[f64]) -> Vec<f64>;
}
/// Methods applicable to a single argument: a vector of vectors of generic end type.
/// Operations on a set of multidimensional vectors.
pub trait VecVec<T> {
/// Arithmetic Centre = euclidian mean of a set of points
fn acentroid(self) -> Vec<f64>;
/// Geometric Centroid
fn gcentroid(self) -> Vec<f64>;
/// Harmonic Centroid = harmonic mean of a set of points
fn hcentroid(self) -> Vec<f64>;
/// Possible first iteration point for geometric medians
fn firstpoint(self) -> Vec<f64>;
/// Sums of distances from each point to all other points
fn distsums(self) -> Vec<f64>;
/// Medoid distance, its index, outlier distance, its index
fn medout(self) -> MinMax<f64>;
/// Fast sums of distances from each point to all other points
fn distsuminset(self, indx: usize) -> f64;
/// Next approx gm computed at a member point given by its indx
fn nxmember(self, indx: usize) -> Vec<f64>;
/// Next approx gm computed at a nonmember point p
fn nxnonmember(self, p:&[f64]) -> Vec<f64>;
/// Estimated ecentricity vector from a member point given by its indx to gm
fn eccmember(self, indx: usize) -> Vec<f64>;
/// Estimated eccentricity vector from a non member point
fn eccnonmember(self, p:&[f64]) -> Vec<f64>;
/// Estimated eccentricity vectors from each member point
fn eccentricities(self) -> Vec<Vec<f64>>;
/// Exact eccentricity vectors from each member point by first finding the gm.
/// As well as being more accurate, it is usually faster than `eccentricities` above,
/// especially for large numbers of points.
fn exacteccs(self, eps: f64) -> Vec<Vec<f64>>;
/// Median and quartiles of eccentricities (new robust measure of spread of a multivariate sample)
fn eccinfo(self, eps: f64) -> (MStats,Med,MinMax<f64>) where Vec<f64>:FromIterator<f64>;
/// Medoid and Outlier as defined by eccentricities.
fn emedoid(self, eps: f64) -> MinMax<f64> where Vec<f64>:FromIterator<f64>;
/// Returns sorted eccentricities magnitudes
fn sortedeccs(self, ascending:bool, gm:&[f64]) -> Vec<f64>;
/// Improved Weizsfeld's Algorithm for geometric median, to accuracy eps
// fn nmedian(self, eps: f64) -> Vec<f64>;
/// New algorithm for geometric median, to accuracy eps
fn gmedian(self, eps: f64) -> Vec<f64>;
/// Secant method geometric median
fn smedian(self, eps: f64) -> Vec<f64>;
/// Same a smedian but returns also the number of iterations
fn ismedian(self, eps: f64) -> (Vec<f64>,usize);
}
/// Methods applicable to vector of vectors of generic end type and one argument
/// of a similar kind.
pub trait VecVecg<T,U> {
/// Weighted Arithmetic Centre = weighted euclidian mean of a set of points
fn wacentroid(self,ws: &[U]) -> Vec<f64>;
/// Trend between two sets
fn trend(self, eps: f64, v: Vec<Vec<U>>) -> Vec<f64>;
/// Subtract m from all points - e.g. transform to zero median form
fn translate(self, m: &[U]) -> Vec<Vec<f64>>;
/// Sum of distances from arbitrary point (v) to all the points in self
fn distsum(self, v: &[U]) -> f64;
/// Individual distances from any point v (typically not in self) to all the points in self.
fn dists(self, v: &[U]) -> Vec<f64>;
/// Medoid and Outlier (by distance) of a set of points
// fn medoid(self) -> MinMax<f64>;
/// ( wgm, sorted eccentricities magnitudes, associated cpdf )
fn wsortedeccs(self,ws:&[U],gm:&[f64]) -> (Vec<f64>,Vec<f64>);
/// Sorted cosines magnitudes and cpdf, needs central median
fn wsortedcos(self,medmed:&[U],unitzmed:&[U],ws:&[U]) -> (Vec<f64>,Vec<f64>);
/// Estimated weighted gm computed at a non member point
fn wnxnonmember(self, ws:&[U], p:&[f64]) -> Vec<f64>;
/// Estimated weighted eccentricity for a non-member point
fn weccnonmember(self, ws:&[U], p:&[f64]) -> Vec<f64>;
/// The weighted geometric median to accuracy eps
fn wgmedian(self, ws: &[U], eps: f64) -> Vec<f64>;
/// The weighted geometric median to accuracy eps
fn wsmedian(self, ws: &[U], eps: f64) -> Vec<f64>;
/// Flattened lower triangular part of a covariance matrix of a Vec of f64 vectors.
fn covar(self, med:&[U]) -> Vec<f64>;
/// Flattened lower triangular part of a comediance matrix of a Vec of f64 vectors.
fn comed(self, m:&[U], eps:f64) -> Vec<f64>;
/// Flattened lower triangular part of a covariance matrix for weighted f64 vectors.
fn wcovar(self, ws:&[U], m:&[f64]) -> Vec<f64>;
/// Flattened lower triangular part of a comediance matrix for weighted f64 vectors.
fn wcomed(self, ws:&[U], m:&[f64], eps:f64) -> Vec<f64>;
}