1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398
#![warn(missing_docs)]
//! Statistics, Linear Algebra, Information Measures, Cholesky Matrix Decomposition,
//! Mahalanobis Distance, Multidimensional Data Analysis, Machine Learning and more ...
/// Custom error RError
pub mod error;
/// Vector algebra mutating an f64 vector
pub mod mutvec;
/// Basic statistics on a single generic vector
pub mod stats;
/// Associated functions implemented for struct TriangMat
pub mod triangmat;
/// Vector algebra on two generic vectors
pub mod vecg;
/// Stats and vector algebra on one or two u8 vectors
pub mod vecu8;
/// Multidimensional operations on sets of vectors
pub mod vecvec;
/// Multidimensional operations on sets of vectors, with additional inputs
pub mod vecvecg;
pub use crate::error::{re_error, RError, RE};
// reexporting useful related methods
pub use indxvec::{printing::*, MinMax, Printing};
pub use medians::{error::MedError, MStats, Median, Medianf64};
// Auxiliary Functions
/// Convenience dummy function for quantify closure
pub fn noop(f: &f64) -> f64 {
*f
}
/// Convenience From quantification invocation
pub fn fromop<T: Clone + Into<f64>>(f: &T) -> f64 {
(*f).clone().into()
}
/// t_statistic in 1d: (value-centre)/dispersion
/// generalized to any measure of central tendency and dispersion
pub fn t_stat(val: f64, mstats: MStats) -> f64 {
(val - mstats.centre) / mstats.dispersion
}
/// Sum of natural numbers 1..n.
/// Also the size of an upper or lower triangle
/// of a square array (including the diagonal)
/// to exclude the diagonal, use `sumn(n-1)`
pub fn sumn(n: usize) -> usize {
n * (n + 1) / 2
}
/// Generates full nxn unit (identity) matrix
pub fn unit_matrix(n: usize) -> Vec<Vec<f64>> {
let mut res: Vec<Vec<f64>> = Vec::with_capacity(n);
for i in 0..n {
let mut row = vec![0.; n];
row[i] = 1.0;
res.push(row);
}
res
}
/// Compact Triangular Matrix.
/// TriangMat is typically result of some matrix calculations,
/// so concrete end-type f64 is used for simplicity and accuracy.
/// Data is of length `n*(n+1)/2` instead of `n*n`, saving memory.
/// `.kind == 0` is plain lower triangular matrix.
/// `.kind == 1` is antisymmetric square matrix.
/// `.kind == 2` is symmetric square matrix.
/// `.kind == 3` is upper triangular matrix (transposed lower).
/// `.kind == 4` is upper (transposed lower), antisymmetric.
/// `.kind == 5` is unnecessary, as transposed symmetric matrix is unchanged.
/// Simply adding (or subtracting) 3 to .kind implicitly transposes the matrix.
/// `.kind > 2 are all transposed, individual variants are determined by kind % 3.
/// The full size of the implied square matrix, nxn, is not explicitly stored.
/// It is obtained by solving the quadratic equation: `n^2+n-2s=0` =>
/// `n=((((8 * s + 1) as f64).sqrt() - 1.) / 2.) as usize;`
/// where s = `triangmat.len()` = `n*(n+1)/2`
#[derive(Default, Clone)]
pub struct TriangMat {
/// Matrix kind encoding: 0=plain, 1=antisymmetric, 2=symmetric, +3=transposed
pub kind: usize,
/// Packed 1d vector of triangular matrix data of size `(n+1)*n/2`
pub data: Vec<f64>,
}
// Traits
/// Statistical measures of a single variable (one generic vector of data) and
/// vector algebra applicable to a single (generic) vector.
/// Thus these methods take no arguments.
pub trait Stats {
/// Vector magnitude
fn vmag(self) -> f64;
/// Vector magnitude squared (sum of squares)
fn vmagsq(self) -> f64;
/// vector with reciprocal components
fn vreciprocal(self) -> Result<Vec<f64>, RE>;
/// vector with inverse magnitude
fn vinverse(self) -> Result<Vec<f64>, RE>;
/// negated vector (all components swap sign)
fn negv(self) -> Result<Vec<f64>, RE>;
/// Unit vector
fn vunit(self) -> Result<Vec<f64>, RE>;
/// Harmonic spread from median
fn hmad(self) -> Result<f64, RE>;
/// Arithmetic mean
fn amean(self) -> Result<f64, RE>;
/// Arithmetic mean and standard deviation
fn ameanstd(self) -> Result<MStats, RE>;
/// Weighted arithmetic mean
fn awmean(self) -> Result<f64, RE>;
/// Weighted arithmetic men and standard deviation
fn awmeanstd(self) -> Result<MStats, RE>;
/// Harmonic mean
fn hmean(self) -> Result<f64, RE>;
/// Harmonic mean and experimental standard deviation
fn hmeanstd(self) -> Result<MStats, RE>;
/// Weighted harmonic mean
fn hwmean(self) -> Result<f64, RE>;
/// Weighted harmonic mean and standard deviation
fn hwmeanstd(self) -> Result<MStats, RE>;
/// Geometric mean
fn gmean(self) -> Result<f64, RE>;
/// Geometric mean and standard deviation ratio
fn gmeanstd(self) -> Result<MStats, RE>;
/// Weighed geometric mean
fn gwmean(self) -> Result<f64, RE>;
/// Weighted geometric mean and standard deviation ratio
fn gwmeanstd(self) -> Result<MStats, RE>;
/// Probability density function of a sorted slice
fn pdf(self) -> Vec<f64>;
/// Information (entropy) in nats
fn entropy(self) -> f64;
/// (Auto)correlation coefficient of pairs of successive values of (time series) variable.
fn autocorr(self) -> Result<f64, RE>;
/// Linear transform to interval `[0,1]`
fn lintrans(self) -> Result<Vec<f64>, RE>;
/// Linearly weighted approximate time series derivative at the last (present time) point.
fn dfdt(self) -> Result<f64, RE>;
/// Householder reflection
fn house_reflector(self) -> Vec<f64>;
}
/// Vector Algebra on two vectors (represented here as generic slices).
/// Also included are scalar operations on the `self` vector.
pub trait Vecg {
/// nd t_statistic of self against geometric median and madgm spread
fn t_statistic(self, gm: &[f64], madgm: f64) -> Result<f64, RE>;
/// Dot product of vector self with column c of matrix v
fn columnp<U: Clone + Into<f64>>(self, c: usize, v: &[Vec<U>]) -> f64;
/// Scalar addition to vector
fn sadd<U: Into<f64>>(self, s: U) -> Vec<f64>;
/// Scalar multiplication of vector, creates new vec
fn smult<U: Into<f64>>(self, s: U) -> Vec<f64>;
/// Scalar product
fn dotp<U: Clone + Into<f64>>(self, v: &[U]) -> f64;
/// Product with signature vec of hemispheric frequencies
fn dotsig(self, sig: &[f64]) -> Result<f64, RE>;
/// Cosine of angle between two slices
fn cosine<U: Clone + Into<f64>>(self, v: &[U]) -> f64;
/// Sine of an angle with correct sign in any number of dimensions
fn sine<U: Clone + Into<f64>>(self, v: &[U]) -> f64;
/// Vectors' subtraction (difference)
fn vsub<U: Clone + Into<f64>>(self, v: &[U]) -> Vec<f64>;
/// Vectors difference as unit vector (done together for efficiency)
fn vsubunit<U: Clone + Into<f64>>(self, v: &[U]) -> Vec<f64>;
/// Vector addition
fn vadd<U: Clone + Into<f64>>(self, v: &[U]) -> Vec<f64>;
/// Euclidian distance
fn vdist<U: Clone + Into<f64>>(self, v: &[U]) -> f64;
/// Weighted arithmetic mean of `self:&[T]`, scaled by `ws:&[U]`
fn wvmean<U: Clone + Into<f64>>(self, ws: &[U]) -> f64;
/// Weighted distance of `self:&[T]` to `v:&[V]`, weighted by `ws:&[U]`
fn wvdist<U: Clone + Into<f64>, V: Clone + Into<f64>>(self, ws: &[U], v: &[V]) -> f64;
/// Euclidian distance squared
fn vdistsq<U: Clone + Into<f64>>(self, v: &[U]) -> f64;
/// cityblock distance
fn cityblockd<U: Clone + Into<f64>>(self, v: &[U]) -> f64;
/// Area spanned by two vectors always over their concave angle
fn varea<U: Clone + PartialOrd + Into<f64>>(self, v: &[U]) -> f64;
/// Area proportional to the swept arc
fn varc<U: Clone + PartialOrd + Into<f64>>(self, v: &[U]) -> f64;
/// Vector similarity S in the interval `[0,1]: S = (1+cos(theta))/2`
fn vsim<U: Clone + Into<f64>>(self, v: &[U]) -> f64;
/// Median correlation similarity in the interval [0,1]
fn vcorrsim(self, v: Self) -> f64;
/// Positive dotp [0,2|a||b|]
fn pdotp<U: Clone + PartialOrd + Into<f64>>(self, v: &[U]) -> f64;
/// We define vector dissimilarity D in the interval `[0,1]: D = 1-S = (1-cos(theta))/2`
fn vdisim<U: Clone + Into<f64>>(self, v: &[U]) -> f64;
/// Lower triangular part of a covariance matrix for a single f64 vector.
fn covone<U: Clone + Into<f64>>(self, m: &[U]) -> TriangMat;
/// Kronecker product of two vectors
fn kron<U: Clone + Into<f64>>(self, v: &[U]) -> Vec<f64>;
/// Outer product: matrix multiplication of column self with row v.
fn outer<U: Clone + Into<f64>>(self, v: &[U]) -> Vec<Vec<f64>>;
/// Exterior (Grassman) algebra product: produces 2-blade components
fn wedge<U: Clone + Into<f64>>(self, v: &[U]) -> TriangMat;
/// Geometric (Clifford) algebra product in matrix form: **a*b** + **a∧b**
fn geometric<U: Clone + Into<f64>>(self, b: &[U]) -> TriangMat;
/// Joint probability density function
fn jointpdf<U: Clone + Into<f64>>(self, v: &[U]) -> Result<Vec<f64>, RE>;
/// Joint entropy of `&[T],&[U]` in nats (units of e)
fn jointentropy<U: Clone + Into<f64>>(self, v: &[U]) -> Result<f64, RE>;
/// Statistical pairwise dependence of `&[T] &[U]` variables in the range `[0,1]`
fn dependence<U: Clone + PartialOrd + Into<f64>>(self, v: &[U]) -> Result<f64, RE>;
/// Statistical pairwise independence in the range `[0,1]` based on joint entropy
fn independence<U: Clone + PartialOrd + Into<f64>>(self, v: &[U]) -> Result<f64, RE>;
/// Pearson's correlation.
fn correlation<U: Clone + Into<f64>>(self, v: &[U]) -> f64;
/// Kendall Tau-B correlation.
fn kendalcorr<U: Clone + Into<f64>>(self, v: &[U]) -> f64;
/// Spearman rho correlation.
fn spearmancorr<U: PartialOrd + Clone + Into<f64>>(self, v: &[U]) -> f64;
/// Change to gm that adding point self will cause
fn contribvec_newpt(self, gm: &[f64], recips: f64) -> Result<Vec<f64>,RE>;
/// Normalized magnitude of change to gm that adding point self will cause
fn contrib_newpt(self, gm: &[f64], recips: f64, nf: f64) -> Result<f64,RE>;
/// Contribution of removing point self
fn contribvec_oldpt(self, gm: &[f64], recips: f64) -> Result<Vec<f64>,RE>;
/// Normalized contribution of removing point self (as negative scalar)
fn contrib_oldpt(self, gm: &[f64], recips: f64, nf: f64) -> Result<f64,RE>;
/// Householder reflect
fn house_reflect<U: Clone + PartialOrd + Into<f64>>(self, x: &[U]) -> Vec<f64>;
}
/// Mutable operations on one generic slice.
/// A few of the essential `Vecg` methods are reimplemented here
/// to mutate `self`. This is for efficiency and convenience.
/// For example, in vector iterative methods.
pub trait MutVecg {
/// mutable multiplication by a scalar
fn mutsmult<U: PartialOrd + Into<f64>>(self, _s: U);
/// mutable vector subtraction
fn mutvsub<U: Clone + PartialOrd + Into<f64>>(self, _v: &[U]);
/// mutable vector addition
fn mutvadd<U: Clone + PartialOrd + Into<f64>>(self, _v: &[U]);
/// Invert the magnitude
fn minvert(self);
/// Negate the vector (all components swap sign)
fn mneg(self);
/// Make into a unit vector
fn munit(self);
/// Linearly transform to interval `[0,1]`
fn mlintrans(self);
}
/// Methods specialised to and more efficient, for `&[u8]`
pub trait Vecu8 {
/// Probability density function of bytes data
fn pdfu8(self) -> Vec<f64>;
/// Information (entropy) of &[u8] (in nats)
fn entropyu8(self) -> f64;
/// Joint probability density function (here just co-occurence counts)
/// of paired values in two vectors of bytes of the same length.
/// Needs n^2 x 32bits of memory.
fn jointpdfu8(self, v: &[u8]) -> Result<Vec<Vec<u32>>, RE>;
/// Joint entropy of &[u8],&[u8] (in nats)
fn jointentropyu8(self, v: &[u8]) -> Result<f64, RE>;
/// Statistical pairwise dependence of two `&[u8]` variables in the range `[0,1]`
fn dependenceu8(self, v: &[u8]) -> Result<f64, RE>;
/// Independence in the range `[1,2]` of two `&[u8]` variables
fn independenceu8(self, v: &[u8]) -> Result<f64, RE>;
}
/// Methods applicable to a slice of vectors of generic end type.
/// Operations on a whole set of multidimensional vectors.
pub trait VecVec<T> {
/// Maps a scalar valued closure onto all vectors in self
fn scalar_fn(self, f: impl Fn(&[T]) -> Result<f64, RE>) -> Result<Vec<f64>, RE>;
/// Maps vector valued closure onto all vectors in self and collects
fn vector_fn(self, f: impl Fn(&[T]) -> Result<Vec<f64>, RE>) -> Result<Vec<Vec<f64>>, RE>;
/// Exact radii magnitudes to all member points from the Geometric Median.
fn radii(self, gm: &[f64]) -> Result<Vec<f64>, RE>;
/// Selects a column by number
fn column(self, cnum: usize) -> Vec<f64>;
/// Transpose slice of vecs matrix
fn transpose(self) -> Vec<Vec<f64>>;
/// Normalize columns, so that they are all unit vectors
fn normalize(self) -> Result<Vec<Vec<f64>>, RE>;
/// Householder's method returning matrices (U,R)
fn house_ur(self) -> Result<(TriangMat, TriangMat), RE>;
/// Joint probability density function of n matched slices of the same length
fn jointpdfn(self) -> Result<Vec<f64>, RE>;
/// Joint entropy between a set of vectors of the same length
fn jointentropyn(self) -> Result<f64, RE>;
/// Dependence (component wise) of a set of vectors.
fn dependencen(self) -> Result<f64, RE>;
/// Binary relations between columns of self
fn crossfeatures(self, f: fn(&[T], &[T]) -> f64) -> Result<TriangMat, RE>;
/// Sum of nd points (or vectors)
fn sumv(self) -> Vec<f64>;
/// Arithmetic Centre = euclidian mean of a set of points
fn acentroid(self) -> Vec<f64>;
/// Multithreaded Arithmetic Centre = euclidian mean of a set of points
fn par_acentroid(self) -> Vec<f64>;
/// Geometric Centroid
fn gcentroid(self) -> Result<Vec<f64>, RE>;
/// Harmonic Centroid = harmonic mean of a set of points
fn hcentroid(self) -> Result<Vec<f64>, RE>;
/// Possible first iteration point for geometric medians
fn firstpoint(self) -> Vec<f64>;
/// Sums of distances from each point to all other points
fn distsums(self) -> Vec<f64>;
/// Medoid distance, its index, outlier distance, its index
fn medout(self, gm: &[f64]) -> Result<MinMax<f64>, RE>;
/// Like gmparts, except only does one iteration from any non-member point g
fn nxnonmember(self, g: &[f64]) -> (Vec<f64>, Vec<f64>, f64);
/// Radius of a point specified by its subscript.
fn radius(self, i: usize, gm: &[f64]) -> Result<f64, RE>;
/// Arith mean and std (in MStats struct), Median and mad, Medoid and Outlier (in MinMax struct)
fn eccinfo(self, gm: &[f64]) -> Result<(MStats, MStats, MinMax<f64>), RE>
where
Vec<f64>: FromIterator<f64>;
/// Quasi median, recommended only for comparison purposes
fn quasimedian(self) -> Result<Vec<f64>, RE>;
/// Geometric median estimate's error
fn gmerror(self, gm: &[f64]) -> f64;
/// Proportions of points in idx along each +/-axis (hemisphere)
fn sigvec(self, idx: &[usize]) -> Result<Vec<f64>, RE>;
/// madgm, median of radii from geometric median: stable nd data spread estimator
fn madgm(self, gm: &[f64]) -> Result<f64, RE>;
/// stdgm mean of radii from gm: nd data spread estimator
fn stdgm(self, gm: &[f64]) -> Result<f64, RE>;
/// Collects indices of outer and inner hull points, from zero median data
fn hulls(self) -> (Vec<usize>, Vec<usize>);
/// New algorithm for geometric median, to accuracy eps
fn gmedian(self, eps: f64) -> Vec<f64>;
/// Parallel (multithreaded) implementation of Geometric Median. Possibly the fastest you will find.
fn par_gmedian(self, eps: f64) -> Vec<f64>;
/// Like `gmedian` but returns the sum of unit vecs and the sum of reciprocals of distances.
fn gmparts(self, eps: f64) -> (Vec<f64>, Vec<f64>, f64);
}
/// Methods applicable to slice of vectors of generic end type, plus one other argument
/// of a similar kind
pub trait VecVecg<T, U> {
/// Scalar valued closure for all vectors in self, multiplied by their weights
fn scalar_wfn(
self,
ws: &[U],
f: impl Fn(&[T]) -> Result<f64, RE>,
) -> Result<(Vec<f64>, f64), RE>;
/// Vector valued closure for all vectors in self, multiplied by their weights
fn vector_wfn(
self,
v: &[U],
f: impl Fn(&[T]) -> Result<Vec<f64>, RE>,
) -> Result<(Vec<Vec<f64>>, f64), RE>;
/// 1.0-dotproducts with **v**, in range [0,2]
fn divs(self, v: &[U]) -> Result<Vec<f64>, RE>;
/// weighted 1.0-dotproduct of **v**, with all in self
fn wdivs(self, ws: &[U], v: &[f64]) -> Result<(Vec<f64>, f64), RE>;
/// median of weighted cos deviations from **v**
fn wdivsmed(self, ws: &[U], v: &[f64]) -> Result<f64,RE>;
/// weighted radii to all points in self
fn wradii(self, ws:&[U], gm: &[f64]) -> Result<(Vec<f64>,f64),RE>;
/// wmadgm median of weighted radii from (weighted) gm: stable nd data spread estimator
fn wmadgm(self, ws: &[U], wgm: &[f64]) -> Result<f64, RE>;
/// Leftmultiply (column) vector v by (rows of) matrix self
fn leftmultv(self, v: &[U]) -> Result<Vec<f64>, RE>;
/// Rightmultiply (row) vector v by (columns of) matrix self
fn rightmultv(self, v: &[U]) -> Result<Vec<f64>, RE>;
/// Matrix multiplication self * m
fn matmult(self, m: &[Vec<U>]) -> Result<Vec<Vec<f64>>, RE>;
/// Weighted sum of nd points (or vectors)
fn wsumv(self, ws: &[U]) -> Vec<f64>;
/// Weighted Arithmetic Centre = weighted euclidian mean of a set of points
fn wacentroid(self, ws: &[U]) -> Vec<f64>;
/// Trend between two sets
fn trend(self, eps: f64, v: Vec<Vec<U>>) -> Result<Vec<f64>, RE>;
/// Subtract m from all points - e.g. transform to zero median form
fn translate(self, m: &[U]) -> Result<Vec<Vec<f64>>, RE>;
/// Proportions of points along each +/-axis (hemisphere)
fn wsigvec(self, idx: &[usize], ws: &[U]) -> Result<Vec<f64>, RE>;
/// Dependencies of vector m on each vector in self
fn dependencies(self, m: &[U]) -> Result<Vec<f64>, RE>;
/// Sum of distances from arbitrary point (v) to all the points in self
fn distsum(self, v: &[U]) -> Result<f64, RE>;
/// Individual distances from any point v (typically not in self) to all the points in self.
fn dists(self, v: &[U]) -> Result<Vec<f64>, RE>;
/// Weighted sorted weighted radii magnitudes, normalised
fn wsortedrads(self, ws: &[U], gm: &[f64]) -> Result<Vec<f64>, RE>;
/// Like wgmparts, except only does one iteration from any non-member point g
fn wnxnonmember(self, ws: &[U], g: &[f64]) -> Result<(Vec<f64>, Vec<f64>, f64), RE>;
/// The weighted geometric median to accuracy eps
fn wgmedian(self, ws: &[U], eps: f64) -> Result<Vec<f64>, RE>;
/// Parallel (multithreaded) implementation of the weighted Geometric Median.
fn par_wgmedian(self, ws: &[U], eps: f64) -> Result<Vec<f64>, RE>;
/// Like `wgmedian` but returns also the sum of unit vecs and the sum of reciprocals.
fn wgmparts(self, ws: &[U], eps: f64) -> Result<(Vec<f64>, Vec<f64>, f64), RE>;
/// Flattened lower triangular part of a covariance matrix of a Vec of f64 vectors.
fn covar(self, med: &[U]) -> Result<TriangMat, RE>;
/// Flattened lower triangular part of a covariance matrix for weighted f64 vectors.
fn wcovar(self, ws: &[U], m: &[f64]) -> Result<TriangMat, RE>;
}