1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
#![warn(missing_docs)]
//! Statistics, Linear Algebra, Information Measures, Cholesky Matrix Decomposition,
//! Mahalanobis Distance, Multidimensional Data Analysis, Machine Learning and more ...

/// Custom error RError
pub mod error;
/// Vector algebra mutating an f64 vector
pub mod mutvec;
/// Basic statistics on a single generic vector
pub mod stats;
/// Associated functions implemented for struct TriangMat
pub mod triangmat;
/// Vector algebra on two generic vectors
pub mod vecg;
/// Stats and vector algebra on one or two u8 vectors
pub mod vecu8;
/// Multidimensional operations on sets of vectors
pub mod vecvec;
/// Multidimensional operations on sets of vectors, with additional inputs
pub mod vecvecg;

pub use crate::error::{re_error, RError, RE};
// reexporting useful related methods
pub use indxvec::{printing::*, MinMax, Printing};
pub use medians::{error::MedError, MStats, Median, Medianf64};

// Auxiliary Functions

/// Convenience dummy function for quantify closure
pub fn noop(f: &f64) -> f64 {
    *f
}

/// Convenience From quantification invocation
pub fn fromop<T: Clone + Into<f64>>(f: &T) -> f64 {
    (*f).clone().into()
}

/// t_statistic in 1d: (value-centre)/dispersion
/// generalized to any measure of central tendency and dispersion
pub fn t_stat(val: f64, mstats: MStats) -> f64 {
    (val - mstats.centre) / mstats.dispersion
}

/// Sum of natural numbers 1..n.
/// Also the size of an upper or lower triangle
/// of a square array (including the diagonal)
/// to exclude the diagonal, use `sumn(n-1)`
pub fn sumn(n: usize) -> usize {
    n * (n + 1) / 2
}

/// Generates full nxn unit (identity) matrix
pub fn unit_matrix(n: usize) -> Vec<Vec<f64>> {
    let mut res: Vec<Vec<f64>> = Vec::with_capacity(n);
    for i in 0..n {
        let mut row = vec![0.; n];
        row[i] = 1.0;
        res.push(row);
    }
    res
}

/// Compact Triangular Matrix.
/// TriangMat is typically result of some matrix calculations,
/// so concrete end-type f64 is used for simplicity and accuracy.
/// Data is of length `n*(n+1)/2` instead of `n*n`, saving memory.  
/// `.kind == 0` is plain lower triangular matrix.  
/// `.kind == 1` is antisymmetric square matrix.  
/// `.kind == 2` is symmetric square matrix.  
/// `.kind == 3` is upper triangular matrix (transposed lower).  
/// `.kind == 4` is upper (transposed lower), antisymmetric.  
/// `.kind == 5` is unnecessary, as transposed symmetric matrix is unchanged.  
/// Simply adding (or subtracting) 3 to .kind implicitly transposes the matrix.
/// `.kind > 2 are all transposed, individual variants are determined by kind % 3.
/// The full size of the implied square matrix, nxn, is not explicitly stored.
/// It is obtained by solving the quadratic equation: `n^2+n-2s=0` =>
/// `n=((((8 * s + 1) as f64).sqrt() - 1.) / 2.) as usize;`
/// where s = `triangmat.len()` = `n*(n+1)/2`
#[derive(Default, Clone)]
pub struct TriangMat {
    /// Matrix kind encoding: 0=plain, 1=antisymmetric, 2=symmetric, +3=transposed
    pub kind: usize,
    /// Packed 1d vector of triangular matrix data of size `(n+1)*n/2`
    pub data: Vec<f64>,
}

// Traits

/// Statistical measures of a single variable (one generic vector of data) and
/// vector algebra applicable to a single (generic) vector.
/// Thus these methods take no arguments.
pub trait Stats {
    /// Vector magnitude
    fn vmag(self) -> f64;
    /// Vector magnitude squared (sum of squares)
    fn vmagsq(self) -> f64;
    /// vector with reciprocal components
    fn vreciprocal(self) -> Result<Vec<f64>, RE>;
    /// vector with inverse magnitude
    fn vinverse(self) -> Result<Vec<f64>, RE>;
    /// negated vector (all components swap sign)
    fn negv(self) -> Result<Vec<f64>, RE>;
    /// Unit vector
    fn vunit(self) -> Result<Vec<f64>, RE>;
    /// Harmonic spread from median
    fn hmad(self) -> Result<f64, RE>;
    /// Arithmetic mean
    fn amean(self) -> Result<f64, RE>;
    /// Arithmetic mean and standard deviation
    fn ameanstd(self) -> Result<MStats, RE>;
    /// Weighted arithmetic mean
    fn awmean(self) -> Result<f64, RE>;
    /// Weighted arithmetic men and standard deviation
    fn awmeanstd(self) -> Result<MStats, RE>;
    /// Harmonic mean
    fn hmean(self) -> Result<f64, RE>;
    /// Harmonic mean and experimental standard deviation
    fn hmeanstd(self) -> Result<MStats, RE>;
    /// Weighted harmonic mean
    fn hwmean(self) -> Result<f64, RE>;
    /// Weighted harmonic mean and standard deviation
    fn hwmeanstd(self) -> Result<MStats, RE>;
    /// Geometric mean
    fn gmean(self) -> Result<f64, RE>;
    /// Geometric mean and standard deviation ratio
    fn gmeanstd(self) -> Result<MStats, RE>;
    /// Weighed geometric mean
    fn gwmean(self) -> Result<f64, RE>;
    /// Weighted geometric mean and standard deviation ratio
    fn gwmeanstd(self) -> Result<MStats, RE>;
    /// Probability density function of a sorted slice
    fn pdf(self) -> Vec<f64>;
    /// Information (entropy) in nats
    fn entropy(self) -> f64;
    /// (Auto)correlation coefficient of pairs of successive values of (time series) variable.
    fn autocorr(self) -> Result<f64, RE>;
    /// Linear transform to interval `[0,1]`
    fn lintrans(self) -> Result<Vec<f64>, RE>;
    /// Linearly weighted approximate time series derivative at the last (present time) point.
    fn dfdt(self) -> Result<f64, RE>;
    /// Householder reflection
    fn house_reflector(self) -> Vec<f64>;
}

/// Vector Algebra on two vectors (represented here as generic slices).
/// Also included are scalar operations on the `self` vector.
pub trait Vecg {
    /// nd t_statistic of self against geometric median and madgm spread
    fn t_statistic(self, gm: &[f64], madgm: f64) -> Result<f64, RE>;
    /// Dot product of vector self with column c of matrix v
    fn columnp<U: Clone + Into<f64>>(self, c: usize, v: &[Vec<U>]) -> f64;
    /// Scalar addition to vector
    fn sadd<U: Into<f64>>(self, s: U) -> Vec<f64>;
    /// Scalar multiplication of vector, creates new vec
    fn smult<U: Into<f64>>(self, s: U) -> Vec<f64>;
    /// Scalar product
    fn dotp<U: Clone + Into<f64>>(self, v: &[U]) -> f64;
    /// Product with signature vec of hemispheric frequencies
    fn dotsig(self, sig: &[f64]) -> Result<f64, RE>;
    /// Cosine of angle between two slices
    fn cosine<U: Clone + Into<f64>>(self, v: &[U]) -> f64;
    /// Sine of an angle with correct sign in any number of dimensions
    fn sine<U: Clone + Into<f64>>(self, v: &[U]) -> f64;
    /// Vectors' subtraction (difference)
    fn vsub<U: Clone + Into<f64>>(self, v: &[U]) -> Vec<f64>;
    /// Vectors difference as unit vector (done together for efficiency)
    fn vsubunit<U: Clone + Into<f64>>(self, v: &[U]) -> Vec<f64>;
    /// Vector addition
    fn vadd<U: Clone + Into<f64>>(self, v: &[U]) -> Vec<f64>;
    /// Euclidian distance
    fn vdist<U: Clone + Into<f64>>(self, v: &[U]) -> f64;
    /// Weighted arithmetic mean of `self:&[T]`, scaled by `ws:&[U]`
    fn wvmean<U: Clone + Into<f64>>(self, ws: &[U]) -> f64;
    /// Weighted distance of `self:&[T]` to `v:&[V]`, weighted by `ws:&[U]`
    fn wvdist<U: Clone + Into<f64>, V: Clone + Into<f64>>(self, ws: &[U], v: &[V]) -> f64;
    /// Euclidian distance squared
    fn vdistsq<U: Clone + Into<f64>>(self, v: &[U]) -> f64;
    /// cityblock distance
    fn cityblockd<U: Clone + Into<f64>>(self, v: &[U]) -> f64;
    /// Area spanned by two vectors always over their concave angle
    fn varea<U: Clone + PartialOrd + Into<f64>>(self, v: &[U]) -> f64;
    /// Area proportional to the swept arc
    fn varc<U: Clone + PartialOrd + Into<f64>>(self, v: &[U]) -> f64;
    /// Vector similarity S in the interval `[0,1]: S = (1+cos(theta))/2`
    fn vsim<U: Clone + Into<f64>>(self, v: &[U]) -> f64;
    /// Median correlation similarity in the interval [0,1]
    fn vcorrsim(self, v: Self) -> f64;
    /// Positive dotp [0,2|a||b|]
    fn pdotp<U: Clone + PartialOrd + Into<f64>>(self, v: &[U]) -> f64;
    /// We define vector dissimilarity D in the interval `[0,1]: D = 1-S = (1-cos(theta))/2`
    fn vdisim<U: Clone + Into<f64>>(self, v: &[U]) -> f64;
    /// Lower triangular part of a covariance matrix for a single f64 vector.
    fn covone<U: Clone + Into<f64>>(self, m: &[U]) -> TriangMat;
    /// Kronecker product of two vectors
    fn kron<U: Clone + Into<f64>>(self, v: &[U]) -> Vec<f64>;
    /// Outer product: matrix multiplication of column self with row v.
    fn outer<U: Clone + Into<f64>>(self, v: &[U]) -> Vec<Vec<f64>>;
    /// Exterior (Grassman) algebra product: produces 2-blade components
    fn wedge<U: Clone + Into<f64>>(self, v: &[U]) -> TriangMat;
    /// Geometric (Clifford) algebra product in matrix form: **a*b** + **a∧b**
    fn geometric<U: Clone + Into<f64>>(self, b: &[U]) -> TriangMat;
    /// Joint probability density function
    fn jointpdf<U: Clone + Into<f64>>(self, v: &[U]) -> Result<Vec<f64>, RE>;
    /// Joint entropy of `&[T],&[U]` in nats (units of e)
    fn jointentropy<U: Clone + Into<f64>>(self, v: &[U]) -> Result<f64, RE>;
    /// Statistical pairwise dependence of `&[T] &[U]` variables in the range `[0,1]`
    fn dependence<U: Clone + PartialOrd + Into<f64>>(self, v: &[U]) -> Result<f64, RE>;
    /// Statistical pairwise independence in the range `[0,1]` based on joint entropy
    fn independence<U: Clone + PartialOrd + Into<f64>>(self, v: &[U]) -> Result<f64, RE>;
    /// Pearson's correlation.  
    fn correlation<U: Clone + Into<f64>>(self, v: &[U]) -> f64;
    /// Kendall Tau-B correlation.
    fn kendalcorr<U: Clone + Into<f64>>(self, v: &[U]) -> f64;
    /// Spearman rho correlation.
    fn spearmancorr<U: PartialOrd + Clone + Into<f64>>(self, v: &[U]) -> f64;
    /// Change to gm that adding point self will cause
    fn contribvec_newpt(self, gm: &[f64], recips: f64) -> Result<Vec<f64>,RE>;
    /// Normalized magnitude of change to gm that adding point self will cause
    fn contrib_newpt(self, gm: &[f64], recips: f64, nf: f64) -> Result<f64,RE>;
    /// Contribution of removing point self
    fn contribvec_oldpt(self, gm: &[f64], recips: f64) -> Result<Vec<f64>,RE>;
    /// Normalized contribution of removing point self (as negative scalar)
    fn contrib_oldpt(self, gm: &[f64], recips: f64, nf: f64) -> Result<f64,RE>;
    /// Householder reflect
    fn house_reflect<U: Clone + PartialOrd + Into<f64>>(self, x: &[U]) -> Vec<f64>;
}

/// Mutable operations on one generic slice.
/// A few of the essential `Vecg` methods are reimplemented here
/// to mutate `self`. This is for efficiency and convenience.
/// For example, in vector iterative methods.
pub trait MutVecg {
    /// mutable multiplication by a scalar
    fn mutsmult<U: PartialOrd + Into<f64>>(self, _s: U);
    /// mutable vector subtraction
    fn mutvsub<U: Clone + PartialOrd + Into<f64>>(self, _v: &[U]);
    /// mutable vector addition
    fn mutvadd<U: Clone + PartialOrd + Into<f64>>(self, _v: &[U]);
    /// Invert the magnitude
    fn minvert(self);
    /// Negate the vector (all components swap sign)
    fn mneg(self);
    /// Make into a unit vector
    fn munit(self);
    /// Linearly transform to interval `[0,1]`
    fn mlintrans(self);
}

/// Methods specialised to and more efficient, for `&[u8]`
pub trait Vecu8 {
    /// Probability density function of bytes data
    fn pdfu8(self) -> Vec<f64>;
    /// Information (entropy) of &[u8] (in nats)
    fn entropyu8(self) -> f64;
    /// Joint probability density function (here just co-occurence counts)
    /// of paired values in two vectors of bytes of the same length.
    /// Needs n^2 x 32bits of memory.
    fn jointpdfu8(self, v: &[u8]) -> Result<Vec<Vec<u32>>, RE>;
    /// Joint entropy of &[u8],&[u8] (in nats)
    fn jointentropyu8(self, v: &[u8]) -> Result<f64, RE>;
    /// Statistical pairwise dependence of two `&[u8]` variables in the range `[0,1]`
    fn dependenceu8(self, v: &[u8]) -> Result<f64, RE>;
    /// Independence in the range `[1,2]` of two `&[u8]` variables
    fn independenceu8(self, v: &[u8]) -> Result<f64, RE>;
}

/// Methods applicable to a slice of vectors of generic end type.
/// Operations on a whole set of multidimensional vectors.
pub trait VecVec<T> {
    /// Maps a scalar valued closure onto all vectors in self
    fn scalar_fn(self, f: impl Fn(&[T]) -> Result<f64, RE>) -> Result<Vec<f64>, RE>;
    /// Maps vector valued closure onto all vectors in self and collects
    fn vector_fn(self, f: impl Fn(&[T]) -> Result<Vec<f64>, RE>) -> Result<Vec<Vec<f64>>, RE>;
    /// Exact radii magnitudes to all member points from the Geometric Median.
    fn radii(self, gm: &[f64]) -> Result<Vec<f64>, RE>;
    /// Selects a column by number
    fn column(self, cnum: usize) -> Vec<f64>;
    /// Transpose slice of vecs matrix
    fn transpose(self) -> Vec<Vec<f64>>;
    /// Normalize columns, so that they are all unit vectors
    fn normalize(self) -> Result<Vec<Vec<f64>>, RE>;
    /// Householder's method returning matrices (U,R)
    fn house_ur(self) -> Result<(TriangMat, TriangMat), RE>;
    /// Joint probability density function of n matched slices of the same length
    fn jointpdfn(self) -> Result<Vec<f64>, RE>;
    /// Joint entropy between a set of vectors of the same length
    fn jointentropyn(self) -> Result<f64, RE>;
    /// Dependence (component wise) of a set of vectors.
    fn dependencen(self) -> Result<f64, RE>;
    /// Binary relations between columns of self
    fn crossfeatures(self, f: fn(&[T], &[T]) -> f64) -> Result<TriangMat, RE>;
    /// Sum of nd points (or vectors)
    fn sumv(self) -> Vec<f64>;
    /// Arithmetic Centre = euclidian mean of a set of points
    fn acentroid(self) -> Vec<f64>;
    /// Multithreaded Arithmetic Centre = euclidian mean of a set of points
    fn par_acentroid(self) -> Vec<f64>;
    /// Geometric Centroid
    fn gcentroid(self) -> Result<Vec<f64>, RE>;
    /// Harmonic Centroid = harmonic mean of a set of points
    fn hcentroid(self) -> Result<Vec<f64>, RE>;
    /// Possible first iteration point for geometric medians
    fn firstpoint(self) -> Vec<f64>;
    /// Sums of distances from each point to all other points
    fn distsums(self) -> Vec<f64>;
    /// Medoid distance, its index, outlier distance, its index
    fn medout(self, gm: &[f64]) -> Result<MinMax<f64>, RE>;
    /// Like gmparts, except only does one iteration from any non-member point g
    fn nxnonmember(self, g: &[f64]) -> (Vec<f64>, Vec<f64>, f64);
    /// Radius of a point specified by its subscript.    
    fn radius(self, i: usize, gm: &[f64]) -> Result<f64, RE>;
    /// Arith mean and std (in MStats struct), Median and mad, Medoid and Outlier (in MinMax struct)
    fn eccinfo(self, gm: &[f64]) -> Result<(MStats, MStats, MinMax<f64>), RE>
    where
        Vec<f64>: FromIterator<f64>;
    /// Quasi median, recommended only for comparison purposes
    fn quasimedian(self) -> Result<Vec<f64>, RE>;
    /// Geometric median estimate's error
    fn gmerror(self, gm: &[f64]) -> f64;
    /// Proportions of points in idx along each +/-axis (hemisphere)
    fn sigvec(self, idx: &[usize]) -> Result<Vec<f64>, RE>;
    /// madgm, median of radii from geometric median: stable nd data spread estimator
    fn madgm(self, gm: &[f64]) -> Result<f64, RE>;
    /// stdgm mean of radii from gm: nd data spread estimator
    fn stdgm(self, gm: &[f64]) -> Result<f64, RE>;
    /// Collects indices of outer and inner hull points, from zero median data
    fn hulls(self) -> (Vec<usize>, Vec<usize>);
    /// New algorithm for geometric median, to accuracy eps    
    fn gmedian(self, eps: f64) -> Vec<f64>;
    /// Parallel (multithreaded) implementation of Geometric Median. Possibly the fastest you will find.
    fn par_gmedian(self, eps: f64) -> Vec<f64>;
    /// Like `gmedian` but returns the sum of unit vecs and the sum of reciprocals of distances.
    fn gmparts(self, eps: f64) -> (Vec<f64>, Vec<f64>, f64);
}

/// Methods applicable to slice of vectors of generic end type, plus one other argument
/// of a similar kind
pub trait VecVecg<T, U> {
    /// Scalar valued closure for all vectors in self, multiplied by their weights
    fn scalar_wfn(
        self,
        ws: &[U],
        f: impl Fn(&[T]) -> Result<f64, RE>,
    ) -> Result<(Vec<f64>, f64), RE>;
    /// Vector valued closure for all vectors in self, multiplied by their weights
    fn vector_wfn(
        self,
        v: &[U],
        f: impl Fn(&[T]) -> Result<Vec<f64>, RE>,
    ) -> Result<(Vec<Vec<f64>>, f64), RE>;
    /// 1.0-dotproducts with **v**, in range [0,2]
    fn divs(self, v: &[U]) -> Result<Vec<f64>, RE>;
    /// weighted 1.0-dotproduct of **v**, with all in self
    fn wdivs(self, ws: &[U], v: &[f64]) -> Result<(Vec<f64>, f64), RE>;
    /// median of weighted cos deviations from **v**
    fn wdivsmed(self, ws: &[U], v: &[f64]) -> Result<f64,RE>;
    /// weighted radii to all points in self
    fn wradii(self, ws:&[U], gm: &[f64]) -> Result<(Vec<f64>,f64),RE>;
    /// wmadgm median of weighted radii from (weighted) gm: stable nd data spread estimator
    fn wmadgm(self, ws: &[U], wgm: &[f64]) -> Result<f64, RE>;
    /// Leftmultiply (column) vector v by (rows of) matrix self
    fn leftmultv(self, v: &[U]) -> Result<Vec<f64>, RE>;
    /// Rightmultiply (row) vector v by (columns of) matrix self
    fn rightmultv(self, v: &[U]) -> Result<Vec<f64>, RE>;
    /// Matrix multiplication self * m
    fn matmult(self, m: &[Vec<U>]) -> Result<Vec<Vec<f64>>, RE>;
    /// Weighted sum of nd points (or vectors)
    fn wsumv(self, ws: &[U]) -> Vec<f64>;
    /// Weighted Arithmetic Centre = weighted euclidian mean of a set of points
    fn wacentroid(self, ws: &[U]) -> Vec<f64>;
    /// Trend between two sets
    fn trend(self, eps: f64, v: Vec<Vec<U>>) -> Result<Vec<f64>, RE>;
    /// Subtract m from all points - e.g. transform to zero median form
    fn translate(self, m: &[U]) -> Result<Vec<Vec<f64>>, RE>;
    /// Proportions of points along each +/-axis (hemisphere)
    fn wsigvec(self, idx: &[usize], ws: &[U]) -> Result<Vec<f64>, RE>;
    /// Dependencies of vector m on each vector in self
    fn dependencies(self, m: &[U]) -> Result<Vec<f64>, RE>;
    /// Sum of distances from arbitrary point (v) to all the points in self      
    fn distsum(self, v: &[U]) -> Result<f64, RE>;
    /// Individual distances from any point v (typically not in self) to all the points in self.    
    fn dists(self, v: &[U]) -> Result<Vec<f64>, RE>;
    /// Weighted sorted weighted radii magnitudes, normalised
    fn wsortedrads(self, ws: &[U], gm: &[f64]) -> Result<Vec<f64>, RE>;
    /// Like wgmparts, except only does one iteration from any non-member point g
    fn wnxnonmember(self, ws: &[U], g: &[f64]) -> Result<(Vec<f64>, Vec<f64>, f64), RE>;
    /// The weighted geometric median to accuracy eps
    fn wgmedian(self, ws: &[U], eps: f64) -> Result<Vec<f64>, RE>;
    /// Parallel (multithreaded) implementation of the weighted Geometric Median.  
    fn par_wgmedian(self, ws: &[U], eps: f64) -> Result<Vec<f64>, RE>;
    /// Like `wgmedian` but returns also the sum of unit vecs and the sum of reciprocals.
    fn wgmparts(self, ws: &[U], eps: f64) -> Result<(Vec<f64>, Vec<f64>, f64), RE>;
    /// Flattened lower triangular part of a covariance matrix of a Vec of f64 vectors.
    fn covar(self, med: &[U]) -> Result<TriangMat, RE>;
    /// Flattened lower triangular part of a covariance matrix for weighted f64 vectors.
    fn wcovar(self, ws: &[U], m: &[f64]) -> Result<TriangMat, RE>;
}