1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
pub mod statsg;
pub mod mutstats;
pub mod vecg;
pub mod vecu8;
pub mod mutvec;
pub mod vecvec;
pub mod vecvecg;

// reexporting to avoid duplication and for backwards compatibility
pub use indxvec::{MinMax,Printing,here}; 
/// simple error handling
use anyhow::{Result,bail}; 
use core::iter::FromIterator;

// Structs //

/// Median and quartiles
#[derive(Default)]
pub struct Med {
    pub lquartile: f64,
    pub median: f64,
    pub uquartile: f64,
}
impl std::fmt::Display for Med {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        write!(
            f,
            "median:\n\tLower Q: {}\n\tMedian:  {}\n\tUpper Q: {}",
            self.lquartile.gr(),
            self.median.gr(),
            self.uquartile.gr()
        )
    }
}

/// Mean and standard deviation (or std ratio for geometric mean)
#[derive(Default)]
pub struct MStats {
    pub mean: f64,
    pub std: f64,
}
impl std::fmt::Display for MStats {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        write!(f, "mean±std: {}±{}", self.mean.gr(), self.std.gr())
    }
}

// Auxiliary Functions //

/// Necessary downcast of a whole i64 slice to f64  
pub fn i64tof64(s: &[i64]) -> Vec<f64> {
    s.iter().map(| &x | x as f64).collect()
}

/// Sum of linear weights 1..n.
/// Also the size of an upper or lower triangle 
/// of a square array (including the diagonal)
pub fn wsum(n: usize) -> f64 {
    (n * (n + 1)) as f64 / 2.
}


// Traits

/// Statistical measures of a single variable (one generic vector of data) and 
/// vector algebra applicable to a single (generic) vector. 
/// Thus these methods take no arguments.
/// There is just one limitation: data of end type `i64` has to be explicitly converted to `f64`.
/// That is to raise awareness that, in this particular case, some precision may be lost. 
/// Function `statsg::i64tof64(&s)` will convert the whole slice.
pub trait Stats { 

    /// Vector magnitude
    fn vmag(self) -> f64;
    /// Vector magnitude squared (sum of squares)
    fn vmagsq(self) -> f64;
    /// vector with reciprocal components
    fn vreciprocal(self) -> Result<Vec<f64>>;
    /// vector with inverse magnitude 
    fn vinverse(self) -> Result<Vec<f64>>;
    // negated vector (all components swap sign)
    fn negv(self) -> Vec<f64>;
    /// Unit vector
    fn vunit(self) -> Vec<f64>;
    
    /// Arithmetic mean
    fn amean(self) -> Result<f64> 
        where Self: std::marker::Sized { bail!("amean not implemented for this type")}
    /// Arithmetic mean and standard deviation
    fn ameanstd(self) -> Result<MStats> 
        where Self: std::marker::Sized { bail!("ameanstd not implemented for this type")}
    /// Weighted arithmetic mean
    fn awmean(self) -> Result<f64> 
        where Self: std::marker::Sized { bail!("awmean not implemented for this type")}
    /// Weighted arithmetic men and standard deviation
    fn awmeanstd(self) -> Result<MStats>
        where Self: std::marker::Sized { bail!("awmeanstd not implemented for this type")}
    /// Harmonic mean
    fn hmean(self) -> Result<f64>
        where Self: std::marker::Sized { bail!("hmean not implemented for this type")}
    /// Harmonic mean and experimental standard deviation 
    fn hmeanstd(self) -> Result<MStats>
        where Self: std::marker::Sized { bail!("hmeanstd not implemented for this type")}
    /// Weighted harmonic mean
    fn hwmean(self) -> Result<f64> 
        where Self: std::marker::Sized { bail!("hwmean not implemented for this type")}
    /// Weighted harmonic mean and standard deviation 
    fn hwmeanstd(self) -> Result<MStats>
        where Self: std::marker::Sized { bail!("hwgmeanstd not implemented for this type")}
    /// Geometric mean
    fn gmean(self) -> Result<f64>
        where Self: std::marker::Sized { bail!("gmean not implemented for this type")}
    /// Geometric mean and standard deviation ratio
    fn gmeanstd(self) -> Result<MStats>
        where Self: std::marker::Sized { bail!("gmeanstd not implemented for this type")}
    /// Weighed geometric mean
    fn gwmean(self) -> Result<f64> 
        where Self: std::marker::Sized { bail!("gwmean not implemented for this type")}
    /// Weighted geometric mean and standard deviation ratio
    fn gwmeanstd(self) -> Result<MStats>
        where Self: std::marker::Sized { bail!("gwmeanstd not implemented for this type")}
    /// Median and quartiles
    fn median(self) -> Result<Med>
        where Self: std::marker::Sized { bail!("median not implemented for this type")}
    /// Fast median only by partitioning
    fn newmedian(self) -> Result<f64> 
        where Self: std::marker::Sized { bail!("newmedian not implemented for this type")}
    /// MAD median absolute deviation: data spread estimator that is more stable than variance
    fn mad(self) -> Result<f64> 
        where Self: std::marker::Sized { bail!("median not implemented for this type")}    
    /// Zero median data, obtained by subtracting the median
    fn zeromedian(self) -> Result<Vec<f64>>
        where Self: std::marker::Sized { bail!("gwmeanstd not implemented for this type")}
    /// Probability density function of a sorted slice
    fn pdf(self) -> Vec<f64>;
    /// Information (entropy) in nats
    fn entropy(self) -> f64;
    /// (Auto)correlation coefficient of pairs of successive values of (time series) variable.
    fn autocorr(self) -> f64; 
    /// Linear transform to interval [0,1]
    fn lintrans(self) -> Vec<f64>;
    /// Reconstructs the full symmetric matrix from its lower diagonal compact form
    fn symmatrix(self) -> Vec<Vec<f64>>;
   }

/// A few of the `Stats` methods are reimplemented here
/// (only for f64), so that they mutate `self` in-place.
/// This is more efficient and convenient in some circumstances.
pub trait MutStats {
    /// Invert the magnitude
    fn minvert(self); 
    // negate vector (all components swap sign)
    fn mneg(self);
    /// Make into a unit vector
    fn munit(self);
    /// Linearly transform to interval [0,1]
    fn mlintrans(self);
    /// Sort in place  
    fn msortf(self); 
}

/// Vector Algebra on two vectors (represented here as generic slices).
/// Also included are scalar operations on the `self` vector.
pub trait Vecg<T,U> { 
    /// Scalar addition to vector
    fn sadd(self, s:U) -> Vec<f64>;
    /// Scalar multiplication of vector, creates new vec
     fn smult(self, s:U) -> Vec<f64>;
    /// Scalar product 
    fn dotp(self, v:&[U]) -> f64;
    /// Cosine of angle between two slices
    fn cosine(self, v:&[U]) -> f64;
    /// Vectors' subtraction (difference)
    fn vsub(self, v:&[U]) -> Vec<f64>;
    /// Vectors difference as unit vector (done together for efficiency)
    fn vsubunit(self, v: &[U]) -> Vec<f64>; 
    /// Vector addition
    fn vadd(self, v:&[U]) -> Vec<f64>; 
    /// Euclidian distance 
    fn vdist(self, v:&[U]) -> f64;
    /// Euclidian distance squared
    fn vdistsq(self, v:&[U]) -> f64; 
     /// cityblock distance
    fn cityblockd(self, v:&[U]) -> f64;
    /// Magnitude of the cross product |a x b| = |a||b|sin(theta).
    /// Attains maximum `|a|.|b|` when the vectors are othogonal.
    fn varea(self, v:&[U]) -> f64;
    /// Area proportional to the swept arc
     fn varc(self, v:&[U]) -> f64; 
    /// Vector similarity S in the interval [0,1]: S = (1+cos(theta))/2
    fn vsim(self, v:&[U]) -> f64;
    /// We define vector dissimilarity D in the interval [0,1]: D = 1-S = (1-cos(theta))/2
    fn vdisim(self, v:&[U]) -> f64;
    /// Lower triangular part of a covariance matrix for a single f64 vector.
    fn covone(self, m:&[U]) -> Vec<f64>;
    /// Kronecker product of two vectors 
    fn kron(self, m:&[U]) -> Vec<f64>; 
    /// Outer product of two vectors 
    fn outer(self, m:&[U]) -> Vec<Vec<f64>>; 
    /// Joint probability density function 
    fn jointpdf(self,v:&[U]) -> Vec<f64>;     
    /// Joint entropy of &[T],&[U] in nats (units of e)
    fn jointentropy(self, v:&[U]) -> f64;
    /// Statistical pairwise dependence of &[T] &[U] variables in the range [0,1] 
    fn dependence(self, v:&[U]) -> f64;   
    /// Statistical pairwise independence in the range [1,2] based on joint entropy
    fn independence(self, v:&[U]) -> f64; 
    /// Cholesky decomposition of positive definite matrix into LLt
    // fn cholesky(self) -> Vec<f64>;
    /// Pearson's correlation.  
    fn correlation(self, v:&[U]) -> f64;
    /// Median based correlation
    fn mediancorr(self, v: &[U]) -> f64;
    /// Kendall Tau-B correlation. 
    fn kendalcorr(self, v:&[U]) -> f64;
    /// Spearman rho correlation.
    fn spearmancorr(self, v:&[U]) -> f64;   
}
/// Vector algebra: methods operaing on one vector of generic end type
/// and the second argument, which is a scalar or vector of f64 end type.
pub trait Vecf64<_T> {
    /// Scalar multiplication by f64
    fn smultf64(self, s:f64) -> Vec<f64>;
    /// Vector subtraction of `&[f64]`
    fn vsubf64(self, v:&[f64]) -> Vec<f64>;
    /// Vectors difference unitised (done together for efficiency)
    fn vsubunitf64(self, v: &[f64]) -> Vec<f64>;
    /// Euclidian distance to `&[f64]`
    fn vdistf64(self, v:&[f64]) -> f64;
    /// Adding `&[f64]` 
    fn vaddf64(self, v:&[f64]) -> Vec<f64>;
    /// Euclidian distance to `&[f64]` squared  
    fn vdistsqf64(self, v:&[f64]) -> f64;
}

/// Mutable vector operations that take one generic argument. 
/// A few of the essential `Vecg` methods are reimplemented here 
/// to mutate `self` in-place (only for f64). 
/// This is for efficiency and convenience, for example, in
/// vector iterative methods.
pub trait MutVecg<U> {
    /// mutable multiplication by a scalar
    fn mutsmult(self, _s:U);  
    /// mutable vector subtraction
    fn mutvsub(self, _v: &[U]); 
    /// mutable vector addition
    fn mutvadd(self, _v: &[U]);  
}

/// Vector mutating operations that take one argument of f64 end type.
pub trait MutVecf64 {
    /// mutable multiplication by a scalar
    fn mutsmultf64(self, _s:f64); 
    /// mutable vector subtraction 
    fn mutvsubf64(self, _v:&[f64]);
    /// mutable vector addition
    fn mutvaddf64(self, _v:&[f64]); 
}

/// Methods specialised to, or more efficient for `&[u8]`
pub trait Vecu8 {
    /// Scalar product of two (positive) u8 slices.   
    /// Must be of the same length - no error checking (for speed)
    fn dotpu8(self, v: &[u8]) -> u64; 
    /// Cosine between two (positive) u8 slices.
    fn cosineu8(self, v: &[u8]) -> f64; 
    /// Vector subtraction (converts results to f64 as they can be negative)
    fn vsubu8(self, v: &[u8]) -> Vec<f64>;
    /// Vector addition ( converts results to f64, as they can exceed 255 )
    fn vaddu8(self, v: &[u8]) -> Vec<f64>;
    /// Euclidian distance between self &[u8] and v:&[u8].  
    /// Faster than vsub followed by vmag, as both are done in one loop
    fn vdistu8(self, v: &[u8]) -> f64; 
    /// cityblock distance
    fn cityblockdu8(self, v:&[u8]) -> f64;
    ///Euclidian distance squared, the arguments are both of &[u8] type  
    fn vdistsqu8(self, v: &[u8]) -> u64; 
    /// Probability density function of bytes data
    fn pdfu8(self) -> Vec<f64>;
    /// Information (entropy) of &[u8] (in nats)
    fn entropyu8(self) -> f64;
    /// Joint probability density function (here just co-occurence counts) 
    /// of paired values in two vectors of bytes of the same length.
    /// Needs n^2 x 32bits of memory. 
    fn jointpdfu8(self, v:&[u8]) -> Vec<Vec<u32>>;
    /// Joint entropy of &[u8],&[u8] (in nats)
    fn jointentropyu8(self, v:&[u8]) -> f64;
    /// Statistical pairwise dependence of two &[u8] variables in the range [0,1] 
    fn dependenceu8(self, v:&[u8]) -> f64;   
    /// Independence in the range [1,2] of two &[u8] variables 
    fn independenceu8(self, v:&[u8]) -> f64;
}

/// A few specialised methods applicable to `Vec<Vec<u8>>` (vector of vectors of bytes).
pub trait VecVecu8 { 
    /// Centroid = euclidian mean of a set of points  
    fn acentroid(self) -> Vec<f64>;
    /// Weighted Centre
    fn wacentroid(self,ws: &[u8]) -> Vec<f64>;
    /// Eccentricity vector added to a non member point,
    fn nxnonmember(self, p:&[f64]) -> Vec<f64>;
    /// Weighted eccentricity vector for a non member point
    fn wnxnonmember(self, ws:&[u8], p:&[f64]) -> Vec<f64>; 
    /// Weighted geometric median, sorted eccentricities magnitudes, cpdf of the weights
    fn gmedian(self, eps:f64) -> Vec<f64>; 
    /// The weighted geometric median
    fn wgmedian(self, ws:&[u8], eps: f64) -> Vec<f64>;
    /// Lower triangular part of a covariance matrix of a Vec of u8 vectors.
    fn covar(self, med:&[f64]) -> Vec<f64>; 
    fn wcovar(self, ws:&[u8], m:&[f64]) -> Vec<f64>;
}

/// Methods applicable to a single argument: a vector of vectors of generic end type.
/// Operations on a set of multidimensional vectors.
pub trait VecVec<T> {
    /// Transpose vec of vecs like a classical array
    fn transpose(self) -> Vec<Vec<T>>;
    /// Joint probability density function of n matched slices of the same length
    fn jointpdfn(self) -> Vec<f64>;
    /// Joint entropy between a set of vectors of the same length
    fn jointentropyn(self) -> f64;
    /// Independence (component wise) of a set of vectors.
    fn dependencen(self) -> f64; 
    /// Flattened lower triangular relations matrix between columns of self 
    fn crossfeatures<F>(self,f:F) -> Vec<f64> where F: Fn(&[T],&[T]) -> f64; 
    /// Arithmetic Centre = euclidian mean of a set of points
    fn acentroid(self) -> Vec<f64>;
    /// Geometric Centroid
    fn gcentroid(self) -> Vec<f64>;
    /// Harmonic Centroid = harmonic mean of a set of points
    fn hcentroid(self) -> Vec<f64>; 
    /// Possible first iteration point for geometric medians
    fn firstpoint(self) -> Vec<f64>;
    /// Sums of distances from each point to all other points
    fn distsums(self) -> Vec<f64>;
    /// Medoid distance, its index, outlier distance, its index
    fn medout(self) -> MinMax<f64>; 
    /// Fast sums of distances from each point to all other points 
    fn distsuminset(self, indx: usize) -> f64;
    /// Next approx gm computed at a member point given by its indx
    fn nxmember(self, indx: usize) -> Vec<f64>;
    /// Next approx gm computed at a nonmember point p
    fn nxnonmember(self, p:&[f64]) -> Vec<f64>; 
    /// Estimated ecentricity vector from a member point given by its indx to gm
    fn eccmember(self, indx: usize) -> Vec<f64>;
    /// Estimated eccentricity vector from a non member point
    fn eccnonmember(self, p:&[f64]) -> Vec<f64>;
    /// Estimated eccentricity vectors from each member point
    fn eccentricities(self) -> Vec<Vec<f64>>;
    /// Exact eccentricity vectors from each member point by first finding the gm.
    /// As well as being more accurate, it is usually faster than `eccentricities` above, 
    /// especially for large numbers of points.
    fn exacteccs(self, eps: f64) -> Vec<Vec<f64>>; 
    /// Median and quartiles of eccentricities (new robust measure of spread of a multivariate sample)
    fn eccinfo(self, eps: f64) -> (MStats,Med,MinMax<f64>) where Vec<f64>:FromIterator<f64>;
    /// MADn multidimensional median absolute deviation: data spread estimator that is more stable than variance
    fn madn(self, eps: f64) -> f64;
    /// Medoid and Outlier as defined by eccentricities.
    fn emedoid(self, eps: f64) -> MinMax<f64> where Vec<f64>:FromIterator<f64>;
    /// Returns sorted eccentricities magnitudes
    fn sortedeccs(self, ascending:bool, gm:&[f64]) -> Vec<f64>; 
    /// Improved Weizsfeld's Algorithm for geometric median, to accuracy eps
    // fn nmedian(self, eps: f64) -> Vec<f64>;
    /// New algorithm for geometric median, to accuracy eps    
    fn gmedian(self, eps: f64) -> Vec<f64>;
    /// Same a smedian but returns also the number of iterations 
    fn igmedian(self, eps: f64) -> (Vec<f64>,usize);   
}

/// Methods applicable to vector of vectors of generic end type and one argument
/// of a similar kind.
pub trait VecVecg<T,U> { 
    /// Weighted Arithmetic Centre = weighted euclidian mean of a set of points
    fn wacentroid(self,ws: &[U]) -> Vec<f64>;
    /// Trend between two sets
    fn trend(self, eps: f64, v: Vec<Vec<U>>) -> Vec<f64>;
    /// Subtract m from all points - e.g. transform to zero median form
    fn translate(self, m: &[U]) -> Vec<Vec<f64>>;
    /// Dependencies of vector m on each vector in self
    fn dependencies(self, m: &[U]) -> Vec<f64>;
    /// (Median) correlations of m with each vector in self
    fn correlations(self, m: &[U]) -> Vec<f64>;
    /// Sum of distances from arbitrary point (v) to all the points in self      
    fn distsum(self, v: &[U]) -> f64;
    /// Individual distances from any point v (typically not in self) to all the points in self.    
    fn dists(self, v: &[U]) -> Vec<f64>;
    /// Medoid and Outlier (by distance) of a set of points
    // fn medoid(self) -> MinMax<f64>; 
    /// ( wgm, sorted eccentricities magnitudes, associated cpdf )
    fn wsortedeccs(self,ws:&[U],gm:&[f64]) -> (Vec<f64>,Vec<f64>); 
    /// Sorted cosines magnitudes and cpdf, needs central median
    fn wsortedcos(self,medmed:&[U],unitzmed:&[U],ws:&[U]) -> (Vec<f64>,Vec<f64>); 
    /// Estimated weighted gm computed at a non member point
    fn wnxnonmember(self, ws:&[U], p:&[f64]) -> Vec<f64>; 
    /// Estimated weighted eccentricity for a non-member point 
    fn weccnonmember(self, ws:&[U], p:&[f64]) -> Vec<f64>;
    /// The weighted geometric median to accuracy eps 
    fn wgmedian(self, ws: &[U], eps: f64) -> Vec<f64>;
    /// Flattened lower triangular part of a covariance matrix of a Vec of f64 vectors.
    fn covar(self, med:&[U]) -> Vec<f64>;  
    /// Flattened lower triangular part of a covariance matrix for weighted f64 vectors.
    fn wcovar(self, ws:&[U], m:&[f64]) -> Vec<f64>;
    /// Flattened comediance matrix for f64 vectors in self.
    /// Similar to `covar` above but medians instead of means are returned.
    fn comed(self, m:&[U]) -> Vec<f64>;
    /// Flatteened comediance matrix for weighted f64 vectors.
    /// Similar to `wcovar` above but medians instead of means are returned.
    fn wcomed(self, ws:&[U], m:&[f64]) -> Vec<f64>;
}