use linfa::Float;
use ndarray::{s, Array, Array1, Array2, ArrayBase, Axis, Data, Ix1, Ix2};
#[cfg(feature = "serializable")]
use serde::{Deserialize, Serialize};
#[derive(Debug)]
#[cfg_attr(feature = "serializable", derive(Serialize, Deserialize))]
pub(crate) struct NormalizedMatrix<F: Float> {
pub data: Array2<F>,
pub mean: Array1<F>,
pub std: Array1<F>,
}
impl<F: Float> Clone for NormalizedMatrix<F> {
fn clone(&self) -> NormalizedMatrix<F> {
NormalizedMatrix {
data: self.data.to_owned(),
mean: self.mean.to_owned(),
std: self.std.to_owned(),
}
}
}
impl<F: Float> NormalizedMatrix<F> {
pub fn new(x: &ArrayBase<impl Data<Elem = F>, Ix2>) -> NormalizedMatrix<F> {
let (data, mean, std) = normalize(x);
NormalizedMatrix {
data: data.to_owned(),
mean: mean.to_owned(),
std: std.to_owned(),
}
}
pub fn ncols(&self) -> usize {
self.data.ncols()
}
}
pub fn normalize<F: Float>(
x: &ArrayBase<impl Data<Elem = F>, Ix2>,
) -> (Array2<F>, Array1<F>, Array1<F>) {
let x_mean = x.mean_axis(Axis(0)).unwrap();
let mut x_std = x.std_axis(Axis(0), F::one());
x_std.mapv_inplace(|v| if v == F::zero() { F::one() } else { v });
let xnorm = (x - &x_mean) / &x_std;
(xnorm, x_mean, x_std)
}
#[derive(Debug)]
pub struct DistanceMatrix<F: Float> {
pub d: Array2<F>,
pub d_indices: Array2<usize>,
pub n_obs: usize,
pub n_features: usize,
}
impl<F: Float> DistanceMatrix<F> {
pub fn new(x: &ArrayBase<impl Data<Elem = F>, Ix2>) -> DistanceMatrix<F> {
let (d, d_indices) = Self::_cross_distances(x);
let n_obs = x.nrows();
let n_features = x.ncols();
DistanceMatrix {
d: d.to_owned(),
d_indices: d_indices.to_owned(),
n_obs,
n_features,
}
}
fn _cross_distances(x: &ArrayBase<impl Data<Elem = F>, Ix2>) -> (Array2<F>, Array2<usize>) {
let n_obs = x.nrows();
let n_features = x.ncols();
let n_non_zero_cross_dist = n_obs * (n_obs - 1) / 2;
let mut indices = Array2::<usize>::zeros((n_non_zero_cross_dist, 2));
let mut d = Array2::zeros((n_non_zero_cross_dist, n_features));
let mut ll_1 = 0;
for k in 0..(n_obs - 1) {
let ll_0 = ll_1;
ll_1 = ll_0 + n_obs - k - 1;
indices
.slice_mut(s![ll_0..ll_1, 0..1])
.assign(&Array2::<usize>::from_elem((n_obs - k - 1, 1), k));
let init_values = ((k + 1)..n_obs).collect();
indices
.slice_mut(s![ll_0..ll_1, 1..2])
.assign(&Array2::from_shape_vec((n_obs - k - 1, 1), init_values).unwrap());
let diff = &x
.slice(s![k..(k + 1), ..])
.broadcast((n_obs - k - 1, n_features))
.unwrap()
- &x.slice(s![k + 1..n_obs, ..]);
d.slice_mut(s![ll_0..ll_1, ..]).assign(&diff);
}
d = d.mapv(|v| v.abs());
(d, indices)
}
}
pub fn pairwise_differences<F: Float>(
x: &ArrayBase<impl Data<Elem = F>, Ix2>,
y: &ArrayBase<impl Data<Elem = F>, Ix2>,
) -> Array2<F> {
assert!(x.ncols() == y.ncols());
let x3 = x.to_owned().insert_axis(Axis(1));
let y3 = y.to_owned().insert_axis(Axis(0));
let d = x3 - y3;
let n = d.len();
let res = Array::from_iter(d.iter().cloned());
res.into_shape((n / x.ncols(), x.ncols())).unwrap()
}
pub fn differences<F: Float>(
x: &ArrayBase<impl Data<Elem = F>, Ix1>,
y: &ArrayBase<impl Data<Elem = F>, Ix2>,
) -> Array2<F> {
assert!(x.len() == y.ncols());
x.to_owned() - y
}
#[cfg(test)]
mod tests {
use super::*;
use approx::assert_abs_diff_eq;
use ndarray::array;
#[test]
fn test_pairwise_differences() {
let x = array![[-0.9486833], [-0.82219219]];
let y = array![
[-1.26491106],
[-0.63245553],
[0.],
[0.63245553],
[1.26491106]
];
assert_abs_diff_eq!(
&array![
[0.31622777],
[-0.31622777],
[-0.9486833],
[-1.58113883],
[-2.21359436],
[0.44271887],
[-0.18973666],
[-0.82219219],
[-1.45464772],
[-2.08710326]
],
&pairwise_differences(&x, &y),
epsilon = 1e-6
)
}
#[test]
fn test_differences() {
let x = array![-0.9486833];
let y = array![
[-1.26491106],
[-0.63245553],
[0.],
[0.63245553],
[1.26491106]
];
assert_abs_diff_eq!(
&array![
[0.31622777],
[-0.31622777],
[-0.9486833],
[-1.58113883],
[-2.21359436],
],
&differences(&x, &y),
epsilon = 1e-6
)
}
#[test]
fn test_normalized_matrix() {
let x = array![[1., 2.], [3., 4.]];
let xnorm = NormalizedMatrix::new(&x);
assert_eq!(xnorm.ncols(), 2);
assert_eq!(array![2., 3.], xnorm.mean);
assert_eq!(array![f64::sqrt(2.), f64::sqrt(2.)], xnorm.std);
}
#[test]
fn test_cross_distance_matrix() {
let xt = array![[0.5], [1.2], [2.0], [3.0], [4.0]];
let expected = (
array![
[0.7],
[1.5],
[2.5],
[3.5],
[0.8],
[1.8],
[2.8],
[1.],
[2.],
[1.]
],
array![
[0, 1],
[0, 2],
[0, 3],
[0, 4],
[1, 2],
[1, 3],
[1, 4],
[2, 3],
[2, 4],
[3, 4]
],
);
let dm = DistanceMatrix::new(&xt);
assert_eq!(expected.0, dm.d);
assert_eq!(expected.1, dm.d_indices);
}
}