egobox_gp/
utils.rs

1use linfa::Float;
2use ndarray::{Array1, Array2, ArrayBase, Axis, Data, Ix1, Ix2, s};
3#[cfg(feature = "serializable")]
4use serde::{Deserialize, Serialize};
5
6/// A structure to store (n, xdim) matrix data and its mean and standard deviation vectors.
7#[derive(Debug)]
8#[cfg_attr(feature = "serializable", derive(Serialize, Deserialize))]
9pub(crate) struct NormalizedData<F: Float> {
10    /// normalized data
11    pub data: Array2<F>,
12    /// mean vector computed from data
13    pub mean: Array1<F>,
14    /// standard deviation vector computed from data
15    pub std: Array1<F>,
16}
17
18impl<F: Float> Clone for NormalizedData<F> {
19    fn clone(&self) -> NormalizedData<F> {
20        NormalizedData {
21            data: self.data.to_owned(),
22            mean: self.mean.to_owned(),
23            std: self.std.to_owned(),
24        }
25    }
26}
27
28impl<F: Float> NormalizedData<F> {
29    /// Constructor
30    pub fn new(x: &ArrayBase<impl Data<Elem = F>, Ix2>) -> NormalizedData<F> {
31        let (data, mean, std) = normalize(x);
32        NormalizedData {
33            data: data.to_owned(),
34            mean: mean.to_owned(),
35            std: std.to_owned(),
36        }
37    }
38
39    /// Dimension of data points
40    pub fn ncols(&self) -> usize {
41        self.data.ncols()
42    }
43}
44
45pub fn normalize<F: Float>(
46    x: &ArrayBase<impl Data<Elem = F>, Ix2>,
47) -> (Array2<F>, Array1<F>, Array1<F>) {
48    let x_mean = x.mean_axis(Axis(0)).unwrap();
49    let mut x_std = x.std_axis(Axis(0), F::one());
50    x_std.mapv_inplace(|v| if v == F::zero() { F::one() } else { v });
51    let xnorm = (x - &x_mean) / &x_std;
52
53    (xnorm, x_mean, x_std)
54}
55
56/// A structure to retain absolute differences computation used to compute covariance matrix
57#[derive(Debug)]
58pub struct DiffMatrix<F: Float> {
59    /// Differences as (n_obs * (n_obs-1))/2, nx) array
60    pub d: Array2<F>,
61    /// Indices of the differences in the original data array
62    pub d_indices: Array2<usize>,
63    /// Number of observations
64    pub n_obs: usize,
65}
66
67impl<F: Float> DiffMatrix<F> {
68    /// Compute differences given points given as an array (n_obs, nx)
69    pub fn new(x: &ArrayBase<impl Data<Elem = F>, Ix2>) -> DiffMatrix<F> {
70        let (d, d_indices) = Self::_cross_diff(x);
71        let n_obs = x.nrows();
72
73        DiffMatrix {
74            d,
75            d_indices,
76            n_obs,
77        }
78    }
79
80    fn _cross_diff(x: &ArrayBase<impl Data<Elem = F>, Ix2>) -> (Array2<F>, Array2<usize>) {
81        let n_obs = x.nrows();
82        let nx = x.ncols();
83        let n_non_zero_cross_dist = n_obs * (n_obs - 1) / 2;
84        let mut indices = Array2::<usize>::zeros((n_non_zero_cross_dist, 2));
85        let mut d = Array2::zeros((n_non_zero_cross_dist, nx));
86        let mut idx = 0;
87        for k in 0..(n_obs - 1) {
88            let idx0 = idx;
89            let offset = n_obs - k - 1;
90            idx = idx0 + offset;
91
92            for i in (k + 1)..n_obs {
93                let r = idx0 + i - k - 1;
94                indices[[r, 0]] = k;
95                indices[[r, 1]] = i;
96            }
97
98            let diff = &x.slice(s![k, ..]) - &x.slice(s![k + 1..n_obs, ..]);
99            d.slice_mut(s![idx0..idx, ..]).assign(&diff);
100        }
101        d = d.mapv(|v| v.abs());
102
103        (d, indices)
104    }
105}
106
107/// Computes differences between each element of x and each element of y
108/// resulting in a 2d array of shape (nrows(x) * nrows(y), ncols(x));
109/// *Panics* if x and y have not the same column numbers
110pub fn pairwise_differences<F: Float>(
111    x: &ArrayBase<impl Data<Elem = F>, Ix2>,
112    y: &ArrayBase<impl Data<Elem = F>, Ix2>,
113) -> Array2<F> {
114    assert!(x.ncols() == y.ncols());
115
116    let nx = x.nrows();
117    let ny = y.nrows();
118    let ncols = x.ncols();
119    let mut result = Array2::zeros((nx * ny, ncols));
120
121    for (i, x_row) in x.rows().into_iter().enumerate() {
122        for (j, y_row) in y.rows().into_iter().enumerate() {
123            let idx = i * ny + j;
124            for k in 0..ncols {
125                result[[idx, k]] = x_row[k] - y_row[k];
126            }
127        }
128    }
129
130    result
131}
132
133/// Computes differences between x and each element of y
134/// resulting in a 2d array of shape (nrows(y), ncols(x));
135/// *Panics* if x and y have not the same number of components
136pub fn differences<F: Float>(
137    x: &ArrayBase<impl Data<Elem = F>, Ix1>,
138    y: &ArrayBase<impl Data<Elem = F>, Ix2>,
139) -> Array2<F> {
140    assert!(x.len() == y.ncols());
141    x.to_owned() - y
142}
143
144#[cfg(test)]
145mod tests {
146    use super::*;
147    use approx::assert_abs_diff_eq;
148    use ndarray::array;
149
150    #[test]
151    fn test_pairwise_differences() {
152        let x = array![[-0.9486833], [-0.82219219]];
153        let y = array![
154            [-1.26491106],
155            [-0.63245553],
156            [0.],
157            [0.63245553],
158            [1.26491106]
159        ];
160        assert_abs_diff_eq!(
161            &array![
162                [0.31622777],
163                [-0.31622777],
164                [-0.9486833],
165                [-1.58113883],
166                [-2.21359436],
167                [0.44271887],
168                [-0.18973666],
169                [-0.82219219],
170                [-1.45464772],
171                [-2.08710326]
172            ],
173            &pairwise_differences(&x, &y),
174            epsilon = 1e-6
175        )
176    }
177
178    #[test]
179    fn test_differences() {
180        let x = array![-0.9486833];
181        let y = array![
182            [-1.26491106],
183            [-0.63245553],
184            [0.],
185            [0.63245553],
186            [1.26491106]
187        ];
188        assert_abs_diff_eq!(
189            &array![
190                [0.31622777],
191                [-0.31622777],
192                [-0.9486833],
193                [-1.58113883],
194                [-2.21359436],
195            ],
196            &differences(&x, &y),
197            epsilon = 1e-6
198        )
199    }
200
201    #[test]
202    fn test_normalized_matrix() {
203        let x = array![[1., 2.], [3., 4.]];
204        let xnorm = NormalizedData::new(&x);
205        assert_eq!(xnorm.ncols(), 2);
206        assert_eq!(array![2., 3.], xnorm.mean);
207        assert_eq!(array![f64::sqrt(2.), f64::sqrt(2.)], xnorm.std);
208    }
209
210    #[test]
211    fn test_diff_matrix() {
212        let xt = array![[0.5], [1.2], [2.0], [3.0], [4.0]];
213        let expected = (
214            array![
215                [0.7],
216                [1.5],
217                [2.5],
218                [3.5],
219                [0.8],
220                [1.8],
221                [2.8],
222                [1.],
223                [2.],
224                [1.]
225            ],
226            array![
227                [0, 1],
228                [0, 2],
229                [0, 3],
230                [0, 4],
231                [1, 2],
232                [1, 3],
233                [1, 4],
234                [2, 3],
235                [2, 4],
236                [3, 4]
237            ],
238        );
239        let dm = DiffMatrix::new(&xt);
240        assert_eq!(expected.0, dm.d);
241        assert_eq!(expected.1, dm.d_indices);
242    }
243}