1use linfa::Float;
2use ndarray::{Array1, Array2, ArrayBase, Axis, Data, Ix1, Ix2, s};
3#[cfg(feature = "serializable")]
4use serde::{Deserialize, Serialize};
5
6#[derive(Debug)]
8#[cfg_attr(feature = "serializable", derive(Serialize, Deserialize))]
9pub(crate) struct NormalizedData<F: Float> {
10 pub data: Array2<F>,
12 pub mean: Array1<F>,
14 pub std: Array1<F>,
16}
17
18impl<F: Float> Clone for NormalizedData<F> {
19 fn clone(&self) -> NormalizedData<F> {
20 NormalizedData {
21 data: self.data.to_owned(),
22 mean: self.mean.to_owned(),
23 std: self.std.to_owned(),
24 }
25 }
26}
27
28impl<F: Float> NormalizedData<F> {
29 pub fn new(x: &ArrayBase<impl Data<Elem = F>, Ix2>) -> NormalizedData<F> {
31 let (data, mean, std) = normalize(x);
32 NormalizedData {
33 data: data.to_owned(),
34 mean: mean.to_owned(),
35 std: std.to_owned(),
36 }
37 }
38
39 pub fn ncols(&self) -> usize {
41 self.data.ncols()
42 }
43}
44
45pub fn normalize<F: Float>(
46 x: &ArrayBase<impl Data<Elem = F>, Ix2>,
47) -> (Array2<F>, Array1<F>, Array1<F>) {
48 let x_mean = x.mean_axis(Axis(0)).unwrap();
49 let mut x_std = x.std_axis(Axis(0), F::one());
50 x_std.mapv_inplace(|v| if v == F::zero() { F::one() } else { v });
51 let xnorm = (x - &x_mean) / &x_std;
52
53 (xnorm, x_mean, x_std)
54}
55
56#[derive(Debug)]
58pub struct DiffMatrix<F: Float> {
59 pub d: Array2<F>,
61 pub d_indices: Array2<usize>,
63 pub n_obs: usize,
65}
66
67impl<F: Float> DiffMatrix<F> {
68 pub fn new(x: &ArrayBase<impl Data<Elem = F>, Ix2>) -> DiffMatrix<F> {
70 let (d, d_indices) = Self::_cross_diff(x);
71 let n_obs = x.nrows();
72
73 DiffMatrix {
74 d,
75 d_indices,
76 n_obs,
77 }
78 }
79
80 fn _cross_diff(x: &ArrayBase<impl Data<Elem = F>, Ix2>) -> (Array2<F>, Array2<usize>) {
81 let n_obs = x.nrows();
82 let nx = x.ncols();
83 let n_non_zero_cross_dist = n_obs * (n_obs - 1) / 2;
84 let mut indices = Array2::<usize>::zeros((n_non_zero_cross_dist, 2));
85 let mut d = Array2::zeros((n_non_zero_cross_dist, nx));
86 let mut idx = 0;
87 for k in 0..(n_obs - 1) {
88 let idx0 = idx;
89 let offset = n_obs - k - 1;
90 idx = idx0 + offset;
91
92 for i in (k + 1)..n_obs {
93 let r = idx0 + i - k - 1;
94 indices[[r, 0]] = k;
95 indices[[r, 1]] = i;
96 }
97
98 let diff = &x.slice(s![k, ..]) - &x.slice(s![k + 1..n_obs, ..]);
99 d.slice_mut(s![idx0..idx, ..]).assign(&diff);
100 }
101 d = d.mapv(|v| v.abs());
102
103 (d, indices)
104 }
105}
106
107pub fn pairwise_differences<F: Float>(
111 x: &ArrayBase<impl Data<Elem = F>, Ix2>,
112 y: &ArrayBase<impl Data<Elem = F>, Ix2>,
113) -> Array2<F> {
114 assert!(x.ncols() == y.ncols());
115
116 let nx = x.nrows();
117 let ny = y.nrows();
118 let ncols = x.ncols();
119 let mut result = Array2::zeros((nx * ny, ncols));
120
121 for (i, x_row) in x.rows().into_iter().enumerate() {
122 for (j, y_row) in y.rows().into_iter().enumerate() {
123 let idx = i * ny + j;
124 for k in 0..ncols {
125 result[[idx, k]] = x_row[k] - y_row[k];
126 }
127 }
128 }
129
130 result
131}
132
133pub fn differences<F: Float>(
137 x: &ArrayBase<impl Data<Elem = F>, Ix1>,
138 y: &ArrayBase<impl Data<Elem = F>, Ix2>,
139) -> Array2<F> {
140 assert!(x.len() == y.ncols());
141 x.to_owned() - y
142}
143
144#[cfg(test)]
145mod tests {
146 use super::*;
147 use approx::assert_abs_diff_eq;
148 use ndarray::array;
149
150 #[test]
151 fn test_pairwise_differences() {
152 let x = array![[-0.9486833], [-0.82219219]];
153 let y = array![
154 [-1.26491106],
155 [-0.63245553],
156 [0.],
157 [0.63245553],
158 [1.26491106]
159 ];
160 assert_abs_diff_eq!(
161 &array![
162 [0.31622777],
163 [-0.31622777],
164 [-0.9486833],
165 [-1.58113883],
166 [-2.21359436],
167 [0.44271887],
168 [-0.18973666],
169 [-0.82219219],
170 [-1.45464772],
171 [-2.08710326]
172 ],
173 &pairwise_differences(&x, &y),
174 epsilon = 1e-6
175 )
176 }
177
178 #[test]
179 fn test_differences() {
180 let x = array![-0.9486833];
181 let y = array![
182 [-1.26491106],
183 [-0.63245553],
184 [0.],
185 [0.63245553],
186 [1.26491106]
187 ];
188 assert_abs_diff_eq!(
189 &array![
190 [0.31622777],
191 [-0.31622777],
192 [-0.9486833],
193 [-1.58113883],
194 [-2.21359436],
195 ],
196 &differences(&x, &y),
197 epsilon = 1e-6
198 )
199 }
200
201 #[test]
202 fn test_normalized_matrix() {
203 let x = array![[1., 2.], [3., 4.]];
204 let xnorm = NormalizedData::new(&x);
205 assert_eq!(xnorm.ncols(), 2);
206 assert_eq!(array![2., 3.], xnorm.mean);
207 assert_eq!(array![f64::sqrt(2.), f64::sqrt(2.)], xnorm.std);
208 }
209
210 #[test]
211 fn test_diff_matrix() {
212 let xt = array![[0.5], [1.2], [2.0], [3.0], [4.0]];
213 let expected = (
214 array![
215 [0.7],
216 [1.5],
217 [2.5],
218 [3.5],
219 [0.8],
220 [1.8],
221 [2.8],
222 [1.],
223 [2.],
224 [1.]
225 ],
226 array![
227 [0, 1],
228 [0, 2],
229 [0, 3],
230 [0, 4],
231 [1, 2],
232 [1, 3],
233 [1, 4],
234 [2, 3],
235 [2, 4],
236 [3, 4]
237 ],
238 );
239 let dm = DiffMatrix::new(&xt);
240 assert_eq!(expected.0, dm.d);
241 assert_eq!(expected.1, dm.d_indices);
242 }
243}