1use scirs2_core::ndarray::{s, Array1, Array2, ArrayView1, ArrayView2};
31use scirs2_core::numeric::{Float, FromPrimitive};
32use std::fmt::Debug;
33
34use crate::error::{ClusteringError, Result};
35
36mod distance_metrics;
37mod distance_simd;
38mod kmeans;
39mod kmeans2;
40mod minibatch_kmeans;
41mod parallel_kmeans;
42mod simd_kmeans;
43mod simd_optimizations;
44mod weighted_kmeans;
45pub use self::distance_metrics::{
46 create_metric, ChebyshevDistance, CorrelationDistance, CosineDistance,
47 DistanceMetric as VQDistanceMetric, EuclideanDistance, MahalanobisDistance, ManhattanDistance,
48 MetricType, MinkowskiDistance,
49};
50pub use distance_simd::{
51 distance_to_centroids_simd, pairwise_euclidean_parallel, pairwise_euclidean_simd,
52};
53pub use kmeans::{
54 kmeans, kmeans_init, kmeans_plus_plus, kmeans_with_metric, kmeans_with_options, KMeansInit,
55 KMeansOptions,
56};
57pub use kmeans2::{kmeans2, kmeans2_str, MinitMethod, MissingMethod};
58pub use minibatch_kmeans::*;
59pub use parallel_kmeans::{parallel_kmeans, ParallelKMeansOptions};
60pub use simd_kmeans::{kmeans_plus_plus_simd, kmeans_simd, mini_batch_kmeans_simd};
61pub use simd_optimizations::{
62 calculate_distortion_simd, compute_centroids_simd, euclidean_distance_simd, vq_simd,
63 whiten_simd, SimdOptimizationConfig,
64};
65pub use weighted_kmeans::{weighted_kmeans, weighted_kmeans_plus_plus, WeightedKMeansOptions};
66
67#[allow(dead_code)]
69pub fn euclidean_distance<F>(x: ArrayView1<F>, y: ArrayView1<F>) -> F
70where
71 F: Float + FromPrimitive,
72{
73 let mut sum = F::zero();
74 for i in 0..x.len() {
75 let diff = x[i] - y[i];
76 sum = sum + diff * diff;
77 }
78 sum.sqrt()
79}
80
81#[allow(dead_code)]
114pub fn whiten<F>(obs: &Array2<F>) -> Result<Array2<F>>
115where
116 F: Float + FromPrimitive + std::fmt::Debug,
117{
118 let n_samples = obs.shape()[0];
119 let n_features = obs.shape()[1];
120
121 let mut means = Array1::<F>::zeros(n_features);
123 for j in 0..n_features {
124 let mut sum = F::zero();
125 for i in 0..n_samples {
126 sum = sum + obs[[i, j]];
127 }
128 means[j] = sum / F::from(n_samples).unwrap();
129 }
130
131 let mut stds = Array1::<F>::zeros(n_features);
133 for j in 0..n_features {
134 let mut sum = F::zero();
135 for i in 0..n_samples {
136 let diff = obs[[i, j]] - means[j];
137 sum = sum + diff * diff;
138 }
139 stds[j] = (sum / F::from(n_samples - 1).unwrap()).sqrt();
140
141 if stds[j] < F::from(1e-10).unwrap() {
143 stds[j] = F::one();
144 }
145 }
146
147 let mut whitened = Array2::<F>::zeros((n_samples, n_features));
149 for i in 0..n_samples {
150 for j in 0..n_features {
151 whitened[[i, j]] = (obs[[i, j]] - means[j]) / stds[j];
152 }
153 }
154
155 Ok(whitened)
156}
157
158#[allow(dead_code)]
180pub fn vq<F>(data: ArrayView2<F>, centroids: ArrayView2<F>) -> Result<(Array1<usize>, Array1<F>)>
181where
182 F: Float + FromPrimitive + Debug,
183{
184 if data.shape()[1] != centroids.shape()[1] {
185 return Err(ClusteringError::InvalidInput(format!(
186 "Observation array and centroid array must have the same number of dimensions. Got {} and {}",
187 data.shape()[1], centroids.shape()[1]
188 )));
189 }
190
191 let n_samples = data.shape()[0];
192 let n_centroids = centroids.shape()[0];
193
194 let mut labels = Array1::zeros(n_samples);
195 let mut distances = Array1::zeros(n_samples);
196
197 for i in 0..n_samples {
198 let point = data.slice(s![i, ..]);
199 let mut min_dist = F::infinity();
200 let mut closest_centroid = 0;
201
202 for j in 0..n_centroids {
203 let centroid = centroids.slice(s![j, ..]);
204 let dist = euclidean_distance(point, centroid);
205
206 if dist < min_dist {
207 min_dist = dist;
208 closest_centroid = j;
209 }
210 }
211
212 labels[i] = closest_centroid;
213 distances[i] = min_dist;
214 }
215
216 Ok((labels, distances))
217}