1#![cfg_attr(not(feature = "std"), no_std)]
48
49#[cfg(not(feature = "std"))]
50extern crate alloc;
51
/// Clustering algorithm implementations, organised into the `dbscan`,
/// `gaussian_mixture`, `hierarchical`, `incremental`, `kmeans`, `optics` and
/// `spectral` submodules.
pub mod algorithms;
/// Crate error types; provides `ClusterError` and `ClusterResult`.
pub mod error;
/// Clustering evaluation: the `metrics` submodule of score functions plus
/// `ClusteringMetric` and `EvaluationResult`.
pub mod evaluation;
/// Initialization strategies (`forgy`, `kmeans_plus_plus`, `random_partition`) and
/// the `InitializationStrategy` item.
pub mod initialization;
/// Core traits: `ClusteringAlgorithm`, `ClusteringResult`, `Fit`, `FitPredict` and
/// `Transform`.
pub mod traits;
/// Supporting utilities: `adaptive`, `distance`, `drift_detection`,
/// `memory_efficient`, `preprocessing` and `validation`.
pub mod utils;
58
59pub use algorithms::{
61 dbscan::{DBSCANConfig, DBSCANResult, HDBSCANConfig, HDBSCANResult, DBSCAN, HDBSCAN},
62 gaussian_mixture::{GMConfig, GMResult, GaussianMixture},
63 hierarchical::{AgglomerativeClustering, HierarchicalResult, Linkage},
64 incremental::{
65 IncrementalClustering, OnlineKMeans, OnlineKMeansConfig, OnlineKMeansResult,
66 SlidingWindowConfig, SlidingWindowKMeans, SlidingWindowResult,
67 },
68 kmeans::{InitMethod, KMeans, KMeansAlgorithm, KMeansConfig, KMeansResult},
69 optics::{OPTICSConfig, OPTICSResult, OPTICS},
70 spectral::{SpectralClustering, SpectralConfig, SpectralResult},
71};
72
73pub use evaluation::{
75 metrics::{
76 adjusted_mutual_info_score, adjusted_rand_score, calinski_harabasz_score,
77 davies_bouldin_score, fowlkes_mallows_score, homogeneity_score,
78 normalized_mutual_info_score, silhouette_score, v_measure_score,
79 },
80 ClusteringMetric, EvaluationResult,
81};
82
83pub use initialization::{
85 forgy::Forgy, kmeans_plus_plus::KMeansPlusPlus, random_partition::RandomPartition,
86 InitializationStrategy,
87};
88
89pub use traits::{ClusteringAlgorithm, ClusteringResult, Fit, FitPredict, Transform};
91
92pub use utils::{
94 adaptive::{suggest_dbscan_params, suggest_epsilon},
95 distance::{cosine_distance, euclidean_distance, manhattan_distance, DistanceMetric},
96 drift_detection::{CompositeDriftDetector, DriftStatus, PageHinkleyTest, ADWIN, DDM},
97 memory_efficient::{ChunkedDataProcessor, IncrementalCentroidUpdater, MemoryEfficientConfig},
98 preprocessing::{normalize_features, standardize_features, PreprocessingMethod},
99 validation::{validate_cluster_input, validate_n_clusters, ClusterValidation},
100};
101
102pub use error::{ClusterError, ClusterResult};
104
/// Crate version string, read from the `CARGO_PKG_VERSION` environment variable at
/// compile time.
pub const VERSION: &str = env!("CARGO_PKG_VERSION");
/// Major version component.
///
/// NOTE(review): this is a hard-coded literal, not derived from [`VERSION`];
/// presumably it must be kept in sync with the package version by hand — confirm.
pub const VERSION_MAJOR: u32 = 0;
/// Minor version component (hard-coded literal; see the note on [`VERSION_MAJOR`]).
pub const VERSION_MINOR: u32 = 1;
/// Patch version component (hard-coded literal; see the note on [`VERSION_MAJOR`]).
pub const VERSION_PATCH: u32 = 0;
110
111pub mod prelude {
113
114 pub use crate::algorithms::{
115 dbscan::{DBSCANConfig, DBSCAN},
116 gaussian_mixture::{GMConfig, GaussianMixture},
117 hierarchical::{AgglomerativeClustering, Linkage},
118 kmeans::{InitMethod, KMeans, KMeansConfig},
119 spectral::{SpectralClustering, SpectralConfig},
120 };
121
122 pub use crate::evaluation::{
123 metrics::{adjusted_rand_score, silhouette_score},
124 ClusteringMetric,
125 };
126
127 pub use crate::initialization::{
128 Forgy, InitializationStrategy, KMeansPlusPlus, RandomPartition,
129 };
130
131 pub use crate::traits::{ClusteringAlgorithm, ClusteringResult, Fit, FitPredict, Transform};
132
133 pub use crate::utils::{
134 distance::{cosine_distance, euclidean_distance, DistanceMetric},
135 preprocessing::{normalize_features, standardize_features},
136 };
137
138 pub use crate::error::{ClusterError, ClusterResult};
139}
140
141#[cfg(test)]
142mod tests {
143 use super::*;
144 use approx::assert_relative_eq;
145 use torsh_tensor::Tensor;
146
147 #[test]
148 fn test_version_info() {
149 assert!(!VERSION.is_empty());
150 assert_eq!(VERSION_MAJOR, 0);
151 assert_eq!(VERSION_MINOR, 1);
152 assert_eq!(VERSION_PATCH, 0);
153 }
154
155 #[test]
156 fn test_dbscan_basic() -> Result<(), Box<dyn std::error::Error>> {
157 let data = Tensor::from_vec(
159 vec![
160 0.0, 0.0, 0.1, 0.1, 0.0, 0.2, 0.2, 0.0, 5.0, 5.0, 5.1, 5.1, 5.0, 5.2, 5.2, 5.0,
163 10.0, 10.0,
165 ],
166 &[9, 2],
167 )?;
168
169 let dbscan = DBSCAN::new(0.5, 2);
170 let result = dbscan.fit(&data)?;
171
172 assert_eq!(result.n_clusters, 2);
174 assert_eq!(result.noise_points.len(), 1);
175 assert!(!result.core_sample_indices.is_empty());
176
177 Ok(())
178 }
179
180 #[test]
181 fn test_hierarchical_basic() -> Result<(), Box<dyn std::error::Error>> {
182 let data = Tensor::from_vec(vec![0.0, 0.0, 0.1, 0.1, 5.0, 5.0, 5.1, 5.1], &[4, 2])?;
184
185 let hierarchical = AgglomerativeClustering::new(2);
186 let result = hierarchical.fit(&data)?;
187
188 assert_eq!(result.n_clusters, 2);
190
191 let labels_vec = result.labels.to_vec()?;
193 let unique_labels: std::collections::HashSet<i32> =
194 labels_vec.iter().map(|&x| x as i32).collect();
195 assert_eq!(unique_labels.len(), 2);
196
197 Ok(())
198 }
199
200 #[test]
201 fn test_silhouette_score_basic() -> Result<(), Box<dyn std::error::Error>> {
202 let data = Tensor::from_vec(
204 vec![
205 0.0, 0.0, 0.1, 0.1, 10.0, 10.0, 10.1, 10.1,
208 ],
209 &[4, 2],
210 )?;
211
212 let labels = Tensor::from_vec(vec![0.0, 0.0, 1.0, 1.0], &[4])?;
213
214 let score = silhouette_score(&data, &labels)?;
215
216 assert!(
218 score > 0.5,
219 "Silhouette score should be positive for well-separated clusters"
220 );
221
222 Ok(())
223 }
224
225 #[test]
226 fn test_adjusted_rand_score_perfect() -> Result<(), Box<dyn std::error::Error>> {
227 let labels_true = Tensor::from_vec(vec![0.0, 0.0, 1.0, 1.0], &[4])?;
229 let labels_pred = Tensor::from_vec(vec![0.0, 0.0, 1.0, 1.0], &[4])?;
230
231 let score = adjusted_rand_score(&labels_true, &labels_pred)?;
232
233 assert_relative_eq!(score, 1.0, epsilon = 1e-6);
235
236 Ok(())
237 }
238
239 #[test]
240 fn test_calinski_harabasz_score() -> Result<(), Box<dyn std::error::Error>> {
241 let data = Tensor::from_vec(
243 vec![
244 0.0, 0.0, 0.1, 0.1, 5.0, 5.0, 5.1, 5.1,
247 ],
248 &[4, 2],
249 )?;
250
251 let labels = Tensor::from_vec(vec![0.0, 0.0, 1.0, 1.0], &[4])?;
252
253 let score = calinski_harabasz_score(&data, &labels)?;
254
255 assert!(
257 score > 1.0,
258 "Calinski-Harabasz score should be > 1 for well-separated clusters"
259 );
260
261 Ok(())
262 }
263
264 #[test]
265 fn test_davies_bouldin_score() -> Result<(), Box<dyn std::error::Error>> {
266 let data = Tensor::from_vec(
268 vec![
269 0.0, 0.0, 0.1, 0.1, 10.0, 10.0, 10.1, 10.1,
272 ],
273 &[4, 2],
274 )?;
275
276 let labels = Tensor::from_vec(vec![0.0, 0.0, 1.0, 1.0], &[4])?;
277
278 let score = davies_bouldin_score(&data, &labels)?;
279
280 assert!(score >= 0.0, "Davies-Bouldin score should be non-negative");
282 assert!(
283 score < 5.0,
284 "Davies-Bouldin score should be reasonable for well-separated clusters"
285 );
286
287 Ok(())
288 }
289
290 #[test]
291 fn test_gmm_basic() -> Result<(), Box<dyn std::error::Error>> {
292 use algorithms::gaussian_mixture::{CovarianceType, GaussianMixture};
293
294 let data = Tensor::from_vec(
296 vec![
297 0.0, 0.0, 0.1, 0.1, -0.1, 0.1, 0.1, -0.1, 5.0, 5.0, 5.1, 5.1, 4.9, 5.1, 5.1, 4.9,
300 ],
301 &[8, 2],
302 )?;
303
304 let gmm = GaussianMixture::new(2)
305 .covariance_type(CovarianceType::Diag)
306 .max_iters(50)
307 .tolerance(1e-3);
308
309 let result = gmm.fit(&data)?;
310
311 assert_eq!(result.n_clusters(), 2);
313
314 assert_eq!(result.means.shape().dims(), &[2, 2]); assert_eq!(result.weights.shape().dims(), &[2]); assert_eq!(result.labels.shape().dims(), &[8]); assert_eq!(result.responsibilities.shape().dims(), &[8, 2]); assert!(result.log_likelihood > f64::NEG_INFINITY);
322
323 assert!(result.aic.is_finite());
325 assert!(result.bic.is_finite());
326
327 assert!(result.n_iter <= 50);
329
330 println!(
331 "GMM test passed - log_likelihood: {}, AIC: {}, BIC: {}, converged: {}",
332 result.log_likelihood, result.aic, result.bic, result.converged
333 );
334
335 Ok(())
336 }
337
338 #[test]
339 fn test_spectral_clustering_basic() -> Result<(), Box<dyn std::error::Error>> {
340 use algorithms::spectral::{AffinityType, SpectralClustering};
341
342 let data = Tensor::from_vec(
344 vec![
345 0.0, 0.0, 0.1, 0.1, -0.1, 0.1, 0.1, -0.1, 3.0, 3.0, 3.1, 3.1, 2.9, 3.1, 3.1, 2.9,
348 ],
349 &[8, 2],
350 )?;
351
352 let spectral = SpectralClustering::new(2)
353 .affinity(AffinityType::Rbf)
354 .gamma(1.0);
355
356 let result = spectral.fit(&data)?;
357
358 assert_eq!(result.n_clusters(), 2);
360
361 assert_eq!(result.labels.shape().dims(), &[8]); assert_eq!(result.affinity_matrix.shape().dims(), &[8, 8]); assert_eq!(result.embedding.shape().dims(), &[8, 2]); assert_eq!(result.eigenvalues.shape().dims(), &[8]); assert!(result.embedding_success);
369
370 assert!(result.kmeans_iterations <= 100);
372
373 println!(
374 "Spectral clustering test passed - embedding_success: {}, kmeans_iterations: {}",
375 result.embedding_success, result.kmeans_iterations
376 );
377
378 Ok(())
379 }
380}