#![cfg_attr(not(feature = "std"), no_std)]
//! Clustering crate root.
//!
//! Declares the crate's modules and flattens the most commonly used types into
//! the crate root via `pub use`, so callers can write `KMeans` instead of
//! `algorithms::kmeans::KMeans`. A curated subset is also exposed through
//! [`prelude`] for glob-importing.

// `alloc` supplies heap collections when building without `std`.
#[cfg(not(feature = "std"))]
extern crate alloc;
// Module declarations; implementations live in the sibling files/directories.
pub mod algorithms;
pub mod error;
pub mod evaluation;
pub mod initialization;
pub mod traits;
pub mod utils;
// Flattened re-exports: clustering algorithms, their configs, and result types.
pub use algorithms::{
dbscan::{DBSCANConfig, DBSCANResult, HDBSCANConfig, HDBSCANResult, DBSCAN, HDBSCAN},
gaussian_mixture::{GMConfig, GMResult, GaussianMixture},
hierarchical::{AgglomerativeClustering, HierarchicalResult, Linkage},
incremental::{
IncrementalClustering, OnlineKMeans, OnlineKMeansConfig, OnlineKMeansResult,
SlidingWindowConfig, SlidingWindowKMeans, SlidingWindowResult,
},
kmeans::{InitMethod, KMeans, KMeansAlgorithm, KMeansConfig, KMeansResult},
optics::{OPTICSConfig, OPTICSResult, OPTICS},
spectral::{SpectralClustering, SpectralConfig, SpectralResult},
};
// Clustering-quality metrics (internal and external validation scores).
pub use evaluation::{
metrics::{
adjusted_mutual_info_score, adjusted_rand_score, calinski_harabasz_score,
davies_bouldin_score, fowlkes_mallows_score, homogeneity_score,
normalized_mutual_info_score, silhouette_score, v_measure_score,
},
ClusteringMetric, EvaluationResult,
};
// Centroid-initialization strategies (used primarily by k-means variants).
pub use initialization::{
forgy::Forgy, kmeans_plus_plus::KMeansPlusPlus, random_partition::RandomPartition,
InitializationStrategy,
};
// Core traits implemented by all algorithms in this crate.
pub use traits::{ClusteringAlgorithm, ClusteringResult, Fit, FitPredict, Transform};
// Supporting utilities: distances, preprocessing, drift detection, validation.
pub use utils::{
adaptive::{suggest_dbscan_params, suggest_epsilon},
distance::{cosine_distance, euclidean_distance, manhattan_distance, DistanceMetric},
drift_detection::{CompositeDriftDetector, DriftStatus, PageHinkleyTest, ADWIN, DDM},
memory_efficient::{ChunkedDataProcessor, IncrementalCentroidUpdater, MemoryEfficientConfig},
preprocessing::{normalize_features, standardize_features, PreprocessingMethod},
validation::{validate_cluster_input, validate_n_clusters, ClusterValidation},
};
// Crate-wide error type and result alias.
pub use error::{ClusterError, ClusterResult};
pub const VERSION: &str = env!("CARGO_PKG_VERSION");
pub const VERSION_MAJOR: u32 = 0;
pub const VERSION_MINOR: u32 = 1;
pub const VERSION_PATCH: u32 = 0;
/// Convenience prelude: `use <crate>::prelude::*;` brings the most commonly
/// used algorithms, traits, metrics, distances, preprocessing helpers, and
/// error types into scope in one import.
pub mod prelude {
// Core algorithms with their configuration types.
pub use crate::algorithms::{
dbscan::{DBSCANConfig, DBSCAN},
gaussian_mixture::{GMConfig, GaussianMixture},
hierarchical::{AgglomerativeClustering, Linkage},
kmeans::{InitMethod, KMeans, KMeansConfig},
spectral::{SpectralClustering, SpectralConfig},
};
// The two most common evaluation scores plus the metric enum.
pub use crate::evaluation::{
metrics::{adjusted_rand_score, silhouette_score},
ClusteringMetric,
};
// Centroid-initialization strategies.
pub use crate::initialization::{
Forgy, InitializationStrategy, KMeansPlusPlus, RandomPartition,
};
// Core traits shared by all algorithms.
pub use crate::traits::{ClusteringAlgorithm, ClusteringResult, Fit, FitPredict, Transform};
// Frequently used distance functions and feature preprocessing.
pub use crate::utils::{
distance::{cosine_distance, euclidean_distance, DistanceMetric},
preprocessing::{normalize_features, standardize_features},
};
// Crate-wide error type and result alias.
pub use crate::error::{ClusterError, ClusterResult};
}
#[cfg(test)]
mod tests {
use super::*;
use approx::assert_relative_eq;
use torsh_tensor::Tensor;
// Sanity-check the exported version constants.
#[test]
fn test_version_info() {
assert!(!VERSION.is_empty());
assert_eq!(VERSION_MAJOR, 0);
assert_eq!(VERSION_MINOR, 1);
assert_eq!(VERSION_PATCH, 0);
}
// DBSCAN on 9 2-D points: two tight 4-point groups (near (0,0) and (5,5))
// plus one isolated point at (10,10). With eps=0.5 and min_samples=2 the
// test expects 2 clusters and exactly 1 noise point.
#[test]
fn test_dbscan_basic() -> Result<(), Box<dyn std::error::Error>> {
let data = Tensor::from_vec(
vec![
0.0, 0.0, 0.1, 0.1, 0.0, 0.2, 0.2, 0.0, 5.0, 5.0, 5.1, 5.1, 5.0, 5.2, 5.2, 5.0,
10.0, 10.0,
],
&[9, 2],
)?;
let dbscan = DBSCAN::new(0.5, 2);
let result = dbscan.fit(&data)?;
assert_eq!(result.n_clusters, 2);
assert_eq!(result.noise_points.len(), 1);
assert!(!result.core_sample_indices.is_empty());
Ok(())
}
// Agglomerative clustering on 4 points forming two obvious pairs; expects
// exactly 2 clusters and exactly 2 distinct labels in the output.
#[test]
fn test_hierarchical_basic() -> Result<(), Box<dyn std::error::Error>> {
let data = Tensor::from_vec(vec![0.0, 0.0, 0.1, 0.1, 5.0, 5.0, 5.1, 5.1], &[4, 2])?;
let hierarchical = AgglomerativeClustering::new(2);
let result = hierarchical.fit(&data)?;
assert_eq!(result.n_clusters, 2);
let labels_vec = result.labels.to_vec()?;
// Collect distinct labels to confirm both clusters are represented.
let unique_labels: std::collections::HashSet<i32> =
labels_vec.iter().map(|&x| x as i32).collect();
assert_eq!(unique_labels.len(), 2);
Ok(())
}
// Silhouette score for two well-separated pairs of points, labeled
// correctly, should be clearly positive (the test uses > 0.5 as the bar).
#[test]
fn test_silhouette_score_basic() -> Result<(), Box<dyn std::error::Error>> {
let data = Tensor::from_vec(
vec![
0.0, 0.0, 0.1, 0.1, 10.0, 10.0, 10.1, 10.1,
],
&[4, 2],
)?;
let labels = Tensor::from_vec(vec![0.0, 0.0, 1.0, 1.0], &[4])?;
let score = silhouette_score(&data, &labels)?;
assert!(
score > 0.5,
"Silhouette score should be positive for well-separated clusters"
);
Ok(())
}
// ARI of identical label assignments must be exactly 1.0 (perfect agreement).
#[test]
fn test_adjusted_rand_score_perfect() -> Result<(), Box<dyn std::error::Error>> {
let labels_true = Tensor::from_vec(vec![0.0, 0.0, 1.0, 1.0], &[4])?;
let labels_pred = Tensor::from_vec(vec![0.0, 0.0, 1.0, 1.0], &[4])?;
let score = adjusted_rand_score(&labels_true, &labels_pred)?;
assert_relative_eq!(score, 1.0, epsilon = 1e-6);
Ok(())
}
// Calinski-Harabasz index grows with between-cluster separation; for these
// two compact, separated pairs the test only requires it to exceed 1.
#[test]
fn test_calinski_harabasz_score() -> Result<(), Box<dyn std::error::Error>> {
let data = Tensor::from_vec(
vec![
0.0, 0.0, 0.1, 0.1, 5.0, 5.0, 5.1, 5.1,
],
&[4, 2],
)?;
let labels = Tensor::from_vec(vec![0.0, 0.0, 1.0, 1.0], &[4])?;
let score = calinski_harabasz_score(&data, &labels)?;
assert!(
score > 1.0,
"Calinski-Harabasz score should be > 1 for well-separated clusters"
);
Ok(())
}
// Davies-Bouldin index is non-negative by definition and small for
// well-separated clusters; the test accepts anything in [0, 5).
#[test]
fn test_davies_bouldin_score() -> Result<(), Box<dyn std::error::Error>> {
let data = Tensor::from_vec(
vec![
0.0, 0.0, 0.1, 0.1, 10.0, 10.0, 10.1, 10.1,
],
&[4, 2],
)?;
let labels = Tensor::from_vec(vec![0.0, 0.0, 1.0, 1.0], &[4])?;
let score = davies_bouldin_score(&data, &labels)?;
assert!(score >= 0.0, "Davies-Bouldin score should be non-negative");
assert!(
score < 5.0,
"Davies-Bouldin score should be reasonable for well-separated clusters"
);
Ok(())
}
// Gaussian mixture with 2 diagonal-covariance components on 8 points drawn
// around (0,0) and (5,5); checks result shapes, finite information criteria,
// and that EM stayed within the configured iteration budget.
#[test]
fn test_gmm_basic() -> Result<(), Box<dyn std::error::Error>> {
use algorithms::gaussian_mixture::{CovarianceType, GaussianMixture};
let data = Tensor::from_vec(
vec![
0.0, 0.0, 0.1, 0.1, -0.1, 0.1, 0.1, -0.1, 5.0, 5.0, 5.1, 5.1, 4.9, 5.1, 5.1, 4.9,
],
&[8, 2],
)?;
let gmm = GaussianMixture::new(2)
.covariance_type(CovarianceType::Diag)
.max_iters(50)
.tolerance(1e-3);
let result = gmm.fit(&data)?;
assert_eq!(result.n_clusters(), 2);
// Shape checks: [k, d] means, [k] weights, [n] labels, [n, k] responsibilities.
assert_eq!(result.means.shape().dims(), &[2, 2]); assert_eq!(result.weights.shape().dims(), &[2]); assert_eq!(result.labels.shape().dims(), &[8]); assert_eq!(result.responsibilities.shape().dims(), &[8, 2]);
assert!(result.log_likelihood > f64::NEG_INFINITY);
assert!(result.aic.is_finite());
assert!(result.bic.is_finite());
assert!(result.n_iter <= 50);
println!(
"GMM test passed - log_likelihood: {}, AIC: {}, BIC: {}, converged: {}",
result.log_likelihood, result.aic, result.bic, result.converged
);
Ok(())
}
// Spectral clustering with an RBF affinity on two 4-point groups; checks
// output shapes, that the eigen-embedding succeeded, and that the final
// k-means stage stayed within its iteration cap.
#[test]
fn test_spectral_clustering_basic() -> Result<(), Box<dyn std::error::Error>> {
use algorithms::spectral::{AffinityType, SpectralClustering};
let data = Tensor::from_vec(
vec![
0.0, 0.0, 0.1, 0.1, -0.1, 0.1, 0.1, -0.1, 3.0, 3.0, 3.1, 3.1, 2.9, 3.1, 3.1, 2.9,
],
&[8, 2],
)?;
let spectral = SpectralClustering::new(2)
.affinity(AffinityType::Rbf)
.gamma(1.0);
let result = spectral.fit(&data)?;
assert_eq!(result.n_clusters(), 2);
// Shape checks: [n] labels, [n, n] affinity, [n, k] embedding, [n] eigenvalues.
assert_eq!(result.labels.shape().dims(), &[8]); assert_eq!(result.affinity_matrix.shape().dims(), &[8, 8]); assert_eq!(result.embedding.shape().dims(), &[8, 2]); assert_eq!(result.eigenvalues.shape().dims(), &[8]);
assert!(result.embedding_success);
assert!(result.kmeans_iterations <= 100);
println!(
"Spectral clustering test passed - embedding_success: {}, kmeans_iterations: {}",
result.embedding_success, result.kmeans_iterations
);
Ok(())
}
}