sklears_clustering/
lib.rs

// Crate-wide lint suppressions applied to every module in this crate.
// NOTE(review): no rationale is recorded for these blanket allows. The
// `clippy::all` entry appears to already cover the individually listed
// `clippy::*` lints further down — confirm against the Clippy lint groups
// before pruning or narrowing any of them.
1#![allow(dead_code)]
2#![allow(non_snake_case)]
3#![allow(missing_docs)]
4#![allow(deprecated)]
5#![allow(clippy::all)]
6#![allow(clippy::pedantic)]
7#![allow(clippy::nursery)]
8#![allow(unused_imports)]
9#![allow(unused_variables)]
10#![allow(unused_mut)]
11#![allow(unused_assignments)]
12#![allow(unused_doc_comments)]
13#![allow(unused_parens)]
14#![allow(unused_comparisons)]
15#![allow(clippy::needless_range_loop)]
16#![allow(clippy::type_complexity)]
17#![allow(clippy::should_implement_trait)]
18#![allow(clippy::ptr_arg)]
19#![allow(clippy::uninit_assumed_init)]
20#![allow(clippy::non_canonical_partial_ord_impl)]
21#![allow(clippy::manual_clamp)]
22#![allow(clippy::legacy_numeric_constants)]
23#![allow(clippy::needless_option_as_deref)]
24#![allow(clippy::ifs_same_cond)]
25//! Clustering algorithms for sklears
26//!
27//! This crate provides implementations of clustering algorithms including:
28//! - K-Means clustering with various initialization methods
29//! - X-Means for automatic cluster number selection
30//! - G-Means for Gaussian cluster detection with automatic number selection
31//! - Mini-batch K-Means for large datasets
32//! - Fuzzy C-Means clustering with membership degrees
33//! - DBSCAN (Density-Based Spatial Clustering)
34//! - Incremental DBSCAN for streaming data and large datasets
35//! - HDBSCAN (Hierarchical Density-Based Spatial Clustering)
36//! - OPTICS (Ordering Points To Identify Clustering Structure)
37//! - BIRCH (Balanced Iterative Reducing and Clustering using Hierarchies)
38//! - Hierarchical clustering
39//! - Mean Shift with adaptive bandwidth estimation
40//! - Density Peaks clustering for automatic cluster center detection
41//! - KDE Clustering using kernel density estimation for density-based clustering
42//! - Spectral Clustering
43//! - Gaussian Mixture Models with model selection criteria (AIC, BIC, ICL)
44//! - Dirichlet Process Mixture Models for infinite mixture modeling
45//! - Local Outlier Factor (LOF) for density-based outlier detection
46//! - CURE (Clustering Using REpresentatives) for large datasets with irregular shapes
47//! - ROCK (RObust Clustering using linKs) for categorical data clustering
48//! - Streaming clustering algorithms (Online K-Means, CluStream, Sliding Window K-Means)
49//! - Graph clustering algorithms (Modularity-based, Louvain, Label Propagation, Spectral)
50//! - Evolutionary and bio-inspired clustering algorithms (PSO, GA, ACO, ABC, Differential Evolution)
51//! - Comprehensive validation metrics for clustering evaluation including stability analysis
52//!
53//! These implementations leverage scirs2's cluster module for efficient computation.
54
55pub mod birch;
56pub mod cure;
57pub mod dbscan;
58pub mod density_peaks;
59pub mod dirichlet_process;
60#[cfg(feature = "parallel")]
61pub mod distributed;
62pub mod ensemble;
63pub mod evolutionary;
64pub mod feature_selection;
65pub mod fuzzy_cmeans;
66pub mod gmm;
67#[cfg(feature = "gpu")]
68pub mod gpu_distances;
69pub mod graph_clustering;
70pub mod hdbscan;
71pub mod hierarchical;
72pub mod incremental_dbscan;
73pub mod kde_clustering;
74pub mod kmeans;
75pub mod locality_sensitive_hashing;
76pub mod lof;
77pub mod mean_shift;
78pub mod memory_mapped;
79pub mod multi_view;
80pub mod optics;
81pub mod out_of_core;
82pub mod rock;
83pub mod semi_supervised;
84pub mod simd_distances;
85pub mod sparse_matrix;
86pub mod spectral;
87pub mod streaming;
88pub mod text_clustering;
89pub mod time_series;
90pub mod validation;
91
92#[cfg(feature = "parallel")]
93pub mod parallel;
94#[cfg(feature = "parallel")]
95pub mod parallel_hierarchical;
96pub mod performance;
97
98pub use birch::{BIRCHConfig, ClusteringFeature, BIRCH};
99pub use cure::{CUREConfig, CUREDistanceMetric, CUREFitted, CURE};
100pub use dbscan::{DBSCANConfig, DBSCAN, NOISE};
101pub use density_peaks::{
102    DensityPeaks, DensityPeaksConfig, DistanceMetric as DensityPeaksDistanceMetric,
103};
104pub use dirichlet_process::{DirichletProcessConfig, DirichletProcessMixture, PredictProbaDP};
105#[cfg(feature = "parallel")]
106pub use distributed::{
107    DBSCANWorker, DataPartition, DistributedConfig, DistributedDBSCAN, WorkerMessage,
108};
109pub use ensemble::{
110    BaggingClustering, EnsembleConfig, EnsembleConfigBuilder, EnsembleMethod, EnsembleResult,
111    EvidenceAccumulationClustering, VotingEnsemble,
112};
113pub use evolutionary::{PSOClustering, PSOClusteringBuilder, PSOClusteringFitted};
114pub use feature_selection::{
115    FeatureSelectionConfig, FeatureSelectionConfigBuilder, FeatureSelectionMethod,
116    FeatureSelectionResult, FeatureSelector,
117};
118pub use fuzzy_cmeans::{FuzzyCMeans, FuzzyCMeansConfig, PredictMembership};
119pub use gmm::{
120    BayesianGaussianMixture, CovarianceType, GaussianMixture, GaussianMixtureConfig,
121    ModelSelectionCriterion, ModelSelectionResult, PredictProba, WeightInit,
122};
123#[cfg(feature = "gpu")]
124pub use gpu_distances::{GpuConfig, GpuDistanceComputer, GpuDistanceMetric};
125pub use graph_clustering::{
126    Graph, GraphClusteringResult, LabelPropagationClustering,
127    LabelPropagationConfig as GraphLabelPropagationConfig, LouvainClustering, LouvainConfig,
128    LouvainResult, ModularityClustering, ModularityClusteringConfig, SpectralGraphClustering,
129    SpectralGraphConfig,
130};
131pub use hdbscan::{ClusterStat, HDBSCANConfig, HDBSCAN};
132pub use hierarchical::{
133    AgglomerativeClustering, AgglomerativeClusteringConfig, Constraint, ConstraintSet, Dendrogram,
134    DendrogramExport, DendrogramLinkExport, DendrogramNode, DendrogramNodeExport, MemoryStrategy,
135};
136pub use incremental_dbscan::{
137    DistanceMetric as IncrementalDistanceMetric, IncrementalDBSCAN, IncrementalDBSCANConfig,
138};
139pub use kde_clustering::{BandwidthMethod, KDEClustering, KDEClusteringConfig, KernelType};
140pub use kmeans::{
141    GMeans, GMeansConfig, InformationCriterion, KMeans, KMeansConfig, KMeansInit, MiniBatchKMeans,
142    MiniBatchKMeansConfig, XMeans, XMeansConfig,
143};
144pub use locality_sensitive_hashing::{
145    LSHConfig, LSHFamily, LSHIndex, LSHIndexStats, MemoryUsage, TableStats,
146};
147pub use lof::{DistanceMetric as LOFDistanceMetric, LOFConfig, LOF};
148pub use mean_shift::{MeanShift, MeanShiftConfig};
149pub use memory_mapped::{MemoryMappedConfig, MemoryMappedDistanceMatrix, MemoryStats};
150pub use multi_view::{
151    ConsensusClustering, ConsensusClusteringConfig, ConsensusClusteringFitted, ConsensusMethod,
152    MultiViewData, MultiViewKMeans, MultiViewKMeansConfig, MultiViewKMeansFitted, ViewWeighting,
153    WeightLearning,
154};
155pub use optics::{
156    Algorithm, ClusterMethod, DistanceMetric as OpticsDistanceMetric, Optics, OpticsConfig,
157    OpticsOrdering,
158};
159pub use out_of_core::{ClusterSummary, OutOfCoreConfig, OutOfCoreDataLoader, OutOfCoreKMeans};
160pub use rock::{ROCKConfig, ROCKFitted, ROCKSimilarity, ROCK};
161pub use semi_supervised::{
162    ConstrainedKMeans, ConstrainedKMeansConfig, ConstrainedKMeansFitted, ConstraintHandling,
163    ConstraintType, LabelPropagation, LabelPropagationConfig, LabelPropagationFitted,
164};
165pub use simd_distances::{
166    simd_distance, simd_distance_batch, simd_k_nearest_neighbors, DistanceMetric,
167    OptimizedDistanceComputer, SimdDistanceMetric,
168};
169pub use sparse_matrix::{
170    GraphStats, SparseDistanceMatrix, SparseEntry, SparseMatrixConfig, SparseMatrixStats,
171    SparseNeighborhoodGraph,
172};
173pub use spectral::{
174    Affinity, EigenSolver, NormalizationMethod, SpectralClustering, SpectralClusteringConfig,
175};
176pub use streaming::{CluStream, MicroCluster, OnlineKMeans, SlidingWindowKMeans, StreamingConfig};
177pub use text_clustering::{
178    DocumentClustering, DocumentClusteringConfig, DocumentClusteringResult, SphericalInit,
179    SphericalKMeans, SphericalKMeansConfig, SphericalKMeansFitted,
180};
181pub use time_series::{
182    CentroidAveraging, ChangeDetectionTest, DTWKMeans, DTWKMeansConfig, DTWKMeansFitted,
183    RegimeChangeConfig, RegimeChangeDetector, RegimeChangeResult, ShapeClustering,
184    ShapeClusteringConfig, ShapeClusteringFitted, ShapeDistanceMetric,
185    TemporalSegmentationClustering, TemporalSegmentationConfig, TemporalSegmentationResult,
186};
187pub use validation::{
188    ClusteringValidator,
189    GapStatisticResult,
190    SilhouetteResult,
191    ValidationMetric,
192    // TODO: Fix these imports
193    // AccuracyMetrics, CrossValidationStabilityResult,
194    // ExternalValidationMetrics, FoldResult, NoiseStabilityResult,
195    // ParameterAgreement, ParameterResult, ParameterSensitivityResult, PerturbationStabilityResult,
196    // StabilityMetrics, StabilityResult, SubsampleStabilityResult, ValidationMetrics,
197};
198
199#[cfg(feature = "parallel")]
200pub use parallel::{SimpleParallelKMeans, SimpleParallelKMeansFitted};
201#[cfg(feature = "parallel")]
202pub use parallel_hierarchical::{
203    ClusterMerge, DistanceChunk, ParallelClusteringState, ParallelHierarchicalClustering,
204    ParallelHierarchicalConfig,
205};
206
207// Re-export `ParallelFit` from `sklears_core` when the parallel feature is enabled
208#[cfg(feature = "parallel")]
209pub use sklears_core::parallel::ParallelFit;
210
211// Re-export commonly used types from scirs2
212pub use scirs2_cluster::density::DistanceMetric as DensityDistanceMetric;
213pub use scirs2_cluster::hierarchy::{LinkageMethod, Metric};
214
215/// Prelude module for convenient imports.
///
/// Re-exports a selection of items from the crate's algorithm and utility
/// modules so that they can be brought into scope with one glob import of
/// this module.
///
/// What the list below shows:
/// - It is a subset of the crate-root re-exports: none of the `*Config` types
///   exported at the crate root are repeated here (only the
///   `EnsembleConfigBuilder` and `FeatureSelectionConfigBuilder` builders are).
/// - The `DistanceMetric` items of `density_peaks`, `incremental_dbscan`, `lof`
///   and `optics` are re-exported under module-specific aliases
///   (`DensityPeaksDistanceMetric`, `IncrementalDistanceMetric`,
///   `LOFDistanceMetric`, `OpticsDistanceMetric`), so each has a distinct name
///   under a glob import.
/// - Items from `distributed`, `parallel` and `parallel_hierarchical` are only
///   present when the `parallel` feature is enabled; items from
///   `gpu_distances` only when the `gpu` feature is enabled.
/// - Nothing from `simd_distances` is re-exported here.
216pub mod prelude {
217    pub use crate::birch::{ClusteringFeature, BIRCH};
218    pub use crate::cure::{CUREDistanceMetric, CURE};
219    pub use crate::dbscan::{DBSCAN, NOISE};
220    pub use crate::density_peaks::{DensityPeaks, DistanceMetric as DensityPeaksDistanceMetric};
221    pub use crate::dirichlet_process::{DirichletProcessMixture, PredictProbaDP};
    // Gated: the `distributed` module itself is only declared with the `parallel` feature.
222    #[cfg(feature = "parallel")]
223    pub use crate::distributed::{DBSCANWorker, DataPartition, DistributedDBSCAN};
224    pub use crate::ensemble::{
225        BaggingClustering, EnsembleConfigBuilder, EnsembleMethod, EvidenceAccumulationClustering,
226        VotingEnsemble,
227    };
228    pub use crate::evolutionary::PSOClustering;
229    pub use crate::feature_selection::{
230        FeatureSelectionConfigBuilder, FeatureSelectionMethod, FeatureSelector,
231    };
232    pub use crate::fuzzy_cmeans::{FuzzyCMeans, PredictMembership};
233    pub use crate::gmm::{
234        BayesianGaussianMixture, CovarianceType, GaussianMixture, ModelSelectionCriterion,
235        ModelSelectionResult, PredictProba,
236    };
    // Gated: the `gpu_distances` module is only declared with the `gpu` feature.
237    #[cfg(feature = "gpu")]
238    pub use crate::gpu_distances::{GpuDistanceComputer, GpuDistanceMetric};
239    pub use crate::graph_clustering::{
240        Graph, GraphClusteringResult, LabelPropagationClustering, LouvainClustering, LouvainResult,
241        ModularityClustering, SpectralGraphClustering,
242    };
243    pub use crate::hdbscan::{ClusterStat, HDBSCAN};
244    pub use crate::hierarchical::{
245        AgglomerativeClustering, Constraint, ConstraintSet, Dendrogram, DendrogramExport,
246        DendrogramNode, MemoryStrategy,
247    };
248    pub use crate::incremental_dbscan::{
249        DistanceMetric as IncrementalDistanceMetric, IncrementalDBSCAN,
250    };
251    pub use crate::kde_clustering::{BandwidthMethod, KDEClustering, KernelType};
252    pub use crate::kmeans::{
253        GMeans, InformationCriterion, KMeans, KMeansInit, MiniBatchKMeans, XMeans,
254    };
255    pub use crate::locality_sensitive_hashing::{LSHFamily, LSHIndex};
256    pub use crate::lof::{DistanceMetric as LOFDistanceMetric, LOF};
257    pub use crate::mean_shift::MeanShift;
258    pub use crate::memory_mapped::{MemoryMappedDistanceMatrix, MemoryStats};
259    pub use crate::multi_view::{
260        ConsensusClustering, ConsensusMethod, MultiViewData, MultiViewKMeans, ViewWeighting,
261        WeightLearning,
262    };
263    pub use crate::optics::{ClusterMethod, DistanceMetric as OpticsDistanceMetric, Optics};
264    pub use crate::out_of_core::{ClusterSummary, OutOfCoreDataLoader, OutOfCoreKMeans};
265    pub use crate::rock::{ROCKSimilarity, ROCK};
266    pub use crate::semi_supervised::{
267        ConstrainedKMeans, ConstraintHandling, ConstraintType, LabelPropagation,
268    };
269    pub use crate::sparse_matrix::{SparseDistanceMatrix, SparseNeighborhoodGraph};
270    pub use crate::spectral::{Affinity, NormalizationMethod, SpectralClustering};
271    pub use crate::streaming::{CluStream, MicroCluster, OnlineKMeans, SlidingWindowKMeans};
272    pub use crate::text_clustering::{DocumentClustering, SphericalInit, SphericalKMeans};
273    pub use crate::time_series::{
274        CentroidAveraging, ChangeDetectionTest, DTWKMeans, RegimeChangeDetector, ShapeClustering,
275        ShapeDistanceMetric, TemporalSegmentationClustering,
276    };
277    pub use crate::validation::{
278        ClusteringValidator,
279        ValidationMetric,
280        // TODO: Fix these imports
        // NOTE(review): the names below are commented out, so they are not
        // part of the prelude; what needs fixing is not visible from this file.
281        // AccuracyMetrics, ConsensusStabilityResult, BootstrapStabilityResult,
282        // CrossValidationStabilityResult, NoiseStabilityResult, ParameterSensitivityResult,
283        // PerturbationStabilityResult, StabilityResult, SubsampleStabilityResult,
284    };
285
    // Gated: `parallel` and `parallel_hierarchical` are only declared with the
    // `parallel` feature, so their re-exports carry the same gate.
286    #[cfg(feature = "parallel")]
287    pub use crate::parallel::{SimpleParallelKMeans, SimpleParallelKMeansFitted};
288    #[cfg(feature = "parallel")]
289    pub use crate::parallel_hierarchical::{
290        ParallelClusteringState, ParallelHierarchicalClustering,
291    };
292}