fastkmeans_rs/lib.rs
1//! # fastkmeans-rs
2//!
3//! A fast and efficient k-means clustering implementation in Rust,
4//! compatible with ndarray.
5//!
6//! ## Features
7//!
8//! - **Double-chunking algorithm**: Processes both data and centroids in chunks
9//! to minimize memory usage while maintaining efficiency
10//! - **Parallel computation**: Uses rayon for multi-threaded processing
11//! - **ndarray compatible**: Works seamlessly with ndarray arrays
12//! - **FAISS/scikit-learn compatible API**: Familiar `train()`, `fit()`, `predict()` interface
13//! - **Optional BLAS acceleration**: Enable the `accelerate` (macOS), `openblas`, or `mkl` feature for faster matrix operations
14//!
15//! ## Example
16//!
17//! ```rust
18//! use fastkmeans_rs::FastKMeans;
19//! use ndarray::Array2;
20//! use ndarray_rand::RandomExt;
21//! use ndarray_rand::rand_distr::Uniform;
22//!
23//! // Generate random data
24//! let data = Array2::random((1000, 128), Uniform::new(-1.0f32, 1.0));
25//!
26//! // Create and train the model
27//! let mut kmeans = FastKMeans::new(128, 10);
28//! kmeans.train(&data.view()).unwrap();
29//!
30//! // Get cluster assignments
31//! let labels = kmeans.predict(&data.view()).unwrap();
32//! assert_eq!(labels.len(), 1000);
33//! ```
34//!
35//! ## Custom Configuration
36//!
37//! ```rust
38//! use fastkmeans_rs::{FastKMeans, KMeansConfig};
39//! use ndarray::Array2;
40//! use ndarray_rand::RandomExt;
41//! use ndarray_rand::rand_distr::Uniform;
42//!
43//! let data = Array2::random((5000, 64), Uniform::new(-1.0f32, 1.0));
44//!
45//! let config = KMeansConfig {
46//!     k: 50,
47//!     max_iters: 100,
48//!     tol: 1e-6,
49//!     seed: 42,
50//!     max_points_per_centroid: None, // Disable subsampling
51//!     chunk_size_data: 10_000,
52//!     chunk_size_centroids: 1_000,
53//!     verbose: false,
54//! };
55//!
56//! let mut kmeans = FastKMeans::with_config(config);
57//! let labels = kmeans.fit_predict(&data.view()).unwrap();
58//! ```
59//!
60//! ## BLAS Acceleration
61//!
62//! For improved performance on large datasets, enable a BLAS backend:
63//!
64//! ```toml
65//! # macOS (recommended - uses Apple Accelerate)
66//! fastkmeans-rs = { version = "0.1", features = ["accelerate"] }
67//!
68//! # Linux/Windows (requires OpenBLAS installed)
69//! fastkmeans-rs = { version = "0.1", features = ["openblas"] }
70//! ```
71//!
72//! ## CUDA GPU Acceleration
73//!
74//! For maximum performance on large datasets, enable CUDA support:
75//!
76//! ```toml
77//! fastkmeans-rs = { version = "0.1", features = ["cuda"] }
78//! ```
79//!
80//! This requires the CUDA toolkit to be installed. Then use `FastKMeansCuda`:
81//!
82//! ```ignore
83//! use fastkmeans_rs::cuda::FastKMeansCuda;
84//! use fastkmeans_rs::KMeansConfig;
85//! use ndarray::Array2;
86//! use ndarray_rand::RandomExt;
87//! use ndarray_rand::rand_distr::Uniform;
88//!
89//! let data = Array2::random((100000, 128), Uniform::new(-1.0f32, 1.0));
90//!
91//! let config = KMeansConfig::new(1024)
92//!     .with_max_iters(50)
93//!     .with_verbose(true);
94//!
95//! let mut kmeans = FastKMeansCuda::with_config(config).unwrap();
96//! kmeans.train(&data.view()).unwrap();
97//!
98//! let labels = kmeans.predict(&data.view()).unwrap();
99//! ```
100
101// Link BLAS libraries when features are enabled
// Each `extern crate` below is gated by `#[cfg(feature = ...)]`, so a backend
// crate is only part of the build when its Cargo feature is turned on. No item
// from these crates is referenced by name in this file; the declarations are
// here for linking, as the comment above states.
102#[cfg(feature = "accelerate")]
103extern crate accelerate_src;
104
// Compiled in only with the `openblas` feature.
105#[cfg(feature = "openblas")]
106extern crate openblas_src;
107
// Compiled in only with the `mkl` feature.
108#[cfg(feature = "mkl")]
109extern crate intel_mkl_src;
110
// Crate modules. `algorithm` is public; `config`, `distance`, `error` and
// `kmeans` are private. Items from `config`, `error` and `kmeans` are re-exported
// at the crate root further down; `distance` has no root re-export in this file.
111pub mod algorithm;
112mod config;
113mod distance;
114mod error;
115mod kmeans;
116
// Optional backend modules: each one is compiled only when its Cargo feature
// (`cuda` / `metal_gpu`) is enabled.
117#[cfg(feature = "cuda")]
118pub mod cuda;
119
120#[cfg(feature = "metal_gpu")]
121pub mod metal_gpu;
122
// Crate-root re-exports, so callers can write e.g. `fastkmeans_rs::FastKMeans`
// without naming the defining module (three of which are private).
123pub use algorithm::kmeans_double_chunked;
124pub use config::KMeansConfig;
125pub use error::KMeansError;
126pub use kmeans::FastKMeans;
127
// Feature-gated re-exports: each is present only when the corresponding
// `cuda` / `metal_gpu` feature (and therefore its module) is compiled in.
128#[cfg(feature = "cuda")]
129pub use cuda::FastKMeansCuda;
130
131#[cfg(feature = "metal_gpu")]
132pub use metal_gpu::FastKMeansMetal;