// fastkmeans_rs/config.rs

/// Configuration for the FastKMeans algorithm.
///
/// Every field is public, so a value can be written as a struct literal or
/// adjusted field by field after construction.
#[derive(Debug, Clone, PartialEq)]
pub struct KMeansConfig {
    /// Number of clusters.
    pub k: usize,

    /// Maximum number of iterations.
    pub max_iters: usize,

    /// Convergence tolerance. When the centroid shift is below this threshold,
    /// the algorithm stops early. Set to a negative value to disable early stopping.
    pub tol: f64,

    /// Random seed for centroid initialization and subsampling.
    pub seed: u64,

    /// Maximum points per centroid for subsampling.
    /// If `n_samples > k * max_points_per_centroid`, the data will be subsampled.
    /// Set to `None` to disable subsampling.
    pub max_points_per_centroid: Option<usize>,

    /// Chunk size for data processing. Larger values use more memory but may be faster.
    pub chunk_size_data: usize,

    /// Chunk size for centroid processing. Larger values use more memory but may be faster.
    pub chunk_size_centroids: usize,

    /// Print verbose output during training.
    pub verbose: bool,
}
31
32impl Default for KMeansConfig {
33    fn default() -> Self {
34        Self {
35            k: 8,
36            max_iters: 25,
37            tol: 1e-8,
38            seed: 0,
39            max_points_per_centroid: Some(256),
40            chunk_size_data: 51_200,
41            chunk_size_centroids: 10_240,
42            verbose: false,
43        }
44    }
45}
46
47impl KMeansConfig {
48    /// Create a new configuration with the specified number of clusters
49    pub fn new(k: usize) -> Self {
50        Self {
51            k,
52            ..Default::default()
53        }
54    }
55
56    /// Set the maximum number of iterations
57    pub fn with_max_iters(mut self, max_iters: usize) -> Self {
58        self.max_iters = max_iters;
59        self
60    }
61
62    /// Set the convergence tolerance
63    pub fn with_tol(mut self, tol: f64) -> Self {
64        self.tol = tol;
65        self
66    }
67
68    /// Set the random seed
69    pub fn with_seed(mut self, seed: u64) -> Self {
70        self.seed = seed;
71        self
72    }
73
74    /// Set the maximum points per centroid (for subsampling)
75    pub fn with_max_points_per_centroid(mut self, max_ppc: Option<usize>) -> Self {
76        self.max_points_per_centroid = max_ppc;
77        self
78    }
79
80    /// Set verbose mode
81    pub fn with_verbose(mut self, verbose: bool) -> Self {
82        self.verbose = verbose;
83        self
84    }
85
86    /// Set the data chunk size
87    pub fn with_chunk_size_data(mut self, chunk_size: usize) -> Self {
88        self.chunk_size_data = chunk_size;
89        self
90    }
91
92    /// Set the centroid chunk size
93    pub fn with_chunk_size_centroids(mut self, chunk_size: usize) -> Self {
94        self.chunk_size_centroids = chunk_size;
95        self
96    }
97}