// fastkmeans_rs/config.rs
/// Configuration for the FastKMeans algorithm.
///
/// Create one with [`KMeansConfig::new`] and refine it with the chainable
/// `with_*` setters, or start from [`Default::default`] and override fields
/// directly. Two configurations compare equal when every field is equal.
#[derive(Debug, Clone, PartialEq)]
pub struct KMeansConfig {
    /// Number of clusters. Defaults to `8`.
    pub k: usize,

    /// Maximum number of iterations. Defaults to `25`.
    pub max_iters: usize,

    /// Convergence tolerance. When the centroid shift is below this threshold,
    /// the algorithm stops early. Set to a negative value to disable early
    /// stopping. Defaults to `1e-8`.
    pub tol: f64,

    /// Random seed for centroid initialization and subsampling. Defaults to `0`.
    pub seed: u64,

    /// Maximum points per centroid for subsampling.
    /// If `n_samples > k * max_points_per_centroid`, the data will be subsampled.
    /// Set to `None` to disable subsampling. Defaults to `Some(256)`.
    pub max_points_per_centroid: Option<usize>,

    /// Chunk size for data processing. Larger values use more memory but may
    /// be faster. Defaults to `51_200`.
    pub chunk_size_data: usize,

    /// Chunk size for centroid processing. Larger values use more memory but
    /// may be faster. Defaults to `10_240`.
    pub chunk_size_centroids: usize,

    /// Print verbose output during training. Defaults to `false`.
    pub verbose: bool,
}
31
32impl Default for KMeansConfig {
33 fn default() -> Self {
34 Self {
35 k: 8,
36 max_iters: 25,
37 tol: 1e-8,
38 seed: 0,
39 max_points_per_centroid: Some(256),
40 chunk_size_data: 51_200,
41 chunk_size_centroids: 10_240,
42 verbose: false,
43 }
44 }
45}
46
47impl KMeansConfig {
48 /// Create a new configuration with the specified number of clusters
49 pub fn new(k: usize) -> Self {
50 Self {
51 k,
52 ..Default::default()
53 }
54 }
55
56 /// Set the maximum number of iterations
57 pub fn with_max_iters(mut self, max_iters: usize) -> Self {
58 self.max_iters = max_iters;
59 self
60 }
61
62 /// Set the convergence tolerance
63 pub fn with_tol(mut self, tol: f64) -> Self {
64 self.tol = tol;
65 self
66 }
67
68 /// Set the random seed
69 pub fn with_seed(mut self, seed: u64) -> Self {
70 self.seed = seed;
71 self
72 }
73
74 /// Set the maximum points per centroid (for subsampling)
75 pub fn with_max_points_per_centroid(mut self, max_ppc: Option<usize>) -> Self {
76 self.max_points_per_centroid = max_ppc;
77 self
78 }
79
80 /// Set verbose mode
81 pub fn with_verbose(mut self, verbose: bool) -> Self {
82 self.verbose = verbose;
83 self
84 }
85
86 /// Set the data chunk size
87 pub fn with_chunk_size_data(mut self, chunk_size: usize) -> Self {
88 self.chunk_size_data = chunk_size;
89 self
90 }
91
92 /// Set the centroid chunk size
93 pub fn with_chunk_size_centroids(mut self, chunk_size: usize) -> Self {
94 self.chunk_size_centroids = chunk_size;
95 self
96 }
97}