// scirs2_cluster/tuning/search_spaces.rs

//! Standard search spaces for clustering algorithms.
//!
//! This module provides predefined hyperparameter search spaces
//! for common clustering algorithms.
5
6use std::collections::HashMap;
7
8use super::config::*;
9
/// Standard search spaces for clustering algorithms.
///
/// This is a data-less namespace type: every predefined search space is
/// exposed as an associated function (for example
/// `StandardSearchSpaces::kmeans()`), so no instance is needed to use it.
/// The common traits are derived so the type can still be printed, copied
/// and default-constructed when it is embedded in other types.
#[derive(Debug, Clone, Copy, Default)]
pub struct StandardSearchSpaces;
12
13impl StandardSearchSpaces {
14    /// K-means search space
15    pub fn kmeans() -> SearchSpace {
16        let mut parameters = HashMap::new();
17
18        parameters.insert(
19            "n_clusters".to_string(),
20            HyperParameter::Integer { min: 2, max: 20 },
21        );
22
23        parameters.insert(
24            "max_iter".to_string(),
25            HyperParameter::IntegerChoices {
26                choices: vec![100, 300, 500, 1000],
27            },
28        );
29
30        parameters.insert(
31            "tolerance".to_string(),
32            HyperParameter::LogUniform {
33                min: 1e-6,
34                max: 1e-2,
35            },
36        );
37
38        SearchSpace {
39            parameters,
40            constraints: vec![],
41        }
42    }
43
44    /// DBSCAN search space
45    pub fn dbscan() -> SearchSpace {
46        let mut parameters = HashMap::new();
47
48        parameters.insert(
49            "eps".to_string(),
50            HyperParameter::Float { min: 0.1, max: 2.0 },
51        );
52
53        parameters.insert(
54            "min_samples".to_string(),
55            HyperParameter::Integer { min: 3, max: 20 },
56        );
57
58        SearchSpace {
59            parameters,
60            constraints: vec![],
61        }
62    }
63
64    /// OPTICS search space
65    pub fn optics() -> SearchSpace {
66        let mut parameters = HashMap::new();
67
68        parameters.insert(
69            "min_samples".to_string(),
70            HyperParameter::Integer { min: 2, max: 20 },
71        );
72
73        parameters.insert(
74            "max_eps".to_string(),
75            HyperParameter::Float {
76                min: 0.5,
77                max: 10.0,
78            },
79        );
80
81        SearchSpace {
82            parameters,
83            constraints: vec![],
84        }
85    }
86
87    /// Spectral clustering search space
88    pub fn spectral() -> SearchSpace {
89        let mut parameters = HashMap::new();
90
91        parameters.insert(
92            "n_clusters".to_string(),
93            HyperParameter::Integer { min: 2, max: 20 },
94        );
95
96        parameters.insert(
97            "n_neighbors".to_string(),
98            HyperParameter::Integer { min: 5, max: 50 },
99        );
100
101        parameters.insert(
102            "gamma".to_string(),
103            HyperParameter::LogUniform {
104                min: 0.001,
105                max: 10.0,
106            },
107        );
108
109        parameters.insert(
110            "max_iter".to_string(),
111            HyperParameter::IntegerChoices {
112                choices: vec![100, 300, 500],
113            },
114        );
115
116        SearchSpace {
117            parameters,
118            constraints: vec![],
119        }
120    }
121
122    /// Affinity Propagation search space
123    pub fn affinity_propagation() -> SearchSpace {
124        let mut parameters = HashMap::new();
125
126        parameters.insert(
127            "damping".to_string(),
128            HyperParameter::Float {
129                min: 0.5,
130                max: 0.99,
131            },
132        );
133
134        parameters.insert(
135            "max_iter".to_string(),
136            HyperParameter::IntegerChoices {
137                choices: vec![200, 500, 1000],
138            },
139        );
140
141        parameters.insert(
142            "convergence_iter".to_string(),
143            HyperParameter::Integer { min: 10, max: 50 },
144        );
145
146        SearchSpace {
147            parameters,
148            constraints: vec![],
149        }
150    }
151
152    /// BIRCH search space
153    pub fn birch() -> SearchSpace {
154        let mut parameters = HashMap::new();
155
156        parameters.insert(
157            "branching_factor".to_string(),
158            HyperParameter::IntegerChoices {
159                choices: vec![25, 50, 100, 200],
160            },
161        );
162
163        parameters.insert(
164            "threshold".to_string(),
165            HyperParameter::Float { min: 0.1, max: 2.0 },
166        );
167
168        SearchSpace {
169            parameters,
170            constraints: vec![],
171        }
172    }
173
174    /// Gaussian Mixture Model search space
175    pub fn gmm() -> SearchSpace {
176        let mut parameters = HashMap::new();
177
178        parameters.insert(
179            "n_components".to_string(),
180            HyperParameter::Integer { min: 2, max: 20 },
181        );
182
183        parameters.insert(
184            "max_iter".to_string(),
185            HyperParameter::IntegerChoices {
186                choices: vec![100, 200, 500],
187            },
188        );
189
190        parameters.insert(
191            "tol".to_string(),
192            HyperParameter::LogUniform {
193                min: 1e-6,
194                max: 1e-2,
195            },
196        );
197
198        parameters.insert(
199            "reg_covar".to_string(),
200            HyperParameter::LogUniform {
201                min: 1e-8,
202                max: 1e-4,
203            },
204        );
205
206        SearchSpace {
207            parameters,
208            constraints: vec![],
209        }
210    }
211
212    /// Mean Shift search space
213    pub fn mean_shift() -> SearchSpace {
214        let mut parameters = HashMap::new();
215
216        parameters.insert(
217            "bandwidth".to_string(),
218            HyperParameter::Float { min: 0.1, max: 5.0 },
219        );
220
221        parameters.insert(
222            "max_iter".to_string(),
223            HyperParameter::IntegerChoices {
224                choices: vec![100, 300, 500],
225            },
226        );
227
228        SearchSpace {
229            parameters,
230            constraints: vec![],
231        }
232    }
233
234    /// Hierarchical clustering search space
235    pub fn hierarchical() -> SearchSpace {
236        let mut parameters = HashMap::new();
237
238        parameters.insert(
239            "n_clusters".to_string(),
240            HyperParameter::Integer { min: 2, max: 20 },
241        );
242
243        parameters.insert(
244            "linkage".to_string(),
245            HyperParameter::Categorical {
246                choices: vec![
247                    "ward".to_string(),
248                    "complete".to_string(),
249                    "average".to_string(),
250                    "single".to_string(),
251                ],
252            },
253        );
254
255        SearchSpace {
256            parameters,
257            constraints: vec![],
258        }
259    }
260
261    /// Get search space by algorithm name
262    pub fn get_search_space(algorithm: &str) -> Option<SearchSpace> {
263        match algorithm.to_lowercase().as_str() {
264            "kmeans" | "k-means" => Some(Self::kmeans()),
265            "dbscan" => Some(Self::dbscan()),
266            "optics" => Some(Self::optics()),
267            "spectral" => Some(Self::spectral()),
268            "affinity_propagation" | "affinity-propagation" => Some(Self::affinity_propagation()),
269            "birch" => Some(Self::birch()),
270            "gmm" | "gaussian_mixture" => Some(Self::gmm()),
271            "mean_shift" | "mean-shift" => Some(Self::mean_shift()),
272            "hierarchical" | "agglomerative" => Some(Self::hierarchical()),
273            _ => None,
274        }
275    }
276
277    /// Create a custom search space with specified parameter ranges
278    pub fn custom(parameters: HashMap<String, HyperParameter>) -> SearchSpace {
279        SearchSpace {
280            parameters,
281            constraints: vec![],
282        }
283    }
284
285    /// Create a search space with constraints
286    pub fn with_constraints(
287        mut search_space: SearchSpace,
288        constraints: Vec<ParameterConstraint>,
289    ) -> SearchSpace {
290        search_space.constraints = constraints;
291        search_space
292    }
293
294    /// Create a minimal search space for quick testing
295    pub fn minimal_kmeans() -> SearchSpace {
296        let mut parameters = HashMap::new();
297
298        parameters.insert(
299            "n_clusters".to_string(),
300            HyperParameter::IntegerChoices {
301                choices: vec![2, 3, 5, 8],
302            },
303        );
304
305        parameters.insert(
306            "max_iter".to_string(),
307            HyperParameter::IntegerChoices {
308                choices: vec![100, 300],
309            },
310        );
311
312        SearchSpace {
313            parameters,
314            constraints: vec![],
315        }
316    }
317
318    /// Create an extensive search space for thorough optimization
319    pub fn extensive_kmeans() -> SearchSpace {
320        let mut parameters = HashMap::new();
321
322        parameters.insert(
323            "n_clusters".to_string(),
324            HyperParameter::Integer { min: 2, max: 50 },
325        );
326
327        parameters.insert(
328            "max_iter".to_string(),
329            HyperParameter::Integer { min: 50, max: 2000 },
330        );
331
332        parameters.insert(
333            "tolerance".to_string(),
334            HyperParameter::LogUniform {
335                min: 1e-8,
336                max: 1e-1,
337            },
338        );
339
340        parameters.insert(
341            "n_init".to_string(),
342            HyperParameter::IntegerChoices {
343                choices: vec![1, 5, 10, 20],
344            },
345        );
346
347        SearchSpace {
348            parameters,
349            constraints: vec![],
350        }
351    }
352
353    /// Create search space for ensemble methods
354    pub fn ensemble() -> SearchSpace {
355        let mut parameters = HashMap::new();
356
357        parameters.insert(
358            "n_estimators".to_string(),
359            HyperParameter::IntegerChoices {
360                choices: vec![3, 5, 10, 15, 20],
361            },
362        );
363
364        parameters.insert(
365            "base_algorithm".to_string(),
366            HyperParameter::Categorical {
367                choices: vec![
368                    "kmeans".to_string(),
369                    "dbscan".to_string(),
370                    "spectral".to_string(),
371                ],
372            },
373        );
374
375        parameters.insert(
376            "consensus_threshold".to_string(),
377            HyperParameter::Float { min: 0.5, max: 1.0 },
378        );
379
380        SearchSpace {
381            parameters,
382            constraints: vec![],
383        }
384    }
385}
386
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `space` defines every parameter listed in `names`.
    fn assert_defines(space: &SearchSpace, names: &[&str]) {
        for name in names {
            assert!(space.parameters.contains_key(*name));
        }
    }

    /// The K-means preset exposes its three tunable parameters.
    #[test]
    fn test_kmeans_search_space() {
        assert_defines(
            &StandardSearchSpaces::kmeans(),
            &["n_clusters", "max_iter", "tolerance"],
        );
    }

    /// Name lookup succeeds for a known algorithm and fails for an unknown one.
    #[test]
    fn test_get_search_space() {
        let known = StandardSearchSpaces::get_search_space("kmeans");
        let unknown = StandardSearchSpaces::get_search_space("unknown");

        assert!(known.is_some());
        assert!(unknown.is_none());
    }

    /// The DBSCAN preset exposes both of its tunable parameters.
    #[test]
    fn test_dbscan_search_space() {
        assert_defines(&StandardSearchSpaces::dbscan(), &["eps", "min_samples"]);
    }

    /// A custom search space keeps the parameters it was built from.
    #[test]
    fn test_custom_search_space() {
        let entry = (
            "test_param".to_string(),
            HyperParameter::Float { min: 0.0, max: 1.0 },
        );
        let parameters: HashMap<_, _> = std::iter::once(entry).collect();

        assert_defines(&StandardSearchSpaces::custom(parameters), &["test_param"]);
    }
}