1use std::time::Instant;
4
5#[cfg(feature = "parallel")]
6use rayon::prelude::*;
7use scirs2_core::ndarray::{Array1, Array2};
8use scirs2_core::random::Random;
9
10use crate::kernels::KernelType;
11use crate::svc::SVC;
12use sklears_core::error::{Result, SklearsError};
13use sklears_core::traits::{Fit, Predict};
14
15use super::{
16 OptimizationConfig, OptimizationResult, ParameterSet, ParameterSpec, ScoringMetric, SearchSpace,
17};
18
// Local aliases for readability: a dense 2-D feature matrix and a 1-D
// target vector, both backed by `scirs2_core::ndarray`.
type DMatrix<T> = Array2<T>;
type DVector<T> = Array1<T>;
22
/// Randomized hyper-parameter search with cross-validation for [`SVC`].
///
/// Candidate parameter sets are drawn at random from `search_space` and
/// ranked by their cross-validated score under the settings in `config`.
pub struct RandomSearchCV {
    // Search/CV settings: iteration count, folds, scoring metric, seed, etc.
    config: OptimizationConfig,
    // Distributions to draw candidate parameters from.
    search_space: SearchSpace,
    // Owned RNG so sampling is reproducible for a given `config.random_state`.
    rng: Random<scirs2_core::random::rngs::StdRng>,
}
29
30impl RandomSearchCV {
31 pub fn new(config: OptimizationConfig, search_space: SearchSpace) -> Self {
33 let rng = if let Some(seed) = config.random_state {
34 Random::seed(seed)
35 } else {
36 Random::seed(42) };
38
39 Self {
40 config,
41 search_space,
42 rng,
43 }
44 }
45
/// Runs the randomized search: draws `n_iterations` candidate parameter
/// sets, scores each by cross-validation, and returns the best candidate
/// together with the full score history.
///
/// Candidates whose evaluation fails are scored `-inf` so the search
/// continues instead of aborting. Errors only if no candidate at all was
/// produced.
pub fn fit(&mut self, x: &DMatrix<f64>, y: &DVector<f64>) -> Result<OptimizationResult> {
    let start_time = Instant::now();

    if self.config.verbose {
        println!("Random search with {} iterations", self.config.n_iterations);
    }

    // Draw every candidate up front; this is the only place the RNG is
    // consumed, so evaluation order cannot change which candidates run.
    let param_samples = self.sample_parameters(self.config.n_iterations)?;

    // NOTE(review): `config.early_stopping_patience` is never consulted in
    // this loop — confirm whether early stopping is meant to be supported
    // (the ignored test below expects it to trigger).
    let cv_results: Vec<(ParameterSet, f64)> = {
        // Parallel evaluation when built with the `parallel` feature AND the
        // caller requested it via `n_jobs`. No progress logging here because
        // completion order across threads is nondeterministic.
        #[cfg(feature = "parallel")]
        if self.config.n_jobs.is_some() {
            param_samples
                .into_par_iter()
                .map(|params| {
                    let score = self
                        .evaluate_params(&params, x, y)
                        .unwrap_or(-f64::INFINITY);
                    (params, score)
                })
                .collect()
        } else {
            // Sequential evaluation with progress output every 10 candidates.
            param_samples
                .into_iter()
                .enumerate()
                .map(|(i, params)| {
                    let score = self
                        .evaluate_params(&params, x, y)
                        .unwrap_or(-f64::INFINITY);
                    if self.config.verbose && (i + 1) % 10 == 0 {
                        println!(
                            "Iteration {}/{}: Score {:.6}",
                            i + 1,
                            self.config.n_iterations,
                            score
                        );
                    }
                    (params, score)
                })
                .collect()
        }

        // Same sequential loop for builds without the `parallel` feature;
        // cfg ensures exactly one of the two expressions survives.
        #[cfg(not(feature = "parallel"))]
        {
            param_samples
                .into_iter()
                .enumerate()
                .map(|(i, params)| {
                    let score = self
                        .evaluate_params(&params, x, y)
                        .unwrap_or(-f64::INFINITY);
                    if self.config.verbose && (i + 1) % 10 == 0 {
                        println!(
                            "Iteration {}/{}: Score {:.6}",
                            i + 1,
                            self.config.n_iterations,
                            score
                        );
                    }
                    (params, score)
                })
                .collect()
        }
    };

    // Pick the highest-scoring candidate; incomparable (NaN) scores are
    // treated as equal in the comparison.
    let (best_params, best_score) = cv_results
        .iter()
        .max_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal))
        .map(|(p, s)| (p.clone(), *s))
        .ok_or_else(|| {
            SklearsError::Other("No valid parameter combinations found".to_string())
        })?;

    let score_history: Vec<f64> = cv_results.iter().map(|(_, score)| *score).collect();
    let n_iterations = cv_results.len();

    if self.config.verbose {
        println!("Best score: {:.6}", best_score);
        println!("Best params: {:?}", best_params);
    }

    Ok(OptimizationResult {
        best_params,
        best_score,
        cv_results,
        n_iterations,
        optimization_time: start_time.elapsed().as_secs_f64(),
        score_history,
    })
}
143
144 fn sample_parameters(&mut self, n_samples: usize) -> Result<Vec<ParameterSet>> {
146 let mut params = Vec::with_capacity(n_samples);
147
148 let c_spec = self.search_space.c.clone();
150 let kernel_spec = self.search_space.kernel.clone();
151 let tol_spec = self.search_space.tol.clone();
152 let max_iter_spec = self.search_space.max_iter.clone();
153
154 for _ in 0..n_samples {
155 let c = self.sample_value(&c_spec)?;
156
157 let kernel = if let Some(ref spec) = kernel_spec {
158 self.sample_kernel(spec)?
159 } else {
160 KernelType::Rbf { gamma: 1.0 }
161 };
162
163 let tol = if let Some(ref spec) = tol_spec {
164 self.sample_value(spec)?
165 } else {
166 1e-3
167 };
168
169 let max_iter = if let Some(ref spec) = max_iter_spec {
170 self.sample_value(spec)? as usize
171 } else {
172 1000
173 };
174
175 params.push(ParameterSet {
176 c,
177 kernel,
178 tol,
179 max_iter,
180 });
181 }
182
183 Ok(params)
184 }
185
186 fn sample_value(&mut self, spec: &ParameterSpec) -> Result<f64> {
188 match spec {
189 ParameterSpec::Fixed(value) => Ok(*value),
190 ParameterSpec::Uniform { min, max } => {
191 use scirs2_core::random::essentials::Uniform;
192 let dist = Uniform::new(*min, *max).map_err(|e| {
193 SklearsError::InvalidInput(format!(
194 "Failed to create uniform distribution: {}",
195 e
196 ))
197 })?;
198 Ok(self.rng.sample(dist))
199 }
200 ParameterSpec::LogUniform { min, max } => {
201 use scirs2_core::random::essentials::Uniform;
202 let log_min = min.ln();
203 let log_max = max.ln();
204 let dist = Uniform::new(log_min, log_max).map_err(|e| {
205 SklearsError::InvalidInput(format!(
206 "Failed to create log-uniform distribution: {}",
207 e
208 ))
209 })?;
210 let log_val = self.rng.sample(dist);
211 Ok(log_val.exp())
212 }
213 ParameterSpec::Choice(choices) => {
214 if choices.is_empty() {
215 return Err(SklearsError::InvalidInput("Empty choice list".to_string()));
216 }
217 use scirs2_core::random::essentials::Uniform;
218 let dist = Uniform::new(0, choices.len()).map_err(|e| {
219 SklearsError::InvalidInput(format!(
220 "Failed to create uniform distribution: {}",
221 e
222 ))
223 })?;
224 let idx = self.rng.sample(dist);
225 Ok(choices[idx])
226 }
227 ParameterSpec::KernelChoice(_) => Err(SklearsError::InvalidInput(
228 "Use sample_kernel for kernel specs".to_string(),
229 )),
230 }
231 }
232
233 fn sample_kernel(&mut self, spec: &ParameterSpec) -> Result<KernelType> {
235 match spec {
236 ParameterSpec::KernelChoice(kernels) => {
237 if kernels.is_empty() {
238 return Err(SklearsError::InvalidInput(
239 "Empty kernel choice list".to_string(),
240 ));
241 }
242 use scirs2_core::random::essentials::Uniform;
243 let dist = Uniform::new(0, kernels.len()).map_err(|e| {
244 SklearsError::InvalidInput(format!(
245 "Failed to create uniform distribution: {}",
246 e
247 ))
248 })?;
249 let idx = self.rng.sample(dist);
250 Ok(kernels[idx].clone())
251 }
252 _ => Err(SklearsError::InvalidInput(
253 "Invalid kernel specification".to_string(),
254 )),
255 }
256 }
257
258 fn evaluate_params(
260 &self,
261 params: &ParameterSet,
262 x: &DMatrix<f64>,
263 y: &DVector<f64>,
264 ) -> Result<f64> {
265 let scores = self.cross_validate(params, x, y)?;
266 Ok(scores.iter().sum::<f64>() / scores.len() as f64)
267 }
268
269 fn cross_validate(
271 &self,
272 params: &ParameterSet,
273 x: &DMatrix<f64>,
274 y: &DVector<f64>,
275 ) -> Result<Vec<f64>> {
276 let n_samples = x.nrows();
277 let fold_size = n_samples / self.config.cv_folds;
278 let mut scores = Vec::new();
279
280 for fold in 0..self.config.cv_folds {
281 let start_idx = fold * fold_size;
282 let end_idx = if fold == self.config.cv_folds - 1 {
283 n_samples
284 } else {
285 (fold + 1) * fold_size
286 };
287
288 let mut x_train_data = Vec::new();
290 let mut y_train_vals = Vec::new();
291 let mut x_test_data = Vec::new();
292 let mut y_test_vals = Vec::new();
293
294 for i in 0..n_samples {
295 if i >= start_idx && i < end_idx {
296 for j in 0..x.ncols() {
298 x_test_data.push(x[[i, j]]);
299 }
300 y_test_vals.push(y[i]);
301 } else {
302 for j in 0..x.ncols() {
304 x_train_data.push(x[[i, j]]);
305 }
306 y_train_vals.push(y[i]);
307 }
308 }
309
310 let n_train = y_train_vals.len();
311 let n_test = y_test_vals.len();
312 let n_features = x.ncols();
313
314 let x_train = Array2::from_shape_vec((n_train, n_features), x_train_data)?;
315 let y_train = Array1::from_vec(y_train_vals);
316 let x_test = Array2::from_shape_vec((n_test, n_features), x_test_data)?;
317 let y_test = Array1::from_vec(y_test_vals);
318
319 let svm = SVC::new()
321 .c(params.c)
322 .kernel(params.kernel.clone())
323 .tol(params.tol)
324 .max_iter(params.max_iter);
325
326 let fitted_svm = svm.fit(&x_train, &y_train)?;
327 let y_pred = fitted_svm.predict(&x_test)?;
328
329 let score = self.calculate_score(&y_test, &y_pred)?;
330 scores.push(score);
331 }
332
333 Ok(scores)
334 }
335
336 fn calculate_score(&self, y_true: &DVector<f64>, y_pred: &DVector<f64>) -> Result<f64> {
338 match self.config.scoring {
339 ScoringMetric::Accuracy => {
340 let correct = y_true
341 .iter()
342 .zip(y_pred.iter())
343 .map(|(&t, &p)| if (t - p).abs() < 0.5 { 1.0 } else { 0.0 })
344 .sum::<f64>();
345 Ok(correct / y_true.len() as f64)
346 }
347 ScoringMetric::MeanSquaredError => {
348 let mse = y_true
349 .iter()
350 .zip(y_pred.iter())
351 .map(|(&t, &p)| (t - p).powi(2))
352 .sum::<f64>()
353 / y_true.len() as f64;
354 Ok(-mse) }
356 ScoringMetric::MeanAbsoluteError => {
357 let mae = y_true
358 .iter()
359 .zip(y_pred.iter())
360 .map(|(&t, &p)| (t - p).abs())
361 .sum::<f64>()
362 / y_true.len() as f64;
363 Ok(-mae) }
365 _ => {
366 let correct = y_true
368 .iter()
369 .zip(y_pred.iter())
370 .map(|(&t, &p)| if (t - p).abs() < 0.5 { 1.0 } else { 0.0 })
371 .sum::<f64>();
372 Ok(correct / y_true.len() as f64)
373 }
374 }
375 }
376}
377
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::{Array1, Array2};

    /// Builds a tiny linearly separable dataset: 20 samples x 2 features,
    /// the first 10 clustered near (1.x, 1.x) labeled -1.0 and the last 10
    /// near (3.x, 3.x) labeled +1.0.
    fn generate_simple_dataset() -> (Array2<f64>, Array1<f64>) {
        let x = Array2::from_shape_vec(
            (20, 2),
            vec![
                1.0, 1.0, 1.5, 1.2, 1.2, 1.5, 1.8, 1.3, 1.1, 1.6, 1.4, 1.7, 1.3, 1.4, 1.6, 1.5,
                1.7, 1.8, 1.2, 1.9, 3.0, 3.0, 3.5, 3.2, 3.2, 3.5, 3.8, 3.3, 3.1, 3.6, 3.4, 3.7,
                3.3, 3.4, 3.6, 3.5, 3.7, 3.8, 3.2, 3.9,
            ],
        )
        .unwrap();

        let y = Array1::from_vec(vec![
            -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
            1.0, 1.0, 1.0, 1.0, 1.0,
        ]);

        (x, y)
    }

    /// End-to-end search over a log-uniform C range; checks result shape
    /// (iteration counts, history length) and a loose score floor.
    #[test]
    #[ignore] // presumably too slow or flaky for the default run — TODO confirm
    fn test_random_search_basic() {
        let (x, y) = generate_simple_dataset();

        let config = OptimizationConfig {
            n_iterations: 10,
            cv_folds: 2,
            scoring: ScoringMetric::Accuracy,
            random_state: Some(42),
            n_jobs: None,
            verbose: false,
            early_stopping_patience: None,
        };

        let search_space = SearchSpace {
            c: ParameterSpec::LogUniform {
                min: 0.1,
                max: 10.0,
            },
            gamma: None,
            degree: None,
            coef0: None,
            kernel: None,
            tol: None,
            max_iter: None,
        };

        let mut optimizer = RandomSearchCV::new(config, search_space);
        let result = optimizer.fit(&x, &y).unwrap();

        assert!(
            result.best_score >= 0.4,
            "Best score should be at least 0.4, got {}",
            result.best_score
        );
        assert_eq!(result.n_iterations, 10);
        assert_eq!(result.cv_results.len(), 10);
        assert_eq!(result.score_history.len(), 10);
        assert!(result.best_params.c > 0.0);
    }

    /// NOTE(review): this asserts that fewer than 50 iterations run when
    /// `early_stopping_patience` is set, but `fit` never reads that field,
    /// so the assertion cannot pass as written — likely why it is ignored.
    #[test]
    #[ignore]
    fn test_random_search_with_early_stopping() {
        let (x, y) = generate_simple_dataset();

        let config = OptimizationConfig {
            n_iterations: 50,
            cv_folds: 2,
            scoring: ScoringMetric::Accuracy,
            random_state: Some(42),
            n_jobs: None,
            verbose: false,
            early_stopping_patience: Some(5),
        };

        let search_space = SearchSpace::default();
        let mut optimizer = RandomSearchCV::new(config, search_space);
        let result = optimizer.fit(&x, &y).unwrap();

        assert!(
            result.n_iterations < 50,
            "Early stopping should trigger before 50 iterations"
        );
        assert!(
            result.best_score >= 0.4,
            "Best score should be at least 0.4, got {}",
            result.best_score
        );
    }

    /// Sampling-only test (no fitting): drawn C values must come from the
    /// choice list, and unspecified tol/max_iter fall back to positive
    /// defaults.
    #[test]
    fn test_random_search_parameter_sampling() {
        let config = OptimizationConfig::default();
        let search_space = SearchSpace {
            c: ParameterSpec::Choice(vec![0.1, 1.0, 10.0]),
            gamma: Some(ParameterSpec::LogUniform {
                min: 0.01,
                max: 1.0,
            }),
            degree: Some(ParameterSpec::Choice(vec![2.0, 3.0, 4.0])),
            coef0: Some(ParameterSpec::Uniform { min: 0.0, max: 1.0 }),
            kernel: None,
            tol: None,
            max_iter: None,
        };

        let mut optimizer = RandomSearchCV::new(config, search_space);

        let params_vec = optimizer.sample_parameters(20).unwrap();
        for params in params_vec {
            assert!([0.1, 1.0, 10.0].contains(&params.c));
            assert!(params.tol > 0.0);
            assert!(params.max_iter > 0);
        }
    }

    /// Smoke test: optimization must succeed for each supported scoring
    /// metric with a fixed C.
    #[test]
    fn test_random_search_scoring_metrics() {
        let (x, y) = generate_simple_dataset();

        let metrics = vec![
            ScoringMetric::Accuracy,
            ScoringMetric::MeanSquaredError,
            ScoringMetric::MeanAbsoluteError,
        ];

        for metric in metrics {
            let config = OptimizationConfig {
                n_iterations: 5,
                cv_folds: 2,
                scoring: metric.clone(),
                random_state: Some(42),
                n_jobs: None,
                verbose: false,
                early_stopping_patience: None,
            };

            let search_space = SearchSpace {
                c: ParameterSpec::Fixed(1.0),
                gamma: None,
                degree: None,
                coef0: None,
                kernel: None,
                tol: None,
                max_iter: None,
            };

            let mut optimizer = RandomSearchCV::new(config, search_space);
            let result = optimizer.fit(&x, &y);
            assert!(
                result.is_ok(),
                "Optimization should succeed for {:?}",
                metric
            );
        }
    }
}