1use scirs2_core::ndarray::{Array1, Array2};
14use scirs2_core::random::essentials::{Normal, Uniform};
15use scirs2_core::random::thread_rng;
16use serde::{Deserialize, Serialize};
17use sklears_core::{
18 error::{Result, SklearsError},
19 prelude::{Fit, Transform},
20 traits::{Estimator, Trained, Untrained},
21 types::Float,
22};
23use std::collections::HashMap;
24use std::marker::PhantomData;
25
/// Kernel families the meta-learning selector can choose between.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum MetaKernelType {
    /// Gaussian radial basis function kernel.
    RBF,
    /// Polynomial kernel.
    Polynomial,
    /// Laplacian (exponential / L1) kernel.
    Laplacian,
    /// Matern kernel.
    Matern,
    /// Rational quadratic kernel.
    RationalQuadratic,
    /// Linear (dot-product) kernel.
    Linear,
}
42
/// Summary statistics ("meta-features") describing a dataset, used to match
/// a new dataset against previously seen tasks.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DatasetMetaFeatures {
    /// Number of rows in the dataset.
    pub n_samples: usize,
    /// Number of columns (features) in the dataset.
    pub n_features: usize,
    /// Per-feature means.
    pub feature_means: Vec<Float>,
    /// Per-feature (population) standard deviations.
    pub feature_stds: Vec<Float>,
    /// Mean absolute pairwise Pearson correlation across feature pairs.
    pub mean_correlation: Float,
    /// Fraction of entries whose absolute value is below a small threshold.
    pub sparsity: Float,
    /// Fraction of features with non-negligible standard deviation.
    pub effective_dim: Float,
}
61
62impl DatasetMetaFeatures {
63 pub fn extract(x: &Array2<Float>) -> Result<Self> {
65 let (n_samples, n_features) = x.dim();
66
67 if n_samples == 0 || n_features == 0 {
68 return Err(SklearsError::InvalidInput(
69 "Dataset must have non-zero dimensions".to_string(),
70 ));
71 }
72
73 let mut feature_means = Vec::with_capacity(n_features);
75 let mut feature_stds = Vec::with_capacity(n_features);
76
77 for j in 0..n_features {
78 let col = x.column(j);
79 let mean = col.iter().sum::<Float>() / n_samples as Float;
80 let variance =
81 col.iter().map(|&v| (v - mean).powi(2)).sum::<Float>() / n_samples as Float;
82 let std = variance.sqrt();
83
84 feature_means.push(mean);
85 feature_stds.push(std);
86 }
87
88 let mut correlations = Vec::new();
90 for i in 0..n_features {
91 for j in (i + 1)..n_features {
92 let col_i = x.column(i);
93 let col_j = x.column(j);
94
95 let mean_i = feature_means[i];
96 let mean_j = feature_means[j];
97 let std_i = feature_stds[i].max(1e-10);
98 let std_j = feature_stds[j].max(1e-10);
99
100 let cov: Float = col_i
101 .iter()
102 .zip(col_j.iter())
103 .map(|(&vi, &vj)| (vi - mean_i) * (vj - mean_j))
104 .sum::<Float>()
105 / n_samples as Float;
106
107 let corr = cov / (std_i * std_j);
108 correlations.push(corr.abs());
109 }
110 }
111
112 let mean_correlation = if correlations.is_empty() {
113 0.0
114 } else {
115 correlations.iter().sum::<Float>() / correlations.len() as Float
116 };
117
118 let threshold = 1e-6;
120 let near_zero_count = x.iter().filter(|&&v| v.abs() < threshold).count();
121 let sparsity = near_zero_count as Float / (n_samples * n_features) as Float;
122
123 let significant_features = feature_stds.iter().filter(|&&std| std > 0.01).count();
126 let effective_dim = significant_features as Float / n_features as Float;
127
128 Ok(Self {
129 n_samples,
130 n_features,
131 feature_means,
132 feature_stds,
133 mean_correlation,
134 sparsity,
135 effective_dim,
136 })
137 }
138
139 pub fn to_feature_vector(&self) -> Vec<Float> {
141 vec![
142 (self.n_samples as Float).ln(),
143 (self.n_features as Float).ln(),
144 self.feature_means.iter().sum::<Float>() / self.n_features as Float,
145 self.feature_stds.iter().sum::<Float>() / self.n_features as Float,
146 self.mean_correlation,
147 self.sparsity,
148 self.effective_dim,
149 ]
150 }
151}
152
/// Strategies for choosing a kernel given a dataset's meta-features.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum MetaLearningStrategy {
    /// Reuse the best kernel from historically similar tasks.
    PerformanceBased {
        /// Minimum number of stored tasks before history is consulted.
        min_similar_tasks: usize,
        /// Cosine-similarity cutoff for a historical task to count as similar.
        similarity_threshold: Float,
    },
    /// Select from a fixed portfolio of candidate kernels.
    Portfolio {
        /// Number of candidate kernels in the portfolio.
        portfolio_size: usize,
    },
    /// Bayesian-optimization-driven search over kernels/hyperparameters.
    BayesianOptimization {
        /// Number of initial random evaluations.
        n_initial: usize,
        /// Number of optimization iterations.
        n_iterations: usize,
    },
    /// Neural-architecture-search style exploration of kernel configurations.
    NeuralArchitectureSearch {
        /// Size of the search space to sample from.
        search_space_size: usize,
        /// Number of candidate evaluations.
        n_evaluations: usize,
    },
}
183
184impl Default for MetaLearningStrategy {
185 fn default() -> Self {
186 Self::PerformanceBased {
187 min_similar_tasks: 5,
188 similarity_threshold: 0.7,
189 }
190 }
191}
192
/// Configuration for the meta-learning kernel selector.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MetaLearningConfig {
    /// Strategy used to pick a kernel for a new dataset.
    pub strategy: MetaLearningStrategy,
    /// Number of random features in the kernel approximation.
    pub n_components: usize,
    /// Whether to enable transfer learning across tasks.
    pub use_transfer_learning: bool,
    /// Metric used to score kernel performance.
    pub performance_metric: PerformanceMetric,
}
205
206impl Default for MetaLearningConfig {
207 fn default() -> Self {
208 Self {
209 strategy: MetaLearningStrategy::default(),
210 n_components: 100,
211 use_transfer_learning: false,
212 performance_metric: PerformanceMetric::KernelAlignment,
213 }
214 }
215}
216
/// Metric used to evaluate how well a kernel fits a task.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub enum PerformanceMetric {
    /// Alignment between the kernel matrix and the target.
    KernelAlignment,
    /// Cross-validated downstream performance.
    CrossValidation,
    /// Quality of the kernel's spectral properties.
    SpectralQuality,
    /// Error of the random-feature approximation.
    ApproximationError,
}
229
/// Record of a previously solved task, used as meta-learning history.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TaskMetadata {
    /// Unique identifier for the task.
    pub task_id: String,
    /// Meta-features of the task's dataset.
    pub meta_features: DatasetMetaFeatures,
    /// Kernel that performed best on this task.
    pub best_kernel: MetaKernelType,
    /// Performance score achieved with `best_kernel`.
    pub performance: Float,
    /// Hyperparameters used with `best_kernel` (e.g. "gamma", "degree").
    pub hyperparameters: HashMap<String, Float>,
}
244
/// Kernel selector that uses meta-learning over past tasks to choose a kernel
/// and initialize a random-feature approximation for it.
///
/// Uses the typestate pattern: `State` is `Untrained` until `fit` produces a
/// `MetaLearningKernelSelector<Trained>`, at which point the `Option` fields
/// below are populated.
#[derive(Debug, Clone)]
pub struct MetaLearningKernelSelector<State = Untrained> {
    config: MetaLearningConfig,

    // Populated by `fit`; `None` while untrained.
    selected_kernel: Option<MetaKernelType>,
    selected_hyperparams: Option<HashMap<String, Float>>,
    kernel_weights: Option<Array2<Float>>,
    kernel_offset: Option<Array1<Float>>,
    task_history: Vec<TaskMetadata>,

    // Zero-sized typestate marker; stores no data.
    _state: PhantomData<State>,
}
277
278impl MetaLearningKernelSelector<Untrained> {
279 pub fn new(config: MetaLearningConfig) -> Self {
281 Self {
282 config,
283 selected_kernel: None,
284 selected_hyperparams: None,
285 kernel_weights: None,
286 kernel_offset: None,
287 task_history: Vec::new(),
288 _state: PhantomData,
289 }
290 }
291
292 pub fn with_components(n_components: usize) -> Self {
294 Self {
295 config: MetaLearningConfig {
296 n_components,
297 ..Default::default()
298 },
299 selected_kernel: None,
300 selected_hyperparams: None,
301 kernel_weights: None,
302 kernel_offset: None,
303 task_history: Vec::new(),
304 _state: PhantomData,
305 }
306 }
307
308 pub fn add_task_history(mut self, task: TaskMetadata) -> Self {
310 self.task_history.push(task);
311 self
312 }
313
314 pub fn strategy(mut self, strategy: MetaLearningStrategy) -> Self {
316 self.config.strategy = strategy;
317 self
318 }
319
320 fn select_kernel(
322 &self,
323 meta_features: &DatasetMetaFeatures,
324 ) -> (MetaKernelType, HashMap<String, Float>) {
325 match &self.config.strategy {
326 MetaLearningStrategy::PerformanceBased {
327 min_similar_tasks,
328 similarity_threshold,
329 } => self.select_performance_based(
330 meta_features,
331 *min_similar_tasks,
332 *similarity_threshold,
333 ),
334 MetaLearningStrategy::Portfolio { portfolio_size } => {
335 self.select_portfolio_based(meta_features, *portfolio_size)
336 }
337 MetaLearningStrategy::BayesianOptimization {
338 n_initial,
339 n_iterations,
340 } => self.select_bayesian(meta_features, *n_initial, *n_iterations),
341 MetaLearningStrategy::NeuralArchitectureSearch {
342 search_space_size,
343 n_evaluations,
344 } => self.select_nas(meta_features, *search_space_size, *n_evaluations),
345 }
346 }
347
348 fn select_performance_based(
350 &self,
351 meta_features: &DatasetMetaFeatures,
352 min_similar: usize,
353 similarity_threshold: Float,
354 ) -> (MetaKernelType, HashMap<String, Float>) {
355 if self.task_history.len() < min_similar {
356 return self.heuristic_selection(meta_features);
358 }
359
360 let current_features = meta_features.to_feature_vector();
362 let mut similarities: Vec<(usize, Float)> = self
363 .task_history
364 .iter()
365 .enumerate()
366 .map(|(idx, task)| {
367 let hist_features = task.meta_features.to_feature_vector();
368 let similarity = Self::compute_similarity(¤t_features, &hist_features);
369 (idx, similarity)
370 })
371 .filter(|(_, sim)| *sim >= similarity_threshold)
372 .collect();
373
374 if similarities.is_empty() {
375 return self.heuristic_selection(meta_features);
376 }
377
378 similarities.sort_by(|a, b| {
380 let perf_a = self.task_history[a.0].performance;
381 let perf_b = self.task_history[b.0].performance;
382 (a.1 * perf_a)
383 .partial_cmp(&(b.1 * perf_b))
384 .unwrap()
385 .reverse()
386 });
387
388 let best_task = &self.task_history[similarities[0].0];
390 (best_task.best_kernel, best_task.hyperparameters.clone())
391 }
392
393 fn select_portfolio_based(
395 &self,
396 meta_features: &DatasetMetaFeatures,
397 _portfolio_size: usize,
398 ) -> (MetaKernelType, HashMap<String, Float>) {
399 self.heuristic_selection(meta_features)
402 }
403
404 fn select_bayesian(
406 &self,
407 meta_features: &DatasetMetaFeatures,
408 _n_initial: usize,
409 _n_iterations: usize,
410 ) -> (MetaKernelType, HashMap<String, Float>) {
411 self.heuristic_selection(meta_features)
414 }
415
416 fn select_nas(
418 &self,
419 meta_features: &DatasetMetaFeatures,
420 _search_space_size: usize,
421 _n_evaluations: usize,
422 ) -> (MetaKernelType, HashMap<String, Float>) {
423 self.heuristic_selection(meta_features)
426 }
427
428 fn heuristic_selection(
430 &self,
431 meta_features: &DatasetMetaFeatures,
432 ) -> (MetaKernelType, HashMap<String, Float>) {
433 let mut hyperparams = HashMap::new();
434
435 let kernel = if meta_features.sparsity > 0.5 {
437 hyperparams.insert("gamma".to_string(), 1.0);
439 MetaKernelType::Linear
440 } else if meta_features.effective_dim < 0.3 {
441 let gamma = 1.0 / (meta_features.n_features as Float);
443 hyperparams.insert("gamma".to_string(), gamma);
444 MetaKernelType::RBF
445 } else if meta_features.mean_correlation > 0.7 {
446 hyperparams.insert("degree".to_string(), 3.0);
448 hyperparams.insert("gamma".to_string(), 1.0);
449 MetaKernelType::Polynomial
450 } else {
451 hyperparams.insert("gamma".to_string(), 1.0);
453 MetaKernelType::RBF
454 };
455
456 (kernel, hyperparams)
457 }
458
459 fn compute_similarity(a: &[Float], b: &[Float]) -> Float {
461 if a.len() != b.len() {
462 return 0.0;
463 }
464
465 let dot: Float = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
466 let norm_a: Float = a.iter().map(|x| x * x).sum::<Float>().sqrt();
467 let norm_b: Float = b.iter().map(|x| x * x).sum::<Float>().sqrt();
468
469 if norm_a < 1e-10 || norm_b < 1e-10 {
470 return 0.0;
471 }
472
473 (dot / (norm_a * norm_b)).max(-1.0).min(1.0)
474 }
475}
476
/// Marker `Estimator` implementation exposing configuration and error types.
impl Estimator for MetaLearningKernelSelector<Untrained> {
    type Config = MetaLearningConfig;
    type Error = SklearsError;
    type Float = Float;

    /// Borrow this selector's configuration.
    fn config(&self) -> &Self::Config {
        &self.config
    }
}
486
487impl Fit<Array2<Float>, ()> for MetaLearningKernelSelector<Untrained> {
488 type Fitted = MetaLearningKernelSelector<Trained>;
489
490 fn fit(self, x: &Array2<Float>, _y: &()) -> Result<Self::Fitted> {
491 if x.nrows() == 0 || x.ncols() == 0 {
492 return Err(SklearsError::InvalidInput(
493 "Input array cannot be empty".to_string(),
494 ));
495 }
496
497 let meta_features = DatasetMetaFeatures::extract(x)?;
499
500 let (selected_kernel, selected_hyperparams) = self.select_kernel(&meta_features);
502
503 let (kernel_weights, kernel_offset) = Self::initialize_kernel_approximation(
505 selected_kernel,
506 &selected_hyperparams,
507 x,
508 self.config.n_components,
509 )?;
510
511 Ok(MetaLearningKernelSelector {
512 config: self.config,
513 selected_kernel: Some(selected_kernel),
514 selected_hyperparams: Some(selected_hyperparams),
515 kernel_weights: Some(kernel_weights),
516 kernel_offset: Some(kernel_offset),
517 task_history: self.task_history,
518 _state: PhantomData,
519 })
520 }
521}
522
523impl MetaLearningKernelSelector<Untrained> {
524 fn initialize_kernel_approximation(
526 kernel_type: MetaKernelType,
527 hyperparams: &HashMap<String, Float>,
528 x: &Array2<Float>,
529 n_components: usize,
530 ) -> Result<(Array2<Float>, Array1<Float>)> {
531 let n_features = x.ncols();
532 let mut rng = thread_rng();
533
534 match kernel_type {
535 MetaKernelType::RBF | MetaKernelType::Laplacian => {
536 let gamma = hyperparams.get("gamma").copied().unwrap_or(1.0);
537 let normal = Normal::new(0.0, 1.0).unwrap();
538
539 let weights = Array2::from_shape_fn((n_features, n_components), |_| {
540 rng.sample(normal) * (2.0 * gamma).sqrt()
541 });
542
543 let uniform = Uniform::new(0.0, 2.0 * std::f64::consts::PI).unwrap();
544 let offset = Array1::from_shape_fn(n_components, |_| rng.sample(uniform));
545
546 Ok((weights, offset))
547 }
548 MetaKernelType::Polynomial => {
549 let gamma = hyperparams.get("gamma").copied().unwrap_or(1.0);
550 let normal = Normal::new(0.0, 1.0).unwrap();
551
552 let weights = Array2::from_shape_fn((n_features, n_components), |_| {
553 rng.sample(normal) * (2.0 * gamma).sqrt()
554 });
555
556 let uniform = Uniform::new(0.0, 2.0 * std::f64::consts::PI).unwrap();
557 let offset = Array1::from_shape_fn(n_components, |_| rng.sample(uniform));
558
559 Ok((weights, offset))
560 }
561 MetaKernelType::Linear => {
562 let weights = Array2::from_shape_fn((n_features, n_components), |_| {
564 rng.sample(Normal::new(0.0, 1.0 / (n_features as Float).sqrt()).unwrap())
565 });
566 let offset = Array1::zeros(n_components);
567 Ok((weights, offset))
568 }
569 _ => {
570 let normal = Normal::new(0.0, 1.0).unwrap();
572 let weights =
573 Array2::from_shape_fn((n_features, n_components), |_| rng.sample(normal));
574 let uniform = Uniform::new(0.0, 2.0 * std::f64::consts::PI).unwrap();
575 let offset = Array1::from_shape_fn(n_components, |_| rng.sample(uniform));
576 Ok((weights, offset))
577 }
578 }
579 }
580}
581
582impl Transform<Array2<Float>, Array2<Float>> for MetaLearningKernelSelector<Trained> {
583 fn transform(&self, x: &Array2<Float>) -> Result<Array2<Float>> {
584 let kernel_weights = self.kernel_weights.as_ref().unwrap();
585 let kernel_offset = self.kernel_offset.as_ref().unwrap();
586
587 if x.ncols() != kernel_weights.nrows() {
588 return Err(SklearsError::InvalidInput(format!(
589 "Feature dimension mismatch: expected {}, got {}",
590 kernel_weights.nrows(),
591 x.ncols()
592 )));
593 }
594
595 let projection = x.dot(kernel_weights);
597
598 let n_samples = x.nrows();
599 let n_components = self.config.n_components;
600 let mut output = Array2::zeros((n_samples, n_components));
601
602 let normalizer = (2.0 / n_components as Float).sqrt();
603 for i in 0..n_samples {
604 for j in 0..n_components {
605 output[[i, j]] = normalizer * (projection[[i, j]] + kernel_offset[j]).cos();
606 }
607 }
608
609 Ok(output)
610 }
611}
612
613impl MetaLearningKernelSelector<Trained> {
614 pub fn selected_kernel(&self) -> MetaKernelType {
616 self.selected_kernel.unwrap()
617 }
618
619 pub fn selected_hyperparameters(&self) -> &HashMap<String, Float> {
621 self.selected_hyperparams.as_ref().unwrap()
622 }
623
624 pub fn kernel_weights(&self) -> &Array2<Float> {
626 self.kernel_weights.as_ref().unwrap()
627 }
628
629 pub fn kernel_offset(&self) -> &Array1<Float> {
631 self.kernel_offset.as_ref().unwrap()
632 }
633}
634
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::array;

    /// Meta-feature extraction on a small dense matrix.
    #[test]
    fn test_meta_features_extraction() {
        let x = array![
            [1.0, 2.0, 3.0],
            [4.0, 5.0, 6.0],
            [7.0, 8.0, 9.0],
            [10.0, 11.0, 12.0]
        ];

        let meta_features = DatasetMetaFeatures::extract(&x).unwrap();

        assert_eq!(meta_features.n_samples, 4);
        assert_eq!(meta_features.n_features, 3);
        // assert_eq! gives a better failure message than assert!(a == b).
        assert_eq!(meta_features.feature_means.len(), 3);
        assert!(meta_features.mean_correlation >= 0.0);
        assert!(meta_features.mean_correlation <= 1.0);
    }

    /// End-to-end fit + transform with default configuration (100 components).
    #[test]
    fn test_meta_learning_selector_basic() {
        let config = MetaLearningConfig::default();
        let selector = MetaLearningKernelSelector::new(config);

        let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];

        let fitted = selector.fit(&x, &()).unwrap();
        let features = fitted.transform(&x).unwrap();

        assert_eq!(features.shape(), &[3, 100]);
    }

    /// Performance-based selection consults recorded task history.
    #[test]
    fn test_kernel_selection_with_history() {
        let config = MetaLearningConfig {
            strategy: MetaLearningStrategy::PerformanceBased {
                min_similar_tasks: 1,
                similarity_threshold: 0.5,
            },
            n_components: 50,
            use_transfer_learning: false,
            performance_metric: PerformanceMetric::KernelAlignment,
        };

        let mut selector = MetaLearningKernelSelector::new(config);

        let x_hist = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
        let meta_features = DatasetMetaFeatures::extract(&x_hist).unwrap();

        let mut hyperparams = HashMap::new();
        hyperparams.insert("gamma".to_string(), 0.5);

        let task = TaskMetadata {
            task_id: "task1".to_string(),
            meta_features,
            best_kernel: MetaKernelType::RBF,
            performance: 0.95,
            hyperparameters: hyperparams,
        };

        selector = selector.add_task_history(task);

        let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
        let fitted = selector.fit(&x, &()).unwrap();

        let selected = fitted.selected_kernel();
        // matches! is the idiomatic way to test a value against variants.
        assert!(
            matches!(
                selected,
                MetaKernelType::RBF | MetaKernelType::Polynomial | MetaKernelType::Linear
            ),
            "Unexpected kernel type: {:?}",
            selected
        );
    }

    /// Every strategy must produce a usable fitted transformer.
    #[test]
    fn test_different_strategies() {
        let strategies = vec![
            MetaLearningStrategy::PerformanceBased {
                min_similar_tasks: 3,
                similarity_threshold: 0.6,
            },
            MetaLearningStrategy::Portfolio { portfolio_size: 3 },
        ];

        let x = array![[1.0, 2.0], [3.0, 4.0]];

        for strategy in strategies {
            let config = MetaLearningConfig {
                strategy,
                ..Default::default()
            };

            let selector = MetaLearningKernelSelector::new(config);
            let fitted = selector.fit(&x, &()).unwrap();
            let features = fitted.transform(&x).unwrap();

            assert_eq!(features.nrows(), 2);
        }
    }

    /// A sparse dataset should steer the heuristic toward Linear (or RBF).
    #[test]
    fn test_heuristic_selection() {
        let x_sparse = array![[0.0, 0.0, 1.0], [0.0, 1.0, 0.0], [1.0, 0.0, 0.0]];

        let selector = MetaLearningKernelSelector::with_components(50);
        let fitted = selector.fit(&x_sparse, &()).unwrap();

        assert!(matches!(
            fitted.selected_kernel(),
            MetaKernelType::Linear | MetaKernelType::RBF
        ));
    }

    /// Cosine similarity: identical vectors → 1, negated vectors → -1.
    #[test]
    fn test_similarity_computation() {
        let a = vec![1.0, 2.0, 3.0];
        let b = vec![1.0, 2.0, 3.0];

        let similarity = MetaLearningKernelSelector::<Untrained>::compute_similarity(&a, &b);
        assert!((similarity - 1.0).abs() < 1e-6);

        let c = vec![-1.0, -2.0, -3.0];
        let similarity2 = MetaLearningKernelSelector::<Untrained>::compute_similarity(&a, &c);
        assert!((similarity2 + 1.0).abs() < 1e-6);
    }

    /// Fitting an empty matrix must fail with an error, not panic.
    #[test]
    fn test_empty_input_error() {
        let selector = MetaLearningKernelSelector::with_components(50);
        let x_empty: Array2<Float> = Array2::zeros((0, 0));

        assert!(selector.fit(&x_empty, &()).is_err());
    }

    /// Transforming data with the wrong feature count must fail.
    #[test]
    fn test_dimension_mismatch_error() {
        let selector = MetaLearningKernelSelector::with_components(50);
        let x_train = array![[1.0, 2.0], [3.0, 4.0]];
        let x_test = array![[1.0, 2.0, 3.0]];

        let fitted = selector.fit(&x_train, &()).unwrap();
        assert!(fitted.transform(&x_test).is_err());
    }
}