1use scirs2_core::ndarray::{Array1, Array2};
14use scirs2_core::random::essentials::{Normal, Uniform};
15use scirs2_core::random::thread_rng;
16use serde::{Deserialize, Serialize};
17use sklears_core::{
18 error::{Result, SklearsError},
19 prelude::{Fit, Transform},
20 traits::{Estimator, Trained, Untrained},
21 types::Float,
22};
23use std::collections::HashMap;
24use std::marker::PhantomData;
25
/// Kernel families the meta-learning selector can choose between.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum MetaKernelType {
    /// Gaussian radial basis function kernel.
    RBF,
    /// Polynomial kernel.
    Polynomial,
    /// Laplacian (exponential / L1-distance) kernel.
    Laplacian,
    /// Matern-family kernel.
    Matern,
    /// Rational quadratic kernel.
    RationalQuadratic,
    /// Plain inner-product (linear) kernel.
    Linear,
}
42
/// Summary statistics of a dataset used to drive kernel selection.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DatasetMetaFeatures {
    /// Number of rows (samples) in the dataset.
    pub n_samples: usize,
    /// Number of columns (features) in the dataset.
    pub n_features: usize,
    /// Per-column mean.
    pub feature_means: Vec<Float>,
    /// Per-column population standard deviation.
    pub feature_stds: Vec<Float>,
    /// Mean absolute pairwise Pearson correlation across feature pairs.
    pub mean_correlation: Float,
    /// Fraction of entries with magnitude below 1e-6.
    pub sparsity: Float,
    /// Fraction of columns whose standard deviation exceeds 0.01.
    pub effective_dim: Float,
}
61
62impl DatasetMetaFeatures {
63 pub fn extract(x: &Array2<Float>) -> Result<Self> {
65 let (n_samples, n_features) = x.dim();
66
67 if n_samples == 0 || n_features == 0 {
68 return Err(SklearsError::InvalidInput(
69 "Dataset must have non-zero dimensions".to_string(),
70 ));
71 }
72
73 let mut feature_means = Vec::with_capacity(n_features);
75 let mut feature_stds = Vec::with_capacity(n_features);
76
77 for j in 0..n_features {
78 let col = x.column(j);
79 let mean = col.iter().sum::<Float>() / n_samples as Float;
80 let variance =
81 col.iter().map(|&v| (v - mean).powi(2)).sum::<Float>() / n_samples as Float;
82 let std = variance.sqrt();
83
84 feature_means.push(mean);
85 feature_stds.push(std);
86 }
87
88 let mut correlations = Vec::new();
90 for i in 0..n_features {
91 for j in (i + 1)..n_features {
92 let col_i = x.column(i);
93 let col_j = x.column(j);
94
95 let mean_i = feature_means[i];
96 let mean_j = feature_means[j];
97 let std_i = feature_stds[i].max(1e-10);
98 let std_j = feature_stds[j].max(1e-10);
99
100 let cov: Float = col_i
101 .iter()
102 .zip(col_j.iter())
103 .map(|(&vi, &vj)| (vi - mean_i) * (vj - mean_j))
104 .sum::<Float>()
105 / n_samples as Float;
106
107 let corr = cov / (std_i * std_j);
108 correlations.push(corr.abs());
109 }
110 }
111
112 let mean_correlation = if correlations.is_empty() {
113 0.0
114 } else {
115 correlations.iter().sum::<Float>() / correlations.len() as Float
116 };
117
118 let threshold = 1e-6;
120 let near_zero_count = x.iter().filter(|&&v| v.abs() < threshold).count();
121 let sparsity = near_zero_count as Float / (n_samples * n_features) as Float;
122
123 let significant_features = feature_stds.iter().filter(|&&std| std > 0.01).count();
126 let effective_dim = significant_features as Float / n_features as Float;
127
128 Ok(Self {
129 n_samples,
130 n_features,
131 feature_means,
132 feature_stds,
133 mean_correlation,
134 sparsity,
135 effective_dim,
136 })
137 }
138
139 pub fn to_feature_vector(&self) -> Vec<Float> {
141 vec![
142 (self.n_samples as Float).ln(),
143 (self.n_features as Float).ln(),
144 self.feature_means.iter().sum::<Float>() / self.n_features as Float,
145 self.feature_stds.iter().sum::<Float>() / self.n_features as Float,
146 self.mean_correlation,
147 self.sparsity,
148 self.effective_dim,
149 ]
150 }
151}
152
/// Strategies for choosing a kernel from dataset meta-features.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum MetaLearningStrategy {
    /// Reuse the kernel of the most similar previously-recorded task,
    /// weighted by the performance it achieved there.
    PerformanceBased {
        /// Minimum number of recorded tasks before history is consulted.
        min_similar_tasks: usize,
        /// Minimum cosine similarity for a task to count as "similar".
        similarity_threshold: Float,
    },
    /// Select from a fixed portfolio of kernels.
    /// NOTE(review): currently falls back to the heuristic selection.
    Portfolio {
        /// Number of kernels kept in the portfolio (currently unused).
        portfolio_size: usize,
    },
    /// Bayesian-optimization-driven kernel search.
    /// NOTE(review): currently falls back to the heuristic selection.
    BayesianOptimization {
        /// Number of initial random evaluations (currently unused).
        n_initial: usize,
        /// Number of optimization iterations (currently unused).
        n_iterations: usize,
    },
    /// Neural-architecture-search-style exploration.
    /// NOTE(review): currently falls back to the heuristic selection.
    NeuralArchitectureSearch {
        /// Size of the candidate search space (currently unused).
        search_space_size: usize,
        /// Number of candidate evaluations (currently unused).
        n_evaluations: usize,
    },
}
183
impl Default for MetaLearningStrategy {
    /// Default: performance-based transfer requiring at least 5 similar
    /// tasks at cosine similarity >= 0.7.
    fn default() -> Self {
        Self::PerformanceBased {
            min_similar_tasks: 5,
            similarity_threshold: 0.7,
        }
    }
}
192
/// Configuration for [`MetaLearningKernelSelector`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MetaLearningConfig {
    /// How the kernel is selected from meta-features / task history.
    pub strategy: MetaLearningStrategy,
    /// Number of random-feature components in the kernel approximation.
    pub n_components: usize,
    /// NOTE(review): stored but not read anywhere in this file — confirm
    /// whether transfer learning is wired up elsewhere.
    pub use_transfer_learning: bool,
    /// NOTE(review): stored but not read anywhere in this file — confirm
    /// where the metric is consumed.
    pub performance_metric: PerformanceMetric,
}
205
impl Default for MetaLearningConfig {
    /// Default: performance-based strategy, 100 random-feature components,
    /// no transfer learning, kernel-alignment metric.
    fn default() -> Self {
        Self {
            strategy: MetaLearningStrategy::default(),
            n_components: 100,
            use_transfer_learning: false,
            performance_metric: PerformanceMetric::KernelAlignment,
        }
    }
}
216
/// Metric used to score kernel quality on a task.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub enum PerformanceMetric {
    /// Alignment between the kernel matrix and the target.
    KernelAlignment,
    /// Cross-validated downstream performance.
    CrossValidation,
    /// Quality of the kernel's spectrum.
    SpectralQuality,
    /// Error of the random-feature approximation.
    ApproximationError,
}
229
/// Record of a previously solved task, used as transfer-learning history.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TaskMetadata {
    /// Opaque identifier for the task.
    pub task_id: String,
    /// Meta-features of the task's dataset.
    pub meta_features: DatasetMetaFeatures,
    /// Kernel that performed best on this task.
    pub best_kernel: MetaKernelType,
    /// Performance achieved by `best_kernel` (higher is better).
    pub performance: Float,
    /// Hyperparameters used with `best_kernel` (e.g. "gamma", "degree").
    pub hyperparameters: HashMap<String, Float>,
}
244
/// Typestate kernel selector: picks a kernel family from dataset
/// meta-features (optionally informed by task history) and materializes a
/// random-feature approximation of it. `State` is `Untrained` until `fit`
/// produces a `Trained` instance.
#[derive(Debug, Clone)]
pub struct MetaLearningKernelSelector<State = Untrained> {
    // Strategy and approximation settings.
    config: MetaLearningConfig,

    // Kernel chosen during fit (None while untrained).
    selected_kernel: Option<MetaKernelType>,
    // Hyperparameters of the chosen kernel (None while untrained).
    selected_hyperparams: Option<HashMap<String, Float>>,
    // Random-feature weights, shape (n_features, n_components); None while untrained.
    kernel_weights: Option<Array2<Float>>,
    // Random-feature phase offsets, length n_components; None while untrained.
    kernel_offset: Option<Array1<Float>>,
    // Previously solved tasks consulted by the performance-based strategy.
    task_history: Vec<TaskMetadata>,

    // Zero-sized typestate marker (Untrained / Trained).
    _state: PhantomData<State>,
}
277
278impl MetaLearningKernelSelector<Untrained> {
279 pub fn new(config: MetaLearningConfig) -> Self {
281 Self {
282 config,
283 selected_kernel: None,
284 selected_hyperparams: None,
285 kernel_weights: None,
286 kernel_offset: None,
287 task_history: Vec::new(),
288 _state: PhantomData,
289 }
290 }
291
292 pub fn with_components(n_components: usize) -> Self {
294 Self {
295 config: MetaLearningConfig {
296 n_components,
297 ..Default::default()
298 },
299 selected_kernel: None,
300 selected_hyperparams: None,
301 kernel_weights: None,
302 kernel_offset: None,
303 task_history: Vec::new(),
304 _state: PhantomData,
305 }
306 }
307
308 pub fn add_task_history(mut self, task: TaskMetadata) -> Self {
310 self.task_history.push(task);
311 self
312 }
313
314 pub fn strategy(mut self, strategy: MetaLearningStrategy) -> Self {
316 self.config.strategy = strategy;
317 self
318 }
319
320 fn select_kernel(
322 &self,
323 meta_features: &DatasetMetaFeatures,
324 ) -> (MetaKernelType, HashMap<String, Float>) {
325 match &self.config.strategy {
326 MetaLearningStrategy::PerformanceBased {
327 min_similar_tasks,
328 similarity_threshold,
329 } => self.select_performance_based(
330 meta_features,
331 *min_similar_tasks,
332 *similarity_threshold,
333 ),
334 MetaLearningStrategy::Portfolio { portfolio_size } => {
335 self.select_portfolio_based(meta_features, *portfolio_size)
336 }
337 MetaLearningStrategy::BayesianOptimization {
338 n_initial,
339 n_iterations,
340 } => self.select_bayesian(meta_features, *n_initial, *n_iterations),
341 MetaLearningStrategy::NeuralArchitectureSearch {
342 search_space_size,
343 n_evaluations,
344 } => self.select_nas(meta_features, *search_space_size, *n_evaluations),
345 }
346 }
347
348 fn select_performance_based(
350 &self,
351 meta_features: &DatasetMetaFeatures,
352 min_similar: usize,
353 similarity_threshold: Float,
354 ) -> (MetaKernelType, HashMap<String, Float>) {
355 if self.task_history.len() < min_similar {
356 return self.heuristic_selection(meta_features);
358 }
359
360 let current_features = meta_features.to_feature_vector();
362 let mut similarities: Vec<(usize, Float)> = self
363 .task_history
364 .iter()
365 .enumerate()
366 .map(|(idx, task)| {
367 let hist_features = task.meta_features.to_feature_vector();
368 let similarity = Self::compute_similarity(¤t_features, &hist_features);
369 (idx, similarity)
370 })
371 .filter(|(_, sim)| *sim >= similarity_threshold)
372 .collect();
373
374 if similarities.is_empty() {
375 return self.heuristic_selection(meta_features);
376 }
377
378 similarities.sort_by(|a, b| {
380 let perf_a = self.task_history[a.0].performance;
381 let perf_b = self.task_history[b.0].performance;
382 (a.1 * perf_a)
383 .partial_cmp(&(b.1 * perf_b))
384 .expect("operation should succeed")
385 .reverse()
386 });
387
388 let best_task = &self.task_history[similarities[0].0];
390 (best_task.best_kernel, best_task.hyperparameters.clone())
391 }
392
393 fn select_portfolio_based(
395 &self,
396 meta_features: &DatasetMetaFeatures,
397 _portfolio_size: usize,
398 ) -> (MetaKernelType, HashMap<String, Float>) {
399 self.heuristic_selection(meta_features)
402 }
403
404 fn select_bayesian(
406 &self,
407 meta_features: &DatasetMetaFeatures,
408 _n_initial: usize,
409 _n_iterations: usize,
410 ) -> (MetaKernelType, HashMap<String, Float>) {
411 self.heuristic_selection(meta_features)
414 }
415
416 fn select_nas(
418 &self,
419 meta_features: &DatasetMetaFeatures,
420 _search_space_size: usize,
421 _n_evaluations: usize,
422 ) -> (MetaKernelType, HashMap<String, Float>) {
423 self.heuristic_selection(meta_features)
426 }
427
428 fn heuristic_selection(
430 &self,
431 meta_features: &DatasetMetaFeatures,
432 ) -> (MetaKernelType, HashMap<String, Float>) {
433 let mut hyperparams = HashMap::new();
434
435 let kernel = if meta_features.sparsity > 0.5 {
437 hyperparams.insert("gamma".to_string(), 1.0);
439 MetaKernelType::Linear
440 } else if meta_features.effective_dim < 0.3 {
441 let gamma = 1.0 / (meta_features.n_features as Float);
443 hyperparams.insert("gamma".to_string(), gamma);
444 MetaKernelType::RBF
445 } else if meta_features.mean_correlation > 0.7 {
446 hyperparams.insert("degree".to_string(), 3.0);
448 hyperparams.insert("gamma".to_string(), 1.0);
449 MetaKernelType::Polynomial
450 } else {
451 hyperparams.insert("gamma".to_string(), 1.0);
453 MetaKernelType::RBF
454 };
455
456 (kernel, hyperparams)
457 }
458
459 fn compute_similarity(a: &[Float], b: &[Float]) -> Float {
461 if a.len() != b.len() {
462 return 0.0;
463 }
464
465 let dot: Float = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
466 let norm_a: Float = a.iter().map(|x| x * x).sum::<Float>().sqrt();
467 let norm_b: Float = b.iter().map(|x| x * x).sum::<Float>().sqrt();
468
469 if norm_a < 1e-10 || norm_b < 1e-10 {
470 return 0.0;
471 }
472
473 (dot / (norm_a * norm_b)).max(-1.0).min(1.0)
474 }
475}
476
// Estimator identity for the untrained selector: exposes the configuration.
impl Estimator for MetaLearningKernelSelector<Untrained> {
    type Config = MetaLearningConfig;
    type Error = SklearsError;
    type Float = Float;

    fn config(&self) -> &Self::Config {
        &self.config
    }
}
486
487impl Fit<Array2<Float>, ()> for MetaLearningKernelSelector<Untrained> {
488 type Fitted = MetaLearningKernelSelector<Trained>;
489
490 fn fit(self, x: &Array2<Float>, _y: &()) -> Result<Self::Fitted> {
491 if x.nrows() == 0 || x.ncols() == 0 {
492 return Err(SklearsError::InvalidInput(
493 "Input array cannot be empty".to_string(),
494 ));
495 }
496
497 let meta_features = DatasetMetaFeatures::extract(x)?;
499
500 let (selected_kernel, selected_hyperparams) = self.select_kernel(&meta_features);
502
503 let (kernel_weights, kernel_offset) = Self::initialize_kernel_approximation(
505 selected_kernel,
506 &selected_hyperparams,
507 x,
508 self.config.n_components,
509 )?;
510
511 Ok(MetaLearningKernelSelector {
512 config: self.config,
513 selected_kernel: Some(selected_kernel),
514 selected_hyperparams: Some(selected_hyperparams),
515 kernel_weights: Some(kernel_weights),
516 kernel_offset: Some(kernel_offset),
517 task_history: self.task_history,
518 _state: PhantomData,
519 })
520 }
521}
522
523impl MetaLearningKernelSelector<Untrained> {
524 fn initialize_kernel_approximation(
526 kernel_type: MetaKernelType,
527 hyperparams: &HashMap<String, Float>,
528 x: &Array2<Float>,
529 n_components: usize,
530 ) -> Result<(Array2<Float>, Array1<Float>)> {
531 let n_features = x.ncols();
532 let mut rng = thread_rng();
533
534 match kernel_type {
535 MetaKernelType::RBF | MetaKernelType::Laplacian => {
536 let gamma = hyperparams.get("gamma").copied().unwrap_or(1.0);
537 let normal = Normal::new(0.0, 1.0).expect("operation should succeed");
538
539 let weights = Array2::from_shape_fn((n_features, n_components), |_| {
540 rng.sample(normal) * (2.0 * gamma).sqrt()
541 });
542
543 let uniform = Uniform::new(0.0, 2.0 * std::f64::consts::PI)
544 .expect("operation should succeed");
545 let offset = Array1::from_shape_fn(n_components, |_| rng.sample(uniform));
546
547 Ok((weights, offset))
548 }
549 MetaKernelType::Polynomial => {
550 let gamma = hyperparams.get("gamma").copied().unwrap_or(1.0);
551 let normal = Normal::new(0.0, 1.0).expect("operation should succeed");
552
553 let weights = Array2::from_shape_fn((n_features, n_components), |_| {
554 rng.sample(normal) * (2.0 * gamma).sqrt()
555 });
556
557 let uniform = Uniform::new(0.0, 2.0 * std::f64::consts::PI)
558 .expect("operation should succeed");
559 let offset = Array1::from_shape_fn(n_components, |_| rng.sample(uniform));
560
561 Ok((weights, offset))
562 }
563 MetaKernelType::Linear => {
564 let weights = Array2::from_shape_fn((n_features, n_components), |_| {
566 rng.sample(
567 Normal::new(0.0, 1.0 / (n_features as Float).sqrt())
568 .expect("operation should succeed"),
569 )
570 });
571 let offset = Array1::zeros(n_components);
572 Ok((weights, offset))
573 }
574 _ => {
575 let normal = Normal::new(0.0, 1.0).expect("operation should succeed");
577 let weights =
578 Array2::from_shape_fn((n_features, n_components), |_| rng.sample(normal));
579 let uniform = Uniform::new(0.0, 2.0 * std::f64::consts::PI)
580 .expect("operation should succeed");
581 let offset = Array1::from_shape_fn(n_components, |_| rng.sample(uniform));
582 Ok((weights, offset))
583 }
584 }
585 }
586}
587
588impl Transform<Array2<Float>, Array2<Float>> for MetaLearningKernelSelector<Trained> {
589 fn transform(&self, x: &Array2<Float>) -> Result<Array2<Float>> {
590 let kernel_weights = self
591 .kernel_weights
592 .as_ref()
593 .expect("operation should succeed");
594 let kernel_offset = self
595 .kernel_offset
596 .as_ref()
597 .expect("operation should succeed");
598
599 if x.ncols() != kernel_weights.nrows() {
600 return Err(SklearsError::InvalidInput(format!(
601 "Feature dimension mismatch: expected {}, got {}",
602 kernel_weights.nrows(),
603 x.ncols()
604 )));
605 }
606
607 let projection = x.dot(kernel_weights);
609
610 let n_samples = x.nrows();
611 let n_components = self.config.n_components;
612 let mut output = Array2::zeros((n_samples, n_components));
613
614 let normalizer = (2.0 / n_components as Float).sqrt();
615 for i in 0..n_samples {
616 for j in 0..n_components {
617 output[[i, j]] = normalizer * (projection[[i, j]] + kernel_offset[j]).cos();
618 }
619 }
620
621 Ok(output)
622 }
623}
624
impl MetaLearningKernelSelector<Trained> {
    /// Kernel type chosen during `fit`.
    pub fn selected_kernel(&self) -> MetaKernelType {
        self.selected_kernel.expect("operation should succeed")
    }

    /// Hyperparameters associated with the selected kernel.
    pub fn selected_hyperparameters(&self) -> &HashMap<String, Float> {
        self.selected_hyperparams
            .as_ref()
            .expect("operation should succeed")
    }

    /// Random-feature weight matrix, shape (n_features, n_components).
    pub fn kernel_weights(&self) -> &Array2<Float> {
        self.kernel_weights
            .as_ref()
            .expect("operation should succeed")
    }

    /// Random-feature phase-offset vector, length n_components.
    pub fn kernel_offset(&self) -> &Array1<Float> {
        self.kernel_offset
            .as_ref()
            .expect("operation should succeed")
    }
}
652
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::array;

    // Meta-feature extraction on a small dense matrix reports the right
    // dimensions and a mean correlation within [0, 1].
    #[test]
    fn test_meta_features_extraction() {
        let x = array![
            [1.0, 2.0, 3.0],
            [4.0, 5.0, 6.0],
            [7.0, 8.0, 9.0],
            [10.0, 11.0, 12.0]
        ];

        let meta_features = DatasetMetaFeatures::extract(&x).expect("operation should succeed");

        assert_eq!(meta_features.n_samples, 4);
        assert_eq!(meta_features.n_features, 3);
        assert!(meta_features.feature_means.len() == 3);
        assert!(meta_features.mean_correlation >= 0.0);
        assert!(meta_features.mean_correlation <= 1.0);
    }

    // Default-config fit/transform round trip yields (n_samples, 100)
    // features; 100 is the default n_components.
    #[test]
    fn test_meta_learning_selector_basic() {
        let config = MetaLearningConfig::default();
        let selector = MetaLearningKernelSelector::new(config);

        let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];

        let fitted = selector.fit(&x, &()).expect("operation should succeed");
        let features = fitted.transform(&x).expect("operation should succeed");

        assert_eq!(features.shape(), &[3, 100]);
    }

    // With one recorded task and a permissive similarity threshold, fitting
    // still selects one of the supported kernel families.
    #[test]
    fn test_kernel_selection_with_history() {
        let config = MetaLearningConfig {
            strategy: MetaLearningStrategy::PerformanceBased {
                min_similar_tasks: 1,
                similarity_threshold: 0.5,
            },
            n_components: 50,
            use_transfer_learning: false,
            performance_metric: PerformanceMetric::KernelAlignment,
        };

        let mut selector = MetaLearningKernelSelector::new(config);

        let x_hist = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
        let meta_features =
            DatasetMetaFeatures::extract(&x_hist).expect("operation should succeed");

        let mut hyperparams = HashMap::new();
        hyperparams.insert("gamma".to_string(), 0.5);

        let task = TaskMetadata {
            task_id: "task1".to_string(),
            meta_features,
            best_kernel: MetaKernelType::RBF,
            performance: 0.95,
            hyperparameters: hyperparams,
        };

        selector = selector.add_task_history(task);

        let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
        let fitted = selector.fit(&x, &()).expect("operation should succeed");

        let selected = fitted.selected_kernel();
        assert!(
            selected == MetaKernelType::RBF
                || selected == MetaKernelType::Polynomial
                || selected == MetaKernelType::Linear,
            "Unexpected kernel type: {:?}",
            selected
        );
    }

    // Every exercised strategy variant can fit and transform without error.
    #[test]
    fn test_different_strategies() {
        let strategies = vec![
            MetaLearningStrategy::PerformanceBased {
                min_similar_tasks: 3,
                similarity_threshold: 0.6,
            },
            MetaLearningStrategy::Portfolio { portfolio_size: 3 },
        ];

        let x = array![[1.0, 2.0], [3.0, 4.0]];

        for strategy in strategies {
            let config = MetaLearningConfig {
                strategy,
                ..Default::default()
            };

            let selector = MetaLearningKernelSelector::new(config);
            let fitted = selector.fit(&x, &()).expect("operation should succeed");
            let features = fitted.transform(&x).expect("operation should succeed");

            assert_eq!(features.nrows(), 2);
        }
    }

    // Sparse input routes the heuristic selection toward Linear (or RBF).
    #[test]
    fn test_heuristic_selection() {
        let x_sparse = array![[0.0, 0.0, 1.0], [0.0, 1.0, 0.0], [1.0, 0.0, 0.0]];

        let selector = MetaLearningKernelSelector::with_components(50);
        let fitted = selector
            .fit(&x_sparse, &())
            .expect("operation should succeed");

        assert!(
            fitted.selected_kernel() == MetaKernelType::Linear
                || fitted.selected_kernel() == MetaKernelType::RBF
        );
    }

    // Cosine similarity is +1 for identical vectors and -1 for negations.
    #[test]
    fn test_similarity_computation() {
        let a = vec![1.0, 2.0, 3.0];
        let b = vec![1.0, 2.0, 3.0];

        let similarity = MetaLearningKernelSelector::<Untrained>::compute_similarity(&a, &b);
        assert!((similarity - 1.0).abs() < 1e-6);

        let c = vec![-1.0, -2.0, -3.0];
        let similarity2 = MetaLearningKernelSelector::<Untrained>::compute_similarity(&a, &c);
        assert!((similarity2 + 1.0).abs() < 1e-6);
    }

    // Fitting a zero-sized array is rejected with an error.
    #[test]
    fn test_empty_input_error() {
        let selector = MetaLearningKernelSelector::with_components(50);
        let x_empty: Array2<Float> = Array2::zeros((0, 0));

        assert!(selector.fit(&x_empty, &()).is_err());
    }

    // Transforming data with a different feature count than training fails.
    #[test]
    fn test_dimension_mismatch_error() {
        let selector = MetaLearningKernelSelector::with_components(50);
        let x_train = array![[1.0, 2.0], [3.0, 4.0]];
        let x_test = array![[1.0, 2.0, 3.0]];

        let fitted = selector
            .fit(&x_train, &())
            .expect("operation should succeed");
        assert!(fitted.transform(&x_test).is_err());
    }
}