1#![allow(dead_code)]
8
9use scirs2_core::ndarray::{Array1, Array2};
10use scirs2_core::random::prelude::*;
11use scirs2_core::random::prelude::*;
12use std::collections::HashMap;
13
14pub struct QuantumFeatureSelector {
16 features: FeatureData,
18 method: SelectionMethod,
20 criteria: EvaluationCriteria,
22 constraints: SelectionConstraints,
24 cv_strategy: CrossValidationStrategy,
26}
27
/// Input dataset handed to [`QuantumFeatureSelector`].
#[derive(Debug, Clone)]
pub struct FeatureData {
    /// Sample matrix; column `i` holds the values of feature `i`.
    pub data: Array2<f64>,
    /// One name per feature; its length defines the number of QUBO variables.
    pub feature_names: Vec<String>,
    /// Target value, paired element-wise with each feature column.
    pub target: Array1<f64>,
    /// Kind of each feature; position `i` describes feature `i`.
    pub feature_types: Vec<FeatureType>,
    /// Precomputed per-feature and pairwise statistics.
    pub statistics: FeatureStatistics,
}
41
/// Kind of data stored in a feature column.
///
/// The diversity bonus in this file groups features by kind; `Ordinal`,
/// `Text` and `TimeSeries` share a single "other" group there.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum FeatureType {
    /// Continuous feature.
    Continuous,
    /// Discrete feature with `levels` levels.
    Discrete { levels: usize },
    /// Binary feature.
    Binary,
    /// Categorical feature with the listed category labels.
    Categorical { categories: Vec<String> },
    /// Ordinal feature with the listed level labels.
    Ordinal { levels: Vec<String> },
    /// Text feature.
    Text,
    /// Time-series feature; `frequency` is a free-form label that this file does not parse.
    TimeSeries { frequency: String },
}
59
/// Precomputed statistics; vectors are indexed by feature, the matrix by feature pair.
#[derive(Debug, Clone)]
pub struct FeatureStatistics {
    /// Per-feature means (not read by the QUBO builders in this file).
    pub means: Array1<f64>,
    /// Per-feature standard deviations; squared to obtain variances.
    pub stds: Array1<f64>,
    /// Correlation of each feature with the target; the absolute value is used as a relevance score.
    pub target_correlations: Array1<f64>,
    /// Pairwise feature correlations; entry `[i, j]` relates features `i` and `j`.
    pub feature_correlations: Array2<f64>,
    /// Presumably the number of missing values per feature; not read in this file.
    pub missing_counts: Array1<usize>,
    /// Presumably the number of distinct values per feature; not read in this file.
    pub unique_counts: Array1<usize>,
}
75
/// Feature-selection strategy; each variant maps to one objective builder.
#[derive(Debug, Clone)]
pub enum SelectionMethod {
    /// Scores features individually with a filter metric.
    Filter {
        /// Metric used to score each feature.
        metric: FilterMetric,
        /// Minimum score; only the correlation metric applies it.
        threshold: f64,
    },
    /// Scores features through a model.
    Wrapper {
        /// Model whose feature importances drive the objective.
        model: MLModel,
        /// Search strategy; not read when the QUBO is built.
        search_strategy: SearchStrategy,
    },
    /// Adds regularization-style penalties to the QUBO.
    Embedded {
        /// Kind of regularization.
        regularization: RegularizationType,
        /// Penalty strength.
        strength: f64,
    },
    /// Weighted mix of a filter objective and a wrapper objective.
    Hybrid {
        /// Metric for the filter part.
        filter_metric: FilterMetric,
        /// Model for the wrapper part.
        wrapper_model: MLModel,
        /// Weight of the filter part; the wrapper part gets `1.0 - balance`.
        balance: f64,
    },
    /// Correlation-based "entanglement" penalty plus "coherence" reward.
    QuantumInspired {
        /// Scales the penalty on squared pairwise feature correlation.
        entanglement_penalty: f64,
        /// Scales the per-feature reward; half of it is reused as the diversity bonus.
        coherence_bonus: f64,
    },
}
105
/// Metric used by the filter objective to score individual features.
///
/// Only `MutualInformation`, `Correlation` and `VarianceThreshold` have a
/// dedicated scoring rule in this file; the remaining variants fall back to a
/// flat reward per feature.
#[derive(Debug, Clone, PartialEq)]
pub enum FilterMetric {
    /// Binned mutual information between feature and target.
    MutualInformation,
    /// Chi-squared metric (flat fallback score in this file).
    ChiSquared,
    /// ANOVA metric (flat fallback score in this file).
    ANOVA,
    /// Absolute correlation with the target.
    Correlation,
    /// Information-gain metric (flat fallback score in this file).
    InformationGain,
    /// Rewards features whose variance reaches `threshold`.
    VarianceThreshold { threshold: f64 },
    /// Relief metric (flat fallback score in this file).
    Relief,
}
123
/// Description of a machine-learning model.
///
/// The code in this file passes it around as configuration and never inspects its fields.
#[derive(Debug, Clone)]
pub struct MLModel {
    /// Model family.
    pub model_type: ModelType,
    /// Named numeric hyperparameters.
    pub hyperparameters: HashMap<String, f64>,
    /// Training settings.
    pub training_params: TrainingParameters,
}
133
/// Model family of an [`MLModel`]; not interpreted by the code in this file.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum ModelType {
    /// Linear regression.
    LinearRegression,
    /// Logistic regression.
    LogisticRegression,
    /// Support vector machine; `kernel` is a free-form name.
    SVM { kernel: String },
    /// Random forest with `n_trees` trees.
    RandomForest { n_trees: usize },
    /// Neural network; `architecture` presumably lists layer sizes.
    NeuralNetwork { architecture: Vec<usize> },
    /// Gradient boosting with `n_estimators` estimators.
    GradientBoosting { n_estimators: usize },
    /// k-nearest neighbours with `k` neighbours.
    KNN { k: usize },
}
151
/// Training settings of an [`MLModel`]; not interpreted by the code in this file.
#[derive(Debug, Clone, PartialEq)]
pub struct TrainingParameters {
    /// Learning rate.
    pub learning_rate: f64,
    /// Number of epochs.
    pub epochs: usize,
    /// Batch size.
    pub batch_size: usize,
    /// Whether early stopping is enabled.
    pub early_stopping: bool,
    /// Early-stopping patience.
    pub patience: usize,
}
165
/// Search strategy of a wrapper method; not read when the QUBO is built in this file.
#[derive(Debug, Clone, PartialEq)]
pub enum SearchStrategy {
    /// Exhaustive search.
    Exhaustive,
    /// Forward selection.
    ForwardSelection,
    /// Backward elimination.
    BackwardElimination,
    /// Bidirectional search.
    Bidirectional,
    /// Genetic search.
    Genetic {
        /// Population size.
        population_size: usize,
        /// Number of generations.
        generations: usize,
    },
    /// Simulated annealing with a temperature and a cooling rate.
    SimulatedAnnealing { temperature: f64, cooling_rate: f64 },
}
184
/// Regularization flavour used by the embedded selection method.
#[derive(Debug, Clone, PartialEq)]
pub enum RegularizationType {
    /// L1: a per-feature penalty on the diagonal.
    L1,
    /// L2: a per-feature penalty plus a smaller pairwise penalty.
    L2,
    /// Mix of L1 and L2; `l1_ratio` is the share of the strength given to L1.
    ElasticNet { l1_ratio: f64 },
    /// Group penalty; each inner vector lists the feature indices of one group.
    GroupLasso { groups: Vec<Vec<usize>> },
    /// Fused lasso; currently contributes no terms.
    FusedLasso,
}
198
/// Evaluation settings of a selector; stored but not read by the QUBO builders in this file.
#[derive(Debug, Clone)]
pub struct EvaluationCriteria {
    /// Primary metric.
    pub primary_metric: EvaluationMetric,
    /// Additional metrics.
    pub secondary_metrics: Vec<EvaluationMetric>,
    /// Named weights; presumably keyed by metric name — confirm against callers.
    pub weights: HashMap<String, f64>,
    /// Optional target performance value.
    pub target_performance: Option<f64>,
}
210
/// Evaluation metric identifier; not interpreted by the code in this file.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum EvaluationMetric {
    /// Accuracy.
    Accuracy,
    /// Precision.
    Precision,
    /// Recall.
    Recall,
    /// F1 score.
    F1Score,
    /// Area under the ROC curve.
    AUCROC,
    /// Mean squared error.
    MSE,
    /// Mean absolute error.
    MAE,
    /// Coefficient of determination (R²).
    R2,
    /// Logarithmic loss.
    LogLoss,
    /// User-defined metric identified by `name`.
    Custom { name: String },
}
234
/// Constraints on the selected feature set.
///
/// The default value imposes no constraints.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct SelectionConstraints {
    /// Minimum number of features; not enforced by the QUBO builder in this file.
    pub min_features: Option<usize>,
    /// Maximum number of features; not enforced by the QUBO builder in this file.
    pub max_features: Option<usize>,
    /// Indices of features that are strongly rewarded for being selected.
    pub must_include: Vec<usize>,
    /// Indices of features that are strongly penalized for being selected.
    pub must_exclude: Vec<usize>,
    /// Groups of feature indices rewarded for being selected together.
    pub feature_groups: Vec<Vec<usize>>,
    /// Optional cost per feature index.
    pub feature_costs: Option<HashMap<usize, f64>>,
    /// Optional cost budget used to normalize `feature_costs`.
    pub max_cost: Option<f64>,
}
252
/// Cross-validation configuration; stored but not interpreted by the code in this file.
#[derive(Debug, Clone, PartialEq)]
pub enum CrossValidationStrategy {
    /// K-fold with `k` folds, optionally shuffled.
    KFold { k: usize, shuffle: bool },
    /// Stratified k-fold with `k` folds.
    StratifiedKFold { k: usize },
    /// Leave-one-out.
    LeaveOneOut,
    /// Time-series split with `n_splits` splits.
    TimeSeriesSplit { n_splits: usize },
    /// Group k-fold; `groups` presumably holds one group id per sample — confirm.
    GroupKFold { k: usize, groups: Vec<usize> },
    /// Monte Carlo splitting with `n_splits` splits and a `test_size` setting.
    MonteCarlo { n_splits: usize, test_size: f64 },
}
268
269impl QuantumFeatureSelector {
270 pub fn new(features: FeatureData, method: SelectionMethod) -> Self {
272 Self {
273 features,
274 method,
275 criteria: EvaluationCriteria {
276 primary_metric: EvaluationMetric::Accuracy,
277 secondary_metrics: vec![],
278 weights: HashMap::new(),
279 target_performance: None,
280 },
281 constraints: SelectionConstraints::default(),
282 cv_strategy: CrossValidationStrategy::KFold {
283 k: 5,
284 shuffle: true,
285 },
286 }
287 }
288
289 pub fn with_criteria(mut self, criteria: EvaluationCriteria) -> Self {
291 self.criteria = criteria;
292 self
293 }
294
295 pub fn with_constraints(mut self, constraints: SelectionConstraints) -> Self {
297 self.constraints = constraints;
298 self
299 }
300
301 pub fn with_cv_strategy(mut self, strategy: CrossValidationStrategy) -> Self {
303 self.cv_strategy = strategy;
304 self
305 }
306
307 pub fn build_qubo(&self) -> Result<(Array2<f64>, HashMap<String, usize>), String> {
309 let n_features = self.features.feature_names.len();
310 let mut qubo = Array2::zeros((n_features, n_features));
311 let mut var_map = HashMap::new();
312
313 for (i, _name) in self.features.feature_names.iter().enumerate() {
315 var_map.insert(format!("feature_{i}"), i);
316 }
317
318 match &self.method {
320 SelectionMethod::Filter { metric, threshold } => {
321 self.add_filter_objective(&mut qubo, metric, *threshold)?;
322 }
323 SelectionMethod::Wrapper { model, .. } => {
324 self.add_wrapper_objective(&mut qubo, model)?;
325 }
326 SelectionMethod::Embedded {
327 regularization,
328 strength,
329 } => {
330 self.add_embedded_objective(&mut qubo, regularization, *strength)?;
331 }
332 SelectionMethod::Hybrid {
333 filter_metric,
334 wrapper_model,
335 balance,
336 } => {
337 self.add_hybrid_objective(&mut qubo, filter_metric, wrapper_model, *balance)?;
338 }
339 SelectionMethod::QuantumInspired {
340 entanglement_penalty,
341 coherence_bonus,
342 } => {
343 self.add_quantum_objective(&mut qubo, *entanglement_penalty, *coherence_bonus)?;
344 }
345 }
346
347 self.add_selection_constraints(&mut qubo)?;
349
350 Ok((qubo, var_map))
351 }
352
353 fn add_filter_objective(
355 &self,
356 qubo: &mut Array2<f64>,
357 metric: &FilterMetric,
358 threshold: f64,
359 ) -> Result<(), String> {
360 match metric {
361 FilterMetric::MutualInformation => {
362 for i in 0..self.features.feature_names.len() {
364 let mi_score = self.compute_mutual_information(i)?;
365 qubo[[i, i]] -= mi_score;
366 }
367 }
368 FilterMetric::Correlation => {
369 for i in 0..self.features.feature_names.len() {
371 let corr = self.features.statistics.target_correlations[i].abs();
372 if corr >= threshold {
373 qubo[[i, i]] -= corr;
374 }
375 }
376 }
377 FilterMetric::VarianceThreshold { threshold } => {
378 for i in 0..self.features.feature_names.len() {
380 let variance = self.features.statistics.stds[i].powi(2);
381 if variance >= *threshold {
382 qubo[[i, i]] -= 1.0;
383 }
384 }
385 }
386 _ => {
387 for i in 0..self.features.feature_names.len() {
389 qubo[[i, i]] -= 1.0; }
391 }
392 }
393
394 self.add_correlation_penalty(qubo)?;
396
397 Ok(())
398 }
399
400 fn compute_mutual_information(&self, feature_idx: usize) -> Result<f64, String> {
402 let feature = self.features.data.column(feature_idx);
406 let target = &self.features.target;
407
408 let n_bins = 10;
410 let feature_discrete = self.discretize_array(&feature.to_owned(), n_bins)?;
411 let target_discrete = self.discretize_array(&target.to_owned(), n_bins)?;
412
413 let mut joint_counts = Array2::<f64>::zeros((n_bins, n_bins));
415 for (f, t) in feature_discrete.iter().zip(target_discrete.iter()) {
416 joint_counts[[*f, *t]] += 1.0;
417 }
418
419 let joint_probs = &joint_counts / feature.len() as f64;
420 let feature_probs = joint_probs.sum_axis(scirs2_core::ndarray::Axis(1));
421 let target_probs = joint_probs.sum_axis(scirs2_core::ndarray::Axis(0));
422
423 let mut mi = 0.0;
425 for i in 0..n_bins {
426 for j in 0..n_bins {
427 if joint_probs[[i, j]] > 0.0 {
428 let ratio: f64 = joint_probs[[i, j]] / (feature_probs[i] * target_probs[j]);
429 mi += joint_probs[[i, j]] * ratio.ln();
430 }
431 }
432 }
433
434 Ok(mi)
435 }
436
437 fn discretize_array(&self, array: &Array1<f64>, n_bins: usize) -> Result<Vec<usize>, String> {
439 let min = array
440 .iter()
441 .min_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
442 .ok_or_else(|| "Cannot discretize empty array: no minimum value".to_string())?;
443 let max = array
444 .iter()
445 .max_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
446 .ok_or_else(|| "Cannot discretize empty array: no maximum value".to_string())?;
447 let bin_width = (max - min) / n_bins as f64;
448
449 Ok(array
450 .iter()
451 .map(|&x| ((x - min) / bin_width).floor() as usize)
452 .map(|b| b.min(n_bins - 1))
453 .collect())
454 }
455
456 fn add_correlation_penalty(&self, qubo: &mut Array2<f64>) -> Result<(), String> {
458 let corr_threshold = 0.9;
459 let penalty = 10.0;
460
461 let corr_matrix = &self.features.statistics.feature_correlations;
462
463 for i in 0..corr_matrix.shape()[0] {
464 for j in i + 1..corr_matrix.shape()[1] {
465 if corr_matrix[[i, j]].abs() > corr_threshold {
466 qubo[[i, j]] += penalty;
468 qubo[[j, i]] += penalty;
469 }
470 }
471 }
472
473 Ok(())
474 }
475
476 fn add_wrapper_objective(&self, qubo: &mut Array2<f64>, model: &MLModel) -> Result<(), String> {
478 let importances = self.compute_feature_importances(model)?;
483
484 for (i, &importance) in importances.iter().enumerate() {
485 qubo[[i, i]] -= importance;
486 }
487
488 self.add_feature_interactions(qubo, model)?;
490
491 Ok(())
492 }
493
494 fn compute_feature_importances(&self, _model: &MLModel) -> Result<Array1<f64>, String> {
496 let n_features = self.features.feature_names.len();
500 let mut rng = thread_rng();
501
502 Ok(Array1::from_shape_fn(n_features, |_| rng.gen::<f64>()))
503 }
504
505 fn add_feature_interactions(
507 &self,
508 qubo: &mut Array2<f64>,
509 _model: &MLModel,
510 ) -> Result<(), String> {
511 let synergy_bonus = -5.0;
515 let corr_matrix = &self.features.statistics.feature_correlations;
516
517 for i in 0..corr_matrix.shape()[0] {
518 for j in i + 1..corr_matrix.shape()[1] {
519 let corr = corr_matrix[[i, j]].abs();
521 if corr > 0.3 && corr < 0.7 {
522 qubo[[i, j]] += synergy_bonus * corr;
523 qubo[[j, i]] += synergy_bonus * corr;
524 }
525 }
526 }
527
528 Ok(())
529 }
530
531 fn add_embedded_objective(
533 &self,
534 qubo: &mut Array2<f64>,
535 regularization: &RegularizationType,
536 strength: f64,
537 ) -> Result<(), String> {
538 match regularization {
539 RegularizationType::L1 => {
540 for i in 0..self.features.feature_names.len() {
542 qubo[[i, i]] += strength;
543 }
544 }
545 RegularizationType::L2 => {
546 for i in 0..self.features.feature_names.len() {
548 qubo[[i, i]] += strength;
549 for j in 0..self.features.feature_names.len() {
551 if i != j {
552 qubo[[i, j]] += strength * 0.1;
553 }
554 }
555 }
556 }
557 RegularizationType::ElasticNet { l1_ratio } => {
558 let l1_strength = strength * l1_ratio;
560 let l2_strength = strength * (1.0 - l1_ratio);
561
562 for i in 0..self.features.feature_names.len() {
563 qubo[[i, i]] += l1_strength + l2_strength;
564 }
565 }
566 RegularizationType::GroupLasso { groups } => {
567 for group in groups {
569 let group_penalty = strength / group.len() as f64;
570 for &i in group {
571 for &j in group {
572 if i < self.features.feature_names.len()
573 && j < self.features.feature_names.len()
574 {
575 qubo[[i, j]] += group_penalty;
576 }
577 }
578 }
579 }
580 }
581 RegularizationType::FusedLasso => {}
582 }
583
584 Ok(())
585 }
586
587 fn add_hybrid_objective(
589 &self,
590 qubo: &mut Array2<f64>,
591 filter_metric: &FilterMetric,
592 wrapper_model: &MLModel,
593 balance: f64,
594 ) -> Result<(), String> {
595 let shape = qubo.shape();
599 let mut filter_qubo = Array2::zeros((shape[0], shape[1]));
600 self.add_filter_objective(&mut filter_qubo, filter_metric, 0.0)?;
601
602 let mut wrapper_qubo = Array2::zeros((shape[0], shape[1]));
604 self.add_wrapper_objective(&mut wrapper_qubo, wrapper_model)?;
605
606 *qubo = &filter_qubo * balance + &wrapper_qubo * (1.0 - balance);
608
609 Ok(())
610 }
611
612 fn add_quantum_objective(
614 &self,
615 qubo: &mut Array2<f64>,
616 entanglement_penalty: f64,
617 coherence_bonus: f64,
618 ) -> Result<(), String> {
619 let corr_matrix = &self.features.statistics.feature_correlations;
624
625 for i in 0..corr_matrix.shape()[0] {
626 for j in i + 1..corr_matrix.shape()[1] {
627 let correlation = corr_matrix[[i, j]].abs();
628 let entanglement = correlation.powi(2);
630 qubo[[i, j]] += entanglement_penalty * entanglement;
631 qubo[[j, i]] += entanglement_penalty * entanglement;
632 }
633 }
634
635 for i in 0..self.features.feature_names.len() {
637 let target_corr = self.features.statistics.target_correlations[i].abs();
638 let variance = self.features.statistics.stds[i].powi(2);
639
640 let coherence = target_corr * variance.sqrt();
642 qubo[[i, i]] -= coherence_bonus * coherence;
643 }
644
645 self.add_diversity_bonus(qubo, coherence_bonus * 0.5)?;
647
648 Ok(())
649 }
650
651 fn add_diversity_bonus(&self, qubo: &mut Array2<f64>, bonus: f64) -> Result<(), String> {
653 let mut type_groups: HashMap<String, Vec<usize>> = HashMap::new();
657
658 for (i, ftype) in self.features.feature_types.iter().enumerate() {
659 let type_key = match ftype {
660 FeatureType::Continuous => "continuous",
661 FeatureType::Discrete { .. } => "discrete",
662 FeatureType::Binary => "binary",
663 FeatureType::Categorical { .. } => "categorical",
664 _ => "other",
665 };
666
667 type_groups.entry(type_key.to_string()).or_default().push(i);
668 }
669
670 for group1 in type_groups.values() {
672 for group2 in type_groups.values() {
673 if group1 != group2 {
674 for &i in group1 {
675 for &j in group2 {
676 if i < j {
677 qubo[[i, j]] -= bonus;
678 qubo[[j, i]] -= bonus;
679 }
680 }
681 }
682 }
683 }
684 }
685
686 Ok(())
687 }
688
689 fn add_selection_constraints(&self, qubo: &mut Array2<f64>) -> Result<(), String> {
691 let penalty = 100.0;
692
693 for &feature_idx in &self.constraints.must_include {
695 qubo[[feature_idx, feature_idx]] -= penalty * 10.0;
696 }
697
698 for &feature_idx in &self.constraints.must_exclude {
700 qubo[[feature_idx, feature_idx]] += penalty * 10.0;
701 }
702
703 for group in &self.constraints.feature_groups {
705 for &i in group {
706 for &j in group {
707 if i != j {
708 qubo[[i, j]] -= penalty;
709 }
710 }
711 }
712 }
713
714 if let (Some(costs), Some(max_cost)) =
716 (&self.constraints.feature_costs, self.constraints.max_cost)
717 {
718 for (&feature_idx, &cost) in costs {
720 if feature_idx < qubo.shape()[0] {
721 qubo[[feature_idx, feature_idx]] += (cost / max_cost) * penalty;
722 }
723 }
724 }
725
726 Ok(())
727 }
728
729 pub fn decode_solution(&self, solution: &HashMap<String, bool>) -> SelectedFeatures {
731 let mut selected_indices = Vec::new();
732 let mut selected_names = Vec::new();
733
734 for (i, name) in self.features.feature_names.iter().enumerate() {
735 let var_name = format!("feature_{i}");
736 if *solution.get(&var_name).unwrap_or(&false) {
737 selected_indices.push(i);
738 selected_names.push(name.clone());
739 }
740 }
741
742 SelectedFeatures {
743 indices: selected_indices,
744 names: selected_names,
745 performance_estimate: self.estimate_performance(solution),
746 importance_scores: self.calculate_importance_scores(solution),
747 }
748 }
749
750 fn estimate_performance(&self, solution: &HashMap<String, bool>) -> f64 {
752 let mut total_score = 0.0;
754 let mut count = 0;
755
756 for (i, _) in self.features.feature_names.iter().enumerate() {
757 let var_name = format!("feature_{i}");
758 if *solution.get(&var_name).unwrap_or(&false) {
759 total_score += self.features.statistics.target_correlations[i].abs();
760 count += 1;
761 }
762 }
763
764 if count > 0 {
765 total_score / count as f64
766 } else {
767 0.0
768 }
769 }
770
771 fn calculate_importance_scores(
773 &self,
774 solution: &HashMap<String, bool>,
775 ) -> HashMap<String, f64> {
776 let mut scores = HashMap::new();
777
778 for (i, name) in self.features.feature_names.iter().enumerate() {
779 let var_name = format!("feature_{i}");
780 if *solution.get(&var_name).unwrap_or(&false) {
781 let score = self.features.statistics.target_correlations[i].abs();
782 scores.insert(name.clone(), score);
783 }
784 }
785
786 scores
787 }
788}
789
/// Result of decoding a feature-selection solution.
#[derive(Debug, Clone, PartialEq)]
pub struct SelectedFeatures {
    /// Indices of the selected features, in ascending order.
    pub indices: Vec<usize>,
    /// Names of the selected features, aligned with `indices`.
    pub names: Vec<String>,
    /// Mean absolute target correlation of the selection (`0.0` when empty).
    pub performance_estimate: f64,
    /// Absolute target correlation of each selected feature, keyed by feature name.
    pub importance_scores: HashMap<String, f64>,
}
797
798pub struct HyperparameterOptimizer {
800 model: MLModel,
802 param_space: ParameterSpace,
804 strategy: OptimizationStrategy,
806 evaluation: HyperparameterEvaluation,
808}
809
/// Hyperparameter search space, keyed by parameter name.
#[derive(Debug, Clone)]
pub struct ParameterSpace {
    /// Continuous parameters; each is sampled on a fixed grid when discretized.
    pub continuous: HashMap<String, ContinuousParam>,
    /// Discrete parameters; their listed values become the grid.
    pub discrete: HashMap<String, DiscreteParam>,
    /// Categorical parameters; not discretized by the code in this file.
    pub categorical: HashMap<String, CategoricalParam>,
    /// Conditional parameter rules; not read by the code in this file.
    pub conditional: Vec<ConditionalParam>,
}
821
/// Continuous hyperparameter range.
#[derive(Debug, Clone)]
pub struct ContinuousParam {
    /// First grid value.
    pub min: f64,
    /// Last grid value.
    pub max: f64,
    /// How grid points are spaced between `min` and `max`.
    pub scale: ScaleType,
    /// Default value; not read by the code in this file.
    pub default: f64,
}
829
/// Spacing of the grid points of a continuous parameter.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ScaleType {
    /// Evenly spaced values.
    Linear,
    /// Values evenly spaced in the natural logarithm.
    Log,
    /// Values of the form `min * (max / min)^t` for evenly spaced `t`.
    Exponential,
}
836
/// Discrete hyperparameter with an explicit list of candidate values.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DiscreteParam {
    /// Candidate values; each becomes one grid point.
    pub values: Vec<i32>,
    /// Default value; not read by the code in this file.
    pub default: i32,
}
842
/// Categorical hyperparameter; not discretized by the code in this file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CategoricalParam {
    /// Available choices.
    pub choices: Vec<String>,
    /// Default choice.
    pub default: String,
}
848
/// Conditional parameter rule; not interpreted by the code in this file.
///
/// NOTE(review): presumably `parameter` only applies when `condition` equals
/// `condition_value` — confirm against the consumer of this type.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ConditionalParam {
    /// Name of the parameter the rule is about.
    pub parameter: String,
    /// Condition expression or name.
    pub condition: String,
    /// Value the condition is compared against.
    pub condition_value: String,
}
855
/// Hyperparameter search strategy.
///
/// The QUBO builder in this file treats `QuantumOptimization` specially; all
/// other variants share the standard objective.
#[derive(Debug, Clone)]
pub enum OptimizationStrategy {
    /// Grid search.
    GridSearch,
    /// Random search with `n_trials` trials.
    RandomSearch { n_trials: usize },
    /// Bayesian optimization.
    BayesianOptimization {
        /// Acquisition function.
        acquisition: AcquisitionFunction,
        /// Number of initial points.
        n_initial: usize,
    },
    /// Evolutionary strategy.
    EvolutionaryStrategy {
        /// Population size.
        population_size: usize,
        /// Mutation rate.
        mutation_rate: f64,
    },
    /// Quantum-inspired objective with tunnelling and superposition terms.
    QuantumOptimization {
        /// Reward for selecting adjacent grid values of the same parameter.
        tunneling_rate: f64,
        /// Passed to the superposition bonus, which currently ignores it.
        superposition_size: usize,
    },
}
878
/// Acquisition function for Bayesian optimization; not interpreted by the code in this file.
#[derive(Debug, Clone, PartialEq)]
pub enum AcquisitionFunction {
    /// Expected improvement.
    ExpectedImprovement,
    /// Probability of improvement.
    ProbabilityOfImprovement,
    /// Upper confidence bound with parameter `kappa`.
    UpperConfidenceBound { kappa: f64 },
    /// Entropy search.
    EntropySearch,
}
886
/// Evaluation settings of a hyperparameter search; not read by the QUBO builders in this file.
#[derive(Debug, Clone)]
pub struct HyperparameterEvaluation {
    /// Metric to evaluate.
    pub metric: EvaluationMetric,
    /// Cross-validation configuration.
    pub cv_strategy: CrossValidationStrategy,
    /// Resource limits.
    pub constraints: ResourceConstraints,
}
896
/// Resource limits for a hyperparameter search; not interpreted by the code in this file.
///
/// The default value sets no limits and leaves early stopping disabled.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ResourceConstraints {
    /// Optional time limit per trial.
    pub max_time_per_trial: Option<std::time::Duration>,
    /// Optional overall time limit.
    pub max_total_time: Option<std::time::Duration>,
    /// Optional memory limit; the unit is not specified here — confirm with callers.
    pub max_memory: Option<usize>,
    /// Whether early stopping is enabled.
    pub early_stopping: bool,
}
908
909impl HyperparameterOptimizer {
910 pub fn build_qubo(&self) -> Result<(Array2<f64>, HashMap<String, usize>), String> {
912 let discretized = self.discretize_parameters()?;
914
915 let n_vars = discretized.total_combinations();
916 let mut qubo = Array2::zeros((n_vars, n_vars));
917 let mut var_map = HashMap::new();
918
919 self.create_parameter_variables(&mut var_map, &discretized)?;
921
922 match &self.strategy {
924 OptimizationStrategy::QuantumOptimization {
925 tunneling_rate,
926 superposition_size,
927 } => {
928 self.add_quantum_hyperopt_objective(
929 &mut qubo,
930 &var_map,
931 &discretized,
932 *tunneling_rate,
933 *superposition_size,
934 )?;
935 }
936 _ => {
937 self.add_standard_hyperopt_objective(&mut qubo, &var_map, &discretized)?;
938 }
939 }
940
941 Ok((qubo, var_map))
942 }
943
944 fn discretize_parameters(&self) -> Result<DiscretizedSpace, String> {
946 let mut discretized = DiscretizedSpace {
947 parameters: Vec::new(),
948 grid_points: Vec::new(),
949 };
950
951 for (name, param) in &self.param_space.continuous {
953 let n_points = 10; let mut points = Vec::new();
955
956 for i in 0..n_points {
957 let t = i as f64 / (n_points - 1) as f64;
958 let value = match param.scale {
959 ScaleType::Linear => param.min + t * (param.max - param.min),
960 ScaleType::Log => {
961 let log_min = param.min.ln();
962 let log_max = param.max.ln();
963 (log_min + t * (log_max - log_min)).exp()
964 }
965 ScaleType::Exponential => param.min * (param.max / param.min).powf(t),
966 };
967 points.push(value);
968 }
969
970 discretized.parameters.push(name.clone());
971 discretized.grid_points.push(points);
972 }
973
974 for (name, param) in &self.param_space.discrete {
976 discretized.parameters.push(name.clone());
977 discretized
978 .grid_points
979 .push(param.values.iter().map(|&v| v as f64).collect());
980 }
981
982 Ok(discretized)
983 }
984
985 fn create_parameter_variables(
987 &self,
988 var_map: &mut HashMap<String, usize>,
989 discretized: &DiscretizedSpace,
990 ) -> Result<(), String> {
991 let mut var_idx = 0;
992
993 for (param_idx, param_name) in discretized.parameters.iter().enumerate() {
995 for (value_idx, _) in discretized.grid_points[param_idx].iter().enumerate() {
996 let var_name = format!("param_{param_name}_{value_idx}");
997 var_map.insert(var_name, var_idx);
998 var_idx += 1;
999 }
1000 }
1001
1002 Ok(())
1003 }
1004
1005 fn add_quantum_hyperopt_objective(
1007 &self,
1008 qubo: &mut Array2<f64>,
1009 var_map: &HashMap<String, usize>,
1010 discretized: &DiscretizedSpace,
1011 tunneling_rate: f64,
1012 superposition_size: usize,
1013 ) -> Result<(), String> {
1014 for (var_name, &var_idx) in var_map {
1018 let performance = self.estimate_parameter_performance(var_name, discretized)?;
1020 qubo[[var_idx, var_idx]] -= performance;
1021 }
1022
1023 self.add_tunneling_terms(qubo, var_map, tunneling_rate)?;
1025
1026 self.add_superposition_bonus(qubo, var_map, superposition_size)?;
1028
1029 Ok(())
1030 }
1031
1032 fn estimate_parameter_performance(
1034 &self,
1035 _var_name: &str,
1036 _discretized: &DiscretizedSpace,
1037 ) -> Result<f64, String> {
1038 let mut rng = thread_rng();
1043 Ok(rng.gen::<f64>())
1044 }
1045
1046 fn add_tunneling_terms(
1048 &self,
1049 qubo: &mut Array2<f64>,
1050 var_map: &HashMap<String, usize>,
1051 tunneling_rate: f64,
1052 ) -> Result<(), String> {
1053 for (var1, &idx1) in var_map {
1056 for (var2, &idx2) in var_map {
1057 if var1 != var2 && self.are_neighbors(var1, var2) {
1058 qubo[[idx1, idx2]] -= tunneling_rate;
1060 }
1061 }
1062 }
1063
1064 Ok(())
1065 }
1066
1067 fn are_neighbors(&self, var1: &str, var2: &str) -> bool {
1069 let parts1: Vec<&str> = var1.split('_').collect();
1072 let parts2: Vec<&str> = var2.split('_').collect();
1073
1074 if parts1.len() >= 3 && parts2.len() >= 3 {
1075 if parts1[1] == parts2[1] {
1077 let idx1: usize = parts1[2].parse().unwrap_or(0);
1078 let idx2: usize = parts2[2].parse().unwrap_or(0);
1079 return (idx1 as i32 - idx2 as i32).abs() == 1;
1080 }
1081 }
1082
1083 false
1084 }
1085
1086 fn add_superposition_bonus(
1088 &self,
1089 qubo: &mut Array2<f64>,
1090 var_map: &HashMap<String, usize>,
1091 _superposition_size: usize,
1092 ) -> Result<(), String> {
1093 let bonus = -0.1;
1097
1098 for &idx in var_map.values() {
1100 qubo[[idx, idx]] += bonus;
1101 }
1102
1103 Ok(())
1104 }
1105
1106 fn add_standard_hyperopt_objective(
1108 &self,
1109 qubo: &mut Array2<f64>,
1110 var_map: &HashMap<String, usize>,
1111 discretized: &DiscretizedSpace,
1112 ) -> Result<(), String> {
1113 for (var_name, &var_idx) in var_map {
1116 let performance = self.estimate_parameter_performance(var_name, discretized)?;
1117 qubo[[var_idx, var_idx]] -= performance;
1118 }
1119
1120 self.add_smoothness_regularization(qubo, var_map)?;
1122
1123 Ok(())
1124 }
1125
1126 fn add_smoothness_regularization(
1128 &self,
1129 qubo: &mut Array2<f64>,
1130 var_map: &HashMap<String, usize>,
1131 ) -> Result<(), String> {
1132 let regularization_strength = 0.01;
1133
1134 for (var1, &idx1) in var_map {
1136 for (var2, &idx2) in var_map {
1137 if var1 != var2 && self.are_neighbors(var1, var2) {
1138 qubo[[idx1, idx2]] -= regularization_strength;
1140 }
1141 }
1142 }
1143
1144 Ok(())
1145 }
1146}
1147
/// Parameter grids produced by discretizing a parameter space.
#[derive(Debug, Clone)]
struct DiscretizedSpace {
    /// Parameter names; entry `i` owns `grid_points[i]`.
    parameters: Vec<String>,
    /// Candidate values of each parameter.
    grid_points: Vec<Vec<f64>>,
}

impl DiscretizedSpace {
    /// Returns the total number of grid points across all parameters.
    ///
    /// Despite the name this is the *sum* of the grid sizes (one binary
    /// variable per grid point), not their product.
    fn total_combinations(&self) -> usize {
        self.grid_points.iter().map(Vec::len).sum()
    }
}
1159
1160pub struct ModelSelector {
1162 candidates: Vec<CandidateModel>,
1164 criteria: ModelSelectionCriteria,
1166 ensemble_options: EnsembleOptions,
1168}
1169
/// A model considered by [`ModelSelector`], with bookkeeping values.
#[derive(Debug, Clone)]
pub struct CandidateModel {
    /// The model itself.
    pub model: MLModel,
    /// Previously observed performance, if any.
    pub prior_performance: Option<f64>,
    /// Complexity value; scale not specified here — confirm with callers.
    pub complexity: f64,
    /// Training-time value; unit not specified here — confirm with callers.
    pub training_time: f64,
}
1181
/// Weights for comparing candidate models; not interpreted by the code in this file.
#[derive(Debug, Clone, PartialEq)]
pub struct ModelSelectionCriteria {
    /// Weight of model performance.
    pub performance_weight: f64,
    /// Penalty applied to model complexity.
    pub complexity_penalty: f64,
    /// Penalty applied to training time.
    pub time_penalty: f64,
    /// Optional interpretability value; its exact meaning is not defined here.
    pub interpretability: Option<f64>,
}
1193
/// Ensemble settings of a [`ModelSelector`]; not interpreted by the code in this file.
#[derive(Debug, Clone)]
pub struct EnsembleOptions {
    /// Whether ensembles are allowed.
    pub allow_ensemble: bool,
    /// Maximum ensemble size.
    pub max_size: usize,
    /// How ensemble members are combined.
    pub method: EnsembleMethod,
    /// Minimum diversity value; its measure is not defined here — confirm with callers.
    pub min_diversity: f64,
}
1205
/// Ensemble combination method; not interpreted by the code in this file.
#[derive(Debug, Clone)]
pub enum EnsembleMethod {
    /// Averaging.
    Averaging,
    /// Weighted averaging.
    WeightedAveraging,
    /// Stacking with the given meta-model (boxed to keep the enum small).
    Stacking { meta_model: Box<MLModel> },
    /// Boosting.
    Boosting,
    /// Bagging.
    Bagging,
}
1219
1220#[cfg(test)]
1221mod tests {
1222 use super::*;
1223
1224 #[test]
1225 fn test_feature_selector() {
1226 let n_samples = 100;
1227 let n_features = 10;
1228
1229 let mut rng = thread_rng();
1230 let data = Array2::from_shape_fn((n_samples, n_features), |_| rng.gen::<f64>());
1231 let target = Array1::from_shape_fn(n_samples, |_| rng.gen::<f64>());
1232
1233 let feature_names: Vec<_> = (0..n_features).map(|i| format!("feature_{i}")).collect();
1234
1235 let mut feature_types = vec![FeatureType::Continuous; n_features];
1236
1237 let statistics = FeatureStatistics {
1238 means: data
1239 .mean_axis(scirs2_core::ndarray::Axis(0))
1240 .expect("test data should have valid axis for mean"),
1241 stds: data.std_axis(scirs2_core::ndarray::Axis(0), 0.0),
1242 target_correlations: Array1::from_shape_fn(n_features, |_| rng.gen::<f64>()),
1243 feature_correlations: Array2::from_shape_fn((n_features, n_features), |(i, j)| {
1244 if i == j {
1245 1.0
1246 } else {
1247 rng.gen::<f64>() * 0.5
1248 }
1249 }),
1250 missing_counts: Array1::zeros(n_features),
1251 unique_counts: Array1::from_elem(n_features, n_samples),
1252 };
1253
1254 let features = FeatureData {
1255 data,
1256 feature_names,
1257 target,
1258 feature_types,
1259 statistics,
1260 };
1261
1262 let selector = QuantumFeatureSelector::new(
1263 features,
1264 SelectionMethod::Filter {
1265 metric: FilterMetric::Correlation,
1266 threshold: 0.3,
1267 },
1268 );
1269
1270 let mut result = selector.build_qubo();
1271 assert!(result.is_ok());
1272 }
1273
1274 #[test]
1275 fn test_hyperparameter_optimizer() {
1276 let model = MLModel {
1277 model_type: ModelType::RandomForest { n_trees: 100 },
1278 hyperparameters: HashMap::new(),
1279 training_params: TrainingParameters {
1280 learning_rate: 0.01,
1281 epochs: 100,
1282 batch_size: 32,
1283 early_stopping: true,
1284 patience: 10,
1285 },
1286 };
1287
1288 let param_space = ParameterSpace {
1289 continuous: {
1290 let mut params = HashMap::new();
1291 params.insert(
1292 "learning_rate".to_string(),
1293 ContinuousParam {
1294 min: 0.001,
1295 max: 0.1,
1296 scale: ScaleType::Log,
1297 default: 0.01,
1298 },
1299 );
1300 params
1301 },
1302 discrete: {
1303 let mut params = HashMap::new();
1304 params.insert(
1305 "n_trees".to_string(),
1306 DiscreteParam {
1307 values: vec![50, 100, 200, 500],
1308 default: 100,
1309 },
1310 );
1311 params
1312 },
1313 categorical: HashMap::new(),
1314 conditional: Vec::new(),
1315 };
1316
1317 let evaluation = HyperparameterEvaluation {
1318 metric: EvaluationMetric::Accuracy,
1319 cv_strategy: CrossValidationStrategy::KFold {
1320 k: 5,
1321 shuffle: true,
1322 },
1323 constraints: ResourceConstraints {
1324 max_time_per_trial: None,
1325 max_total_time: None,
1326 max_memory: None,
1327 early_stopping: true,
1328 },
1329 };
1330
1331 let optimizer = HyperparameterOptimizer {
1332 model,
1333 param_space,
1334 strategy: OptimizationStrategy::QuantumOptimization {
1335 tunneling_rate: 0.1,
1336 superposition_size: 5,
1337 },
1338 evaluation,
1339 };
1340
1341 let mut result = optimizer.build_qubo();
1342 assert!(result.is_ok());
1343 }
1344}