use scirs2_core::ndarray::{Array1, Array2, ArrayView1, Axis};
use scirs2_core::random::essentials::{Normal as RandNormal, Uniform as RandUniform};
use scirs2_core::random::rngs::StdRng as RealStdRng;
use scirs2_core::random::Rng;
use scirs2_core::random::{thread_rng, SeedableRng};
use sklears_core::error::Result;

/// ln(2), used to convert entropies and mutual information from nats to bits.
const LOG2: f64 = std::f64::consts::LN_2;

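/// Random Fourier feature map whose projection directions are scaled by each
/// input feature's mutual information with the target, so informative
/// features dominate the approximate kernel.
///
/// # Example
///
/// A minimal sketch (doctest ignored; the import path depends on the crate
/// layout):
///
/// ```ignore
/// use scirs2_core::ndarray::array;
/// use sklears_core::traits::{Fit, Transform};
///
/// let x = array![[1.0, 2.0], [2.0, 4.0], [3.0, 6.0], [4.0, 8.0]];
/// let y = array![1.0, 2.0, 3.0, 4.0];
/// let fitted = MutualInformationKernel::new(8)
///     .n_bins(4)
///     .random_state(42)
///     .fit(&x, &y)
///     .unwrap();
/// // Each component yields a (cos, sin) pair: output width is 2 * n_components.
/// let features = fitted.transform(&x).unwrap();
/// assert_eq!(features.shape(), &[4, 16]);
/// ```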
#[derive(Debug, Clone)]
pub struct MutualInformationKernel {
    n_components: usize,
    n_bins: usize,
    sigma: f64,
    random_state: Option<u64>,
}

impl MutualInformationKernel {
    pub fn new(n_components: usize) -> Self {
        Self {
            n_components,
            n_bins: 10,
            sigma: 1.0,
            random_state: None,
        }
    }

    /// Sets the number of histogram bins used for the mutual information estimates.
    pub fn n_bins(mut self, n_bins: usize) -> Self {
        self.n_bins = n_bins;
        self
    }

    /// Sets the kernel bandwidth; projection directions are drawn with std 1 / sigma.
    pub fn sigma(mut self, sigma: f64) -> Self {
        self.sigma = sigma;
        self
    }

    /// Fixes the RNG seed for reproducible random features.
    pub fn random_state(mut self, seed: u64) -> Self {
        self.random_state = Some(seed);
        self
    }
}

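/// State learned by [`MutualInformationKernel`]: per-feature MI scores, the
/// normalized weights derived from them, and the sampled projection matrix.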
#[derive(Debug, Clone)]
pub struct FittedMutualInformationKernel {
    feature_weights: Array1<f64>,
    random_features: Array2<f64>,
    mi_scores: Array1<f64>,
}

impl sklears_core::traits::Fit<Array2<f64>, Array1<f64>> for MutualInformationKernel {
    type Fitted = FittedMutualInformationKernel;

    fn fit(self, x: &Array2<f64>, y: &Array1<f64>) -> Result<Self::Fitted> {
        let mut rng = match self.random_state {
            Some(seed) => RealStdRng::seed_from_u64(seed),
            None => RealStdRng::from_seed(thread_rng().gen()),
        };

        let (_n_samples, n_features) = x.dim();

        // Estimate each feature's mutual information with the target.
        let mut mi_scores = Array1::zeros(n_features);
        for (i, score) in mi_scores.iter_mut().enumerate() {
            let feature = x.column(i);
            *score = compute_mutual_information(&feature.view(), &y.view(), self.n_bins)?;
        }

        // Normalize the MI scores into weights; fall back to uniform weights
        // when no feature carries any mutual information.
        let max_mi = mi_scores.fold(0.0_f64, |acc, &x| acc.max(x));
        let feature_weights = if max_mi > 0.0 {
            &mi_scores / max_mi
        } else {
            Array1::ones(n_features) / n_features as f64
        };

        // Sample random Fourier directions, scaling each coordinate by the
        // square root of its feature weight so informative features receive
        // proportionally larger projections.
        let normal = RandNormal::new(0.0, 1.0 / self.sigma).unwrap();
        let mut random_features = Array2::zeros((self.n_components, n_features));

        for i in 0..self.n_components {
            for j in 0..n_features {
                let weight = feature_weights[j].sqrt();
                random_features[[i, j]] = rng.sample(normal) * weight;
            }
        }

        Ok(FittedMutualInformationKernel {
            feature_weights,
            random_features,
            mi_scores,
        })
    }
}

impl sklears_core::traits::Transform<Array2<f64>, Array2<f64>> for FittedMutualInformationKernel {
    fn transform(&self, x: &Array2<f64>) -> Result<Array2<f64>> {
        let (n_samples, _) = x.dim();
        // Each random direction contributes a (cos, sin) pair, as in standard
        // random Fourier features, so the output has 2 * n_components columns.
        let mut features = Array2::zeros((n_samples, self.random_features.nrows() * 2));

        for (i, sample) in x.axis_iter(Axis(0)).enumerate() {
            for (j, random_feature) in self.random_features.axis_iter(Axis(0)).enumerate() {
                let projection = sample.dot(&random_feature);
                features[[i, 2 * j]] = projection.cos();
                features[[i, 2 * j + 1]] = projection.sin();
            }
        }

        Ok(features)
    }
}

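/// Filter-style feature selector that scores each feature with an
/// entropy-based criterion and keeps the top `n_features`.
///
/// # Example
///
/// A minimal sketch (doctest ignored; the import path depends on the crate
/// layout):
///
/// ```ignore
/// use scirs2_core::ndarray::array;
/// use sklears_core::traits::{Fit, Transform};
///
/// let x = array![[1.0, 0.1], [2.0, 0.2], [3.0, 0.3], [4.0, 0.4]];
/// let y = array![1.0, 2.0, 3.0, 4.0];
/// let fitted = EntropyFeatureSelector::new(1)
///     .selection_method(EntropySelectionMethod::MaxMutualInformation)
///     .n_bins(3)
///     .fit(&x, &y)
///     .unwrap();
/// let reduced = fitted.transform(&x).unwrap(); // shape: (4, 1)
/// ```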
#[derive(Debug, Clone)]
pub struct EntropyFeatureSelector {
    n_features: usize,
    selection_method: EntropySelectionMethod,
    n_bins: usize,
}

#[derive(Debug, Clone)]
pub enum EntropySelectionMethod {
    /// Rank features by their marginal entropy.
    MaxEntropy,
    /// Rank features by their mutual information with the target.
    MaxMutualInformation,
    /// Rank features by mutual information normalized by the target entropy.
    InformationGain,
    /// Minimum-redundancy maximum-relevance scoring (simplified form; see fit).
    MRMR,
}

impl EntropyFeatureSelector {
    pub fn new(n_features: usize) -> Self {
        Self {
            n_features,
            selection_method: EntropySelectionMethod::MaxMutualInformation,
            n_bins: 10,
        }
    }

    /// Sets the scoring criterion used to rank features.
    pub fn selection_method(mut self, method: EntropySelectionMethod) -> Self {
        self.selection_method = method;
        self
    }

    /// Sets the number of histogram bins used by the entropy estimates.
    pub fn n_bins(mut self, n_bins: usize) -> Self {
        self.n_bins = n_bins;
        self
    }
}

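/// State learned by [`EntropyFeatureSelector`]: the indices of the kept
/// features together with the per-feature scores and entropies.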
#[derive(Debug, Clone)]
pub struct FittedEntropyFeatureSelector {
    selected_features: Vec<usize>,
    feature_scores: Array1<f64>,
    entropies: Array1<f64>,
}

impl sklears_core::traits::Fit<Array2<f64>, Array1<f64>> for EntropyFeatureSelector {
    type Fitted = FittedEntropyFeatureSelector;

    fn fit(self, x: &Array2<f64>, y: &Array1<f64>) -> Result<Self::Fitted> {
        let (_, n_features) = x.dim();

        let mut entropies = Array1::zeros(n_features);
        let mut feature_scores = Array1::zeros(n_features);

        for i in 0..n_features {
            let feature = x.column(i);
            entropies[i] = compute_entropy(&feature.to_owned(), self.n_bins)?;

            feature_scores[i] = match self.selection_method {
                EntropySelectionMethod::MaxEntropy => entropies[i],
                EntropySelectionMethod::MaxMutualInformation => {
                    compute_mutual_information(&feature.view(), &y.view(), self.n_bins)?
                }
                EntropySelectionMethod::InformationGain => {
                    let mi = compute_mutual_information(&feature.view(), &y.view(), self.n_bins)?;
                    let target_entropy = compute_entropy(&y.to_owned(), self.n_bins)?;
                    mi / target_entropy.max(1e-8)
                }
                EntropySelectionMethod::MRMR => {
                    // Simplified mRMR: relevance (MI with the target) minus a
                    // fixed fraction of the feature's own entropy as a crude
                    // redundancy penalty.
                    let relevance =
                        compute_mutual_information(&feature.view(), &y.view(), self.n_bins)?;
                    relevance - entropies[i] * 0.1
                }
            };
        }

        // Sort features by score (descending) and keep the top n_features.
        // Treat NaN scores as equal so the sort cannot panic.
        let mut indexed_scores: Vec<(usize, f64)> = feature_scores
            .iter()
            .enumerate()
            .map(|(i, &score)| (i, score))
            .collect();
        indexed_scores.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));

        let selected_features: Vec<usize> = indexed_scores
            .iter()
            .take(self.n_features)
            .map(|(i, _)| *i)
            .collect();

        Ok(FittedEntropyFeatureSelector {
            selected_features,
            feature_scores,
            entropies,
        })
    }
}

impl sklears_core::traits::Transform<Array2<f64>, Array2<f64>> for FittedEntropyFeatureSelector {
    fn transform(&self, x: &Array2<f64>) -> Result<Array2<f64>> {
        let (n_samples, _) = x.dim();
        let mut selected_x = Array2::zeros((n_samples, self.selected_features.len()));

        for (j, &feature_idx) in self.selected_features.iter().enumerate() {
            selected_x.column_mut(j).assign(&x.column(feature_idx));
        }

        Ok(selected_x)
    }
}

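/// Random projection features weighted by how closely each projected
/// distribution matches a reference distribution, measured with KL divergence.
///
/// # Example
///
/// A minimal sketch (doctest ignored; the import path depends on the crate
/// layout):
///
/// ```ignore
/// use scirs2_core::ndarray::array;
/// use sklears_core::traits::{Fit, Transform};
///
/// let x = array![[1.0, 2.0], [2.0, 3.0], [3.0, 4.0], [4.0, 5.0]];
/// let fitted = KLDivergenceKernel::new(5)
///     .reference_distribution(KLReferenceDistribution::Gaussian { mean: 0.0, std: 1.0 })
///     .random_state(42)
///     .fit(&x, &())
///     .unwrap();
/// let features = fitted.transform(&x).unwrap(); // shape: (4, 5)
/// ```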
#[derive(Debug, Clone)]
pub struct KLDivergenceKernel {
    n_components: usize,
    reference_distribution: KLReferenceDistribution,
    n_bins: usize,
    random_state: Option<u64>,
}

#[derive(Debug, Clone)]
pub enum KLReferenceDistribution {
    /// Gaussian reference with the given mean and standard deviation.
    Gaussian { mean: f64, std: f64 },
    /// Uniform reference on the interval [low, high].
    Uniform { low: f64, high: f64 },
    /// Use the empirical histogram of each projection as its own reference.
    Empirical,
}

impl KLDivergenceKernel {
    pub fn new(n_components: usize) -> Self {
        Self {
            n_components,
            reference_distribution: KLReferenceDistribution::Gaussian {
                mean: 0.0,
                std: 1.0,
            },
            n_bins: 20,
            random_state: None,
        }
    }

    /// Sets the reference distribution the projections are compared against.
    pub fn reference_distribution(mut self, dist: KLReferenceDistribution) -> Self {
        self.reference_distribution = dist;
        self
    }

    /// Sets the number of histogram bins used for the KL estimates.
    pub fn n_bins(mut self, n_bins: usize) -> Self {
        self.n_bins = n_bins;
        self
    }

    /// Fixes the RNG seed for reproducible projections.
    pub fn random_state(mut self, seed: u64) -> Self {
        self.random_state = Some(seed);
        self
    }
}

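/// State learned by [`KLDivergenceKernel`]: the projection matrix, the
/// reference histogram for each projection, and the normalized KL weights.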
#[derive(Debug, Clone)]
pub struct FittedKLDivergenceKernel {
    random_projections: Array2<f64>,
    reference_histograms: Vec<Array1<f64>>,
    kl_weights: Array1<f64>,
}

impl sklears_core::traits::Fit<Array2<f64>, ()> for KLDivergenceKernel {
    type Fitted = FittedKLDivergenceKernel;

    fn fit(self, x: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
        let mut rng = match self.random_state {
            Some(seed) => RealStdRng::seed_from_u64(seed),
            None => RealStdRng::from_seed(thread_rng().gen()),
        };

        let (_, n_features) = x.dim();

        // Sample standard-normal projection directions.
        let normal = RandNormal::new(0.0, 1.0).unwrap();
        let mut random_projections = Array2::zeros((self.n_components, n_features));
        for i in 0..self.n_components {
            for j in 0..n_features {
                random_projections[[i, j]] = rng.sample(normal);
            }
        }

        let mut reference_histograms = Vec::new();
        let mut kl_weights = Array1::zeros(self.n_components);

        for i in 0..self.n_components {
            let projection = x.dot(&random_projections.row(i));
            let (hist, bins) = compute_histogram(&projection, self.n_bins)?;

            let reference_hist = match &self.reference_distribution {
                KLReferenceDistribution::Gaussian { mean, std } => {
                    compute_gaussian_reference_histogram(&bins, *mean, *std)
                }
                KLReferenceDistribution::Uniform { low, high } => {
                    compute_uniform_reference_histogram(&bins, *low, *high)
                }
                KLReferenceDistribution::Empirical => hist.clone(),
            };

            // Projections that match the reference (small KL) get large weights.
            let kl_div = compute_kl_divergence(&hist, &reference_hist)?;
            kl_weights[i] = (-kl_div).exp();
            reference_histograms.push(reference_hist);
        }

        // Normalize the weights to sum to one.
        let weight_sum = kl_weights.sum();
        if weight_sum > 0.0 {
            kl_weights /= weight_sum;
        }

        Ok(FittedKLDivergenceKernel {
            random_projections,
            reference_histograms,
            kl_weights,
        })
    }
}

impl sklears_core::traits::Transform<Array2<f64>, Array2<f64>> for FittedKLDivergenceKernel {
    fn transform(&self, x: &Array2<f64>) -> Result<Array2<f64>> {
        let (n_samples, _) = x.dim();
        let mut features = Array2::zeros((n_samples, self.random_projections.nrows()));

        for (i, sample) in x.axis_iter(Axis(0)).enumerate() {
            for j in 0..self.random_projections.nrows() {
                let projection = sample.dot(&self.random_projections.row(j));
                let weight = self.kl_weights[j];
                features[[i, j]] = projection * weight;
            }
        }

        Ok(features)
    }
}

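/// Soft-clustering feature extractor inspired by the information bottleneck:
/// cluster assignments trade compression of the input against mutual
/// information with the target, with `beta` controlling the trade-off.
///
/// # Example
///
/// A minimal sketch (doctest ignored; the import path depends on the crate
/// layout):
///
/// ```ignore
/// use scirs2_core::ndarray::array;
/// use sklears_core::traits::{Fit, Transform};
///
/// let x = array![[1.0, 2.0], [2.0, 3.0], [3.0, 4.0], [4.0, 5.0]];
/// let y = array![0.0, 0.0, 1.0, 1.0];
/// let fitted = InformationBottleneckExtractor::new(2)
///     .beta(1.0)
///     .max_iterations(10)
///     .random_state(42)
///     .fit(&x, &y)
///     .unwrap();
/// let features = fitted.transform(&x).unwrap(); // shape: (4, 2)
/// ```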
#[derive(Debug, Clone)]
pub struct InformationBottleneckExtractor {
    n_components: usize,
    beta: f64,
    n_bins: usize,
    max_iterations: usize,
    tolerance: f64,
    random_state: Option<u64>,
}

impl InformationBottleneckExtractor {
    pub fn new(n_components: usize) -> Self {
        Self {
            n_components,
            beta: 1.0,
            n_bins: 10,
            max_iterations: 100,
            tolerance: 1e-6,
            random_state: None,
        }
    }

    /// Sets the compression/prediction trade-off parameter.
    pub fn beta(mut self, beta: f64) -> Self {
        self.beta = beta;
        self
    }

    /// Sets the number of histogram bins used for the MI estimates.
    pub fn n_bins(mut self, n_bins: usize) -> Self {
        self.n_bins = n_bins;
        self
    }

    /// Caps the number of alternating-update iterations.
    pub fn max_iterations(mut self, max_iterations: usize) -> Self {
        self.max_iterations = max_iterations;
        self
    }

    /// Sets the convergence threshold on the objective change.
    pub fn tolerance(mut self, tolerance: f64) -> Self {
        self.tolerance = tolerance;
        self
    }

    /// Fixes the RNG seed for reproducible initialization.
    pub fn random_state(mut self, seed: u64) -> Self {
        self.random_state = Some(seed);
        self
    }
}

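/// State learned by [`InformationBottleneckExtractor`]: cluster centers, soft
/// assignment probabilities, and each cluster's MI with the target.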
#[derive(Debug, Clone)]
pub struct FittedInformationBottleneckExtractor {
    cluster_centers: Array2<f64>,
    assignment_probs: Array2<f64>,
    information_values: Array1<f64>,
    n_components: usize,
}

impl sklears_core::traits::Fit<Array2<f64>, Array1<f64>> for InformationBottleneckExtractor {
    type Fitted = FittedInformationBottleneckExtractor;

    fn fit(self, x: &Array2<f64>, y: &Array1<f64>) -> Result<Self::Fitted> {
        let mut rng = match self.random_state {
            Some(seed) => RealStdRng::seed_from_u64(seed),
            None => RealStdRng::from_seed(thread_rng().gen()),
        };

        let (n_samples, n_features) = x.dim();

        // Initialize cluster centers from randomly chosen samples.
        let mut cluster_centers = Array2::zeros((self.n_components, n_features));
        let uniform = RandUniform::new(0, n_samples).unwrap();
        for i in 0..self.n_components {
            let sample_idx = rng.sample(uniform);
            cluster_centers.row_mut(i).assign(&x.row(sample_idx));
        }

        let mut assignment_probs = Array2::zeros((n_samples, self.n_components));
        let mut prev_objective = f64::NEG_INFINITY;

        // Alternate between updating soft assignments and cluster centers
        // until the objective stops improving.
        for _iteration in 0..self.max_iterations {
            self.update_assignment_probs(x, &cluster_centers, &mut assignment_probs)?;

            self.update_cluster_centers(x, &assignment_probs, &mut cluster_centers)?;

            let objective = self.compute_objective(x, y, &cluster_centers, &assignment_probs)?;

            if (objective - prev_objective).abs() < self.tolerance {
                break;
            }
            prev_objective = objective;
        }

        // Score each cluster by the mutual information between its soft
        // assignments and the target.
        let mut information_values = Array1::zeros(self.n_components);
        for i in 0..self.n_components {
            let cluster_assignments = assignment_probs.column(i);
            information_values[i] =
                compute_mutual_information(&cluster_assignments.view(), &y.view(), self.n_bins)?;
        }

        Ok(FittedInformationBottleneckExtractor {
            cluster_centers,
            assignment_probs,
            information_values,
            n_components: self.n_components,
        })
    }
}

impl InformationBottleneckExtractor {
    fn update_assignment_probs(
        &self,
        x: &Array2<f64>,
        cluster_centers: &Array2<f64>,
        assignment_probs: &mut Array2<f64>,
    ) -> Result<()> {
        let (n_samples, _) = x.dim();

        for i in 0..n_samples {
            let sample = x.row(i);
            let mut log_probs = Array1::zeros(self.n_components);

            // Soft assignment via a distance-based Gibbs distribution with
            // inverse temperature beta.
            for j in 0..self.n_components {
                let center = cluster_centers.row(j);
                let distance = (&sample - &center).mapv(|v| v * v).sum().sqrt();
                log_probs[j] = -self.beta * distance;
            }

            // Numerically stable softmax: subtract the max before exponentiating.
            let max_log_prob = log_probs.fold(f64::NEG_INFINITY, |acc, &v| acc.max(v));
            log_probs -= max_log_prob;
            let exp_probs = log_probs.mapv(|v| v.exp());
            let prob_sum = exp_probs.sum();

            for j in 0..self.n_components {
                assignment_probs[[i, j]] = exp_probs[j] / prob_sum;
            }
        }

        Ok(())
    }

    fn update_cluster_centers(
        &self,
        x: &Array2<f64>,
        assignment_probs: &Array2<f64>,
        cluster_centers: &mut Array2<f64>,
    ) -> Result<()> {
        let (n_samples, n_features) = x.dim();

        // Each center becomes the assignment-weighted mean of the samples.
        for j in 0..self.n_components {
            let mut weighted_sum = Array1::zeros(n_features);
            let mut weight_sum = 0.0;

            for i in 0..n_samples {
                let weight = assignment_probs[[i, j]];
                weighted_sum += &(&x.row(i) * weight);
                weight_sum += weight;
            }

            if weight_sum > 1e-8 {
                cluster_centers
                    .row_mut(j)
                    .assign(&(weighted_sum / weight_sum));
            }
        }

        Ok(())
    }

    fn compute_objective(
        &self,
        x: &Array2<f64>,
        y: &Array1<f64>,
        _cluster_centers: &Array2<f64>,
        assignment_probs: &Array2<f64>,
    ) -> Result<f64> {
        let (n_samples, _) = x.dim();
        let mut compression_term = 0.0;
        let mut prediction_term = 0.0;

        // Compression term: entropy of the soft assignments.
        for i in 0..n_samples {
            for j in 0..self.n_components {
                let prob = assignment_probs[[i, j]];
                if prob > 1e-8 {
                    compression_term -= prob * prob.ln();
                }
            }
        }

        // Prediction term: mutual information between assignments and target.
        for j in 0..self.n_components {
            let cluster_assignments = assignment_probs.column(j);
            let mi =
                compute_mutual_information(&cluster_assignments.view(), &y.view(), self.n_bins)?;
            prediction_term += mi;
        }

        Ok(prediction_term - self.beta * compression_term)
    }
}

impl sklears_core::traits::Transform<Array2<f64>, Array2<f64>>
    for FittedInformationBottleneckExtractor
{
    fn transform(&self, x: &Array2<f64>) -> Result<Array2<f64>> {
        let (n_samples, _) = x.dim();
        let mut features = Array2::zeros((n_samples, self.n_components));

        for i in 0..n_samples {
            let sample = x.row(i);

            // Feature j is an RBF-style similarity to center j, scaled by how
            // informative that cluster is about the target.
            for j in 0..self.n_components {
                let center = self.cluster_centers.row(j);
                let distance = (&sample - &center).mapv(|v| v * v).sum().sqrt();
                let weight = self.information_values[j];
                features[[i, j]] = (-distance).exp() * weight;
            }
        }

        Ok(features)
    }
}

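/// Estimates the Shannon entropy of `data` in bits from an `n_bins` histogram:
/// H(X) = -sum_i p_i * log2(p_i).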
fn compute_entropy(data: &Array1<f64>, n_bins: usize) -> Result<f64> {
    let (hist, _) = compute_histogram(data, n_bins)?;
    let mut entropy = 0.0;

    for &prob in hist.iter() {
        if prob > 1e-12 {
            entropy -= prob * prob.ln();
        }
    }

    // Convert from nats to bits.
    Ok(entropy / LOG2)
}

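/// Estimates the mutual information between `x` and `y` in bits from
/// histogram approximations of the marginal and joint densities:
/// I(X; Y) = sum_{i,j} p(x_i, y_j) * log2(p(x_i, y_j) / (p(x_i) * p(y_j))).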
fn compute_mutual_information(
    x: &ArrayView1<f64>,
    y: &ArrayView1<f64>,
    n_bins: usize,
) -> Result<f64> {
    let (hist_x, bins_x) = compute_histogram(&x.to_owned(), n_bins)?;
    let (hist_y, bins_y) = compute_histogram(&y.to_owned(), n_bins)?;
    let joint_hist = compute_joint_histogram(x, y, &bins_x, &bins_y)?;

    let mut mi = 0.0;

    for i in 0..n_bins {
        for j in 0..n_bins {
            let p_xy = joint_hist[[i, j]];
            let p_x = hist_x[i];
            let p_y = hist_y[j];

            if p_xy > 1e-12 && p_x > 1e-12 && p_y > 1e-12 {
                mi += p_xy * (p_xy / (p_x * p_y)).ln();
            }
        }
    }

    // Convert from nats to bits.
    Ok(mi / LOG2)
}

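/// Builds a normalized `n_bins` histogram of `data`, returning the bin
/// probabilities together with the bin edges. Constant data collapses into
/// the first bin with probability one.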
fn compute_histogram(data: &Array1<f64>, n_bins: usize) -> Result<(Array1<f64>, Array1<f64>)> {
    let min_val = data.fold(f64::INFINITY, |acc, &x| acc.min(x));
    let max_val = data.fold(f64::NEG_INFINITY, |acc, &x| acc.max(x));

    // Degenerate case: all values are (nearly) identical.
    if (max_val - min_val).abs() < 1e-12 {
        let mut hist = Array1::zeros(n_bins);
        hist[0] = 1.0;
        let bins = Array1::linspace(min_val - 0.5, max_val + 0.5, n_bins + 1);
        return Ok((hist, bins));
    }

    let bin_width = (max_val - min_val) / n_bins as f64;
    let bins = Array1::linspace(min_val, max_val + bin_width, n_bins + 1);
    let mut hist = Array1::zeros(n_bins);

    for &value in data.iter() {
        let bin_idx = ((value - min_val) / bin_width).floor() as usize;
        // Clamp so the maximum value lands in the last bin.
        let bin_idx = bin_idx.min(n_bins - 1);
        hist[bin_idx] += 1.0;
    }

    hist /= data.len() as f64;
    Ok((hist, bins))
}

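/// Builds the normalized joint histogram of `x` and `y` over the bin edges
/// produced by [`compute_histogram`].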
fn compute_joint_histogram(
    x: &ArrayView1<f64>,
    y: &ArrayView1<f64>,
    bins_x: &Array1<f64>,
    bins_y: &Array1<f64>,
) -> Result<Array2<f64>> {
    let n_bins_x = bins_x.len() - 1;
    let n_bins_y = bins_y.len() - 1;
    let mut joint_hist = Array2::zeros((n_bins_x, n_bins_y));

    let min_x = bins_x[0];
    let max_x = bins_x[n_bins_x];
    let min_y = bins_y[0];
    let max_y = bins_y[n_bins_y];

    let bin_width_x = (max_x - min_x) / n_bins_x as f64;
    let bin_width_y = (max_y - min_y) / n_bins_y as f64;

    for (&val_x, &val_y) in x.iter().zip(y.iter()) {
        let bin_x = ((val_x - min_x) / bin_width_x).floor() as usize;
        let bin_y = ((val_y - min_y) / bin_width_y).floor() as usize;

        let bin_x = bin_x.min(n_bins_x - 1);
        let bin_y = bin_y.min(n_bins_y - 1);

        joint_hist[[bin_x, bin_y]] += 1.0;
    }

    joint_hist /= x.len() as f64;
    Ok(joint_hist)
}

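/// Computes the Kullback-Leibler divergence
/// D_KL(P || Q) = sum_i p_i * ln(p_i / q_i)
/// between two discrete distributions, returning infinity when P has mass
/// where Q has none.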
fn compute_kl_divergence(p: &Array1<f64>, q: &Array1<f64>) -> Result<f64> {
    let mut kl = 0.0;

    for (&p_i, &q_i) in p.iter().zip(q.iter()) {
        if p_i > 1e-12 {
            if q_i > 1e-12 {
                kl += p_i * (p_i / q_i).ln();
            } else {
                // P has support where Q does not: the divergence is infinite.
                return Ok(f64::INFINITY);
            }
        }
    }

    Ok(kl)
}

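/// Evaluates the Gaussian density N(mean, std^2) at each bin center and
/// renormalizes the result into a discrete reference distribution.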
fn compute_gaussian_reference_histogram(bins: &Array1<f64>, mean: f64, std: f64) -> Array1<f64> {
    let n_bins = bins.len() - 1;
    let mut hist = Array1::zeros(n_bins);

    for i in 0..n_bins {
        let bin_center = (bins[i] + bins[i + 1]) / 2.0;
        let variance = std * std;
        let exp_part = -0.5 * (bin_center - mean).powi(2) / variance;
        hist[i] = (1.0 / (std * (2.0 * std::f64::consts::PI).sqrt())) * exp_part.exp();
    }

    let hist_sum = hist.sum();
    if hist_sum > 0.0 {
        hist /= hist_sum;
    }

    hist
}

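/// Integrates the uniform density on [low, high] over each bin (proportional
/// to the overlap length) and renormalizes into a discrete reference
/// distribution.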
fn compute_uniform_reference_histogram(bins: &Array1<f64>, low: f64, high: f64) -> Array1<f64> {
    let n_bins = bins.len() - 1;
    let mut hist = Array1::zeros(n_bins);
    let uniform_density = 1.0 / (high - low);

    for i in 0..n_bins {
        let bin_start = bins[i];
        let bin_end = bins[i + 1];

        if bin_end <= low || bin_start >= high {
            hist[i] = 0.0;
        } else {
            let overlap_start = bin_start.max(low);
            let overlap_end = bin_end.min(high);
            let overlap_length = overlap_end - overlap_start;
            hist[i] = overlap_length * uniform_density;
        }
    }

    let hist_sum = hist.sum();
    if hist_sum > 0.0 {
        hist /= hist_sum;
    }

    hist
}

#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::array;
    use sklears_core::traits::{Fit, Transform};

    #[test]
    fn test_mutual_information_kernel() {
        let x = array![
            [1.0, 2.0, 3.0],
            [2.0, 4.0, 6.0],
            [3.0, 6.0, 9.0],
            [4.0, 8.0, 12.0],
        ];
        let y = array![1.0, 2.0, 3.0, 4.0];

        let mi_kernel = MutualInformationKernel::new(10)
            .n_bins(5)
            .sigma(1.0)
            .random_state(42);

        let fitted = mi_kernel.fit(&x, &y).unwrap();
        let features = fitted.transform(&x).unwrap();

        // 10 components produce 20 columns (one cos/sin pair per component).
        assert_eq!(features.shape(), &[4, 20]);
        assert!(fitted.mi_scores.iter().all(|&x| x >= 0.0));
    }

    #[test]
    fn test_entropy_feature_selector() {
        let x = array![
            [1.0, 0.1, 100.0],
            [2.0, 0.2, 200.0],
            [3.0, 0.3, 300.0],
            [4.0, 0.4, 400.0],
        ];
        let y = array![1.0, 2.0, 3.0, 4.0];

        let selector = EntropyFeatureSelector::new(2)
            .selection_method(EntropySelectionMethod::MaxMutualInformation)
            .n_bins(3);

        let fitted = selector.fit(&x, &y).unwrap();
        let selected_features = fitted.transform(&x).unwrap();

        assert_eq!(selected_features.shape(), &[4, 2]);
        assert_eq!(fitted.selected_features.len(), 2);
    }

    #[test]
    fn test_kl_divergence_kernel() {
        let x = array![[1.0, 2.0], [2.0, 3.0], [3.0, 4.0], [4.0, 5.0]];

        let kl_kernel = KLDivergenceKernel::new(5)
            .reference_distribution(KLReferenceDistribution::Gaussian {
                mean: 0.0,
                std: 1.0,
            })
            .n_bins(10)
            .random_state(42);

        let fitted = kl_kernel.fit(&x, &()).unwrap();
        let features = fitted.transform(&x).unwrap();

        assert_eq!(features.shape(), &[4, 5]);
        assert!(fitted.kl_weights.iter().all(|&x| x >= 0.0));
    }

    #[test]
    fn test_information_bottleneck_extractor() {
        let x = array![
            [1.0, 2.0, 3.0],
            [2.0, 3.0, 4.0],
            [3.0, 4.0, 5.0],
            [4.0, 5.0, 6.0],
        ];
        let y = array![0.0, 0.0, 1.0, 1.0];

        let ib_extractor = InformationBottleneckExtractor::new(2)
            .beta(1.0)
            .n_bins(2)
            .max_iterations(10)
            .random_state(42);

        let fitted = ib_extractor.fit(&x, &y).unwrap();
        let features = fitted.transform(&x).unwrap();

        assert_eq!(features.shape(), &[4, 2]);
        assert!(fitted.information_values.iter().all(|&x| x >= 0.0));
    }

    #[test]
    fn test_entropy_computation() {
        let data = array![1.0, 1.0, 2.0, 2.0, 3.0, 3.0];
        let entropy = compute_entropy(&data, 3).unwrap();

        // Three equally likely bins give H = log2(3) ≈ 1.585 bits.
        assert!((entropy - 1.585).abs() < 0.1);
    }

    #[test]
    fn test_mutual_information_computation() {
        let x = array![1.0, 1.0, 2.0, 2.0];
        // Perfectly dependent variables: MI should approach H(X) = 1 bit.
        let y = array![1.0, 1.0, 2.0, 2.0];
        let mi = compute_mutual_information(&x.view(), &y.view(), 2).unwrap();

        assert!(mi > 0.5);
    }
}