sklears_kernel_approximation/information_theoretic.rs

//! Information-Theoretic Kernel Methods
//!
//! This module implements kernel approximation methods based on information theory,
//! including mutual information kernels, entropy-based feature selection,
//! and KL-divergence kernel features.
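//!
//! A minimal usage sketch (illustrative only; it mirrors the unit tests at the
//! bottom of this file and is not compiled as a doctest):
//!
//! ```ignore
//! use scirs2_core::ndarray::array;
//! use sklears_core::traits::{Fit, Transform};
//!
//! let x = array![[1.0, 2.0], [2.0, 4.0], [3.0, 6.0]];
//! let y = array![1.0, 2.0, 3.0];
//!
//! let fitted = MutualInformationKernel::new(8).random_state(0).fit(&x, &y)?;
//! let features = fitted.transform(&x)?; // shape (3, 16): one cos/sin pair per component
//! ```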

use scirs2_core::ndarray::{Array1, Array2, ArrayView1, Axis};
use scirs2_core::random::essentials::{Normal as RandNormal, Uniform as RandUniform};
use scirs2_core::random::rngs::StdRng as RealStdRng;
use scirs2_core::random::Rng;
use scirs2_core::random::{thread_rng, SeedableRng};
use sklears_core::error::Result;

/// Natural logarithm of 2, used to convert entropy values from nats to bits.
const LOG2: f64 = std::f64::consts::LN_2;

/// Mutual Information Kernel Approximation
///
/// Approximates kernels based on mutual information between features and targets.
/// Uses histogram-based MI estimation with a configurable number of equal-width bins.
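///
/// Each fitted component draws a random weight vector with zero-mean Gaussian
/// entries of standard deviation `1 / sigma`, rescaled per input feature by the
/// square root of that feature's normalized MI score; `transform` then emits the
/// random Fourier pair `[cos(w · x), sin(w · x)]` for every component.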
#[derive(Debug, Clone)]
pub struct MutualInformationKernel {
    n_components: usize,
    n_bins: usize,
    sigma: f64,
    random_state: Option<u64>,
}

impl MutualInformationKernel {
    /// Create a new Mutual Information Kernel
    pub fn new(n_components: usize) -> Self {
        Self {
            n_components,
            n_bins: 10,
            sigma: 1.0,
            random_state: None,
        }
    }

    /// Set number of bins for histogram estimation
    pub fn n_bins(mut self, n_bins: usize) -> Self {
        self.n_bins = n_bins;
        self
    }

    /// Set bandwidth parameter
    pub fn sigma(mut self, sigma: f64) -> Self {
        self.sigma = sigma;
        self
    }

    /// Set random state for reproducibility
    pub fn random_state(mut self, seed: u64) -> Self {
        self.random_state = Some(seed);
        self
    }
}

/// Fitted Mutual Information Kernel
#[derive(Debug, Clone)]
pub struct FittedMutualInformationKernel {
    feature_weights: Array1<f64>,
    random_features: Array2<f64>,
    mi_scores: Array1<f64>,
}

impl sklears_core::traits::Fit<Array2<f64>, Array1<f64>> for MutualInformationKernel {
    type Fitted = FittedMutualInformationKernel;

    fn fit(self, x: &Array2<f64>, y: &Array1<f64>) -> Result<Self::Fitted> {
        let mut rng = match self.random_state {
            Some(seed) => RealStdRng::seed_from_u64(seed),
            None => RealStdRng::from_seed(thread_rng().gen()),
        };

        let (_n_samples, n_features) = x.dim();

        // Compute mutual information scores for each feature
        let mut mi_scores = Array1::zeros(n_features);
        for (i, score) in mi_scores.iter_mut().enumerate() {
            let feature = x.column(i);
            *score = compute_mutual_information(&feature.view(), &y.view(), self.n_bins)?;
        }

        // Normalize MI scores to get feature weights
        let max_mi = mi_scores.fold(0.0_f64, |acc, &x| acc.max(x));
        let feature_weights = if max_mi > 0.0 {
            &mi_scores / max_mi
        } else {
            Array1::ones(n_features) / n_features as f64
        };

        // Generate random features weighted by MI scores
        let normal = RandNormal::new(0.0, 1.0 / self.sigma).unwrap();
        let mut random_features = Array2::zeros((self.n_components, n_features));

        for i in 0..self.n_components {
            for j in 0..n_features {
                let weight = feature_weights[j].sqrt();
                random_features[[i, j]] = rng.sample(normal) * weight;
            }
        }

        Ok(FittedMutualInformationKernel {
            feature_weights,
            random_features,
            mi_scores,
        })
    }
}

impl sklears_core::traits::Transform<Array2<f64>, Array2<f64>> for FittedMutualInformationKernel {
    fn transform(&self, x: &Array2<f64>) -> Result<Array2<f64>> {
        let (n_samples, _) = x.dim();
        let mut features = Array2::zeros((n_samples, self.random_features.nrows() * 2));

        for (i, sample) in x.axis_iter(Axis(0)).enumerate() {
            for (j, random_feature) in self.random_features.axis_iter(Axis(0)).enumerate() {
                let projection = sample.dot(&random_feature);
                features[[i, 2 * j]] = projection.cos();
                features[[i, 2 * j + 1]] = projection.sin();
            }
        }

        Ok(features)
    }
}

/// Entropy-based Feature Selection
///
/// Selects features based on their entropy and mutual information with targets.
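///
/// # Example (sketch; not compiled as a doctest)
///
/// ```ignore
/// use sklears_core::traits::{Fit, Transform};
///
/// let selector = EntropyFeatureSelector::new(2)
///     .selection_method(EntropySelectionMethod::MaxMutualInformation)
///     .n_bins(5);
/// let fitted = selector.fit(&x, &y)?;    // x: Array2<f64>, y: Array1<f64>
/// let reduced = fitted.transform(&x)?;   // keeps the 2 highest-scoring columns
/// ```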
#[derive(Debug, Clone)]
pub struct EntropyFeatureSelector {
    n_features: usize,
    selection_method: EntropySelectionMethod,
    n_bins: usize,
}

/// Methods for entropy-based feature selection
#[derive(Debug, Clone)]
pub enum EntropySelectionMethod {
    /// Select features with highest entropy
    MaxEntropy,
    /// Select features with highest mutual information
    MaxMutualInformation,
    /// Select features by information gain (MI normalized by the target entropy)
    InformationGain,
    /// Select features using minimum redundancy maximum relevance
    MRMR,
}

impl EntropyFeatureSelector {
    /// Create a new entropy-based feature selector
    pub fn new(n_features: usize) -> Self {
        Self {
            n_features,
            selection_method: EntropySelectionMethod::MaxMutualInformation,
            n_bins: 10,
        }
    }

    /// Set selection method
    pub fn selection_method(mut self, method: EntropySelectionMethod) -> Self {
        self.selection_method = method;
        self
    }

    /// Set number of bins for entropy estimation
    pub fn n_bins(mut self, n_bins: usize) -> Self {
        self.n_bins = n_bins;
        self
    }
}

/// Fitted Entropy Feature Selector
#[derive(Debug, Clone)]
pub struct FittedEntropyFeatureSelector {
    selected_features: Vec<usize>,
    feature_scores: Array1<f64>,
    entropies: Array1<f64>,
}

impl sklears_core::traits::Fit<Array2<f64>, Array1<f64>> for EntropyFeatureSelector {
    type Fitted = FittedEntropyFeatureSelector;

    fn fit(self, x: &Array2<f64>, y: &Array1<f64>) -> Result<Self::Fitted> {
        let (_, n_features) = x.dim();

        // Compute entropies and scores for all features
        let mut entropies = Array1::zeros(n_features);
        let mut feature_scores = Array1::zeros(n_features);

        for i in 0..n_features {
            let feature = x.column(i);
            entropies[i] = compute_entropy(&feature.to_owned(), self.n_bins)?;

            feature_scores[i] = match self.selection_method {
                EntropySelectionMethod::MaxEntropy => entropies[i],
                EntropySelectionMethod::MaxMutualInformation => {
                    compute_mutual_information(&feature.view(), &y.view(), self.n_bins)?
                }
                EntropySelectionMethod::InformationGain => {
                    let mi = compute_mutual_information(&feature.view(), &y.view(), self.n_bins)?;
                    let target_entropy = compute_entropy(&y.to_owned(), self.n_bins)?;
                    mi / target_entropy.max(1e-8)
                }
                EntropySelectionMethod::MRMR => {
                    // Relevance minus a simplified redundancy proxy; a full MRMR
                    // implementation would select features iteratively and measure
                    // redundancy against the already-selected set.
                    let relevance =
                        compute_mutual_information(&feature.view(), &y.view(), self.n_bins)?;
                    relevance - entropies[i] * 0.1
                }
            };
        }

        // Select top k features based on scores
        let mut indexed_scores: Vec<(usize, f64)> = feature_scores
            .iter()
            .enumerate()
            .map(|(i, &score)| (i, score))
            .collect();
        indexed_scores.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));

        let selected_features: Vec<usize> = indexed_scores
            .iter()
            .take(self.n_features)
            .map(|(i, _)| *i)
            .collect();

        Ok(FittedEntropyFeatureSelector {
            selected_features,
            feature_scores,
            entropies,
        })
    }
}

impl sklears_core::traits::Transform<Array2<f64>, Array2<f64>> for FittedEntropyFeatureSelector {
    fn transform(&self, x: &Array2<f64>) -> Result<Array2<f64>> {
        let (n_samples, _) = x.dim();
        let mut selected_x = Array2::zeros((n_samples, self.selected_features.len()));

        for (j, &feature_idx) in self.selected_features.iter().enumerate() {
            selected_x.column_mut(j).assign(&x.column(feature_idx));
        }

        Ok(selected_x)
    }
}

/// KL-Divergence Kernel Features
///
/// Generates kernel features weighted by the approximate KL divergence between
/// projected data distributions and a reference distribution.
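///
/// During `fit`, each of the `n_components` random projections of the training
/// data is histogrammed and compared to the reference distribution; the
/// resulting KL divergence is mapped to a weight `exp(-KL)` (normalized to sum
/// to one), and `transform` scales each projection by its weight.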
#[derive(Debug, Clone)]
pub struct KLDivergenceKernel {
    n_components: usize,
    reference_distribution: KLReferenceDistribution,
    n_bins: usize,
    random_state: Option<u64>,
}

/// Reference distributions for KL-divergence computation
#[derive(Debug, Clone)]
pub enum KLReferenceDistribution {
    /// Gaussian reference distribution
    Gaussian { mean: f64, std: f64 },
    /// Uniform reference distribution
    Uniform { low: f64, high: f64 },
    /// Empirical distribution from training data
    Empirical,
}

impl KLDivergenceKernel {
    /// Create a new KL-divergence kernel
    pub fn new(n_components: usize) -> Self {
        Self {
            n_components,
            reference_distribution: KLReferenceDistribution::Gaussian {
                mean: 0.0,
                std: 1.0,
            },
            n_bins: 20,
            random_state: None,
        }
    }

    /// Set reference distribution
    pub fn reference_distribution(mut self, dist: KLReferenceDistribution) -> Self {
        self.reference_distribution = dist;
        self
    }

    /// Set number of bins for histogram estimation
    pub fn n_bins(mut self, n_bins: usize) -> Self {
        self.n_bins = n_bins;
        self
    }

    /// Set random state
    pub fn random_state(mut self, seed: u64) -> Self {
        self.random_state = Some(seed);
        self
    }
}

/// Fitted KL-Divergence Kernel
#[derive(Debug, Clone)]
pub struct FittedKLDivergenceKernel {
    random_projections: Array2<f64>,
    reference_histograms: Vec<Array1<f64>>,
    kl_weights: Array1<f64>,
}

impl sklears_core::traits::Fit<Array2<f64>, ()> for KLDivergenceKernel {
    type Fitted = FittedKLDivergenceKernel;

    fn fit(self, x: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
        let mut rng = match self.random_state {
            Some(seed) => RealStdRng::seed_from_u64(seed),
            None => RealStdRng::from_seed(thread_rng().gen()),
        };

        let (_, n_features) = x.dim();

        // Generate random projections
        let normal = RandNormal::new(0.0, 1.0).unwrap();
        let mut random_projections = Array2::zeros((self.n_components, n_features));
        for i in 0..self.n_components {
            for j in 0..n_features {
                random_projections[[i, j]] = rng.sample(normal);
            }
        }

        // Compute reference histograms and KL weights
        let mut reference_histograms = Vec::new();
        let mut kl_weights = Array1::zeros(self.n_components);

        for i in 0..self.n_components {
            let projection = x.dot(&random_projections.row(i));
            let (hist, bins) = compute_histogram(&projection, self.n_bins)?;

            let reference_hist = match &self.reference_distribution {
                KLReferenceDistribution::Gaussian { mean, std } => {
                    compute_gaussian_reference_histogram(&bins, *mean, *std)
                }
                KLReferenceDistribution::Uniform { low, high } => {
                    compute_uniform_reference_histogram(&bins, *low, *high)
                }
                KLReferenceDistribution::Empirical => hist.clone(),
            };

            let kl_div = compute_kl_divergence(&hist, &reference_hist)?;
            kl_weights[i] = (-kl_div).exp(); // Convert to similarity weight

            reference_histograms.push(reference_hist);
        }

        // Normalize weights
        let weight_sum = kl_weights.sum();
        if weight_sum > 0.0 {
            kl_weights /= weight_sum;
        }

        Ok(FittedKLDivergenceKernel {
            random_projections,
            reference_histograms,
            kl_weights,
        })
    }
}

impl sklears_core::traits::Transform<Array2<f64>, Array2<f64>> for FittedKLDivergenceKernel {
    fn transform(&self, x: &Array2<f64>) -> Result<Array2<f64>> {
        let (n_samples, _) = x.dim();
        let mut features = Array2::zeros((n_samples, self.random_projections.nrows()));

        for (i, sample) in x.axis_iter(Axis(0)).enumerate() {
            for j in 0..self.random_projections.nrows() {
                let projection = sample.dot(&self.random_projections.row(j));
                let weight = self.kl_weights[j];
                features[[i, j]] = projection * weight;
            }
        }

        Ok(features)
    }
}

/// Information Bottleneck Feature Extractor
///
/// Implements the Information Bottleneck principle for feature extraction,
/// finding representations that maximize information about targets while
/// minimizing information about inputs.
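///
/// Fitting runs an EM-style loop: soft assignments are computed as a softmax
/// over `-beta * distance` to each cluster center, centers are re-estimated as
/// probability-weighted means, and iteration stops once the objective (a
/// per-component MI estimate of I(T; Y) minus `beta` times the total
/// assignment entropy) changes by less than `tolerance`.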
#[derive(Debug, Clone)]
pub struct InformationBottleneckExtractor {
    n_components: usize,
    beta: f64,
    n_bins: usize,
    max_iterations: usize,
    tolerance: f64,
    random_state: Option<u64>,
}

impl InformationBottleneckExtractor {
    /// Create a new Information Bottleneck extractor
    pub fn new(n_components: usize) -> Self {
        Self {
            n_components,
            beta: 1.0,
            n_bins: 10,
            max_iterations: 100,
            tolerance: 1e-6,
            random_state: None,
        }
    }

    /// Set beta parameter (trade-off between compression and prediction)
    pub fn beta(mut self, beta: f64) -> Self {
        self.beta = beta;
        self
    }

    /// Set number of bins for discretization
    pub fn n_bins(mut self, n_bins: usize) -> Self {
        self.n_bins = n_bins;
        self
    }

    /// Set maximum iterations for optimization
    pub fn max_iterations(mut self, max_iterations: usize) -> Self {
        self.max_iterations = max_iterations;
        self
    }

    /// Set convergence tolerance
    pub fn tolerance(mut self, tolerance: f64) -> Self {
        self.tolerance = tolerance;
        self
    }

    /// Set random state
    pub fn random_state(mut self, seed: u64) -> Self {
        self.random_state = Some(seed);
        self
    }
}

/// Fitted Information Bottleneck Extractor
#[derive(Debug, Clone)]
pub struct FittedInformationBottleneckExtractor {
    cluster_centers: Array2<f64>,
    assignment_probs: Array2<f64>,
    information_values: Array1<f64>,
    n_components: usize,
}

impl sklears_core::traits::Fit<Array2<f64>, Array1<f64>> for InformationBottleneckExtractor {
    type Fitted = FittedInformationBottleneckExtractor;

    fn fit(self, x: &Array2<f64>, y: &Array1<f64>) -> Result<Self::Fitted> {
        let mut rng = match self.random_state {
            Some(seed) => RealStdRng::seed_from_u64(seed),
            None => RealStdRng::from_seed(thread_rng().gen()),
        };

        let (n_samples, n_features) = x.dim();

        // Initialize cluster centers randomly
        let mut cluster_centers = Array2::zeros((self.n_components, n_features));
        let uniform = RandUniform::new(0, n_samples).unwrap();
        for i in 0..self.n_components {
            let sample_idx = rng.sample(uniform);
            cluster_centers.row_mut(i).assign(&x.row(sample_idx));
        }

        let mut assignment_probs = Array2::zeros((n_samples, self.n_components));
        let mut prev_objective = f64::NEG_INFINITY;

        // Iterate until convergence
        for _iteration in 0..self.max_iterations {
            // E-step: Update assignment probabilities
            self.update_assignment_probs(x, &cluster_centers, &mut assignment_probs)?;

            // M-step: Update cluster centers
            self.update_cluster_centers(x, &assignment_probs, &mut cluster_centers)?;

            // Compute objective function
            let objective = self.compute_objective(x, y, &cluster_centers, &assignment_probs)?;

            if (objective - prev_objective).abs() < self.tolerance {
                break;
            }
            prev_objective = objective;
        }

        // Compute information values for each component
        let mut information_values = Array1::zeros(self.n_components);
        for i in 0..self.n_components {
            let cluster_assignments = assignment_probs.column(i);
            information_values[i] =
                compute_mutual_information(&cluster_assignments.view(), &y.view(), self.n_bins)?;
        }

        Ok(FittedInformationBottleneckExtractor {
            cluster_centers,
            assignment_probs,
            information_values,
            n_components: self.n_components,
        })
    }
}

impl InformationBottleneckExtractor {
    fn update_assignment_probs(
        &self,
        x: &Array2<f64>,
        cluster_centers: &Array2<f64>,
        assignment_probs: &mut Array2<f64>,
    ) -> Result<()> {
        let (n_samples, _) = x.dim();

        for i in 0..n_samples {
            let sample = x.row(i);
            let mut log_probs = Array1::zeros(self.n_components);

            for j in 0..self.n_components {
                let center = cluster_centers.row(j);
                let distance = (&sample - &center).mapv(|x| x * x).sum().sqrt();
                log_probs[j] = -self.beta * distance;
            }

            // Softmax normalization
            let max_log_prob = log_probs.fold(f64::NEG_INFINITY, |acc, &x| acc.max(x));
            log_probs -= max_log_prob;
            let exp_probs = log_probs.mapv(|x| x.exp());
            let prob_sum = exp_probs.sum();

            for j in 0..self.n_components {
                assignment_probs[[i, j]] = exp_probs[j] / prob_sum;
            }
        }

        Ok(())
    }

    fn update_cluster_centers(
        &self,
        x: &Array2<f64>,
        assignment_probs: &Array2<f64>,
        cluster_centers: &mut Array2<f64>,
    ) -> Result<()> {
        let (n_samples, n_features) = x.dim();

        for j in 0..self.n_components {
            let mut weighted_sum = Array1::zeros(n_features);
            let mut weight_sum = 0.0;

            for i in 0..n_samples {
                let weight = assignment_probs[[i, j]];
                weighted_sum += &(&x.row(i) * weight);
                weight_sum += weight;
            }

            if weight_sum > 1e-8 {
                cluster_centers
                    .row_mut(j)
                    .assign(&(weighted_sum / weight_sum));
            }
        }

        Ok(())
    }

    fn compute_objective(
        &self,
        x: &Array2<f64>,
        y: &Array1<f64>,
        _cluster_centers: &Array2<f64>,
        assignment_probs: &Array2<f64>,
    ) -> Result<f64> {
        let (n_samples, _) = x.dim();
        let mut compression_term = 0.0;
        let mut prediction_term = 0.0;

        // Compression term: total entropy of the soft assignments (proxy for the I(X; T) penalty)
        for i in 0..n_samples {
            for j in 0..self.n_components {
                let prob = assignment_probs[[i, j]];
                if prob > 1e-8 {
                    compression_term -= prob * prob.ln();
                }
            }
        }

        // Prediction term: I(T; Y), approximated by summing the MI between each
        // component's soft assignments and the targets
        for j in 0..self.n_components {
            let cluster_assignments = assignment_probs.column(j);
            let mi =
                compute_mutual_information(&cluster_assignments.view(), &y.view(), self.n_bins)?;
            prediction_term += mi;
        }

        Ok(prediction_term - self.beta * compression_term)
    }
}

impl sklears_core::traits::Transform<Array2<f64>, Array2<f64>>
    for FittedInformationBottleneckExtractor
{
    fn transform(&self, x: &Array2<f64>) -> Result<Array2<f64>> {
        let (n_samples, _) = x.dim();
        let mut features = Array2::zeros((n_samples, self.n_components));

        for i in 0..n_samples {
            let sample = x.row(i);

            for j in 0..self.n_components {
                let center = self.cluster_centers.row(j);
                let distance = (&sample - &center).mapv(|x| x * x).sum().sqrt();
                let weight = self.information_values[j];
                features[[i, j]] = (-distance).exp() * weight;
            }
        }

        Ok(features)
    }
}

// Helper functions for information-theoretic computations

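/// Shannon entropy of `data` in bits, estimated from an `n_bins` histogram:
/// `H(X) = -Σ p_i · log2(p_i)`.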
fn compute_entropy(data: &Array1<f64>, n_bins: usize) -> Result<f64> {
    let (hist, _) = compute_histogram(data, n_bins)?;
    let mut entropy = 0.0;

    for &prob in hist.iter() {
        if prob > 1e-12 {
            entropy -= prob * prob.ln();
        }
    }

    Ok(entropy / LOG2) // Convert to bits
}

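/// Histogram estimate of the mutual information between `x` and `y` in bits:
/// `I(X; Y) = Σ p(x_i, y_j) · log2(p(x_i, y_j) / (p(x_i) · p(y_j)))`.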
fn compute_mutual_information(
    x: &ArrayView1<f64>,
    y: &ArrayView1<f64>,
    n_bins: usize,
) -> Result<f64> {
    let (hist_x, bins_x) = compute_histogram(&x.to_owned(), n_bins)?;
    let (hist_y, bins_y) = compute_histogram(&y.to_owned(), n_bins)?;
    let joint_hist = compute_joint_histogram(x, y, &bins_x, &bins_y)?;

    let mut mi = 0.0;

    for i in 0..n_bins {
        for j in 0..n_bins {
            let p_xy = joint_hist[[i, j]];
            let p_x = hist_x[i];
            let p_y = hist_y[j];

            if p_xy > 1e-12 && p_x > 1e-12 && p_y > 1e-12 {
                mi += p_xy * (p_xy / (p_x * p_y)).ln();
            }
        }
    }

    Ok(mi / LOG2) // Convert to bits
}

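/// Normalized histogram of `data` over `n_bins` equal-width bins; returns the
/// bin probabilities together with the `n_bins + 1` bin edges.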
fn compute_histogram(data: &Array1<f64>, n_bins: usize) -> Result<(Array1<f64>, Array1<f64>)> {
    let min_val = data.fold(f64::INFINITY, |acc, &x| acc.min(x));
    let max_val = data.fold(f64::NEG_INFINITY, |acc, &x| acc.max(x));

    if (max_val - min_val).abs() < 1e-12 {
        let mut hist = Array1::zeros(n_bins);
        hist[0] = 1.0;
        let bins = Array1::linspace(min_val - 0.5, max_val + 0.5, n_bins + 1);
        return Ok((hist, bins));
    }

    let bin_width = (max_val - min_val) / n_bins as f64;
    let bins = Array1::linspace(min_val, max_val, n_bins + 1);
    let mut hist = Array1::zeros(n_bins);

    for &value in data.iter() {
        let bin_idx = ((value - min_val) / bin_width).floor() as usize;
        let bin_idx = bin_idx.min(n_bins - 1);
        hist[bin_idx] += 1.0;
    }

    hist /= data.len() as f64;
    Ok((hist, bins))
}

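/// Normalized joint histogram of the paired samples `x` and `y`, binned using
/// the edges produced by their marginal histograms.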
fn compute_joint_histogram(
    x: &ArrayView1<f64>,
    y: &ArrayView1<f64>,
    bins_x: &Array1<f64>,
    bins_y: &Array1<f64>,
) -> Result<Array2<f64>> {
    let n_bins_x = bins_x.len() - 1;
    let n_bins_y = bins_y.len() - 1;
    let mut joint_hist = Array2::zeros((n_bins_x, n_bins_y));

    let min_x = bins_x[0];
    let max_x = bins_x[n_bins_x];
    let min_y = bins_y[0];
    let max_y = bins_y[n_bins_y];

    let bin_width_x = (max_x - min_x) / n_bins_x as f64;
    let bin_width_y = (max_y - min_y) / n_bins_y as f64;

    for (&val_x, &val_y) in x.iter().zip(y.iter()) {
        let bin_x = ((val_x - min_x) / bin_width_x).floor() as usize;
        let bin_y = ((val_y - min_y) / bin_width_y).floor() as usize;

        let bin_x = bin_x.min(n_bins_x - 1);
        let bin_y = bin_y.min(n_bins_y - 1);

        joint_hist[[bin_x, bin_y]] += 1.0;
    }

    joint_hist /= x.len() as f64;
    Ok(joint_hist)
}

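/// Discrete KL divergence `D(P ‖ Q) = Σ p_i · ln(p_i / q_i)` (in nats); returns
/// `f64::INFINITY` if any mass of `p` falls in a zero-probability bin of `q`.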
fn compute_kl_divergence(p: &Array1<f64>, q: &Array1<f64>) -> Result<f64> {
    let mut kl = 0.0;

    for (&p_i, &q_i) in p.iter().zip(q.iter()) {
        if p_i > 1e-12 {
            if q_i > 1e-12 {
                kl += p_i * (p_i / q_i).ln();
            } else {
                return Ok(f64::INFINITY); // KL divergence is infinite
            }
        }
    }

    Ok(kl)
}

fn compute_gaussian_reference_histogram(bins: &Array1<f64>, mean: f64, std: f64) -> Array1<f64> {
    let n_bins = bins.len() - 1;
    let mut hist = Array1::zeros(n_bins);

    for i in 0..n_bins {
        let bin_center = (bins[i] + bins[i + 1]) / 2.0;
        // Evaluate the Gaussian probability density at the bin center
        let variance = std * std;
        let exp_part = -0.5 * (bin_center - mean).powi(2) / variance;
        hist[i] = (1.0 / (std * (2.0 * std::f64::consts::PI).sqrt())) * exp_part.exp();
    }

    let hist_sum = hist.sum();
    if hist_sum > 0.0 {
        hist /= hist_sum;
    }

    hist
}

fn compute_uniform_reference_histogram(bins: &Array1<f64>, low: f64, high: f64) -> Array1<f64> {
    let n_bins = bins.len() - 1;
    let mut hist = Array1::zeros(n_bins);
    let uniform_density = 1.0 / (high - low);

    for i in 0..n_bins {
        let bin_start = bins[i];
        let bin_end = bins[i + 1];

        if bin_end <= low || bin_start >= high {
            hist[i] = 0.0;
        } else {
            let overlap_start = bin_start.max(low);
            let overlap_end = bin_end.min(high);
            let overlap_length = overlap_end - overlap_start;
            hist[i] = overlap_length * uniform_density;
        }
    }

    let hist_sum = hist.sum();
    if hist_sum > 0.0 {
        hist /= hist_sum;
    }

    hist
}

#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::array;
    use sklears_core::traits::{Fit, Transform};

    #[test]
    fn test_mutual_information_kernel() {
        let x = array![
            [1.0, 2.0, 3.0],
            [2.0, 4.0, 6.0],
            [3.0, 6.0, 9.0],
            [4.0, 8.0, 12.0],
        ];
        let y = array![1.0, 2.0, 3.0, 4.0];

        let mi_kernel = MutualInformationKernel::new(10)
            .n_bins(5)
            .sigma(1.0)
            .random_state(42);

        let fitted = mi_kernel.fit(&x, &y).unwrap();
        let features = fitted.transform(&x).unwrap();

        assert_eq!(features.shape(), &[4, 20]); // 10 components * 2 (cos, sin)
        assert!(fitted.mi_scores.iter().all(|&x| x >= 0.0));
    }

    #[test]
    fn test_entropy_feature_selector() {
        let x = array![
            [1.0, 0.1, 100.0],
            [2.0, 0.2, 200.0],
            [3.0, 0.3, 300.0],
            [4.0, 0.4, 400.0],
        ];
        let y = array![1.0, 2.0, 3.0, 4.0];

        let selector = EntropyFeatureSelector::new(2)
            .selection_method(EntropySelectionMethod::MaxMutualInformation)
            .n_bins(3);

        let fitted = selector.fit(&x, &y).unwrap();
        let selected_features = fitted.transform(&x).unwrap();

        assert_eq!(selected_features.shape(), &[4, 2]);
        assert_eq!(fitted.selected_features.len(), 2);
    }

    #[test]
    fn test_kl_divergence_kernel() {
        let x = array![[1.0, 2.0], [2.0, 3.0], [3.0, 4.0], [4.0, 5.0],];

        let kl_kernel = KLDivergenceKernel::new(5)
            .reference_distribution(KLReferenceDistribution::Gaussian {
                mean: 0.0,
                std: 1.0,
            })
            .n_bins(10)
            .random_state(42);

        let fitted = kl_kernel.fit(&x, &()).unwrap();
        let features = fitted.transform(&x).unwrap();

        assert_eq!(features.shape(), &[4, 5]);
        assert!(fitted.kl_weights.iter().all(|&x| x >= 0.0));
    }

    #[test]
    fn test_information_bottleneck_extractor() {
        let x = array![
            [1.0, 2.0, 3.0],
            [2.0, 3.0, 4.0],
            [3.0, 4.0, 5.0],
            [4.0, 5.0, 6.0],
        ];
        let y = array![0.0, 0.0, 1.0, 1.0];

        let ib_extractor = InformationBottleneckExtractor::new(2)
            .beta(1.0)
            .n_bins(2)
            .max_iterations(10)
            .random_state(42);

        let fitted = ib_extractor.fit(&x, &y).unwrap();
        let features = fitted.transform(&x).unwrap();

        assert_eq!(features.shape(), &[4, 2]);
        assert!(fitted.information_values.iter().all(|&x| x >= 0.0));
    }

    #[test]
    fn test_entropy_computation() {
        let data = array![1.0, 1.0, 2.0, 2.0, 3.0, 3.0];
        let entropy = compute_entropy(&data.to_owned(), 3).unwrap();

        // For uniform distribution over 3 bins: H = log2(3) ≈ 1.585
        assert!((entropy - 1.585).abs() < 0.1);
    }

    #[test]
    fn test_mutual_information_computation() {
        let x = array![1.0, 1.0, 2.0, 2.0];
        let y = array![1.0, 1.0, 2.0, 2.0]; // Perfect correlation

        let mi = compute_mutual_information(&x.view(), &y.view(), 2).unwrap();

        // Perfect correlation should give high MI
        assert!(mi > 0.5);
    }
}