// File: sklears_kernel_approximation/anisotropic_rbf.rs
1//! Anisotropic RBF Kernel Approximations
2//!
3//! This module implements anisotropic RBF kernels and their approximations.
4//! Anisotropic kernels use different length scales for different dimensions,
5//! allowing the kernel to adapt to the varying importance of features.
6//!
7//! # Key Features
8//!
9//! - **Anisotropic RBF Sampler**: Random features for anisotropic RBF kernels
10//! - **Automatic Relevance Determination (ARD)**: Learn feature relevance
11//! - **Mahalanobis Distance**: Use learned covariance matrix
12//! - **Robust Anisotropic RBF**: Outlier-resistant anisotropic kernels
13//! - **Adaptive Length Scales**: Automatic length scale optimization
14//!
15//! # Mathematical Background
16//!
17//! Anisotropic RBF kernel:
18//! k(x, x') = σ² exp(-0.5 * (x - x')ᵀ Λ⁻¹ (x - x'))
19//!
20//! Where Λ = diag(l₁², l₂², ..., lₐ²) is the diagonal matrix of squared length scales.
21//!
//! # References
//!
//! - Rahimi, A. & Recht, B. (2007). "Random Features for Large-Scale Kernel Machines", NIPS.
//! - Rousseeuw, P. J. & Van Driessen, K. (1999). "A Fast Algorithm for the Minimum Covariance Determinant Estimator".
24use scirs2_core::ndarray::{Array1, Array2, Axis};
25use scirs2_core::random::essentials::{Normal as RandNormal, Uniform as RandUniform};
26use scirs2_core::random::rngs::StdRng as RealStdRng;
27use scirs2_core::random::seq::SliceRandom;
28use scirs2_core::random::RngExt;
29use scirs2_core::random::{thread_rng, SeedableRng};
30use scirs2_core::StandardNormal;
31use scirs2_linalg::compat::{Eig, Inverse, SVD};
32use sklears_core::error::{Result, SklearsError};
33use sklears_core::traits::{Fit, Transform};
34use std::f64::consts::PI;
35
/// Anisotropic RBF kernel sampler using random Fourier features.
///
/// Approximates the anisotropic RBF kernel
/// k(x, x') = σ² exp(-0.5 Σ_d (x_d - x'_d)² / l_d²)
/// by drawing random frequencies whose per-dimension spread is set by the
/// inverse length scale 1/l_d.
#[derive(Debug, Clone)]
pub struct AnisotropicRBFSampler {
    /// Number of random features (output dimensionality of the transform)
    n_components: usize,
    /// Length scales for each dimension (ARD); a single value is broadcast
    /// to all features at fit time
    length_scales: Vec<f64>,
    /// Signal variance σ² (overall kernel amplitude)
    signal_variance: f64,
    /// Whether to learn length scales automatically at fit time
    learn_length_scales: bool,
    /// Maximum iterations for length scale optimization
    max_iter: usize,
    /// Convergence tolerance for length scale optimization
    tol: f64,
    /// Random seed (None => non-deterministic seeding)
    random_state: Option<u64>,
}
55
/// Fitted anisotropic RBF sampler produced by `AnisotropicRBFSampler::fit`.
///
/// Holds the sampled random Fourier frequencies and phases together with
/// the (possibly learned) length scales used to draw them.
#[derive(Debug, Clone)]
pub struct FittedAnisotropicRBF {
    /// Random frequencies, shape (n_components, n_features)
    random_weights: Array2<f64>,
    /// Random phase offsets, one per component
    random_biases: Array1<f64>,
    /// Length scales used at fit time (learned or user-provided)
    length_scales: Array1<f64>,
    /// Signal variance σ²
    signal_variance: f64,
    /// Number of input features expected by `transform`
    n_features: usize,
}
71
72impl AnisotropicRBFSampler {
73    /// Create new anisotropic RBF sampler
74    pub fn new(n_components: usize) -> Self {
75        Self {
76            n_components,
77            length_scales: vec![1.0],
78            signal_variance: 1.0,
79            learn_length_scales: true,
80            max_iter: 100,
81            tol: 1e-6,
82            random_state: None,
83        }
84    }
85
86    /// Set length scales manually (disables automatic learning)
87    pub fn length_scales(mut self, length_scales: Vec<f64>) -> Self {
88        self.length_scales = length_scales;
89        self.learn_length_scales = false;
90        self
91    }
92
93    /// Set signal variance
94    pub fn signal_variance(mut self, signal_variance: f64) -> Self {
95        self.signal_variance = signal_variance;
96        self
97    }
98
99    /// Enable/disable automatic length scale learning
100    pub fn learn_length_scales(mut self, learn: bool) -> Self {
101        self.learn_length_scales = learn;
102        self
103    }
104
105    /// Set optimization parameters
106    pub fn optimization_params(mut self, max_iter: usize, tol: f64) -> Self {
107        self.max_iter = max_iter;
108        self.tol = tol;
109        self
110    }
111
112    /// Set random seed
113    pub fn random_state(mut self, seed: u64) -> Self {
114        self.random_state = Some(seed);
115        self
116    }
117
118    /// Learn length scales using maximum likelihood
119    fn learn_length_scales_ml(&self, x: &Array2<f64>) -> Result<Array1<f64>> {
120        let n_features = x.ncols();
121        let mut length_scales = Array1::ones(n_features);
122
123        // Initialize with data variance per dimension
124        for j in 0..n_features {
125            let col = x.column(j);
126            let mean = col.mean().unwrap_or(0.0);
127            let var = col.mapv(|v| (v - mean).powi(2)).mean().unwrap_or(1.0);
128            length_scales[j] = var.sqrt();
129        }
130
131        // Simple optimization using coordinate descent
132        for _iter in 0..self.max_iter {
133            let mut improved = false;
134
135            for j in 0..n_features {
136                let old_scale = length_scales[j];
137                let mut best_scale = old_scale;
138                let mut best_ll = self.compute_log_likelihood(x, &length_scales)?;
139
140                // Try different scale values
141                for factor in [0.5, 0.8, 1.2, 2.0] {
142                    length_scales[j] = old_scale * factor;
143                    let ll = self.compute_log_likelihood(x, &length_scales)?;
144                    if ll > best_ll {
145                        best_ll = ll;
146                        best_scale = length_scales[j];
147                        improved = true;
148                    }
149                }
150
151                length_scales[j] = best_scale;
152            }
153
154            if !improved {
155                break;
156            }
157        }
158
159        Ok(length_scales)
160    }
161
162    /// Compute log likelihood for length scale optimization
163    fn compute_log_likelihood(&self, x: &Array2<f64>, length_scales: &Array1<f64>) -> Result<f64> {
164        let n = x.nrows();
165        let mut k = Array2::zeros((n, n));
166
167        // Compute kernel matrix
168        for i in 0..n {
169            for j in 0..n {
170                let mut dist_sq = 0.0;
171                for d in 0..x.ncols() {
172                    let diff = x[(i, d)] - x[(j, d)];
173                    dist_sq += diff * diff / (length_scales[d] * length_scales[d]);
174                }
175                k[(i, j)] = self.signal_variance * (-0.5 * dist_sq).exp();
176            }
177        }
178
179        // Add noise for numerical stability
180        for i in 0..n {
181            k[(i, i)] += 1e-6;
182        }
183
184        // Compute log determinant and log likelihood
185        // Use SVD for numerical stability instead of cholesky
186        let (u, s, vt) = k.svd(true).map_err(|e| {
187            SklearsError::NumericalError(format!("SVD decomposition failed: {:?}", e))
188        })?;
189        let s_inv = s.mapv(|x| if x > 1e-10 { 1.0 / x.sqrt() } else { 0.0 });
190        let s_inv_diag = Array2::from_diag(&s_inv);
191        let _k_inv_sqrt = u.dot(&s_inv_diag).dot(&vt);
192        let log_det = s.mapv(|x| if x > 1e-10 { x.ln() } else { -23.0 }).sum(); // log(1e-10) ≈ -23
193        let log_likelihood = -0.5 * (log_det + n as f64 * (2.0 * PI).ln());
194
195        Ok(log_likelihood)
196    }
197}
198
impl Fit<Array2<f64>, ()> for AnisotropicRBFSampler {
    type Fitted = FittedAnisotropicRBF;

    /// Fit the sampler: resolve the per-dimension length scales, then draw
    /// random Fourier frequencies and phase offsets.
    fn fit(self, x: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
        let n_features = x.ncols();

        // Determine length scales: learned by maximum likelihood, broadcast
        // from a single scalar, or taken verbatim when one per feature.
        let length_scales = if self.learn_length_scales {
            self.learn_length_scales_ml(x)?
        } else if self.length_scales.len() == 1 {
            Array1::from_elem(n_features, self.length_scales[0])
        } else if self.length_scales.len() == n_features {
            Array1::from_vec(self.length_scales.clone())
        } else {
            return Err(SklearsError::InvalidInput(
                "Length scales must be either scalar or match number of features".to_string(),
            ));
        };

        // RNG: seeded for reproducibility when random_state is set,
        // otherwise seeded from the thread-local generator.
        let mut rng = if let Some(seed) = self.random_state {
            RealStdRng::seed_from_u64(seed)
        } else {
            RealStdRng::from_seed(thread_rng().random())
        };

        // Frequencies for dimension j are drawn from a normal with standard
        // deviation 1/l_j, matching the spectral density of the anisotropic
        // RBF kernel along that axis.
        let mut random_weights = Array2::zeros((self.n_components, n_features));
        for j in 0..n_features {
            let normal =
                RandNormal::new(0.0, 1.0 / length_scales[j]).expect("operation should succeed");
            for i in 0..self.n_components {
                random_weights[(i, j)] = rng.sample(normal);
            }
        }

        // Random phase offsets uniform on [0, 2π)
        let uniform = RandUniform::new(0.0, 2.0 * PI).expect("operation should succeed");
        let random_biases = Array1::from_shape_fn(self.n_components, |_| rng.sample(uniform));

        Ok(FittedAnisotropicRBF {
            random_weights,
            random_biases,
            length_scales,
            signal_variance: self.signal_variance,
            n_features,
        })
    }
}
246
247impl Transform<Array2<f64>, Array2<f64>> for FittedAnisotropicRBF {
248    fn transform(&self, x: &Array2<f64>) -> Result<Array2<f64>> {
249        if x.ncols() != self.n_features {
250            return Err(SklearsError::InvalidInput(
251                "Feature dimension mismatch".to_string(),
252            ));
253        }
254
255        let n_samples = x.nrows();
256        let n_components = self.random_weights.nrows();
257        let normalization = (2.0 * self.signal_variance / n_components as f64).sqrt();
258
259        let mut features = Array2::zeros((n_samples, n_components));
260
261        for i in 0..n_samples {
262            for j in 0..n_components {
263                let mut dot_product = 0.0;
264                for k in 0..self.n_features {
265                    dot_product += x[(i, k)] * self.random_weights[(j, k)];
266                }
267                features[(i, j)] = normalization * (dot_product + self.random_biases[j]).cos();
268            }
269        }
270
271        Ok(features)
272    }
273}
274
/// Mahalanobis distance-based RBF sampler.
///
/// Estimates the data covariance at fit time and whitens inputs so that
/// the approximated kernel uses the Mahalanobis metric instead of plain
/// Euclidean distance.
#[derive(Debug, Clone)]
pub struct MahalanobisRBFSampler {
    /// Number of random features
    n_components: usize,
    /// Signal variance σ²
    signal_variance: f64,
    /// Diagonal regularization added to the covariance matrix
    reg_param: f64,
    /// Random seed (None => non-deterministic seeding)
    random_state: Option<u64>,
}
288
/// Fitted Mahalanobis RBF sampler produced by `MahalanobisRBFSampler::fit`.
#[derive(Debug, Clone)]
pub struct FittedMahalanobisRBF {
    /// Random frequencies, shape (n_components, n_features); applied to
    /// whitened inputs
    random_weights: Array2<f64>,
    /// Random phase offsets, one per component
    random_biases: Array1<f64>,
    /// Whitening transformation matrix (inverse square root of covariance)
    whitening_matrix: Array2<f64>,
    /// Mean of the training data, subtracted before whitening
    mean: Array1<f64>,
    /// Signal variance σ²
    signal_variance: f64,
    /// Number of input features expected by `transform`
    n_features: usize,
}
306
307impl MahalanobisRBFSampler {
308    /// Create new Mahalanobis RBF sampler
309    pub fn new(n_components: usize) -> Self {
310        Self {
311            n_components,
312            signal_variance: 1.0,
313            reg_param: 1e-6,
314            random_state: None,
315        }
316    }
317
318    /// Set signal variance
319    pub fn signal_variance(mut self, signal_variance: f64) -> Self {
320        self.signal_variance = signal_variance;
321        self
322    }
323
324    /// Set regularization parameter
325    pub fn reg_param(mut self, reg_param: f64) -> Self {
326        self.reg_param = reg_param;
327        self
328    }
329
330    /// Set random seed
331    pub fn random_state(mut self, seed: u64) -> Self {
332        self.random_state = Some(seed);
333        self
334    }
335}
336
337impl Fit<Array2<f64>, ()> for MahalanobisRBFSampler {
338    type Fitted = FittedMahalanobisRBF;
339    fn fit(self, x: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
340        let n_features = x.ncols();
341        let n_samples = x.nrows();
342
343        // Compute mean
344        let mean = x.mean_axis(Axis(0)).expect("operation should succeed");
345
346        // Center data
347        let x_centered = x - &mean.clone().insert_axis(Axis(0));
348
349        // Compute covariance matrix
350        let cov = x_centered.t().dot(&x_centered) / (n_samples - 1) as f64;
351
352        // Add regularization
353        let mut cov_reg = cov;
354        for i in 0..n_features {
355            cov_reg[(i, i)] += self.reg_param;
356        }
357
358        // Compute whitening matrix (inverse square root of covariance)
359        let (eigenvals_complex, eigenvecs_complex) = cov_reg.eig().map_err(|e| {
360            SklearsError::NumericalError(format!("Eigendecomposition failed: {:?}", e))
361        })?;
362
363        // Extract real parts (eigenvalues should be real for symmetric matrix)
364        let eigenvals = eigenvals_complex.mapv(|x| x.re);
365        let eigenvecs = eigenvecs_complex.mapv(|x| x.re);
366
367        // Create inverse square root matrix
368        let mut inv_sqrt_eigenvals = Array1::zeros(n_features);
369        for i in 0..n_features {
370            inv_sqrt_eigenvals[i] = 1.0 / eigenvals[i].sqrt();
371        }
372
373        let whitening_matrix = eigenvecs
374            .dot(&Array2::from_diag(&inv_sqrt_eigenvals))
375            .dot(&eigenvecs.t());
376
377        // Generate random frequencies from standard normal
378        let mut rng = if let Some(seed) = self.random_state {
379            RealStdRng::seed_from_u64(seed)
380        } else {
381            RealStdRng::from_seed(thread_rng().random())
382        };
383
384        let mut random_weights = Array2::zeros((self.n_components, n_features));
385        for i in 0..self.n_components {
386            for j in 0..n_features {
387                random_weights[(i, j)] = rng.sample(StandardNormal);
388            }
389        }
390
391        // Generate random biases
392        let uniform = RandUniform::new(0.0, 2.0 * PI).expect("operation should succeed");
393        let random_biases = Array1::from_shape_fn(self.n_components, |_| rng.sample(uniform));
394
395        Ok(FittedMahalanobisRBF {
396            random_weights,
397            random_biases,
398            whitening_matrix,
399            mean,
400            signal_variance: self.signal_variance,
401            n_features,
402        })
403    }
404}
405
406impl Transform<Array2<f64>, Array2<f64>> for FittedMahalanobisRBF {
407    fn transform(&self, x: &Array2<f64>) -> Result<Array2<f64>> {
408        if x.ncols() != self.n_features {
409            return Err(SklearsError::InvalidInput(
410                "Feature dimension mismatch".to_string(),
411            ));
412        }
413
414        let n_samples = x.nrows();
415        let n_components = self.random_weights.nrows();
416        let normalization = (2.0 * self.signal_variance / n_components as f64).sqrt();
417
418        // Center and whiten data
419        let x_centered = x - &self.mean.clone().insert_axis(Axis(0));
420        let x_whitened = x_centered.dot(&self.whitening_matrix);
421
422        let mut features = Array2::zeros((n_samples, n_components));
423
424        for i in 0..n_samples {
425            for j in 0..n_components {
426                let dot_product = x_whitened.row(i).dot(&self.random_weights.row(j));
427                features[(i, j)] = normalization * (dot_product + self.random_biases[j]).cos();
428            }
429        }
430
431        Ok(features)
432    }
433}
434
/// Robust anisotropic RBF sampler with outlier resistance.
///
/// Estimates a robust mean/covariance (MCD, MVE, or Huber) and draws
/// random Fourier frequencies matched to the resulting precision matrix.
#[derive(Debug, Clone)]
pub struct RobustAnisotropicRBFSampler {
    /// Number of random features
    n_components: usize,
    /// Robust estimator used for the mean/covariance
    robust_estimator: RobustEstimator,
    /// Signal variance σ²
    signal_variance: f64,
    /// Random seed (None => non-deterministic seeding)
    random_state: Option<u64>,
}
448
/// Types of robust estimators for the mean/covariance used by
/// `RobustAnisotropicRBFSampler`.
#[derive(Debug, Clone)]
pub enum RobustEstimator {
    /// Minimum Covariance Determinant; `support_fraction` is the fraction
    /// of samples treated as inliers when selecting subsets
    MCD { support_fraction: f64 },
    /// Minimum Volume Ellipsoid (approximated in this module via MCD with
    /// a 0.5 support fraction)
    MVE,
    /// Huber's M-estimator with tuning constant `c`
    Huber { c: f64 },
}
460
/// Fitted robust anisotropic RBF sampler produced by
/// `RobustAnisotropicRBFSampler::fit`.
#[derive(Debug, Clone)]
pub struct FittedRobustAnisotropicRBF {
    /// Random frequencies, shape (n_components, n_features)
    random_weights: Array2<f64>,
    /// Random phase offsets, one per component
    random_biases: Array1<f64>,
    /// Robust covariance matrix estimated at fit time
    robust_cov: Array2<f64>,
    /// Robust mean, subtracted from inputs before projection
    robust_mean: Array1<f64>,
    /// Signal variance σ²
    signal_variance: f64,
    /// Number of input features expected by `transform`
    n_features: usize,
}
478
479impl RobustAnisotropicRBFSampler {
480    /// Create new robust anisotropic RBF sampler
481    pub fn new(n_components: usize) -> Self {
482        Self {
483            n_components,
484            robust_estimator: RobustEstimator::MCD {
485                support_fraction: 0.8,
486            },
487            signal_variance: 1.0,
488            random_state: None,
489        }
490    }
491
492    /// Set robust estimator
493    pub fn robust_estimator(mut self, estimator: RobustEstimator) -> Self {
494        self.robust_estimator = estimator;
495        self
496    }
497
498    /// Set signal variance
499    pub fn signal_variance(mut self, signal_variance: f64) -> Self {
500        self.signal_variance = signal_variance;
501        self
502    }
503
504    /// Set random seed
505    pub fn random_state(mut self, seed: u64) -> Self {
506        self.random_state = Some(seed);
507        self
508    }
509
510    /// Compute robust covariance using MCD estimator
511    fn compute_mcd_covariance(
512        &self,
513        x: &Array2<f64>,
514        support_fraction: f64,
515    ) -> Result<(Array1<f64>, Array2<f64>)> {
516        let n_samples = x.nrows();
517        let n_features = x.ncols();
518        let h = ((n_samples as f64 * support_fraction).floor() as usize).max(n_features + 1);
519
520        let mut best_det = f64::INFINITY;
521        let mut best_mean = Array1::zeros(n_features);
522        let mut best_cov = Array2::eye(n_features);
523
524        let mut rng = thread_rng();
525
526        // Try multiple random subsets
527        for _ in 0..50 {
528            // Select random subset
529            let mut indices: Vec<usize> = (0..n_samples).collect();
530            indices.shuffle(&mut rng);
531            indices.truncate(h);
532
533            let subset = x.select(Axis(0), &indices);
534
535            // Compute mean and covariance of subset
536            let mean = subset.mean_axis(Axis(0)).expect("operation should succeed");
537            let centered = &subset - &mean.clone().insert_axis(Axis(0));
538            let cov = centered.t().dot(&centered) / (h - 1) as f64;
539
540            // Add regularization for numerical stability
541            let mut cov_reg = cov;
542            let trace = cov_reg.diag().sum();
543            let reg = trace * 1e-6 / n_features as f64;
544            for i in 0..n_features {
545                cov_reg[(i, i)] += reg;
546            }
547
548            // Compute determinant
549            if let Ok((_, s, _)) = cov_reg.svd(false) {
550                let log_det: f64 = s.mapv(|x| if x > 1e-10 { x.ln() } else { -23.0 }).sum();
551                let det = log_det.exp();
552
553                if det < best_det {
554                    best_det = det;
555                    best_mean = mean;
556                    best_cov = cov_reg;
557                }
558            }
559        }
560
561        Ok((best_mean, best_cov))
562    }
563
564    /// Compute robust covariance using Huber's M-estimator
565    fn compute_huber_covariance(
566        &self,
567        x: &Array2<f64>,
568        c: f64,
569    ) -> Result<(Array1<f64>, Array2<f64>)> {
570        let n_samples = x.nrows();
571        let n_features = x.ncols();
572
573        // Start with sample mean and covariance
574        let mut mean = x.mean_axis(Axis(0)).expect("operation should succeed");
575        let centered = x - &mean.clone().insert_axis(Axis(0));
576        let mut cov = centered.t().dot(&centered) / (n_samples - 1) as f64;
577
578        // Iteratively reweighted estimation
579        for _ in 0..20 {
580            // Compute Mahalanobis distances
581            let cov_inv = cov.inv().map_err(|_| {
582                SklearsError::InvalidInput("Covariance matrix not positive definite".to_string())
583            })?;
584
585            let mut weights = Array1::zeros(n_samples);
586            for i in 0..n_samples {
587                let diff = &x.row(i) - &mean;
588                let mahal_dist = diff.dot(&cov_inv).dot(&diff).sqrt();
589                weights[i] = if mahal_dist <= c { 1.0 } else { c / mahal_dist };
590            }
591
592            // Update mean
593            let weight_sum = weights.sum();
594            let mut new_mean = Array1::zeros(n_features);
595            for i in 0..n_samples {
596                for j in 0..n_features {
597                    new_mean[j] += weights[i] * x[(i, j)];
598                }
599            }
600            new_mean /= weight_sum;
601
602            // Update covariance
603            let mut new_cov = Array2::zeros((n_features, n_features));
604            for i in 0..n_samples {
605                let diff = &x.row(i) - &new_mean;
606                let outer =
607                    Array2::from_shape_fn((n_features, n_features), |(j, k)| diff[j] * diff[k]);
608                new_cov = new_cov + weights[i] * outer;
609            }
610            new_cov /= weight_sum;
611
612            // Check convergence
613            let mean_diff = (&new_mean - &mean).mapv(|x| x.abs()).sum();
614            if mean_diff < 1e-6 {
615                break;
616            }
617
618            mean = new_mean;
619            cov = new_cov;
620        }
621
622        Ok((mean, cov))
623    }
624}
625
626impl Fit<Array2<f64>, ()> for RobustAnisotropicRBFSampler {
627    type Fitted = FittedRobustAnisotropicRBF;
628    fn fit(self, x: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
629        let n_features = x.ncols();
630
631        // Compute robust mean and covariance
632        let (robust_mean, robust_cov) = match &self.robust_estimator {
633            RobustEstimator::MCD { support_fraction } => {
634                self.compute_mcd_covariance(x, *support_fraction)?
635            }
636            RobustEstimator::MVE => {
637                // Simplified MVE - use MCD with smaller support fraction
638                self.compute_mcd_covariance(x, 0.5)?
639            }
640            RobustEstimator::Huber { c } => self.compute_huber_covariance(x, *c)?,
641        };
642
643        // Compute precision matrix (inverse covariance)
644        let precision = robust_cov.inv().map_err(|_| {
645            SklearsError::InvalidInput("Robust covariance matrix not positive definite".to_string())
646        })?;
647
648        // Generate random frequencies from multivariate normal with precision matrix
649        let mut rng = if let Some(seed) = self.random_state {
650            RealStdRng::seed_from_u64(seed)
651        } else {
652            RealStdRng::from_seed(thread_rng().random())
653        };
654
655        // Cholesky decomposition of precision matrix for sampling
656        // Use SVD decomposition for sampling
657        let (u, s, _vt) = precision.svd(true).map_err(|e| {
658            SklearsError::NumericalError(format!("SVD decomposition failed: {:?}", e))
659        })?;
660        let s_sqrt = s.mapv(|x| x.sqrt());
661        let precision_sqrt = u.dot(&Array2::from_diag(&s_sqrt));
662
663        let mut random_weights = Array2::zeros((self.n_components, n_features));
664        for i in 0..self.n_components {
665            let mut z = Array1::zeros(n_features);
666            for j in 0..n_features {
667                z[j] = rng.sample(StandardNormal);
668            }
669            let w = precision_sqrt.t().dot(&z);
670            random_weights.row_mut(i).assign(&w);
671        }
672
673        // Generate random biases
674        let uniform = RandUniform::new(0.0, 2.0 * PI).expect("operation should succeed");
675        let random_biases = Array1::from_shape_fn(self.n_components, |_| rng.sample(uniform));
676
677        Ok(FittedRobustAnisotropicRBF {
678            random_weights,
679            random_biases,
680            robust_cov,
681            robust_mean,
682            signal_variance: self.signal_variance,
683            n_features,
684        })
685    }
686}
687
688impl Transform<Array2<f64>, Array2<f64>> for FittedRobustAnisotropicRBF {
689    fn transform(&self, x: &Array2<f64>) -> Result<Array2<f64>> {
690        if x.ncols() != self.n_features {
691            return Err(SklearsError::InvalidInput(
692                "Feature dimension mismatch".to_string(),
693            ));
694        }
695
696        let n_samples = x.nrows();
697        let n_components = self.random_weights.nrows();
698        let normalization = (2.0 * self.signal_variance / n_components as f64).sqrt();
699
700        // Center data
701        let x_centered = x - &self.robust_mean.clone().insert_axis(Axis(0));
702
703        let mut features = Array2::zeros((n_samples, n_components));
704
705        for i in 0..n_samples {
706            for j in 0..n_components {
707                let dot_product = x_centered.row(i).dot(&self.random_weights.row(j));
708                features[(i, j)] = normalization * (dot_product + self.random_biases[j]).cos();
709            }
710        }
711
712        Ok(features)
713    }
714}
715
#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_abs_diff_eq;
    use scirs2_core::ndarray::array;

    /// Fixed length scales: fit + transform should produce finite features
    /// of the requested shape with roughly zero-mean cosine features.
    #[test]
    fn test_anisotropic_rbf_sampler() {
        let sampler = AnisotropicRBFSampler::new(100)
            .length_scales(vec![1.0, 2.0])
            .signal_variance(1.5);

        let x = array![[0.0, 0.0], [1.0, 1.0], [2.0, 2.0], [-1.0, -1.0]];
        let fitted = sampler.fit(&x, &()).expect("operation should succeed");
        let features = fitted.transform(&x).expect("operation should succeed");

        assert_eq!(features.shape(), &[4, 100]);
        assert!(features.iter().all(|&x| x.is_finite()));

        // Check that features have approximately zero mean
        let mean = features
            .mean_axis(Axis(0))
            .expect("operation should succeed");
        for &m in mean.iter() {
            assert!(m.abs() < 0.5);
        }
    }

    /// Automatic length scale learning: the dimension with the larger data
    /// spread should end up with the larger length scale.
    #[test]
    fn test_anisotropic_rbf_learned_scales() {
        let sampler = AnisotropicRBFSampler::new(50)
            .learn_length_scales(true)
            .signal_variance(1.0);

        // Data with different scales in each dimension
        let x = array![
            [0.0, 0.0],
            [1.0, 0.1],
            [2.0, 0.2],
            [3.0, 0.3],
            [4.0, 0.4],
            [5.0, 0.5],
            [-1.0, -0.1],
            [-2.0, -0.2]
        ];

        let fitted = sampler.fit(&x, &()).expect("operation should succeed");
        let features = fitted.transform(&x).expect("operation should succeed");

        assert_eq!(features.shape(), &[8, 50]);
        assert!(features.iter().all(|&x| x.is_finite()));

        // First dimension should have larger length scale than second
        assert!(fitted.length_scales[0] > fitted.length_scales[1]);
    }

    /// Mahalanobis sampler on strongly correlated data: finite features for
    /// both training data and unseen test rows.
    #[test]
    fn test_mahalanobis_rbf_sampler() {
        let sampler = MahalanobisRBFSampler::new(80)
            .signal_variance(2.0)
            .reg_param(1e-4);

        let x = array![
            [1.0, 2.0],
            [2.0, 4.0],
            [3.0, 6.0],
            [1.5, 3.0],
            [2.5, 5.0],
            [0.5, 1.0],
            [3.5, 7.0],
            [4.0, 8.0]
        ];

        let fitted = sampler.fit(&x, &()).expect("operation should succeed");
        let features = fitted.transform(&x).expect("operation should succeed");

        assert_eq!(features.shape(), &[8, 80]);
        assert!(features.iter().all(|&x| x.is_finite()));

        // Test with new data
        let x_test = array![[2.0, 4.0], [1.0, 2.0]];
        let features_test = fitted.transform(&x_test).expect("operation should succeed");
        assert_eq!(features_test.shape(), &[2, 80]);
        assert!(features_test.iter().all(|&x| x.is_finite()));
    }

    /// MCD-based robust sampler should fit cleanly on data containing an
    /// obvious outlier.
    #[test]
    fn test_robust_anisotropic_rbf_mcd() {
        let sampler = RobustAnisotropicRBFSampler::new(60)
            .robust_estimator(RobustEstimator::MCD {
                support_fraction: 0.7,
            })
            .signal_variance(1.0);

        // Data with outliers
        let x = array![
            [1.0, 1.0],
            [1.1, 1.1],
            [0.9, 0.9],
            [1.2, 1.2],
            [0.8, 0.8],
            [1.3, 1.3],
            [10.0, 10.0], // outlier
            [1.05, 1.05],
            [0.95, 0.95],
            [1.15, 1.15]
        ];

        let fitted = sampler.fit(&x, &()).expect("operation should succeed");
        let features = fitted.transform(&x).expect("operation should succeed");

        assert_eq!(features.shape(), &[10, 60]);
        assert!(features.iter().all(|&x| x.is_finite()));
    }

    /// Huber-based robust sampler should fit cleanly on data containing an
    /// obvious outlier.
    #[test]
    fn test_robust_anisotropic_rbf_huber() {
        let sampler = RobustAnisotropicRBFSampler::new(40)
            .robust_estimator(RobustEstimator::Huber { c: 1.345 })
            .signal_variance(0.5);

        let x = array![
            [0.0, 0.0],
            [1.0, 0.0],
            [0.0, 1.0],
            [1.0, 1.0],
            [0.5, 0.5],
            [1.5, 0.5],
            [0.5, 1.5],
            [5.0, 5.0] // outlier
        ];

        let fitted = sampler.fit(&x, &()).expect("operation should succeed");
        let features = fitted.transform(&x).expect("operation should succeed");

        assert_eq!(features.shape(), &[8, 40]);
        assert!(features.iter().all(|&x| x.is_finite()));
    }

    /// Same seed + same configuration must yield byte-for-byte identical
    /// feature maps.
    #[test]
    fn test_anisotropic_rbf_reproducibility() {
        let sampler1 = AnisotropicRBFSampler::new(30)
            .random_state(42)
            .length_scales(vec![1.0, 2.0]);

        let sampler2 = AnisotropicRBFSampler::new(30)
            .random_state(42)
            .length_scales(vec![1.0, 2.0]);

        let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];

        let features1 = sampler1
            .fit(&x, &())
            .expect("operation should succeed")
            .transform(&x)
            .expect("operation should succeed");
        let features2 = sampler2
            .fit(&x, &())
            .expect("operation should succeed")
            .transform(&x)
            .expect("operation should succeed");

        for i in 0..features1.nrows() {
            for j in 0..features1.ncols() {
                assert_abs_diff_eq!(features1[(i, j)], features2[(i, j)], epsilon = 1e-10);
            }
        }
    }

    /// Length scale learning should strongly separate a wide-ranging
    /// feature from a nearly constant one.
    #[test]
    fn test_length_scale_learning() {
        let sampler = AnisotropicRBFSampler::new(20)
            .learn_length_scales(true)
            .optimization_params(10, 1e-4);

        // Create data where first feature varies much more than second
        let x = array![
            [0.0, 0.0],
            [10.0, 0.1],
            [20.0, 0.2],
            [30.0, 0.3],
            [-10.0, -0.1],
            [-20.0, -0.2],
            [15.0, 0.15],
            [25.0, 0.25]
        ];

        let fitted = sampler.fit(&x, &()).expect("operation should succeed");

        // First dimension should have much larger length scale
        assert!(fitted.length_scales[0] > 5.0 * fitted.length_scales[1]);
    }
}