sklears_kernel_approximation/
anisotropic_rbf.rs

//! Anisotropic RBF Kernel Approximations
//!
//! This module implements anisotropic RBF kernels and their approximations.
//! Anisotropic kernels use a different length scale for each dimension,
//! allowing the kernel to adapt to the varying importance of features.
//!
//! # Key Features
//!
//! - **Anisotropic RBF Sampler**: Random Fourier features for anisotropic RBF kernels
//! - **Automatic Relevance Determination (ARD)**: Learn feature relevance
//! - **Mahalanobis Distance**: Use a learned covariance matrix
//! - **Robust Anisotropic RBF**: Outlier-resistant anisotropic kernels
//! - **Adaptive Length Scales**: Automatic length scale optimization
//!
//! # Mathematical Background
//!
//! Anisotropic RBF kernel:
//!
//! k(x, x') = σ² exp(-0.5 (x - x')ᵀ Λ⁻¹ (x - x'))
//!
//! where Λ = diag(l₁², l₂², ..., l_d²) is the diagonal matrix of squared length scales.
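//!
//! # Random Fourier Features
//!
//! The samplers in this module approximate such kernels with random Fourier
//! features (Rahimi and Recht, 2007): z(x) = √(2σ²/D) cos(Wx + b), where the
//! rows of W are drawn from the kernel's spectral density (N(0, Λ⁻¹) in the
//! anisotropic case), b is drawn uniformly from [0, 2π), and D is the number
//! of components, so that z(x)ᵀ z(x') ≈ k(x, x').
//!
//! # Examples
//!
//! A minimal usage sketch (not compiled here; assumes the crate's `Fit` and
//! `Transform` traits are in scope and `x` is an `Array2<f64>`):
//!
//! ```ignore
//! let sampler = AnisotropicRBFSampler::new(100)
//!     .length_scales(vec![1.0, 2.0])
//!     .signal_variance(1.5);
//! let fitted = sampler.fit(&x, &())?;
//! let features = fitted.transform(&x)?;
//! ```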
//!
//! # References
//!
//! - Rahimi, A. and Recht, B. (2007). "Random Features for Large-Scale
//!   Kernel Machines." Advances in Neural Information Processing Systems 20.

use scirs2_core::ndarray::{Array1, Array2, Axis};
use scirs2_core::random::essentials::{Normal as RandNormal, Uniform as RandUniform};
use scirs2_core::random::rngs::StdRng as RealStdRng;
use scirs2_core::random::seq::SliceRandom;
use scirs2_core::random::Rng;
use scirs2_core::random::{thread_rng, SeedableRng};
use scirs2_core::StandardNormal;
use scirs2_linalg::compat::{Eig, Inverse, SVD};
use sklears_core::error::{Result, SklearsError};
use sklears_core::traits::{Fit, Transform};
use std::f64::consts::PI;

/// Anisotropic RBF kernel sampler using random Fourier features
#[derive(Debug, Clone)]
pub struct AnisotropicRBFSampler {
    /// Number of random features
    n_components: usize,
    /// Length scales for each dimension (ARD)
    length_scales: Vec<f64>,
    /// Signal variance
    signal_variance: f64,
    /// Whether to learn length scales automatically
    learn_length_scales: bool,
    /// Maximum iterations for length scale optimization
    max_iter: usize,
    /// Convergence tolerance
    tol: f64,
    /// Random seed
    random_state: Option<u64>,
}

/// Fitted anisotropic RBF sampler
#[derive(Debug, Clone)]
pub struct FittedAnisotropicRBF {
    /// Random frequencies
    random_weights: Array2<f64>,
    /// Random biases
    random_biases: Array1<f64>,
    /// Learned length scales
    length_scales: Array1<f64>,
    /// Signal variance
    signal_variance: f64,
    /// Number of features
    n_features: usize,
}

impl AnisotropicRBFSampler {
    /// Create new anisotropic RBF sampler
    pub fn new(n_components: usize) -> Self {
        Self {
            n_components,
            length_scales: vec![1.0],
            signal_variance: 1.0,
            learn_length_scales: true,
            max_iter: 100,
            tol: 1e-6,
            random_state: None,
        }
    }

    /// Set length scales manually (disables automatic learning)
    pub fn length_scales(mut self, length_scales: Vec<f64>) -> Self {
        self.length_scales = length_scales;
        self.learn_length_scales = false;
        self
    }

    /// Set signal variance
    pub fn signal_variance(mut self, signal_variance: f64) -> Self {
        self.signal_variance = signal_variance;
        self
    }

    /// Enable/disable automatic length scale learning
    pub fn learn_length_scales(mut self, learn: bool) -> Self {
        self.learn_length_scales = learn;
        self
    }

    /// Set optimization parameters
    pub fn optimization_params(mut self, max_iter: usize, tol: f64) -> Self {
        self.max_iter = max_iter;
        self.tol = tol;
        self
    }

    /// Set random seed
    pub fn random_state(mut self, seed: u64) -> Self {
        self.random_state = Some(seed);
        self
    }

    /// Learn length scales by maximizing a simplified log marginal likelihood
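    ///
    /// Coordinate descent: each dimension's scale is repeatedly rescaled by
    /// fixed factors (0.5, 0.8, 1.2, 2.0), and a change is kept when the
    /// criterion from `compute_log_likelihood` improves by more than `tol`.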
    fn learn_length_scales_ml(&self, x: &Array2<f64>) -> Result<Array1<f64>> {
        let n_features = x.ncols();
        let mut length_scales = Array1::ones(n_features);

        // Initialize with the per-dimension standard deviation of the data
        for j in 0..n_features {
            let col = x.column(j);
            let mean = col.mean().unwrap_or(0.0);
            let var = col.mapv(|v| (v - mean).powi(2)).mean().unwrap_or(1.0);
            length_scales[j] = var.sqrt();
        }

        // Simple optimization using coordinate descent
        for _iter in 0..self.max_iter {
            let mut improved = false;

            for j in 0..n_features {
                let old_scale = length_scales[j];
                let mut best_scale = old_scale;
                let mut best_ll = self.compute_log_likelihood(x, &length_scales)?;

                // Try rescaling the current dimension by a few fixed factors
                for factor in [0.5, 0.8, 1.2, 2.0] {
                    length_scales[j] = old_scale * factor;
                    let ll = self.compute_log_likelihood(x, &length_scales)?;
                    // Require an improvement of at least `tol` to keep searching
                    if ll > best_ll + self.tol {
                        best_ll = ll;
                        best_scale = length_scales[j];
                        improved = true;
                    }
                }

                length_scales[j] = best_scale;
            }

            if !improved {
                break;
            }
        }

        Ok(length_scales)
    }

    /// Compute a simplified log likelihood for length scale optimization.
    ///
    /// Only the Gaussian normalization term -0.5 (log|K| + n log 2π) is used;
    /// there is no data-fit term because fitting is unsupervised.
    fn compute_log_likelihood(&self, x: &Array2<f64>, length_scales: &Array1<f64>) -> Result<f64> {
        let n = x.nrows();
        let mut k = Array2::zeros((n, n));

        // Compute the anisotropic RBF kernel matrix
        for i in 0..n {
            for j in 0..n {
                let mut dist_sq = 0.0;
                for d in 0..x.ncols() {
                    let diff = x[(i, d)] - x[(j, d)];
                    dist_sq += diff * diff / (length_scales[d] * length_scales[d]);
                }
                k[(i, j)] = self.signal_variance * (-0.5 * dist_sq).exp();
            }
        }

        // Add jitter to the diagonal for numerical stability
        for i in 0..n {
            k[(i, i)] += 1e-6;
        }

        // Compute the log determinant via SVD for numerical stability
        // (more robust than a Cholesky factorization for near-singular K)
        let (_, s, _) = k.svd(false).map_err(|e| {
            SklearsError::NumericalError(format!("SVD decomposition failed: {:?}", e))
        })?;
        let log_det = s.mapv(|x| if x > 1e-10 { x.ln() } else { -23.0 }).sum(); // log(1e-10) ≈ -23
        let log_likelihood = -0.5 * (log_det + n as f64 * (2.0 * PI).ln());

        Ok(log_likelihood)
    }
}

impl Fit<Array2<f64>, ()> for AnisotropicRBFSampler {
    type Fitted = FittedAnisotropicRBF;

    fn fit(self, x: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
        let n_features = x.ncols();

        // Determine length scales
        let length_scales = if self.learn_length_scales {
            self.learn_length_scales_ml(x)?
        } else if self.length_scales.len() == 1 {
            Array1::from_elem(n_features, self.length_scales[0])
        } else if self.length_scales.len() == n_features {
            Array1::from_vec(self.length_scales.clone())
        } else {
            return Err(SklearsError::InvalidInput(
                "Length scales must be either a scalar or match the number of features".to_string(),
            ));
        };

        // Generate random frequencies; dimension j is drawn from N(0, 1/l_j²),
        // the spectral density of the anisotropic RBF kernel
        let mut rng = if let Some(seed) = self.random_state {
            RealStdRng::seed_from_u64(seed)
        } else {
            RealStdRng::from_seed(thread_rng().gen())
        };

        let mut random_weights = Array2::zeros((self.n_components, n_features));
        for j in 0..n_features {
            let normal = RandNormal::new(0.0, 1.0 / length_scales[j]).unwrap();
            for i in 0..self.n_components {
                random_weights[(i, j)] = rng.sample(normal);
            }
        }

        // Generate random biases uniformly from [0, 2π)
        let uniform = RandUniform::new(0.0, 2.0 * PI).unwrap();
        let random_biases = Array1::from_shape_fn(self.n_components, |_| rng.sample(uniform));

        Ok(FittedAnisotropicRBF {
            random_weights,
            random_biases,
            length_scales,
            signal_variance: self.signal_variance,
            n_features,
        })
    }
}

impl Transform<Array2<f64>, Array2<f64>> for FittedAnisotropicRBF {
    fn transform(&self, x: &Array2<f64>) -> Result<Array2<f64>> {
        if x.ncols() != self.n_features {
            return Err(SklearsError::InvalidInput(
                "Feature dimension mismatch".to_string(),
            ));
        }

        let n_samples = x.nrows();
        let n_components = self.random_weights.nrows();
        let normalization = (2.0 * self.signal_variance / n_components as f64).sqrt();

        let mut features = Array2::zeros((n_samples, n_components));

        // z_j(x) = √(2σ²/D) · cos(wⱼ · x + bⱼ)
        for i in 0..n_samples {
            for j in 0..n_components {
                let mut dot_product = 0.0;
                for k in 0..self.n_features {
                    dot_product += x[(i, k)] * self.random_weights[(j, k)];
                }
                features[(i, j)] = normalization * (dot_product + self.random_biases[j]).cos();
            }
        }

        Ok(features)
    }
}

/// Mahalanobis distance-based RBF sampler
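///
/// Approximates the Mahalanobis RBF kernel
/// k(x, x') = σ² exp(-0.5 (x - x')ᵀ Σ⁻¹ (x - x')),
/// where Σ is the regularized sample covariance estimated at fit time. The
/// data are whitened with Σ^(-1/2), so standard-normal random frequencies in
/// the whitened space reproduce this kernel.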
#[derive(Debug, Clone)]
pub struct MahalanobisRBFSampler {
    /// Number of random features
    n_components: usize,
    /// Signal variance
    signal_variance: f64,
    /// Regularization parameter for covariance matrix
    reg_param: f64,
    /// Random seed
    random_state: Option<u64>,
}

/// Fitted Mahalanobis RBF sampler
#[derive(Debug, Clone)]
pub struct FittedMahalanobisRBF {
    /// Random frequencies (pre-whitened)
    random_weights: Array2<f64>,
    /// Random biases
    random_biases: Array1<f64>,
    /// Whitening transformation matrix
    whitening_matrix: Array2<f64>,
    /// Data mean
    mean: Array1<f64>,
    /// Signal variance
    signal_variance: f64,
    /// Number of features
    n_features: usize,
}

impl MahalanobisRBFSampler {
    /// Create new Mahalanobis RBF sampler
    pub fn new(n_components: usize) -> Self {
        Self {
            n_components,
            signal_variance: 1.0,
            reg_param: 1e-6,
            random_state: None,
        }
    }

    /// Set signal variance
    pub fn signal_variance(mut self, signal_variance: f64) -> Self {
        self.signal_variance = signal_variance;
        self
    }

    /// Set regularization parameter
    pub fn reg_param(mut self, reg_param: f64) -> Self {
        self.reg_param = reg_param;
        self
    }

    /// Set random seed
    pub fn random_state(mut self, seed: u64) -> Self {
        self.random_state = Some(seed);
        self
    }
}

impl Fit<Array2<f64>, ()> for MahalanobisRBFSampler {
    type Fitted = FittedMahalanobisRBF;

    fn fit(self, x: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
        let n_features = x.ncols();
        let n_samples = x.nrows();

        // Compute mean
        let mean = x.mean_axis(Axis(0)).unwrap();

        // Center data
        let x_centered = x - &mean.clone().insert_axis(Axis(0));

        // Compute covariance matrix
        let cov = x_centered.t().dot(&x_centered) / (n_samples - 1) as f64;

        // Add regularization to the diagonal
        let mut cov_reg = cov;
        for i in 0..n_features {
            cov_reg[(i, i)] += self.reg_param;
        }

        // Compute whitening matrix (inverse square root of covariance)
        let (eigenvals_complex, eigenvecs_complex) = cov_reg.eig().map_err(|e| {
            SklearsError::NumericalError(format!("Eigendecomposition failed: {:?}", e))
        })?;

        // Extract real parts (eigenvalues of a symmetric matrix are real)
        let eigenvals = eigenvals_complex.mapv(|x| x.re);
        let eigenvecs = eigenvecs_complex.mapv(|x| x.re);

        // Create the inverse square root matrix Σ^(-1/2) = V Λ^(-1/2) Vᵀ,
        // guarding against tiny negative eigenvalues from round-off
        let mut inv_sqrt_eigenvals = Array1::zeros(n_features);
        for i in 0..n_features {
            inv_sqrt_eigenvals[i] = 1.0 / eigenvals[i].max(1e-12).sqrt();
        }

        let whitening_matrix = eigenvecs
            .dot(&Array2::from_diag(&inv_sqrt_eigenvals))
            .dot(&eigenvecs.t());

        // Generate random frequencies from the standard normal distribution
        let mut rng = if let Some(seed) = self.random_state {
            RealStdRng::seed_from_u64(seed)
        } else {
            RealStdRng::from_seed(thread_rng().gen())
        };

        let mut random_weights = Array2::zeros((self.n_components, n_features));
        for i in 0..self.n_components {
            for j in 0..n_features {
                random_weights[(i, j)] = rng.sample(StandardNormal);
            }
        }

        // Generate random biases uniformly from [0, 2π)
        let uniform = RandUniform::new(0.0, 2.0 * PI).unwrap();
        let random_biases = Array1::from_shape_fn(self.n_components, |_| rng.sample(uniform));

        Ok(FittedMahalanobisRBF {
            random_weights,
            random_biases,
            whitening_matrix,
            mean,
            signal_variance: self.signal_variance,
            n_features,
        })
    }
}

impl Transform<Array2<f64>, Array2<f64>> for FittedMahalanobisRBF {
    fn transform(&self, x: &Array2<f64>) -> Result<Array2<f64>> {
        if x.ncols() != self.n_features {
            return Err(SklearsError::InvalidInput(
                "Feature dimension mismatch".to_string(),
            ));
        }

        let n_samples = x.nrows();
        let n_components = self.random_weights.nrows();
        let normalization = (2.0 * self.signal_variance / n_components as f64).sqrt();

        // Center and whiten data
        let x_centered = x - &self.mean.clone().insert_axis(Axis(0));
        let x_whitened = x_centered.dot(&self.whitening_matrix);

        let mut features = Array2::zeros((n_samples, n_components));

        for i in 0..n_samples {
            for j in 0..n_components {
                let dot_product = x_whitened.row(i).dot(&self.random_weights.row(j));
                features[(i, j)] = normalization * (dot_product + self.random_biases[j]).cos();
            }
        }

        Ok(features)
    }
}

/// Robust anisotropic RBF sampler with outlier resistance
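///
/// Estimates a robust mean and covariance (MCD, MVE, or Huber) so that
/// outliers do not distort the metric, then draws random frequencies from
/// N(0, Σ_robust⁻¹), mirroring the anisotropic construction above with the
/// robust covariance in place of the diagonal length-scale matrix.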
#[derive(Debug, Clone)]
pub struct RobustAnisotropicRBFSampler {
    /// Number of random features
    n_components: usize,
    /// Robust estimator type
    robust_estimator: RobustEstimator,
    /// Signal variance
    signal_variance: f64,
    /// Random seed
    random_state: Option<u64>,
}

/// Types of robust estimators for covariance
#[derive(Debug, Clone)]
pub enum RobustEstimator {
    /// Minimum Covariance Determinant
    MCD {
        /// Fraction of samples assumed to be inliers (subset size h ≈ n · support_fraction)
        support_fraction: f64,
    },
    /// Minimum Volume Ellipsoid
    MVE,
    /// Huber's M-estimator
    Huber {
        /// Tuning constant; c = 1.345 gives ≈95% efficiency under normality
        c: f64,
    },
}

/// Fitted robust anisotropic RBF sampler
#[derive(Debug, Clone)]
pub struct FittedRobustAnisotropicRBF {
    /// Random frequencies
    random_weights: Array2<f64>,
    /// Random biases
    random_biases: Array1<f64>,
    /// Robust covariance matrix
    robust_cov: Array2<f64>,
    /// Robust mean
    robust_mean: Array1<f64>,
    /// Signal variance
    signal_variance: f64,
    /// Number of features
    n_features: usize,
}

impl RobustAnisotropicRBFSampler {
    /// Create new robust anisotropic RBF sampler
    pub fn new(n_components: usize) -> Self {
        Self {
            n_components,
            robust_estimator: RobustEstimator::MCD {
                support_fraction: 0.8,
            },
            signal_variance: 1.0,
            random_state: None,
        }
    }

    /// Set robust estimator
    pub fn robust_estimator(mut self, estimator: RobustEstimator) -> Self {
        self.robust_estimator = estimator;
        self
    }

    /// Set signal variance
    pub fn signal_variance(mut self, signal_variance: f64) -> Self {
        self.signal_variance = signal_variance;
        self
    }

    /// Set random seed
    pub fn random_state(mut self, seed: u64) -> Self {
        self.random_state = Some(seed);
        self
    }

    /// Compute robust covariance using MCD estimator
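    ///
    /// A simplified MCD search: draws 50 random subsets of size
    /// h = ⌊n · support_fraction⌋ (at least n_features + 1) and keeps the
    /// subset mean and covariance with the smallest covariance determinant
    /// (no C-step refinement).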
    fn compute_mcd_covariance(
        &self,
        x: &Array2<f64>,
        support_fraction: f64,
    ) -> Result<(Array1<f64>, Array2<f64>)> {
        let n_samples = x.nrows();
        let n_features = x.ncols();
        let h = ((n_samples as f64 * support_fraction).floor() as usize).max(n_features + 1);

        let mut best_log_det = f64::INFINITY;
        let mut best_mean = Array1::zeros(n_features);
        let mut best_cov = Array2::eye(n_features);

        // Respect the configured seed so fitting stays reproducible
        let mut rng = if let Some(seed) = self.random_state {
            RealStdRng::seed_from_u64(seed)
        } else {
            RealStdRng::from_seed(thread_rng().gen())
        };

        // Try multiple random subsets of size h
        for _ in 0..50 {
            // Select a random subset
            let mut indices: Vec<usize> = (0..n_samples).collect();
            indices.shuffle(&mut rng);
            indices.truncate(h);

            let subset = x.select(Axis(0), &indices);

            // Compute mean and covariance of the subset
            let mean = subset.mean_axis(Axis(0)).unwrap();
            let centered = &subset - &mean.clone().insert_axis(Axis(0));
            let cov = centered.t().dot(&centered) / (h - 1) as f64;

            // Add regularization for numerical stability
            let mut cov_reg = cov;
            let trace = cov_reg.diag().sum();
            let reg = trace * 1e-6 / n_features as f64;
            for i in 0..n_features {
                cov_reg[(i, i)] += reg;
            }

            // Compare log determinants directly; exponentiating can
            // underflow or overflow in higher dimensions
            if let Ok((_, s, _)) = cov_reg.svd(false) {
                let log_det: f64 = s.mapv(|x| if x > 1e-10 { x.ln() } else { -23.0 }).sum();

                if log_det < best_log_det {
                    best_log_det = log_det;
                    best_mean = mean;
                    best_cov = cov_reg;
                }
            }
        }

        Ok((best_mean, best_cov))
    }

    /// Compute robust covariance using Huber's M-estimator
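    ///
    /// Iteratively reweighted estimation: each sample receives weight
    /// w_i = min(1, c / d_i), where d_i is its Mahalanobis distance under the
    /// current estimate; the weighted mean and covariance are recomputed
    /// until the mean stabilizes (at most 20 iterations).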
    fn compute_huber_covariance(
        &self,
        x: &Array2<f64>,
        c: f64,
    ) -> Result<(Array1<f64>, Array2<f64>)> {
        let n_samples = x.nrows();
        let n_features = x.ncols();

        // Start with the sample mean and covariance
        let mut mean = x.mean_axis(Axis(0)).unwrap();
        let centered = x - &mean.clone().insert_axis(Axis(0));
        let mut cov = centered.t().dot(&centered) / (n_samples - 1) as f64;

        // Iteratively reweighted estimation
        for _ in 0..20 {
            // Compute Mahalanobis distances
            let cov_inv = cov.inv().map_err(|_| {
                SklearsError::InvalidInput("Covariance matrix not positive definite".to_string())
            })?;

            // Huber weights: full weight inside radius c, downweighted outside
            let mut weights = Array1::zeros(n_samples);
            for i in 0..n_samples {
                let diff = &x.row(i) - &mean;
                let mahal_dist = diff.dot(&cov_inv).dot(&diff).sqrt();
                weights[i] = if mahal_dist <= c { 1.0 } else { c / mahal_dist };
            }

            // Update mean
            let weight_sum = weights.sum();
            let mut new_mean = Array1::zeros(n_features);
            for i in 0..n_samples {
                for j in 0..n_features {
                    new_mean[j] += weights[i] * x[(i, j)];
                }
            }
            new_mean /= weight_sum;

            // Update covariance
            let mut new_cov = Array2::zeros((n_features, n_features));
            for i in 0..n_samples {
                let diff = &x.row(i) - &new_mean;
                let outer =
                    Array2::from_shape_fn((n_features, n_features), |(j, k)| diff[j] * diff[k]);
                new_cov = new_cov + weights[i] * outer;
            }
            new_cov /= weight_sum;

            // Check convergence of the mean
            let mean_diff = (&new_mean - &mean).mapv(|x| x.abs()).sum();
            if mean_diff < 1e-6 {
                break;
            }

            mean = new_mean;
            cov = new_cov;
        }

        Ok((mean, cov))
    }
}

impl Fit<Array2<f64>, ()> for RobustAnisotropicRBFSampler {
    type Fitted = FittedRobustAnisotropicRBF;

    fn fit(self, x: &Array2<f64>, _y: &()) -> Result<Self::Fitted> {
        let n_features = x.ncols();

        // Compute robust mean and covariance
        let (robust_mean, robust_cov) = match &self.robust_estimator {
            RobustEstimator::MCD { support_fraction } => {
                self.compute_mcd_covariance(x, *support_fraction)?
            }
            RobustEstimator::MVE => {
                // Simplified MVE: reuse MCD with a smaller support fraction
                self.compute_mcd_covariance(x, 0.5)?
            }
            RobustEstimator::Huber { c } => self.compute_huber_covariance(x, *c)?,
        };

        // Compute precision matrix (inverse covariance)
        let precision = robust_cov.inv().map_err(|_| {
            SklearsError::InvalidInput("Robust covariance matrix not positive definite".to_string())
        })?;

        // Generate random frequencies from a multivariate normal with the
        // precision matrix as covariance
        let mut rng = if let Some(seed) = self.random_state {
            RealStdRng::seed_from_u64(seed)
        } else {
            RealStdRng::from_seed(thread_rng().gen())
        };

        // Factor the precision matrix via SVD (in place of Cholesky) so we can
        // sample w = P^(1/2) z with z ~ N(0, I); for symmetric P = U S Uᵀ,
        // P^(1/2) = U S^(1/2) satisfies P^(1/2) (P^(1/2))ᵀ = P
        let (u, s, _vt) = precision.svd(true).map_err(|e| {
            SklearsError::NumericalError(format!("SVD decomposition failed: {:?}", e))
        })?;
        let s_sqrt = s.mapv(|x| x.sqrt());
        let precision_sqrt = u.dot(&Array2::from_diag(&s_sqrt));

        let mut random_weights = Array2::zeros((self.n_components, n_features));
        for i in 0..self.n_components {
            let mut z = Array1::zeros(n_features);
            for j in 0..n_features {
                z[j] = rng.sample(StandardNormal);
            }
            // Multiply by P^(1/2) itself (not its transpose) so that cov(w) = P
            let w = precision_sqrt.dot(&z);
            random_weights.row_mut(i).assign(&w);
        }

        // Generate random biases uniformly from [0, 2π)
        let uniform = RandUniform::new(0.0, 2.0 * PI).unwrap();
        let random_biases = Array1::from_shape_fn(self.n_components, |_| rng.sample(uniform));

        Ok(FittedRobustAnisotropicRBF {
            random_weights,
            random_biases,
            robust_cov,
            robust_mean,
            signal_variance: self.signal_variance,
            n_features,
        })
    }
}

impl Transform<Array2<f64>, Array2<f64>> for FittedRobustAnisotropicRBF {
    fn transform(&self, x: &Array2<f64>) -> Result<Array2<f64>> {
        if x.ncols() != self.n_features {
            return Err(SklearsError::InvalidInput(
                "Feature dimension mismatch".to_string(),
            ));
        }

        let n_samples = x.nrows();
        let n_components = self.random_weights.nrows();
        let normalization = (2.0 * self.signal_variance / n_components as f64).sqrt();

        // Center data
        let x_centered = x - &self.robust_mean.clone().insert_axis(Axis(0));

        let mut features = Array2::zeros((n_samples, n_components));

        for i in 0..n_samples {
            for j in 0..n_components {
                let dot_product = x_centered.row(i).dot(&self.random_weights.row(j));
                features[(i, j)] = normalization * (dot_product + self.random_biases[j]).cos();
            }
        }

        Ok(features)
    }
}

#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_abs_diff_eq;
    use scirs2_core::ndarray::array;

    #[test]
    fn test_anisotropic_rbf_sampler() {
        let sampler = AnisotropicRBFSampler::new(100)
            .length_scales(vec![1.0, 2.0])
            .signal_variance(1.5);

        let x = array![[0.0, 0.0], [1.0, 1.0], [2.0, 2.0], [-1.0, -1.0]];
        let fitted = sampler.fit(&x, &()).unwrap();
        let features = fitted.transform(&x).unwrap();

        assert_eq!(features.shape(), &[4, 100]);
        assert!(features.iter().all(|&x| x.is_finite()));

        // Check that features have approximately zero mean
        let mean = features.mean_axis(Axis(0)).unwrap();
        for &m in mean.iter() {
            assert!(m.abs() < 0.5);
        }
    }

    #[test]
    fn test_anisotropic_rbf_learned_scales() {
        let sampler = AnisotropicRBFSampler::new(50)
            .learn_length_scales(true)
            .signal_variance(1.0);

        // Data with different scales in each dimension
        let x = array![
            [0.0, 0.0],
            [1.0, 0.1],
            [2.0, 0.2],
            [3.0, 0.3],
            [4.0, 0.4],
            [5.0, 0.5],
            [-1.0, -0.1],
            [-2.0, -0.2]
        ];

        let fitted = sampler.fit(&x, &()).unwrap();
        let features = fitted.transform(&x).unwrap();

        assert_eq!(features.shape(), &[8, 50]);
        assert!(features.iter().all(|&x| x.is_finite()));

        // First dimension should have larger length scale than second
        assert!(fitted.length_scales[0] > fitted.length_scales[1]);
    }

    #[test]
    fn test_mahalanobis_rbf_sampler() {
        let sampler = MahalanobisRBFSampler::new(80)
            .signal_variance(2.0)
            .reg_param(1e-4);

        let x = array![
            [1.0, 2.0],
            [2.0, 4.0],
            [3.0, 6.0],
            [1.5, 3.0],
            [2.5, 5.0],
            [0.5, 1.0],
            [3.5, 7.0],
            [4.0, 8.0]
        ];

        let fitted = sampler.fit(&x, &()).unwrap();
        let features = fitted.transform(&x).unwrap();

        assert_eq!(features.shape(), &[8, 80]);
        assert!(features.iter().all(|&x| x.is_finite()));

        // Test with new data
        let x_test = array![[2.0, 4.0], [1.0, 2.0]];
        let features_test = fitted.transform(&x_test).unwrap();
        assert_eq!(features_test.shape(), &[2, 80]);
        assert!(features_test.iter().all(|&x| x.is_finite()));
    }

    #[test]
    fn test_robust_anisotropic_rbf_mcd() {
        let sampler = RobustAnisotropicRBFSampler::new(60)
            .robust_estimator(RobustEstimator::MCD {
                support_fraction: 0.7,
            })
            .signal_variance(1.0);

        // Data with outliers
        let x = array![
            [1.0, 1.0],
            [1.1, 1.1],
            [0.9, 0.9],
            [1.2, 1.2],
            [0.8, 0.8],
            [1.3, 1.3],
            [10.0, 10.0], // outlier
            [1.05, 1.05],
            [0.95, 0.95],
            [1.15, 1.15]
        ];

        let fitted = sampler.fit(&x, &()).unwrap();
        let features = fitted.transform(&x).unwrap();

        assert_eq!(features.shape(), &[10, 60]);
        assert!(features.iter().all(|&x| x.is_finite()));
    }

    #[test]
    fn test_robust_anisotropic_rbf_huber() {
        let sampler = RobustAnisotropicRBFSampler::new(40)
            .robust_estimator(RobustEstimator::Huber { c: 1.345 })
            .signal_variance(0.5);

        let x = array![
            [0.0, 0.0],
            [1.0, 0.0],
            [0.0, 1.0],
            [1.0, 1.0],
            [0.5, 0.5],
            [1.5, 0.5],
            [0.5, 1.5],
            [5.0, 5.0] // outlier
        ];

        let fitted = sampler.fit(&x, &()).unwrap();
        let features = fitted.transform(&x).unwrap();

        assert_eq!(features.shape(), &[8, 40]);
        assert!(features.iter().all(|&x| x.is_finite()));
    }

    #[test]
    fn test_anisotropic_rbf_reproducibility() {
        let sampler1 = AnisotropicRBFSampler::new(30)
            .random_state(42)
            .length_scales(vec![1.0, 2.0]);

        let sampler2 = AnisotropicRBFSampler::new(30)
            .random_state(42)
            .length_scales(vec![1.0, 2.0]);

        let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];

        let features1 = sampler1.fit(&x, &()).unwrap().transform(&x).unwrap();
        let features2 = sampler2.fit(&x, &()).unwrap().transform(&x).unwrap();

        for i in 0..features1.nrows() {
            for j in 0..features1.ncols() {
                assert_abs_diff_eq!(features1[(i, j)], features2[(i, j)], epsilon = 1e-10);
            }
        }
    }

    #[test]
    fn test_length_scale_learning() {
        let sampler = AnisotropicRBFSampler::new(20)
            .learn_length_scales(true)
            .optimization_params(10, 1e-4);

        // Create data where the first feature varies much more than the second
        let x = array![
            [0.0, 0.0],
            [10.0, 0.1],
            [20.0, 0.2],
            [30.0, 0.3],
            [-10.0, -0.1],
            [-20.0, -0.2],
            [15.0, 0.15],
            [25.0, 0.25]
        ];

        let fitted = sampler.fit(&x, &()).unwrap();

        // First dimension should have much larger length scale
        assert!(fitted.length_scales[0] > 5.0 * fitted.length_scales[1]);
    }
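
    // Approximation-quality sketch: with many components and a fixed seed,
    // z(x)·z(x') should approach the exact anisotropic RBF kernel value
    // (the Monte Carlo error is O(1/√D), hence the loose tolerance).
    #[test]
    fn test_kernel_approximation_quality() {
        let sampler = AnisotropicRBFSampler::new(2000)
            .length_scales(vec![1.0, 2.0])
            .random_state(7);

        let x = array![[0.0, 0.0], [1.0, 1.0]];
        let fitted = sampler.fit(&x, &()).unwrap();
        let z = fitted.transform(&x).unwrap();

        // Exact kernel with σ² = 1: exp(-0.5 · ((1/1)² + (1/2)²)) = exp(-0.625)
        let exact = (-0.625f64).exp();
        let approx = z.row(0).dot(&z.row(1));

        assert!((approx - exact).abs() < 0.15);
    }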
}