sklears_kernel_approximation/sparse_gp/
approximations.rs

//! Sparse approximation methods for Gaussian Processes
//!
//! This module implements various sparse approximation methods including
//! Subset of Regressors (SoR), Fully Independent Conditional (FIC),
//! Partially Independent Conditional (PIC), and a framework for VFE.
6
7use crate::sparse_gp::core::*;
8use crate::sparse_gp::kernels::{KernelOps, SparseKernel};
9use scirs2_core::ndarray::{Array1, Array2, Axis};
10use scirs2_core::random::thread_rng;
11use sklears_core::error::{Result, SklearsError};
12
/// Inducing point selection strategies implementation.
///
/// Stateless namespace type: every selection routine is an associated
/// function that takes the training data (and, where needed, a kernel)
/// as explicit arguments; no state is stored on the struct itself.
pub struct InducingPointSelector;
15
16impl InducingPointSelector {
17    /// Select inducing points based on the specified strategy
18    pub fn select_points<K: SparseKernel>(
19        strategy: &InducingPointStrategy,
20        x: &Array2<f64>,
21        num_inducing: usize,
22        kernel: &K,
23    ) -> Result<Array2<f64>> {
24        match strategy {
25            InducingPointStrategy::Random => Self::random_selection(x, num_inducing),
26            InducingPointStrategy::KMeans => Self::kmeans_selection(x, num_inducing),
27            InducingPointStrategy::UniformGrid { grid_size } => {
28                Self::uniform_grid_selection(x, grid_size)
29            }
30            InducingPointStrategy::GreedyVariance => {
31                Self::greedy_variance_selection(x, num_inducing, kernel)
32            }
33            InducingPointStrategy::UserSpecified(points) => Ok(points.clone()),
34        }
35    }
36
37    /// Random selection from training data
38    fn random_selection(x: &Array2<f64>, num_inducing: usize) -> Result<Array2<f64>> {
39        let n_samples = x.nrows();
40        if num_inducing > n_samples {
41            return Err(SklearsError::InvalidInput(
42                "Number of inducing points exceeds training data size".to_string(),
43            ));
44        }
45
46        let mut rng = thread_rng();
47        let mut indices: Vec<usize> = (0..n_samples).collect();
48
49        // Fisher-Yates shuffle
50        for i in (1..n_samples).rev() {
51            let j = rng.gen_range(0..i + 1);
52            indices.swap(i, j);
53        }
54
55        // Take first num_inducing points
56        let mut inducing_points = Array2::zeros((num_inducing, x.ncols()));
57        for (i, &idx) in indices.iter().take(num_inducing).enumerate() {
58            inducing_points.row_mut(i).assign(&x.row(idx));
59        }
60
61        Ok(inducing_points)
62    }
63
64    /// K-means clustering for inducing point selection
65    fn kmeans_selection(x: &Array2<f64>, num_inducing: usize) -> Result<Array2<f64>> {
66        let n_samples = x.nrows();
67        let n_features = x.ncols();
68
69        if num_inducing >= n_samples {
70            return Ok(x.clone());
71        }
72
73        // Initialize centroids randomly
74        let mut rng = thread_rng();
75        let mut centroids = Array2::zeros((num_inducing, n_features));
76        for i in 0..num_inducing {
77            let idx = rng.gen_range(0..n_samples);
78            centroids.row_mut(i).assign(&x.row(idx));
79        }
80
81        let max_iter = 100;
82        let tol = 1e-6;
83
84        for _iter in 0..max_iter {
85            let old_centroids = centroids.clone();
86
87            // Assign points to clusters
88            let mut assignments = vec![0; n_samples];
89            for i in 0..n_samples {
90                let mut min_dist = f64::INFINITY;
91                let mut best_cluster = 0;
92
93                for j in 0..num_inducing {
94                    let mut dist = 0.0;
95                    for k in 0..n_features {
96                        let diff = x[(i, k)] - centroids[(j, k)];
97                        dist += diff * diff;
98                    }
99
100                    if dist < min_dist {
101                        min_dist = dist;
102                        best_cluster = j;
103                    }
104                }
105                assignments[i] = best_cluster;
106            }
107
108            // Update centroids
109            centroids.fill(0.0);
110            let mut cluster_counts = vec![0; num_inducing];
111
112            for i in 0..n_samples {
113                let cluster = assignments[i];
114                cluster_counts[cluster] += 1;
115                for j in 0..n_features {
116                    centroids[(cluster, j)] += x[(i, j)];
117                }
118            }
119
120            for i in 0..num_inducing {
121                if cluster_counts[i] > 0 {
122                    for j in 0..n_features {
123                        centroids[(i, j)] /= cluster_counts[i] as f64;
124                    }
125                }
126            }
127
128            // Check convergence
129            let mut max_change: f64 = 0.0;
130            for i in 0..num_inducing {
131                for j in 0..n_features {
132                    let change = (centroids[(i, j)] - old_centroids[(i, j)]).abs();
133                    max_change = max_change.max(change);
134                }
135            }
136
137            if max_change < tol {
138                break;
139            }
140        }
141
142        Ok(centroids)
143    }
144
145    /// Uniform grid selection
146    fn uniform_grid_selection(x: &Array2<f64>, grid_size: &[usize]) -> Result<Array2<f64>> {
147        let n_features = x.ncols();
148        if grid_size.len() != n_features {
149            return Err(SklearsError::InvalidInput(
150                "Grid size must match number of features".to_string(),
151            ));
152        }
153
154        let total_points: usize = grid_size.iter().product();
155        let mut grid_points = Array2::zeros((total_points, n_features));
156
157        // Compute feature ranges
158        let mut ranges = Vec::with_capacity(n_features);
159        for j in 0..n_features {
160            let col = x.column(j);
161            let min_val = col.fold(f64::INFINITY, |a, &b| a.min(b));
162            let max_val = col.fold(f64::NEG_INFINITY, |a, &b| a.max(b));
163            ranges.push((min_val, max_val));
164        }
165
166        // Generate grid points
167        let mut point_idx = 0;
168        Self::generate_grid_recursive(
169            &mut grid_points,
170            &ranges,
171            grid_size,
172            &mut vec![0; n_features],
173            0,
174            &mut point_idx,
175        );
176
177        Ok(grid_points)
178    }
179
180    /// Recursive helper for grid generation
181    fn generate_grid_recursive(
182        grid_points: &mut Array2<f64>,
183        ranges: &[(f64, f64)],
184        grid_size: &[usize],
185        current_indices: &mut Vec<usize>,
186        dim: usize,
187        point_idx: &mut usize,
188    ) {
189        if dim == ranges.len() {
190            // Generate point
191            for (j, &idx) in current_indices.iter().enumerate() {
192                let (min_val, max_val) = ranges[j];
193                let grid_val = if grid_size[j] == 1 {
194                    (min_val + max_val) / 2.0
195                } else {
196                    min_val + idx as f64 * (max_val - min_val) / (grid_size[j] - 1) as f64
197                };
198                grid_points[(*point_idx, j)] = grid_val;
199            }
200            *point_idx += 1;
201            return;
202        }
203
204        for i in 0..grid_size[dim] {
205            current_indices[dim] = i;
206            Self::generate_grid_recursive(
207                grid_points,
208                ranges,
209                grid_size,
210                current_indices,
211                dim + 1,
212                point_idx,
213            );
214        }
215    }
216
217    /// Greedy variance-based selection
218    fn greedy_variance_selection<K: SparseKernel>(
219        x: &Array2<f64>,
220        num_inducing: usize,
221        kernel: &K,
222    ) -> Result<Array2<f64>> {
223        let n_samples = x.nrows();
224        let n_features = x.ncols();
225
226        if num_inducing >= n_samples {
227            return Ok(x.clone());
228        }
229
230        let mut selected_indices = Vec::new();
231        let mut remaining_indices: Vec<usize> = (0..n_samples).collect();
232
233        // Select first point randomly
234        let mut rng = thread_rng();
235        let first_idx = rng.gen_range(0..remaining_indices.len());
236        selected_indices.push(remaining_indices.remove(first_idx));
237
238        // Greedily select remaining points
239        for _ in 1..num_inducing {
240            let mut best_idx = 0;
241            let mut best_variance = -f64::INFINITY;
242
243            for (i, &candidate_idx) in remaining_indices.iter().enumerate() {
244                // Compute predictive variance at candidate point
245                let variance = Self::compute_predictive_variance(
246                    &x.row(candidate_idx).to_owned(),
247                    x,
248                    &selected_indices,
249                    kernel,
250                );
251
252                if variance > best_variance {
253                    best_variance = variance;
254                    best_idx = i;
255                }
256            }
257
258            selected_indices.push(remaining_indices.remove(best_idx));
259        }
260
261        // Create inducing points matrix
262        let mut inducing_points = Array2::zeros((num_inducing, n_features));
263        for (i, &idx) in selected_indices.iter().enumerate() {
264            inducing_points.row_mut(i).assign(&x.row(idx));
265        }
266
267        Ok(inducing_points)
268    }
269
270    /// Compute predictive variance for greedy selection
271    fn compute_predictive_variance<K: SparseKernel>(
272        x_star: &Array1<f64>,
273        x_train: &Array2<f64>,
274        selected_indices: &[usize],
275        kernel: &K,
276    ) -> f64 {
277        if selected_indices.is_empty() {
278            return 1.0; // Prior variance
279        }
280
281        // Create inducing points from selected indices
282        let num_selected = selected_indices.len();
283        let mut inducing_points = Array2::zeros((num_selected, x_train.ncols()));
284        for (i, &idx) in selected_indices.iter().enumerate() {
285            inducing_points.row_mut(i).assign(&x_train.row(idx));
286        }
287
288        // Compute kernel matrices
289        let k_mm = kernel.kernel_matrix(&inducing_points, &inducing_points);
290        let x_star_2d = x_star.clone().insert_axis(Axis(0));
291        let k_star_m = kernel.kernel_matrix(&x_star_2d, &inducing_points);
292        let k_star_star = kernel.kernel_diagonal(&x_star_2d)[0];
293
294        // Compute predictive variance: k(x*,x*) - k(x*,X_m) K_mm^(-1) k(X_m,x*)
295        if let Ok(k_mm_inv) = KernelOps::invert_using_cholesky(&k_mm) {
296            let temp = k_star_m.dot(&k_mm_inv);
297            let quad_form = temp.dot(&k_star_m.t())[(0, 0)];
298            (k_star_star - quad_form).max(0.0)
299        } else {
300            1.0 // Return prior variance if inversion fails
301        }
302    }
303}
304
/// Sparse approximation method implementations.
///
/// Stateless namespace type: each `fit_*` associated function fits one
/// approximation (SoR, FIC, PIC) and returns a weight vector `alpha`
/// together with an inverted kernel matrix.
pub struct SparseApproximationMethods;
307
308impl SparseApproximationMethods {
309    /// Fit Subset of Regressors (SoR) approximation
310    pub fn fit_sor<K: SparseKernel>(
311        x: &Array2<f64>,
312        y: &Array1<f64>,
313        inducing_points: &Array2<f64>,
314        kernel: &K,
315        noise_variance: f64,
316    ) -> Result<(Array1<f64>, Array2<f64>)> {
317        let m = inducing_points.nrows();
318
319        // Compute kernel matrix K_mm
320        let k_mm = kernel.kernel_matrix(inducing_points, inducing_points);
321
322        // Add noise for numerical stability
323        let mut k_mm_noise = k_mm.clone();
324        for i in 0..m {
325            k_mm_noise[(i, i)] += noise_variance;
326        }
327
328        // Compute K_nm (cross-covariance between data and inducing points)
329        let _k_nm = kernel.kernel_matrix(x, inducing_points);
330
331        // For SoR: use only inducing point subset
332        // Solve (K_mm + σ²I) α = K_mn y_subset
333        let inducing_targets = if x.nrows() == inducing_points.nrows() {
334            y.clone()
335        } else {
336            // If not exact match, use nearest neighbor mapping (simplified)
337            Self::map_targets_to_inducing(y, x, inducing_points, kernel)?
338        };
339
340        let k_mm_inv = KernelOps::invert_using_cholesky(&k_mm_noise)?;
341        let alpha = k_mm_inv.dot(&inducing_targets);
342
343        Ok((alpha, k_mm_inv))
344    }
345
346    /// Fit Fully Independent Conditional (FIC) approximation
347    pub fn fit_fic<K: SparseKernel>(
348        x: &Array2<f64>,
349        y: &Array1<f64>,
350        inducing_points: &Array2<f64>,
351        kernel: &K,
352        noise_variance: f64,
353    ) -> Result<(Array1<f64>, Array2<f64>)> {
354        let _n = x.nrows();
355        let _m = inducing_points.nrows();
356
357        // Compute kernel matrices
358        let k_mm = kernel.kernel_matrix(inducing_points, inducing_points);
359        let k_nm = kernel.kernel_matrix(x, inducing_points);
360        let k_diag = kernel.kernel_diagonal(x);
361
362        // FIC approximation: Q_nn + diag(K_nn - diag(Q_nn)) + σ²I
363        // where Q_nn = K_nm K_mm^(-1) K_mn
364
365        let k_mm_inv = KernelOps::invert_using_cholesky(&k_mm)?;
366        let q_nn_diag = Self::compute_q_diagonal(&k_nm, &k_mm_inv);
367
368        // Diagonal correction: diag(K_nn - Q_nn)
369        let diag_correction: Array1<f64> = k_diag
370            .iter()
371            .zip(q_nn_diag.iter())
372            .map(|(&k_ii, &q_ii)| k_ii - q_ii)
373            .collect();
374
375        // Build the FIC system: (Q_nn + diag_correction + σ²I) α = y
376        // Use Woodbury matrix identity for efficient solution
377        let lambda = &diag_correction + noise_variance;
378        let lambda_inv_diag = lambda.mapv(|x| if x > 1e-12 { 1.0 / x } else { 1e12 });
379
380        // Woodbury: (Q + Λ)^(-1) = Λ^(-1) - Λ^(-1) K_nm (K_mm + K_mn Λ^(-1) K_nm)^(-1) K_mn Λ^(-1)
381        let lambda_inv_k_nm = &k_nm * &lambda_inv_diag.clone().insert_axis(Axis(1));
382        let woodbury_middle = &k_mm + k_nm.t().dot(&lambda_inv_k_nm);
383        let woodbury_middle_inv = KernelOps::invert_using_cholesky(&woodbury_middle)?;
384
385        // Solve for alpha using Woodbury identity
386        let lambda_inv_y = &lambda_inv_diag * y;
387        let temp1 = k_nm.t().dot(&lambda_inv_y);
388        let temp2 = woodbury_middle_inv.dot(&temp1);
389        let temp3 = lambda_inv_k_nm.dot(&temp2);
390        let alpha = &lambda_inv_y - temp3;
391
392        Ok((alpha, k_mm_inv))
393    }
394
395    /// Fit Partially Independent Conditional (PIC) approximation
396    pub fn fit_pic<K: SparseKernel>(
397        x: &Array2<f64>,
398        y: &Array1<f64>,
399        inducing_points: &Array2<f64>,
400        kernel: &K,
401        noise_variance: f64,
402        block_size: usize,
403    ) -> Result<(Array1<f64>, Array2<f64>)> {
404        let n = x.nrows();
405        let _m = inducing_points.nrows();
406
407        if block_size >= n {
408            // If block size >= n, PIC reduces to standard GP
409            return Self::fit_full_gp(x, y, kernel, noise_variance);
410        }
411
412        // Compute kernel matrices
413        let k_mm = kernel.kernel_matrix(inducing_points, inducing_points);
414        let _k_nm = kernel.kernel_matrix(x, inducing_points);
415
416        let k_mm_inv = KernelOps::invert_using_cholesky(&k_mm)?;
417
418        // Build PIC system with block diagonal structure
419        let num_blocks = (n + block_size - 1) / block_size;
420        let mut alpha = Array1::zeros(n);
421
422        // Process each block independently
423        for block_idx in 0..num_blocks {
424            let start_idx = block_idx * block_size;
425            let end_idx = ((block_idx + 1) * block_size).min(n);
426            let block_indices: Vec<usize> = (start_idx..end_idx).collect();
427
428            if block_indices.is_empty() {
429                continue;
430            }
431
432            // Extract block data
433            let mut x_block = Array2::zeros((block_indices.len(), x.ncols()));
434            let mut y_block = Array1::zeros(block_indices.len());
435            for (i, &idx) in block_indices.iter().enumerate() {
436                x_block.row_mut(i).assign(&x.row(idx));
437                y_block[i] = y[idx];
438            }
439
440            // Compute block kernel matrix and solve
441            let k_block = kernel.kernel_matrix(&x_block, &x_block);
442            let k_block_m = kernel.kernel_matrix(&x_block, inducing_points);
443
444            // PIC block system
445            let q_block = k_block_m.dot(&k_mm_inv).dot(&k_block_m.t());
446            let pic_correction = &k_block - &q_block;
447
448            let mut pic_system = pic_correction;
449            for i in 0..pic_system.nrows() {
450                pic_system[(i, i)] += noise_variance;
451            }
452
453            let pic_inv = KernelOps::invert_using_cholesky(&pic_system)?;
454            let alpha_block = pic_inv.dot(&y_block);
455
456            // Store block solution
457            for (i, &idx) in block_indices.iter().enumerate() {
458                alpha[idx] = alpha_block[i];
459            }
460        }
461
462        Ok((alpha, k_mm_inv))
463    }
464
465    /// Helper: Compute Q diagonal for FIC
466    fn compute_q_diagonal(k_nm: &Array2<f64>, k_mm_inv: &Array2<f64>) -> Array1<f64> {
467        let temp = k_nm.dot(k_mm_inv);
468        let mut q_diag = Array1::zeros(k_nm.nrows());
469
470        for i in 0..k_nm.nrows() {
471            q_diag[i] = k_nm.row(i).dot(&temp.row(i));
472        }
473
474        q_diag
475    }
476
477    /// Helper: Map targets to inducing points for SoR
478    fn map_targets_to_inducing<K: SparseKernel>(
479        y: &Array1<f64>,
480        x: &Array2<f64>,
481        inducing_points: &Array2<f64>,
482        _kernel: &K,
483    ) -> Result<Array1<f64>> {
484        let n = x.nrows();
485        let m = inducing_points.nrows();
486        let mut inducing_targets = Array1::zeros(m);
487
488        // Simple approach: find nearest inducing point for each data point
489        for i in 0..n {
490            let mut min_dist = f64::INFINITY;
491            let mut nearest_idx = 0;
492
493            for j in 0..m {
494                let mut dist = 0.0;
495                for k in 0..x.ncols() {
496                    let diff = x[(i, k)] - inducing_points[(j, k)];
497                    dist += diff * diff;
498                }
499
500                if dist < min_dist {
501                    min_dist = dist;
502                    nearest_idx = j;
503                }
504            }
505
506            inducing_targets[nearest_idx] += y[i];
507        }
508
509        Ok(inducing_targets)
510    }
511
512    /// Full GP fit for comparison (when PIC block size is large)
513    fn fit_full_gp<K: SparseKernel>(
514        x: &Array2<f64>,
515        y: &Array1<f64>,
516        kernel: &K,
517        noise_variance: f64,
518    ) -> Result<(Array1<f64>, Array2<f64>)> {
519        let k_nn = kernel.kernel_matrix(x, x);
520        let mut k_noise = k_nn;
521
522        for i in 0..k_noise.nrows() {
523            k_noise[(i, i)] += noise_variance;
524        }
525
526        let k_inv = KernelOps::invert_using_cholesky(&k_noise)?;
527        let alpha = k_inv.dot(y);
528
529        Ok((alpha, k_inv))
530    }
531}
532
#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;
    use crate::sparse_gp::kernels::RBFKernel;
    use scirs2_core::ndarray::array;

    #[test]
    fn test_random_inducing_selection() {
        let data = array![[0.0, 0.0], [1.0, 1.0], [2.0, 2.0], [3.0, 3.0], [4.0, 4.0]];

        let selected = InducingPointSelector::random_selection(&data, 3).unwrap();

        // Expect 3 rows of 2 features, all finite.
        assert_eq!(selected.shape(), &[3, 2]);
        assert!(selected.iter().all(|v| v.is_finite()));
    }

    #[test]
    fn test_kmeans_inducing_selection() {
        // Two tight groups plus an outlier.
        let data = array![[0.0, 0.0], [0.1, 0.1], [5.0, 5.0], [5.1, 5.1], [10.0, 10.0]];

        let centroids = InducingPointSelector::kmeans_selection(&data, 2).unwrap();

        assert_eq!(centroids.shape(), &[2, 2]);
        assert!(centroids.iter().all(|v| v.is_finite()));
    }

    #[test]
    fn test_uniform_grid_selection() {
        let data = array![[0.0, 0.0], [1.0, 1.0], [2.0, 2.0], [3.0, 3.0]];
        let levels = vec![2, 2];

        let grid = InducingPointSelector::uniform_grid_selection(&data, &levels).unwrap();

        // 2 x 2 grid over 2 features => 4 points.
        assert_eq!(grid.shape(), &[4, 2]);
        assert!(grid.iter().all(|v| v.is_finite()));
    }

    #[test]
    fn test_sor_approximation() {
        let rbf = RBFKernel::new(1.0, 1.0);
        let train_x = array![[0.0, 0.0], [1.0, 1.0], [2.0, 2.0]];
        let train_y = array![0.0, 1.0, 4.0];
        let pseudo_inputs = array![[0.0, 0.0], [2.0, 2.0]];

        let (weights, inv_kernel) =
            SparseApproximationMethods::fit_sor(&train_x, &train_y, &pseudo_inputs, &rbf, 0.1)
                .unwrap();

        // One weight per inducing point; inverse kernel is m x m.
        assert_eq!(weights.len(), 2);
        assert_eq!(inv_kernel.shape(), &[2, 2]);
        assert!(weights.iter().all(|v| v.is_finite()));
    }

    #[test]
    fn test_fic_approximation() {
        let rbf = RBFKernel::new(1.0, 1.0);
        let train_x = array![[0.0, 0.0], [1.0, 1.0], [2.0, 2.0]];
        let train_y = array![0.0, 1.0, 4.0];
        let pseudo_inputs = array![[0.0, 0.0], [2.0, 2.0]];

        let (weights, inv_kernel) =
            SparseApproximationMethods::fit_fic(&train_x, &train_y, &pseudo_inputs, &rbf, 0.1)
                .unwrap();

        // FIC alpha has one weight per training point.
        assert_eq!(weights.len(), 3);
        assert_eq!(inv_kernel.shape(), &[2, 2]);
        assert!(weights.iter().all(|v| v.is_finite()));
    }
}