// tensorlogic_sklears_kernels/low_rank.rs
//! Low-rank kernel matrix approximations
//!
//! This module provides methods for approximating large kernel matrices
//! using low-rank representations, which significantly reduce memory
//! and computational costs.
//!
//! ## Nyström Method
//!
//! The Nyström method approximates a kernel matrix K (n×n) using a subset
//! of m landmark points, producing an approximation with O(nm) instead of O(n²) complexity.
//!
//! ## References
//!
//! - Williams & Seeger (2001): "Using the Nyström Method to Speed Up Kernel Machines"
//! - Kumar et al. (2012): "Sampling Methods for the Nyström Method"

use serde::{Deserialize, Serialize};

use crate::error::{KernelError, Result};
use crate::types::Kernel;

/// Configuration for the Nyström low-rank approximation.
///
/// Constructed via [`NystromConfig::new`], which defaults to
/// [`SamplingMethod::Uniform`] and a regularization of `1e-6`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NystromConfig {
    /// Number of landmark points (m) to use; must be > 0, and must not
    /// exceed the dataset size (the latter is enforced by
    /// `NystromApproximation::fit`).
    pub num_landmarks: usize,
    /// Sampling method for selecting landmarks.
    pub sampling: SamplingMethod,
    /// Regularization added to the diagonal of the landmark kernel matrix
    /// for numerical stability; must be non-negative.
    pub regularization: f64,
}

/// Sampling method for selecting landmark points.
///
/// All strategies below are deterministic in this implementation, so
/// repeated fits on the same data produce identical landmarks.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum SamplingMethod {
    /// Uniform sampling — implemented as a deterministic, evenly strided
    /// selection over the dataset rather than true random draws.
    Uniform,
    /// Sample the first m points (deterministic).
    First,
    /// K-means++ style farthest-point sampling (diverse landmarks).
    KMeansPlusPlus,
}

43impl NystromConfig {
44    /// Create a new configuration
45    pub fn new(num_landmarks: usize) -> Result<Self> {
46        if num_landmarks == 0 {
47            return Err(KernelError::InvalidParameter {
48                parameter: "num_landmarks".to_string(),
49                value: num_landmarks.to_string(),
50                reason: "must be greater than 0".to_string(),
51            });
52        }
53
54        Ok(Self {
55            num_landmarks,
56            sampling: SamplingMethod::Uniform,
57            regularization: 1e-6,
58        })
59    }
60
61    /// Set sampling method
62    pub fn with_sampling(mut self, sampling: SamplingMethod) -> Self {
63        self.sampling = sampling;
64        self
65    }
66
67    /// Set regularization parameter
68    pub fn with_regularization(mut self, regularization: f64) -> Result<Self> {
69        if regularization < 0.0 {
70            return Err(KernelError::InvalidParameter {
71                parameter: "regularization".to_string(),
72                value: regularization.to_string(),
73                reason: "must be non-negative".to_string(),
74            });
75        }
76        self.regularization = regularization;
77        Ok(self)
78    }
79}
80
/// Low-rank Nyström approximation of a kernel matrix.
///
/// Stores the approximation K ≈ C * W_inv * C^T where:
/// - C is the n×m matrix of kernel values between all points and landmarks
/// - W_inv is the inverse of the m×m (regularized) kernel matrix on landmarks
///
/// Built via [`NystromApproximation::fit`]; fields are private, so
/// instances only exist in that validated state.
pub struct NystromApproximation {
    /// Kernel values between all points and landmarks (n×m).
    c_matrix: Vec<Vec<f64>>,
    /// Pseudo-inverse of the (regularized) landmark kernel matrix (m×m).
    w_inv: Vec<Vec<f64>>,
    /// Indices into the original dataset of the selected landmark points.
    landmark_indices: Vec<usize>,
}

95impl NystromApproximation {
96    /// Create a low-rank approximation using the Nyström method
97    ///
98    /// # Arguments
99    ///
100    /// * `data` - The dataset (n samples)
101    /// * `kernel` - The kernel function
102    /// * `config` - Configuration for the approximation
103    ///
104    /// # Returns
105    ///
106    /// A Nyström approximation that can efficiently compute approximate kernel values
107    pub fn fit(data: &[Vec<f64>], kernel: &dyn Kernel, config: NystromConfig) -> Result<Self> {
108        let n = data.len();
109
110        if config.num_landmarks > n {
111            return Err(KernelError::InvalidParameter {
112                parameter: "num_landmarks".to_string(),
113                value: config.num_landmarks.to_string(),
114                reason: format!("cannot exceed dataset size ({})", n),
115            });
116        }
117
118        // Select landmark points
119        let landmark_indices = Self::select_landmarks(data, kernel, &config)?;
120
121        // Compute C matrix (n×m): kernel values between all points and landmarks
122        let mut c_matrix = vec![vec![0.0; config.num_landmarks]; n];
123        for i in 0..n {
124            for (j, &landmark_idx) in landmark_indices.iter().enumerate() {
125                c_matrix[i][j] = kernel.compute(&data[i], &data[landmark_idx])?;
126            }
127        }
128
129        // Compute W matrix (m×m): kernel matrix on landmarks
130        let mut w_matrix = vec![vec![0.0; config.num_landmarks]; config.num_landmarks];
131        for i in 0..config.num_landmarks {
132            for j in 0..config.num_landmarks {
133                w_matrix[i][j] =
134                    kernel.compute(&data[landmark_indices[i]], &data[landmark_indices[j]])?;
135            }
136        }
137
138        // Add regularization to diagonal
139        #[allow(clippy::needless_range_loop)]
140        for i in 0..config.num_landmarks {
141            w_matrix[i][i] += config.regularization;
142        }
143
144        // Compute pseudo-inverse of W
145        let w_inv = Self::pseudo_inverse(&w_matrix)?;
146
147        Ok(Self {
148            c_matrix,
149            w_inv,
150            landmark_indices,
151        })
152    }
153
154    /// Select landmark points based on sampling method
155    fn select_landmarks(
156        data: &[Vec<f64>],
157        kernel: &dyn Kernel,
158        config: &NystromConfig,
159    ) -> Result<Vec<usize>> {
160        match config.sampling {
161            SamplingMethod::First => {
162                // Simply take first m points
163                Ok((0..config.num_landmarks).collect())
164            }
165            SamplingMethod::Uniform => {
166                // Uniform random sampling (deterministic for reproducibility)
167                let step = data.len() / config.num_landmarks;
168                Ok((0..config.num_landmarks).map(|i| i * step).collect())
169            }
170            SamplingMethod::KMeansPlusPlus => {
171                // K-means++ style sampling for diversity
172                Self::kmeans_plusplus_sampling(data, kernel, config.num_landmarks)
173            }
174        }
175    }
176
177    /// K-means++ style sampling for diverse landmark selection
178    fn kmeans_plusplus_sampling(
179        data: &[Vec<f64>],
180        kernel: &dyn Kernel,
181        num_landmarks: usize,
182    ) -> Result<Vec<usize>> {
183        let n = data.len();
184        let mut landmarks = Vec::with_capacity(num_landmarks);
185        let mut min_distances = vec![f64::INFINITY; n];
186
187        // Select first landmark (first point for determinism)
188        landmarks.push(0);
189
190        // Select remaining landmarks based on distance from existing landmarks
191        for _ in 1..num_landmarks {
192            // Update minimum distances to nearest landmark
193            let last_landmark = *landmarks.last().unwrap();
194            for i in 0..n {
195                if landmarks.contains(&i) {
196                    continue;
197                }
198
199                // Compute kernel-based distance (1 - kernel_similarity)
200                let similarity = kernel.compute(&data[i], &data[last_landmark])?;
201                let distance = 1.0 - similarity;
202                min_distances[i] = min_distances[i].min(distance);
203            }
204
205            // Select point with maximum distance to nearest landmark
206            let mut max_dist = 0.0;
207            let mut best_idx = 0;
208            #[allow(clippy::needless_range_loop)]
209            for i in 0..n {
210                if !landmarks.contains(&i) && min_distances[i] > max_dist {
211                    max_dist = min_distances[i];
212                    best_idx = i;
213                }
214            }
215
216            landmarks.push(best_idx);
217        }
218
219        Ok(landmarks)
220    }
221
222    /// Compute pseudo-inverse using simple iterative method
223    ///
224    /// For production use, this should use a proper linear algebra library.
225    /// This is a simplified implementation for demonstration.
226    #[allow(clippy::needless_range_loop)]
227    fn pseudo_inverse(matrix: &[Vec<f64>]) -> Result<Vec<Vec<f64>>> {
228        let n = matrix.len();
229        if n == 0 {
230            return Err(KernelError::ComputationError(
231                "Cannot invert empty matrix".to_string(),
232            ));
233        }
234
235        // For small matrices, use Gauss-Jordan elimination
236        let mut augmented = vec![vec![0.0; 2 * n]; n];
237
238        // Create augmented matrix [A | I]
239        for i in 0..n {
240            for j in 0..n {
241                augmented[i][j] = matrix[i][j];
242            }
243            augmented[i][n + i] = 1.0;
244        }
245
246        // Forward elimination
247        for i in 0..n {
248            // Find pivot
249            let mut max_row = i;
250            for k in (i + 1)..n {
251                if augmented[k][i].abs() > augmented[max_row][i].abs() {
252                    max_row = k;
253                }
254            }
255
256            // Swap rows
257            if max_row != i {
258                augmented.swap(i, max_row);
259            }
260
261            // Check for singular matrix
262            if augmented[i][i].abs() < 1e-10 {
263                return Err(KernelError::ComputationError(
264                    "Matrix is singular or nearly singular".to_string(),
265                ));
266            }
267
268            // Scale pivot row
269            let pivot = augmented[i][i];
270            for j in 0..(2 * n) {
271                augmented[i][j] /= pivot;
272            }
273
274            // Eliminate column
275            for k in 0..n {
276                if k != i {
277                    let factor = augmented[k][i];
278                    for j in 0..(2 * n) {
279                        augmented[k][j] -= factor * augmented[i][j];
280                    }
281                }
282            }
283        }
284
285        // Extract inverse from right half
286        let mut inverse = vec![vec![0.0; n]; n];
287        for i in 0..n {
288            for j in 0..n {
289                inverse[i][j] = augmented[i][n + j];
290            }
291        }
292
293        Ok(inverse)
294    }
295
296    /// Approximate kernel value between two points
297    ///
298    /// Uses the approximation: K(x_i, x_j) ≈ C_i * W_inv * C_j^T
299    pub fn approximate(&self, i: usize, j: usize) -> Result<f64> {
300        if i >= self.c_matrix.len() || j >= self.c_matrix.len() {
301            return Err(KernelError::ComputationError(format!(
302                "Indices out of bounds: i={}, j={}",
303                i, j
304            )));
305        }
306
307        // Compute C_i * W_inv * C_j^T
308        let m = self.w_inv.len();
309        let mut result = 0.0;
310
311        for k in 0..m {
312            for idx in 0..m {
313                result += self.c_matrix[i][k] * self.w_inv[k][idx] * self.c_matrix[j][idx];
314            }
315        }
316
317        Ok(result)
318    }
319
320    /// Get the full approximate kernel matrix
321    pub fn get_approximate_matrix(&self) -> Result<Vec<Vec<f64>>> {
322        let n = self.c_matrix.len();
323        let mut matrix = vec![vec![0.0; n]; n];
324
325        #[allow(clippy::needless_range_loop)]
326        for i in 0..n {
327            for j in 0..n {
328                matrix[i][j] = self.approximate(i, j)?;
329            }
330        }
331
332        Ok(matrix)
333    }
334
335    /// Get the number of samples
336    pub fn num_samples(&self) -> usize {
337        self.c_matrix.len()
338    }
339
340    /// Get the number of landmarks
341    pub fn num_landmarks(&self) -> usize {
342        self.landmark_indices.len()
343    }
344
345    /// Get landmark indices
346    pub fn landmark_indices(&self) -> &[usize] {
347        &self.landmark_indices
348    }
349
350    /// Compute approximation error (Frobenius norm) compared to exact kernel matrix
351    pub fn approximation_error(&self, exact_matrix: &[Vec<f64>]) -> Result<f64> {
352        let approx_matrix = self.get_approximate_matrix()?;
353        let n = exact_matrix.len();
354
355        if approx_matrix.len() != n || approx_matrix[0].len() != n {
356            return Err(KernelError::DimensionMismatch {
357                expected: vec![n, n],
358                got: vec![approx_matrix.len(), approx_matrix[0].len()],
359                context: "approximation error computation".to_string(),
360            });
361        }
362
363        let mut error = 0.0;
364        for i in 0..n {
365            for j in 0..n {
366                let diff = exact_matrix[i][j] - approx_matrix[i][j];
367                error += diff * diff;
368            }
369        }
370
371        Ok(error.sqrt())
372    }
373
374    /// Get compression ratio (original size / approximation size)
375    pub fn compression_ratio(&self) -> f64 {
376        let n = self.num_samples() as f64;
377        let m = self.num_landmarks() as f64;
378        (n * n) / (n * m + m * m)
379    }
380}
381
#[cfg(test)]
mod tests {
    use super::*;
    use crate::LinearKernel;

    /// Build a deterministic n×dim dataset whose features are sin values.
    fn make_data(n: usize, dim: usize) -> Vec<Vec<f64>> {
        let mut rows = Vec::with_capacity(n);
        for i in 0..n {
            let row: Vec<f64> = (0..dim).map(|j| ((i * dim + j) as f64).sin()).collect();
            rows.push(row);
        }
        rows
    }

    #[test]
    fn test_nystrom_config() {
        let config = NystromConfig::new(10).unwrap();
        assert_eq!(config.num_landmarks, 10);
        assert_eq!(config.sampling, SamplingMethod::Uniform);
    }

    #[test]
    fn test_nystrom_config_invalid() {
        // Zero landmarks is rejected at construction time.
        assert!(NystromConfig::new(0).is_err());
    }

    #[test]
    fn test_nystrom_approximation_basic() {
        let kernel = LinearKernel::new();
        let approx =
            NystromApproximation::fit(&make_data(50, 10), &kernel, NystromConfig::new(10).unwrap())
                .unwrap();

        assert_eq!(approx.num_samples(), 50);
        assert_eq!(approx.num_landmarks(), 10);
    }

    #[test]
    fn test_nystrom_approximation_value() {
        let kernel = LinearKernel::new();
        let approx =
            NystromApproximation::fit(&make_data(20, 5), &kernel, NystromConfig::new(5).unwrap())
                .unwrap();

        // The approximate kernel value should at least be a finite number.
        assert!(approx.approximate(0, 1).unwrap().is_finite());
    }

    #[test]
    fn test_nystrom_sampling_methods() {
        let data = make_data(30, 5);
        let kernel = LinearKernel::new();

        let fit_with = |method: SamplingMethod| {
            let config = NystromConfig::new(10).unwrap().with_sampling(method);
            NystromApproximation::fit(&data, &kernel, config).unwrap()
        };

        // First-m sampling always starts at index 0.
        assert_eq!(fit_with(SamplingMethod::First).landmark_indices()[0], 0);
        // The other strategies must yield the requested landmark count.
        assert_eq!(fit_with(SamplingMethod::Uniform).num_landmarks(), 10);
        assert_eq!(fit_with(SamplingMethod::KMeansPlusPlus).num_landmarks(), 10);
    }

    #[test]
    fn test_nystrom_compression_ratio() {
        let kernel = LinearKernel::new();
        let approx =
            NystromApproximation::fit(&make_data(100, 5), &kernel, NystromConfig::new(20).unwrap())
                .unwrap();

        // 100² / (100·20 + 20²) ≈ 4.2, comfortably above 3.
        assert!(approx.compression_ratio() > 3.0);
    }

    #[test]
    fn test_nystrom_approximation_error() {
        let data = make_data(30, 5);
        let kernel = LinearKernel::new();

        let exact = kernel.compute_matrix(&data).unwrap();
        let approx =
            NystromApproximation::fit(&data, &kernel, NystromConfig::new(20).unwrap()).unwrap();

        // With 20 of 30 points as landmarks, the Frobenius error stays small.
        assert!(approx.approximation_error(&exact).unwrap() < 10.0);
    }

    #[test]
    fn test_nystrom_too_many_landmarks() {
        let kernel = LinearKernel::new();
        // Requesting 20 landmarks for only 10 samples must be rejected.
        let result =
            NystromApproximation::fit(&make_data(10, 5), &kernel, NystromConfig::new(20).unwrap());
        assert!(result.is_err());
    }

    #[test]
    fn test_nystrom_regularization() {
        let kernel = LinearKernel::new();
        let config = NystromConfig::new(5)
            .unwrap()
            .with_regularization(1e-4)
            .unwrap();

        let approx = NystromApproximation::fit(&make_data(20, 5), &kernel, config).unwrap();
        assert!(approx.approximate(0, 1).is_ok());
    }

    #[test]
    fn test_nystrom_invalid_regularization() {
        assert!(NystromConfig::new(10)
            .unwrap()
            .with_regularization(-0.1)
            .is_err());
    }
}