//! Kernel PCA — `tensorlogic_sklears_kernels/kpca.rs`.
#![allow(clippy::needless_range_loop)]
2use crate::error::{KernelError, Result};
46use crate::types::Kernel;
47
/// Configuration for fitting a [`KernelPCA`] model.
#[derive(Debug, Clone)]
pub struct KernelPCAConfig {
    /// Number of principal components to extract.
    pub n_components: usize,
    /// Whether to double-center the kernel matrix in feature space before decomposition.
    pub center: bool,
    /// Eigenvalues below this cutoff stop extraction; remaining components are zero-padded.
    pub eigenvalue_threshold: f64,
}
58
59impl KernelPCAConfig {
60 pub fn new(n_components: usize) -> Self {
62 Self {
63 n_components,
64 center: true,
65 eigenvalue_threshold: 1e-10,
66 }
67 }
68
69 pub fn with_center(mut self, center: bool) -> Self {
71 self.center = center;
72 self
73 }
74
75 pub fn with_eigenvalue_threshold(mut self, threshold: f64) -> Self {
77 self.eigenvalue_threshold = threshold;
78 self
79 }
80}
81
/// A fitted kernel PCA model.
///
/// Stores the training samples (needed to evaluate kernels against new
/// data) together with the eigen-decomposition of the training kernel
/// matrix and the centering statistics computed at fit time.
#[derive(Debug, Clone)]
pub struct KernelPCA {
    // Training samples retained for kernel evaluations in `transform`.
    training_data: Vec<Vec<f64>>,
    // Unit-norm eigenvectors of the (optionally centered) kernel matrix, one per component.
    eigenvectors: Vec<Vec<f64>>,
    // Eigenvalues matching `eigenvectors`; zero entries mark padded components.
    eigenvalues: Vec<f64>,
    // Number of components requested at fit time.
    n_components: usize,
    // Whether the kernel matrix was centered during `fit`.
    centered: bool,
    // Per-row means of the training kernel matrix (used to center new data).
    kernel_row_means: Vec<f64>,
    // Grand mean of the training kernel matrix.
    kernel_mean: f64,
}
100
impl KernelPCA {
    /// Fit kernel PCA on `data` using `kernel`.
    ///
    /// Builds the full n×n kernel (Gram) matrix, optionally double-centers
    /// it in feature space, and extracts the leading `config.n_components`
    /// eigenpairs by power iteration (see `eigen_decomposition`).
    ///
    /// # Errors
    /// Returns an error when `data` is empty, when `config.n_components`
    /// exceeds the number of samples, or when a kernel evaluation fails.
    pub fn fit<K: Kernel + ?Sized>(
        data: &[Vec<f64>],
        kernel: &K,
        config: KernelPCAConfig,
    ) -> Result<Self> {
        let n = data.len();
        if n == 0 {
            return Err(KernelError::ComputationError(
                "Empty training data".to_string(),
            ));
        }

        if config.n_components > n {
            return Err(KernelError::InvalidParameter {
                parameter: "n_components".to_string(),
                value: config.n_components.to_string(),
                reason: format!(
                    "Cannot extract {} components from {} samples",
                    config.n_components, n
                ),
            });
        }

        // Symmetric kernel matrix; each pair is computed once and mirrored.
        let mut k_matrix = vec![vec![0.0; n]; n];
        for i in 0..n {
            for j in i..n {
                let k_val = kernel.compute(&data[i], &data[j])?;
                k_matrix[i][j] = k_val;
                k_matrix[j][i] = k_val;
            }
        }

        // Row means and grand mean are computed unconditionally; they are
        // stored on the model and used by `transform` when centering is on.
        let mut row_means = vec![0.0; n];
        let mut total_mean = 0.0;
        for i in 0..n {
            let row_sum: f64 = k_matrix[i].iter().sum();
            row_means[i] = row_sum / n as f64;
            total_mean += row_sum;
        }
        total_mean /= (n * n) as f64;

        // Double-centering: K' = K - 1K/n - K1/n + 1K1/n², expressed per entry.
        let mut k_centered = k_matrix.clone();
        if config.center {
            for i in 0..n {
                for j in 0..n {
                    k_centered[i][j] = k_matrix[i][j] - row_means[i] - row_means[j] + total_mean;
                }
            }
        }

        let (eigenvalues, eigenvectors) = eigen_decomposition(
            &k_centered,
            config.n_components,
            config.eigenvalue_threshold,
        )?;

        let training_data = data.to_vec();

        Ok(Self {
            training_data,
            eigenvectors,
            eigenvalues,
            n_components: config.n_components,
            centered: config.center,
            kernel_row_means: row_means,
            kernel_mean: total_mean,
        })
    }

    /// Project `data` onto the fitted components.
    ///
    /// Evaluates the kernel between each new sample and every training
    /// sample, applies the same centering as `fit` (using the stored
    /// training statistics), and projects onto each eigenvector scaled by
    /// 1/sqrt(eigenvalue). Components with a zero (padded) eigenvalue map
    /// to 0.
    ///
    /// # Errors
    /// Returns an error if any kernel evaluation fails.
    pub fn transform<K: Kernel + ?Sized>(
        &self,
        data: &[Vec<f64>],
        kernel: &K,
    ) -> Result<Vec<Vec<f64>>> {
        let n_train = self.training_data.len();
        let n_test = data.len();

        // Cross-kernel matrix between new samples (rows) and training samples (cols).
        let mut k_new = vec![vec![0.0; n_train]; n_test];
        for i in 0..n_test {
            for j in 0..n_train {
                k_new[i][j] = kernel.compute(&data[i], &self.training_data[j])?;
            }
        }

        // Center against training statistics: the row mean comes from the new
        // sample's own kernel row, the column means from the training fit.
        if self.centered {
            for i in 0..n_test {
                let row_mean: f64 = k_new[i].iter().sum::<f64>() / n_train as f64;
                for j in 0..n_train {
                    k_new[i][j] =
                        k_new[i][j] - row_mean - self.kernel_row_means[j] + self.kernel_mean;
                }
            }
        }

        // Project: coordinate c = (v_c · k_new[i]) / sqrt(λ_c).
        let mut transformed = vec![vec![0.0; self.n_components]; n_test];
        for i in 0..n_test {
            for c in 0..self.n_components {
                if self.eigenvalues[c] > 0.0 {
                    let mut proj = 0.0;
                    for j in 0..n_train {
                        proj += k_new[i][j] * self.eigenvectors[c][j];
                    }
                    transformed[i][c] = proj / self.eigenvalues[c].sqrt();
                }
            }
        }

        Ok(transformed)
    }

    /// Convenience: project the training data itself.
    pub fn transform_training<K: Kernel + ?Sized>(&self, kernel: &K) -> Result<Vec<Vec<f64>>> {
        self.transform(&self.training_data, kernel)
    }

    /// Eigenvalues of the fitted components (zeros mark padded components).
    pub fn eigenvalues(&self) -> &[f64] {
        &self.eigenvalues
    }

    /// Fraction of the extracted eigenvalue mass carried by each component.
    /// All zeros when the eigenvalue total is non-positive.
    pub fn explained_variance_ratio(&self) -> Vec<f64> {
        let total_var: f64 = self.eigenvalues.iter().sum();
        if total_var > 0.0 {
            self.eigenvalues.iter().map(|&e| e / total_var).collect()
        } else {
            vec![0.0; self.eigenvalues.len()]
        }
    }

    /// Running total of `explained_variance_ratio` (non-decreasing).
    pub fn cumulative_variance_explained(&self) -> Vec<f64> {
        let ratios = self.explained_variance_ratio();
        let mut cumulative = Vec::with_capacity(ratios.len());
        let mut sum = 0.0;
        for r in ratios {
            sum += r;
            cumulative.push(sum);
        }
        cumulative
    }

    /// Number of components requested at fit time.
    pub fn n_components(&self) -> usize {
        self.n_components
    }

    /// Number of training samples the model was fitted on.
    pub fn n_samples(&self) -> usize {
        self.training_data.len()
    }
}
279
/// Double-center a square kernel matrix in place.
///
/// Each entry becomes `K[i][j] - r[i] - r[j] + m`, where `r` are the row
/// means and `m` is the grand mean — i.e. the implicit feature-space data
/// is shifted to zero mean. A zero-sized matrix is left untouched.
pub fn center_kernel_matrix(matrix: &mut [Vec<f64>]) {
    let n = matrix.len();
    if n == 0 {
        return;
    }

    // Row sums first; means and the grand mean derive from them.
    let row_sums: Vec<f64> = matrix.iter().map(|row| row.iter().sum()).collect();
    let row_means: Vec<f64> = row_sums.iter().map(|&s| s / n as f64).collect();
    let grand_mean = row_sums.iter().sum::<f64>() / (n * n) as f64;

    for (i, row) in matrix.iter_mut().enumerate() {
        for (j, entry) in row.iter_mut().enumerate() {
            *entry = *entry - row_means[i] - row_means[j] + grand_mean;
        }
    }
}
306
/// Extract the top `n_components` eigenpairs of a symmetric matrix via
/// power iteration with deflation.
///
/// Once a converged eigenvalue falls below `threshold`, extraction stops
/// and the remaining slots are padded with zero eigenvalues and zero
/// vectors, so the output always has exactly `n_components` entries.
///
/// # Errors
/// Returns an error when `matrix` is empty.
fn eigen_decomposition(
    matrix: &[Vec<f64>],
    n_components: usize,
    threshold: f64,
) -> Result<(Vec<f64>, Vec<Vec<f64>>)> {
    let n = matrix.len();
    if n == 0 {
        return Err(KernelError::ComputationError("Empty matrix".to_string()));
    }

    let max_iter = 1000;
    let tol = 1e-10;

    let mut eigenvalues = Vec::with_capacity(n_components);
    let mut eigenvectors = Vec::with_capacity(n_components);

    // Working copy that gets deflated as components are extracted.
    let mut work_matrix = matrix.to_vec();

    for _ in 0..n_components {
        // Start from a uniform unit-norm vector.
        let mut v = vec![1.0 / (n as f64).sqrt(); n];
        let mut eigenvalue = 0.0;
        for _ in 0..max_iter {
            // w = A v
            let mut w = vec![0.0; n];
            for i in 0..n {
                for j in 0..n {
                    w[i] += work_matrix[i][j] * v[j];
                }
            }

            // Rayleigh quotient estimate (v has unit norm here).
            let new_eigenvalue: f64 = v.iter().zip(w.iter()).map(|(vi, wi)| vi * wi).sum();

            // If Av is (numerically) zero, v lies in the null space — stop.
            let norm: f64 = w.iter().map(|x| x * x).sum::<f64>().sqrt();
            if norm < 1e-15 {
                break;
            }
            for wi in &mut w {
                *wi /= norm;
            }

            // Converged when both the eigenvalue and the vector stop moving.
            let diff: f64 = v.iter().zip(w.iter()).map(|(vi, wi)| (vi - wi).abs()).sum();
            v = w;

            if (new_eigenvalue - eigenvalue).abs() < tol && diff < tol {
                eigenvalue = new_eigenvalue;
                break;
            }
            eigenvalue = new_eigenvalue;
        }

        // Too small (or negative): pad the remaining slots with zeros and stop.
        if eigenvalue < threshold {
            while eigenvalues.len() < n_components {
                eigenvalues.push(0.0);
                eigenvectors.push(vec![0.0; n]);
            }
            break;
        }

        eigenvalues.push(eigenvalue);
        eigenvectors.push(v.clone());

        // Deflate: A ← A − λ v vᵀ so the next iteration finds the next eigenpair.
        for i in 0..n {
            for j in 0..n {
                work_matrix[i][j] -= eigenvalue * v[i] * v[j];
            }
        }
    }

    Ok((eigenvalues, eigenvectors))
}
389
/// Variance left unexplained by the extracted components: the trace of the
/// kernel matrix minus the sum of the eigenvalues, clamped at zero.
pub fn reconstruction_error(original_matrix: &[Vec<f64>], eigenvalues: &[f64]) -> f64 {
    // Trace = total variance in feature space for a centered kernel matrix.
    let trace: f64 = original_matrix
        .iter()
        .enumerate()
        .map(|(i, row)| row[i])
        .sum();

    let explained: f64 = eigenvalues.iter().sum();

    (trace - explained).max(0.0)
}
406
/// Smallest number of leading components whose cumulative eigenvalue mass
/// reaches `target_ratio` of the total.
///
/// Returns 1 when the eigenvalue total is non-positive, and
/// `eigenvalues.len()` when the target ratio is never reached.
pub fn select_n_components(eigenvalues: &[f64], target_ratio: f64) -> usize {
    let total: f64 = eigenvalues.iter().sum();
    if total <= 0.0 {
        return 1;
    }

    let mut running = 0.0;
    match eigenvalues.iter().position(|&e| {
        running += e;
        running / total >= target_ratio
    }) {
        Some(idx) => idx + 1,
        None => eigenvalues.len(),
    }
}
430
#[cfg(test)]
mod tests {
    use super::*;
    use crate::tensor_kernels::{LinearKernel, RbfKernel};
    use crate::types::RbfKernelConfig;

    // Builder methods should override the defaults set by `new`.
    #[test]
    fn test_kpca_config() {
        let config = KernelPCAConfig::new(3)
            .with_center(false)
            .with_eigenvalue_threshold(1e-8);

        assert_eq!(config.n_components, 3);
        assert!(!config.center);
        assert!((config.eigenvalue_threshold - 1e-8).abs() < 1e-15);
    }

    // End-to-end fit + transform with a linear kernel; checks output shapes.
    #[test]
    fn test_kpca_linear_kernel() {
        let kernel = LinearKernel::new();
        let data = vec![
            vec![1.0, 0.0],
            vec![0.0, 1.0],
            vec![1.0, 1.0],
            vec![2.0, 1.0],
        ];

        let config = KernelPCAConfig::new(2);
        let kpca = KernelPCA::fit(&data, &kernel, config).unwrap();

        assert_eq!(kpca.n_components(), 2);
        assert_eq!(kpca.n_samples(), 4);

        let transformed = kpca.transform(&data, &kernel).unwrap();
        assert_eq!(transformed.len(), 4);
        assert_eq!(transformed[0].len(), 2);
    }

    // RBF kernel: PSD kernel matrix should yield (near) non-negative eigenvalues.
    #[test]
    fn test_kpca_rbf_kernel() {
        let kernel = RbfKernel::new(RbfKernelConfig::new(0.5)).unwrap();
        let data = vec![
            vec![0.0, 0.0],
            vec![1.0, 0.0],
            vec![0.0, 1.0],
            vec![1.0, 1.0],
        ];

        let config = KernelPCAConfig::new(2);
        let kpca = KernelPCA::fit(&data, &kernel, config).unwrap();

        let transformed = kpca.transform(&data, &kernel).unwrap();
        assert_eq!(transformed.len(), 4);

        // Small negative tolerance allows for numerical error in power iteration.
        for eigenvalue in kpca.eigenvalues() {
            assert!(*eigenvalue >= -1e-10);
        }
    }

    // Variance ratios: each non-negative, total at most ~1, cumulative non-decreasing.
    #[test]
    fn test_kpca_explained_variance() {
        let kernel = LinearKernel::new();
        let data = vec![
            vec![1.0, 0.0, 0.0],
            vec![0.0, 1.0, 0.0],
            vec![0.0, 0.0, 1.0],
            vec![1.0, 1.0, 0.0],
        ];

        let config = KernelPCAConfig::new(3);
        let kpca = KernelPCA::fit(&data, &kernel, config).unwrap();

        let ratios = kpca.explained_variance_ratio();
        let total: f64 = ratios.iter().sum();

        assert!(total <= 1.01, "Total explained variance ratio: {}", total);

        for r in &ratios {
            assert!(*r >= 0.0);
        }

        let cumulative = kpca.cumulative_variance_explained();
        for i in 1..cumulative.len() {
            assert!(cumulative[i] >= cumulative[i - 1] - 1e-10);
        }
    }

    // Requesting more components than samples must be rejected.
    #[test]
    fn test_kpca_too_many_components() {
        let kernel = LinearKernel::new();
        let data = vec![vec![1.0, 2.0], vec![3.0, 4.0]];

        let config = KernelPCAConfig::new(10);
        let result = KernelPCA::fit(&data, &kernel, config);
        assert!(result.is_err());
    }

    // Fitting on an empty dataset must be rejected.
    #[test]
    fn test_kpca_empty_data() {
        let kernel = LinearKernel::new();
        let data: Vec<Vec<f64>> = vec![];

        let config = KernelPCAConfig::new(2);
        let result = KernelPCA::fit(&data, &kernel, config);
        assert!(result.is_err());
    }

    // After centering, row and column means should be (numerically) zero.
    #[test]
    fn test_center_kernel_matrix() {
        let mut matrix = vec![vec![4.0, 2.0], vec![2.0, 4.0]];
        center_kernel_matrix(&mut matrix);

        let row_mean: f64 = matrix[0].iter().sum::<f64>() / 2.0;
        let col_mean: f64 = matrix.iter().map(|r| r[0]).sum::<f64>() / 2.0;

        assert!(row_mean.abs() < 1e-10);
        assert!(col_mean.abs() < 1e-10);
    }

    // Cumulative ratios for [5, 3, 1.5, 0.5] are 0.5, 0.8, 0.95, 1.0.
    #[test]
    fn test_select_n_components() {
        let eigenvalues = vec![5.0, 3.0, 1.5, 0.5];

        assert_eq!(select_n_components(&eigenvalues, 0.95), 3);

        assert_eq!(select_n_components(&eigenvalues, 0.80), 2);

        assert_eq!(select_n_components(&eigenvalues, 0.50), 1);
    }

    // Trace is 8; one eigenvalue of 6 leaves an error of 2.
    #[test]
    fn test_reconstruction_error() {
        let matrix = vec![vec![4.0, 2.0], vec![2.0, 4.0]];
        let eigenvalues = vec![6.0];
        let error = reconstruction_error(&matrix, &eigenvalues);
        assert!((error - 2.0).abs() < 0.1);
    }

    // Transform of unseen data should produce one row per sample.
    #[test]
    fn test_kpca_transform_new_data() {
        let kernel = RbfKernel::new(RbfKernelConfig::new(0.5)).unwrap();
        let train_data = vec![
            vec![0.0, 0.0],
            vec![1.0, 0.0],
            vec![0.0, 1.0],
            vec![1.0, 1.0],
        ];

        let config = KernelPCAConfig::new(2);
        let kpca = KernelPCA::fit(&train_data, &kernel, config).unwrap();

        let new_data = vec![vec![0.5, 0.5]];
        let transformed = kpca.transform(&new_data, &kernel).unwrap();

        assert_eq!(transformed.len(), 1);
        assert_eq!(transformed[0].len(), 2);
    }

    // Fitting with centering disabled should still round-trip through transform.
    #[test]
    fn test_kpca_no_centering() {
        let kernel = LinearKernel::new();
        let data = vec![vec![1.0, 2.0], vec![3.0, 4.0], vec![5.0, 6.0]];

        let config = KernelPCAConfig::new(2).with_center(false);
        let kpca = KernelPCA::fit(&data, &kernel, config).unwrap();

        let transformed = kpca.transform(&data, &kernel).unwrap();
        assert_eq!(transformed.len(), 3);
    }
}