1use sphereql_core::{CartesianPoint, SphericalPoint, cartesian_to_spherical};
2
3use crate::projection::{SplitMix64, dot, normalize_vec, project_xyz_to_spherical};
4use crate::types::{Embedding, ProjectedPoint, RadialStrategy};
5
6use crate::projection::Projection;
7
8#[derive(Clone)]
43pub struct KernelPcaProjection {
44 training_data: Vec<Vec<f64>>,
47
48 sigma: f64,
50
51 inv_two_sigma_sq: f64,
53
54 alphas: [Vec<f64>; 3],
57
58 eigenvalues: [f64; 3],
60
61 col_means: Vec<f64>,
64
65 grand_mean: f64,
68
69 total_variance: f64,
71
72 dim: usize,
74
75 radial: RadialStrategy,
77
78 volumetric: bool,
81}
82
83impl KernelPcaProjection {
84 pub fn fit(embeddings: &[Embedding], radial: RadialStrategy) -> Self {
91 Self::fit_impl(embeddings, None, radial)
92 }
93
94 pub fn fit_with_sigma(embeddings: &[Embedding], sigma: f64, radial: RadialStrategy) -> Self {
99 assert!(sigma > 0.0, "σ must be positive, got {sigma}");
100 Self::fit_impl(embeddings, Some(sigma), radial)
101 }
102
103 pub fn fit_default(embeddings: &[Embedding]) -> Self {
105 Self::fit(embeddings, RadialStrategy::default())
106 }
107
108 pub fn with_volumetric(mut self, enabled: bool) -> Self {
111 self.volumetric = enabled;
112 self
113 }
114
115 pub fn sigma(&self) -> f64 {
117 self.sigma
118 }
119
120 pub fn num_training_points(&self) -> usize {
122 self.training_data.len()
123 }
124
125 pub fn explained_variance_ratio(&self) -> f64 {
131 if self.total_variance < f64::EPSILON {
132 return 1.0;
133 }
134 let explained: f64 = self.eigenvalues.iter().sum();
135 (explained / (self.total_variance * self.training_data.len() as f64)).clamp(0.0, 1.0)
139 }
140
141 pub fn eigenvalues(&self) -> [f64; 3] {
143 self.eigenvalues
144 }
145
146 fn fit_impl(embeddings: &[Embedding], sigma: Option<f64>, radial: RadialStrategy) -> Self {
149 assert!(
150 !embeddings.is_empty(),
151 "need at least one embedding to fit kernel PCA"
152 );
153 let dim = embeddings[0].dimension();
154 assert!(dim >= 3, "embedding dimension must be >= 3");
155 for (i, e) in embeddings.iter().enumerate() {
156 assert_eq!(
157 e.dimension(),
158 dim,
159 "embedding {i} has dimension {}, expected {dim}",
160 e.dimension()
161 );
162 }
163
164 let normalized: Vec<Vec<f64>> = embeddings.iter().map(|e| e.normalized()).collect();
166 let n = normalized.len();
167
168 let sigma = sigma.unwrap_or_else(|| auto_sigma(&normalized));
170 let inv_two_sigma_sq = 0.5 / (sigma * sigma);
171
172 let mut kernel_flat = vec![0.0_f64; n * n];
177 for i in 0..n {
178 kernel_flat[i * n + i] = 1.0; for j in (i + 1)..n {
180 let val = gaussian_kernel(&normalized[i], &normalized[j], inv_two_sigma_sq);
181 kernel_flat[i * n + j] = val;
182 kernel_flat[j * n + i] = val;
183 }
184 }
185
186 let mut col_means = vec![0.0; n];
190 let mut grand_sum = 0.0;
191 for i in 0..n {
192 let mut row_sum = 0.0;
193 for j in 0..n {
194 row_sum += kernel_flat[i * n + j];
195 }
196 col_means[i] = row_sum / n as f64;
197 grand_sum += row_sum;
198 }
199 let grand_mean = grand_sum / (n * n) as f64;
200
201 for i in 0..n {
206 for j in 0..n {
207 kernel_flat[i * n + j] -= col_means[i] + col_means[j] - grand_mean;
208 }
209 }
210
211 let trace: f64 = (0..n).map(|i| kernel_flat[i * n + i]).sum();
213 let total_variance = trace / n as f64;
214
215 let (raw_vectors, raw_values) = top_k_symmetric(&kernel_flat, n, 3);
217
218 let mut alphas: [Vec<f64>; 3] = [Vec::new(), Vec::new(), Vec::new()];
221 let mut eigenvalues = [0.0_f64; 3];
222 for l in 0..3 {
223 let lambda = raw_values.get(l).copied().unwrap_or(0.0);
224 eigenvalues[l] = lambda;
225 if lambda > f64::EPSILON {
226 let scale = 1.0 / lambda.sqrt();
227 alphas[l] = raw_vectors[l].iter().map(|&a| a * scale).collect();
228 } else {
229 alphas[l] = vec![0.0; n];
230 }
231 }
232
233 Self {
234 training_data: normalized,
235 sigma,
236 inv_two_sigma_sq,
237 alphas,
238 eigenvalues,
239 col_means,
240 grand_mean,
241 total_variance,
242 dim,
243 radial,
244 volumetric: false,
245 }
246 }
247
248 fn kernel_project(&self, normalized: &[f64]) -> (f64, f64, f64, f64) {
252 let n = self.training_data.len();
253
254 let k_z: Vec<f64> = self
256 .training_data
257 .iter()
258 .map(|x_i| gaussian_kernel(normalized, x_i, self.inv_two_sigma_sq))
259 .collect();
260
261 let mean_k_z: f64 = k_z.iter().sum::<f64>() / n as f64;
263
264 let k_z_centered: Vec<f64> = (0..n)
266 .map(|i| k_z[i] - mean_k_z - self.col_means[i] + self.grand_mean)
267 .collect();
268
269 let f1 = dot(&self.alphas[0], &k_z_centered);
271 let f2 = dot(&self.alphas[1], &k_z_centered);
272 let f3 = dot(&self.alphas[2], &k_z_centered);
273
274 let spherical_potential = (1.0 - 2.0 * mean_k_z + self.grand_mean).max(0.0);
280
281 (f1, f2, f3, spherical_potential)
282 }
283}
284
285impl Projection for KernelPcaProjection {
286 fn project(&self, embedding: &Embedding) -> SphericalPoint {
287 assert_eq!(
288 embedding.dimension(),
289 self.dim,
290 "expected dimension {}, got {}",
291 self.dim,
292 embedding.dimension()
293 );
294
295 let normalized = embedding.normalized();
296 let (x, y, z, _) = self.kernel_project(&normalized);
297
298 if self.volumetric {
299 let sp = cartesian_to_spherical(&CartesianPoint::new(x, y, z));
300 if sp.r < f64::EPSILON {
301 return SphericalPoint::new_unchecked(0.0, 0.0, 0.0);
302 }
303 SphericalPoint::new_unchecked(sp.r, sp.theta, sp.phi)
304 } else {
305 let r = self.radial.compute(embedding.magnitude());
306 project_xyz_to_spherical(x, y, z, r)
307 }
308 }
309
310 fn project_rich(&self, embedding: &Embedding) -> ProjectedPoint {
311 assert_eq!(
312 embedding.dimension(),
313 self.dim,
314 "expected dimension {}, got {}",
315 self.dim,
316 embedding.dimension()
317 );
318
319 let intensity = embedding.magnitude();
320 let normalized = embedding.normalized();
321 let (x, y, z, spherical_potential) = self.kernel_project(&normalized);
322
323 let projection_magnitude = (x * x + y * y + z * z).sqrt();
324
325 let projection_sq = x * x + y * y + z * z;
329 let certainty = if spherical_potential > f64::EPSILON {
330 (projection_sq / spherical_potential).clamp(0.0, 1.0)
331 } else {
332 0.0
333 };
334
335 let position = if self.volumetric {
336 let sp = cartesian_to_spherical(&CartesianPoint::new(x, y, z));
337 if sp.r < f64::EPSILON {
338 SphericalPoint::new_unchecked(0.0, 0.0, 0.0)
339 } else {
340 SphericalPoint::new_unchecked(sp.r, sp.theta, sp.phi)
341 }
342 } else {
343 let r = self.radial.compute(intensity);
344 project_xyz_to_spherical(x, y, z, r)
345 };
346
347 ProjectedPoint::new(position, certainty, intensity, projection_magnitude)
348 }
349
350 fn dimensionality(&self) -> usize {
351 self.dim
352 }
353}
354
355#[inline]
359fn gaussian_kernel(a: &[f64], b: &[f64], inv_two_sigma_sq: f64) -> f64 {
360 let sq_dist: f64 = a
361 .iter()
362 .zip(b.iter())
363 .map(|(&x, &y)| {
364 let d = x - y;
365 d * d
366 })
367 .sum();
368 (-sq_dist * inv_two_sigma_sq).exp()
369}
370
371fn auto_sigma(data: &[Vec<f64>]) -> f64 {
379 let n = data.len();
380 if n < 2 {
381 return 1.0;
382 }
383
384 let mut distances = Vec::new();
385 let mut rng = SplitMix64::new(0xCAFE_BABE);
386
387 if n <= 100 {
388 distances.reserve(n * (n - 1) / 2);
389 for i in 0..n {
390 for j in (i + 1)..n {
391 distances.push(euclidean_dist(&data[i], &data[j]));
392 }
393 }
394 } else {
395 let num_pairs = 5000.min(n * (n - 1) / 2);
396 distances.reserve(num_pairs);
397 for _ in 0..num_pairs {
398 let i = (rng.next_u64() as usize) % n;
399 let mut j = (rng.next_u64() as usize) % n;
400 if j == i {
401 j = (j + 1) % n;
402 }
403 distances.push(euclidean_dist(&data[i], &data[j]));
404 }
405 }
406
407 distances.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap());
408 let median = distances[distances.len() / 2];
409
410 (median * std::f64::consts::FRAC_1_SQRT_2).max(1e-8)
411}
412
413fn euclidean_dist(a: &[f64], b: &[f64]) -> f64 {
414 a.iter()
415 .zip(b.iter())
416 .map(|(&x, &y)| {
417 let d = x - y;
418 d * d
419 })
420 .sum::<f64>()
421 .sqrt()
422}
423
424fn top_k_symmetric(matrix: &[f64], n: usize, k: usize) -> (Vec<Vec<f64>>, Vec<f64>) {
431 let max_iters = 300;
432 let tol = 1e-10;
433 let mut vectors: Vec<Vec<f64>> = Vec::with_capacity(k);
434 let mut values: Vec<f64> = Vec::with_capacity(k);
435 let mut rng = SplitMix64::new(0xBEEF_CAFE);
436
437 for _ in 0..k {
438 let mut v: Vec<f64> = (0..n).map(|_| rng.normal()).collect();
439 normalize_vec(&mut v);
440 let mut eigenvalue = 0.0;
441
442 for _ in 0..max_iters {
443 let mut u = vec![0.0; n];
444 for (i, u_i) in u.iter_mut().enumerate().skip(1) {
445 let row_start = i * n;
446 let mut s = 0.0;
447 for j in 0..n {
448 s += matrix[row_start + j] * v[j];
449 }
450 *u_i = s;
451 }
452
453 for prev in &vectors {
454 let proj = dot(&u, prev);
455 for (ui, &pi) in u.iter_mut().zip(prev.iter()) {
456 *ui -= proj * pi;
457 }
458 }
459
460 let mag = normalize_vec(&mut u);
461 if mag < f64::EPSILON {
462 break;
463 }
464
465 eigenvalue = {
467 let mut s = 0.0;
468 for i in 0..n {
469 let row_start = i * n;
470 let mut mv_i = 0.0;
471 for j in 0..n {
472 mv_i += matrix[row_start + j] * u[j];
473 }
474 s += u[i] * mv_i;
475 }
476 s
477 };
478
479 let change = 1.0 - dot(&u, &v).abs();
480 v = u;
481
482 if change < tol {
483 break;
484 }
485 }
486
487 vectors.push(v);
488 values.push(eigenvalue);
489 }
490
491 while vectors.len() < k {
492 let mut v: Vec<f64> = (0..n).map(|_| rng.normal()).collect();
493 for prev in &vectors {
494 let proj = dot(&v, prev);
495 for (vi, &pi) in v.iter_mut().zip(prev.iter()) {
496 *vi -= proj * pi;
497 }
498 }
499 normalize_vec(&mut v);
500 vectors.push(v);
501 values.push(0.0);
502 }
503
504 (vectors, values)
505}
506
507#[cfg(test)]
508mod tests {
509 use super::*;
510 use sphereql_core::angular_distance;
511 use std::f64::consts::{PI, TAU};
512
513 fn emb(vals: &[f64]) -> Embedding {
514 Embedding::new(vals.to_vec())
515 }
516
517 fn corpus_10d() -> Vec<Embedding> {
518 vec![
519 emb(&[1.0, 0.0, 0.0, 0.1, 0.05, -0.02, 0.03, -0.01, 0.04, 0.02]),
520 emb(&[0.0, 1.0, 0.0, -0.05, 0.1, 0.03, -0.02, 0.01, -0.03, 0.04]),
521 emb(&[0.0, 0.0, 1.0, 0.02, -0.03, 0.1, 0.05, 0.02, -0.01, -0.04]),
522 emb(&[1.0, 1.0, 0.0, 0.05, 0.08, 0.01, 0.01, -0.02, 0.02, 0.03]),
523 emb(&[0.0, 1.0, 1.0, -0.02, 0.07, 0.07, 0.01, 0.02, -0.02, 0.01]),
524 emb(&[1.0, 0.0, 1.0, 0.06, 0.01, 0.05, -0.03, -0.01, 0.03, -0.02]),
525 emb(&[-1.0, 0.0, 0.0, -0.08, 0.02, 0.01, 0.02, 0.03, -0.02, 0.01]),
526 emb(&[0.0, -1.0, 0.0, 0.03, -0.09, -0.02, 0.01, -0.01, 0.02, -0.03]),
527 ]
528 }
529
530 fn assert_valid_spherical(sp: &SphericalPoint) {
531 assert!(sp.r >= 0.0, "r must be >= 0, got {}", sp.r);
532 assert!(
533 sp.theta >= 0.0 && sp.theta < TAU,
534 "theta must be in [0, 2π), got {}",
535 sp.theta
536 );
537 assert!(
538 sp.phi >= 0.0 && sp.phi <= PI,
539 "phi must be in [0, π], got {}",
540 sp.phi
541 );
542 }
543
544 #[test]
545 fn kernel_pca_fit_auto_sigma() {
546 let corpus = corpus_10d();
547 let kpca = KernelPcaProjection::fit(&corpus, RadialStrategy::Fixed(1.0));
548 assert_eq!(kpca.dimensionality(), 10);
549 assert!(kpca.sigma() > 0.0);
550 assert_eq!(kpca.num_training_points(), 8);
551 }
552
553 #[test]
554 fn kernel_pca_fit_explicit_sigma() {
555 let corpus = corpus_10d();
556 let kpca = KernelPcaProjection::fit_with_sigma(&corpus, 0.5, RadialStrategy::Fixed(1.0));
557 assert!((kpca.sigma() - 0.5).abs() < 1e-12);
558 }
559
560 #[test]
561 fn kernel_pca_fit_default() {
562 let corpus = corpus_10d();
563 let kpca = KernelPcaProjection::fit_default(&corpus);
564 assert_eq!(kpca.dimensionality(), 10);
565 }
566
567 #[test]
568 #[should_panic(expected = "need at least one embedding")]
569 fn kernel_pca_empty_corpus_panics() {
570 KernelPcaProjection::fit(&[], RadialStrategy::Fixed(1.0));
571 }
572
573 #[test]
574 #[should_panic(expected = "embedding dimension must be >= 3")]
575 fn kernel_pca_too_few_dims_panics() {
576 KernelPcaProjection::fit(&[emb(&[1.0, 2.0])], RadialStrategy::Fixed(1.0));
577 }
578
579 #[test]
580 #[should_panic(expected = "σ must be positive")]
581 fn kernel_pca_zero_sigma_panics() {
582 let corpus = corpus_10d();
583 KernelPcaProjection::fit_with_sigma(&corpus, 0.0, RadialStrategy::Fixed(1.0));
584 }
585
586 #[test]
587 fn kernel_pca_produces_valid_spherical_points() {
588 let corpus = corpus_10d();
589 let kpca = KernelPcaProjection::fit(&corpus, RadialStrategy::Fixed(1.0));
590 for e in &corpus {
591 assert_valid_spherical(&kpca.project(e));
592 }
593 }
594
595 #[test]
596 fn kernel_pca_out_of_sample_produces_valid_points() {
597 let corpus = corpus_10d();
598 let kpca = KernelPcaProjection::fit(&corpus, RadialStrategy::Fixed(1.0));
599 let novel = emb(&[0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]);
600 assert_valid_spherical(&kpca.project(&novel));
601 }
602
603 #[test]
604 fn kernel_pca_preserves_angular_ordering() {
605 let corpus = corpus_10d();
606 let kpca = KernelPcaProjection::fit(&corpus, RadialStrategy::Fixed(1.0));
607 let a = emb(&[1.0, 0.1, 0.0, 0.05, 0.02, -0.01, 0.01, 0.0, 0.02, 0.01]);
608 let b = emb(&[0.9, 0.2, 0.1, 0.04, 0.03, 0.0, 0.02, -0.01, 0.01, 0.02]);
609 let c = emb(&[-1.0, -0.1, 0.0, -0.04, 0.01, 0.02, 0.01, 0.02, -0.01, 0.01]);
610 let pa = kpca.project(&a);
611 let pb = kpca.project(&b);
612 let pc = kpca.project(&c);
613 let d_ab = angular_distance(&pa, &pb);
614 let d_ac = angular_distance(&pa, &pc);
615 assert!(
616 d_ab < d_ac,
617 "similar items should be closer: d(a,b)={d_ab:.4} < d(a,c)={d_ac:.4}"
618 );
619 }
620
621 #[test]
622 fn kernel_pca_fixed_radial() {
623 let corpus = corpus_10d();
624 let kpca = KernelPcaProjection::fit(&corpus, RadialStrategy::Fixed(2.5));
625 let sp = kpca.project(&corpus[0]);
626 assert!((sp.r - 2.5).abs() < 1e-12);
627 }
628
629 #[test]
630 fn kernel_pca_magnitude_radial() {
631 let corpus = corpus_10d();
632 let kpca = KernelPcaProjection::fit(&corpus, RadialStrategy::Magnitude);
633 let short = emb(&[0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]);
634 let long = emb(&[10.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]);
635 let ps = kpca.project(&short);
636 let pl = kpca.project(&long);
637 assert!(ps.r < pl.r, "longer vector should have larger radius");
638 assert!((ps.r - 0.1).abs() < 1e-10);
639 assert!((pl.r - 10.0).abs() < 1e-10);
640 }
641
642 #[test]
643 fn kernel_pca_volumetric() {
644 let corpus = corpus_10d();
645 let kpca =
646 KernelPcaProjection::fit(&corpus, RadialStrategy::Fixed(1.0)).with_volumetric(true);
647 let sp = kpca.project(&corpus[0]);
648 assert!(sp.r >= 0.0);
649 }
650
651 #[test]
652 fn kernel_pca_project_rich_has_valid_certainty() {
653 let corpus = corpus_10d();
654 let kpca = KernelPcaProjection::fit(&corpus, RadialStrategy::Fixed(1.0));
655 for e in &corpus {
656 let rich = kpca.project_rich(e);
657 assert!(
658 rich.certainty >= 0.0 && rich.certainty <= 1.0,
659 "certainty must be in [0,1], got {}",
660 rich.certainty
661 );
662 assert!(rich.intensity > 0.0);
663 assert!(rich.projection_magnitude >= 0.0);
664 }
665 }
666
667 #[test]
668 fn kernel_pca_certainty_is_meaningful() {
669 let corpus = corpus_10d();
670 let kpca = KernelPcaProjection::fit(&corpus, RadialStrategy::Fixed(1.0));
671 let total_certainty: f64 = corpus.iter().map(|e| kpca.project_rich(e).certainty).sum();
672 let mean_certainty = total_certainty / corpus.len() as f64;
673 assert!(
674 mean_certainty > 0.01,
675 "mean certainty of training data should be non-trivial, got {mean_certainty}"
676 );
677 }
678
679 #[test]
680 fn kernel_pca_explained_variance_in_range() {
681 let corpus = corpus_10d();
682 let kpca = KernelPcaProjection::fit(&corpus, RadialStrategy::Fixed(1.0));
683 let ratio = kpca.explained_variance_ratio();
684 assert!(
685 ratio > 0.0 && ratio <= 1.0,
686 "explained variance ratio should be in (0, 1], got {ratio}"
687 );
688 }
689
690 #[test]
691 fn kernel_pca_eigenvalues_descending() {
692 let corpus = corpus_10d();
693 let kpca = KernelPcaProjection::fit(&corpus, RadialStrategy::Fixed(1.0));
694 let ev = kpca.eigenvalues();
695 assert!(ev[0] >= ev[1], "eigenvalues must descend: {ev:?}");
696 assert!(ev[1] >= ev[2], "eigenvalues must descend: {ev:?}");
697 assert!(ev[0] > 0.0, "first eigenvalue should be positive");
698 }
699
700 #[test]
701 fn gaussian_kernel_self_similarity() {
702 let x = vec![1.0, 0.0, 0.0, 0.0, 0.0];
703 let inv = 0.5;
704 assert!((gaussian_kernel(&x, &x, inv) - 1.0).abs() < 1e-12);
705 }
706
707 #[test]
708 fn gaussian_kernel_symmetry() {
709 let a = vec![1.0, 0.0, 0.5];
710 let b = vec![0.0, 1.0, -0.3];
711 let inv = 1.0;
712 assert!((gaussian_kernel(&a, &b, inv) - gaussian_kernel(&b, &a, inv)).abs() < 1e-15);
713 }
714
715 #[test]
716 fn gaussian_kernel_range() {
717 let a = vec![1.0, 0.0, 0.0];
718 let b = vec![0.0, 1.0, 0.0];
719 let inv = 0.5;
720 let k = gaussian_kernel(&a, &b, inv);
721 assert!(
722 k > 0.0 && k <= 1.0,
723 "Gaussian kernel must be in (0, 1], got {k}"
724 );
725 }
726
727 #[test]
728 fn auto_sigma_is_positive() {
729 let corpus = corpus_10d();
730 let normalized: Vec<Vec<f64>> = corpus.iter().map(|e| e.normalized()).collect();
731 let sigma = auto_sigma(&normalized);
732 assert!(sigma > 0.0);
733 }
734
735 #[test]
736 fn auto_sigma_single_point() {
737 let data = vec![vec![1.0, 0.0, 0.0]];
738 assert!((auto_sigma(&data) - 1.0).abs() < 1e-12);
739 }
740
741 #[test]
742 #[should_panic(expected = "expected dimension 10")]
743 fn kernel_pca_dimension_mismatch_panics() {
744 let corpus = corpus_10d();
745 let kpca = KernelPcaProjection::fit(&corpus, RadialStrategy::Fixed(1.0));
746 let _ = kpca.project(&emb(&[1.0, 2.0, 3.0]));
747 }
748
749 #[test]
750 fn kernel_pca_clone_produces_identical_results() {
751 let corpus = corpus_10d();
752 let kpca = KernelPcaProjection::fit(&corpus, RadialStrategy::Fixed(1.0));
753 let kpca2 = kpca.clone();
754 for e in &corpus {
755 let sp1 = kpca.project(e);
756 let sp2 = kpca2.project(e);
757 assert!((sp1.theta - sp2.theta).abs() < 1e-12);
758 assert!((sp1.phi - sp2.phi).abs() < 1e-12);
759 }
760 }
761
762 #[test]
763 fn kernel_pca_works_with_embedding_index() {
764 use crate::query::EmbeddingIndex;
765 let corpus = corpus_10d();
766 let kpca = KernelPcaProjection::fit(&corpus, RadialStrategy::Fixed(1.0));
767 let mut idx = EmbeddingIndex::builder(kpca)
768 .theta_divisions(4)
769 .phi_divisions(3)
770 .build();
771 for (i, e) in corpus.iter().enumerate() {
772 idx.insert(format!("item-{i}"), e);
773 }
774 assert_eq!(idx.len(), corpus.len());
775 let query = emb(&[0.9, 0.1, 0.0, 0.05, 0.02, -0.01, 0.01, 0.0, 0.02, 0.01]);
776 let results = idx.search_nearest(&query, 3);
777 assert_eq!(results.len(), 3);
778 assert!(results[0].distance <= results[1].distance);
779 }
780
781 #[test]
782 fn large_sigma_approaches_pca_ordering() {
783 use crate::projection::PcaProjection;
784 let corpus = corpus_10d();
785 let pca = PcaProjection::fit(&corpus, RadialStrategy::Fixed(1.0));
786 let kpca = KernelPcaProjection::fit_with_sigma(&corpus, 100.0, RadialStrategy::Fixed(1.0));
787 let query = emb(&[1.0, 0.1, 0.0, 0.05, 0.02, -0.01, 0.01, 0.0, 0.02, 0.01]);
788 let pca_pt = pca.project(&query);
789 let kpca_pt = kpca.project(&query);
790 let mut pca_dists: Vec<(usize, f64)> = corpus
791 .iter()
792 .enumerate()
793 .map(|(i, e)| (i, angular_distance(&pca_pt, &pca.project(e))))
794 .collect();
795 let mut kpca_dists: Vec<(usize, f64)> = corpus
796 .iter()
797 .enumerate()
798 .map(|(i, e)| (i, angular_distance(&kpca_pt, &kpca.project(e))))
799 .collect();
800 pca_dists.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
801 kpca_dists.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
802 assert_eq!(
803 pca_dists[0].0, kpca_dists[0].0,
804 "nearest neighbour should match between PCA and kernel PCA with large σ"
805 );
806 }
807}