1use sphereql_core::{CartesianPoint, SphericalPoint, cartesian_to_spherical};
2
3use crate::projection::{
4 ProjectionError, SplitMix64, dot, normalize_vec, project_xyz_to_spherical,
5};
6use crate::types::{Embedding, ProjectedPoint, RadialStrategy};
7
8use crate::projection::Projection;
9
10#[derive(Clone)]
45pub struct KernelPcaProjection {
46 training_data: Vec<Vec<f64>>,
49
50 sigma: f64,
52
53 inv_two_sigma_sq: f64,
55
56 alphas: [Vec<f64>; 3],
59
60 eigenvalues: [f64; 3],
62
63 col_means: Vec<f64>,
66
67 grand_mean: f64,
70
71 total_variance: f64,
73
74 dim: usize,
76
77 radial: RadialStrategy,
79
80 volumetric: bool,
83}
84
85const KPCA_FIT_WARN_THRESHOLD: usize = 2_000;
89
90impl KernelPcaProjection {
91 pub fn fit(embeddings: &[Embedding], radial: RadialStrategy) -> Result<Self, ProjectionError> {
98 Self::fit_impl(embeddings, None, radial)
99 }
100
101 pub fn fit_with_sigma(
107 embeddings: &[Embedding],
108 sigma: f64,
109 radial: RadialStrategy,
110 ) -> Result<Self, ProjectionError> {
111 if sigma <= 0.0 {
112 return Err(ProjectionError::InvalidSigma { got: sigma });
113 }
114 Self::fit_impl(embeddings, Some(sigma), radial)
115 }
116
117 pub fn fit_default(embeddings: &[Embedding]) -> Result<Self, ProjectionError> {
119 Self::fit(embeddings, RadialStrategy::default())
120 }
121
122 pub fn with_volumetric(mut self, enabled: bool) -> Self {
125 self.volumetric = enabled;
126 self
127 }
128
129 pub fn sigma(&self) -> f64 {
131 self.sigma
132 }
133
134 pub fn num_training_points(&self) -> usize {
136 self.training_data.len()
137 }
138
139 pub fn explained_variance_ratio(&self) -> f64 {
145 if self.total_variance < f64::EPSILON {
146 return 1.0;
147 }
148 let explained: f64 = self.eigenvalues.iter().sum();
149 (explained / (self.total_variance * self.training_data.len() as f64)).clamp(0.0, 1.0)
153 }
154
155 pub fn eigenvalues(&self) -> [f64; 3] {
157 self.eigenvalues
158 }
159
160 fn fit_impl(
163 embeddings: &[Embedding],
164 sigma: Option<f64>,
165 radial: RadialStrategy,
166 ) -> Result<Self, ProjectionError> {
167 if embeddings.is_empty() {
168 return Err(ProjectionError::EmptyCorpus);
169 }
170 let dim = embeddings[0].dimension();
171 if dim < 3 {
172 return Err(ProjectionError::DimensionTooLow {
173 got: dim,
174 required: 3,
175 });
176 }
177 let n = embeddings.len();
181 if n >= KPCA_FIT_WARN_THRESHOLD {
182 tracing::warn!(
183 n,
184 dim,
185 "KernelPcaProjection::fit: O(n²·d) cost, expect minutes-to-hours. \
186 Consider PCA or downsampling."
187 );
188 }
189 for (i, e) in embeddings.iter().enumerate() {
190 if e.dimension() != dim {
191 return Err(ProjectionError::InconsistentDimension {
192 index: i,
193 expected: dim,
194 got: e.dimension(),
195 });
196 }
197 }
198
199 let normalized: Vec<Vec<f64>> = embeddings.iter().map(|e| e.normalized()).collect();
201 let n = normalized.len();
202
203 let sigma = sigma.unwrap_or_else(|| auto_sigma(&normalized));
205 let inv_two_sigma_sq = 0.5 / (sigma * sigma);
206
207 let mut kernel_flat = vec![0.0_f64; n * n];
212 for i in 0..n {
213 kernel_flat[i * n + i] = 1.0; for j in (i + 1)..n {
215 let val = gaussian_kernel(&normalized[i], &normalized[j], inv_two_sigma_sq);
216 kernel_flat[i * n + j] = val;
217 kernel_flat[j * n + i] = val;
218 }
219 }
220
221 let mut col_means = vec![0.0; n];
225 let mut grand_sum = 0.0;
226 for i in 0..n {
227 let mut row_sum = 0.0;
228 for j in 0..n {
229 row_sum += kernel_flat[i * n + j];
230 }
231 col_means[i] = row_sum / n as f64;
232 grand_sum += row_sum;
233 }
234 let grand_mean = grand_sum / (n * n) as f64;
235
236 for i in 0..n {
246 for j in i..n {
247 let v = kernel_flat[i * n + j] - col_means[i] - col_means[j] + grand_mean;
248 kernel_flat[i * n + j] = v;
249 kernel_flat[j * n + i] = v;
250 }
251 }
252
253 let trace: f64 = (0..n).map(|i| kernel_flat[i * n + i]).sum();
255 let total_variance = trace / n as f64;
256
257 let (raw_vectors, raw_values) = top_k_symmetric(&kernel_flat, n, 3);
259
260 let mut alphas: [Vec<f64>; 3] = [Vec::new(), Vec::new(), Vec::new()];
263 let mut eigenvalues = [0.0_f64; 3];
264 for l in 0..3 {
265 let lambda = raw_values.get(l).copied().unwrap_or(0.0);
266 eigenvalues[l] = lambda;
267 if lambda > f64::EPSILON {
268 let scale = 1.0 / lambda.sqrt();
269 alphas[l] = raw_vectors[l].iter().map(|&a| a * scale).collect();
270 } else {
271 alphas[l] = vec![0.0; n];
272 }
273 }
274
275 Ok(Self {
276 training_data: normalized,
277 sigma,
278 inv_two_sigma_sq,
279 alphas,
280 eigenvalues,
281 col_means,
282 grand_mean,
283 total_variance,
284 dim,
285 radial,
286 volumetric: false,
287 })
288 }
289
290 fn kernel_project(&self, normalized: &[f64]) -> (f64, f64, f64, f64) {
294 let n = self.training_data.len();
295
296 let k_z: Vec<f64> = self
298 .training_data
299 .iter()
300 .map(|x_i| gaussian_kernel(normalized, x_i, self.inv_two_sigma_sq))
301 .collect();
302
303 let mean_k_z: f64 = k_z.iter().sum::<f64>() / n as f64;
305
306 let k_z_centered: Vec<f64> = (0..n)
308 .map(|i| k_z[i] - mean_k_z - self.col_means[i] + self.grand_mean)
309 .collect();
310
311 let f1 = dot(&self.alphas[0], &k_z_centered);
313 let f2 = dot(&self.alphas[1], &k_z_centered);
314 let f3 = dot(&self.alphas[2], &k_z_centered);
315
316 let spherical_potential = (1.0 - 2.0 * mean_k_z + self.grand_mean).max(0.0);
322
323 (f1, f2, f3, spherical_potential)
324 }
325}
326
327impl Projection for KernelPcaProjection {
328 fn project(&self, embedding: &Embedding) -> SphericalPoint {
329 assert_eq!(
331 embedding.dimension(),
332 self.dim,
333 "expected dimension {}, got {}",
334 self.dim,
335 embedding.dimension()
336 );
337
338 let normalized = embedding.normalized();
339 let (x, y, z, spherical_potential) = self.kernel_project(&normalized);
340
341 if self.volumetric {
342 let sp = cartesian_to_spherical(&CartesianPoint::new(x, y, z));
343 if sp.r < f64::EPSILON {
344 return SphericalPoint::origin();
345 }
346 SphericalPoint::new_unchecked(sp.r, sp.theta, sp.phi)
347 } else {
348 let projection_sq = x * x + y * y + z * z;
349 let projection_magnitude = projection_sq.sqrt();
350 let certainty = if spherical_potential > f64::EPSILON {
351 (projection_sq / spherical_potential).clamp(0.0, 1.0)
352 } else {
353 0.0
354 };
355 let r = self.radial.compute_rich(&crate::types::RadialContext::full(
356 embedding.magnitude(),
357 projection_magnitude,
358 certainty,
359 ));
360 project_xyz_to_spherical(x, y, z, r)
361 }
362 }
363
364 fn project_rich(&self, embedding: &Embedding) -> ProjectedPoint {
365 assert_eq!(
367 embedding.dimension(),
368 self.dim,
369 "expected dimension {}, got {}",
370 self.dim,
371 embedding.dimension()
372 );
373
374 let intensity = embedding.magnitude();
375 let normalized = embedding.normalized();
376 let (x, y, z, spherical_potential) = self.kernel_project(&normalized);
377
378 let projection_magnitude = (x * x + y * y + z * z).sqrt();
379
380 let projection_sq = x * x + y * y + z * z;
384 let certainty = if spherical_potential > f64::EPSILON {
385 (projection_sq / spherical_potential).clamp(0.0, 1.0)
386 } else {
387 0.0
388 };
389
390 let position = if self.volumetric {
391 let sp = cartesian_to_spherical(&CartesianPoint::new(x, y, z));
392 if sp.r < f64::EPSILON {
393 SphericalPoint::origin()
394 } else {
395 SphericalPoint::new_unchecked(sp.r, sp.theta, sp.phi)
396 }
397 } else {
398 let r = self.radial.compute_rich(&crate::types::RadialContext::full(
399 intensity,
400 projection_magnitude,
401 certainty,
402 ));
403 project_xyz_to_spherical(x, y, z, r)
404 };
405
406 ProjectedPoint::new(position, certainty, intensity, projection_magnitude)
407 }
408
409 fn dimensionality(&self) -> usize {
410 self.dim
411 }
412}
413
414#[inline]
418fn gaussian_kernel(a: &[f64], b: &[f64], inv_two_sigma_sq: f64) -> f64 {
419 let sq_dist: f64 = a
420 .iter()
421 .zip(b.iter())
422 .map(|(&x, &y)| {
423 let d = x - y;
424 d * d
425 })
426 .sum();
427 (-sq_dist * inv_two_sigma_sq).exp()
428}
429
430fn auto_sigma(data: &[Vec<f64>]) -> f64 {
438 let n = data.len();
439 if n < 2 {
440 return 1.0;
441 }
442
443 let mut distances = Vec::new();
444 let mut rng = SplitMix64::new(0xCAFE_BABE);
445
446 if n <= 100 {
447 distances.reserve(n * (n - 1) / 2);
448 for i in 0..n {
449 for j in (i + 1)..n {
450 distances.push(euclidean_dist(&data[i], &data[j]));
451 }
452 }
453 } else {
454 let num_pairs = 5000.min(n * (n - 1) / 2);
455 distances.reserve(num_pairs);
456 for _ in 0..num_pairs {
457 let i = (rng.next_u64() as usize) % n;
458 let mut j = (rng.next_u64() as usize) % n;
459 if j == i {
460 j = (j + 1) % n;
461 }
462 distances.push(euclidean_dist(&data[i], &data[j]));
463 }
464 }
465
466 let mid = distances.len() / 2;
472 let (_, median_ref, _) = distances.select_nth_unstable_by(mid, |a, b| a.total_cmp(b));
473 let median = *median_ref;
474
475 (median * std::f64::consts::FRAC_1_SQRT_2).max(1e-8)
476}
477
478fn euclidean_dist(a: &[f64], b: &[f64]) -> f64 {
479 a.iter()
480 .zip(b.iter())
481 .map(|(&x, &y)| {
482 let d = x - y;
483 d * d
484 })
485 .sum::<f64>()
486 .sqrt()
487}
488
489fn top_k_symmetric(matrix: &[f64], n: usize, k: usize) -> (Vec<Vec<f64>>, Vec<f64>) {
496 let max_iters = 300;
497 let tol = 1e-10;
498 let mut vectors: Vec<Vec<f64>> = Vec::with_capacity(k);
499 let mut values: Vec<f64> = Vec::with_capacity(k);
500 let mut rng = SplitMix64::new(0xBEEF_CAFE);
501
502 let matvec = |dst: &mut [f64], src: &[f64]| {
507 for (i, dst_i) in dst.iter_mut().enumerate() {
508 let row_start = i * n;
509 let mut s = 0.0;
510 for j in 0..n {
511 s += matrix[row_start + j] * src[j];
512 }
513 *dst_i = s;
514 }
515 };
516
517 for _ in 0..k {
518 let mut v: Vec<f64> = (0..n).map(|_| rng.normal()).collect();
519 normalize_vec(&mut v);
520 let mut eigenvalue = 0.0;
521
522 let mut mv_cache: Option<Vec<f64>> = None;
529
530 for _ in 0..max_iters {
531 let mut u = mv_cache.take().unwrap_or_else(|| {
532 let mut u = vec![0.0; n];
533 matvec(&mut u, &v);
534 u
535 });
536
537 for prev in &vectors {
538 let proj = dot(&u, prev);
539 for (ui, &pi) in u.iter_mut().zip(prev.iter()) {
540 *ui -= proj * pi;
541 }
542 }
543
544 let mag = normalize_vec(&mut u);
545 if mag < f64::EPSILON {
546 break;
547 }
548
549 let mut mv_next = vec![0.0; n];
553 matvec(&mut mv_next, &u);
554 eigenvalue = u.iter().zip(mv_next.iter()).map(|(a, b)| a * b).sum();
555
556 let change = (1.0 - dot(&u, &v).abs()).max(0.0);
560 v = u;
561 mv_cache = Some(mv_next);
562
563 if change < tol {
564 break;
565 }
566 }
567
568 vectors.push(v);
569 values.push(eigenvalue);
570 }
571
572 while vectors.len() < k {
573 let mut v: Vec<f64> = (0..n).map(|_| rng.normal()).collect();
574 for prev in &vectors {
575 let proj = dot(&v, prev);
576 for (vi, &pi) in v.iter_mut().zip(prev.iter()) {
577 *vi -= proj * pi;
578 }
579 }
580 normalize_vec(&mut v);
581 vectors.push(v);
582 values.push(0.0);
583 }
584
585 (vectors, values)
586}
587
588#[cfg(test)]
589mod tests {
590 use super::*;
591 use sphereql_core::angular_distance;
592 use std::f64::consts::{PI, TAU};
593
594 fn emb(vals: &[f64]) -> Embedding {
595 Embedding::new(vals.to_vec())
596 }
597
598 fn corpus_10d() -> Vec<Embedding> {
599 vec![
600 emb(&[1.0, 0.0, 0.0, 0.1, 0.05, -0.02, 0.03, -0.01, 0.04, 0.02]),
601 emb(&[0.0, 1.0, 0.0, -0.05, 0.1, 0.03, -0.02, 0.01, -0.03, 0.04]),
602 emb(&[0.0, 0.0, 1.0, 0.02, -0.03, 0.1, 0.05, 0.02, -0.01, -0.04]),
603 emb(&[1.0, 1.0, 0.0, 0.05, 0.08, 0.01, 0.01, -0.02, 0.02, 0.03]),
604 emb(&[0.0, 1.0, 1.0, -0.02, 0.07, 0.07, 0.01, 0.02, -0.02, 0.01]),
605 emb(&[1.0, 0.0, 1.0, 0.06, 0.01, 0.05, -0.03, -0.01, 0.03, -0.02]),
606 emb(&[-1.0, 0.0, 0.0, -0.08, 0.02, 0.01, 0.02, 0.03, -0.02, 0.01]),
607 emb(&[0.0, -1.0, 0.0, 0.03, -0.09, -0.02, 0.01, -0.01, 0.02, -0.03]),
608 ]
609 }
610
611 fn assert_valid_spherical(sp: &SphericalPoint) {
612 assert!(sp.r >= 0.0, "r must be >= 0, got {}", sp.r);
613 assert!(
614 sp.theta >= 0.0 && sp.theta < TAU,
615 "theta must be in [0, 2π), got {}",
616 sp.theta
617 );
618 assert!(
619 sp.phi >= 0.0 && sp.phi <= PI,
620 "phi must be in [0, π], got {}",
621 sp.phi
622 );
623 }
624
625 #[test]
631 fn top_k_symmetric_first_coordinate_nonzero() {
632 let n = 4;
635 #[rustfmt::skip]
636 let matrix = vec![
637 4.0, 1.0, 0.5, 0.2,
638 1.0, 4.0, 1.0, 0.5,
639 0.5, 1.0, 4.0, 1.0,
640 0.2, 0.5, 1.0, 4.0,
641 ];
642 let (vectors, values) = top_k_symmetric(&matrix, n, 2);
643 assert_eq!(vectors.len(), 2);
644 for (idx, v) in vectors.iter().enumerate() {
645 assert!(
646 v[0].abs() > 1e-6,
647 "eigenvector {idx} has a suspiciously small first \
648 coordinate ({:.2e}) — the .skip(1) bug is back",
649 v[0]
650 );
651 }
652 assert!(values[0] > values[1], "eigenvalues must be sorted");
653 }
654
655 #[test]
656 fn kernel_pca_fit_auto_sigma() {
657 let corpus = corpus_10d();
658 let kpca = KernelPcaProjection::fit(&corpus, RadialStrategy::Fixed(1.0)).unwrap();
659 assert_eq!(kpca.dimensionality(), 10);
660 assert!(kpca.sigma() > 0.0);
661 assert_eq!(kpca.num_training_points(), 8);
662 }
663
664 #[test]
665 fn kernel_pca_fit_explicit_sigma() {
666 let corpus = corpus_10d();
667 let kpca =
668 KernelPcaProjection::fit_with_sigma(&corpus, 0.5, RadialStrategy::Fixed(1.0)).unwrap();
669 assert!((kpca.sigma() - 0.5).abs() < 1e-12);
670 }
671
672 #[test]
673 fn kernel_pca_fit_default() {
674 let corpus = corpus_10d();
675 let kpca = KernelPcaProjection::fit_default(&corpus).unwrap();
676 assert_eq!(kpca.dimensionality(), 10);
677 }
678
679 #[test]
680 fn kernel_pca_empty_corpus_returns_err() {
681 assert!(matches!(
682 KernelPcaProjection::fit(&[], RadialStrategy::Fixed(1.0)),
683 Err(ProjectionError::EmptyCorpus)
684 ));
685 }
686
687 #[test]
688 fn kernel_pca_too_few_dims_returns_err() {
689 assert!(matches!(
690 KernelPcaProjection::fit(&[emb(&[1.0, 2.0])], RadialStrategy::Fixed(1.0)),
691 Err(ProjectionError::DimensionTooLow {
692 got: 2,
693 required: 3
694 })
695 ));
696 }
697
698 #[test]
699 fn kernel_pca_zero_sigma_returns_err() {
700 let corpus = corpus_10d();
701 assert!(matches!(
702 KernelPcaProjection::fit_with_sigma(&corpus, 0.0, RadialStrategy::Fixed(1.0)),
703 Err(ProjectionError::InvalidSigma { got }) if got == 0.0
704 ));
705 }
706
707 #[test]
708 fn kernel_pca_produces_valid_spherical_points() {
709 let corpus = corpus_10d();
710 let kpca = KernelPcaProjection::fit(&corpus, RadialStrategy::Fixed(1.0)).unwrap();
711 for e in &corpus {
712 assert_valid_spherical(&kpca.project(e));
713 }
714 }
715
716 #[test]
717 fn kernel_pca_out_of_sample_produces_valid_points() {
718 let corpus = corpus_10d();
719 let kpca = KernelPcaProjection::fit(&corpus, RadialStrategy::Fixed(1.0)).unwrap();
720 let novel = emb(&[0.5, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]);
721 assert_valid_spherical(&kpca.project(&novel));
722 }
723
724 #[test]
725 fn kernel_pca_preserves_angular_ordering() {
726 let corpus = corpus_10d();
727 let kpca = KernelPcaProjection::fit(&corpus, RadialStrategy::Fixed(1.0)).unwrap();
728 let a = emb(&[1.0, 0.1, 0.0, 0.05, 0.02, -0.01, 0.01, 0.0, 0.02, 0.01]);
729 let b = emb(&[0.9, 0.2, 0.1, 0.04, 0.03, 0.0, 0.02, -0.01, 0.01, 0.02]);
730 let c = emb(&[-1.0, -0.1, 0.0, -0.04, 0.01, 0.02, 0.01, 0.02, -0.01, 0.01]);
731 let pa = kpca.project(&a);
732 let pb = kpca.project(&b);
733 let pc = kpca.project(&c);
734 let d_ab = angular_distance(&pa, &pb);
735 let d_ac = angular_distance(&pa, &pc);
736 assert!(
737 d_ab < d_ac,
738 "similar items should be closer: d(a,b)={d_ab:.4} < d(a,c)={d_ac:.4}"
739 );
740 }
741
742 #[test]
743 fn kernel_pca_fixed_radial() {
744 let corpus = corpus_10d();
745 let kpca = KernelPcaProjection::fit(&corpus, RadialStrategy::Fixed(2.5)).unwrap();
746 let sp = kpca.project(&corpus[0]);
747 assert!((sp.r - 2.5).abs() < 1e-12);
748 }
749
750 #[test]
751 fn kernel_pca_magnitude_radial() {
752 let corpus = corpus_10d();
753 let kpca = KernelPcaProjection::fit(&corpus, RadialStrategy::Magnitude).unwrap();
754 let short = emb(&[0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]);
755 let long = emb(&[10.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]);
756 let ps = kpca.project(&short);
757 let pl = kpca.project(&long);
758 assert!(ps.r < pl.r, "longer vector should have larger radius");
759 assert!((ps.r - 0.1).abs() < 1e-10);
760 assert!((pl.r - 10.0).abs() < 1e-10);
761 }
762
763 #[test]
764 fn kernel_pca_volumetric() {
765 let corpus = corpus_10d();
766 let kpca = KernelPcaProjection::fit(&corpus, RadialStrategy::Fixed(1.0))
767 .unwrap()
768 .with_volumetric(true);
769 let sp = kpca.project(&corpus[0]);
770 assert!(sp.r >= 0.0);
771 }
772
773 #[test]
774 fn kernel_pca_project_rich_has_valid_certainty() {
775 let corpus = corpus_10d();
776 let kpca = KernelPcaProjection::fit(&corpus, RadialStrategy::Fixed(1.0)).unwrap();
777 for e in &corpus {
778 let rich = kpca.project_rich(e);
779 assert!(
780 rich.certainty >= 0.0 && rich.certainty <= 1.0,
781 "certainty must be in [0,1], got {}",
782 rich.certainty
783 );
784 assert!(rich.intensity > 0.0);
785 assert!(rich.projection_magnitude >= 0.0);
786 }
787 }
788
789 #[test]
790 fn kernel_pca_certainty_is_meaningful() {
791 let corpus = corpus_10d();
792 let kpca = KernelPcaProjection::fit(&corpus, RadialStrategy::Fixed(1.0)).unwrap();
793 let total_certainty: f64 = corpus.iter().map(|e| kpca.project_rich(e).certainty).sum();
794 let mean_certainty = total_certainty / corpus.len() as f64;
795 assert!(
796 mean_certainty > 0.01,
797 "mean certainty of training data should be non-trivial, got {mean_certainty}"
798 );
799 }
800
801 #[test]
802 fn kernel_pca_explained_variance_in_range() {
803 let corpus = corpus_10d();
804 let kpca = KernelPcaProjection::fit(&corpus, RadialStrategy::Fixed(1.0)).unwrap();
805 let ratio = kpca.explained_variance_ratio();
806 assert!(
807 ratio > 0.0 && ratio <= 1.0,
808 "explained variance ratio should be in (0, 1], got {ratio}"
809 );
810 }
811
812 #[test]
813 fn kernel_pca_eigenvalues_descending() {
814 let corpus = corpus_10d();
815 let kpca = KernelPcaProjection::fit(&corpus, RadialStrategy::Fixed(1.0)).unwrap();
816 let ev = kpca.eigenvalues();
817 assert!(ev[0] >= ev[1], "eigenvalues must descend: {ev:?}");
818 assert!(ev[1] >= ev[2], "eigenvalues must descend: {ev:?}");
819 assert!(ev[0] > 0.0, "first eigenvalue should be positive");
820 }
821
822 #[test]
823 fn gaussian_kernel_self_similarity() {
824 let x = vec![1.0, 0.0, 0.0, 0.0, 0.0];
825 let inv = 0.5;
826 assert!((gaussian_kernel(&x, &x, inv) - 1.0).abs() < 1e-12);
827 }
828
829 #[test]
830 fn gaussian_kernel_symmetry() {
831 let a = vec![1.0, 0.0, 0.5];
832 let b = vec![0.0, 1.0, -0.3];
833 let inv = 1.0;
834 assert!((gaussian_kernel(&a, &b, inv) - gaussian_kernel(&b, &a, inv)).abs() < 1e-15);
835 }
836
837 #[test]
838 fn gaussian_kernel_range() {
839 let a = vec![1.0, 0.0, 0.0];
840 let b = vec![0.0, 1.0, 0.0];
841 let inv = 0.5;
842 let k = gaussian_kernel(&a, &b, inv);
843 assert!(
844 k > 0.0 && k <= 1.0,
845 "Gaussian kernel must be in (0, 1], got {k}"
846 );
847 }
848
849 #[test]
850 fn auto_sigma_is_positive() {
851 let corpus = corpus_10d();
852 let normalized: Vec<Vec<f64>> = corpus.iter().map(|e| e.normalized()).collect();
853 let sigma = auto_sigma(&normalized);
854 assert!(sigma > 0.0);
855 }
856
857 #[test]
858 fn auto_sigma_single_point() {
859 let data = vec![vec![1.0, 0.0, 0.0]];
860 assert!((auto_sigma(&data) - 1.0).abs() < 1e-12);
861 }
862
863 #[test]
864 #[should_panic(expected = "expected dimension 10")]
865 fn kernel_pca_dimension_mismatch_panics() {
866 let corpus = corpus_10d();
867 let kpca = KernelPcaProjection::fit(&corpus, RadialStrategy::Fixed(1.0)).unwrap();
868 let _ = kpca.project(&emb(&[1.0, 2.0, 3.0]));
869 }
870
871 #[test]
872 fn kernel_pca_clone_produces_identical_results() {
873 let corpus = corpus_10d();
874 let kpca = KernelPcaProjection::fit(&corpus, RadialStrategy::Fixed(1.0)).unwrap();
875 let kpca2 = kpca.clone();
876 for e in &corpus {
877 let sp1 = kpca.project(e);
878 let sp2 = kpca2.project(e);
879 assert!((sp1.theta - sp2.theta).abs() < 1e-12);
880 assert!((sp1.phi - sp2.phi).abs() < 1e-12);
881 }
882 }
883
884 #[test]
885 fn kernel_pca_works_with_embedding_index() {
886 use crate::query::EmbeddingIndex;
887 let corpus = corpus_10d();
888 let kpca = KernelPcaProjection::fit(&corpus, RadialStrategy::Fixed(1.0)).unwrap();
889 let mut idx = EmbeddingIndex::builder(kpca)
890 .theta_divisions(4)
891 .phi_divisions(3)
892 .build();
893 for (i, e) in corpus.iter().enumerate() {
894 idx.insert(format!("item-{i}"), e);
895 }
896 assert_eq!(idx.len(), corpus.len());
897 let query = emb(&[0.9, 0.1, 0.0, 0.05, 0.02, -0.01, 0.01, 0.0, 0.02, 0.01]);
898 let results = idx.search_nearest(&query, 3);
899 assert_eq!(results.len(), 3);
900 assert!(results[0].distance <= results[1].distance);
901 }
902
903 #[test]
904 fn large_sigma_approaches_pca_ordering() {
905 use crate::projection::PcaProjection;
906 let corpus = corpus_10d();
907 let pca = PcaProjection::fit(&corpus, RadialStrategy::Fixed(1.0)).unwrap();
908 let kpca = KernelPcaProjection::fit_with_sigma(&corpus, 100.0, RadialStrategy::Fixed(1.0))
909 .unwrap();
910 let query = emb(&[1.0, 0.1, 0.0, 0.05, 0.02, -0.01, 0.01, 0.0, 0.02, 0.01]);
911 let pca_pt = pca.project(&query);
912 let kpca_pt = kpca.project(&query);
913 let mut pca_dists: Vec<(usize, f64)> = corpus
914 .iter()
915 .enumerate()
916 .map(|(i, e)| (i, angular_distance(&pca_pt, &pca.project(e))))
917 .collect();
918 let mut kpca_dists: Vec<(usize, f64)> = corpus
919 .iter()
920 .enumerate()
921 .map(|(i, e)| (i, angular_distance(&kpca_pt, &kpca.project(e))))
922 .collect();
923 pca_dists.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
924 kpca_dists.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
925 assert_eq!(
926 pca_dists[0].0, kpca_dists[0].0,
927 "nearest neighbour should match between PCA and kernel PCA with large σ"
928 );
929 }
930}