1use sphereql_core::SphericalPoint;
33
34use crate::projection::{
35 Projection, ProjectionError, SplitMix64, dot, normalize_vec, project_xyz_to_spherical,
36};
37use crate::types::{Embedding, ProjectedPoint, RadialStrategy};
38
39pub const DEFAULT_ACTIVE_THRESHOLD: f64 = 0.05;
44
45pub const DEFAULT_K_NEIGHBORS: usize = 15;
48
49pub(crate) const DEGREE_REGULARIZATION: f64 = 1e-6;
53
54const MAX_POWER_ITERS: usize = 400;
55const POWER_ITER_TOL: f64 = 1e-10;
56const RNG_SEED: u64 = 0xC0FF_EECA_FE00;
57
58#[derive(Clone)]
69pub struct LaplacianEigenmapProjection {
70 active_threshold: f64,
72 radial: RadialStrategy,
73 dim: usize,
74
75 corpus_active_sets: Vec<Vec<usize>>,
79 corpus_degrees: Vec<f64>,
82
83 eigenvectors: [Vec<f64>; 3],
86 eigenvalues: [f64; 3],
89
90 connectivity_ratio: f64,
95}
96
97impl LaplacianEigenmapProjection {
98 pub fn fit(embeddings: &[Embedding], radial: RadialStrategy) -> Result<Self, ProjectionError> {
100 Self::fit_with_params(
101 embeddings,
102 DEFAULT_K_NEIGHBORS,
103 DEFAULT_ACTIVE_THRESHOLD,
104 radial,
105 )
106 }
107
108 pub fn fit_with_params(
117 embeddings: &[Embedding],
118 k: usize,
119 active_threshold: f64,
120 radial: RadialStrategy,
121 ) -> Result<Self, ProjectionError> {
122 let n = embeddings.len();
123 if n < 4 {
124 return Err(ProjectionError::TooFewEmbeddings {
125 got: n,
126 required: 4,
127 });
128 }
129 let dim = embeddings[0].dimension();
130 if dim == 0 {
131 return Err(ProjectionError::DimensionTooLow {
132 got: dim,
133 required: 1,
134 });
135 }
136 let k = k.min(n - 1).max(1);
137
138 let corpus_active_sets: Vec<Vec<usize>> = embeddings
140 .iter()
141 .map(|e| {
142 let mut idxs: Vec<usize> = e
143 .values
144 .iter()
145 .enumerate()
146 .filter_map(|(i, v)| (v.abs() > active_threshold).then_some(i))
147 .collect();
148 idxs.sort_unstable();
149 idxs
150 })
151 .collect();
152
153 use std::cmp::Reverse;
159 use std::collections::BinaryHeap;
160 #[derive(PartialEq)]
161 struct TopKEntry(f64, usize);
162 impl Eq for TopKEntry {}
163 impl PartialOrd for TopKEntry {
164 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
165 Some(self.cmp(other))
166 }
167 }
168 impl Ord for TopKEntry {
169 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
170 self.0
173 .total_cmp(&other.0)
174 .then_with(|| self.1.cmp(&other.1))
175 }
176 }
177
178 let mut top_k_heaps: Vec<BinaryHeap<Reverse<TopKEntry>>> =
179 (0..n).map(|_| BinaryHeap::with_capacity(k + 1)).collect();
180
181 for i in 0..n {
182 for j in (i + 1)..n {
183 let s = jaccard_sorted(&corpus_active_sets[i], &corpus_active_sets[j]);
184 if s <= 0.0 {
185 continue;
186 }
187 for (owner, other) in [(i, j), (j, i)] {
188 let h = &mut top_k_heaps[owner];
189 h.push(Reverse(TopKEntry(s, other)));
190 if h.len() > k {
191 h.pop();
192 }
193 }
194 }
195 }
196
197 let mut edges: std::collections::HashMap<(usize, usize), f64> =
201 std::collections::HashMap::with_capacity(n * k);
202 for (i, heap) in top_k_heaps.into_iter().enumerate() {
203 for Reverse(TopKEntry(s, j)) in heap.into_iter() {
204 let key = if i < j { (i, j) } else { (j, i) };
205 edges.insert(key, s);
206 }
207 }
208
209 let mut degrees = vec![0.0f64; n];
211 for (&(a, b), &s) in edges.iter() {
212 degrees[a] += s;
213 degrees[b] += s;
214 }
215
216 let safe_degrees: Vec<f64> = degrees
218 .iter()
219 .map(|&d| d.max(DEGREE_REGULARIZATION))
220 .collect();
221 let d_inv_sqrt: Vec<f64> = safe_degrees.iter().map(|&d| 1.0 / d.sqrt()).collect();
222
223 let mut w_norm = vec![0.0f64; n * n];
226 for (&(a, b), &s) in edges.iter() {
227 let v = s * d_inv_sqrt[a] * d_inv_sqrt[b];
228 w_norm[a * n + b] = v;
229 w_norm[b * n + a] = v;
230 }
231
232 let sum_d: f64 = safe_degrees.iter().sum();
236 let trivial_ev: Vec<f64> = safe_degrees
237 .iter()
238 .map(|&d| d.sqrt() / sum_d.sqrt())
239 .collect();
240
241 let (eigenvectors_vec, eigenvalues_vec) =
243 top_k_symmetric_excluding(&w_norm, n, 3, &[trivial_ev]);
244
245 let eigenvalues: [f64; 3] = [eigenvalues_vec[0], eigenvalues_vec[1], eigenvalues_vec[2]];
246 let eigenvectors: [Vec<f64>; 3] = [
247 eigenvectors_vec[0].clone(),
248 eigenvectors_vec[1].clone(),
249 eigenvectors_vec[2].clone(),
250 ];
251
252 let connectivity_ratio =
253 (eigenvalues[0].abs() + eigenvalues[1].abs() + eigenvalues[2].abs()) / 3.0;
254
255 Ok(Self {
256 active_threshold,
257 radial,
258 dim,
259 corpus_active_sets,
260 corpus_degrees: safe_degrees,
261 eigenvectors,
262 eigenvalues,
263 connectivity_ratio,
264 })
265 }
266
267 pub fn eigenvalues(&self) -> [f64; 3] {
271 self.eigenvalues
272 }
273
274 pub fn connectivity_ratio(&self) -> f64 {
284 self.connectivity_ratio
285 }
286
287 pub fn explained_variance_ratio(&self) -> f64 {
292 self.connectivity_ratio
293 }
294
295 fn project_to_3d(&self, embedding: &Embedding) -> (f64, f64, f64) {
298 let mut active_y: Vec<usize> = embedding
299 .values
300 .iter()
301 .enumerate()
302 .filter_map(|(i, v)| (v.abs() > self.active_threshold).then_some(i))
303 .collect();
304 active_y.sort_unstable();
305
306 let n = self.corpus_active_sets.len();
307 let sims: Vec<f64> = self
308 .corpus_active_sets
309 .iter()
310 .map(|s| jaccard_sorted(&active_y, s))
311 .collect();
312
313 let degree_y = sims.iter().sum::<f64>().max(DEGREE_REGULARIZATION);
316 let inv_sqrt_dy = 1.0 / degree_y.sqrt();
317
318 let mut coords = [0.0f64; 3];
321 for (k, (ev, &mu)) in self
322 .eigenvectors
323 .iter()
324 .zip(self.eigenvalues.iter())
325 .enumerate()
326 {
327 if mu.abs() < 1e-10 {
328 continue;
329 }
330 let mut s = 0.0;
331 for j in 0..n {
332 let w_norm_yj = sims[j] * inv_sqrt_dy / self.corpus_degrees[j].sqrt();
333 s += w_norm_yj * ev[j];
334 }
335 coords[k] = s / mu;
336 }
337 (coords[0], coords[1], coords[2])
338 }
339}
340
341impl Projection for LaplacianEigenmapProjection {
342 fn project(&self, embedding: &Embedding) -> SphericalPoint {
343 let (x, y, z) = self.project_to_3d(embedding);
344 let proj_mag = (x * x + y * y + z * z).sqrt();
345 let r = self.radial.compute_rich(&crate::types::RadialContext::full(
346 embedding.magnitude(),
347 proj_mag,
348 proj_mag.tanh(),
349 ));
350 project_xyz_to_spherical(x, y, z, r)
351 }
352
353 fn project_rich(&self, embedding: &Embedding) -> ProjectedPoint {
354 let (x, y, z) = self.project_to_3d(embedding);
355 let proj_mag = (x * x + y * y + z * z).sqrt();
356 let certainty = proj_mag.tanh();
361 let intensity = embedding.magnitude();
362 let r = self.radial.compute_rich(&crate::types::RadialContext::full(
363 intensity, proj_mag, certainty,
364 ));
365 let position = project_xyz_to_spherical(x, y, z, r);
366 ProjectedPoint {
367 position,
368 certainty,
369 intensity,
370 projection_magnitude: proj_mag,
371 }
372 }
373
374 fn dimensionality(&self) -> usize {
375 self.dim
376 }
377}
378
379fn jaccard_sorted(a: &[usize], b: &[usize]) -> f64 {
384 if a.is_empty() && b.is_empty() {
385 return 0.0;
386 }
387 let mut ia = 0;
388 let mut ib = 0;
389 let mut intersection: usize = 0;
390 while ia < a.len() && ib < b.len() {
391 match a[ia].cmp(&b[ib]) {
392 std::cmp::Ordering::Equal => {
393 intersection += 1;
394 ia += 1;
395 ib += 1;
396 }
397 std::cmp::Ordering::Less => ia += 1,
398 std::cmp::Ordering::Greater => ib += 1,
399 }
400 }
401 let union = a.len() + b.len() - intersection;
402 if union == 0 {
403 0.0
404 } else {
405 intersection as f64 / union as f64
406 }
407}
408
409fn top_k_symmetric_excluding(
414 matrix: &[f64],
415 n: usize,
416 k: usize,
417 exclude: &[Vec<f64>],
418) -> (Vec<Vec<f64>>, Vec<f64>) {
419 let mut vectors: Vec<Vec<f64>> = Vec::with_capacity(k);
420 let mut values: Vec<f64> = Vec::with_capacity(k);
421 let mut rng = SplitMix64::new(RNG_SEED);
422
423 let matvec = |dst: &mut [f64], src: &[f64]| {
424 for (i, dst_i) in dst.iter_mut().enumerate() {
425 let row = i * n;
426 let mut s = 0.0;
427 for j in 0..n {
428 s += matrix[row + j] * src[j];
429 }
430 *dst_i = s;
431 }
432 };
433
434 for _ in 0..k {
435 let mut v: Vec<f64> = (0..n).map(|_| rng.normal()).collect();
436 for prev in exclude.iter().chain(vectors.iter()) {
438 let proj = dot(&v, prev);
439 for (vi, &pi) in v.iter_mut().zip(prev.iter()) {
440 *vi -= proj * pi;
441 }
442 }
443 normalize_vec(&mut v);
444 let mut eigenvalue = 0.0;
445
446 let mut mv_cache: Option<Vec<f64>> = None;
447
448 for _ in 0..MAX_POWER_ITERS {
449 let mut u = mv_cache.take().unwrap_or_else(|| {
450 let mut u = vec![0.0f64; n];
451 matvec(&mut u, &v);
452 u
453 });
454 for prev in exclude.iter().chain(vectors.iter()) {
457 let proj = dot(&u, prev);
458 for (ui, &pi) in u.iter_mut().zip(prev.iter()) {
459 *ui -= proj * pi;
460 }
461 }
462 let mag = normalize_vec(&mut u);
463 if mag < f64::EPSILON {
464 break;
465 }
466
467 let mut mv_next = vec![0.0f64; n];
468 matvec(&mut mv_next, &u);
469 eigenvalue = dot(&u, &mv_next);
470
471 let change = (1.0 - dot(&u, &v).abs()).max(0.0);
472 v = u;
473 mv_cache = Some(mv_next);
474 if change < POWER_ITER_TOL {
475 break;
476 }
477 }
478
479 vectors.push(v);
480 values.push(eigenvalue);
481 }
482
483 let mut order: Vec<usize> = (0..vectors.len()).collect();
487 order.sort_by(|&a, &b| {
488 values[b]
489 .partial_cmp(&values[a])
490 .unwrap_or(std::cmp::Ordering::Equal)
491 });
492 let sorted_vectors: Vec<Vec<f64>> = order.iter().map(|&i| vectors[i].clone()).collect();
493 let sorted_values: Vec<f64> = order.iter().map(|&i| values[i]).collect();
494 (sorted_vectors, sorted_values)
495}
496
497#[cfg(test)]
500mod tests {
501 use super::*;
502 use sphereql_core::angular_distance;
503
504 fn emb(vals: &[f64]) -> Embedding {
505 Embedding::new(vals.to_vec())
506 }
507
508 fn three_cluster_corpus() -> Vec<Embedding> {
511 let noise = 0.02;
512 let mut corpus = Vec::new();
513
514 for i in 0..5 {
516 let delta = i as f64 * 0.01;
517 corpus.push(emb(&[
518 1.0 + delta,
519 0.8 - delta,
520 0.7 + delta,
521 noise,
522 -noise,
523 noise,
524 -noise,
525 noise,
526 ]));
527 }
528 for i in 0..5 {
530 let delta = i as f64 * 0.01;
531 corpus.push(emb(&[
532 noise,
533 -noise,
534 noise,
535 1.0 + delta,
536 0.8 - delta,
537 0.7 + delta,
538 -noise,
539 noise,
540 ]));
541 }
542 for i in 0..5 {
544 let delta = i as f64 * 0.01;
545 corpus.push(emb(&[
546 -noise,
547 noise,
548 -noise,
549 noise,
550 -noise,
551 0.9 + delta,
552 1.0 - delta,
553 0.8 + delta,
554 ]));
555 }
556 corpus
557 }
558
559 #[test]
560 fn jaccard_empty_sets_zero() {
561 assert_eq!(jaccard_sorted(&[], &[]), 0.0);
562 assert_eq!(jaccard_sorted(&[1, 2], &[]), 0.0);
563 }
564
565 #[test]
566 fn jaccard_identical_sets_one() {
567 assert!((jaccard_sorted(&[1, 2, 3], &[1, 2, 3]) - 1.0).abs() < 1e-12);
568 }
569
570 #[test]
571 fn jaccard_disjoint_sets_zero() {
572 assert_eq!(jaccard_sorted(&[1, 2], &[3, 4]), 0.0);
573 }
574
575 #[test]
576 fn jaccard_partial_overlap() {
577 assert!((jaccard_sorted(&[1, 2, 3], &[2, 3, 4]) - 0.5).abs() < 1e-12);
579 }
580
581 #[test]
582 fn fit_produces_non_trivial_eigenvalues() {
583 let corpus = three_cluster_corpus();
584 let lap = LaplacianEigenmapProjection::fit(&corpus, RadialStrategy::Fixed(1.0)).unwrap();
585 let [m1, m2, m3] = lap.eigenvalues();
586 assert!(m1 < 1.0 && m1 > 0.0, "μ_1 = {m1}");
590 assert!(m2 < 1.0 && m2 > -1.0, "μ_2 = {m2}");
591 assert!(m3 < 1.0 && m3 > -1.0, "μ_3 = {m3}");
592 assert!(m1 >= m2 - 1e-10);
594 assert!(m2 >= m3 - 1e-10);
595 }
596
597 #[test]
598 fn connectivity_ratio_in_unit_interval() {
599 let corpus = three_cluster_corpus();
600 let lap = LaplacianEigenmapProjection::fit(&corpus, RadialStrategy::Fixed(1.0)).unwrap();
601 let r = lap.connectivity_ratio();
602 assert!((0.0..=1.0).contains(&r), "connectivity_ratio = {r}");
603 assert_eq!(r, lap.explained_variance_ratio());
604 }
605
606 #[test]
607 fn projection_lands_on_unit_sphere() {
608 let corpus = three_cluster_corpus();
609 let lap = LaplacianEigenmapProjection::fit(&corpus, RadialStrategy::Fixed(1.0)).unwrap();
610 for e in &corpus {
611 let sp = lap.project(e);
612 assert!((sp.r - 1.0).abs() < 1e-9, "r = {}", sp.r);
613 assert!(sp.theta >= 0.0 && sp.theta <= std::f64::consts::TAU);
614 assert!(sp.phi >= 0.0 && sp.phi <= std::f64::consts::PI);
615 }
616 }
617
618 #[test]
619 fn same_cluster_points_closer_than_cross_cluster() {
620 let corpus = three_cluster_corpus();
621 let lap = LaplacianEigenmapProjection::fit(&corpus, RadialStrategy::Fixed(1.0)).unwrap();
622 let positions: Vec<SphericalPoint> = corpus.iter().map(|e| lap.project(e)).collect();
623
624 let mut max_within_a = 0.0f64;
626 for i in 0..5 {
627 for j in (i + 1)..5 {
628 max_within_a = max_within_a.max(angular_distance(&positions[i], &positions[j]));
629 }
630 }
631 let mut min_a_to_c = f64::INFINITY;
635 for i in 0..5 {
636 for j in 10..15 {
637 min_a_to_c = min_a_to_c.min(angular_distance(&positions[i], &positions[j]));
638 }
639 }
640 assert!(
641 max_within_a < min_a_to_c,
642 "within-A max ({max_within_a}) should be less than A-to-C min ({min_a_to_c})"
643 );
644 }
645
646 #[test]
647 fn nystrom_roundtrip_approximates_training_position() {
648 let corpus = three_cluster_corpus();
654 let lap = LaplacianEigenmapProjection::fit(&corpus, RadialStrategy::Fixed(1.0)).unwrap();
655 let first = lap.project(&corpus[0]);
656 let again = lap.project(&corpus[0]);
657 assert!((first.theta - again.theta).abs() < 1e-12);
658 assert!((first.phi - again.phi).abs() < 1e-12);
659 }
660
661 #[test]
662 fn new_point_in_known_cluster_routes_to_cluster_region() {
663 let corpus = three_cluster_corpus();
664 let lap = LaplacianEigenmapProjection::fit(&corpus, RadialStrategy::Fixed(1.0)).unwrap();
665 let query = emb(&[1.0, 0.8, 0.7, 0.02, -0.02, 0.02, -0.02, 0.02]);
668 let q = lap.project(&query);
669 let a_member = lap.project(&corpus[0]);
670 let c_member = lap.project(&corpus[10]);
671 let d_to_a = angular_distance(&q, &a_member);
672 let d_to_c = angular_distance(&q, &c_member);
673 assert!(
674 d_to_a < d_to_c,
675 "query→A {d_to_a} should be less than query→C {d_to_c}"
676 );
677 }
678
679 #[test]
680 fn dimensionality_matches_input() {
681 let corpus = three_cluster_corpus();
682 let lap = LaplacianEigenmapProjection::fit(&corpus, RadialStrategy::Fixed(1.0)).unwrap();
683 assert_eq!(lap.dimensionality(), 8);
684 }
685
686 #[test]
687 fn fit_rejects_tiny_corpus() {
688 let corpus = vec![emb(&[1.0, 0.0]), emb(&[0.0, 1.0])];
689 assert!(matches!(
690 LaplacianEigenmapProjection::fit(&corpus, RadialStrategy::Fixed(1.0)),
691 Err(ProjectionError::TooFewEmbeddings {
692 got: 2,
693 required: 4
694 })
695 ));
696 }
697
698 #[test]
699 fn project_rich_certainty_in_range() {
700 let corpus = three_cluster_corpus();
701 let lap = LaplacianEigenmapProjection::fit(&corpus, RadialStrategy::Fixed(1.0)).unwrap();
702 for e in &corpus {
703 let pp = lap.project_rich(e);
704 assert!(pp.certainty >= 0.0 && pp.certainty <= 1.0);
705 }
706 }
707
708 #[test]
709 fn disconnected_query_gracefully_placed() {
710 let corpus = three_cluster_corpus();
714 let lap = LaplacianEigenmapProjection::fit(&corpus, RadialStrategy::Fixed(1.0)).unwrap();
715 let query = emb(&[0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001]);
716 let sp = lap.project(&query);
717 assert!(sp.r.is_finite());
718 assert!(sp.theta.is_finite());
719 assert!(sp.phi.is_finite());
720 }
721}