Skip to main content

sphereql_embed/
laplacian.rs

1//! Laplacian eigenmap projection — connectivity-preserving embedding on S².
2//!
3//! Standard PCA/kernel PCA maximize *variance* — the directions along which
4//! data spreads. On sparse, noise-heavy embeddings where signal lives in a
5//! handful of co-activated axes and the rest is low-amplitude noise, variance
6//! maximization pulls the projection toward the noise axes and collapses
7//! genuine category structure. Laplacian eigenmaps instead preserve
8//! *connectivity*: two points are "close" on the sphere if they share many
9//! active axes in the original space, regardless of how small those
10//! activations are in absolute magnitude.
11//!
12//! # Algorithm
13//!
14//! 1. For each embedding, extract the set of axes whose absolute weight
15//!    exceeds `active_threshold` (noise filter).
16//! 2. Build a pairwise Jaccard-similarity matrix W over those active sets.
17//! 3. Sparsify W to a mutual-k-nearest-neighbors graph (union semantics).
18//! 4. Compute the normalized affinity matrix W_norm = D^(-1/2) W D^(-1/2).
19//! 5. Find the top-3 *non-trivial* eigenvectors of W_norm via power iteration
20//!    with deflation against the trivial eigenvector (μ=1, ∝ sqrt(D)).
21//! 6. Project: for a new embedding, apply the Nyström out-of-sample formula
22//!    u_k(y) = (1/μ_k) Σ_j w_norm(y, j) · u_k(j), then L2-normalize the
23//!    three coordinates to land on S².
24//!
25//! # References
26//!
27//! * Belkin & Niyogi. "Laplacian Eigenmaps for Dimensionality Reduction and
28//!   Data Representation." *Neural Computation* 15 (2003) 1373–1396.
29//! * Bengio et al. "Out-of-Sample Extensions for LLE, Isomap, MDS, Eigenmaps,
30//!   and Spectral Clustering." *NIPS* 2003.
31
32use sphereql_core::SphericalPoint;
33
34use crate::projection::{
35    Projection, ProjectionError, SplitMix64, dot, normalize_vec, project_xyz_to_spherical,
36};
37use crate::types::{Embedding, ProjectedPoint, RadialStrategy};
38
39// ── Defaults ───────────────────────────────────────────────────────────
40
41/// Absolute-weight cutoff below which an axis is treated as noise and
42/// excluded from the active set used to compute Jaccard similarity.
43pub const DEFAULT_ACTIVE_THRESHOLD: f64 = 0.05;
44
45/// k in the k-NN graph sparsification step. Each point keeps edges to its
46/// top-k most similar neighbors; the graph is the *union* of those edges.
47pub const DEFAULT_K_NEIGHBORS: usize = 15;
48
49/// Floor on the degree of any graph node. Zero-degree nodes would produce
50/// NaN during D^(-1/2) normalization; this regularization keeps the spectrum
51/// well-defined on disconnected graphs.
52pub(crate) const DEGREE_REGULARIZATION: f64 = 1e-6;
53
54const MAX_POWER_ITERS: usize = 400;
55const POWER_ITER_TOL: f64 = 1e-10;
56const RNG_SEED: u64 = 0xC0FF_EECA_FE00;
57
58// ── Projection ─────────────────────────────────────────────────────────
59
60/// Laplacian-eigenmap projection: embeds concepts on S² by the spectral
61/// structure of the axis-co-activation graph rather than raw variance.
62///
63/// Fit cost: O(n² · d) to build the Jaccard graph plus O(n² · q · iters)
64/// for power iteration on the n×n affinity matrix. Memory: O(n²) during
65/// fit, O(n) per eigenvector after.
66///
67/// Project cost (Nyström out-of-sample): O(n · d) per query.
68#[derive(Clone)]
69pub struct LaplacianEigenmapProjection {
70    // ── Fit parameters ──
71    active_threshold: f64,
72    radial: RadialStrategy,
73    dim: usize,
74
75    // ── Fitted corpus state ──
76    /// For each corpus point, the sorted list of active-axis indices.
77    /// Used at query time to compute Jaccard similarity with new points.
78    corpus_active_sets: Vec<Vec<usize>>,
79    /// Sparsified degree of each corpus point: Σ_j W[i][j].
80    /// Used in the Nyström extension's D^(-1/2) normalization.
81    corpus_degrees: Vec<f64>,
82
83    /// Top-3 non-trivial eigenvectors of the normalized affinity W_norm,
84    /// in descending eigenvalue order. Each vector has length n.
85    eigenvectors: [Vec<f64>; 3],
86    /// Corresponding eigenvalues μ_1, μ_2, μ_3 ∈ (−1, 1). The trivial
87    /// μ_0 = 1 is excluded.
88    eigenvalues: [f64; 3],
89
90    /// Mean of |μ_k| across the three retained eigenvalues. A proxy for
91    /// how much "low-frequency" connectivity structure the embedding
92    /// captured. Reported in place of PCA's explained_variance_ratio for
93    /// API compatibility; see [`Self::connectivity_ratio`].
94    connectivity_ratio: f64,
95}
96
97impl LaplacianEigenmapProjection {
98    /// Fit with default parameters (`k=15`, `active_threshold=0.05`).
99    pub fn fit(embeddings: &[Embedding], radial: RadialStrategy) -> Result<Self, ProjectionError> {
100        Self::fit_with_params(
101            embeddings,
102            DEFAULT_K_NEIGHBORS,
103            DEFAULT_ACTIVE_THRESHOLD,
104            radial,
105        )
106    }
107
108    /// Fit with explicit graph parameters.
109    ///
110    /// - `k` — k-NN graph density. Higher k = more edges = smoother embedding,
111    ///   less sensitive to local noise, but also more blurred category
112    ///   boundaries. Typical range: 10–30.
113    /// - `active_threshold` — absolute-weight cutoff for the active-axis
114    ///   filter. Axes with |v_i| ≤ threshold contribute nothing to the
115    ///   similarity metric.
116    pub fn fit_with_params(
117        embeddings: &[Embedding],
118        k: usize,
119        active_threshold: f64,
120        radial: RadialStrategy,
121    ) -> Result<Self, ProjectionError> {
122        let n = embeddings.len();
123        if n < 4 {
124            return Err(ProjectionError::TooFewEmbeddings {
125                got: n,
126                required: 4,
127            });
128        }
129        let dim = embeddings[0].dimension();
130        if dim == 0 {
131            return Err(ProjectionError::DimensionTooLow {
132                got: dim,
133                required: 1,
134            });
135        }
136        let k = k.min(n - 1).max(1);
137
138        // 1. Active-axis sets (sorted for merge-style intersection).
139        let corpus_active_sets: Vec<Vec<usize>> = embeddings
140            .iter()
141            .map(|e| {
142                let mut idxs: Vec<usize> = e
143                    .values
144                    .iter()
145                    .enumerate()
146                    .filter_map(|(i, v)| (v.abs() > active_threshold).then_some(i))
147                    .collect();
148                idxs.sort_unstable();
149                idxs
150            })
151            .collect();
152
153        // 2–4. Sparse top-k graph construction.
154        //
155        // Per-row top-k min-heap keyed on (sim, j). `std::cmp::Reverse`
156        // converts the default max-heap into the min-heap we need so we
157        // can evict the lowest-similarity candidate as we go.
158        use std::cmp::Reverse;
159        use std::collections::BinaryHeap;
160        #[derive(PartialEq)]
161        struct TopKEntry(f64, usize);
162        impl Eq for TopKEntry {}
163        impl PartialOrd for TopKEntry {
164            fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
165                Some(self.cmp(other))
166            }
167        }
168        impl Ord for TopKEntry {
169            fn cmp(&self, other: &Self) -> std::cmp::Ordering {
170                // Compare on sim first (total_cmp — NaN-safe), break
171                // ties on index so the order is deterministic.
172                self.0
173                    .total_cmp(&other.0)
174                    .then_with(|| self.1.cmp(&other.1))
175            }
176        }
177
178        let mut top_k_heaps: Vec<BinaryHeap<Reverse<TopKEntry>>> =
179            (0..n).map(|_| BinaryHeap::with_capacity(k + 1)).collect();
180
181        for i in 0..n {
182            for j in (i + 1)..n {
183                let s = jaccard_sorted(&corpus_active_sets[i], &corpus_active_sets[j]);
184                if s <= 0.0 {
185                    continue;
186                }
187                for (owner, other) in [(i, j), (j, i)] {
188                    let h = &mut top_k_heaps[owner];
189                    h.push(Reverse(TopKEntry(s, other)));
190                    if h.len() > k {
191                        h.pop();
192                    }
193                }
194            }
195        }
196
197        // Union-symmetrize the per-row top-k candidates into a dedup
198        // edge set. Jaccard is symmetric by construction, so we can
199        // recover the edge weight from the heap entry.
200        let mut edges: std::collections::HashMap<(usize, usize), f64> =
201            std::collections::HashMap::with_capacity(n * k);
202        for (i, heap) in top_k_heaps.into_iter().enumerate() {
203            for Reverse(TopKEntry(s, j)) in heap.into_iter() {
204                let key = if i < j { (i, j) } else { (j, i) };
205                edges.insert(key, s);
206            }
207        }
208
209        // Degrees: sum of weights on each node's incident edges.
210        let mut degrees = vec![0.0f64; n];
211        for (&(a, b), &s) in edges.iter() {
212            degrees[a] += s;
213            degrees[b] += s;
214        }
215
216        // Regularize isolated/near-isolated nodes so D^(-1/2) stays finite.
217        let safe_degrees: Vec<f64> = degrees
218            .iter()
219            .map(|&d| d.max(DEGREE_REGULARIZATION))
220            .collect();
221        let d_inv_sqrt: Vec<f64> = safe_degrees.iter().map(|&d| 1.0 / d.sqrt()).collect();
222
223        // 5. Scatter the symmetric W_norm = D^(-1/2) · W · D^(-1/2)
224        //    directly from the edge map. Cells with no edge stay 0.
225        let mut w_norm = vec![0.0f64; n * n];
226        for (&(a, b), &s) in edges.iter() {
227            let v = s * d_inv_sqrt[a] * d_inv_sqrt[b];
228            w_norm[a * n + b] = v;
229            w_norm[b * n + a] = v;
230        }
231
232        // 6. Trivial eigenvector of W_norm at μ=1 is proportional to sqrt(D).
233        //    Explicitly constructed so deflation against it pulls it out of
234        //    the subsequent power-iteration subspace.
235        let sum_d: f64 = safe_degrees.iter().sum();
236        let trivial_ev: Vec<f64> = safe_degrees
237            .iter()
238            .map(|&d| d.sqrt() / sum_d.sqrt())
239            .collect();
240
241        // 7. Top-3 non-trivial eigenvectors + eigenvalues.
242        let (eigenvectors_vec, eigenvalues_vec) =
243            top_k_symmetric_excluding(&w_norm, n, 3, &[trivial_ev]);
244
245        let eigenvalues: [f64; 3] = [eigenvalues_vec[0], eigenvalues_vec[1], eigenvalues_vec[2]];
246        let eigenvectors: [Vec<f64>; 3] = [
247            eigenvectors_vec[0].clone(),
248            eigenvectors_vec[1].clone(),
249            eigenvectors_vec[2].clone(),
250        ];
251
252        let connectivity_ratio =
253            (eigenvalues[0].abs() + eigenvalues[1].abs() + eigenvalues[2].abs()) / 3.0;
254
255        Ok(Self {
256            active_threshold,
257            radial,
258            dim,
259            corpus_active_sets,
260            corpus_degrees: safe_degrees,
261            eigenvectors,
262            eigenvalues,
263            connectivity_ratio,
264        })
265    }
266
267    /// Top-3 non-trivial eigenvalues of the normalized affinity matrix
268    /// (in descending order). Values close to 1.0 indicate strong block
269    /// structure along that spectral direction.
270    pub fn eigenvalues(&self) -> [f64; 3] {
271        self.eigenvalues
272    }
273
274    /// Scalar quality proxy analogous to PCA's explained-variance ratio.
275    ///
276    /// Defined as the mean of `|μ_k|` across the three retained eigenvalues;
277    /// bounded in [0, 1]. Higher = stronger low-frequency community structure
278    /// captured by the 3D embedding.
279    ///
280    /// *Not* directly comparable to PCA EVR — the quantities have different
281    /// mathematical meaning. Use as a signal of projection health for this
282    /// projection family specifically.
283    pub fn connectivity_ratio(&self) -> f64 {
284        self.connectivity_ratio
285    }
286
287    /// Returned for API compatibility with `PcaProjection::explained_variance_ratio`.
288    /// See [`Self::connectivity_ratio`] for semantics — the two are not the
289    /// same metric, but both live in [0, 1] and can feed EVR-adaptive
290    /// thresholds downstream.
291    pub fn explained_variance_ratio(&self) -> f64 {
292        self.connectivity_ratio
293    }
294
295    /// Nyström-extend the three retained eigenvectors to a new embedding y,
296    /// returning raw (unnormalized) 3D coordinates.
297    fn project_to_3d(&self, embedding: &Embedding) -> (f64, f64, f64) {
298        let mut active_y: Vec<usize> = embedding
299            .values
300            .iter()
301            .enumerate()
302            .filter_map(|(i, v)| (v.abs() > self.active_threshold).then_some(i))
303            .collect();
304        active_y.sort_unstable();
305
306        let n = self.corpus_active_sets.len();
307        let sims: Vec<f64> = self
308            .corpus_active_sets
309            .iter()
310            .map(|s| jaccard_sorted(&active_y, s))
311            .collect();
312
313        // Treat the query as a new graph node whose similarities to corpus
314        // points are the sims vector. Its degree is the sum of those sims.
315        let degree_y = sims.iter().sum::<f64>().max(DEGREE_REGULARIZATION);
316        let inv_sqrt_dy = 1.0 / degree_y.sqrt();
317
318        // Nyström extension: u_k(y) = (1/μ_k) Σ_j w_norm(y, j) · u_k(j).
319        // w_norm(y, j) = sims[j] / (sqrt(d_y) · sqrt(d_j)).
320        let mut coords = [0.0f64; 3];
321        for (k, (ev, &mu)) in self
322            .eigenvectors
323            .iter()
324            .zip(self.eigenvalues.iter())
325            .enumerate()
326        {
327            if mu.abs() < 1e-10 {
328                continue;
329            }
330            let mut s = 0.0;
331            for j in 0..n {
332                let w_norm_yj = sims[j] * inv_sqrt_dy / self.corpus_degrees[j].sqrt();
333                s += w_norm_yj * ev[j];
334            }
335            coords[k] = s / mu;
336        }
337        (coords[0], coords[1], coords[2])
338    }
339}
340
341impl Projection for LaplacianEigenmapProjection {
342    fn project(&self, embedding: &Embedding) -> SphericalPoint {
343        let (x, y, z) = self.project_to_3d(embedding);
344        let proj_mag = (x * x + y * y + z * z).sqrt();
345        let r = self.radial.compute_rich(&crate::types::RadialContext::full(
346            embedding.magnitude(),
347            proj_mag,
348            proj_mag.tanh(),
349        ));
350        project_xyz_to_spherical(x, y, z, r)
351    }
352
353    fn project_rich(&self, embedding: &Embedding) -> ProjectedPoint {
354        let (x, y, z) = self.project_to_3d(embedding);
355        let proj_mag = (x * x + y * y + z * z).sqrt();
356        // Certainty reflects whether the Nyström extension produced a
357        // non-degenerate coordinate. A zero-magnitude projection means the
358        // query had no active axes in common with the corpus — the placement
359        // is effectively arbitrary, so certainty should be low.
360        let certainty = proj_mag.tanh();
361        let intensity = embedding.magnitude();
362        let r = self.radial.compute_rich(&crate::types::RadialContext::full(
363            intensity, proj_mag, certainty,
364        ));
365        let position = project_xyz_to_spherical(x, y, z, r);
366        ProjectedPoint {
367            position,
368            certainty,
369            intensity,
370            projection_magnitude: proj_mag,
371        }
372    }
373
374    fn dimensionality(&self) -> usize {
375        self.dim
376    }
377}
378
379// ── Helpers ────────────────────────────────────────────────────────────
380
381/// Jaccard similarity between two sorted, deduplicated index sets.
382/// Merge-intersect in O(|a| + |b|).
383fn jaccard_sorted(a: &[usize], b: &[usize]) -> f64 {
384    if a.is_empty() && b.is_empty() {
385        return 0.0;
386    }
387    let mut ia = 0;
388    let mut ib = 0;
389    let mut intersection: usize = 0;
390    while ia < a.len() && ib < b.len() {
391        match a[ia].cmp(&b[ib]) {
392            std::cmp::Ordering::Equal => {
393                intersection += 1;
394                ia += 1;
395                ib += 1;
396            }
397            std::cmp::Ordering::Less => ia += 1,
398            std::cmp::Ordering::Greater => ib += 1,
399        }
400    }
401    let union = a.len() + b.len() - intersection;
402    if union == 0 {
403        0.0
404    } else {
405        intersection as f64 / union as f64
406    }
407}
408
409/// Power iteration with deflation on a symmetric n×n matrix (row-major).
410/// Finds the top-k eigenvectors orthogonal to everything in `exclude`.
411///
412/// Returns (vectors, values) in descending eigenvalue order.
413fn top_k_symmetric_excluding(
414    matrix: &[f64],
415    n: usize,
416    k: usize,
417    exclude: &[Vec<f64>],
418) -> (Vec<Vec<f64>>, Vec<f64>) {
419    let mut vectors: Vec<Vec<f64>> = Vec::with_capacity(k);
420    let mut values: Vec<f64> = Vec::with_capacity(k);
421    let mut rng = SplitMix64::new(RNG_SEED);
422
423    let matvec = |dst: &mut [f64], src: &[f64]| {
424        for (i, dst_i) in dst.iter_mut().enumerate() {
425            let row = i * n;
426            let mut s = 0.0;
427            for j in 0..n {
428                s += matrix[row + j] * src[j];
429            }
430            *dst_i = s;
431        }
432    };
433
434    for _ in 0..k {
435        let mut v: Vec<f64> = (0..n).map(|_| rng.normal()).collect();
436        // Initial orthogonalization against exclude + previously-found vectors.
437        for prev in exclude.iter().chain(vectors.iter()) {
438            let proj = dot(&v, prev);
439            for (vi, &pi) in v.iter_mut().zip(prev.iter()) {
440                *vi -= proj * pi;
441            }
442        }
443        normalize_vec(&mut v);
444        let mut eigenvalue = 0.0;
445
446        let mut mv_cache: Option<Vec<f64>> = None;
447
448        for _ in 0..MAX_POWER_ITERS {
449            let mut u = mv_cache.take().unwrap_or_else(|| {
450                let mut u = vec![0.0f64; n];
451                matvec(&mut u, &v);
452                u
453            });
454            // Deflate each iteration so drift back into the excluded subspace
455            // is continuously removed.
456            for prev in exclude.iter().chain(vectors.iter()) {
457                let proj = dot(&u, prev);
458                for (ui, &pi) in u.iter_mut().zip(prev.iter()) {
459                    *ui -= proj * pi;
460                }
461            }
462            let mag = normalize_vec(&mut u);
463            if mag < f64::EPSILON {
464                break;
465            }
466
467            let mut mv_next = vec![0.0f64; n];
468            matvec(&mut mv_next, &u);
469            eigenvalue = dot(&u, &mv_next);
470
471            let change = (1.0 - dot(&u, &v).abs()).max(0.0);
472            v = u;
473            mv_cache = Some(mv_next);
474            if change < POWER_ITER_TOL {
475                break;
476            }
477        }
478
479        vectors.push(v);
480        values.push(eigenvalue);
481    }
482
483    // Sort in descending eigenvalue order (power iteration usually finds
484    // them in that order already, but deflation drift can swap adjacent
485    // pairs on near-degenerate spectra).
486    let mut order: Vec<usize> = (0..vectors.len()).collect();
487    order.sort_by(|&a, &b| {
488        values[b]
489            .partial_cmp(&values[a])
490            .unwrap_or(std::cmp::Ordering::Equal)
491    });
492    let sorted_vectors: Vec<Vec<f64>> = order.iter().map(|&i| vectors[i].clone()).collect();
493    let sorted_values: Vec<f64> = order.iter().map(|&i| values[i]).collect();
494    (sorted_vectors, sorted_values)
495}
496
497// ── Tests ──────────────────────────────────────────────────────────────
498
499#[cfg(test)]
500mod tests {
501    use super::*;
502    use sphereql_core::angular_distance;
503
504    fn emb(vals: &[f64]) -> Embedding {
505        Embedding::new(vals.to_vec())
506    }
507
508    /// Three well-separated 8-dim clusters. Each cluster activates 3
509    /// distinct axes strongly; the remaining 5 dims carry ±0.02 noise.
510    fn three_cluster_corpus() -> Vec<Embedding> {
511        let noise = 0.02;
512        let mut corpus = Vec::new();
513
514        // Cluster A: axes 0, 1, 2
515        for i in 0..5 {
516            let delta = i as f64 * 0.01;
517            corpus.push(emb(&[
518                1.0 + delta,
519                0.8 - delta,
520                0.7 + delta,
521                noise,
522                -noise,
523                noise,
524                -noise,
525                noise,
526            ]));
527        }
528        // Cluster B: axes 3, 4, 5
529        for i in 0..5 {
530            let delta = i as f64 * 0.01;
531            corpus.push(emb(&[
532                noise,
533                -noise,
534                noise,
535                1.0 + delta,
536                0.8 - delta,
537                0.7 + delta,
538                -noise,
539                noise,
540            ]));
541        }
542        // Cluster C: axes 5, 6, 7 (shares axis 5 with B to exercise a bridge)
543        for i in 0..5 {
544            let delta = i as f64 * 0.01;
545            corpus.push(emb(&[
546                -noise,
547                noise,
548                -noise,
549                noise,
550                -noise,
551                0.9 + delta,
552                1.0 - delta,
553                0.8 + delta,
554            ]));
555        }
556        corpus
557    }
558
559    #[test]
560    fn jaccard_empty_sets_zero() {
561        assert_eq!(jaccard_sorted(&[], &[]), 0.0);
562        assert_eq!(jaccard_sorted(&[1, 2], &[]), 0.0);
563    }
564
565    #[test]
566    fn jaccard_identical_sets_one() {
567        assert!((jaccard_sorted(&[1, 2, 3], &[1, 2, 3]) - 1.0).abs() < 1e-12);
568    }
569
570    #[test]
571    fn jaccard_disjoint_sets_zero() {
572        assert_eq!(jaccard_sorted(&[1, 2], &[3, 4]), 0.0);
573    }
574
575    #[test]
576    fn jaccard_partial_overlap() {
577        // {1,2,3} ∩ {2,3,4} = {2,3} (2), union = {1,2,3,4} (4) → 0.5
578        assert!((jaccard_sorted(&[1, 2, 3], &[2, 3, 4]) - 0.5).abs() < 1e-12);
579    }
580
581    #[test]
582    fn fit_produces_non_trivial_eigenvalues() {
583        let corpus = three_cluster_corpus();
584        let lap = LaplacianEigenmapProjection::fit(&corpus, RadialStrategy::Fixed(1.0)).unwrap();
585        let [m1, m2, m3] = lap.eigenvalues();
586        // All retained eigenvalues must be strictly below 1 (trivial excluded)
587        // and above 0 (the 3-cluster structure should produce meaningful
588        // non-zero connectivity eigenvalues).
589        assert!(m1 < 1.0 && m1 > 0.0, "μ_1 = {m1}");
590        assert!(m2 < 1.0 && m2 > -1.0, "μ_2 = {m2}");
591        assert!(m3 < 1.0 && m3 > -1.0, "μ_3 = {m3}");
592        // Descending order.
593        assert!(m1 >= m2 - 1e-10);
594        assert!(m2 >= m3 - 1e-10);
595    }
596
597    #[test]
598    fn connectivity_ratio_in_unit_interval() {
599        let corpus = three_cluster_corpus();
600        let lap = LaplacianEigenmapProjection::fit(&corpus, RadialStrategy::Fixed(1.0)).unwrap();
601        let r = lap.connectivity_ratio();
602        assert!((0.0..=1.0).contains(&r), "connectivity_ratio = {r}");
603        assert_eq!(r, lap.explained_variance_ratio());
604    }
605
606    #[test]
607    fn projection_lands_on_unit_sphere() {
608        let corpus = three_cluster_corpus();
609        let lap = LaplacianEigenmapProjection::fit(&corpus, RadialStrategy::Fixed(1.0)).unwrap();
610        for e in &corpus {
611            let sp = lap.project(e);
612            assert!((sp.r - 1.0).abs() < 1e-9, "r = {}", sp.r);
613            assert!(sp.theta >= 0.0 && sp.theta <= std::f64::consts::TAU);
614            assert!(sp.phi >= 0.0 && sp.phi <= std::f64::consts::PI);
615        }
616    }
617
618    #[test]
619    fn same_cluster_points_closer_than_cross_cluster() {
620        let corpus = three_cluster_corpus();
621        let lap = LaplacianEigenmapProjection::fit(&corpus, RadialStrategy::Fixed(1.0)).unwrap();
622        let positions: Vec<SphericalPoint> = corpus.iter().map(|e| lap.project(e)).collect();
623
624        // Within cluster A (indices 0..5): max pairwise distance.
625        let mut max_within_a = 0.0f64;
626        for i in 0..5 {
627            for j in (i + 1)..5 {
628                max_within_a = max_within_a.max(angular_distance(&positions[i], &positions[j]));
629            }
630        }
631        // A vs C (indices 0..5 vs 10..15): min pairwise distance. A and C
632        // share no axes, so C should be the farthest-out of the three
633        // clusters from A.
634        let mut min_a_to_c = f64::INFINITY;
635        for i in 0..5 {
636            for j in 10..15 {
637                min_a_to_c = min_a_to_c.min(angular_distance(&positions[i], &positions[j]));
638            }
639        }
640        assert!(
641            max_within_a < min_a_to_c,
642            "within-A max ({max_within_a}) should be less than A-to-C min ({min_a_to_c})"
643        );
644    }
645
646    #[test]
647    fn nystrom_roundtrip_approximates_training_position() {
648        // A corpus point fed back through project() should land near its
649        // Nyström-implied position. We don't demand exact recovery (the
650        // Nyström extension differs subtly from the in-sample eigenvector
651        // coord because Jaccard(x, x) = 1 but W[i][i] = 0 during fit), but
652        // the same corpus point should project consistently across calls.
653        let corpus = three_cluster_corpus();
654        let lap = LaplacianEigenmapProjection::fit(&corpus, RadialStrategy::Fixed(1.0)).unwrap();
655        let first = lap.project(&corpus[0]);
656        let again = lap.project(&corpus[0]);
657        assert!((first.theta - again.theta).abs() < 1e-12);
658        assert!((first.phi - again.phi).abs() < 1e-12);
659    }
660
661    #[test]
662    fn new_point_in_known_cluster_routes_to_cluster_region() {
663        let corpus = three_cluster_corpus();
664        let lap = LaplacianEigenmapProjection::fit(&corpus, RadialStrategy::Fixed(1.0)).unwrap();
665        // Synthesize a fresh "cluster-A" point and check it lands closer to
666        // an existing cluster-A member than to a cluster-C member.
667        let query = emb(&[1.0, 0.8, 0.7, 0.02, -0.02, 0.02, -0.02, 0.02]);
668        let q = lap.project(&query);
669        let a_member = lap.project(&corpus[0]);
670        let c_member = lap.project(&corpus[10]);
671        let d_to_a = angular_distance(&q, &a_member);
672        let d_to_c = angular_distance(&q, &c_member);
673        assert!(
674            d_to_a < d_to_c,
675            "query→A {d_to_a} should be less than query→C {d_to_c}"
676        );
677    }
678
679    #[test]
680    fn dimensionality_matches_input() {
681        let corpus = three_cluster_corpus();
682        let lap = LaplacianEigenmapProjection::fit(&corpus, RadialStrategy::Fixed(1.0)).unwrap();
683        assert_eq!(lap.dimensionality(), 8);
684    }
685
686    #[test]
687    fn fit_rejects_tiny_corpus() {
688        let corpus = vec![emb(&[1.0, 0.0]), emb(&[0.0, 1.0])];
689        assert!(matches!(
690            LaplacianEigenmapProjection::fit(&corpus, RadialStrategy::Fixed(1.0)),
691            Err(ProjectionError::TooFewEmbeddings {
692                got: 2,
693                required: 4
694            })
695        ));
696    }
697
698    #[test]
699    fn project_rich_certainty_in_range() {
700        let corpus = three_cluster_corpus();
701        let lap = LaplacianEigenmapProjection::fit(&corpus, RadialStrategy::Fixed(1.0)).unwrap();
702        for e in &corpus {
703            let pp = lap.project_rich(e);
704            assert!(pp.certainty >= 0.0 && pp.certainty <= 1.0);
705        }
706    }
707
708    #[test]
709    fn disconnected_query_gracefully_placed() {
710        // A query with no active axes (all below threshold) has no similarity
711        // to the corpus. It should still produce a valid SphericalPoint
712        // rather than NaN.
713        let corpus = three_cluster_corpus();
714        let lap = LaplacianEigenmapProjection::fit(&corpus, RadialStrategy::Fixed(1.0)).unwrap();
715        let query = emb(&[0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001]);
716        let sp = lap.project(&query);
717        assert!(sp.r.is_finite());
718        assert!(sp.theta.is_finite());
719        assert!(sp.phi.is_finite());
720    }
721}